From d38fc3b390ad40afc67a88294a26a37f4176a8a9 Mon Sep 17 00:00:00 2001 From: szbk Date: Mon, 16 Feb 2026 10:50:59 +0300 Subject: [PATCH] =?UTF-8?q?feat(api):=20turkcealtyazi=20ger=C3=A7ek=20modu?= =?UTF-8?q?nu=20stabil=20hale=20getir=20ve=20mock=20altyap=C4=B1s=C4=B1n?= =?UTF-8?q?=C4=B1=20kald=C4=B1r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mock fallback mantığını ve deterministik mock üretim kodlarını kaldırarak TurkceAltyazi sağlayıcısını tamamen gerçek moda geçirdi. İyileştirilmiş arama, indirme ve çerez yönetimi ile sağlam bir entegrasyon sağlandı. - MockArtifact ve deterministic modüllerini kaldır - TurkceAltyaziProvider'da mock fallback mantığını tamamen kaldır - HTTP çerez yönetimi, retry mantığı ve hata işleme iyileştirmeleri - ENABLE_TA_STEP_LOGS yapılandırması ile adım adım loglama - TURKCEALTYAZI_ALLOW_MOCK_FALLBACK ortam değişkenini kaldır - Dokümantasyonu gerçek modu yansıtacak şekilde güncelle - OpenSubtitles sağlayıcısını gerçek entegrasyon tamamlanana kadar pasif yap - Varsayılan kaynak etiketini 'mock' yerine 'unknown' olarak güncelle --- .env.example | 2 +- README.md | 15 +- compose.dev.yml | 2 +- doc/ARCHITECTURE_AND_FLOW.md | 5 +- doc/HANDOVER_2026-02-16.md | 6 +- doc/TURKCEALTYAZI_REAL_STATUS.md | 12 +- services/api/src/config/env.ts | 2 +- services/api/src/lib/deterministic.ts | 18 - services/api/src/lib/mockArtifact.ts | 51 --- services/api/src/lib/subtitleEngine.ts | 2 +- services/api/src/lib/taLog.ts | 21 + services/api/src/lib/turkcealtyaziReal.ts | 367 +++++++++++++----- .../src/providers/OpenSubtitlesProvider.ts | 45 +-- .../src/providers/TurkceAltyaziProvider.ts | 120 +++--- services/api/test/scoring.test.ts | 2 +- services/core/src/workers/pipeline.ts | 2 +- 16 files changed, 374 insertions(+), 298 deletions(-) delete mode 100644 services/api/src/lib/deterministic.ts delete mode 100644 services/api/src/lib/mockArtifact.ts create mode 100644 services/api/src/lib/taLog.ts diff --git 
a/.env.example b/.env.example index aacbafd..b9fb6f0 100644 --- a/.env.example +++ b/.env.example @@ -12,8 +12,8 @@ MEDIA_TV_PATH=/media/tv MEDIA_MOVIE_PATH=/media/movie ENABLE_API_KEY=false API_KEY= +ENABLE_TA_STEP_LOGS=false ENABLE_TURKCEALTYAZI_REAL=false -TURKCEALTYAZI_ALLOW_MOCK_FALLBACK=true TURKCEALTYAZI_BASE_URL=https://turkcealtyazi.org TURKCEALTYAZI_TIMEOUT_MS=12000 TURKCEALTYAZI_MIN_DELAY_MS=300 diff --git a/README.md b/README.md index 237dbe4..cb873c3 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Docker tabanli altyazi otomasyon sistemi. - `core`: watcher + ffprobe + BullMQ + Mongo job/log API + review akisi -- `api`: mock provider subtitle engine (TurkceAltyazi/OpenSubtitles stub) + archive extraction + security + scoring +- `api`: gercek provider subtitle engine + archive extraction + security + scoring - `ui`: React/Vite panel (dashboard, jobs, detail live logs, review, settings, watched paths) ## Mimari @@ -18,30 +18,18 @@ Docker tabanli altyazi otomasyon sistemi. - UI -> Core API: `http://localhost:3001/api` (CORS acik) - Temp alan: `/temp/{jobToken}` -## Mock Provider Notu - -Gercek scraping/API cagrilari varsayilan olarak kapali gelir. - -- `TurkceAltyaziProvider`: mock + TODO -- `OpenSubtitlesProvider`: mock + TODO - -Deterministik candidate uretimi vardir (aynı input = ayni aday davranisi). - ## TurkceAltyazi Gercek Modu (v2) Gercek entegrasyon feature flag ile acilabilir: ```env ENABLE_TURKCEALTYAZI_REAL=true -TURKCEALTYAZI_ALLOW_MOCK_FALLBACK=true TURKCEALTYAZI_BASE_URL=https://turkcealtyazi.org TURKCEALTYAZI_TIMEOUT_MS=12000 TURKCEALTYAZI_MIN_DELAY_MS=300 ``` - `ENABLE_TURKCEALTYAZI_REAL=true`: `TurkceAltyaziProvider` gercek HTTP+HTML parse dener. -- `TURKCEALTYAZI_ALLOW_MOCK_FALLBACK=true`: real akista hata olursa mock adaptere doner. -- `false` yaparsan real hata durumunda job tarafina hata/not found olarak yansir. 
## Gelistirme (Dev) @@ -171,6 +159,5 @@ Kapsam: ## Gelecek (v2) -- Gercek TurkceAltyazi scraping - Gercek OpenSubtitles API entegrasyonu - ClamAV tarama (feature flag hazir) diff --git a/compose.dev.yml b/compose.dev.yml index 834a510..1540d23 100644 --- a/compose.dev.yml +++ b/compose.dev.yml @@ -81,7 +81,7 @@ services: - VITE_PUBLIC_CORE_URL=http://localhost:3001 ports: - "5173:5173" - command: sh -c "npm install && npm run dev" + command: sh -c "rm -rf /app/node_modules/* && npm install --include=optional --no-package-lock && npm run dev" volumes: - ./services/ui:/app - ui_node_modules:/app/node_modules diff --git a/doc/ARCHITECTURE_AND_FLOW.md b/doc/ARCHITECTURE_AND_FLOW.md index ba54b48..c59adeb 100644 --- a/doc/ARCHITECTURE_AND_FLOW.md +++ b/doc/ARCHITECTURE_AND_FLOW.md @@ -49,8 +49,8 @@ Providerlar: - `OpenSubtitlesProvider` Not: -- OpenSubtitles su an mock. -- TurkceAltyazi mock + feature-flag ile real deneme moduna sahip. +- TurkceAltyazi provider gercek HTTP+HTML akisi ile calisir. +- OpenSubtitles provider real entegrasyon tamamlanana kadar pasiftir. ## `services/ui` @@ -80,4 +80,3 @@ Sayfalar: 6. API provider adaylarini indirir/isler. 7. best secilirse core `finalizeWrite` ile dosyayi yazar. 8. Job `DONE` olur, loglar UI'da canli akar. - diff --git a/doc/HANDOVER_2026-02-16.md b/doc/HANDOVER_2026-02-16.md index 1b5ee28..4ed22c8 100644 --- a/doc/HANDOVER_2026-02-16.md +++ b/doc/HANDOVER_2026-02-16.md @@ -27,9 +27,7 @@ Bu belge, 16 Subat 2026 tarihine kadar subwatcher projesinde tamamlanan calismal - Review listesi + manuel secim akisi - Settings - Watched Paths -- API tarafinda mock provider altyapisi aktif: - - TurkceAltyazi (mock) - - OpenSubtitles (mock) +- API tarafinda provider altyapisi aktif. 
- Archive extraction/security aktif: - 7z extraction - zip slip kontrolu @@ -51,7 +49,6 @@ Bu belge, 16 Subat 2026 tarihine kadar subwatcher projesinde tamamlanan calismal - Feature flag eklendi (API env): - `ENABLE_TURKCEALTYAZI_REAL` - - `TURKCEALTYAZI_ALLOW_MOCK_FALLBACK` - `TURKCEALTYAZI_BASE_URL` - `TURKCEALTYAZI_TIMEOUT_MS` - `TURKCEALTYAZI_MIN_DELAY_MS` @@ -88,4 +85,3 @@ Bu belge, 16 Subat 2026 tarihine kadar subwatcher projesinde tamamlanan calismal 2. TurkceAltyazi parserini fixture testlerle sabitle. 3. TA hata tiplerini ayir (network/parsing/rate-limit/blocked). 4. Core tarafinda retry/policy netlestir. - diff --git a/doc/TURKCEALTYAZI_REAL_STATUS.md b/doc/TURKCEALTYAZI_REAL_STATUS.md index c8180a3..801b162 100644 --- a/doc/TURKCEALTYAZI_REAL_STATUS.md +++ b/doc/TURKCEALTYAZI_REAL_STATUS.md @@ -4,13 +4,12 @@ Guncel durum tarihi: **16 Subat 2026** ## Hedef -Mock yerine TurkceAltyazi kaynagindan gercek aday bulma ve indirme akisini aktif etmek. +TurkceAltyazi kaynagindan gercek aday bulma ve indirme akisini stabil hale getirmek. ## Tamamlananlar 1. Feature flags tanimli: - `ENABLE_TURKCEALTYAZI_REAL` -- `TURKCEALTYAZI_ALLOW_MOCK_FALLBACK` - `TURKCEALTYAZI_BASE_URL` - `TURKCEALTYAZI_TIMEOUT_MS` - `TURKCEALTYAZI_MIN_DELAY_MS` @@ -22,10 +21,9 @@ Mock yerine TurkceAltyazi kaynagindan gercek aday bulma ve indirme akisini aktif - Detail sayfasinda download link cikarimi - Binary indirme -3. Provider real/mok gecisi var: +3. Provider real akisla calisiyor: - `services/api/src/providers/TurkceAltyaziProvider.ts` - - Real aciksa once real dener - - Basarisiz olursa fallback policy'e gore mock'a duser + - Real aciksa search/download adimlari gercek kaynakta calisir 4. Trace log adimlari var: - `TA_SEARCH_REQUEST` @@ -62,7 +60,6 @@ Mock yerine TurkceAltyazi kaynagindan gercek aday bulma ve indirme akisini aktif 1. `.env`: ```env ENABLE_TURKCEALTYAZI_REAL=true -TURKCEALTYAZI_ALLOW_MOCK_FALLBACK=true ``` 2. 
Servisleri yeniden baslat: ```bash @@ -70,7 +67,7 @@ docker compose -f compose.dev.yml up -d --build api core ``` 3. Yeni bir movie job tetikle. 4. Job logda TA adimlarini dogrula. -5. Mock'a dustuyse sebebi logla ve parseri iyilestir. +5. Hata varsa sebebi logla ve parseri iyilestir. ## Onemli dosyalar @@ -79,4 +76,3 @@ docker compose -f compose.dev.yml up -d --build api core - `services/api/src/lib/subtitleEngine.ts` - `.env` - `.env.example` - diff --git a/services/api/src/config/env.ts b/services/api/src/config/env.ts index 2ae0ecb..b15c944 100644 --- a/services/api/src/config/env.ts +++ b/services/api/src/config/env.ts @@ -8,8 +8,8 @@ export const env = { tempRoot: process.env.TEMP_ROOT ?? '/temp', enableApiKey: process.env.ENABLE_API_KEY === 'true', apiKey: process.env.API_KEY ?? '', + enableTaStepLogs: process.env.ENABLE_TA_STEP_LOGS === 'true', enableTurkcealtyaziReal: process.env.ENABLE_TURKCEALTYAZI_REAL === 'true', - turkcealtyaziAllowMockFallback: process.env.TURKCEALTYAZI_ALLOW_MOCK_FALLBACK !== 'false', turkcealtyaziBaseUrl: process.env.TURKCEALTYAZI_BASE_URL ?? 'https://turkcealtyazi.org', turkcealtyaziTimeoutMs: Number(process.env.TURKCEALTYAZI_TIMEOUT_MS ?? 12000), turkcealtyaziMinDelayMs: Number(process.env.TURKCEALTYAZI_MIN_DELAY_MS ?? 
300) diff --git a/services/api/src/lib/deterministic.ts b/services/api/src/lib/deterministic.ts deleted file mode 100644 index 814c229..0000000 --- a/services/api/src/lib/deterministic.ts +++ /dev/null @@ -1,18 +0,0 @@ -export function hashString(input: string): number { - let h = 2166136261; - for (let i = 0; i < input.length; i++) { - h ^= input.charCodeAt(i); - h = Math.imul(h, 16777619); - } - return h >>> 0; -} - -export function seeded(seed: number): () => number { - let t = seed; - return () => { - t += 0x6d2b79f5; - let x = Math.imul(t ^ (t >>> 15), t | 1); - x ^= x + Math.imul(x ^ (x >>> 7), x | 61); - return ((x ^ (x >>> 14)) >>> 0) / 4294967296; - }; -} diff --git a/services/api/src/lib/mockArtifact.ts b/services/api/src/lib/mockArtifact.ts deleted file mode 100644 index d767cae..0000000 --- a/services/api/src/lib/mockArtifact.ts +++ /dev/null @@ -1,51 +0,0 @@ -import fs from 'node:fs/promises'; -import path from 'node:path'; -import AdmZip from 'adm-zip'; -import type { Candidate, SearchParams } from '../types/index.js'; -import { hashString, seeded } from './deterministic.js'; - -function buildSrt(title: string, season?: number, episode?: number): string { - const ep = season && episode ? 
` S${String(season).padStart(2, '0')}E${String(episode).padStart(2, '0')}` : ''; - return `1\n00:00:01,000 --> 00:00:04,000\n${title}${ep} satir 1\n\n2\n00:00:05,000 --> 00:00:08,000\n${title}${ep} satir 2\n\n3\n00:00:09,000 --> 00:00:12,000\n${title}${ep} satir 3\n`; -} - -function buildAss(title: string): string { - return `[Script Info]\nTitle: ${title}\n[Events]\nDialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Ass satiri\n`; -} - -export async function generateMockArtifact(candidate: Candidate, params: SearchParams, jobToken: string, downloadDir: string): Promise<{ type: 'archive' | 'direct'; filePath: string }> { - await fs.mkdir(downloadDir, { recursive: true }); - const seed = hashString(`${jobToken}|${params.title}|${candidate.id}`); - const rnd = seeded(seed); - - if (candidate.downloadType === 'direct') { - const filePath = path.join(downloadDir, `${candidate.id}.srt`); - await fs.writeFile(filePath, buildSrt(params.title, params.season, params.episode), 'utf8'); - return { type: 'direct', filePath }; - } - - const zip = new AdmZip(); - if (params.type === 'tv') { - const s = params.season ?? 1; - const e = params.episode ?? 1; - const base = params.title.replace(/\s+/g, '.'); - zip.addFile(`${base}.S${String(s).padStart(2, '0')}E${String(e).padStart(2, '0')}.1080p.srt`, Buffer.from(buildSrt(params.title, s, e))); - zip.addFile(`${base}.S${String(s).padStart(2, '0')}E${String(e + 1).padStart(2, '0')}.srt`, Buffer.from(buildSrt(params.title, s, e + 1))); - zip.addFile(`${base}.S${String(s).padStart(2, '0')}E${String(Math.max(1, e - 1)).padStart(2, '0')}.srt`, Buffer.from(buildSrt(params.title, s, Math.max(1, e - 1)))); - if (rnd() > 0.5) { - zip.addFile(`${base}.S${String(s).padStart(2, '0')}E${String(e).padStart(2, '0')}.ass`, Buffer.from(buildAss(params.title))); - } - } else { - const name = `${params.title.replace(/\s+/g, '.')}.${params.year ?? 
2020}`; - zip.addFile(`${name}.tr.srt`, Buffer.from(buildSrt(params.title))); - if (rnd() > 0.3) { - zip.addFile(`${name}.txt`, Buffer.from('this is not subtitle')); - } - } - - zip.addFile('invalid.bin', Buffer.from([0, 159, 255, 0, 18])); - - const archivePath = path.join(downloadDir, `${candidate.id}.zip`); - zip.writeZip(archivePath); - return { type: 'archive', filePath: archivePath }; -} diff --git a/services/api/src/lib/subtitleEngine.ts b/services/api/src/lib/subtitleEngine.ts index df1fd14..e57be5f 100644 --- a/services/api/src/lib/subtitleEngine.ts +++ b/services/api/src/lib/subtitleEngine.ts @@ -60,7 +60,7 @@ export async function searchSubtitles(input: SearchParams) { level: 'info', step: 'TA_SEARCH_PARSED', message: `TurkceAltyazi candidates parsed`, - meta: { total: c.length, real: realCount, mock: c.length - realCount } + meta: { total: c.length, real: realCount } }); } allCandidates.push(...c); diff --git a/services/api/src/lib/taLog.ts b/services/api/src/lib/taLog.ts new file mode 100644 index 0000000..84e0fb1 --- /dev/null +++ b/services/api/src/lib/taLog.ts @@ -0,0 +1,21 @@ +import { env } from '../config/env.js'; + +function oneLine(input: unknown): string { + if (input === undefined || input === null) return ''; + return String(input).replace(/\s+/g, ' ').trim(); +} + +export function taInfo(step: string, message: string, meta?: Record) { + if (!env.enableTaStepLogs) return; + const base = `[TA] step=${oneLine(step)} msg="${oneLine(message)}"`; + const metaPart = meta ? ` meta=${oneLine(JSON.stringify(meta))}` : ''; + console.log(`${base}${metaPart}`); +} + +export function taError(step: string, error: unknown, meta?: Record) { + if (!env.enableTaStepLogs) return; + const reason = error instanceof Error ? error.message : oneLine(error); + const base = `[TA][ERROR] step=${oneLine(step)} reason="${oneLine(reason)}"`; + const metaPart = meta ? 
` meta=${oneLine(JSON.stringify(meta))}` : ''; + console.error(`${base}${metaPart}`); +} diff --git a/services/api/src/lib/turkcealtyaziReal.ts b/services/api/src/lib/turkcealtyaziReal.ts index 0d2aa38..c7ccb9d 100644 --- a/services/api/src/lib/turkcealtyaziReal.ts +++ b/services/api/src/lib/turkcealtyaziReal.ts @@ -1,8 +1,10 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; import { URL } from 'node:url'; +import { Buffer } from 'node:buffer'; import { env } from '../config/env.js'; import type { SearchParams } from '../types/index.js'; +import { taError, taInfo } from './taLog.js'; export interface RealTaCandidate { id: string; @@ -29,15 +31,64 @@ function sleep(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)); } -async function getWithRetry(url: string, retries = 2): Promise { +interface HttpResultText { + body: string; + finalUrl: string; + setCookie: string[]; +} + +interface HttpResultBinary { + body: Buffer; + finalUrl: string; + setCookie: string[]; + contentType?: string; +} + +function parseSetCookie(setCookie: string[]): Map { + const out = new Map(); + for (const raw of setCookie) { + const first = raw.split(';')[0]?.trim(); + if (!first) continue; + const idx = first.indexOf('='); + if (idx <= 0) continue; + const k = first.slice(0, idx).trim(); + const v = first.slice(idx + 1).trim(); + if (k) out.set(k, v); + } + return out; +} + +function cookieHeader(cookies: Map): string { + return [...cookies.entries()].map(([k, v]) => `${k}=${v}`).join('; '); +} + +function mergeCookies(target: Map, setCookie: string[]) { + const parsed = parseSetCookie(setCookie); + for (const [k, v] of parsed.entries()) target.set(k, v); +} + +async function getWithRetry(url: string, retries = 2, cookies?: Map): Promise { let lastError: unknown; for (let i = 0; i <= retries; i++) { try { if (i > 0) await sleep(250 * i); - const res = await client.get(url); - return typeof res.data === 'string' ? 
res.data : String(res.data); + taInfo('HTTP_GET_START', 'HTTP GET started', { url, attempt: i + 1, retries: retries + 1 }); + const res = await client.get(url, { + headers: cookies && cookies.size > 0 ? { cookie: cookieHeader(cookies) } : undefined + }); + taInfo('HTTP_GET_RESULT', 'HTTP GET completed', { + url, + finalUrl: (res.request as any)?.res?.responseUrl || url, + contentType: res.headers['content-type'] + }); + return { + body: typeof res.data === 'string' ? res.data : String(res.data), + finalUrl: (res.request as any)?.res?.responseUrl || url, + setCookie: Array.isArray(res.headers['set-cookie']) ? res.headers['set-cookie'] : [] + }; } catch (err) { lastError = err; + taError('HTTP_GET_FAILED', err, { url, attempt: i + 1, retries: retries + 1 }); } } throw lastError; @@ -56,110 +107,252 @@ function abs(base: string, maybeRelative: string): string { return new URL(maybeRelative, base).toString(); } -function parseCandidateNodes(html: string, baseUrl: string): RealTaCandidate[] { +function normalizeText(input: string): string { + return input + .toLowerCase() + .replace(/ç/g, 'c') + .replace(/ğ/g, 'g') + .replace(/ı/g, 'i') + .replace(/ö/g, 'o') + .replace(/ş/g, 's') + .replace(/ü/g, 'u') + .replace(/[^a-z0-9\s]/g, ' ') + .replace(/\s+/g, ' ') + .trim(); +} + +function tokenize(input: string): string[] { + return normalizeText(input) + .split(/\s+/) + .filter(Boolean); +} + +function buildFindQuery(params: SearchParams): string { + const toks = tokenize(params.title).filter((t) => !/^\d+$/.test(t)); + return toks.slice(0, 2).join(' '); +} + +function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null { const $ = cheerio.load(html); - const results: RealTaCandidate[] = []; + const wantedYear = params.year; + const wantedTitleTokens = tokenize(params.title); + const links: Array<{ url: string; title: string; year?: number; score: number }> = []; - $('a[href]').each((_, el) => { + 
$('a[href^="/mov/"]').each((_, el) => { const href = ($(el).attr('href') || '').trim(); - const text = $(el).text().replace(/\s+/g, ' ').trim(); + if (!href) return; - if (!href || text.length < 3) return; - const looksLikeSubtitle = /(altyazi|subtitle|sub|s\d{1,2}e\d{1,2}|\b\d{4}\b)/i.test(text + ' ' + href); - if (!looksLikeSubtitle) return; + const title = ($(el).attr('title') || $(el).text() || '').replace(/\s+/g, ' ').trim(); + if (!title) return; - const full = abs(baseUrl, href); - if (!/turkcealtyazi\.org/i.test(full)) return; + const containerText = ($(el).closest('div').parent().text() || '').replace(/\s+/g, ' ').trim(); + const yearMatch = containerText.match(/\((19\d{2}|20\d{2})\)/); + const year = yearMatch ? Number(yearMatch[1]) : undefined; - const id = `ta-real-${Buffer.from(full).toString('base64').slice(0, 18)}`; - const lowered = (text + ' ' + href).toLowerCase(); + const titleTokens = tokenize(title); + const overlap = wantedTitleTokens.filter((t) => titleTokens.includes(t)).length; + let score = overlap; + if (wantedYear && year === wantedYear) score += 10; - results.push({ - id, - title: text, - detailUrl: full, - lang: /\btr\b|turkce|türkçe/i.test(lowered) ? 
'tr' : 'tr', - releaseHints: normalizeReleaseHints(text), - isHI: /\bhi\b|isitme|hearing/i.test(lowered), - isForced: /forced|zorunlu/i.test(lowered) + links.push({ + url: abs(baseUrl, href), + title, + year, + score }); }); - const uniq = new Map(); - for (const r of results) { - if (!uniq.has(r.detailUrl)) uniq.set(r.detailUrl, r); + const dedup = new Map(); + for (const item of links) { + const prev = dedup.get(item.url); + if (!prev || item.score > prev.score) dedup.set(item.url, item); } - return [...uniq.values()].slice(0, 12); + const ordered = [...dedup.values()].sort((a, b) => b.score - a.score); + if (ordered.length === 0) return null; + + const best = ordered[0]; + if (wantedYear && best.year && best.year !== wantedYear) return null; + return { movieUrl: best.url, movieTitle: best.title }; +} + +function pickSubPageFromMovieDetail(html: string, movieUrl: string, params: SearchParams): { subUrl: string; title: string; releaseHints: string[]; isHI: boolean } | null { + const $ = cheerio.load(html); + const wantedRelease = normalizeText(params.release || ''); + const rows = $('.altsonsez2'); + const candidates: Array<{ subUrl: string; title: string; releaseHints: string[]; isHI: boolean; score: number }> = []; + + rows.each((_, row) => { + const linkEl = $(row).find('a[href^="/sub/"]').first(); + const href = (linkEl.attr('href') || '').trim(); + if (!href) return; + + const title = (linkEl.text() || '').replace(/\s+/g, ' ').trim() || (linkEl.attr('title') || '').trim(); + const ripText = ($(row).find('.ripdiv').text() || '').replace(/\s+/g, ' ').trim(); + const relHints = normalizeReleaseHints(ripText); + const normalizedRip = normalizeText(ripText); + const isHI = /(sdh|hearing|isitme|hi)/i.test(ripText); + + let score = 0; + if (wantedRelease) { + if (normalizedRip.includes(wantedRelease)) score += 20; + const releaseToken = wantedRelease.split(/\s+/).find(Boolean); + if (releaseToken && normalizedRip.includes(releaseToken)) score += 15; + } else { + 
score += 1; + } + + if ($(row).find('.flagtr').length > 0) score += 3; + + candidates.push({ + subUrl: abs(movieUrl, href), + title, + releaseHints: relHints, + isHI, + score + }); + }); + + if (candidates.length === 0) return null; + const picked = candidates.sort((a, b) => b.score - a.score)[0]; + if (wantedRelease && picked.score < 10) return null; + return picked; } export async function searchTurkceAltyaziReal(params: SearchParams): Promise { - const q = [params.title, params.year, params.type === 'tv' ? `S${String(params.season ?? 1).padStart(2, '0')}E${String(params.episode ?? 1).padStart(2, '0')}` : ''] - .filter(Boolean) - .join(' '); + if (params.type !== 'movie') return []; + const q = buildFindQuery(params); + if (!q) return []; - const candidatesPages = [ - `${env.turkcealtyaziBaseUrl}/arama?q=${encodeURIComponent(q)}`, - `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}` - ]; - - const merged: RealTaCandidate[] = []; - for (const url of candidatesPages) { - try { - await sleep(env.turkcealtyaziMinDelayMs); - const html = await getWithRetry(url, 2); - merged.push(...parseCandidateNodes(html, env.turkcealtyaziBaseUrl)); - if (merged.length >= 8) break; - } catch { - // bir sonraki endpoint denenecek - } - } - - const uniq = new Map(); - for (const item of merged) { - if (!uniq.has(item.detailUrl)) uniq.set(item.detailUrl, item); - } - - return [...uniq.values()].slice(0, 10); -} - -export async function resolveTurkceAltyaziDownloadUrl(detailUrl: string): Promise { - await sleep(env.turkcealtyaziMinDelayMs); - const html = await getWithRetry(detailUrl, 2); - const $ = cheerio.load(html); - - const linkCandidates: string[] = []; - - $('a[href]').each((_, el) => { - const href = ($(el).attr('href') || '').trim(); - const text = $(el).text().trim(); - if (!href) return; - - const looksDownload = /(indir|download|\.zip|\.rar|\.7z|\.srt|\.ass)/i.test(`${href} ${text}`); - if (!looksDownload) return; - - 
linkCandidates.push(abs(detailUrl, href)); + const searchUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`; + const cookies = new Map(); + taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', { + title: params.title, + year: params.year, + release: params.release, + query: q, + searchUrl }); - const preferred = - linkCandidates.find((l) => /\.(zip|rar|7z)(\?|$)/i.test(l)) || - linkCandidates.find((l) => /\.(srt|ass)(\?|$)/i.test(l)) || - linkCandidates[0]; + try { + await sleep(env.turkcealtyaziMinDelayMs); + const searchRes = await getWithRetry(searchUrl, 2, cookies); + mergeCookies(cookies, searchRes.setCookie); + const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + if (!pickedMovie) { + taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q }); + return []; + } + taInfo('TA_MOVIE_SELECTED', 'Movie detail page selected', { movieUrl: pickedMovie.movieUrl, movieTitle: pickedMovie.movieTitle }); - if (!preferred) { - throw new Error('TA detail page download link parse failed'); + await sleep(env.turkcealtyaziMinDelayMs); + const movieRes = await getWithRetry(pickedMovie.movieUrl, 2, cookies); + mergeCookies(cookies, movieRes.setCookie); + const pickedSub = pickSubPageFromMovieDetail(movieRes.body, pickedMovie.movieUrl, params); + if (!pickedSub) { + taInfo('TA_SEARCH_RESULT', 'Subtitle sub-page not matched by release', { + movieUrl: pickedMovie.movieUrl, + release: params.release + }); + return []; + } + taInfo('TA_SUB_SELECTED', 'Subtitle sub-page selected', { + subUrl: pickedSub.subUrl, + releaseHints: pickedSub.releaseHints + }); + + const id = `ta-real-${Buffer.from(pickedSub.subUrl).toString('base64').slice(0, 18)}`; + const result = [{ + id, + title: pickedSub.title || pickedMovie.movieTitle, + detailUrl: pickedSub.subUrl, + lang: 'tr', + releaseHints: pickedSub.releaseHints, + isHI: pickedSub.isHI, + isForced: 
false + }]; + taInfo('TA_SEARCH_RESULT', 'TurkceAltyazi search completed', { candidateCount: result.length, subUrl: pickedSub.subUrl }); + return result; + } catch (err) { + taError('TA_SEARCH_FAILED', err, { title: params.title, year: params.year, release: params.release, query: q }); + throw err; } - - return preferred; } -export async function downloadTurkceAltyaziFile(url: string): Promise<{ buffer: Buffer; finalUrl: string; contentType?: string }> { - await sleep(env.turkcealtyaziMinDelayMs); - const res = await client.get(url, { responseType: 'arraybuffer' }); - const buffer = Buffer.from(res.data); - return { - buffer, - finalUrl: (res.request as any)?.res?.responseUrl || url, - contentType: res.headers['content-type'] - }; +function parseDownloadForm(html: string): { idid: string; altid: string; sidid: string } | null { + const $ = cheerio.load(html); + const idid = ($('input[name="idid"]').attr('value') || '').trim(); + const altid = ($('input[name="altid"]').attr('value') || '').trim(); + const sidid = ($('input[name="sidid"]').attr('value') || '').trim(); + if (!idid || !altid || !sidid) return null; + return { idid, altid, sidid }; +} + +async function postIndWithRetry(subPageUrl: string, payload: { idid: string; altid: string; sidid: string }, cookies: Map, retries = 2): Promise { + let lastError: unknown; + for (let i = 0; i <= retries; i++) { + try { + if (i > 0) await sleep(250 * i); + const form = new URLSearchParams(payload).toString(); + const indUrl = `${env.turkcealtyaziBaseUrl}/ind`; + taInfo('TA_IND_POST_START', 'POST /ind started', { subPageUrl, indUrl, attempt: i + 1, retries: retries + 1, altid: payload.altid }); + const res = await client.post(indUrl, form, { + responseType: 'arraybuffer', + headers: { + 'content-type': 'application/x-www-form-urlencoded', + origin: env.turkcealtyaziBaseUrl, + referer: subPageUrl, + cookie: cookieHeader(cookies) + } + }); + + return { + body: Buffer.from(res.data), + finalUrl: (res.request as 
any)?.res?.responseUrl || indUrl, + setCookie: Array.isArray(res.headers['set-cookie']) ? res.headers['set-cookie'] : [], + contentType: res.headers['content-type'] + }; + } catch (err) { + lastError = err; + taError('TA_IND_POST_FAILED', err, { subPageUrl, attempt: i + 1, retries: retries + 1 }); + } + } + throw lastError; +} + +export async function downloadTurkceAltyaziFile(subPageUrl: string): Promise<{ buffer: Buffer; finalUrl: string; contentType?: string }> { + const cookies = new Map(); + taInfo('TA_DOWNLOAD_START', 'TurkceAltyazi subtitle download started', { subPageUrl }); + + try { + await sleep(env.turkcealtyaziMinDelayMs); + const subPageRes = await getWithRetry(subPageUrl, 2, cookies); + mergeCookies(cookies, subPageRes.setCookie); + const form = parseDownloadForm(subPageRes.body); + if (!form) { + const err = new Error('TA sub page download form parse failed'); + taError('TA_FORM_PARSE_FAILED', err, { subPageUrl }); + throw err; + } + taInfo('TA_FORM_PARSED', 'Download form parsed', { subPageUrl, altid: form.altid, idid: form.idid }); + + await sleep(env.turkcealtyaziMinDelayMs); + const res = await postIndWithRetry(subPageUrl, form, cookies, 2); + mergeCookies(cookies, res.setCookie); + + taInfo('TA_DOWNLOAD_RESULT', 'Subtitle download completed', { + subPageUrl, + finalUrl: res.finalUrl, + contentType: res.contentType, + bytes: res.body.byteLength + }); + return { + buffer: res.body, + finalUrl: res.finalUrl, + contentType: res.contentType + }; + } catch (err) { + taError('TA_DOWNLOAD_FAILED', err, { subPageUrl }); + throw err; + } } diff --git a/services/api/src/providers/OpenSubtitlesProvider.ts b/services/api/src/providers/OpenSubtitlesProvider.ts index 3d9b56a..1d56cb4 100644 --- a/services/api/src/providers/OpenSubtitlesProvider.ts +++ b/services/api/src/providers/OpenSubtitlesProvider.ts @@ -1,45 +1,12 @@ -import type { Candidate, SearchParams, SubtitleProvider } from '../types/index.js'; -import { generateMockArtifact } from 
'../lib/mockArtifact.js'; -import { hashString, seeded } from '../lib/deterministic.js'; -import { env } from '../config/env.js'; +import type { Candidate, DownloadedArtifact, SearchParams, SubtitleProvider } from '../types/index.js'; export class OpenSubtitlesProvider implements SubtitleProvider { - async search(params: SearchParams): Promise { - // TODO(v2): real OpenSubtitles API integration. - const key = `${params.title}|${params.year}|${params.season}|${params.episode}|os`; - const rnd = seeded(hashString(key)); - const base = params.title.replace(/\s+/g, '.'); - const directForMovie = params.type === 'movie' && rnd() > 0.4; - return [ - { - id: `os-${hashString(`${key}-a`)}`, - provider: 'opensubtitles', - displayName: `OS ${base} Official`, - downloadType: directForMovie ? 'direct' : 'archiveZip', - downloadUrl: directForMovie ? `mock://os/${base}/direct.srt` : `mock://os/${base}/archive.zip`, - lang: 'tr', - releaseHints: ['1080p', rnd() > 0.5 ? 'x265' : 'x264', 'flux'], - scoreHints: ['api_match'], - isHI: rnd() > 0.8, - isForced: rnd() > 0.92 - }, - { - id: `os-${hashString(`${key}-b`)}`, - provider: 'opensubtitles', - displayName: `OS ${base} Backup`, - downloadType: 'archiveZip', - downloadUrl: `mock://os/${base}/backup.zip`, - lang: 'tr', - releaseHints: ['720p', 'x264'], - scoreHints: ['backup'], - isHI: false, - isForced: false - } - ]; + async search(_params: SearchParams): Promise { + // Real OpenSubtitles entegrasyonu tamamlanana kadar provider pasif. 
+ return []; } - async download(candidate: Candidate, params: SearchParams, jobToken: string) { - const artifact = await generateMockArtifact(candidate, params, jobToken, `${env.tempRoot}/${jobToken}/download`); - return { type: artifact.type, filePath: artifact.filePath, candidateId: candidate.id }; + async download(_candidate: Candidate, _params: SearchParams, _jobToken: string): Promise { + throw new Error('OpenSubtitles real download not implemented'); } } diff --git a/services/api/src/providers/TurkceAltyaziProvider.ts b/services/api/src/providers/TurkceAltyaziProvider.ts index 5a7c2bc..473033a 100644 --- a/services/api/src/providers/TurkceAltyaziProvider.ts +++ b/services/api/src/providers/TurkceAltyaziProvider.ts @@ -1,14 +1,12 @@ import fs from 'node:fs/promises'; import path from 'node:path'; -import type { Candidate, SearchParams, SubtitleProvider } from '../types/index.js'; -import { generateMockArtifact } from '../lib/mockArtifact.js'; -import { hashString, seeded } from '../lib/deterministic.js'; +import type { Candidate, DownloadedArtifact, SearchParams, SubtitleProvider } from '../types/index.js'; import { env } from '../config/env.js'; import { downloadTurkceAltyaziFile, - resolveTurkceAltyaziDownloadUrl, searchTurkceAltyaziReal } from '../lib/turkcealtyaziReal.js'; +import { taError, taInfo } from '../lib/taLog.js'; function extensionFromDownload(url: string, contentType?: string): 'zip' | 'rar' | '7z' | 'srt' | 'ass' { const lowerUrl = url.toLowerCase(); @@ -22,84 +20,72 @@ function extensionFromDownload(url: string, contentType?: string): 'zip' | 'rar' export class TurkceAltyaziProvider implements SubtitleProvider { async search(params: SearchParams): Promise { - if (env.enableTurkcealtyaziReal) { - try { - const real = await searchTurkceAltyaziReal(params); - if (real.length > 0) { - return real.map((item, index) => ({ - id: item.id || `ta-real-${index}`, - provider: 'turkcealtyazi', - displayName: item.title, - downloadType: 'archiveZip', - 
downloadUrl: item.detailUrl, - lang: item.lang || 'tr', - releaseHints: item.releaseHints, - scoreHints: ['real_provider'], - isHI: item.isHI, - isForced: item.isForced - })); - } - } catch (err) { - if (!env.turkcealtyaziAllowMockFallback) { - throw err; - } - } - } + if (!env.enableTurkcealtyaziReal) return []; - const key = `${params.title}|${params.year}|${params.season}|${params.episode}|ta`; - const rnd = seeded(hashString(key)); - const base = params.title.replace(/\s+/g, '.'); - return [ - { - id: `ta-${hashString(`${key}-a`)}`, + taInfo('TA_PROVIDER_SEARCH_START', 'Provider search started', { + title: params.title, + year: params.year, + release: params.release + }); + try { + const real = await searchTurkceAltyaziReal(params); + taInfo('TA_PROVIDER_SEARCH_RESULT', 'Provider search completed', { candidateCount: real.length }); + return real.map((item, index) => ({ + id: item.id || `ta-real-${index}`, provider: 'turkcealtyazi', - displayName: `TA ${base} Ana Surum`, + displayName: item.title, downloadType: 'archiveZip', - downloadUrl: `mock://ta/${base}/a.zip`, - lang: 'tr', - releaseHints: [rnd() > 0.4 ? 
'1080p' : '720p', 'x265', 'flux'], - scoreHints: ['trusted', 'crowd'], - isHI: rnd() > 0.7, - isForced: false - }, - { - id: `ta-${hashString(`${key}-b`)}`, - provider: 'turkcealtyazi', - displayName: `TA ${base} Alternatif`, - downloadType: 'archiveZip', - downloadUrl: `mock://ta/${base}/b.zip`, - lang: 'tr', - releaseHints: ['webrip', 'x264'], - scoreHints: ['alt'], - isHI: false, - isForced: false - } - ]; + downloadUrl: item.detailUrl, + lang: item.lang || 'tr', + releaseHints: item.releaseHints, + scoreHints: ['real_provider'], + isHI: item.isHI, + isForced: item.isForced + })); + } catch (err) { + taError('TA_PROVIDER_SEARCH_FAILED', err, { title: params.title, year: params.year, release: params.release }); + throw err; + } } - async download(candidate: Candidate, params: SearchParams, jobToken: string) { - if (env.enableTurkcealtyaziReal && /^https?:\/\//i.test(candidate.downloadUrl)) { - const downloadDir = `${env.tempRoot}/${jobToken}/download`; - await fs.mkdir(downloadDir, { recursive: true }); - const trace: Array<{ level: 'info' | 'warn' | 'error'; step: string; message: string; meta?: any }> = []; + async download(candidate: Candidate, _params: SearchParams, jobToken: string): Promise { + if (!/^https?:\/\//i.test(candidate.downloadUrl)) { + throw new Error('TurkceAltyazi candidate download URL must be http(s)'); + } - trace.push({ level: 'info', step: 'TA_DETAIL_FETCHED', message: candidate.downloadUrl }); - const resolved = await resolveTurkceAltyaziDownloadUrl(candidate.downloadUrl); - trace.push({ level: 'info', step: 'TA_DOWNLOAD_URL_RESOLVED', message: resolved }); - const downloaded = await downloadTurkceAltyaziFile(resolved); + const downloadDir = `${env.tempRoot}/${jobToken}/download`; + await fs.mkdir(downloadDir, { recursive: true }); + const trace: Array<{ level: 'info' | 'warn' | 'error'; step: string; message: string; meta?: any }> = []; + + taInfo('TA_PROVIDER_DOWNLOAD_START', 'Provider download started', { + candidateId: candidate.id, 
+ subUrl: candidate.downloadUrl, + jobToken + }); + try { + trace.push({ level: 'info', step: 'TA_SUB_PAGE_FETCHED', message: candidate.downloadUrl }); + const downloaded = await downloadTurkceAltyaziFile(candidate.downloadUrl); + trace.push({ level: 'info', step: 'TA_IND_POST_DONE', message: downloaded.finalUrl }); const ext = extensionFromDownload(downloaded.finalUrl, downloaded.contentType); const filePath = path.join(downloadDir, `${candidate.id}.${ext}`); await fs.writeFile(filePath, downloaded.buffer); + const type: 'direct' | 'archive' = ext === 'srt' || ext === 'ass' ? 'direct' : 'archive'; + taInfo('TA_PROVIDER_DOWNLOAD_RESULT', 'Provider download completed', { + candidateId: candidate.id, + filePath, + type, + ext + }); return { - type: ext === 'srt' || ext === 'ass' ? 'direct' : 'archive', + type, filePath, candidateId: candidate.id, trace }; + } catch (err) { + taError('TA_PROVIDER_DOWNLOAD_FAILED', err, { candidateId: candidate.id, subUrl: candidate.downloadUrl, jobToken }); + throw err; } - - const artifact = await generateMockArtifact(candidate, params, jobToken, `${env.tempRoot}/${jobToken}/download`); - return { type: artifact.type, filePath: artifact.filePath, candidateId: candidate.id }; } } diff --git a/services/api/test/scoring.test.ts b/services/api/test/scoring.test.ts index 6ca43a0..a1abd4c 100644 --- a/services/api/test/scoring.test.ts +++ b/services/api/test/scoring.test.ts @@ -6,7 +6,7 @@ const candidate: any = { provider: 'opensubtitles', displayName: 'x', downloadType: 'archiveZip', - downloadUrl: 'mock://x', + downloadUrl: 'https://example.com/subtitle.zip', lang: 'tr', releaseHints: ['1080p', 'x265', 'flux'], scoreHints: [], diff --git a/services/core/src/workers/pipeline.ts b/services/core/src/workers/pipeline.ts index 446251f..8c86325 100644 --- a/services/core/src/workers/pipeline.ts +++ b/services/core/src/workers/pipeline.ts @@ -165,7 +165,7 @@ export function startWorkers(): void { mediaFileId, bestPath: data.bestPath, lang: 
(payload.languages[0] ?? 'tr') as string, - source: data.source ?? 'mock', + source: data.source ?? 'unknown', confidence: data.confidence ?? 0.8 } satisfies FinalizeData); return;