feat(api): turkcealtyazi gerçek modu desteği ekle

TurkceAltyazi sağlayıcısı için gerçek HTTP istekleri ve HTML
parsing özelliği eklendi. Özellik bayrak ile açılıp kapatılabilir
ve hata durumunda mock moduna dönüş yapabilir.

Yapılan değişiklikler:
- Yeni ortam değişkenleri eklendi (ENABLE_TURKCEALTYAZI_REAL, vb.)
- axios ve cheerio bağımlılıkları eklendi
- Gerçek indirme ve arama işlemleri için turkcealtyaziReal.ts modülü eklendi
- Dokümantasyon güncellendi
- Detaylı trace logging desteği eklendi
This commit is contained in:
2026-02-16 09:29:01 +03:00
parent a13db011fb
commit 9f07ff445e
13 changed files with 5196 additions and 7 deletions

View File

@@ -7,5 +7,10 @@ export const env = {
port: Number(process.env.API_PORT ?? 3002),
tempRoot: process.env.TEMP_ROOT ?? '/temp',
enableApiKey: process.env.ENABLE_API_KEY === 'true',
apiKey: process.env.API_KEY ?? ''
apiKey: process.env.API_KEY ?? '',
enableTurkcealtyaziReal: process.env.ENABLE_TURKCEALTYAZI_REAL === 'true',
turkcealtyaziAllowMockFallback: process.env.TURKCEALTYAZI_ALLOW_MOCK_FALLBACK !== 'false',
turkcealtyaziBaseUrl: process.env.TURKCEALTYAZI_BASE_URL ?? 'https://turkcealtyazi.org',
turkcealtyaziTimeoutMs: Number(process.env.TURKCEALTYAZI_TIMEOUT_MS ?? 12000),
turkcealtyaziMinDelayMs: Number(process.env.TURKCEALTYAZI_MIN_DELAY_MS ?? 300)
};

View File

@@ -14,7 +14,10 @@ import { chooseBest, scoreCandidateFile } from './scoring.js';
const execFileAsync = promisify(execFile);
const providers: SubtitleProvider[] = [new TurkceAltyaziProvider(), new OpenSubtitlesProvider()];
const providerEntries: Array<{ name: Candidate['provider']; impl: SubtitleProvider }> = [
{ name: 'turkcealtyazi', impl: new TurkceAltyaziProvider() },
{ name: 'opensubtitles', impl: new OpenSubtitlesProvider() }
];
function defaultLimits() {
return { maxFiles: 300, maxTotalBytes: 250 * 1024 * 1024, maxSingleBytes: 10 * 1024 * 1024 };
@@ -44,18 +47,35 @@ export async function searchSubtitles(input: SearchParams) {
const dirs = await ensureJobDirs(jobToken);
const allCandidates: Candidate[] = [];
for (const p of providers) {
const c = await p.search(input);
for (const p of providerEntries) {
if (p.name === 'turkcealtyazi') {
trace.push({ level: 'info', step: 'TA_SEARCH_REQUEST', message: 'TurkceAltyazi provider search started' });
}
trace.push({ level: 'info', step: 'SUBTITLE_SEARCH_STARTED', message: `Provider search started: ${p.name}` });
const c = await p.impl.search(input);
trace.push({ level: 'info', step: 'SUBTITLE_SEARCH_DONE', message: `Provider search done: ${p.name}`, meta: { count: c.length } });
if (p.name === 'turkcealtyazi') {
const realCount = c.filter((item) => item.scoreHints.includes('real_provider')).length;
trace.push({
level: 'info',
step: 'TA_SEARCH_PARSED',
message: `TurkceAltyazi candidates parsed`,
meta: { total: c.length, real: realCount, mock: c.length - realCount }
});
}
allCandidates.push(...c);
}
const scored: any[] = [];
for (const candidate of allCandidates) {
const provider = providers.find((p: any) => p.constructor.name.toLowerCase().includes(candidate.provider === 'turkcealtyazi' ? 'turkce' : 'open'));
const provider = providerEntries.find((p) => p.name === candidate.provider)?.impl;
if (!provider) continue;
const dl = await provider.download(candidate, input, jobToken);
if (Array.isArray(dl.trace)) {
trace.push(...dl.trace);
}
trace.push({ level: 'info', step: 'ARCHIVE_DOWNLOADED', message: `${candidate.provider}:${candidate.id}`, meta: { path: dl.filePath, type: dl.type } });
let files: string[] = [];

View File

@@ -0,0 +1,165 @@
import { createHash } from 'node:crypto';
import { URL } from 'node:url';
import axios from 'axios';
import * as cheerio from 'cheerio';
import { env } from '../config/env.js';
import type { SearchParams } from '../types/index.js';
/**
 * One subtitle candidate scraped from a TurkceAltyazi listing page.
 * Instances are produced by `parseCandidateNodes` from anchor elements.
 */
export interface RealTaCandidate {
/** Identifier prefixed with `ta-real-`, derived from the detail URL. */
id: string;
/** Whitespace-collapsed text of the anchor the candidate was found in. */
title: string;
/** Absolute URL of the anchor target (resolved against the site base URL). */
detailUrl: string;
/** Language code; the parser in this module always emits `'tr'`. */
lang: string;
/** Lower-cased, de-duplicated alphanumeric tokens taken from the title (at most 10). */
releaseHints: string[];
/** True when the anchor text or href matches the hearing-impaired keywords. */
isHI: boolean;
/** True when the anchor text or href matches the forced-subtitle keywords. */
isForced: boolean;
}
/**
 * Shared axios instance used for every request made by this module.
 * The timeout comes from `env.turkcealtyaziTimeoutMs`, up to 5 redirects are
 * followed, and browser-like headers (Turkish preferred) are sent.
 */
const client = axios.create({
timeout: env.turkcealtyaziTimeoutMs,
maxRedirects: 5,
headers: {
// Desktop Chrome user agent string.
'user-agent':
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'accept-language': 'tr-TR,tr;q=0.9,en;q=0.8'
}
});
/**
 * Returns a promise that settles (with no value) after `ms` milliseconds.
 * Used to space out requests to the remote site.
 */
function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Decides whether a failed request is worth repeating.
 * Errors without an HTTP status (timeouts, connection failures) and 5xx /
 * 408 / 425 / 429 responses are retried; other 4xx responses are definitive.
 */
function isRetryableFailure(err: unknown): boolean {
  const status = (err as { response?: { status?: unknown } } | null | undefined)?.response?.status;
  if (typeof status !== 'number') return true;
  return status >= 500 || status === 408 || status === 425 || status === 429;
}

/**
 * Fetches `url` through the shared client and returns the response body as
 * text, retrying failed attempts with a linear back-off (250 ms × attempt).
 *
 * @param url - Absolute URL to request.
 * @param retries - Number of extra attempts after the first one. Negative or
 *   non-finite values are treated as 0, so at least one request is always made.
 * @returns The body; non-string payloads are coerced with `String()`.
 * @throws The error raised by the last attempt (always an `Error` instance).
 */
async function getWithRetry(url: string, retries = 2): Promise<string> {
  // Clamp so a bad `retries` value can never skip the loop and throw `undefined`.
  const extraAttempts = Number.isFinite(retries) ? Math.max(0, Math.floor(retries)) : 0;
  let lastError: unknown;

  for (let attempt = 0; attempt <= extraAttempts; attempt++) {
    try {
      if (attempt > 0) await sleep(250 * attempt);
      const res = await client.get(url);
      return typeof res.data === 'string' ? res.data : String(res.data);
    } catch (err) {
      lastError = err;
      // A 404/403 will not change on the next try; do not hammer the site.
      if (!isRetryableFailure(err)) break;
    }
  }

  throw lastError instanceof Error ? lastError : new Error(`GET ${url} failed: ${String(lastError)}`);
}
/**
 * Turns a free-form release title into a short list of comparison tokens.
 *
 * The text is lower-cased and split on every run of characters outside
 * `a-z0-9`; tokens shorter than two characters are discarded, duplicates keep
 * their first position, and only the first ten tokens are returned.
 */
function normalizeReleaseHints(raw: string): string[] {
  const seen = new Set<string>();
  for (const token of raw.toLowerCase().split(/[^a-z0-9]+/)) {
    if (token.length > 1) seen.add(token);
  }
  // A Set iterates in insertion order, i.e. first-occurrence order.
  return [...seen].slice(0, 10);
}
/**
 * Resolves `maybeRelative` against `base` and returns the absolute URL string.
 * An already-absolute input is returned in normalised form.
 *
 * @throws TypeError when the combination does not form a valid URL.
 */
function abs(base: string, maybeRelative: string): string {
  const resolved = new URL(maybeRelative, base);
  return resolved.href;
}
/**
 * Returns the lower-cased host name of `rawUrl` without a leading `www.`,
 * or an empty string when `rawUrl` cannot be parsed.
 */
function siteHostOf(rawUrl: string): string {
  try {
    return new URL(rawUrl).hostname.toLowerCase().replace(/^www\./, '');
  } catch {
    return '';
  }
}

/**
 * Extracts subtitle candidates from the HTML of a listing/search page.
 *
 * Every `<a href>` is inspected. An anchor becomes a candidate when its text
 * is at least 3 characters long, its text or href matches the subtitle
 * keyword heuristic, and its target is an http(s) URL on the same site as
 * `baseUrl` (the base host or one of its subdomains).
 *
 * @param html - Raw page markup.
 * @param baseUrl - Site base URL used to resolve relative links and to decide
 *   which hosts are accepted.
 * @returns At most 12 candidates, unique by `detailUrl`, in document order.
 */
function parseCandidateNodes(html: string, baseUrl: string): RealTaCandidate[] {
  const $ = cheerio.load(html);
  const siteHost = siteHostOf(baseUrl);
  const byUrl = new Map<string, RealTaCandidate>();

  $('a[href]').each((_, el) => {
    const href = ($(el).attr('href') || '').trim();
    const text = $(el).text().replace(/\s+/g, ' ').trim();
    if (!href || text.length < 3) return;

    const haystack = text + ' ' + href;
    if (!/(altyazi|subtitle|sub|s\d{1,2}e\d{1,2}|\b\d{4}\b)/i.test(haystack)) return;

    // One malformed href must not abort parsing of the whole page.
    let target: URL;
    try {
      target = new URL(href, baseUrl);
    } catch {
      return;
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') return;

    // Compare real host names instead of substring-matching the URL, so the
    // configured base URL is honoured and query strings cannot spoof the site.
    const host = target.hostname.toLowerCase().replace(/^www\./, '');
    if (!siteHost || (host !== siteHost && !host.endsWith(`.${siteHost}`))) return;

    const full = target.toString();
    if (byUrl.has(full)) return;

    // Hash the whole URL: a truncated base64 prefix only covers the scheme and
    // host, which made every candidate share one id.
    const digest = createHash('sha1').update(full).digest('hex').slice(0, 18);
    const lowered = haystack.toLowerCase();

    byUrl.set(full, {
      id: `ta-real-${digest}`,
      title: text,
      detailUrl: full,
      lang: 'tr',
      releaseHints: normalizeReleaseHints(text),
      isHI: /\bhi\b|isitme|hearing/i.test(lowered),
      isForced: /forced|zorunlu/i.test(lowered)
    });
  });

  return [...byUrl.values()].slice(0, 12);
}
/**
 * Searches TurkceAltyazi for subtitles matching `params`.
 *
 * The query is built from the title, the year and, for `type === 'tv'`, an
 * `SxxEyy` tag (season and episode default to 1). Two search endpoints are
 * tried in order (`/arama`, then `/find.php`); the second is skipped once 8
 * candidates have been collected. Each request is preceded by the configured
 * minimum delay.
 *
 * @param params - Search parameters supplied by the caller.
 * @returns At most 10 candidates, unique by detail URL.
 * @throws The last request error when every endpoint failed and nothing was
 *   collected, so callers can tell an outage from a genuinely empty result.
 */
export async function searchTurkceAltyaziReal(params: SearchParams): Promise<RealTaCandidate[]> {
  const episodeTag =
    params.type === 'tv'
      ? `S${String(params.season ?? 1).padStart(2, '0')}E${String(params.episode ?? 1).padStart(2, '0')}`
      : '';
  const q = [params.title, params.year, episodeTag].filter(Boolean).join(' ');

  // Drop trailing slashes so a configured "https://host/" does not yield "//arama".
  const baseUrl = env.turkcealtyaziBaseUrl.replace(/\/+$/, '');
  const searchUrls = [
    `${baseUrl}/arama?q=${encodeURIComponent(q)}`,
    `${baseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`
  ];

  const byUrl = new Map<string, RealTaCandidate>();
  let failures = 0;
  let lastError: unknown;

  for (const url of searchUrls) {
    try {
      await sleep(env.turkcealtyaziMinDelayMs);
      const html = await getWithRetry(url, 2);
      for (const item of parseCandidateNodes(html, baseUrl)) {
        if (!byUrl.has(item.detailUrl)) byUrl.set(item.detailUrl, item);
      }
      if (byUrl.size >= 8) break;
    } catch (err) {
      // Remember the failure and move on to the next endpoint.
      failures += 1;
      lastError = err;
    }
  }

  if (byUrl.size === 0 && failures === searchUrls.length) {
    throw lastError instanceof Error ? lastError : new Error('TurkceAltyazi search failed on every endpoint');
  }

  return [...byUrl.values()].slice(0, 10);
}
/**
 * Loads a subtitle detail page and picks the most likely download link.
 *
 * Anchors whose href or text mention a download keyword or a known file
 * extension are collected. Preference order: archive links (`.zip`, `.rar`,
 * `.7z`), then plain subtitle files (`.srt`, `.ass`), then the first match.
 *
 * @param detailUrl - Absolute URL of the detail page.
 * @returns Absolute http(s) URL of the chosen download link.
 * @throws Error when the page contains no usable download link, or the
 *   request error when the page cannot be fetched.
 */
export async function resolveTurkceAltyaziDownloadUrl(detailUrl: string): Promise<string> {
  await sleep(env.turkcealtyaziMinDelayMs);
  const html = await getWithRetry(detailUrl, 2);
  const $ = cheerio.load(html);

  const linkCandidates: string[] = [];
  $('a[href]').each((_, el) => {
    const href = ($(el).attr('href') || '').trim();
    const text = $(el).text().trim();
    // In-page anchors ("#indir") only point back at this HTML page.
    if (!href || href.startsWith('#')) return;
    if (!/(indir|download|\.zip|\.rar|\.7z|\.srt|\.ass)/i.test(`${href} ${text}`)) return;

    // Skip hrefs that do not parse instead of failing the whole resolution.
    let target: URL;
    try {
      target = new URL(href, detailUrl);
    } catch {
      return;
    }
    // `javascript:` / `mailto:` links cannot be downloaded.
    if (target.protocol !== 'http:' && target.protocol !== 'https:') return;

    linkCandidates.push(target.toString());
  });

  const preferred =
    linkCandidates.find((l) => /\.(zip|rar|7z)(?:[?#]|$)/i.test(l)) ||
    linkCandidates.find((l) => /\.(srt|ass)(?:[?#]|$)/i.test(l)) ||
    linkCandidates[0];

  if (!preferred) {
    throw new Error('TA detail page download link parse failed');
  }
  return preferred;
}
/**
 * Downloads `url` as binary data after waiting the configured minimum delay.
 *
 * @param url - Absolute URL of the file to fetch.
 * @returns The payload as a `Buffer`, the URL the response finally came from
 *   (falls back to `url` when the adapter does not expose it), and the
 *   `content-type` header when it is a plain string.
 * @throws The axios error when the request fails.
 */
export async function downloadTurkceAltyaziFile(url: string): Promise<{ buffer: Buffer; finalUrl: string; contentType?: string }> {
  await sleep(env.turkcealtyaziMinDelayMs);
  const res = await client.get<ArrayBuffer>(url, { responseType: 'arraybuffer' });

  // `request.res.responseUrl` is where the redirect-following adapter records
  // the final location; read it defensively rather than through `any`.
  const request: unknown = res.request;
  const redirectedUrl = (request as { res?: { responseUrl?: unknown } } | null | undefined)?.res?.responseUrl;

  // Header values are not guaranteed to be strings; only pass one through if it is.
  const rawContentType: unknown = res.headers['content-type'];

  return {
    buffer: Buffer.from(res.data),
    finalUrl: typeof redirectedUrl === 'string' && redirectedUrl ? redirectedUrl : url,
    contentType: typeof rawContentType === 'string' ? rawContentType : undefined
  };
}

View File

@@ -1,11 +1,51 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import type { Candidate, SearchParams, SubtitleProvider } from '../types/index.js';
import { generateMockArtifact } from '../lib/mockArtifact.js';
import { hashString, seeded } from '../lib/deterministic.js';
import { env } from '../config/env.js';
import {
downloadTurkceAltyaziFile,
resolveTurkceAltyaziDownloadUrl,
searchTurkceAltyaziReal
} from '../lib/turkcealtyaziReal.js';
/**
 * Guesses the file extension of a downloaded subtitle artifact.
 *
 * Evidence is consulted in this order:
 *  1. a known extension at the very end of the URL path (query/fragment ignored);
 *  2. a known archive/`.ass` extension anywhere in the path or query string
 *     (e.g. `download.php?file=pack.zip`), never in the host name;
 *  3. the `content-type` header (zip, rar or 7z);
 *  4. otherwise `'srt'`.
 *
 * @param url - Final download URL.
 * @param contentType - Value of the response `content-type` header, if any.
 */
function extensionFromDownload(url: string, contentType?: string): 'zip' | 'rar' | '7z' | 'srt' | 'ass' {
  const lowerUrl = url.toLowerCase();

  // 1. Extension at the end of the path.
  const pathOnly = lowerUrl.split(/[?#]/, 1)[0] ?? '';
  if (pathOnly.endsWith('.zip')) return 'zip';
  if (pathOnly.endsWith('.rar')) return 'rar';
  if (pathOnly.endsWith('.7z')) return '7z';
  if (pathOnly.endsWith('.ass')) return 'ass';
  if (pathOnly.endsWith('.srt')) return 'srt';

  // 2. Extension mentioned elsewhere; strip scheme and host so a host such as
  //    "cdn.assets.com" cannot match, and require a token boundary after it.
  const afterHost = lowerUrl.replace(/^[a-z][a-z0-9+.-]*:\/\/[^/?#]*/, '');
  if (/\.zip(?![a-z0-9])/.test(afterHost)) return 'zip';
  if (/\.rar(?![a-z0-9])/.test(afterHost)) return 'rar';
  if (/\.7z(?![a-z0-9])/.test(afterHost)) return '7z';
  if (/\.ass(?![a-z0-9])/.test(afterHost)) return 'ass';

  // 3. Content type: download endpoints often carry no extension at all.
  const lowerType = (contentType ?? '').toLowerCase();
  if (/\brar\b/.test(lowerType)) return 'rar';
  if (/\b7z\b/.test(lowerType)) return '7z';
  if (lowerType.includes('zip')) return 'zip';

  return 'srt';
}
export class TurkceAltyaziProvider implements SubtitleProvider {
async search(params: SearchParams): Promise<Candidate[]> {
// TODO(v2): real TurkceAltyazi scraping implementation.
if (env.enableTurkcealtyaziReal) {
try {
const real = await searchTurkceAltyaziReal(params);
if (real.length > 0) {
return real.map((item, index) => ({
id: item.id || `ta-real-${index}`,
provider: 'turkcealtyazi',
displayName: item.title,
downloadType: 'archiveZip',
downloadUrl: item.detailUrl,
lang: item.lang || 'tr',
releaseHints: item.releaseHints,
scoreHints: ['real_provider'],
isHI: item.isHI,
isForced: item.isForced
}));
}
} catch (err) {
if (!env.turkcealtyaziAllowMockFallback) {
throw err;
}
}
}
const key = `${params.title}|${params.year}|${params.season}|${params.episode}|ta`;
const rnd = seeded(hashString(key));
const base = params.title.replace(/\s+/g, '.');
@@ -38,6 +78,27 @@ export class TurkceAltyaziProvider implements SubtitleProvider {
}
/**
 * Downloads the artifact for `candidate` into `<tempRoot>/<jobToken>/download`.
 *
 * When real mode is enabled and the candidate carries an http(s) URL, the
 * detail page is resolved to a download link, the file is fetched and written
 * to disk, and the steps taken are returned as `trace`. Otherwise a mock
 * artifact is generated.
 *
 * @param candidate - Candidate returned by `search`.
 * @param params - Original search parameters (used by the mock generator).
 * @param jobToken - Token naming the per-job temp directory.
 * @returns Artifact type (`'direct'` for `.srt`/`.ass`, else `'archive'`),
 *   file path and candidate id; `trace` only for real downloads.
 * @throws Any error raised while resolving, downloading or writing the file.
 */
async download(candidate: Candidate, params: SearchParams, jobToken: string) {
  const downloadDir = `${env.tempRoot}/${jobToken}/download`;

  if (env.enableTurkcealtyaziReal && /^https?:\/\//i.test(candidate.downloadUrl)) {
    await fs.mkdir(downloadDir, { recursive: true });
    const trace: Array<{ level: 'info' | 'warn' | 'error'; step: string; message: string; meta?: any }> = [];

    const resolved = await resolveTurkceAltyaziDownloadUrl(candidate.downloadUrl);
    trace.push({ level: 'info', step: 'TA_DETAIL_FETCHED', message: candidate.downloadUrl });
    trace.push({ level: 'info', step: 'TA_DOWNLOAD_URL_RESOLVED', message: resolved });

    const downloaded = await downloadTurkceAltyaziFile(resolved);
    const ext = extensionFromDownload(downloaded.finalUrl, downloaded.contentType);

    // The id ends up in a file name: neutralise path separators and any other
    // character that is not safe there, so the file stays inside downloadDir.
    const safeName = candidate.id.replace(/[^a-zA-Z0-9._-]/g, '_') || 'candidate';
    const filePath = path.join(downloadDir, `${safeName}.${ext}`);
    await fs.writeFile(filePath, downloaded.buffer);

    // Annotated so the literal union is not widened to `string` in the result.
    const type: 'direct' | 'archive' = ext === 'srt' || ext === 'ass' ? 'direct' : 'archive';
    return {
      type,
      filePath,
      candidateId: candidate.id,
      trace
    };
  }

  const artifact = await generateMockArtifact(candidate, params, jobToken, downloadDir);
  return { type: artifact.type, filePath: artifact.filePath, candidateId: candidate.id };
}

View File

@@ -34,6 +34,7 @@ export interface DownloadedArtifact {
type: 'archive' | 'direct';
filePath: string;
candidateId: string;
trace?: TraceLog[];
}
export interface SubtitleProvider {