feat(api): turkcealtyazi gerçek modu desteği ekle
TurkceAltyazi sağlayıcısı için gerçek HTTP istekleri ve HTML parsing özelliği eklendi. Özellik bayrak ile açılıp kapatılabilir ve hata durumunda mock moduna dönüş yapabilir. Yapılan değişiklikler: - Yeni ortam değişkenleri eklendi (ENABLE_TURKCEALTYAZI_REAL, vb.) - axios ve cheerio bağımlılıkları eklendi - Gerçek indirme ve arama işlemleri için turkcealtyaziReal.ts modülü eklendi - Dokümantasyon güncellendi - Detaylı trace logging desteği eklendi
This commit is contained in:
1989
services/api/package-lock.json
generated
Normal file
1989
services/api/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -12,6 +12,8 @@
|
||||
"dependencies": {
|
||||
"@fastify/cors": "^11.0.0",
|
||||
"adm-zip": "^0.5.16",
|
||||
"axios": "^1.8.2",
|
||||
"cheerio": "^1.0.0",
|
||||
"dotenv": "^16.4.7",
|
||||
"fastify": "^5.2.1",
|
||||
"fs-extra": "^11.3.0",
|
||||
|
||||
@@ -7,5 +7,10 @@ export const env = {
|
||||
port: Number(process.env.API_PORT ?? 3002),
|
||||
tempRoot: process.env.TEMP_ROOT ?? '/temp',
|
||||
enableApiKey: process.env.ENABLE_API_KEY === 'true',
|
||||
apiKey: process.env.API_KEY ?? ''
|
||||
apiKey: process.env.API_KEY ?? '',
|
||||
enableTurkcealtyaziReal: process.env.ENABLE_TURKCEALTYAZI_REAL === 'true',
|
||||
turkcealtyaziAllowMockFallback: process.env.TURKCEALTYAZI_ALLOW_MOCK_FALLBACK !== 'false',
|
||||
turkcealtyaziBaseUrl: process.env.TURKCEALTYAZI_BASE_URL ?? 'https://turkcealtyazi.org',
|
||||
turkcealtyaziTimeoutMs: Number(process.env.TURKCEALTYAZI_TIMEOUT_MS ?? 12000),
|
||||
turkcealtyaziMinDelayMs: Number(process.env.TURKCEALTYAZI_MIN_DELAY_MS ?? 300)
|
||||
};
|
||||
|
||||
@@ -14,7 +14,10 @@ import { chooseBest, scoreCandidateFile } from './scoring.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
const providers: SubtitleProvider[] = [new TurkceAltyaziProvider(), new OpenSubtitlesProvider()];
|
||||
const providerEntries: Array<{ name: Candidate['provider']; impl: SubtitleProvider }> = [
|
||||
{ name: 'turkcealtyazi', impl: new TurkceAltyaziProvider() },
|
||||
{ name: 'opensubtitles', impl: new OpenSubtitlesProvider() }
|
||||
];
|
||||
|
||||
function defaultLimits() {
|
||||
return { maxFiles: 300, maxTotalBytes: 250 * 1024 * 1024, maxSingleBytes: 10 * 1024 * 1024 };
|
||||
@@ -44,18 +47,35 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
const dirs = await ensureJobDirs(jobToken);
|
||||
|
||||
const allCandidates: Candidate[] = [];
|
||||
for (const p of providers) {
|
||||
const c = await p.search(input);
|
||||
for (const p of providerEntries) {
|
||||
if (p.name === 'turkcealtyazi') {
|
||||
trace.push({ level: 'info', step: 'TA_SEARCH_REQUEST', message: 'TurkceAltyazi provider search started' });
|
||||
}
|
||||
trace.push({ level: 'info', step: 'SUBTITLE_SEARCH_STARTED', message: `Provider search started: ${p.name}` });
|
||||
const c = await p.impl.search(input);
|
||||
trace.push({ level: 'info', step: 'SUBTITLE_SEARCH_DONE', message: `Provider search done: ${p.name}`, meta: { count: c.length } });
|
||||
if (p.name === 'turkcealtyazi') {
|
||||
const realCount = c.filter((item) => item.scoreHints.includes('real_provider')).length;
|
||||
trace.push({
|
||||
level: 'info',
|
||||
step: 'TA_SEARCH_PARSED',
|
||||
message: `TurkceAltyazi candidates parsed`,
|
||||
meta: { total: c.length, real: realCount, mock: c.length - realCount }
|
||||
});
|
||||
}
|
||||
allCandidates.push(...c);
|
||||
}
|
||||
|
||||
const scored: any[] = [];
|
||||
|
||||
for (const candidate of allCandidates) {
|
||||
const provider = providers.find((p: any) => p.constructor.name.toLowerCase().includes(candidate.provider === 'turkcealtyazi' ? 'turkce' : 'open'));
|
||||
const provider = providerEntries.find((p) => p.name === candidate.provider)?.impl;
|
||||
if (!provider) continue;
|
||||
|
||||
const dl = await provider.download(candidate, input, jobToken);
|
||||
if (Array.isArray(dl.trace)) {
|
||||
trace.push(...dl.trace);
|
||||
}
|
||||
trace.push({ level: 'info', step: 'ARCHIVE_DOWNLOADED', message: `${candidate.provider}:${candidate.id}`, meta: { path: dl.filePath, type: dl.type } });
|
||||
|
||||
let files: string[] = [];
|
||||
|
||||
165
services/api/src/lib/turkcealtyaziReal.ts
Normal file
165
services/api/src/lib/turkcealtyaziReal.ts
Normal file
@@ -0,0 +1,165 @@
|
||||
import { createHash } from 'node:crypto';
import { URL } from 'node:url';
import axios from 'axios';
import * as cheerio from 'cheerio';
import { env } from '../config/env.js';
import type { SearchParams } from '../types/index.js';
|
||||
|
||||
/** A subtitle candidate scraped from a TurkceAltyazi listing page. */
export interface RealTaCandidate {
|
||||
/** Identifier prefixed with `ta-real-`, derived from the detail URL. */
id: string;
|
||||
/** Whitespace-collapsed anchor text of the matched link. */
title: string;
|
||||
/** Absolute URL of the linked page, resolved against the base URL. */
detailUrl: string;
|
||||
/** Language code assigned by the parser. */
lang: string;
|
||||
/** Lower-cased alphanumeric tokens extracted from the title. */
releaseHints: string[];
|
||||
/** True when the link text or href matches the hearing-impaired pattern. */
isHI: boolean;
|
||||
/** True when the link text or href matches the forced-subtitle pattern. */
isForced: boolean;
|
||||
}
|
||||
|
||||
/**
 * Shared axios instance used for every TurkceAltyazi request in this module.
 * The timeout comes from configuration; redirects are followed up to five hops
 * and each request carries browser-like headers with a Turkish-first language
 * preference.
 */
const client = axios.create({
  headers: {
    'user-agent':
      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
    accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'accept-language': 'tr-TR,tr;q=0.9,en;q=0.8'
  },
  maxRedirects: 5,
  timeout: env.turkcealtyaziTimeoutMs
});
|
||||
|
||||
/**
 * Returns a promise that settles after roughly `ms` milliseconds.
 *
 * @param ms Delay in milliseconds, passed straight to `setTimeout`.
 */
function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Fetches `url` as text, retrying transient failures with a linear back-off
 * (250 ms, 500 ms, ...).
 *
 * Client errors (HTTP 4xx other than 408 and 429) are not retried because a
 * repeated request cannot succeed and only adds load on the remote site.
 *
 * @param url Absolute URL to fetch.
 * @param retries Number of additional attempts after the first one. Negative
 *   or non-finite values are treated as 0, so at least one request is made.
 * @returns The response body as a string.
 * @throws The error of the last failed attempt.
 */
async function getWithRetry(url: string, retries = 2): Promise<string> {
  const maxRetries = Number.isFinite(retries) && retries > 0 ? Math.floor(retries) : 0;
  let lastError: unknown;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    if (attempt > 0) await sleep(250 * attempt);
    try {
      // Ask for text explicitly so axios never auto-parses a JSON body into an
      // object, which String() would turn into "[object Object]".
      const res = await client.get<unknown>(url, { responseType: 'text' });
      return typeof res.data === 'string' ? res.data : String(res.data);
    } catch (err) {
      lastError = err;
      const status = axios.isAxiosError(err) ? err.response?.status : undefined;
      const isPermanentClientError =
        status !== undefined && status >= 400 && status < 500 && status !== 408 && status !== 429;
      if (isPermanentClientError) break;
    }
  }

  throw lastError;
}
|
||||
|
||||
/**
 * Splits a free-form title into at most ten unique, lower-cased alphanumeric
 * tokens, in order of first appearance.
 *
 * Turkish letters are folded to their ASCII base form first (ç→c, ğ→g, ı→i,
 * ö→o, ş→s, ü→u, İ→i). Without that step the ASCII-only split below would cut
 * words such as "türkçe" into meaningless fragments.
 *
 * @param raw Text to tokenise, e.g. the anchor text of a subtitle link.
 * @returns Up to ten distinct tokens, each at least two characters long.
 */
function normalizeReleaseHints(raw: string): string[] {
  const folded = raw
    // Dotless/dotted i have no canonical decomposition onto plain "i"; map them by hand.
    .replace(/[ıİ]/g, 'i')
    .normalize('NFD')
    // Drop the combining marks left behind by NFD (cedilla, breve, diaeresis, ...).
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase();

  const tokens = folded.split(/[^a-z0-9]+/).filter((token) => token.length > 1);
  return [...new Set(tokens)].slice(0, 10);
}
|
||||
|
||||
/**
 * Resolves a possibly relative reference against `base` and returns the
 * absolute URL as a string.
 *
 * @param base Absolute URL used as the resolution base.
 * @param maybeRelative Relative or absolute reference.
 * @throws TypeError when the combination cannot be parsed as a URL.
 */
function abs(base: string, maybeRelative: string): string {
  const resolved = new URL(maybeRelative, base);
  return resolved.href;
}
|
||||
|
||||
/**
 * Extracts subtitle candidates from a listing page.
 *
 * Every `<a href>` whose text or href looks subtitle-related is resolved
 * against `baseUrl`. Links are kept only when they use http(s) and point at
 * the same site as `baseUrl` (its host or a subdomain of it). Duplicate URLs
 * are dropped and at most twelve candidates are returned, in document order.
 *
 * @param html Raw HTML of the listing page.
 * @param baseUrl Absolute base URL of the site; used to resolve relative links
 *   and to decide which hosts are accepted.
 * @returns Up to twelve unique candidates.
 * @throws TypeError when `baseUrl` itself is not a valid absolute URL.
 */
function parseCandidateNodes(html: string, baseUrl: string): RealTaCandidate[] {
  const $ = cheerio.load(html);
  // Compare parsed host names rather than regex-matching the whole URL, so a
  // link such as https://evil.example/?x=turkcealtyazi.org is rejected.
  const siteHost = new URL(baseUrl).hostname.toLowerCase().replace(/^www\./, '');
  const byUrl = new Map<string, RealTaCandidate>();

  $('a[href]').each((_, el) => {
    const href = ($(el).attr('href') ?? '').trim();
    const text = $(el).text().replace(/\s+/g, ' ').trim();
    if (!href || text.length < 3) return;

    const looksLikeSubtitle = /(altyazi|subtitle|sub|s\d{1,2}e\d{1,2}|\b\d{4}\b)/i.test(text + ' ' + href);
    if (!looksLikeSubtitle) return;

    let target: URL;
    try {
      target = new URL(href, baseUrl);
    } catch {
      // One malformed href must not abort parsing of the whole page.
      return;
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') return;

    const host = target.hostname.toLowerCase();
    if (host !== siteHost && !host.endsWith(`.${siteHost}`)) return;

    const full = target.toString();
    if (byUrl.has(full)) return;

    // Hash the complete URL. A truncated base64 prefix only covers the scheme
    // and host, which would give every candidate the same id and make
    // downloads overwrite each other on disk.
    const id = `ta-real-${createHash('sha1').update(full).digest('hex').slice(0, 16)}`;
    const lowered = (text + ' ' + href).toLowerCase();

    byUrl.set(full, {
      id,
      title: text,
      detailUrl: full,
      // The site only hosts Turkish subtitles, so the language is fixed.
      lang: 'tr',
      releaseHints: normalizeReleaseHints(text),
      isHI: /\bhi\b|i[sş]itme|hearing/i.test(lowered),
      isForced: /forced|zorunlu/i.test(lowered)
    });
  });

  return [...byUrl.values()].slice(0, 12);
}
|
||||
|
||||
/**
 * Searches TurkceAltyazi for subtitles matching `params`.
 *
 * The query is built from the title, the year and, for TV content, an
 * `SxxEyy` tag (season and episode default to 1). Two known search endpoints
 * are tried in order, each after the configured minimum delay; the loop stops
 * early once eight or more candidates have been collected.
 *
 * @param params Search parameters supplied by the caller.
 * @returns Up to ten candidates, unique by detail URL.
 * @throws When every endpoint failed, so callers can tell "site unreachable"
 *   from "no results" and apply their own fallback policy.
 */
export async function searchTurkceAltyaziReal(params: SearchParams): Promise<RealTaCandidate[]> {
  const episodeTag =
    params.type === 'tv'
      ? `S${String(params.season ?? 1).padStart(2, '0')}E${String(params.episode ?? 1).padStart(2, '0')}`
      : '';
  const q = [params.title, params.year, episodeTag].filter(Boolean).join(' ');
  const encodedQuery = encodeURIComponent(q);

  const candidatesPages = [
    `${env.turkcealtyaziBaseUrl}/arama?q=${encodedQuery}`,
    `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodedQuery}`
  ];

  const merged: RealTaCandidate[] = [];
  let succeededEndpoints = 0;
  let lastError: unknown;

  for (const url of candidatesPages) {
    try {
      await sleep(env.turkcealtyaziMinDelayMs);
      const html = await getWithRetry(url, 2);
      merged.push(...parseCandidateNodes(html, env.turkcealtyaziBaseUrl));
      succeededEndpoints += 1;
      if (merged.length >= 8) break;
    } catch (err) {
      // Remember the failure and try the next endpoint.
      lastError = err;
    }
  }

  // An empty result is only meaningful if at least one endpoint answered.
  // Otherwise surface the failure instead of reporting "nothing found".
  if (succeededEndpoints === 0) {
    throw lastError instanceof Error ? lastError : new Error('TurkceAltyazi search failed on all endpoints');
  }

  const uniq = new Map<string, RealTaCandidate>();
  for (const item of merged) {
    if (!uniq.has(item.detailUrl)) uniq.set(item.detailUrl, item);
  }

  return [...uniq.values()].slice(0, 10);
}
|
||||
|
||||
/**
 * Loads a subtitle detail page and picks the most likely download link.
 *
 * Preference order: archive links (.zip/.rar/.7z), then plain subtitle files
 * (.srt/.ass), then the first link whose href or text mentions a download.
 * Only http(s) links are considered; malformed hrefs are skipped.
 *
 * @param detailUrl Absolute URL of the detail page.
 * @returns Absolute URL of the chosen download link.
 * @throws Error when the page contains no usable download link, or the error
 *   raised while fetching the page.
 */
export async function resolveTurkceAltyaziDownloadUrl(detailUrl: string): Promise<string> {
  await sleep(env.turkcealtyaziMinDelayMs);
  const html = await getWithRetry(detailUrl, 2);
  const $ = cheerio.load(html);

  const linkCandidates: string[] = [];

  $('a[href]').each((_, el) => {
    const href = ($(el).attr('href') ?? '').trim();
    if (!href) return;

    const text = $(el).text().trim();
    const looksDownload = /(indir|download|\.zip|\.rar|\.7z|\.srt|\.ass)/i.test(`${href} ${text}`);
    if (!looksDownload) return;

    let target: URL;
    try {
      target = new URL(href, detailUrl);
    } catch {
      // A single unparsable href must not fail the whole resolution.
      return;
    }
    // javascript:, mailto: and similar links can never be downloaded.
    if (target.protocol !== 'http:' && target.protocol !== 'https:') return;

    linkCandidates.push(target.toString());
  });

  // The extension may be followed by a query string or a fragment.
  const preferred =
    linkCandidates.find((link) => /\.(zip|rar|7z)([?#]|$)/i.test(link)) ??
    linkCandidates.find((link) => /\.(srt|ass)([?#]|$)/i.test(link)) ??
    linkCandidates[0];

  if (!preferred) {
    throw new Error('TA detail page download link parse failed');
  }

  return preferred;
}
|
||||
|
||||
/**
 * Downloads `url` into memory after waiting the configured minimum delay.
 *
 * @param url Absolute URL of the file to download.
 * @returns The raw bytes, the URL after redirects when the underlying response
 *   exposes one (otherwise `url`), and the `content-type` header when it is a
 *   plain string.
 * @throws The axios error when the request fails.
 */
export async function downloadTurkceAltyaziFile(url: string): Promise<{ buffer: Buffer; finalUrl: string; contentType?: string }> {
  await sleep(env.turkcealtyaziMinDelayMs);
  const res = await client.get<ArrayBuffer>(url, { responseType: 'arraybuffer' });

  // The redirected URL sits on the underlying response object; read it through
  // a narrow structural type instead of `any`.
  const request = res.request as { res?: { responseUrl?: unknown } } | undefined;
  const redirectedUrl = request?.res?.responseUrl;

  // Header values are not guaranteed to be strings; only pass one on when it is.
  const rawContentType: unknown = res.headers['content-type'];

  return {
    buffer: Buffer.from(res.data),
    finalUrl: typeof redirectedUrl === 'string' && redirectedUrl.length > 0 ? redirectedUrl : url,
    contentType: typeof rawContentType === 'string' ? rawContentType : undefined
  };
}
|
||||
@@ -1,11 +1,51 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import type { Candidate, SearchParams, SubtitleProvider } from '../types/index.js';
|
||||
import { generateMockArtifact } from '../lib/mockArtifact.js';
|
||||
import { hashString, seeded } from '../lib/deterministic.js';
|
||||
import { env } from '../config/env.js';
|
||||
import {
|
||||
downloadTurkceAltyaziFile,
|
||||
resolveTurkceAltyaziDownloadUrl,
|
||||
searchTurkceAltyaziReal
|
||||
} from '../lib/turkcealtyaziReal.js';
|
||||
|
||||
/**
 * Guesses the file extension of a downloaded artifact.
 *
 * The path and query of `url` are inspected first (the host name is ignored so
 * that a domain such as `files.zipmirror.org` is not mistaken for a `.zip`
 * file). If the URL gives no hint, the content type is consulted for zip, rar
 * and 7z archives. Anything else is treated as a plain `.srt` subtitle.
 *
 * @param url Final download URL (after redirects).
 * @param contentType Value of the `content-type` response header, if any.
 * @returns One of the supported extensions, without the leading dot.
 */
function extensionFromDownload(url: string, contentType?: string): 'zip' | 'rar' | '7z' | 'srt' | 'ass' {
  // Strip "scheme://host[:port]" so only path, query and fragment are matched.
  const pathAndQuery = url.toLowerCase().replace(/^[a-z][a-z0-9+.-]*:\/\/[^/?#]*/, '');
  if (pathAndQuery.includes('.zip')) return 'zip';
  if (pathAndQuery.includes('.rar')) return 'rar';
  if (pathAndQuery.includes('.7z')) return '7z';
  if (pathAndQuery.includes('.ass')) return 'ass';

  // Extension-less download endpoints are common; fall back to the MIME type
  // (e.g. application/zip, application/x-rar-compressed, application/vnd.rar,
  // application/x-7z-compressed). Header casing is not guaranteed.
  const mime = (contentType ?? '').toLowerCase();
  if (mime.includes('zip')) return 'zip';
  if (mime.includes('rar')) return 'rar';
  if (mime.includes('7z')) return '7z';

  return 'srt';
}
|
||||
|
||||
export class TurkceAltyaziProvider implements SubtitleProvider {
|
||||
async search(params: SearchParams): Promise<Candidate[]> {
|
||||
// TODO(v2): real TurkceAltyazi scraping implementation.
|
||||
if (env.enableTurkcealtyaziReal) {
|
||||
try {
|
||||
const real = await searchTurkceAltyaziReal(params);
|
||||
if (real.length > 0) {
|
||||
return real.map((item, index) => ({
|
||||
id: item.id || `ta-real-${index}`,
|
||||
provider: 'turkcealtyazi',
|
||||
displayName: item.title,
|
||||
downloadType: 'archiveZip',
|
||||
downloadUrl: item.detailUrl,
|
||||
lang: item.lang || 'tr',
|
||||
releaseHints: item.releaseHints,
|
||||
scoreHints: ['real_provider'],
|
||||
isHI: item.isHI,
|
||||
isForced: item.isForced
|
||||
}));
|
||||
}
|
||||
} catch (err) {
|
||||
if (!env.turkcealtyaziAllowMockFallback) {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const key = `${params.title}|${params.year}|${params.season}|${params.episode}|ta`;
|
||||
const rnd = seeded(hashString(key));
|
||||
const base = params.title.replace(/\s+/g, '.');
|
||||
@@ -38,6 +78,27 @@ export class TurkceAltyaziProvider implements SubtitleProvider {
|
||||
}
|
||||
|
||||
async download(candidate: Candidate, params: SearchParams, jobToken: string) {
|
||||
if (env.enableTurkcealtyaziReal && /^https?:\/\//i.test(candidate.downloadUrl)) {
|
||||
const downloadDir = `${env.tempRoot}/${jobToken}/download`;
|
||||
await fs.mkdir(downloadDir, { recursive: true });
|
||||
const trace: Array<{ level: 'info' | 'warn' | 'error'; step: string; message: string; meta?: any }> = [];
|
||||
|
||||
trace.push({ level: 'info', step: 'TA_DETAIL_FETCHED', message: candidate.downloadUrl });
|
||||
const resolved = await resolveTurkceAltyaziDownloadUrl(candidate.downloadUrl);
|
||||
trace.push({ level: 'info', step: 'TA_DOWNLOAD_URL_RESOLVED', message: resolved });
|
||||
const downloaded = await downloadTurkceAltyaziFile(resolved);
|
||||
const ext = extensionFromDownload(downloaded.finalUrl, downloaded.contentType);
|
||||
const filePath = path.join(downloadDir, `${candidate.id}.${ext}`);
|
||||
await fs.writeFile(filePath, downloaded.buffer);
|
||||
|
||||
return {
|
||||
type: ext === 'srt' || ext === 'ass' ? 'direct' : 'archive',
|
||||
filePath,
|
||||
candidateId: candidate.id,
|
||||
trace
|
||||
};
|
||||
}
|
||||
|
||||
const artifact = await generateMockArtifact(candidate, params, jobToken, `${env.tempRoot}/${jobToken}/download`);
|
||||
return { type: artifact.type, filePath: artifact.filePath, candidateId: candidate.id };
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ export interface DownloadedArtifact {
|
||||
type: 'archive' | 'direct';
|
||||
filePath: string;
|
||||
candidateId: string;
|
||||
trace?: TraceLog[];
|
||||
}
|
||||
|
||||
export interface SubtitleProvider {
|
||||
|
||||
Reference in New Issue
Block a user