feat: clamav tarama sistemi ve hata yönetimi iyileştirmeleri ekle
ClamAV entegrasyonu ile indirilen altyazı dosyalarının otomatik virüs taraması eklendi. Pipeline tabanlı hata yönetimi sistemi ile hatalar kategorize edilip daha iyi işleniyor. Türkcealtyazi sağlayıcısı TV dizileri için sezon/bölüm bazlı eşleştirme ve paket indirme desteği kazandı. Dosya izleyiciye olay tekilleştirme (deduplication) mekanizması eklendi. Metin kodlaması normalizasyonu Türkçe karakterler için geliştirildi.
This commit is contained in:
@@ -9,6 +9,9 @@ export const env = {
|
||||
enableApiKey: process.env.ENABLE_API_KEY === 'true',
|
||||
apiKey: process.env.API_KEY ?? '',
|
||||
enableTaStepLogs: process.env.ENABLE_TA_STEP_LOGS === 'true',
|
||||
clamavAutoUpdate: process.env.CLAMAV_AUTO_UPDATE !== 'false',
|
||||
clamavFailOnUpdateError: process.env.CLAMAV_FAIL_ON_UPDATE_ERROR === 'true',
|
||||
clamavDbDir: process.env.CLAMAV_DB_DIR ?? '/var/lib/clamav',
|
||||
enableTurkcealtyaziReal: process.env.ENABLE_TURKCEALTYAZI_REAL === 'true',
|
||||
turkcealtyaziBaseUrl: process.env.TURKCEALTYAZI_BASE_URL ?? 'https://turkcealtyazi.org',
|
||||
turkcealtyaziTimeoutMs: Number(process.env.TURKCEALTYAZI_TIMEOUT_MS ?? 12000),
|
||||
|
||||
@@ -2,9 +2,11 @@ import fs from 'node:fs/promises';
|
||||
import { buildApp } from './app.js';
|
||||
import { cleanupOldTemp } from './lib/subtitleEngine.js';
|
||||
import { env } from './config/env.js';
|
||||
import { ensureClamavDatabase } from './lib/clamavDb.js';
|
||||
|
||||
async function bootstrap() {
|
||||
await fs.mkdir(env.tempRoot, { recursive: true });
|
||||
await ensureClamavDatabase();
|
||||
const app = await buildApp();
|
||||
|
||||
setInterval(async () => {
|
||||
|
||||
30
services/api/src/lib/clamav.ts
Normal file
30
services/api/src/lib/clamav.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import { execFile } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
/** Outcome of a single `clamscan` run over one file. */
export interface ClamAvScanResult {
  /** True when clamscan exited with code 0 (no virus found). */
  clean: boolean;
  /** True when clamscan exited with code 1 (virus found). */
  infected: boolean;
  /** Trimmed concatenation of clamscan's stdout and stderr. */
  output: string;
}
|
||||
|
||||
/**
 * Scans a single file with the `clamscan` command-line tool.
 *
 * @param filePath - Path of the file handed to clamscan as its only positional argument.
 * @returns `{ clean: true }` when clamscan exits with 0, `{ infected: true }` when it
 *   exits with 1; `output` carries the trimmed stdout + stderr in both cases.
 * @throws Error prefixed with `clamav scan error:` for any other failure (exit code > 1,
 *   clamscan missing, spawn errors, ...).
 */
export async function scanFileWithClamav(filePath: string): Promise<ClamAvScanResult> {
  try {
    const { stdout, stderr } = await execFileAsync('clamscan', ['--no-summary', filePath]);
    const output = `${stdout || ''}${stderr || ''}`.trim();
    return { clean: true, infected: false, output };
  } catch (err: unknown) {
    // A rejected execFile promise carries stdout/stderr/code on the error object;
    // read them defensively because anything can be thrown.
    const failure = (err ?? {}) as {
      stdout?: unknown;
      stderr?: unknown;
      code?: unknown;
      message?: unknown;
    };
    const output = `${failure.stdout || ''}${failure.stderr || ''}`.trim();

    // clamscan exit code:
    // 0 = no virus found, 1 = virus found, >1 = error
    // `code` may also be a string such as 'ENOENT', hence the strict numeric comparison.
    if (failure.code === 1) {
      return { clean: false, infected: true, output };
    }

    const reason = output || failure.message || 'clamav scan failed';
    throw new Error(`clamav scan error: ${reason}`);
  }
}
|
||||
68
services/api/src/lib/clamavDb.ts
Normal file
68
services/api/src/lib/clamavDb.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
import { env } from '../config/env.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
/**
 * Tells whether a file name looks like a ClamAV signature database.
 *
 * @param filename - Bare file name (not a full path is required, only the suffix is checked).
 * @returns True when the name ends in `.cvd` or `.cld`, case-insensitively.
 */
function hasDbFile(filename: string): boolean {
  const dbSuffix = /\.(cvd|cld)$/i;
  return dbSuffix.test(filename);
}
|
||||
|
||||
/**
 * Checks whether a directory directly contains at least one ClamAV database file.
 *
 * Only regular files at the top level of `dbDir` are considered; sub-directories
 * are not traversed.
 *
 * @param dbDir - Directory to inspect.
 * @returns True when a regular file accepted by `hasDbFile` is present; false when
 *   none is found or the directory cannot be read.
 */
async function hasLocalDatabase(dbDir: string): Promise<boolean> {
  try {
    const entries = await fs.readdir(dbDir, { withFileTypes: true });
    return entries.some((entry) => entry.isFile() && hasDbFile(entry.name));
  } catch {
    // Deliberate best effort: an unreadable or missing directory is reported
    // as "no database" rather than as an error.
    return false;
  }
}
|
||||
|
||||
/**
 * Collapses multi-line freshclam output into a single log-friendly line.
 *
 * Lines are trimmed, empty lines are dropped, and the
 * "NotifyClamd: Can't find or parse configuration file" notice is filtered out.
 *
 * @param raw - Raw freshclam output (stdout and/or stderr).
 * @returns The remaining lines joined by single spaces; empty string when nothing is left.
 */
function normalizeFreshclamOutput(raw: string): string {
  const keptLines: string[] = [];
  for (const rawLine of raw.split('\n')) {
    const line = rawLine.trim();
    if (line.length === 0) continue;
    if (line.includes('NotifyClamd: Can\'t find or parse configuration file')) continue;
    keptLines.push(line);
  }
  return keptLines.join(' ');
}
|
||||
|
||||
/**
 * Makes sure a ClamAV signature database exists, running `freshclam` once if it does not.
 *
 * Flow:
 * 1. Does nothing when `env.clamavAutoUpdate` is false.
 * 2. Creates `env.clamavDbDir` if needed and returns early when it already holds a database file.
 * 3. Otherwise runs `freshclam --stdout` with a 5 minute timeout.
 * 4. Re-checks the directory and logs whether the database is now available.
 *
 * @throws Error when `env.clamavFailOnUpdateError` is true and either freshclam fails
 *   or the database is still missing afterwards. With the flag off, failures are only logged.
 */
export async function ensureClamavDatabase(): Promise<void> {
  if (!env.clamavAutoUpdate) {
    console.log('[api][clamav] auto update disabled');
    return;
  }

  const dbDir = env.clamavDbDir;
  await fs.mkdir(dbDir, { recursive: true });

  if (await hasLocalDatabase(dbDir)) {
    console.log(`[api][clamav] database already present in ${dbDir}`);
    return;
  }

  console.log(`[api][clamav] database missing, running freshclam in ${dbDir}`);
  try {
    // NOTE(review): freshclam is started without `--datadir`, so it presumably writes to the
    // directory from its own configuration rather than to dbDir — confirm the two agree
    // when CLAMAV_DB_DIR is customised.
    const { stdout, stderr } = await execFileAsync('freshclam', ['--stdout'], { timeout: 5 * 60 * 1000 });
    const out = normalizeFreshclamOutput(`${stdout || ''}\n${stderr || ''}`);
    console.log(`[api][clamav] freshclam completed: ${out}`);
  } catch (err: unknown) {
    // Rejections from execFile carry stdout/stderr on the error object; read defensively.
    const failure = (err ?? {}) as { stdout?: unknown; stderr?: unknown; message?: unknown };
    const details = `${failure.stdout || ''}${failure.stderr || ''}${failure.message || ''}`
      .replace(/\s+/g, ' ')
      .trim();
    if (env.clamavFailOnUpdateError) {
      throw new Error(`clamav freshclam failed: ${details}`);
    }
    console.warn(`[api][clamav] freshclam failed (continuing): ${details}`);
  }

  // freshclam exiting cleanly does not prove files landed in dbDir, so check again.
  if (await hasLocalDatabase(dbDir)) {
    console.log(`[api][clamav] database ready in ${dbDir}`);
    return;
  }
  if (env.clamavFailOnUpdateError) {
    throw new Error(`clamav database still missing after freshclam: ${path.resolve(dbDir)}`);
  }
  console.warn(`[api][clamav] database not found in ${dbDir}; clamav scans may fail`);
}
|
||||
96
services/api/src/lib/errors.ts
Normal file
96
services/api/src/lib/errors.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import axios from 'axios';
|
||||
|
||||
/** Coarse classification attached to every `PipelineError`. */
export type ErrorCategory =
  | 'network'
  | 'parse'
  | 'blocked'
  | 'rate-limit'
  | 'malware'
  | 'invalid-subtitle'
  | 'internal';
|
||||
|
||||
/**
 * Error type carrying a machine-readable code, a category, a retry hint and the
 * HTTP status to report for the failure.
 */
export class PipelineError extends Error {
  /** Machine-readable identifier, e.g. `UPSTREAM_RATE_LIMIT`. */
  code: string;
  /** Coarse classification of the failure. */
  category: ErrorCategory;
  /** Whether repeating the failed operation may succeed. */
  retryable: boolean;
  /** HTTP status associated with this error; 500 when not provided. */
  httpStatus: number;

  /**
   * @param opts - Error details. `httpStatus` defaults to 500. `cause` is stored on the
   *   instance as a `cause` property only when it is not `undefined`.
   */
  constructor(opts: {
    code: string;
    message: string;
    category: ErrorCategory;
    retryable: boolean;
    httpStatus?: number;
    cause?: unknown;
  }) {
    super(opts.message);
    this.name = 'PipelineError';
    this.code = opts.code;
    this.category = opts.category;
    this.retryable = opts.retryable;
    this.httpStatus = opts.httpStatus ?? 500;
    if (opts.cause !== undefined) {
      // Plain property assignment (own, enumerable) without resorting to an `any` cast.
      Object.assign(this, { cause: opts.cause });
    }
  }
}
|
||||
|
||||
/**
 * Turns an arbitrary value into a single-line message.
 *
 * @param input - Any value; `null` and `undefined` become `'unknown error'`.
 * @returns The stringified value with every whitespace run collapsed to one space and trimmed.
 */
function normalizeMsg(input: unknown): string {
  const text = String(input ?? 'unknown error');
  return text.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
/**
 * Converts any thrown value into a `PipelineError`.
 *
 * - A `PipelineError` is returned unchanged.
 * - An axios error is classified by its response status:
 *   429 -> `UPSTREAM_RATE_LIMIT` (retryable, reported as 503);
 *   403 -> `UPSTREAM_BLOCKED` (not retryable, 502);
 *   >= 500 -> `UPSTREAM_5XX` (retryable, 502);
 *   any other status -> `UPSTREAM_<status>` (not retryable, 502);
 *   no status at all -> `NETWORK_ERROR` (retryable, 502).
 * - Everything else becomes an `internal`, non-retryable error with `fallbackCode`.
 *
 * The original value is always kept as `cause`.
 *
 * @param err - The caught value.
 * @param fallbackCode - Code used for non-axios, non-pipeline errors.
 * @returns A `PipelineError` describing `err`.
 */
export function toPipelineError(err: unknown, fallbackCode = 'INTERNAL_ERROR'): PipelineError {
  if (err instanceof PipelineError) {
    return err;
  }

  if (axios.isAxiosError(err)) {
    const status = err.response?.status;
    // Fields shared by every axios-derived error.
    const shared = { message: normalizeMsg(err.message), cause: err };

    if (status === 429) {
      return new PipelineError({
        ...shared,
        code: 'UPSTREAM_RATE_LIMIT',
        category: 'rate-limit',
        retryable: true,
        httpStatus: 503
      });
    }
    if (status === 403) {
      return new PipelineError({
        ...shared,
        code: 'UPSTREAM_BLOCKED',
        category: 'blocked',
        retryable: false,
        httpStatus: 502
      });
    }
    if (status && status >= 500) {
      return new PipelineError({
        ...shared,
        code: 'UPSTREAM_5XX',
        category: 'network',
        retryable: true,
        httpStatus: 502
      });
    }
    // Remaining cases: another HTTP status (not retried) or no response at all (retried).
    return new PipelineError({
      ...shared,
      code: status ? `UPSTREAM_${status}` : 'NETWORK_ERROR',
      category: 'network',
      retryable: !status,
      httpStatus: 502
    });
  }

  // Prefer a `message` property when the value has one; otherwise stringify the value itself.
  const rawMessage = (err as { message?: unknown } | null | undefined)?.message ?? err;
  return new PipelineError({
    code: fallbackCode,
    message: normalizeMsg(rawMessage),
    category: 'internal',
    retryable: false,
    httpStatus: 500,
    cause: err
  });
}
|
||||
@@ -24,6 +24,7 @@ export function scoreCandidateFile(filePath: string, ext: 'srt' | 'ass', candida
|
||||
const fn = path.basename(filePath).toLowerCase();
|
||||
let score = 0;
|
||||
const reasons: string[] = [];
|
||||
const isPackageCandidate = candidate.scoreHints.includes('ta_package_candidate');
|
||||
|
||||
if (params.type === 'tv') {
|
||||
const sePattern = /s(\d{1,2})e(\d{1,2})/i;
|
||||
@@ -35,11 +36,12 @@ export function scoreCandidateFile(filePath: string, ext: 'srt' | 'ass', candida
|
||||
reasons.push('season_episode_match');
|
||||
}
|
||||
|
||||
const releaseTokens = tokenize(params.release);
|
||||
const releaseTokens = isPackageCandidate ? [] : tokenize(params.release);
|
||||
const fileTokens = tokenize(fn).concat(candidate.releaseHints.map((x) => x.toLowerCase()));
|
||||
const releaseMatches = releaseTokens.filter((t) => fileTokens.includes(t)).length;
|
||||
score += Math.min(25, releaseMatches * 6);
|
||||
if (releaseMatches > 0) reasons.push('release_match');
|
||||
if (isPackageCandidate) reasons.push('package_mode_episode_only');
|
||||
|
||||
if (candidate.lang === (params.languages[0] || 'tr')) {
|
||||
score += 10;
|
||||
|
||||
@@ -11,6 +11,8 @@ import { OpenSubtitlesProvider } from '../providers/OpenSubtitlesProvider.js';
|
||||
import { collectFilesRecursive, ensureInsideRoot, validateExtractionLimits } from './security.js';
|
||||
import { detectSubtitleType, isProbablyText } from './validators.js';
|
||||
import { chooseBest, scoreCandidateFile } from './scoring.js';
|
||||
import { scanFileWithClamav } from './clamav.js';
|
||||
import { PipelineError } from './errors.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -45,6 +47,10 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
const trace: TraceLog[] = [];
|
||||
const limits = input.securityLimits ?? defaultLimits();
|
||||
const dirs = await ensureJobDirs(jobToken);
|
||||
const clamavEnabled = input.features?.clamavEnabled === true;
|
||||
if (!clamavEnabled) {
|
||||
trace.push({ level: 'info', step: 'CLAMAV_SCAN_SKIPPED', message: 'ClamAV scanning disabled for this request' });
|
||||
}
|
||||
|
||||
const allCandidates: Candidate[] = [];
|
||||
for (const p of providerEntries) {
|
||||
@@ -56,12 +62,21 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
trace.push({ level: 'info', step: 'SUBTITLE_SEARCH_DONE', message: `Provider search done: ${p.name}`, meta: { count: c.length } });
|
||||
if (p.name === 'turkcealtyazi') {
|
||||
const realCount = c.filter((item) => item.scoreHints.includes('real_provider')).length;
|
||||
const strategyHint = c.find((item) => item.scoreHints.some((h) => h.startsWith('ta_strategy_')))?.scoreHints.find((h) => h.startsWith('ta_strategy_'));
|
||||
trace.push({
|
||||
level: 'info',
|
||||
step: 'TA_SEARCH_PARSED',
|
||||
message: `TurkceAltyazi candidates parsed`,
|
||||
meta: { total: c.length, real: realCount }
|
||||
meta: { total: c.length, real: realCount, strategy: strategyHint?.replace('ta_strategy_', '') || 'none' }
|
||||
});
|
||||
if (c.length === 0) {
|
||||
trace.push({
|
||||
level: 'warn',
|
||||
step: 'TA_SEARCH_NO_MATCH',
|
||||
message: 'TurkceAltyazi returned no candidate',
|
||||
meta: { title: input.title, year: input.year, release: input.release }
|
||||
});
|
||||
}
|
||||
}
|
||||
allCandidates.push(...c);
|
||||
}
|
||||
@@ -71,6 +86,21 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
for (const candidate of allCandidates) {
|
||||
const provider = providerEntries.find((p) => p.name === candidate.provider)?.impl;
|
||||
if (!provider) continue;
|
||||
const isPackageCandidate = candidate.scoreHints.includes('ta_package_candidate');
|
||||
let candidateScoreCount = 0;
|
||||
|
||||
if (isPackageCandidate) {
|
||||
trace.push({
|
||||
level: 'info',
|
||||
step: 'TA_PACKAGE_MODE_SELECTED',
|
||||
message: `Package candidate selected: ${candidate.downloadUrl}`,
|
||||
meta: {
|
||||
candidateId: candidate.id,
|
||||
season: input.season,
|
||||
episode: input.episode
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const dl = await provider.download(candidate, input, jobToken);
|
||||
if (Array.isArray(dl.trace)) {
|
||||
@@ -103,6 +133,44 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
if (clamavEnabled) {
|
||||
trace.push({ level: 'info', step: 'CLAMAV_SCAN_STARTED', message: file });
|
||||
let scan;
|
||||
try {
|
||||
scan = await scanFileWithClamav(file);
|
||||
} catch (err: any) {
|
||||
trace.push({
|
||||
level: 'error',
|
||||
step: 'CLAMAV_SCAN_ERROR',
|
||||
message: `ClamAV scan failed for ${file}`,
|
||||
meta: { error: err?.message }
|
||||
});
|
||||
throw new PipelineError({
|
||||
code: 'MALWARE_SCAN_ERROR',
|
||||
message: err?.message || `ClamAV scan failed for ${file}`,
|
||||
category: 'malware',
|
||||
retryable: false,
|
||||
httpStatus: 500,
|
||||
cause: err
|
||||
});
|
||||
}
|
||||
if (scan.infected) {
|
||||
await fse.remove(file);
|
||||
trace.push({
|
||||
level: 'warn',
|
||||
step: 'CLAMAV_SCAN_INFECTED_DELETED',
|
||||
message: `Deleted infected file: ${file}`,
|
||||
meta: { output: scan.output }
|
||||
});
|
||||
continue;
|
||||
}
|
||||
trace.push({
|
||||
level: 'info',
|
||||
step: 'CLAMAV_SCAN_CLEAN',
|
||||
message: `Clean file: ${file}`
|
||||
});
|
||||
}
|
||||
|
||||
const buf = await fs.readFile(file);
|
||||
if (!isProbablyText(buf)) {
|
||||
await fse.remove(file);
|
||||
@@ -119,7 +187,31 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
}
|
||||
|
||||
const s = scoreCandidateFile(file, ext, candidate, input);
|
||||
if (s) scored.push(s);
|
||||
if (s) {
|
||||
scored.push(s);
|
||||
candidateScoreCount += 1;
|
||||
if (isPackageCandidate && input.type === 'tv') {
|
||||
trace.push({
|
||||
level: 'info',
|
||||
step: 'TA_PACKAGE_EPISODE_FILE_MATCHED',
|
||||
message: path.basename(file),
|
||||
meta: {
|
||||
candidateId: candidate.id,
|
||||
season: input.season,
|
||||
episode: input.episode
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isPackageCandidate && input.type === 'tv' && candidateScoreCount === 0) {
|
||||
trace.push({
|
||||
level: 'warn',
|
||||
step: 'TA_PACKAGE_EPISODE_FILE_NOT_FOUND',
|
||||
message: `No subtitle file matched S${String(input.season ?? '').padStart(2, '0')}E${String(input.episode ?? '').padStart(2, '0')} in extracted package`,
|
||||
meta: { candidateId: candidate.id }
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,6 +225,7 @@ export async function searchSubtitles(input: SearchParams) {
|
||||
const bestPath = path.join(dirs.base, `best.${decision.best.ext}`);
|
||||
await fs.copyFile(decision.best.filePath, bestPath);
|
||||
trace.push({ level: 'info', step: 'BEST_SELECTED', message: `Selected ${decision.best.filePath}`, meta: { score: decision.best.score } });
|
||||
trace.push({ level: 'info', step: 'BEST_EXPORT_DONE', message: `Exported best subtitle to ${bestPath}` });
|
||||
|
||||
return {
|
||||
status: 'FOUND',
|
||||
|
||||
@@ -2,9 +2,11 @@ import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import { URL } from 'node:url';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { setTimeout as sleepMs } from 'node:timers/promises';
|
||||
import { env } from '../config/env.js';
|
||||
import type { SearchParams } from '../types/index.js';
|
||||
import { taError, taInfo } from './taLog.js';
|
||||
import { PipelineError, toPipelineError } from './errors.js';
|
||||
|
||||
export interface RealTaCandidate {
|
||||
id: string;
|
||||
@@ -14,6 +16,8 @@ export interface RealTaCandidate {
|
||||
releaseHints: string[];
|
||||
isHI: boolean;
|
||||
isForced: boolean;
|
||||
strategy?: 'exact' | 'token' | 'fallback' | 'default' | 'package_fallback';
|
||||
isPackage?: boolean;
|
||||
}
|
||||
|
||||
const client = axios.create({
|
||||
@@ -31,6 +35,10 @@ function sleep(ms: number) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * Exponential backoff delay for a retry attempt.
 *
 * Starts at 300 ms for attempt 1 (and anything below), doubles per attempt,
 * and is capped at 2000 ms.
 *
 * @param attempt - 1-based retry attempt number.
 * @returns Delay in milliseconds.
 */
function computeBackoffMs(attempt: number): number {
  const doublings = Math.max(0, attempt - 1);
  const uncapped = 300 * Math.pow(2, doublings);
  return Math.min(2000, uncapped);
}
|
||||
|
||||
interface HttpResultText {
|
||||
body: string;
|
||||
finalUrl: string;
|
||||
@@ -71,7 +79,7 @@ async function getWithRetry(url: string, retries = 2, cookies?: Map<string, stri
|
||||
let lastError: unknown;
|
||||
for (let i = 0; i <= retries; i++) {
|
||||
try {
|
||||
if (i > 0) await sleep(250 * i);
|
||||
if (i > 0) await sleepMs(computeBackoffMs(i));
|
||||
taInfo('HTTP_GET_START', 'HTTP GET started', { url, attempt: i + 1, retries: retries + 1 });
|
||||
const res = await client.get(url, {
|
||||
headers: cookies && cookies.size > 0 ? { cookie: cookieHeader(cookies) } : undefined
|
||||
@@ -87,8 +95,10 @@ async function getWithRetry(url: string, retries = 2, cookies?: Map<string, stri
|
||||
setCookie: Array.isArray(res.headers['set-cookie']) ? res.headers['set-cookie'] : []
|
||||
};
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
taError('HTTP_GET_FAILED', err, { url, attempt: i + 1, retries: retries + 1 });
|
||||
const pe = toPipelineError(err, 'TA_HTTP_GET_FAILED');
|
||||
lastError = pe;
|
||||
taError('HTTP_GET_FAILED', pe, { url, attempt: i + 1, retries: retries + 1, code: pe.code, retryable: pe.retryable });
|
||||
if (!pe.retryable) throw pe;
|
||||
}
|
||||
}
|
||||
throw lastError;
|
||||
@@ -127,20 +137,29 @@ function tokenize(input: string): string[] {
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/** Common English and Turkish filler words (lowercase) excluded from search queries. */
const QUERY_STOPWORDS = new Set([
  // English
  'the', 'of', 'and', 'a', 'an', 'in', 'to', 'for',
  // Turkish
  've', 'bir', 'ile', 'da', 'de'
]);
|
||||
|
||||
function buildFindQuery(params: SearchParams): string {
|
||||
const toks = tokenize(params.title).filter((t) => !/^\d+$/.test(t));
|
||||
return toks.slice(0, 2).join(' ');
|
||||
const meaningful = toks.filter((t) => !QUERY_STOPWORDS.has(t));
|
||||
const queryTokens = (meaningful.length > 0 ? meaningful : toks).slice(0, 3);
|
||||
return queryTokens.join(' ');
|
||||
}
|
||||
|
||||
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
||||
const $ = cheerio.load(html);
|
||||
const wantedYear = params.year;
|
||||
const wantedTitleTokens = tokenize(params.title);
|
||||
const wantedNormalizedTitle = normalizeText(params.title);
|
||||
const links: Array<{ url: string; title: string; year?: number; score: number }> = [];
|
||||
|
||||
$('a[href^="/mov/"]').each((_, el) => {
|
||||
$('a[href]').each((_, el) => {
|
||||
const href = ($(el).attr('href') || '').trim();
|
||||
if (!href) return;
|
||||
if (!/^\/(mov|tv|dizi)\//i.test(href)) return;
|
||||
|
||||
const title = ($(el).attr('title') || $(el).text() || '').replace(/\s+/g, ' ').trim();
|
||||
if (!title) return;
|
||||
@@ -150,8 +169,28 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
|
||||
const year = yearMatch ? Number(yearMatch[1]) : undefined;
|
||||
|
||||
const titleTokens = tokenize(title);
|
||||
const normalizedTitle = normalizeText(title);
|
||||
const overlap = wantedTitleTokens.filter((t) => titleTokens.includes(t)).length;
|
||||
let score = overlap;
|
||||
|
||||
if (normalizedTitle === wantedNormalizedTitle) score += 30;
|
||||
else if (
|
||||
normalizedTitle.includes(wantedNormalizedTitle) ||
|
||||
wantedNormalizedTitle.includes(normalizedTitle)
|
||||
) {
|
||||
score += 18;
|
||||
}
|
||||
|
||||
const isFilm = /\bfilm\b/i.test(containerText);
|
||||
const isTv = /\b(dizi|tv dizisi)\b/i.test(containerText);
|
||||
if (params.type === 'tv') {
|
||||
if (isTv) score += 8;
|
||||
if (isFilm) score -= 4;
|
||||
} else {
|
||||
if (isFilm) score += 8;
|
||||
if (isTv) score -= 4;
|
||||
}
|
||||
|
||||
if (wantedYear && year === wantedYear) score += 10;
|
||||
|
||||
links.push({
|
||||
@@ -176,11 +215,55 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
|
||||
return { movieUrl: best.url, movieTitle: best.title };
|
||||
}
|
||||
|
||||
function pickSubPageFromMovieDetail(html: string, movieUrl: string, params: SearchParams): { subUrl: string; title: string; releaseHints: string[]; isHI: boolean } | null {
|
||||
/**
 * Extracts season / episode information from the `.alcd` element of a subtitle row.
 *
 * The element text is whitespace-normalised and then matched, case-insensitively:
 * - `S<n> ... E<m>` yields both season and episode;
 * - `S<n>` alone yields only the season;
 * - otherwise neither is set.
 * One leading zero before each number is skipped by the pattern.
 *
 * @param $ - Cheerio API bound to the document the row belongs to.
 * @param row - Row element to inspect.
 * @returns Parsed numbers (when present) plus `isPackage`, which is true when the
 *   text contains the standalone word "paket".
 */
function parseSeasonEpisodeFromRow($: cheerio.CheerioAPI, row: any): { season?: number; episode?: number; isPackage: boolean } {
  const label = ($(row).find('.alcd').text() || '').replace(/\s+/g, ' ').trim();
  const isPackage = /\bpaket\b/i.test(label);

  const seasonAndEpisode = label.match(/S\s*0?(\d{1,2}).*E\s*0?(\d{1,2})/i);
  if (seasonAndEpisode) {
    return { season: Number(seasonAndEpisode[1]), episode: Number(seasonAndEpisode[2]), isPackage };
  }

  const seasonOnly = label.match(/S\s*0?(\d{1,2})/i);
  if (seasonOnly) {
    return { season: Number(seasonOnly[1]), isPackage };
  }

  return { isPackage };
}
|
||||
|
||||
function pickSubPageFromMovieDetail(
|
||||
html: string,
|
||||
movieUrl: string,
|
||||
params: SearchParams
|
||||
): {
|
||||
picked?: {
|
||||
subUrl: string;
|
||||
title: string;
|
||||
releaseHints: string[];
|
||||
isHI: boolean;
|
||||
strategy: 'exact' | 'token' | 'fallback' | 'default' | 'package_fallback';
|
||||
isPackage?: boolean;
|
||||
};
|
||||
noMatchReason?: 'episode_not_matched' | 'release_not_matched' | 'no_sub_rows';
|
||||
} {
|
||||
const $ = cheerio.load(html);
|
||||
const wantedRelease = normalizeText(params.release || '');
|
||||
const rows = $('.altsonsez2');
|
||||
const candidates: Array<{ subUrl: string; title: string; releaseHints: string[]; isHI: boolean; score: number }> = [];
|
||||
const wantedReleaseTokens = wantedRelease.split(/\s+/).filter(Boolean);
|
||||
const wantedSeason = params.type === 'tv' ? params.season : undefined;
|
||||
const wantedEpisode = params.type === 'tv' ? params.episode : undefined;
|
||||
const rows = $('[class*="altsonsez"]');
|
||||
const candidates: Array<{
|
||||
subUrl: string;
|
||||
title: string;
|
||||
releaseHints: string[];
|
||||
isHI: boolean;
|
||||
score: number;
|
||||
releaseExact: boolean;
|
||||
releaseTokenHits: number;
|
||||
trScore: number;
|
||||
downloadCount: number;
|
||||
season?: number;
|
||||
episode?: number;
|
||||
isPackage: boolean;
|
||||
}> = [];
|
||||
|
||||
rows.each((_, row) => {
|
||||
const linkEl = $(row).find('a[href^="/sub/"]').first();
|
||||
@@ -192,35 +275,106 @@ function pickSubPageFromMovieDetail(html: string, movieUrl: string, params: Sear
|
||||
const relHints = normalizeReleaseHints(ripText);
|
||||
const normalizedRip = normalizeText(ripText);
|
||||
const isHI = /(sdh|hearing|isitme|hi)/i.test(ripText);
|
||||
const isTr = $(row).find('.flagtr').length > 0;
|
||||
const indirmeRaw = ($(row).find('.alindirme').text() || '').replace(/\./g, '').replace(/,/g, '').trim();
|
||||
const downloadCount = Number(indirmeRaw.replace(/[^\d]/g, '')) || 0;
|
||||
const { season, episode, isPackage } = parseSeasonEpisodeFromRow($, row);
|
||||
|
||||
let score = 0;
|
||||
if (wantedRelease) {
|
||||
if (normalizedRip.includes(wantedRelease)) score += 20;
|
||||
const releaseToken = wantedRelease.split(/\s+/).find(Boolean);
|
||||
if (releaseToken && normalizedRip.includes(releaseToken)) score += 15;
|
||||
} else {
|
||||
score += 1;
|
||||
if (params.type === 'tv') {
|
||||
if (!season) return;
|
||||
if (wantedSeason && season !== wantedSeason) return;
|
||||
if (wantedEpisode && episode !== wantedEpisode && !isPackage) return;
|
||||
}
|
||||
|
||||
if ($(row).find('.flagtr').length > 0) score += 3;
|
||||
const releaseExact = Boolean(wantedRelease && normalizedRip.includes(wantedRelease));
|
||||
const releaseTokenHits = wantedRelease
|
||||
? wantedReleaseTokens.filter((tok) => normalizedRip.includes(tok)).length
|
||||
: 0;
|
||||
|
||||
let score = 0;
|
||||
if (!wantedRelease) {
|
||||
score += 1;
|
||||
}
|
||||
if (releaseExact) score += 40;
|
||||
if (releaseTokenHits > 0) score += Math.min(20, releaseTokenHits * 8);
|
||||
if (isTr) score += 8;
|
||||
score += Math.min(10, Math.floor(downloadCount / 1500));
|
||||
|
||||
candidates.push({
|
||||
subUrl: abs(movieUrl, href),
|
||||
title,
|
||||
releaseHints: relHints,
|
||||
isHI,
|
||||
score
|
||||
score,
|
||||
releaseExact,
|
||||
releaseTokenHits,
|
||||
trScore: isTr ? 1 : 0,
|
||||
downloadCount,
|
||||
season,
|
||||
episode,
|
||||
isPackage
|
||||
});
|
||||
});
|
||||
|
||||
if (candidates.length === 0) return null;
|
||||
const picked = candidates.sort((a, b) => b.score - a.score)[0];
|
||||
if (wantedRelease && picked.score < 10) return null;
|
||||
return picked;
|
||||
if (candidates.length === 0) {
|
||||
return { noMatchReason: params.type === 'tv' ? 'episode_not_matched' : 'no_sub_rows' };
|
||||
}
|
||||
|
||||
let selectedPool = candidates;
|
||||
let forcedStrategy: 'package_fallback' | undefined;
|
||||
if (params.type === 'tv' && wantedEpisode) {
|
||||
const episodeRows = candidates.filter((c) => c.episode === wantedEpisode);
|
||||
if (episodeRows.length > 0) {
|
||||
selectedPool = episodeRows;
|
||||
} else {
|
||||
const packageRows = candidates.filter((c) => c.isPackage);
|
||||
if (packageRows.length > 0) {
|
||||
selectedPool = packageRows;
|
||||
forcedStrategy = 'package_fallback';
|
||||
} else {
|
||||
return { noMatchReason: 'episode_not_matched' };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!wantedRelease || forcedStrategy === 'package_fallback') {
|
||||
const picked = selectedPool.sort((a, b) => b.score - a.score || b.downloadCount - a.downloadCount)[0];
|
||||
if (forcedStrategy === 'package_fallback') {
|
||||
return { picked: { ...picked, strategy: 'package_fallback', isPackage: true } };
|
||||
}
|
||||
return { picked: { ...picked, strategy: 'default' } };
|
||||
}
|
||||
|
||||
const exact = selectedPool
|
||||
.filter((c) => c.releaseExact)
|
||||
.sort((a, b) => b.score - a.score || b.downloadCount - a.downloadCount)[0];
|
||||
if (exact) {
|
||||
return { picked: { ...exact, strategy: 'exact' } };
|
||||
}
|
||||
|
||||
const token = selectedPool
|
||||
.filter((c) => c.releaseTokenHits > 0)
|
||||
.sort((a, b) => b.releaseTokenHits - a.releaseTokenHits || b.score - a.score || b.downloadCount - a.downloadCount)[0];
|
||||
if (token) {
|
||||
return { picked: { ...token, strategy: 'token' } };
|
||||
}
|
||||
|
||||
if (params.type === 'tv') {
|
||||
// TV'de once bolum dogrulugu, sonra release gelir. Release bulunamasa da en iyi bolum satirini kullan.
|
||||
const tvFallback = selectedPool
|
||||
.sort((a, b) => b.trScore - a.trScore || b.downloadCount - a.downloadCount || b.score - a.score)[0];
|
||||
if (tvFallback) {
|
||||
return { picked: { ...tvFallback, strategy: 'fallback' } };
|
||||
}
|
||||
}
|
||||
|
||||
const fallback = selectedPool
|
||||
.sort((a, b) => b.trScore - a.trScore || b.downloadCount - a.downloadCount || b.score - a.score)[0];
|
||||
if (!fallback) return { noMatchReason: 'release_not_matched' };
|
||||
return { picked: { ...fallback, strategy: 'fallback' } };
|
||||
}
|
||||
|
||||
export async function searchTurkceAltyaziReal(params: SearchParams): Promise<RealTaCandidate[]> {
|
||||
if (params.type !== 'movie') return [];
|
||||
const q = buildFindQuery(params);
|
||||
if (!q) return [];
|
||||
|
||||
@@ -241,24 +395,51 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
|
||||
const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
||||
if (!pickedMovie) {
|
||||
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q });
|
||||
return [];
|
||||
throw new PipelineError({
|
||||
code: 'TA_MOVIE_NOT_MATCHED',
|
||||
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,
|
||||
category: 'parse',
|
||||
retryable: false,
|
||||
httpStatus: 422
|
||||
});
|
||||
}
|
||||
taInfo('TA_MOVIE_SELECTED', 'Movie detail page selected', { movieUrl: pickedMovie.movieUrl, movieTitle: pickedMovie.movieTitle });
|
||||
|
||||
await sleep(env.turkcealtyaziMinDelayMs);
|
||||
const movieRes = await getWithRetry(pickedMovie.movieUrl, 2, cookies);
|
||||
mergeCookies(cookies, movieRes.setCookie);
|
||||
const pickedSub = pickSubPageFromMovieDetail(movieRes.body, pickedMovie.movieUrl, params);
|
||||
if (!pickedSub) {
|
||||
const subPick = pickSubPageFromMovieDetail(movieRes.body, pickedMovie.movieUrl, params);
|
||||
if (!subPick.picked) {
|
||||
taInfo('TA_SEARCH_RESULT', 'Subtitle sub-page not matched by release', {
|
||||
movieUrl: pickedMovie.movieUrl,
|
||||
release: params.release
|
||||
release: params.release,
|
||||
season: params.season,
|
||||
episode: params.episode,
|
||||
reason: subPick.noMatchReason
|
||||
});
|
||||
if (subPick.noMatchReason === 'episode_not_matched') {
|
||||
throw new PipelineError({
|
||||
code: 'TA_EPISODE_NOT_MATCHED',
|
||||
message: `Episode not matched on detail page (S${String(params.season ?? '').padStart(2, '0')}E${String(params.episode ?? '').padStart(2, '0')})`,
|
||||
category: 'parse',
|
||||
retryable: false,
|
||||
httpStatus: 422
|
||||
});
|
||||
}
|
||||
throw new PipelineError({
|
||||
code: 'TA_RELEASE_NOT_MATCHED',
|
||||
message: `Release not matched on movie detail page (release=${params.release ?? 'n/a'})`,
|
||||
category: 'parse',
|
||||
retryable: false,
|
||||
httpStatus: 422
|
||||
});
|
||||
return [];
|
||||
}
|
||||
const pickedSub = subPick.picked;
|
||||
taInfo('TA_SUB_SELECTED', 'Subtitle sub-page selected', {
|
||||
subUrl: pickedSub.subUrl,
|
||||
releaseHints: pickedSub.releaseHints
|
||||
releaseHints: pickedSub.releaseHints,
|
||||
strategy: pickedSub.strategy,
|
||||
isPackage: pickedSub.isPackage === true
|
||||
});
|
||||
|
||||
const id = `ta-real-${Buffer.from(pickedSub.subUrl).toString('base64').slice(0, 18)}`;
|
||||
@@ -269,7 +450,9 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
|
||||
lang: 'tr',
|
||||
releaseHints: pickedSub.releaseHints,
|
||||
isHI: pickedSub.isHI,
|
||||
isForced: false
|
||||
isForced: false,
|
||||
strategy: pickedSub.strategy,
|
||||
isPackage: pickedSub.isPackage === true
|
||||
}];
|
||||
taInfo('TA_SEARCH_RESULT', 'TurkceAltyazi search completed', { candidateCount: result.length, subUrl: pickedSub.subUrl });
|
||||
return result;
|
||||
@@ -292,7 +475,7 @@ async function postIndWithRetry(subPageUrl: string, payload: { idid: string; alt
|
||||
let lastError: unknown;
|
||||
for (let i = 0; i <= retries; i++) {
|
||||
try {
|
||||
if (i > 0) await sleep(250 * i);
|
||||
if (i > 0) await sleepMs(computeBackoffMs(i));
|
||||
const form = new URLSearchParams(payload).toString();
|
||||
const indUrl = `${env.turkcealtyaziBaseUrl}/ind`;
|
||||
taInfo('TA_IND_POST_START', 'POST /ind started', { subPageUrl, indUrl, attempt: i + 1, retries: retries + 1, altid: payload.altid });
|
||||
@@ -313,8 +496,10 @@ async function postIndWithRetry(subPageUrl: string, payload: { idid: string; alt
|
||||
contentType: res.headers['content-type']
|
||||
};
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
taError('TA_IND_POST_FAILED', err, { subPageUrl, attempt: i + 1, retries: retries + 1 });
|
||||
const pe = toPipelineError(err, 'TA_IND_POST_FAILED');
|
||||
lastError = pe;
|
||||
taError('TA_IND_POST_FAILED', pe, { subPageUrl, attempt: i + 1, retries: retries + 1, code: pe.code, retryable: pe.retryable });
|
||||
if (!pe.retryable) throw pe;
|
||||
}
|
||||
}
|
||||
throw lastError;
|
||||
@@ -330,7 +515,13 @@ export async function downloadTurkceAltyaziFile(subPageUrl: string): Promise<{ b
|
||||
mergeCookies(cookies, subPageRes.setCookie);
|
||||
const form = parseDownloadForm(subPageRes.body);
|
||||
if (!form) {
|
||||
const err = new Error('TA sub page download form parse failed');
|
||||
const err = new PipelineError({
|
||||
code: 'TA_FORM_PARSE_FAILED',
|
||||
message: 'TA sub page download form parse failed',
|
||||
category: 'parse',
|
||||
retryable: false,
|
||||
httpStatus: 422
|
||||
});
|
||||
taError('TA_FORM_PARSE_FAILED', err, { subPageUrl });
|
||||
throw err;
|
||||
}
|
||||
|
||||
@@ -10,8 +10,52 @@ export function isProbablyText(buffer: Buffer): boolean {
|
||||
|
||||
/**
 * Heuristically decides whether `text` looks like a well-formed SubRip (SRT) document.
 *
 * Every line that is exactly an `HH:MM:SS,mmm --> HH:MM:SS,mmm` timecode (after trimming)
 * is treated as a cue. Fewer than three cues is rejected outright. Otherwise each cue is
 * scored and one "malformed" point is added for each of:
 *   - the line directly above the timecode is not a purely numeric sequence index,
 *   - the cue ends at or before its own start,
 *   - its numeric index is not greater than the previous numeric index,
 *   - its start time is earlier than the previous cue's start time.
 *
 * @param text Decoded subtitle file contents.
 * @returns `true` when at least three cues exist and the malformed points divided by the
 *          cue count stay below 0.35.
 */
export function validateSrt(text: string): boolean {
  const timecode = /^(\d{2}):(\d{2}):(\d{2}),(\d{3})\s-->\s(\d{2}):(\d{2}):(\d{2}),(\d{3})$/;
  // Converts the four capture groups starting at `offset` (h, m, s, ms) into milliseconds.
  const toMs = (m: RegExpExecArray, offset: number): number =>
    Number(m[offset]) * 3600000 +
    Number(m[offset + 1]) * 60000 +
    Number(m[offset + 2]) * 1000 +
    Number(m[offset + 3]);

  const lines = text.split(/\r?\n/);

  // Single pass: match each line once and keep everything needed for scoring.
  const cues: Array<{ seq: string; start: number; end: number }> = [];
  for (let i = 0; i < lines.length; i++) {
    const m = timecode.exec((lines[i] ?? '').trim());
    if (!m) continue;
    cues.push({
      // For a timecode on the very first line there is no index line; treat it as empty.
      seq: (lines[i - 1] || '').trim(),
      start: toMs(m, 1),
      end: toMs(m, 5)
    });
  }
  if (cues.length < 3) return false;

  let malformed = 0;
  let prevIndex = -1;
  let prevStart = -1;

  for (const cue of cues) {
    if (/^\d+$/.test(cue.seq)) {
      const seqNum = Number(cue.seq);
      if (prevIndex !== -1 && seqNum <= prevIndex) malformed += 1;
      prevIndex = seqNum;
    } else {
      // Missing or non-numeric index: count it once and keep the last valid index as the
      // reference, so one bad line cannot mask (NaN) or double-count (blank -> 0) the
      // ordering check.
      malformed += 1;
    }

    if (cue.end <= cue.start) malformed += 1;
    if (prevStart !== -1 && cue.start < prevStart) malformed += 1;
    prevStart = cue.start;
  }

  return malformed / cues.length < 0.35;
}
|
||||
|
||||
export function validateAss(text: string): boolean {
|
||||
|
||||
@@ -7,15 +7,84 @@ import {
|
||||
searchTurkceAltyaziReal
|
||||
} from '../lib/turkcealtyaziReal.js';
|
||||
import { taError, taInfo } from '../lib/taLog.js';
|
||||
import { detectSubtitleType, isProbablyText } from '../lib/validators.js';
|
||||
import { PipelineError } from '../lib/errors.js';
|
||||
|
||||
function extensionFromDownload(url: string, contentType?: string): 'zip' | 'rar' | '7z' | 'srt' | 'ass' {
|
||||
const lowerUrl = url.toLowerCase();
|
||||
if (lowerUrl.includes('.zip')) return 'zip';
|
||||
if (lowerUrl.includes('.rar')) return 'rar';
|
||||
if (lowerUrl.includes('.7z')) return '7z';
|
||||
if (lowerUrl.includes('.ass')) return 'ass';
|
||||
if (contentType?.includes('zip')) return 'zip';
|
||||
return 'srt';
|
||||
/**
 * Reports whether `buf` starts with the exact byte sequence `sig`.
 *
 * @param buf Bytes to inspect.
 * @param sig Expected leading byte values, in order.
 * @returns `false` when `buf` is shorter than `sig` or any leading byte differs;
 *          `true` otherwise (including for an empty `sig`).
 */
function hasPrefix(buf: Buffer, sig: number[]): boolean {
  if (sig.length > buf.length) return false;
  for (let offset = 0; offset < sig.length; offset += 1) {
    if (buf[offset] !== sig[offset]) return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Decides whether a downloaded payload is an archive or a bare subtitle file and which
 * extension it should be stored under.
 *
 * Evidence is weighed in order of reliability:
 *   1. Magic bytes at the start of `buffer` (zip / rar / 7z). These always win, so a
 *      RAR file served from a URL containing ".zip" is still reported as rar.
 *   2. If the payload looks like text, subtitle format detection on its UTF-8 and
 *      latin1 decodings. Text that is not a subtitle is rejected here, so an HTML
 *      challenge page is never mistaken for an archive because of its URL or
 *      content type.
 *   3. For unrecognised binary data only, content-type / URL hints are used as a last
 *      resort.
 *
 * @param buffer Raw downloaded bytes.
 * @param finalUrl URL the payload was ultimately served from; matched case-insensitively.
 * @param contentType Optional response content type; matched case-insensitively.
 * @returns The payload kind, the file extension to use and a short reason for tracing.
 * @throws PipelineError `INVALID_SUBTITLE_HTML_PAYLOAD` when a text payload looks like an
 *         HTML or anti-bot challenge page.
 * @throws PipelineError `INVALID_SUBTITLE_TEXT_PAYLOAD` when a text payload is not a
 *         recognised subtitle format.
 * @throws PipelineError `INVALID_SUBTITLE_BINARY_PAYLOAD` when a binary payload has no
 *         known signature and no content-type/URL hint.
 */
function classifyDownloadedPayload(
  buffer: Buffer,
  finalUrl: string,
  contentType?: string
): { type: 'archive' | 'direct'; ext: 'zip' | 'rar' | '7z' | 'srt' | 'ass'; reason: string } {
  type ArchiveExt = 'zip' | 'rar' | '7z';

  const ct = (contentType || '').toLowerCase();
  const url = finalUrl.toLowerCase();

  // 1) Magic bytes are authoritative and are checked for every format before any hint.
  const signatures: Array<{ ext: ArchiveExt; magic: number[] }> = [
    { ext: 'zip', magic: [0x50, 0x4b, 0x03, 0x04] }, // "PK\x03\x04"
    { ext: 'zip', magic: [0x50, 0x4b, 0x05, 0x06] }, // "PK\x05\x06"
    { ext: 'zip', magic: [0x50, 0x4b, 0x07, 0x08] }, // "PK\x07\x08"
    { ext: 'rar', magic: [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x00] }, // "Rar!\x1a\x07\x00"
    { ext: 'rar', magic: [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00] }, // "Rar!\x1a\x07\x01\x00"
    { ext: '7z', magic: [0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c] } // "7z\xbc\xaf\x27\x1c"
  ];
  for (const { ext, magic } of signatures) {
    if (hasPrefix(buffer, magic)) {
      return { type: 'archive', ext, reason: `${ext} signature` };
    }
  }

  // 2) Text payloads are either a subtitle or invalid; hints never override this.
  if (isProbablyText(buffer)) {
    const utf8 = buffer.toString('utf8');
    const latin1 = buffer.toString('latin1');
    const ext = detectSubtitleType(utf8) || detectSubtitleType(latin1);
    if (ext) {
      return { type: 'direct', ext, reason: 'text + subtitle format detected' };
    }

    // Only the head of the document is probed for HTML / challenge-page markers.
    const probe = utf8.slice(0, 2000).toLowerCase();
    if (/<html|<!doctype|<body|cloudflare|captcha|attention required|just a moment|ddos/i.test(probe)) {
      throw new PipelineError({
        code: 'INVALID_SUBTITLE_HTML_PAYLOAD',
        message: 'TA download returned HTML/challenge payload instead of subtitle',
        category: 'invalid-subtitle',
        retryable: false,
        httpStatus: 422
      });
    }

    throw new PipelineError({
      code: 'INVALID_SUBTITLE_TEXT_PAYLOAD',
      message: 'TA download returned text payload but subtitle format is invalid',
      category: 'invalid-subtitle',
      retryable: false,
      httpStatus: 422
    });
  }

  // 3) Unknown binary: fall back to content-type / URL hints. The zip content-type test
  //    requires a non-letter before "zip" so "application/gzip" and "bzip2" do not match.
  const hints: Array<{ ext: ArchiveExt; matched: boolean }> = [
    { ext: 'zip', matched: /(^|[^a-z])zip/.test(ct) || url.includes('.zip') },
    { ext: 'rar', matched: ct.includes('rar') || url.includes('.rar') },
    { ext: '7z', matched: ct.includes('7z') || url.includes('.7z') }
  ];
  for (const { ext, matched } of hints) {
    if (matched) {
      return { type: 'archive', ext, reason: `${ext} content-type/url hint` };
    }
  }

  throw new PipelineError({
    code: 'INVALID_SUBTITLE_BINARY_PAYLOAD',
    message: 'TA download returned binary payload with unknown signature',
    category: 'invalid-subtitle',
    retryable: false,
    httpStatus: 422
  });
}
|
||||
|
||||
export class TurkceAltyaziProvider implements SubtitleProvider {
|
||||
@@ -38,11 +107,23 @@ export class TurkceAltyaziProvider implements SubtitleProvider {
|
||||
downloadUrl: item.detailUrl,
|
||||
lang: item.lang || 'tr',
|
||||
releaseHints: item.releaseHints,
|
||||
scoreHints: ['real_provider'],
|
||||
scoreHints: [
|
||||
'real_provider',
|
||||
item.strategy ? `ta_strategy_${item.strategy}` : 'ta_strategy_default',
|
||||
item.isPackage ? 'ta_package_candidate' : 'ta_single_candidate'
|
||||
],
|
||||
isHI: item.isHI,
|
||||
isForced: item.isForced
|
||||
}));
|
||||
} catch (err) {
|
||||
if (err instanceof PipelineError && (err.code === 'TA_MOVIE_NOT_MATCHED' || err.code === 'TA_RELEASE_NOT_MATCHED' || err.code === 'TA_EPISODE_NOT_MATCHED')) {
|
||||
taInfo('TA_PROVIDER_SEARCH_RESULT', 'Provider search completed with no match', {
|
||||
candidateCount: 0,
|
||||
reason: err.message,
|
||||
code: err.code
|
||||
});
|
||||
return [];
|
||||
}
|
||||
taError('TA_PROVIDER_SEARCH_FAILED', err, { title: params.title, year: params.year, release: params.release });
|
||||
throw err;
|
||||
}
|
||||
@@ -50,7 +131,13 @@ export class TurkceAltyaziProvider implements SubtitleProvider {
|
||||
|
||||
async download(candidate: Candidate, _params: SearchParams, jobToken: string): Promise<DownloadedArtifact> {
|
||||
if (!/^https?:\/\//i.test(candidate.downloadUrl)) {
|
||||
throw new Error('TurkceAltyazi candidate download URL must be http(s)');
|
||||
throw new PipelineError({
|
||||
code: 'TA_INVALID_DOWNLOAD_URL',
|
||||
message: 'TurkceAltyazi candidate download URL must be http(s)',
|
||||
category: 'parse',
|
||||
retryable: false,
|
||||
httpStatus: 422
|
||||
});
|
||||
}
|
||||
|
||||
const downloadDir = `${env.tempRoot}/${jobToken}/download`;
|
||||
@@ -66,11 +153,18 @@ export class TurkceAltyaziProvider implements SubtitleProvider {
|
||||
trace.push({ level: 'info', step: 'TA_SUB_PAGE_FETCHED', message: candidate.downloadUrl });
|
||||
const downloaded = await downloadTurkceAltyaziFile(candidate.downloadUrl);
|
||||
trace.push({ level: 'info', step: 'TA_IND_POST_DONE', message: downloaded.finalUrl });
|
||||
const ext = extensionFromDownload(downloaded.finalUrl, downloaded.contentType);
|
||||
const detected = classifyDownloadedPayload(downloaded.buffer, downloaded.finalUrl, downloaded.contentType);
|
||||
const ext = detected.ext;
|
||||
const filePath = path.join(downloadDir, `${candidate.id}.${ext}`);
|
||||
await fs.writeFile(filePath, downloaded.buffer);
|
||||
trace.push({
|
||||
level: 'info',
|
||||
step: 'TA_DOWNLOAD_PAYLOAD_CLASSIFIED',
|
||||
message: `${detected.type}:${detected.ext}`,
|
||||
meta: { reason: detected.reason, contentType: downloaded.contentType, finalUrl: downloaded.finalUrl }
|
||||
});
|
||||
|
||||
const type: 'direct' | 'archive' = ext === 'srt' || ext === 'ass' ? 'direct' : 'archive';
|
||||
const type: 'direct' | 'archive' = detected.type;
|
||||
taInfo('TA_PROVIDER_DOWNLOAD_RESULT', 'Provider download completed', {
|
||||
candidateId: candidate.id,
|
||||
filePath,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { FastifyInstance } from 'fastify';
|
||||
import { z } from 'zod';
|
||||
import { chooseSubtitle, cleanupJobToken, searchSubtitles } from '../lib/subtitleEngine.js';
|
||||
import { toPipelineError } from '../lib/errors.js';
|
||||
|
||||
const SearchSchema = z.object({
|
||||
jobToken: z.string().optional(),
|
||||
@@ -20,6 +21,11 @@ const SearchSchema = z.object({
|
||||
maxTotalBytes: z.number().min(1024),
|
||||
maxSingleBytes: z.number().min(1024)
|
||||
})
|
||||
.optional(),
|
||||
features: z
|
||||
.object({
|
||||
clamavEnabled: z.boolean().optional()
|
||||
})
|
||||
.optional()
|
||||
});
|
||||
|
||||
@@ -40,7 +46,22 @@ export async function subtitleRoutes(app: FastifyInstance): Promise<void> {
|
||||
const result = await searchSubtitles(parsed.data);
|
||||
return result;
|
||||
} catch (err: any) {
|
||||
return reply.status(500).send({ status: 'ERROR', message: err.message, trace: [{ level: 'error', step: 'JOB_ERROR', message: err.message }] });
|
||||
const pe = toPipelineError(err);
|
||||
return reply.status(pe.httpStatus).send({
|
||||
status: 'ERROR',
|
||||
code: pe.code,
|
||||
category: pe.category,
|
||||
retryable: pe.retryable,
|
||||
message: pe.message,
|
||||
trace: [
|
||||
{
|
||||
level: 'error',
|
||||
step: 'JOB_ERROR',
|
||||
message: pe.message,
|
||||
meta: { code: pe.code, category: pe.category, retryable: pe.retryable }
|
||||
}
|
||||
]
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -15,6 +15,9 @@ export interface SearchParams {
|
||||
maxTotalBytes: number;
|
||||
maxSingleBytes: number;
|
||||
};
|
||||
features?: {
|
||||
clamavEnabled?: boolean;
|
||||
};
|
||||
}
|
||||
|
||||
export interface Candidate {
|
||||
|
||||
Reference in New Issue
Block a user