feat: altyazı otomasyon sistemi MVP'sini ekle
Docker tabanlı mikro servis mimarisi ile altyazı otomasyon sistemi altyapısı kuruldu. - Core (Node.js): Chokidar dosya izleyici, BullMQ iş kuyrukları, ffprobe medya analizi, MongoDB entegrasyonu ve dosya yazma işlemleri. - API (Fastify): Mock sağlayıcılar, arşiv güvenliği (zip-slip), altyazı doğrulama, puanlama ve aday seçim motoru. - UI (React/Vite): İş yönetimi paneli, canlı SSE log akışı, manuel inceleme arayüzü ve sistem ayarları. - Altyapı: Docker Compose (dev/prod), Redis, Mongo ve çevresel değişken yapılandırmaları.
This commit is contained in:
200
services/api/src/lib/subtitleEngine.ts
Normal file
200
services/api/src/lib/subtitleEngine.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
import fse from 'fs-extra';
|
||||
import { env } from '../config/env.js';
|
||||
import type { SearchParams, TraceLog } from '../types/index.js';
|
||||
import { SubtitleProvider, Candidate } from '../types/index.js';
|
||||
import { TurkceAltyaziProvider } from '../providers/TurkceAltyaziProvider.js';
|
||||
import { OpenSubtitlesProvider } from '../providers/OpenSubtitlesProvider.js';
|
||||
import { collectFilesRecursive, ensureInsideRoot, validateExtractionLimits } from './security.js';
|
||||
import { detectSubtitleType, isProbablyText } from './validators.js';
|
||||
import { chooseBest, scoreCandidateFile } from './scoring.js';
|
||||
|
||||
// Promise-returning wrapper around child_process.execFile (used to invoke the 7z binary).
const execFileAsync = promisify(execFile);

// All subtitle providers queried by searchSubtitles(); results are pooled before scoring.
const providers: SubtitleProvider[] = [new TurkceAltyaziProvider(), new OpenSubtitlesProvider()];
function defaultLimits() {
|
||||
return { maxFiles: 300, maxTotalBytes: 250 * 1024 * 1024, maxSingleBytes: 10 * 1024 * 1024 };
|
||||
}
|
||||
|
||||
async function ensureJobDirs(jobToken: string) {
|
||||
const base = path.join(env.tempRoot, jobToken);
|
||||
const download = path.join(base, 'download');
|
||||
const extracted = path.join(base, 'extracted');
|
||||
await fs.mkdir(download, { recursive: true });
|
||||
await fs.mkdir(extracted, { recursive: true });
|
||||
return { base, download, extracted };
|
||||
}
|
||||
|
||||
async function extractArchive(archivePath: string, extractedDir: string, trace: TraceLog[]): Promise<string[]> {
|
||||
trace.push({ level: 'info', step: 'EXTRACT_STARTED', message: archivePath });
|
||||
await execFileAsync('7z', ['x', '-y', archivePath, `-o${extractedDir}`]);
|
||||
const files = await collectFilesRecursive(extractedDir);
|
||||
trace.push({ level: 'info', step: 'EXTRACT_DONE', message: `Extracted ${files.length} files` });
|
||||
return files;
|
||||
}
|
||||
|
||||
export async function searchSubtitles(input: SearchParams) {
|
||||
const jobToken = input.jobToken ?? `job-${Date.now()}`;
|
||||
const trace: TraceLog[] = [];
|
||||
const limits = input.securityLimits ?? defaultLimits();
|
||||
const dirs = await ensureJobDirs(jobToken);
|
||||
|
||||
const allCandidates: Candidate[] = [];
|
||||
for (const p of providers) {
|
||||
const c = await p.search(input);
|
||||
allCandidates.push(...c);
|
||||
}
|
||||
|
||||
const scored: any[] = [];
|
||||
|
||||
for (const candidate of allCandidates) {
|
||||
const provider = providers.find((p: any) => p.constructor.name.toLowerCase().includes(candidate.provider === 'turkcealtyazi' ? 'turkce' : 'open'));
|
||||
if (!provider) continue;
|
||||
|
||||
const dl = await provider.download(candidate, input, jobToken);
|
||||
trace.push({ level: 'info', step: 'ARCHIVE_DOWNLOADED', message: `${candidate.provider}:${candidate.id}`, meta: { path: dl.filePath, type: dl.type } });
|
||||
|
||||
let files: string[] = [];
|
||||
if (dl.type === 'archive') {
|
||||
const perCandidateExtractDir = path.join(dirs.extracted, candidate.id);
|
||||
await fs.mkdir(perCandidateExtractDir, { recursive: true });
|
||||
files = await extractArchive(dl.filePath, perCandidateExtractDir, trace);
|
||||
|
||||
for (const file of files) {
|
||||
const inside = await ensureInsideRoot(perCandidateExtractDir, file);
|
||||
if (!inside) {
|
||||
trace.push({ level: 'warn', step: 'ZIPSLIP_REJECTED', message: `Rejected path traversal candidate: ${file}` });
|
||||
await fse.remove(file);
|
||||
}
|
||||
}
|
||||
|
||||
files = await collectFilesRecursive(perCandidateExtractDir);
|
||||
const lim = await validateExtractionLimits(files, limits);
|
||||
if (!lim.ok) {
|
||||
trace.push({ level: 'warn', step: 'LIMIT_REJECTED', message: lim.reason ?? 'limit rejected' });
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
files = [dl.filePath];
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
const buf = await fs.readFile(file);
|
||||
if (!isProbablyText(buf)) {
|
||||
await fse.remove(file);
|
||||
trace.push({ level: 'warn', step: 'INVALID_SUBTITLE_DELETED', message: `Deleted binary/invalid: ${file}` });
|
||||
continue;
|
||||
}
|
||||
|
||||
const text = buf.toString('utf8');
|
||||
const ext = detectSubtitleType(text);
|
||||
if (!ext) {
|
||||
await fse.remove(file);
|
||||
trace.push({ level: 'warn', step: 'INVALID_SUBTITLE_DELETED', message: `Deleted unknown subtitle content: ${file}` });
|
||||
continue;
|
||||
}
|
||||
|
||||
const s = scoreCandidateFile(file, ext, candidate, input);
|
||||
if (s) scored.push(s);
|
||||
}
|
||||
}
|
||||
|
||||
trace.push({ level: 'info', step: 'CANDIDATES_SCANNED', message: `Scored ${scored.length} subtitle files` });
|
||||
|
||||
const decision = chooseBest(scored);
|
||||
const manifestPath = path.join(dirs.base, 'manifest.json');
|
||||
await fs.writeFile(manifestPath, JSON.stringify({ jobToken, input, scored: decision.candidates }, null, 2), 'utf8');
|
||||
|
||||
if (decision.status === 'FOUND' && decision.best) {
|
||||
const bestPath = path.join(dirs.base, `best.${decision.best.ext}`);
|
||||
await fs.copyFile(decision.best.filePath, bestPath);
|
||||
trace.push({ level: 'info', step: 'BEST_SELECTED', message: `Selected ${decision.best.filePath}`, meta: { score: decision.best.score } });
|
||||
|
||||
return {
|
||||
status: 'FOUND',
|
||||
jobToken,
|
||||
bestPath,
|
||||
confidence: decision.confidence,
|
||||
source: decision.best.provider,
|
||||
candidates: decision.candidates,
|
||||
trace
|
||||
};
|
||||
}
|
||||
|
||||
if (decision.status === 'AMBIGUOUS') {
|
||||
trace.push({ level: 'warn', step: 'AMBIGUOUS_NEEDS_REVIEW', message: 'Top candidates too close' });
|
||||
return {
|
||||
status: 'AMBIGUOUS',
|
||||
jobToken,
|
||||
confidence: 0.5,
|
||||
source: 'multi',
|
||||
candidates: decision.candidates,
|
||||
trace
|
||||
};
|
||||
}
|
||||
|
||||
trace.push({ level: 'warn', step: 'NOT_FOUND_NEEDS_REVIEW', message: 'No valid subtitle file found' });
|
||||
return {
|
||||
status: 'NOT_FOUND',
|
||||
jobToken,
|
||||
confidence: 0,
|
||||
source: 'none',
|
||||
candidates: [],
|
||||
trace
|
||||
};
|
||||
}
|
||||
|
||||
export async function chooseSubtitle(jobToken: string, chosenCandidateId?: string, chosenPath?: string) {
|
||||
const base = path.join(env.tempRoot, jobToken);
|
||||
const manifestPath = path.join(base, 'manifest.json');
|
||||
const raw = await fs.readFile(manifestPath, 'utf8');
|
||||
const manifest = JSON.parse(raw);
|
||||
const list = manifest.scored ?? [];
|
||||
|
||||
const found = chosenPath
|
||||
? list.find((x: any) => x.filePath === chosenPath || x.id === chosenPath)
|
||||
: list.find((x: any) => x.id === chosenCandidateId || x.candidateId === chosenCandidateId);
|
||||
|
||||
if (!found) {
|
||||
return { status: 'NOT_FOUND', message: 'Chosen candidate not found' };
|
||||
}
|
||||
|
||||
const bestPath = path.join(base, `best.${found.ext}`);
|
||||
await fs.copyFile(found.filePath, bestPath);
|
||||
|
||||
return {
|
||||
status: 'FOUND',
|
||||
bestPath,
|
||||
confidence: Math.max(0.5, Math.min(0.98, found.score / 130)),
|
||||
source: found.provider
|
||||
};
|
||||
}
|
||||
|
||||
export async function cleanupJobToken(jobToken: string) {
|
||||
const dir = path.join(env.tempRoot, jobToken);
|
||||
await fse.remove(dir);
|
||||
}
|
||||
|
||||
export async function cleanupOldTemp(hours = 24): Promise<number> {
|
||||
await fs.mkdir(env.tempRoot, { recursive: true });
|
||||
const entries = await fs.readdir(env.tempRoot, { withFileTypes: true });
|
||||
const now = Date.now();
|
||||
let count = 0;
|
||||
|
||||
for (const e of entries) {
|
||||
if (!e.isDirectory()) continue;
|
||||
const p = path.join(env.tempRoot, e.name);
|
||||
const st = await fs.stat(p);
|
||||
const ageHours = (now - st.mtimeMs) / 1000 / 3600;
|
||||
if (ageHours > hours) {
|
||||
await fse.remove(p);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
Reference in New Issue
Block a user