feat(api): turkcealtyazi aramasına sayfalama desteği ekle

Arama sonuçlarının birden fazla sayfa taranabilmesi için sayfalama
mekanizması eklendi. İlk sayfadan maksimum sayfa sayısı keşfedilir ve
her sayfa taranarak eşleşen film aranır. Sayfalar arası bekleme süresi
korunur ve boş sayfalarda işlem durdurulur. Maksimum 10 sayfa sınırı
eklendi.
This commit is contained in:
2026-02-16 14:47:45 +03:00
parent 14c64d8032
commit 2c60c669c0

View File

@@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string {
return queryTokens.join(' '); return queryTokens.join(' ');
} }
/**
 * Builds the search URL for one result page of a query.
 *
 * The query is percent-encoded into the `find` parameter. The `p` parameter is
 * appended only when `page <= 1` is false, so the first page keeps the plain
 * URL without a page parameter.
 *
 * @param query - Raw search text; encoded with `encodeURIComponent`.
 * @param page - Page number to request.
 * @returns The search URL under `env.turkcealtyaziBaseUrl`.
 */
function buildSearchUrl(query: string, page: number): string {
  const firstPageUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`;
  return page <= 1 ? firstPageUrl : `${firstPageUrl}&p=${page}`;
}
/**
 * Finds the highest page number linked from a search result page.
 *
 * Every `<a href>` is resolved against `baseUrl`. Only links whose path is
 * exactly `/find.php` are considered, and their `p` query parameter is read as
 * the page number (a missing or empty `p` counts as 1).
 *
 * @param html - Markup of the search result page.
 * @param baseUrl - Base used to resolve relative hrefs.
 * @returns The largest finite `p` value seen, never less than 1.
 */
function parseSearchMaxPage(html: string, baseUrl: string): number {
  const $ = cheerio.load(html);
  let highest = 1;

  for (const anchor of $('a[href]').toArray()) {
    const rawHref = $(anchor).attr('href') || '';
    const href = rawHref.trim();
    if (href === '') continue;

    let target: URL;
    try {
      target = new URL(href, baseUrl);
    } catch {
      // The href could not be parsed as a URL; ignore this link.
      continue;
    }

    if (target.pathname !== '/find.php') continue;

    const pageNo = Number(target.searchParams.get('p') || '1');
    // Non-numeric values become NaN and are dropped by the finiteness check.
    if (Number.isFinite(pageNo)) highest = Math.max(highest, pageNo);
  }

  return highest;
}
function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> {
const $ = cheerio.load(html); const $ = cheerio.load(html);
const wantedYear = params.year; const wantedYear = params.year;
const wantedTitleTokens = tokenize(params.title); const wantedTitleTokens = tokenize(params.title);
@@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
if (!prev || item.score > prev.score) dedup.set(item.url, item); if (!prev || item.score > prev.score) dedup.set(item.url, item);
} }
const ordered = [...dedup.values()].sort((a, b) => b.score - a.score); return [...dedup.values()].sort((a, b) => b.score - a.score);
}
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
const wantedYear = params.year;
const ordered = extractMovieLinksFromSearch(html, params, baseUrl);
if (ordered.length === 0) return null; if (ordered.length === 0) return null;
const best = ordered[0]; const best = ordered[0];
@@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
const q = buildFindQuery(params); const q = buildFindQuery(params);
if (!q) return []; if (!q) return [];
const searchUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`; const firstSearchUrl = buildSearchUrl(q, 1);
const cookies = new Map<string, string>(); const cookies = new Map<string, string>();
taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', { taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', {
title: params.title, title: params.title,
year: params.year, year: params.year,
release: params.release, release: params.release,
query: q, query: q,
searchUrl searchUrl: firstSearchUrl
}); });
try { try {
const hardMaxPages = 10;
let scannedPages = 0;
let discoveredMaxPages = 1;
let pickedMovie: { movieUrl: string; movieTitle: string } | null = null;
for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) {
const searchUrl = buildSearchUrl(q, page);
await sleep(env.turkcealtyaziMinDelayMs); await sleep(env.turkcealtyaziMinDelayMs);
const searchRes = await getWithRetry(searchUrl, 2, cookies); const searchRes = await getWithRetry(searchUrl, 2, cookies);
mergeCookies(cookies, searchRes.setCookie); mergeCookies(cookies, searchRes.setCookie);
const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); scannedPages += 1;
if (page === 1) {
discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl));
}
const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', {
page,
pageLinks: pageLinks.length,
discoveredMaxPages
});
// TA may return HTTP 200 with an empty list for out-of-range pages.
if (pageLinks.length === 0 && page > 1) {
taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page });
break;
}
pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
if (pickedMovie) break;
}
if (!pickedMovie) { if (!pickedMovie) {
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q }); taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', {
title: params.title,
year: params.year,
query: q,
scannedPages
});
throw new PipelineError({ throw new PipelineError({
code: 'TA_MOVIE_NOT_MATCHED', code: 'TA_MOVIE_NOT_MATCHED',
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`, message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,