feat(api): turkcealtyazi aramasına sayfalama desteği ekle

Arama sonuçlarının birden fazla sayfasının taranabilmesi için sayfalama
mekanizması eklendi. Toplam sayfa sayısı ilk sayfadaki bağlantılardan
keşfedilir ve sayfalar, eşleşen film bulunana kadar sırayla taranır.
Sayfalar arası bekleme süresi korunur; ilk sayfadan sonraki boş bir
sayfada tarama durdurulur. En fazla 10 sayfa taranır.
This commit is contained in:
2026-02-16 14:47:45 +03:00
parent 14c64d8032
commit 2c60c669c0

View File

@@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string {
return queryTokens.join(' ');
}
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
/**
 * Builds the subtitle search URL (`find.php?cat=sub`) for a given result page.
 *
 * @param query - Search text; it is URL-encoded before being placed in `find`.
 * @param page - Result page number. Values of 1 or less yield the plain
 *   search URL without a `p` parameter.
 * @returns The search URL rooted at `env.turkcealtyaziBaseUrl`.
 */
function buildSearchUrl(query: string, page: number): string {
  const firstPageUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`;
  // Only pages after the first carry an explicit page parameter.
  return page <= 1 ? firstPageUrl : `${firstPageUrl}&p=${page}`;
}
/**
 * Finds the highest page number referenced by the links of a search result page.
 *
 * Every `<a href>` in the document is resolved against `baseUrl`; only links
 * whose path is exactly `/find.php` are considered, and their `p` query
 * parameter is read as the page number.
 *
 * @param html - HTML of a search result page.
 * @param baseUrl - Base used to resolve relative hrefs.
 * @returns The largest finite `p` value found, or 1 when no link exceeds it.
 */
function parseSearchMaxPage(html: string, baseUrl: string): number {
  const $ = cheerio.load(html);

  // Returns the page number a link points at, or null when the link is not
  // a usable search-page link.
  const pageFromHref = (rawHref: string): number | null => {
    let target: URL;
    try {
      target = new URL(rawHref, baseUrl);
    } catch {
      // Hrefs that cannot be parsed as a URL are ignored.
      return null;
    }
    if (target.pathname !== '/find.php') return null;
    // `||` (not `??`) so that both a missing and an empty `p` count as page 1.
    const pageNo = Number(target.searchParams.get('p') || '1');
    return Number.isFinite(pageNo) ? pageNo : null;
  };

  let highest = 1;
  $('a[href]').each((_, anchor) => {
    const rawHref = ($(anchor).attr('href') || '').trim();
    if (rawHref.length === 0) return;
    const pageNo = pageFromHref(rawHref);
    if (pageNo !== null && pageNo > highest) highest = pageNo;
  });
  return highest;
}
function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> {
const $ = cheerio.load(html);
const wantedYear = params.year;
const wantedTitleTokens = tokenize(params.title);
@@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
if (!prev || item.score > prev.score) dedup.set(item.url, item);
}
const ordered = [...dedup.values()].sort((a, b) => b.score - a.score);
return [...dedup.values()].sort((a, b) => b.score - a.score);
}
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
const wantedYear = params.year;
const ordered = extractMovieLinksFromSearch(html, params, baseUrl);
if (ordered.length === 0) return null;
const best = ordered[0];
@@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
const q = buildFindQuery(params);
if (!q) return [];
const searchUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`;
const firstSearchUrl = buildSearchUrl(q, 1);
const cookies = new Map<string, string>();
taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', {
title: params.title,
year: params.year,
release: params.release,
query: q,
searchUrl
searchUrl: firstSearchUrl
});
try {
const hardMaxPages = 10;
let scannedPages = 0;
let discoveredMaxPages = 1;
let pickedMovie: { movieUrl: string; movieTitle: string } | null = null;
for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) {
const searchUrl = buildSearchUrl(q, page);
await sleep(env.turkcealtyaziMinDelayMs);
const searchRes = await getWithRetry(searchUrl, 2, cookies);
mergeCookies(cookies, searchRes.setCookie);
const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
scannedPages += 1;
if (page === 1) {
discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl));
}
const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', {
page,
pageLinks: pageLinks.length,
discoveredMaxPages
});
// TA may return HTTP 200 with an empty list for out-of-range pages.
if (pageLinks.length === 0 && page > 1) {
taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page });
break;
}
pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
if (pickedMovie) break;
}
if (!pickedMovie) {
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q });
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', {
title: params.title,
year: params.year,
query: q,
scannedPages
});
throw new PipelineError({
code: 'TA_MOVIE_NOT_MATCHED',
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,