feat(api): turkcealtyazi aramasına sayfalama desteği ekle
Arama sonuçlarında birden fazla sayfanın taranabilmesi için sayfalama mekanizması eklendi. İlk sayfadan maksimum sayfa sayısı keşfedilir ve her sayfa taranarak eşleşen film aranır. Sayfalar arası bekleme süresi korunur ve boş sayfalarda işlem durdurulur. Maksimum 10 sayfa sınırı eklendi.
This commit is contained in:
@@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string {
|
||||
return queryTokens.join(' ');
|
||||
}
|
||||
|
||||
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
||||
/**
 * Builds the subtitle-search URL for one page of results.
 *
 * The first page is addressed without a `p` parameter; later pages append
 * `&p=<page>` to the same URL.
 *
 * @param query - Search text; percent-encoded with `encodeURIComponent`.
 * @param page - 1-based page number. Non-finite values and anything below 2
 *   map to the first page; fractional values are truncated toward zero.
 * @returns Search URL rooted at `env.turkcealtyaziBaseUrl`.
 */
function buildSearchUrl(query: string, page: number): string {
  const firstPageUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`;

  // Normalize first so NaN/Infinity/fractions never leak into the query string.
  const pageNumber = Number.isFinite(page) ? Math.trunc(page) : 1;
  if (pageNumber <= 1) return firstPageUrl;

  return `${firstPageUrl}&p=${pageNumber}`;
}
|
||||
|
||||
/**
 * Scans a search-result page for pagination links and returns the highest
 * page number they reference.
 *
 * Every `<a href>` is resolved against `baseUrl`. Only links whose path is
 * exactly `/find.php` are considered, and their `p` query parameter is read
 * as the page number. A link without `p` counts as page 1.
 *
 * @param html - HTML of a search-result page.
 * @param baseUrl - Base used to resolve relative `href` values.
 * @returns The largest page number found, or 1 when no link advertises a
 *   higher page.
 */
function parseSearchMaxPage(html: string, baseUrl: string): number {
  const $ = cheerio.load(html);
  // Plain decimal digits only: Number() alone would also accept "2.5",
  // "1e3" or "0x10", none of which is a usable page index.
  const decimalDigits = /^\d+$/;
  let maxPage = 1;

  for (const el of $('a[href]').toArray()) {
    const href = ($(el).attr('href') ?? '').trim();
    if (!href) continue;

    let linkUrl: URL;
    try {
      linkUrl = new URL(href, baseUrl);
    } catch {
      // Unparseable href (or base): not a pagination link we can use.
      continue;
    }
    if (linkUrl.pathname !== '/find.php') continue;

    const rawPage = (linkUrl.searchParams.get('p') ?? '').trim();
    if (!decimalDigits.test(rawPage)) continue;

    const pageNumber = Number(rawPage);
    if (Number.isSafeInteger(pageNumber) && pageNumber > maxPage) maxPage = pageNumber;
  }

  return maxPage;
}
|
||||
|
||||
function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> {
|
||||
const $ = cheerio.load(html);
|
||||
const wantedYear = params.year;
|
||||
const wantedTitleTokens = tokenize(params.title);
|
||||
@@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
|
||||
if (!prev || item.score > prev.score) dedup.set(item.url, item);
|
||||
}
|
||||
|
||||
const ordered = [...dedup.values()].sort((a, b) => b.score - a.score);
|
||||
return [...dedup.values()].sort((a, b) => b.score - a.score);
|
||||
}
|
||||
|
||||
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
||||
const wantedYear = params.year;
|
||||
const ordered = extractMovieLinksFromSearch(html, params, baseUrl);
|
||||
if (ordered.length === 0) return null;
|
||||
|
||||
const best = ordered[0];
|
||||
@@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
|
||||
const q = buildFindQuery(params);
|
||||
if (!q) return [];
|
||||
|
||||
const searchUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`;
|
||||
const firstSearchUrl = buildSearchUrl(q, 1);
|
||||
const cookies = new Map<string, string>();
|
||||
taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', {
|
||||
title: params.title,
|
||||
year: params.year,
|
||||
release: params.release,
|
||||
query: q,
|
||||
searchUrl
|
||||
searchUrl: firstSearchUrl
|
||||
});
|
||||
|
||||
try {
|
||||
await sleep(env.turkcealtyaziMinDelayMs);
|
||||
const searchRes = await getWithRetry(searchUrl, 2, cookies);
|
||||
mergeCookies(cookies, searchRes.setCookie);
|
||||
const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
||||
const hardMaxPages = 10;
|
||||
let scannedPages = 0;
|
||||
let discoveredMaxPages = 1;
|
||||
let pickedMovie: { movieUrl: string; movieTitle: string } | null = null;
|
||||
|
||||
for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) {
|
||||
const searchUrl = buildSearchUrl(q, page);
|
||||
await sleep(env.turkcealtyaziMinDelayMs);
|
||||
const searchRes = await getWithRetry(searchUrl, 2, cookies);
|
||||
mergeCookies(cookies, searchRes.setCookie);
|
||||
scannedPages += 1;
|
||||
|
||||
if (page === 1) {
|
||||
discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl));
|
||||
}
|
||||
|
||||
const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
||||
taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', {
|
||||
page,
|
||||
pageLinks: pageLinks.length,
|
||||
discoveredMaxPages
|
||||
});
|
||||
|
||||
// TA may return HTTP 200 with an empty list for out-of-range pages.
|
||||
if (pageLinks.length === 0 && page > 1) {
|
||||
taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page });
|
||||
break;
|
||||
}
|
||||
|
||||
pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
||||
if (pickedMovie) break;
|
||||
}
|
||||
|
||||
if (!pickedMovie) {
|
||||
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q });
|
||||
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', {
|
||||
title: params.title,
|
||||
year: params.year,
|
||||
query: q,
|
||||
scannedPages
|
||||
});
|
||||
throw new PipelineError({
|
||||
code: 'TA_MOVIE_NOT_MATCHED',
|
||||
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,
|
||||
|
||||
Reference in New Issue
Block a user