feat(api): turkcealtyazi aramasına sayfalama desteği ekle
Arama sonuçlarının birden fazla sayfasının taranabilmesi için sayfalama mekanizması eklendi. İlk sayfadan maksimum sayfa sayısı keşfedilir ve eşleşen film bulunana kadar sayfalar sırayla taranır. Sayfalar arası bekleme süresi korunur ve boş sayfalarda işlem durdurulur. Maksimum 10 sayfa sınırı eklendi.
This commit is contained in:
@@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string {
|
|||||||
return queryTokens.join(' ');
|
return queryTokens.join(' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
/**
 * Builds the subtitle-search URL for a query and a 1-based page number.
 *
 * Page 1 maps to the bare search URL without a `p` parameter; later pages
 * append `&p=<page>`. Any page value that is not a finite number greater than
 * 1 is treated as page 1.
 *
 * @param query Raw search text; it is percent-encoded here.
 * @param page 1-based page number. Fractional values are truncated.
 * @returns Search URL rooted at `env.turkcealtyaziBaseUrl`.
 */
function buildSearchUrl(query: string, page: number): string {
  const firstPageUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`;
  // NaN/Infinity would otherwise be interpolated verbatim ("&p=NaN"), so fall back to page 1.
  const pageNumber = Number.isFinite(page) ? Math.trunc(page) : 1;
  return pageNumber <= 1 ? firstPageUrl : `${firstPageUrl}&p=${pageNumber}`;
}
|
||||||
|
|
||||||
|
/**
 * Finds the highest result-page number linked from a search results page.
 *
 * Every `<a href>` in the document is resolved against `baseUrl`; links whose
 * path is exactly `/find.php` contribute their `p` query parameter. Links
 * without a usable `p` value do not raise the result.
 *
 * @param html Raw HTML of a search results page.
 * @param baseUrl Base used to resolve relative hrefs.
 * @returns The largest page number found, or 1 when no link advertises a higher page.
 */
function parseSearchMaxPage(html: string, baseUrl: string): number {
  const $ = cheerio.load(html);
  let maxPage = 1;

  $('a[href]').each((_, el) => {
    const href = ($(el).attr('href') ?? '').trim();
    if (!href) return;

    let linkUrl: URL;
    try {
      linkUrl = new URL(href, baseUrl);
    } catch {
      // An href that cannot be resolved against the base cannot be a pagination link.
      return;
    }
    if (linkUrl.pathname !== '/find.php') return;

    const rawPage = (linkUrl.searchParams.get('p') ?? '').trim();
    // Only plain decimal integers are page numbers; this rejects values such as
    // "2.5", "1e9" or "0x10" that Number() would otherwise accept.
    if (!/^\d+$/.test(rawPage)) return;

    const pageNumber = Number(rawPage);
    if (Number.isSafeInteger(pageNumber) && pageNumber > maxPage) maxPage = pageNumber;
  });

  return maxPage;
}
|
||||||
|
|
||||||
|
function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> {
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
const wantedYear = params.year;
|
const wantedYear = params.year;
|
||||||
const wantedTitleTokens = tokenize(params.title);
|
const wantedTitleTokens = tokenize(params.title);
|
||||||
@@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
|
|||||||
if (!prev || item.score > prev.score) dedup.set(item.url, item);
|
if (!prev || item.score > prev.score) dedup.set(item.url, item);
|
||||||
}
|
}
|
||||||
|
|
||||||
const ordered = [...dedup.values()].sort((a, b) => b.score - a.score);
|
return [...dedup.values()].sort((a, b) => b.score - a.score);
|
||||||
|
}
|
||||||
|
|
||||||
|
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
||||||
|
const wantedYear = params.year;
|
||||||
|
const ordered = extractMovieLinksFromSearch(html, params, baseUrl);
|
||||||
if (ordered.length === 0) return null;
|
if (ordered.length === 0) return null;
|
||||||
|
|
||||||
const best = ordered[0];
|
const best = ordered[0];
|
||||||
@@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
|
|||||||
const q = buildFindQuery(params);
|
const q = buildFindQuery(params);
|
||||||
if (!q) return [];
|
if (!q) return [];
|
||||||
|
|
||||||
const searchUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`;
|
const firstSearchUrl = buildSearchUrl(q, 1);
|
||||||
const cookies = new Map<string, string>();
|
const cookies = new Map<string, string>();
|
||||||
taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', {
|
taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', {
|
||||||
title: params.title,
|
title: params.title,
|
||||||
year: params.year,
|
year: params.year,
|
||||||
release: params.release,
|
release: params.release,
|
||||||
query: q,
|
query: q,
|
||||||
searchUrl
|
searchUrl: firstSearchUrl
|
||||||
});
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
const hardMaxPages = 10;
|
||||||
|
let scannedPages = 0;
|
||||||
|
let discoveredMaxPages = 1;
|
||||||
|
let pickedMovie: { movieUrl: string; movieTitle: string } | null = null;
|
||||||
|
|
||||||
|
for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) {
|
||||||
|
const searchUrl = buildSearchUrl(q, page);
|
||||||
await sleep(env.turkcealtyaziMinDelayMs);
|
await sleep(env.turkcealtyaziMinDelayMs);
|
||||||
const searchRes = await getWithRetry(searchUrl, 2, cookies);
|
const searchRes = await getWithRetry(searchUrl, 2, cookies);
|
||||||
mergeCookies(cookies, searchRes.setCookie);
|
mergeCookies(cookies, searchRes.setCookie);
|
||||||
const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
scannedPages += 1;
|
||||||
|
|
||||||
|
if (page === 1) {
|
||||||
|
discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
||||||
|
taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', {
|
||||||
|
page,
|
||||||
|
pageLinks: pageLinks.length,
|
||||||
|
discoveredMaxPages
|
||||||
|
});
|
||||||
|
|
||||||
|
// TA may return HTTP 200 with an empty list for out-of-range pages.
|
||||||
|
if (pageLinks.length === 0 && page > 1) {
|
||||||
|
taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page });
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
||||||
|
if (pickedMovie) break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!pickedMovie) {
|
if (!pickedMovie) {
|
||||||
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q });
|
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', {
|
||||||
|
title: params.title,
|
||||||
|
year: params.year,
|
||||||
|
query: q,
|
||||||
|
scannedPages
|
||||||
|
});
|
||||||
throw new PipelineError({
|
throw new PipelineError({
|
||||||
code: 'TA_MOVIE_NOT_MATCHED',
|
code: 'TA_MOVIE_NOT_MATCHED',
|
||||||
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,
|
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,
|
||||||
|
|||||||
Reference in New Issue
Block a user