|
|
|
|
@@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string {
|
|
|
|
|
return queryTokens.join(' ');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
|
|
|
|
/**
 * Builds the subtitle search URL for a query and a result page.
 *
 * Any `page` value of 1 or less yields the bare search URL; otherwise the
 * page number is appended as the `p` query parameter.
 *
 * @param query - Raw search text; URI-encoded before being placed in the URL.
 * @param page - Result page number.
 * @returns Search URL rooted at `env.turkcealtyaziBaseUrl`.
 */
function buildSearchUrl(query: string, page: number): string {
  const firstPageUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`;
  // Keep the `<= 1` comparison (rather than `> 1`) so non-comparable values such as NaN still get `&p=`.
  return page <= 1 ? firstPageUrl : `${firstPageUrl}&p=${page}`;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Finds the highest page number referenced by links in a search results page.
 *
 * Every anchor carrying an `href` is resolved against `baseUrl`. Only links whose
 * path is exactly `/find.php` are considered; their `p` query parameter (treated
 * as 1 when missing or empty) is compared against the running maximum.
 *
 * @param html - Markup of a search results page.
 * @param baseUrl - Base used to resolve relative hrefs.
 * @returns The largest finite `p` value found, or 1 when no link exceeds it.
 */
function parseSearchMaxPage(html: string, baseUrl: string): number {
  const doc = cheerio.load(html);
  const anchors = doc('a[href]').toArray();

  let highest = 1;
  for (const anchor of anchors) {
    const rawHref = (doc(anchor).attr('href') || '').trim();
    if (rawHref === '') continue;

    let target: URL;
    try {
      target = new URL(rawHref, baseUrl);
    } catch {
      // Unparseable hrefs are ignored.
      continue;
    }

    if (target.pathname !== '/find.php') continue;

    const pageNo = Number(target.searchParams.get('p') || '1');
    if (Number.isFinite(pageNo) && pageNo > highest) {
      highest = pageNo;
    }
  }

  return highest;
}
|
|
|
|
|
|
|
|
|
|
function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> {
|
|
|
|
|
const $ = cheerio.load(html);
|
|
|
|
|
const wantedYear = params.year;
|
|
|
|
|
const wantedTitleTokens = tokenize(params.title);
|
|
|
|
|
@@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st
|
|
|
|
|
if (!prev || item.score > prev.score) dedup.set(item.url, item);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const ordered = [...dedup.values()].sort((a, b) => b.score - a.score);
|
|
|
|
|
return [...dedup.values()].sort((a, b) => b.score - a.score);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null {
|
|
|
|
|
const wantedYear = params.year;
|
|
|
|
|
const ordered = extractMovieLinksFromSearch(html, params, baseUrl);
|
|
|
|
|
if (ordered.length === 0) return null;
|
|
|
|
|
|
|
|
|
|
const best = ordered[0];
|
|
|
|
|
@@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise<Rea
|
|
|
|
|
const q = buildFindQuery(params);
|
|
|
|
|
if (!q) return [];
|
|
|
|
|
|
|
|
|
|
const searchUrl = `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(q)}`;
|
|
|
|
|
const firstSearchUrl = buildSearchUrl(q, 1);
|
|
|
|
|
const cookies = new Map<string, string>();
|
|
|
|
|
taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', {
|
|
|
|
|
title: params.title,
|
|
|
|
|
year: params.year,
|
|
|
|
|
release: params.release,
|
|
|
|
|
query: q,
|
|
|
|
|
searchUrl
|
|
|
|
|
searchUrl: firstSearchUrl
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
await sleep(env.turkcealtyaziMinDelayMs);
|
|
|
|
|
const searchRes = await getWithRetry(searchUrl, 2, cookies);
|
|
|
|
|
mergeCookies(cookies, searchRes.setCookie);
|
|
|
|
|
const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
|
|
|
|
const hardMaxPages = 10;
|
|
|
|
|
let scannedPages = 0;
|
|
|
|
|
let discoveredMaxPages = 1;
|
|
|
|
|
let pickedMovie: { movieUrl: string; movieTitle: string } | null = null;
|
|
|
|
|
|
|
|
|
|
for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) {
|
|
|
|
|
const searchUrl = buildSearchUrl(q, page);
|
|
|
|
|
await sleep(env.turkcealtyaziMinDelayMs);
|
|
|
|
|
const searchRes = await getWithRetry(searchUrl, 2, cookies);
|
|
|
|
|
mergeCookies(cookies, searchRes.setCookie);
|
|
|
|
|
scannedPages += 1;
|
|
|
|
|
|
|
|
|
|
if (page === 1) {
|
|
|
|
|
discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
|
|
|
|
taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', {
|
|
|
|
|
page,
|
|
|
|
|
pageLinks: pageLinks.length,
|
|
|
|
|
discoveredMaxPages
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// TA may return HTTP 200 with an empty list for out-of-range pages.
|
|
|
|
|
if (pageLinks.length === 0 && page > 1) {
|
|
|
|
|
taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page });
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl);
|
|
|
|
|
if (pickedMovie) break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!pickedMovie) {
|
|
|
|
|
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q });
|
|
|
|
|
taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', {
|
|
|
|
|
title: params.title,
|
|
|
|
|
year: params.year,
|
|
|
|
|
query: q,
|
|
|
|
|
scannedPages
|
|
|
|
|
});
|
|
|
|
|
throw new PipelineError({
|
|
|
|
|
code: 'TA_MOVIE_NOT_MATCHED',
|
|
|
|
|
message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,
|
|
|
|
|
|