diff --git a/services/api/src/lib/turkcealtyaziReal.ts b/services/api/src/lib/turkcealtyaziReal.ts index 5190522..3f7e5d1 100644 --- a/services/api/src/lib/turkcealtyaziReal.ts +++ b/services/api/src/lib/turkcealtyaziReal.ts @@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string { return queryTokens.join(' '); } -function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null { +/** Returns the find.php?cat=sub search URL under env.turkcealtyaziBaseUrl with the query URL-encoded; the p parameter is appended only when page is greater than 1. */ function buildSearchUrl(query: string, page: number): string { + if (page <= 1) return `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`; + return `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}&p=${page}`; +} + +/** Loads the HTML with cheerio, visits every a[href], resolves each href against baseUrl, and returns the largest finite p query value seen on links whose pathname is exactly /find.php. Returns 1 when no such link carries a larger value. */ function parseSearchMaxPage(html: string, baseUrl: string): number { + const $ = cheerio.load(html); + let maxPage = 1; + $('a[href]').each((_, el) => { + const href = ($(el).attr('href') || '').trim(); + if (!href) return; + let parsedUrl: URL | null = null; + try { + parsedUrl = new URL(href, baseUrl); + } catch { /* href could not be resolved against baseUrl; skip this anchor */ + return; + } + /* only links whose path is exactly /find.php are treated as pagination links */ if (parsedUrl.pathname !== '/find.php') return; + /* a matching link without a p parameter counts as page 1 */ const p = Number(parsedUrl.searchParams.get('p') || '1'); + if (Number.isFinite(p) && p > maxPage) maxPage = p; + }); + return maxPage; +} + +/** Returns the candidate movie links of one search page, keeping the highest-scoring entry per url and sorting by descending score. NOTE(review): the scoring part of the body lies outside the visible hunks. */ function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> { const $ = cheerio.load(html); const wantedYear = params.year; const wantedTitleTokens = tokenize(params.title); @@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st if (!prev || item.score > prev.score) dedup.set(item.url, item); } - const ordered = [...dedup.values()].sort((a, b) => b.score - a.score); + return [...dedup.values()].sort((a, b) => b.score - a.score); +} + +/** Selects one movie from the ranked candidates of a search page; returns null when the page yields no candidates. NOTE(review): the selection rules applied after the first candidate is read are outside the visible hunks. */ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null { + 
const wantedYear = params.year; + /* candidates come back from extractMovieLinksFromSearch already de-duplicated by url and sorted by descending score */ const ordered = extractMovieLinksFromSearch(html, params, baseUrl); if (ordered.length === 0) return null; const best = ordered[0]; @@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise(); taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', { title: params.title, year: params.year, release: params.release, query: q, - searchUrl + searchUrl: firstSearchUrl }); try { - await sleep(env.turkcealtyaziMinDelayMs); - const searchRes = await getWithRetry(searchUrl, 2, cookies); - mergeCookies(cookies, searchRes.setCookie); - const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + /* hard cap on how many search pages one lookup may fetch */ const hardMaxPages = 10; + /* counts the search pages actually fetched */ let scannedPages = 0; + let discoveredMaxPages = 1; + let pickedMovie: { movieUrl: string; movieTitle: string } | null = null; + + /* sequential scan of result pages, bounded by both the page count read from the HTML and hardMaxPages */ for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) { + const searchUrl = buildSearchUrl(q, page); + /* wait env.turkcealtyaziMinDelayMs before every request */ await sleep(env.turkcealtyaziMinDelayMs); + const searchRes = await getWithRetry(searchUrl, 2, cookies); + mergeCookies(cookies, searchRes.setCookie); + scannedPages += 1; + + /* NOTE(review): the page count is read from the first page only; higher p links that appear only on later pages are never picked up. Confirm that page 1 always links to the last page. */ if (page === 1) { + discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl)); + } + + const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', { + page, + pageLinks: pageLinks.length, + discoveredMaxPages + }); + + // TA may return HTTP 200 with an empty list for out-of-range pages. 
+ if (pageLinks.length === 0 && page > 1) { /* an empty page after the first is treated as the end of the results */ + taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page }); + break; + } + + /* NOTE(review): pickMovieLinkFromSearch runs extractMovieLinksFromSearch on this same HTML again, so every page is parsed twice */ pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + /* stop at the first page that produces a match */ if (pickedMovie) break; + } + if (!pickedMovie) { - taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q }); + taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { + title: params.title, + year: params.year, + query: q, + /* number of search pages fetched before giving up */ scannedPages + }); throw new PipelineError({ code: 'TA_MOVIE_NOT_MATCHED', message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,