From 2c60c669c0bbc8d46cfb04bf1e0a2ff86ce4d762 Mon Sep 17 00:00:00 2001 From: szbk Date: Mon, 16 Feb 2026 14:47:45 +0300 Subject: [PATCH] =?UTF-8?q?feat(api):=20turkcealtyazi=20aramas=C4=B1na=20s?= =?UTF-8?q?ayfalama=20deste=C4=9Fi=20ekle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arama sonuçlarının birden fazla sayfa taranabilmesi için sayfalama mekanizması eklendi. İlk sayfadan maksimum sayfa sayısı keşfedilir ve her sayfa taranarak eşleşen film aranır. Sayfalar arası bekleme süresi korunur ve boş sayfalarda işlem durdurulur. Maksimum 10 sayfa sınırı eklendi. --- services/api/src/lib/turkcealtyaziReal.ts | 81 ++++++++++++++++++++--- 1 file changed, 72 insertions(+), 9 deletions(-) diff --git a/services/api/src/lib/turkcealtyaziReal.ts b/services/api/src/lib/turkcealtyaziReal.ts index 5190522..3f7e5d1 100644 --- a/services/api/src/lib/turkcealtyaziReal.ts +++ b/services/api/src/lib/turkcealtyaziReal.ts @@ -149,7 +149,31 @@ function buildFindQuery(params: SearchParams): string { return queryTokens.join(' '); } -function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null { +function buildSearchUrl(query: string, page: number): string { + if (page <= 1) return `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}`; + return `${env.turkcealtyaziBaseUrl}/find.php?cat=sub&find=${encodeURIComponent(query)}&p=${page}`; +} + +function parseSearchMaxPage(html: string, baseUrl: string): number { + const $ = cheerio.load(html); + let maxPage = 1; + $('a[href]').each((_, el) => { + const href = ($(el).attr('href') || '').trim(); + if (!href) return; + let parsedUrl: URL | null = null; + try { + parsedUrl = new URL(href, baseUrl); + } catch { + return; + } + if (parsedUrl.pathname !== '/find.php') return; + const p = Number(parsedUrl.searchParams.get('p') || '1'); + if (Number.isFinite(p) && p > maxPage) maxPage = p; + }); + return maxPage; +} + +function extractMovieLinksFromSearch(html: string, params: SearchParams, baseUrl: string): Array<{ url: string; title: string; year?: number; score: number }> { const $ = cheerio.load(html); const wantedYear = params.year; const wantedTitleTokens = tokenize(params.title); @@ -207,7 +231,12 @@ function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: st if (!prev || item.score > prev.score) dedup.set(item.url, item); } - const ordered = [...dedup.values()].sort((a, b) => b.score - a.score); + return [...dedup.values()].sort((a, b) => b.score - a.score); +} + +function pickMovieLinkFromSearch(html: string, params: SearchParams, baseUrl: string): { movieUrl: string; movieTitle: string } | null { + const wantedYear = params.year; + const ordered = extractMovieLinksFromSearch(html, params, baseUrl); if (ordered.length === 0) return null; const best = ordered[0]; @@ -378,23 +407,57 @@ export async function searchTurkceAltyaziReal(params: SearchParams): Promise(); taInfo('TA_SEARCH_START', 'TurkceAltyazi search started', { title: params.title, year: params.year, release: params.release, query: q, - searchUrl + searchUrl: firstSearchUrl }); try { - await sleep(env.turkcealtyaziMinDelayMs); - const searchRes = await getWithRetry(searchUrl, 2, cookies); - mergeCookies(cookies, searchRes.setCookie); - const pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + const hardMaxPages = 10; + let scannedPages = 0; + let discoveredMaxPages = 1; + let pickedMovie: { movieUrl: string; movieTitle: string } | null = null; + + for (let page = 1; page <= Math.min(discoveredMaxPages, hardMaxPages); page++) { + const searchUrl = buildSearchUrl(q, page); + await sleep(env.turkcealtyaziMinDelayMs); + const searchRes = await getWithRetry(searchUrl, 2, cookies); + mergeCookies(cookies, searchRes.setCookie); + scannedPages += 1; + + if (page === 1) { + discoveredMaxPages = Math.max(1, parseSearchMaxPage(searchRes.body, env.turkcealtyaziBaseUrl)); + } + + const pageLinks = extractMovieLinksFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + taInfo('TA_SEARCH_PAGE_SCANNED', 'TurkceAltyazi search page scanned', { + page, + pageLinks: pageLinks.length, + discoveredMaxPages + }); + + // TA may return HTTP 200 with an empty list for out-of-range pages. + if (pageLinks.length === 0 && page > 1) { + taInfo('TA_SEARCH_PAGE_EMPTY_STOP', 'Search page has empty list, stopping pagination', { page }); + break; + } + + pickedMovie = pickMovieLinkFromSearch(searchRes.body, params, env.turkcealtyaziBaseUrl); + if (pickedMovie) break; + } + if (!pickedMovie) { - taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { title: params.title, year: params.year, query: q }); + taInfo('TA_SEARCH_RESULT', 'Movie page not matched from search list', { + title: params.title, + year: params.year, + query: q, + scannedPages + }); throw new PipelineError({ code: 'TA_MOVIE_NOT_MATCHED', message: `Movie not matched on search list (title=${params.title}, year=${params.year ?? 'n/a'})`,