amazon prime scrap özelliği eklendi

2025-11-23 16:09:39 +03:00
parent 46d75b64d5
commit fefa6627e9
6 changed files with 988 additions and 28 deletions
--- a/src/index.js
+++ b/src/index.js
@@ -1,15 +1,21 @@
 import './polyfill.js';
-import { parseNetflixHtml } from './parser.js';
+import { parseNetflixHtml, parsePrimeHtml } from './parser.js';
 import { fetchPageContentWithPlaywright } from './headless.js';

 const DEFAULT_TIMEOUT_MS = 15000;

 // 🎯 LOG SİSTEMİ
-function logPass(message) {
+function logPass(message, data) {
  console.log(`✅ ${message}`);
+  if (data) {
+    console.log(JSON.stringify(data, null, 2));
+  }
 }

 function logError(message, error) {
+  if (process.env.NODE_ENV === 'test') {
+    return;
+  }
  console.error(`❌ ${message}: ${error.message}`);
 }

@@ -46,6 +52,35 @@ function normalizeNetflixUrl(inputUrl) {
  const id = idMatch[1];
  return `https://www.netflix.com/title/${id}`;
 }
+
+/**
+ * Normalize an Amazon Prime Video URL to its canonical detail-page form.
+ *
+ * Only the path segment that follows `detail` is kept; any locale prefix,
+ * trailing segments, query string and fragment are dropped.
+ *
+ * @param {string} inputUrl - URL pointing at a primevideo.com detail page
+ * @returns {string} `https://www.primevideo.com/detail/<id>`
+ * @throws {Error} If the URL is missing, cannot be parsed, is not hosted on
+ *   primevideo.com, or has no `detail/<id>` path segment
+ */
+function normalizePrimeUrl(inputUrl) {
+  if (!inputUrl) {
+    throw new Error('Amazon Prime URL\'i gereklidir.');
+  }
+
+  let parsed;
+  try {
+    parsed = new URL(inputUrl);
+  } catch (err) {
+    throw new Error('Geçersiz URL sağlandı.');
+  }
+
+  // Match the host exactly or as a parent domain. A substring search would
+  // also accept look-alikes such as `primevideo.com.example.org`.
+  const host = parsed.hostname.toLowerCase();
+  if (host !== 'primevideo.com' && !host.endsWith('.primevideo.com')) {
+    throw new Error('URL primevideo.com adresini göstermelidir.');
+  }
+
+  const segments = parsed.pathname.split('/').filter(Boolean);
+  const detailIndex = segments.indexOf('detail');
+
+  if (detailIndex >= 0 && segments[detailIndex + 1]) {
+    const id = segments[detailIndex + 1];
+    return `https://www.primevideo.com/detail/${id}`;
+  }
+
+  throw new Error('URL\'de Amazon Prime içerik ID\'si bulunamadı.');
+}
+
 const DEFAULT_USER_AGENT =
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36';

@@ -139,7 +174,7 @@ function needsHeadless(meta) {
 * Netflix meta verilerini scrape eder.
 * @param {string} inputUrl
 * @param {{ headless?: boolean, timeoutMs?: number, userAgent?: string }} [options]
- * @returns {Promise<{ url: string, id: string, name: string, year: string | number | undefined, seasons: string | null }>}
+ * @returns {Promise<{ url: string, id: string, name: string, year: string | number | undefined, seasons: string | null, thumbnail?: string | null, info?: string | null, genre?: string | null }>}
 */
 export async function scraperNetflix(inputUrl, options = {}) {
  try {
@@ -150,15 +185,11 @@ export async function scraperNetflix(inputUrl, options = {}) {
    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const userAgent = options.userAgent || DEFAULT_USER_AGENT;

-    logPass(`Netflix URL normalize edildi: ${normalizedUrl}`);
-
    const staticHtml = await fetchStaticHtml(normalizedUrl, userAgent, timeoutMs);
-    logPass("HTML içeriği başarıyla çekildi");

    let meta = parseNetflixHtml(staticHtml);

    if (needsHeadless(meta) && options.headless !== false) {
-      logPass("Headless mode aktifleştiriliyor");
      const headlessHtml = await fetchPageContentWithPlaywright(normalizedUrl, {
        timeoutMs,
        userAgent,
@@ -172,9 +203,6 @@ export async function scraperNetflix(inputUrl, options = {}) {
          Object.entries(enriched).filter(([_, value]) => value !== undefined && value !== null)
        )
      };
-      logPass("Headless scraping tamamlandı");
-    } else {
-      logPass("Statik scraping yeterli");
    }

    if (!meta.name) {
@@ -186,13 +214,74 @@ export async function scraperNetflix(inputUrl, options = {}) {
      id: id || '',
      name: meta.name,
      year: meta.year,
-      seasons: meta.seasons ?? null
+      seasons: meta.seasons ?? null,
+      thumbnail: meta.thumbnail ?? null,
+      info: meta.info ?? null,
+      genre: meta.genre ?? null
    };

-    logResult(finalResult);
+    logPass('Netflix scraping tamamlandı', finalResult);
    return finalResult;
  } catch (error) {
    logError('Netflix scraping başarısız', error);
    throw error;
  }
 }
+
+/**
+ * Scrape metadata for an Amazon Prime Video title.
+ *
+ * Normalizes the URL, fetches the page as static HTML and parses it with
+ * `parsePrimeHtml`. When `needsHeadless(meta)` is true and headless mode has
+ * not been disabled, the page is fetched a second time through Playwright and
+ * every non-null field of that second parse overrides the static value.
+ *
+ * @param {string} inputUrl
+ * @param {{ headless?: boolean, timeoutMs?: number, userAgent?: string }} [options]
+ *   `headless: false` skips the Playwright fallback; `timeoutMs` falls back to
+ *   `DEFAULT_TIMEOUT_MS` and `userAgent` to `DEFAULT_USER_AGENT`.
+ * @returns {Promise<{ url: string, id: string, name: string, year: string | number | undefined, seasons: string | null, thumbnail: string | null, info: string | null, genre: string | null }>}
+ * @throws {Error} Any failure is passed to `logError` and rethrown; an error is
+ *   also thrown when no title could be parsed from the page.
+ */
+export async function scraperPrime(inputUrl, options = {}) {
+  try {
+    await ensureFetchGlobals();
+
+    const normalizedUrl = normalizePrimeUrl(inputUrl);
+    const id = normalizedUrl.split('/').pop(); // last path segment of the normalized URL
+    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+    const userAgent = options.userAgent || DEFAULT_USER_AGENT;
+
+    const staticHtml = await fetchStaticHtml(normalizedUrl, userAgent, timeoutMs);
+
+    let meta = parsePrimeHtml(staticHtml);
+
+    if (needsHeadless(meta) && options.headless !== false) {
+      const headlessHtml = await fetchPageContentWithPlaywright(normalizedUrl, {
+        timeoutMs,
+        userAgent,
+        headless: options.headless !== false // always true here: the enclosing condition already excluded `false`
+      });
+
+      const enriched = parsePrimeHtml(headlessHtml);
+      meta = {
+        ...meta,
+        ...Object.fromEntries(
+          Object.entries(enriched).filter(([_, value]) => value !== undefined && value !== null) // keep the static value wherever the second parse found nothing
+        )
+      };
+    }
+
+    if (!meta.name) {
+      throw new Error('Amazon Prime sayfa meta verisi parse edilemedi.');
+    }
+
+    const finalResult = {
+      url: normalizedUrl,
+      id: id || '',
+      name: meta.name,
+      year: meta.year,
+      seasons: meta.seasons ?? null,
+      thumbnail: meta.thumbnail ?? null,
+      info: meta.info ?? null,
+      genre: meta.genre ?? null
+    };
+
+    logPass('Amazon Prime scraping tamamlandı', finalResult);
+    return finalResult;
+  } catch (error) {
+    logError('Amazon Prime scraping başarısız', error);
+    throw error;
+  }
+}
--- a/src/parser.js
+++ b/src/parser.js
@@ -24,6 +24,26 @@ const UNIVERSAL_UI_PATTERNS = [
 const YEAR_FIELDS = ['datePublished', 'startDate', 'uploadDate', 'copyrightYear', 'releasedEvent', 'releaseYear', 'dateCreated'];
 const SEASON_TYPES = ['TVSeries', 'TVShow', 'Series'];

+/**
+ * CSS selectors that may carry the thumbnail image of a Netflix title.
+ * Listed in priority order: consumers iterate the array front to back and
+ * stop at the first usable match.
+ */
+const THUMBNAIL_SELECTORS = [
+  'meta[property="og:image"]',           // Open Graph image (primary)
+  'meta[name="twitter:image"]',        // Twitter card image
+  'meta[property="og:image:secure_url"]', // Secure image URL
+  'link[rel="image_src"]',             // Image source link
+  'meta[itemprop="image"]'              // Schema.org image
+];
+
+/**
+ * CSS selectors that may carry the description of a Netflix title.
+ * Listed in priority order: consumers iterate the array front to back and
+ * stop at the first non-empty match.
+ */
+const DESCRIPTION_SELECTORS = [
+  'meta[name="description"]',           // Standard meta description (primary)
+  'meta[property="og:description"]',  // Open Graph description
+  'meta[itemprop="description"]'       // Schema.org description
+];
+
 /**
 * Extract a usable year value from various JSON-LD fields.
 * @param {unknown} value
@@ -79,6 +99,141 @@ function cleanTitle(title) {
  return trimmed || undefined;
 }

+/**
+ * Extract the thumbnail URL of a Netflix title from its HTML head tags.
+ *
+ * Selectors in `THUMBNAIL_SELECTORS` are tried in priority order; the first
+ * URL accepted by `isValidThumbnailUrl` is returned after being passed
+ * through `normalizeThumbnailUrl`.
+ *
+ * @param {string} html - Raw HTML content
+ * @returns {string | undefined} Thumbnail URL or undefined if not found
+ */
+function extractThumbnail(html) {
+  if (!html) return undefined;
+
+  const $ = load(html);
+
+  for (const selector of THUMBNAIL_SELECTORS) {
+    const element = $(selector);
+    // `<meta>` tags carry the URL in `content`, but `<link rel="image_src">`
+    // carries it in `href`; without this fallback that selector never matches.
+    const imageUrl = element.attr('content') || element.attr('href');
+    if (imageUrl && isValidThumbnailUrl(imageUrl)) {
+      return normalizeThumbnailUrl(imageUrl);
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Decide whether a URL looks like a Netflix-hosted image.
+ *
+ * The URL must end in a known image extension (optionally followed by a
+ * query string) and must contain one of the Netflix CDN markers anywhere
+ * in its text.
+ *
+ * @param {string} url - URL to validate
+ * @returns {boolean} True when both conditions hold
+ */
+function isValidThumbnailUrl(url) {
+  if (typeof url !== 'string' || url === '') {
+    return false;
+  }
+
+  // Cheap extension test first; the marker scan only runs for image URLs.
+  if (!/\.(jpg|jpeg|png|webp)(\?.*)?$/i.test(url)) {
+    return false;
+  }
+
+  const cdnMarkers = ['nflxso.net', 'assets.nflxext.com', 'netflix.com', 'occ-0-', 'nflximg.net'];
+  for (const marker of cdnMarkers) {
+    if (url.includes(marker)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Strip the `r`, `t`, `e` and `v` query parameters from a thumbnail URL.
+ *
+ * The result is rebuilt from origin, path and the remaining query string,
+ * so any fragment is dropped. Input that cannot be parsed as an absolute
+ * URL is returned unchanged.
+ *
+ * @param {string} url - Original thumbnail URL
+ * @returns {string} Normalized URL
+ */
+function normalizeThumbnailUrl(url) {
+  if (!url) return url;
+
+  try {
+    const target = new URL(url);
+    const query = new URLSearchParams(target.search);
+
+    for (const key of ['r', 't', 'e', 'v']) {
+      query.delete(key);
+    }
+
+    const remaining = query.toString();
+    const base = target.origin + target.pathname;
+    return remaining ? base + '?' + remaining : base;
+  } catch {
+    // Not an absolute URL: hand the input back untouched.
+    return url;
+  }
+}
+
+/**
+ * Extract the description of a Netflix title from its HTML meta tags.
+ *
+ * Selectors in `DESCRIPTION_SELECTORS` are tried in priority order. The
+ * first one with non-blank `content` decides the result: its text is
+ * stripped of the "| Netflix…" suffix and the trailing Turkish
+ * call-to-action sentence, and later selectors are not consulted even if
+ * the cleaned text ends up empty.
+ *
+ * @param {string} html - Raw HTML content
+ * @returns {string | undefined} Cleaned description, or undefined
+ */
+function extractInfo(html) {
+  if (!html) return undefined;
+
+  const $ = load(html);
+
+  for (const selector of DESCRIPTION_SELECTORS) {
+    const raw = $(selector).attr('content');
+    const trimmed = raw ? raw.trim() : '';
+    if (!trimmed) continue;
+
+    const withoutSuffix = trimmed.replace(/\s*\|\s*Netflix.*$/i, '');
+    const withoutCallToAction = withoutSuffix.replace(/\s+Fragmanları izleyin ve daha fazla bilgi edinin\.$/, '');
+    return withoutCallToAction || undefined;
+  }
+
+  return undefined;
+}
+
+/**
+ * Normalize a genre name to its Turkish form.
+ *
+ * The trimmed input is looked up in an English-to-Turkish table; names
+ * without an entry (including names that are already Turkish) are
+ * returned trimmed but otherwise unchanged.
+ *
+ * @param {string | null | undefined} genre - Raw genre from JSON-LD
+ * @returns {string | null} Normalized genre, or null for empty/non-string input
+ */
+function normalizeGenre(genre) {
+  if (!genre || typeof genre !== 'string') return null;
+
+  // A Map is used instead of an object literal so that inputs such as
+  // "constructor" or "toString" cannot resolve to inherited Object members
+  // and leak a function out of a function documented to return a string.
+  const genreMapping = new Map([
+    ['Action', 'Aksiyon'],
+    ['Adventure', 'Macera'],
+    ['Science Fiction', 'Bilim Kurgu'],
+    ['Fantasy', 'Fantastik'],
+    ['Drama', 'Dram'],
+    ['Comedy', 'Komedi'],
+    ['Horror', 'Korku'],
+    ['Thriller', 'Gerilim'],
+    ['Mystery', 'Gizem'],
+    ['Romance', 'Romantik']
+  ]);
+
+  const cleanedGenre = genre.trim();
+
+  // Turkish names need no entry: they fall through to `cleanedGenre`.
+  return genreMapping.get(cleanedGenre) || cleanedGenre || null;
+}
+
 /**
 * Parse JSON-LD objects for metadata.
 * @param {any} obj
@@ -121,6 +276,19 @@ function parseJsonLdObject(obj) {
        result.seasons = `${entry.seasons.length} Sezon`;
      }
    }
+
+    // Extract info/description from JSON-LD
+    if (!result.info && typeof entry.description === 'string') {
+      const cleanedInfo = entry.description.trim()
+        .replace(/\s*\|\s*Netflix.*$/i, '')
+        .replace(/\s+Fragmanları izleyin ve daha fazla bilgi edinin\.$/, '');
+      result.info = cleanedInfo || undefined;
+    }
+
+    // Extract genre from JSON-LD
+    if (!result.genre && typeof entry.genre === 'string') {
+      result.genre = normalizeGenre(entry.genre);
+    }
  }

  return result;
@@ -129,7 +297,7 @@ function parseJsonLdObject(obj) {
 /**
 * Parse Netflix HTML to extract metadata without executing scripts.
 * @param {string} html
- * @returns {{ name?: string, year?: string | number, seasons?: string | null }}
+ * @returns {{ name?: string, year?: string | number, seasons?: string | null, thumbnail?: string | null, info?: string | null, genre?: string | null }}
 */
 export function parseNetflixHtml(html) {
  if (!html) return {};
@@ -143,20 +311,362 @@ export function parseNetflixHtml(html) {

  let year;
  let seasons = null;
+  let thumbnail = null;
+  let info = null;
+  let genre = null;
+
+  // Extract thumbnail from meta tags
+  thumbnail = extractThumbnail(html);
+
+  // Extract info from meta tags (fallback if JSON-LD doesn't have it)
+  info = extractInfo(html);

  $('script[type="application/ld+json"]').each((_, el) => {
    const raw = $(el).contents().text();
    if (!raw) return;
    try {
      const parsed = JSON.parse(raw);
-      const info = parseJsonLdObject(parsed);
-      if (!name && info.name) name = info.name;
-      if (!year && info.year) year = info.year;
-      if (!seasons && info.seasons) seasons = info.seasons;
+      const jsonLdInfo = parseJsonLdObject(parsed);
+      if (!name && jsonLdInfo.name) name = jsonLdInfo.name;
+      if (!year && jsonLdInfo.year) year = jsonLdInfo.year;
+      if (!seasons && jsonLdInfo.seasons) seasons = jsonLdInfo.seasons;
+      // Also check JSON-LD for image information
+      if (!thumbnail && jsonLdInfo.image) {
+        thumbnail = typeof jsonLdInfo.image === 'string' ? jsonLdInfo.image : jsonLdInfo.image.url;
+      }
+      // Extract info and genre from JSON-LD if available
+      if (!info && jsonLdInfo.info) info = jsonLdInfo.info;
+      if (!genre && jsonLdInfo.genre) genre = jsonLdInfo.genre;
    } catch {
      // Ignore malformed JSON-LD blocks.
    }
  });

-  return { name, year, seasons };
+  return { name, year, seasons, thumbnail, info, genre };
+}
+
+/**
+ * Amazon Prime specific constants and functions.
+ *
+ * Each selector list below is ordered by priority: the matching
+ * `extractPrime*` function walks it front to back and stops at the first
+ * selector that yields a usable value.
+ */
+
+// Selector lists used to pull metadata out of an Amazon Prime page
+const PRIME_TITLE_SELECTORS = [ // read by extractPrimeTitle (`content` attribute, else element text)
+  'meta[property="og:title"]',
+  'meta[name="title"]',
+  'title',
+  '[data-testid="title"]',
+  '.dv-node-dp-title',
+  'h1'
+];
+
+const PRIME_THUMBNAIL_SELECTORS = [ // read by extractPrimeThumbnail (`content` attribute, else `src`)
+  'meta[property="og:image"]',
+  'meta[name="twitter:image"]',
+  'meta[property="og:image:secure_url"]',
+  '[data-testid="hero-image"] img',
+  '.dv-node-dp-hero-image img',
+  'img[alt*="poster"]'
+];
+
+const PRIME_DESCRIPTION_SELECTORS = [ // read by extractPrimeInfo (`content` attribute, else element text)
+  'meta[name="description"]',
+  'meta[property="og:description"]',
+  'meta[itemprop="description"]',
+  '[data-testid="synopsis"]',
+  '.dv-node-dp-synopsis',
+  '.synopsis'
+];
+
+const PRIME_YEAR_SELECTORS = [ // read by extractPrimeYear (first four-digit run in the value)
+  'meta[itemprop="dateCreated"]',
+  'meta[property="video:release_date"]',
+  '[data-testid="release-year"]',
+  '.release-year',
+  '[class*="year"]'
+];
+
+const PRIME_GENRE_SELECTORS = [ // read by extractPrimeGenre (`content` attribute, else element text)
+  'meta[itemprop="genre"]',
+  '[data-testid="genres"]',
+  '.genres',
+  '[class*="genre"]'
+];
+
+/**
+ * Extract the title of an Amazon Prime page.
+ *
+ * Walks `PRIME_TITLE_SELECTORS` in order and takes the first non-blank
+ * `content` attribute or element text. If no selector yields text, falls
+ * back to the first `"title":"…"` pair embedded in the raw HTML. The value
+ * found is passed through `cleanPrimeTitle`.
+ *
+ * @param {Function} $ - Selector function produced by `load(html)`
+ * @param {string} html - Raw HTML content
+ * @returns {string | undefined} Cleaned title, or undefined
+ */
+function extractPrimeTitle($, html) {
+  for (const selector of PRIME_TITLE_SELECTORS) {
+    const node = $(selector);
+    const candidate = (node.attr('content') || node.text() || '').trim();
+    if (candidate) {
+      return cleanPrimeTitle(candidate);
+    }
+  }
+
+  // No usable element: look for the value inside embedded JSON instead.
+  const embedded = html.match(/"title":"([^"]+)"/);
+  return embedded ? cleanPrimeTitle(embedded[1]) : undefined;
+}
+
+/**
+ * Extract the release year of an Amazon Prime title.
+ *
+ * Sources are consulted in this order, and the first four-digit run found
+ * wins: each selector in `PRIME_YEAR_SELECTORS` (`content` attribute, else
+ * element text), a quoted `"releaseYear"` value embedded in the raw HTML,
+ * and finally the title returned by `extractPrimeTitle`.
+ *
+ * @param {Function} $ - Selector function produced by `load(html)`
+ * @param {string} html - Raw HTML content
+ * @returns {string | undefined} Four-digit year as a string, or undefined
+ */
+function extractPrimeYear($, html) {
+  // Returns the first run of four digits in `text`, if any.
+  const firstYearIn = (text) => {
+    const found = text ? text.match(/(\d{4})/) : null;
+    return found ? found[1] : undefined;
+  };
+
+  for (const selector of PRIME_YEAR_SELECTORS) {
+    const node = $(selector);
+    const year = firstYearIn(node.attr('content') || node.text());
+    if (year) return year;
+  }
+
+  const embedded = html.match(/"releaseYear"\s*:\s*"(\d{4})"/);
+  if (embedded) return embedded[1];
+
+  // Last resort: a year that appears inside the title itself.
+  return firstYearIn(extractPrimeTitle($, html));
+}
+
+/**
+ * Extract the thumbnail URL of an Amazon Prime title.
+ *
+ * Walks `PRIME_THUMBNAIL_SELECTORS` in order, reading `content` or else
+ * `src`, and returns the first URL accepted by `isValidPrimeThumbnail`.
+ * Otherwise falls back to an embedded `"heroImageUrl"` value, with its
+ * `\u002F` escapes turned back into slashes; that fallback is returned
+ * without validation.
+ *
+ * @param {Function} $ - Selector function produced by `load(html)`
+ * @param {string} html - Raw HTML content
+ * @returns {string | undefined} Thumbnail URL, or undefined
+ */
+function extractPrimeThumbnail($, html) {
+  for (const selector of PRIME_THUMBNAIL_SELECTORS) {
+    const node = $(selector);
+    const candidate = node.attr('content') || node.attr('src');
+    if (!candidate) continue;
+    if (isValidPrimeThumbnail(candidate)) {
+      return candidate;
+    }
+  }
+
+  const embedded = html.match(/"heroImageUrl":"([^"]+)"/);
+  if (!embedded) {
+    return undefined;
+  }
+
+  return embedded[1].split('\\u002F').join('/');
+}
+
+/**
+ * Extract the description/synopsis of an Amazon Prime title.
+ *
+ * Walks `PRIME_DESCRIPTION_SELECTORS` in order and takes the first
+ * non-blank `content` attribute or element text. If no selector yields
+ * text, falls back to an embedded `"synopsis":"…"` JSON string. The value
+ * found is passed through `cleanPrimeDescription`.
+ *
+ * @param {Function} $ - Selector function produced by `load(html)`
+ * @param {string} html - Raw HTML content
+ * @returns {string | undefined} Cleaned description, or undefined
+ */
+function extractPrimeInfo($, html) {
+  for (const selector of PRIME_DESCRIPTION_SELECTORS) {
+    const description = $(selector).attr('content') || $(selector).text();
+    if (description && description.trim()) {
+      return cleanPrimeDescription(description.trim());
+    }
+  }
+
+  // The pattern must step over backslash escapes: a plain `[^"]+` stops at
+  // the quote of the first `\"` and cuts the synopsis short at that point.
+  const jsonMatch = html.match(/"synopsis":"((?:[^"\\]|\\.)+)"/);
+  if (jsonMatch && jsonMatch[1]) {
+    let synopsis;
+    try {
+      // The capture is the body of a JSON string literal, so JSON.parse
+      // decodes every escape sequence it may contain.
+      synopsis = JSON.parse(`"${jsonMatch[1]}"`);
+    } catch {
+      // Not strictly valid JSON (e.g. a raw control character): decode the
+      // two escapes that matter by hand instead.
+      synopsis = jsonMatch[1].replace(/\\u002F/g, '/').replace(/\\"/g, '"');
+    }
+    return cleanPrimeDescription(synopsis);
+  }
+
+  return undefined;
+}
+
+/**
+ * Extract the primary genre of an Amazon Prime title.
+ *
+ * Walks `PRIME_GENRE_SELECTORS` in order and takes the first non-blank
+ * `content` attribute or element text. If no selector yields text, falls
+ * back to the first entry of an embedded `"genres":[…]` JSON array. The
+ * value found is passed through `normalizePrimeGenre`.
+ *
+ * @param {Function} $ - Selector function produced by `load(html)`
+ * @param {string} html - Raw HTML content
+ * @returns {string | null | undefined} Result of `normalizePrimeGenre`, or
+ *   undefined when no genre text was found at all
+ */
+function extractPrimeGenre($, html) {
+  for (const selector of PRIME_GENRE_SELECTORS) {
+    const genreText = $(selector).attr('content') || $(selector).text();
+    if (genreText && genreText.trim()) {
+      return normalizePrimeGenre(genreText.trim());
+    }
+  }
+
+  // Capture only the first array entry. Requiring `"]` right after it would
+  // restrict the match to single-genre arrays and miss `["Drama","Comedy"]`.
+  const jsonMatch = html.match(/"genres":\s*\[\s*"([^"]+)"/);
+  if (jsonMatch && jsonMatch[1]) {
+    return normalizePrimeGenre(jsonMatch[1]);
+  }
+
+  return undefined;
+}
+
+/**
+ * Extract season information from an Amazon Prime page.
+ *
+ * Heuristics run in this order and the first one that fires decides:
+ *   1. the largest number that precedes the word "Sezon" anywhere in the
+ *      raw HTML -> "<n> Season";
+ *   2. any series indicator (season/episode wording or a series "type"
+ *      value) -> "1 Season";
+ *   3. any movie indicator -> null;
+ *   4. both "Sezon" and "Bölüm" present in the HTML -> "1 Season";
+ *   5. otherwise null.
+ *
+ * Only the raw HTML string is inspected; the `$` parameter is not used.
+ *
+ * @param {Function} $ - Selector function produced by `load(html)` (unused)
+ * @param {string} html - Raw HTML content
+ * @returns {string | null} "<n> Season" for series, null otherwise
+ */
+function extractPrimeSeasons($, html) {
+  // Try to find the highest season number from all season matches
+  const allSeasonMatches = html.match(/\d+\s*\.?\s*Sezon/gi);
+  if (allSeasonMatches) {
+    const seasons = allSeasonMatches.map(match => parseInt(match.match(/\d+/)[0])); // leading digits of each match
+    const maxSeason = Math.max(...seasons);
+    if (maxSeason > 0) {
+      return `${maxSeason} Season`;
+    }
+  }
+
+  // Look for series indicators in a more specific way
+  const seriesIndicators = [
+    /\b(Season|Sezon)\s*\d+/i,
+    /\bepisode\s*\d+/i,
+    /\bbölüm\s*\d+/i,
+    /"type":\s*["']\s*(TV\s*Series|Dizi)/i,
+    /\b(TV\s*Series|Dizi)\s*$/i // no `m` flag: `$` only matches at the very end of the HTML
+  ];
+
+  const hasSeriesIndicator = seriesIndicators.some(pattern => pattern.test(html));
+  if (hasSeriesIndicator) {
+    return '1 Season'; // Default for series without clear season count
+  }
+
+  // Look for movie indicators
+  const movieIndicators = [
+    /\b(film|movie)\s*$/i, // no `m` flag: `$` only matches at the very end of the HTML
+    /"type":\s*["']\s*(Movie|Film)/i
+  ];
+
+  const hasMovieIndicator = movieIndicators.some(pattern => pattern.test(html));
+  if (hasMovieIndicator) {
+    return null; // It's explicitly a movie
+  }
+
+  // If we can't determine, look at page structure
+  // Prime Video typically shows season information prominently for series
+  if (html.includes('Sezon') && html.includes('Bölüm')) {
+    return '1 Season';
+  }
+
+  return null; // Default to movie
+}
+
+/**
+ * Strip branding and UI residue from an Amazon Prime title.
+ *
+ * Removes, in order, a trailing "| Prime Video…" part, a trailing
+ * "| Amazon…" part and a trailing action word (izle/watch/play/oynat),
+ * then trims the result.
+ *
+ * @param {string} title - Raw title text
+ * @returns {string | undefined} Cleaned title, or undefined when nothing is left
+ */
+function cleanPrimeTitle(title) {
+  if (!title) return undefined;
+
+  // Applied in sequence; each pattern only ever removes a suffix.
+  const suffixPatterns = [
+    /\s*\|\s*Prime\s*Video.*$/i,
+    /\s*\|\s*Amazon.*$/i,
+    /\s+(izle|watch|play|oynat)$/i
+  ];
+
+  const stripped = suffixPatterns.reduce((text, pattern) => text.replace(pattern, ''), title);
+  const result = stripped.trim();
+
+  return result === '' ? undefined : result;
+}
+
+/**
+ * Strip branding and call-to-action residue from an Amazon Prime description.
+ *
+ * Removes, in order, a trailing "| Prime Video…" part, a trailing
+ * "| Amazon…" part and everything from a "click for more info" phrase
+ * (Turkish or English) onwards, then trims the result.
+ *
+ * @param {string} description - Raw description text
+ * @returns {string | undefined} Cleaned description, or undefined when nothing is left
+ */
+function cleanPrimeDescription(description) {
+  if (!description) return undefined;
+
+  // Applied in sequence; each pattern only ever removes a suffix.
+  const suffixPatterns = [
+    /\s*\|\s*Prime\s*Video.*$/i,
+    /\s*\|\s*Amazon.*$/i,
+    /\s+(Daha fazla bilgi için tıklayın|Click for more info).*$/i
+  ];
+
+  const stripped = suffixPatterns.reduce((text, pattern) => text.replace(pattern, ''), description);
+  const result = stripped.trim();
+
+  return result === '' ? undefined : result;
+}
+
+/**
+ * Decide whether a URL looks like an Amazon-hosted image.
+ *
+ * The URL must end in a known image extension (optionally followed by a
+ * query string) and must contain one of the Amazon/Prime Video host
+ * markers anywhere in its text.
+ *
+ * @param {string} url - URL to validate
+ * @returns {boolean} True when both conditions hold
+ */
+function isValidPrimeThumbnail(url) {
+  if (typeof url !== 'string' || url === '') {
+    return false;
+  }
+
+  const hostMarkers = [
+    'm.media-amazon.com',
+    'images-na.ssl-images-amazon.com',
+    'media-amazon.com',
+    'primevideo.com'
+  ];
+
+  let onKnownHost = false;
+  for (const marker of hostMarkers) {
+    if (url.includes(marker)) {
+      onKnownHost = true;
+      break;
+    }
+  }
+
+  if (!onKnownHost) return false;
+  return /\.(jpg|jpeg|png|webp)(\?.*)?$/i.test(url);
+}
+
+/**
+ * Normalize Amazon Prime genre text to a single Turkish genre name.
+ *
+ * The input may list several genres separated by `,`, `|`, `•` or `·`;
+ * only the first non-empty entry is used. That entry is translated through
+ * a case-insensitive English-to-Turkish table and returned unchanged when
+ * the table has no entry for it.
+ *
+ * @param {string | null | undefined} genre - Raw genre text
+ * @returns {string | null} Primary genre, or null for empty/non-string input
+ */
+function normalizePrimeGenre(genre) {
+  if (!genre || typeof genre !== 'string') return null;
+
+  // Keys are lowercase so the lookup is genuinely case-insensitive. With
+  // capitalised keys a lowercased lookup can never hit. A Map also keeps
+  // inputs such as "constructor" from resolving to inherited Object members.
+  const genreMapping = new Map([
+    ['action', 'Aksiyon'],
+    ['adventure', 'Macera'],
+    ['comedy', 'Komedi'],
+    ['drama', 'Dram'],
+    ['fantasy', 'Fantastik'],
+    ['horror', 'Korku'],
+    ['mystery', 'Gizem'],
+    ['romance', 'Romantik'],
+    ['romantic', 'Romantik'],
+    ['sci-fi', 'Bilim Kurgu'],
+    ['science fiction', 'Bilim Kurgu'],
+    ['thriller', 'Gerilim'],
+    ['documentary', 'Belgesel'],
+    ['animation', 'Animasyon'],
+    ['family', 'Aile'],
+    ['kids', 'Çocuk'],
+    ['war', 'Savaş'],
+    ['western', 'Western'],
+    ['humorous', 'Mizahi'],
+    ['sentimental', 'Duygusal']
+  ]);
+
+  // Handle multiple genres separated by commas, pipes, or bullet characters
+  const separators = /[,|•·]/;
+  const [primary] = genre.split(separators).map(g => g.trim()).filter(Boolean);
+
+  if (!primary) return null;
+
+  // Only the first genre is reported as the primary one.
+  return genreMapping.get(primary.toLowerCase()) ?? primary;
+}
+
+/**
+ * Parse Amazon Prime HTML into a metadata record.
+ *
+ * Loads the markup once and delegates each field to its `extractPrime*`
+ * helper. When no year was found, the first four-digit run in the title is
+ * used instead.
+ *
+ * @param {string} html
+ * @returns {{ name?: string, year?: string | number, seasons?: string | null, thumbnail?: string | null, info?: string | null, genre?: string | null }}
+ *   Empty object when `html` is falsy
+ */
+export function parsePrimeHtml(html) {
+  if (!html) return {};
+
+  const $ = load(html);
+
+  const name = extractPrimeTitle($, html);
+  const detectedYear = extractPrimeYear($, html);
+  const seasons = extractPrimeSeasons($, html);
+  const thumbnail = extractPrimeThumbnail($, html);
+  const info = extractPrimeInfo($, html);
+  const genre = extractPrimeGenre($, html);
+
+  // Title-based fallback, consulted only when no year was detected.
+  const titleYearMatch = !detectedYear && name ? name.match(/(\d{4})/) : null;
+  const year = titleYearMatch ? titleYearMatch[1] : detectedYear;
+
+  return { name, year, seasons, thumbnail, info, genre };
 }