feat(api): support Prime Video scraping and provider-aware metadata

2026-03-01 01:08:25 +03:00
parent 74348f224d
commit c0e62e778c
11 changed files with 515 additions and 166 deletions
--- a/src/config/socket.ts
+++ b/src/config/socket.ts
@@ -33,7 +33,7 @@ export interface MetricsRealtimeEvent {
  sourceCounts: {
    cache: number;
    database: number;
-    netflix: number;
+    scraper: number;
  };
  occurredAt: string;
 }
--- a/src/middleware/validation.middleware.ts
+++ b/src/middleware/validation.middleware.ts
@@ -1,27 +1,19 @@
 import { Request, Response, NextFunction } from 'express';
 import { z } from 'zod';
 import type { ApiResponse, GetInfoRequest } from '../types/index.js';
+import { isSupportedContentUrl } from '../utils/contentUrl.js';

 /**
 * Validation schema for /api/getinfo endpoint
 */
 const getInfoSchema = z.object({
-  url: z.string().url('Invalid URL format').refine((url) => {
-    // Validate Netflix URL
-    try {
-      const parsedUrl = new URL(url);
-      const validHosts = [
-        'www.netflix.com',
-        'netflix.com',
-        'www.netflix.com.tr',
-        'netflix.com.tr',
-      ];
-      const hasTitlePath = /\/title\/\d+/.test(url);
-      return validHosts.includes(parsedUrl.hostname) && hasTitlePath;
-    } catch {
-      return false;
-    }
-  }, 'URL must be a valid Netflix title URL (e.g., https://www.netflix.com/tr/title/81616256)'),
+  url: z
+    .string()
+    .url('Invalid URL format')
+    .refine(
+      (url) => isSupportedContentUrl(url),
+      'URL must be Netflix /title/... or PrimeVideo /detail/...'
+    ),
 });

 /**
--- a/src/routes/api.routes.ts
+++ b/src/routes/api.routes.ts
@@ -286,6 +286,30 @@ router.post(
  }
 );

+/**
+ * POST /api/admin/content/purge
+ * Delete all content rows from DB (with related entities).
+ *
+ * Guarded by `adminOnlyMiddleware`; the request itself is not read.
+ * Responds with `{ success: true, data }` carrying the result of
+ * `AdminService.purgeAllContent()`, or with HTTP 500 and the error code
+ * `ADMIN_CONTENT_PURGE_ERROR` when the purge throws.
+ */
+router.post(
+  '/admin/content/purge',
+  adminOnlyMiddleware,
+  async (_req: Request, res: Response<ApiResponse<AdminActionResponse>>) => {
+    try {
+      const result = await AdminService.purgeAllContent();
+      res.json({ success: true, data: result });
+    } catch (error) {
+      res.status(500).json({
+        success: false,
+        error: {
+          code: 'ADMIN_CONTENT_PURGE_ERROR',
+          message:
+            // Non-Error throwables carry no usable message, so fall back to a fixed one.
+            error instanceof Error ? error.message : 'Failed to purge content',
+        },
+      });
+    }
+  }
+);
+
 /**
 * POST /api/getinfo/async
 * Create async job for content scraping
--- a/src/services/admin.service.ts
+++ b/src/services/admin.service.ts
@@ -6,21 +6,29 @@ import { MetricsService } from './metrics.service.js';
 import { CacheService } from './cache.service.js';
 import { ContentService } from './content.service.js';
 import type { AdminActionResponse, AdminOverviewResponse } from '../types/index.js';
+import { parseSupportedContentUrl } from '../utils/contentUrl.js';

-const CACHE_PREFIX = 'netflix:content:';
+const CACHE_PREFIX = 'content:';
 const MAX_CACHE_KEYS_FOR_ANALYSIS = 1000;

 function formatCacheKeyLabel(key: string): string {
  return key.replace(CACHE_PREFIX, '');
 }

-function extractTitleIdFromCacheKey(key: string): string | null {
+function extractProviderIdFromCacheKey(key: string): { provider: string; id: string } | null {
  const normalized = formatCacheKeyLabel(key);
-  return /^\d+$/.test(normalized) ? normalized : null;
+  const match = normalized.match(/^(netflix|primevideo):([A-Za-z0-9]+)$/);
+  if (!match) return null;
+  const provider = match[1];
+  const id = match[2];
+  if (!provider || !id) return null;
+  return { provider, id };
 }

-function extractTitleIdFromUrl(url: string): string | null {
-  return url.match(/\/title\/(\d+)/)?.[1] ?? null;
+function extractProviderIdFromUrl(url: string): { provider: string; id: string } | null {
+  const parsed = parseSupportedContentUrl(url);
+  if (!parsed) return null;
+  return { provider: parsed.provider, id: parsed.id };
 }

 function parseRedisInfoValue(info: string, key: string): number | null {
@@ -144,16 +152,25 @@ export class AdminService {
      min30Plus: 0,
    };

-    const cacheTitleIds = Array.from(
-      new Set(cacheKeys.map((key) => extractTitleIdFromCacheKey(key)).filter((id): id is string => Boolean(id)))
+    const cacheProviderIds = Array.from(
+      new Set(
+        cacheKeys
+          .map((key) => extractProviderIdFromCacheKey(key))
+          .filter((item): item is { provider: string; id: string } => Boolean(item))
+          .map((item) => `${item.provider}:${item.id}`)
+      )
    );

-    const relatedContent = cacheTitleIds.length
+    const relatedContent = cacheProviderIds.length
      ? await prisma.content.findMany({
          where: {
-            OR: cacheTitleIds.map((id) => ({
-              url: { contains: `/title/${id}` },
-            })),
+            OR: cacheProviderIds.map((providerId) => {
+              const [provider, id] = providerId.split(':');
+              if (provider === 'primevideo') {
+                return { url: { contains: `/detail/${id}` } };
+              }
+              return { url: { contains: `/title/${id}` } };
+            }),
          },
          select: {
            url: true,
@@ -164,9 +181,12 @@ export class AdminService {

    const titleMap = new Map<string, string>();
    for (const item of relatedContent) {
-      const id = extractTitleIdFromUrl(item.url);
-      if (id && !titleMap.has(id)) {
-        titleMap.set(id, item.title);
+      const parsed = extractProviderIdFromUrl(item.url);
+      if (parsed) {
+        const key = `${parsed.provider}:${parsed.id}`;
+        if (!titleMap.has(key)) {
+          titleMap.set(key, item.title);
+        }
      }
    }

@@ -196,7 +216,7 @@ export class AdminService {

      if (ttlValue > 0) {
        const formattedKey = formatCacheKeyLabel(cacheKeys[i] || '');
-        const titleId = extractTitleIdFromCacheKey(cacheKeys[i] || '');
+        const providerId = extractProviderIdFromCacheKey(cacheKeys[i] || '');
        const rawValue = valueResults?.[i]?.[1];
        let cachedAt: number | null = null;
        if (typeof rawValue === 'string') {
@@ -209,7 +229,9 @@ export class AdminService {
        }
        expiringSoon.push({
          key: formattedKey,
-          mediaTitle: titleId ? titleMap.get(titleId) ?? null : null,
+          mediaTitle: providerId
+            ? titleMap.get(`${providerId.provider}:${providerId.id}`) ?? null
+            : null,
          cachedAt,
          ttlSeconds: ttlValue,
        });
@@ -450,6 +472,23 @@ export class AdminService {
      details: `Stale content refresh queued for items older than ${days} days`,
    };
  }
+
+  /**
+   * Delete every content row and every genre row, then clear the content cache.
+   *
+   * Both deletes run inside a single transaction. The reported number comes
+   * from the content delete result itself rather than from a separate
+   * `count()` issued beforehand, so it always equals the rows actually removed.
+   *
+   * @returns Action summary; `queued` holds the number of deleted content rows.
+   */
+  static async purgeAllContent(): Promise<AdminActionResponse> {
+    const [deletedContent] = await prisma.$transaction([
+      prisma.content.deleteMany({}),
+      prisma.genre.deleteMany({}),
+    ]);
+
+    // Cached responses would otherwise keep serving rows that no longer exist.
+    await CacheService.clearAll();
+
+    return {
+      queued: deletedContent.count,
+      skipped: 0,
+      details: 'Tum icerik verileri veritabanindan silindi',
+    };
+  }
 }

 export default AdminService;
--- a/src/services/cache.service.ts
+++ b/src/services/cache.service.ts
@@ -3,19 +3,35 @@ import { env } from '../config/env.js';
 import { emitCacheEvent } from '../config/socket.js';
 import logger from '../utils/logger.js';
 import type { GetInfoResponse, CacheEntry } from '../types/index.js';
+import { parseSupportedContentUrl } from '../utils/contentUrl.js';

 /**
- * Cache key prefix for Netflix content
+ * Cache key prefix for scraped content
 */
-const CACHE_PREFIX = 'netflix:content:';
+const CACHE_PREFIX = 'content:';

 /**
 * Generate cache key from URL
 */
 function getCacheKey(url: string): string {
-  // Use URL hash or title ID as key
-  const titleId = url.match(/\/title\/(\d+)/)?.[1] || url;
-  return `${CACHE_PREFIX}${titleId}`;
+  const parsed = parseSupportedContentUrl(url);
+
+  if (parsed) {
+    return `${CACHE_PREFIX}${parsed.provider}:${parsed.id}`;
+  }
+
+  return `${CACHE_PREFIX}url:${encodeURIComponent(url)}`;
+}
+
+/**
+ * Guarantee that a response carries a known `provider` value.
+ *
+ * Responses that already name 'netflix' or 'primevideo' are returned as-is.
+ * Otherwise a copy is returned whose provider is derived from the URL,
+ * defaulting to 'netflix' when the URL cannot be parsed.
+ */
+function normalizeCachedResponse(url: string, data: GetInfoResponse): GetInfoResponse {
+  const hasKnownProvider = data.provider === 'netflix' || data.provider === 'primevideo';
+  if (hasKnownProvider) {
+    return data;
+  }
+
+  const inferred = parseSupportedContentUrl(url);
+  return { ...data, provider: inferred ? inferred.provider : 'netflix' };
+}

 /**
@@ -39,7 +55,7 @@ export class CacheService {

      logger.debug('Cache hit', { url });
      const entry: CacheEntry<GetInfoResponse> = JSON.parse(cached);
-      return entry.data;
+      return normalizeCachedResponse(url, entry.data);
    } catch (error) {
      logger.error('Cache get error', {
        url,
@@ -57,7 +73,7 @@ export class CacheService {
    const ttl = env.REDIS_TTL_SECONDS;

    const entry: CacheEntry<GetInfoResponse> = {
-      data,
+      data: normalizeCachedResponse(url, data),
      cachedAt: Date.now(),
      ttl,
    };
@@ -137,7 +153,7 @@ export class CacheService {
  }

  /**
-   * Clear all Netflix content cache
+   * Clear all scraped content cache
   */
  static async clearAll(): Promise<void> {
    try {
--- a/src/services/content.service.ts
+++ b/src/services/content.service.ts
@@ -1,6 +1,7 @@
 import prisma from '../config/database.js';
 import { emitContentEvent } from '../config/socket.js';
 import type { ContentData, ScraperResult, GetInfoResponse } from '../types/index.js';
+import { parseSupportedContentUrl } from '../utils/contentUrl.js';

 /**
 * Content Service for database operations
@@ -242,7 +243,9 @@ export class ContentService {
   * Convert ContentData to API response format
   */
  static toApiResponse(data: ContentData): GetInfoResponse {
+    const provider = parseSupportedContentUrl(data.url)?.provider ?? 'netflix';
    return {
+      provider,
      title: data.title,
      year: data.year,
      plot: data.plot,
--- a/src/services/job.service.ts
+++ b/src/services/job.service.ts
@@ -1,4 +1,5 @@
 import { v4 as uuidv4 } from 'uuid';
+import type { Prisma } from '@prisma/client';
 import prisma from '../config/database.js';
 import { CacheService } from './cache.service.js';
 import { ContentService } from './content.service.js';
@@ -60,7 +61,7 @@ export class JobService {
      status?: JobStatus;
      progress?: number;
      step?: string;
-      result?: unknown;
+      result?: Prisma.InputJsonValue;
      error?: string;
    }
  ): Promise<ScrapeJob> {
@@ -73,7 +74,7 @@ export class JobService {
  }

  /**
-   * Process a scrape job (hybrid: cache -> db -> netflix)
+   * Process a scrape job (hybrid: cache -> db -> scraper)
   */
  static async process(jobId: string): Promise<void> {
    const job = await this.getById(jobId);
@@ -117,11 +118,14 @@ export class JobService {
        return;
      }

-      // Update progress
-      await this.update(jobId, { progress: 50, step: 'scraping_netflix' });
-      emitJobProgress(jobId, 50, 'processing', 'Scraping Netflix');
+      const provider = ScraperService.detectProvider(job.url);
+      const providerLabel = provider === 'primevideo' ? 'Prime Video' : 'Netflix';

-      // Step 3: Scrape from Netflix
+      // Update progress
+      await this.update(jobId, { progress: 50, step: `scraping_${provider ?? 'source'}` });
+      emitJobProgress(jobId, 50, 'processing', `Scraping ${providerLabel}`);
+
+      // Step 3: Scrape from source URL
      const scraperResult = await ScraperService.scrape(job.url);

      // Update progress
@@ -136,7 +140,7 @@ export class JobService {
      await CacheService.set(job.url, responseData);

      // Complete the job
-      await this.completeJob(jobId, responseData, 'netflix');
+      await this.completeJob(jobId, responseData, 'scraper');
    } catch (error) {
      const apiError: ApiError = {
        code: 'SCRAPE_ERROR',
@@ -168,7 +172,7 @@ export class JobService {
      status: 'completed',
      progress: 100,
      step: 'completed',
-      result: data,
+      result: data as unknown as Prisma.InputJsonValue,
    });

    emitJobCompleted(jobId, data, source);
@@ -201,7 +205,7 @@ export class JobService {
      return { data: responseData, source: 'database' };
    }

-    // Step 3: Scrape from Netflix
+    // Step 3: Scrape from source URL
    const scraperResult = await ScraperService.scrape(url);

    // Step 4: Save to database
@@ -210,9 +214,9 @@ export class JobService {

    // Step 5: Cache the result
    await CacheService.set(url, responseData);
-    await MetricsService.incrementSource('netflix');
+    await MetricsService.incrementSource('scraper');

-    return { data: responseData, source: 'netflix' };
+    return { data: responseData, source: 'scraper' };
  }

  /**
--- a/src/services/metrics.service.ts
+++ b/src/services/metrics.service.ts
@@ -59,7 +59,7 @@ export class MetricsService {
    bySource: {
      cache: number;
      database: number;
-      netflix: number;
+      scraper: number;
    };
  }> {
    const [counters, sources] = await Promise.all([
@@ -73,7 +73,7 @@ export class MetricsService {
      bySource: {
        cache: toInt(sources.cache),
        database: toInt(sources.database),
-        netflix: toInt(sources.netflix),
+        scraper: toInt(sources.scraper),
      },
    };
  }
--- a/src/services/scraper.service.ts
+++ b/src/services/scraper.service.ts
@@ -1,6 +1,10 @@
 import * as cheerio from 'cheerio';
 import type { ScraperResult, ContentType } from '../types/index.js';
 import logger from '../utils/logger.js';
+import {
+  parseSupportedContentUrl,
+  type SupportedProvider,
+} from '../utils/contentUrl.js';

 /**
 * Age rating patterns to detect and exclude from genres
@@ -14,43 +18,55 @@ const AGE_RATING_PATTERN = /^[\u2066-\u2069\u202A-\u202E\u200E-\u200F]*(\d+\+|PG
 * Matches patterns like "3 Sezon", "2 Seasons", "1. Sezon", etc.
 */
 const SEASON_PATTERN = /(\d+)\.?\s*(sezon|season|sezonlar|seasons)/i;
+/**
+ * Episode count pattern: a number, an optional dot and optional whitespace,
+ * followed by a Turkish or English episode word (case-insensitive).
+ */
+const EPISODE_PATTERN = /(\d+)\.?\s*(bölüm|bolum|bölümler|bolumler|episode|episodes)/i;
+/**
+ * Episode word on its own (no number required), matched as a whole word.
+ */
+const EPISODE_TOKEN_PATTERN = /\b(bölüm|bolum|bölümler|bolumler|episode|episodes)\b/i;

 /**
- * Netflix HTML Scraper Service
+ * Scraper Service (Netflix + Prime Video)
 * Uses Cheerio for parsing HTML content
 */
 export class ScraperService {
+  /**
+   * Resolve which supported provider a URL belongs to.
+   *
+   * @returns The provider, or null when the URL is not a supported content URL.
+   */
+  static detectProvider(url: string): SupportedProvider | null {
+    const parsed = parseSupportedContentUrl(url);
+    return parsed ? parsed.provider : null;
+  }
+
+  /**
+   * Report whether the URL parses as a supported content URL.
+   */
+  static isSupportedUrl(url: string): boolean {
+    const parsed = parseSupportedContentUrl(url);
+    return parsed !== null;
+  }
+
  /**
   * Validate if URL is a valid Netflix URL
   */
  static isValidNetflixUrl(url: string): boolean {
-    try {
-      const parsedUrl = new URL(url);
-      const validHosts = [
-        'www.netflix.com',
-        'netflix.com',
-        'www.netflix.com.tr',
-        'netflix.com.tr',
-      ];
-      return validHosts.includes(parsedUrl.hostname);
-    } catch {
-      return false;
+    return parseSupportedContentUrl(url)?.provider === 'netflix';
  }
+
+  /**
+   * Report whether the URL parses as a Prime Video content URL.
+   */
+  static isValidPrimeVideoUrl(url: string): boolean {
+    const parsed = parseSupportedContentUrl(url);
+    return parsed !== null && parsed.provider === 'primevideo';
+  }

  /**
   * Extract Netflix title ID from URL
   */
  static extractTitleId(url: string): string | null {
-    const match = url.match(/\/title\/(\d+)/);
-    return match ? match[1] : null;
+    const parsed = parseSupportedContentUrl(url);
+    return parsed?.provider === 'netflix' ? parsed.id : null;
  }

  /**
-   * Fetch HTML content from Netflix URL
+   * Fetch HTML content from URL
   */
-  private static async fetchHtml(url: string): Promise<string> {
-    logger.info('Fetching Netflix page', { url });
+  private static async fetchHtml(url: string, provider: SupportedProvider): Promise<string> {
+    logger.info('Fetching content page', { provider, url });

    const response = await fetch(url, {
      headers: {
@@ -63,7 +79,7 @@ export class ScraperService {
    });

    if (!response.ok) {
-      throw new Error(`Failed to fetch Netflix page: ${response.status}`);
+      throw new Error(`Failed to fetch ${provider} page: ${response.status}`);
    }

    return response.text();
@@ -73,22 +89,46 @@ export class ScraperService {
   * Parse HTML and extract content data
   */
  static async scrape(url: string): Promise<ScraperResult> {
-    if (!this.isValidNetflixUrl(url)) {
-      throw new Error('Invalid Netflix URL');
+    const parsed = parseSupportedContentUrl(url);
+
+    if (!parsed) {
+      throw new Error(
+        'Invalid content URL. Use Netflix /title/... or PrimeVideo /detail/...'
+      );
    }

-    const html = await this.fetchHtml(url);
+    const html = await this.fetchHtml(url, parsed.provider);
    const $ = cheerio.load(html);

-    const title = this.extractTitle($);
-    const year = this.extractYear($);
-    const plot = this.extractPlot($);
-    const ageRating = this.extractAgeRating($);
-    const { genres, type, currentSeason } = this.extractGenresTypeAndSeason($);
-    const cast = this.extractCast($);
-    const backdropUrl = this.extractBackdrop($);
+    const result =
+      parsed.provider === 'netflix'
+        ? this.scrapeNetflix($)
+        : this.scrapePrimeVideo($, parsed.id);

-    const result: ScraperResult = {
+    logger.info('Scraping completed', {
+      provider: parsed.provider,
+      url,
+      title: result.title,
+      year: result.year,
+      ageRating: result.ageRating,
+      type: result.type,
+      genresCount: result.genres.length,
+      castCount: result.cast.length,
+    });
+
+    return result;
+  }
+
+  private static scrapeNetflix($: cheerio.CheerioAPI): ScraperResult {
+    const title = this.extractNetflixTitle($);
+    const year = this.extractNetflixYear($);
+    const plot = this.extractNetflixPlot($);
+    const ageRating = this.extractNetflixAgeRating($);
+    const { genres, type, currentSeason } = this.extractNetflixGenresTypeAndSeason($);
+    const cast = this.extractNetflixCast($);
+    const backdropUrl = this.extractNetflixBackdrop($);
+
+    return {
      title,
      year,
      plot,
@@ -99,24 +139,71 @@ export class ScraperService {
      backdropUrl,
      currentSeason,
    };
+  }

-    logger.info('Scraping completed', {
-      url,
+  private static scrapePrimeVideo($: cheerio.CheerioAPI, detailId: string): ScraperResult {
+    const title = this.extractPrimeTitle($, detailId);
+    const year = this.extractPrimeYear($);
+    const { type, currentSeason } = this.extractPrimeTypeAndSeason($);
+    const plot = this.extractPrimePlot($);
+    const cast = this.extractPrimeCast($);
+    const genres = this.extractPrimeGenres($);
+    const backdropUrl = this.extractPrimeBackdrop($);
+    const ageRating = this.extractPrimeAgeRating($);
+
+    return {
      title,
      year,
+      plot,
      ageRating,
      type,
-      genresCount: genres.length,
-      castCount: cast.length,
-    });
+      genres,
+      cast,
+      backdropUrl,
+      currentSeason,
+    };
+  }

-    return result;
+  /**
+   * Extract the first plausible release year from free-form text.
+   *
+   * A candidate is a standalone four-digit number starting with 19 or 20.
+   * Candidates later than five years from now are skipped, and the next
+   * candidate in the text is tried instead.
+   *
+   * @param text Text that may contain a year.
+   * @returns The year, or null when no candidate qualifies.
+   */
+  private static parseYear(text: string): number | null {
+    const maxYear = new Date().getFullYear() + 5;
+    // Digit lookarounds stop a longer number such as "20000" from being read as 2000.
+    const candidates = text.match(/(?<!\d)(?:19|20)\d{2}(?!\d)/g) ?? [];
+
+    for (const candidate of candidates) {
+      const year = Number.parseInt(candidate, 10);
+      // The pattern already guarantees year >= 1900, so only the upper bound is checked.
+      if (year <= maxYear) return year;
+    }
+
+    return null;
+  }
+
+  /**
+   * Collapse every whitespace run into a single space and trim both ends.
+   */
+  private static cleanText(text: string): string {
+    const collapsed = text.replace(/\s+/g, ' ');
+    return collapsed.trim();
+  }
+
+  /**
+   * Strip Prime Video boilerplate from a title candidate.
+   *
+   * After whitespace cleanup the patterns below are removed in order: leading
+   * "İzle:", "Canlı izleyin:", "Watch now:" and "Prime Video:" prefixes, a
+   * "Sezon/Season N" marker, and trailing "- Prime Video" / "| Prime Video"
+   * suffixes. Order matters because later patterns anchor on what earlier
+   * ones leave behind.
+   */
+  private static normalizePrimeTitleCandidate(text: string): string {
+    const noisePatterns: RegExp[] = [
+      /^[İIiı]zle:\s*/i,
+      /^canl[ıi]\s+izleyin:\s*/i,
+      /^watch\s+now:\s*/i,
+      /^prime\s+video:\s*/i,
+      /\s*(sezon|season)\s+\d+(?=\s*[-–—]\s*prime\s+video$)/i,
+      /\s*[-–—]\s*prime\s+video$/i,
+      /\s*\|\s*prime\s*video$/i,
+      /\s+(sezon|season)\s+\d+\s*$/i,
+    ];
+
+    let candidate = this.cleanText(text);
+    for (const pattern of noisePatterns) {
+      candidate = candidate.replace(pattern, '');
+    }
+    return candidate.trim();
+  }
+
+  /**
+   * Clean each entry, drop the empty ones and remove duplicates, keeping the
+   * order of first appearance.
+   */
+  private static uniqueTextList(items: string[]): string[] {
+    const cleaned = items
+      .map((item) => this.cleanText(item))
+      .filter((item) => item.length > 0);
+    return Array.from(new Set(cleaned));
+  }

  /**
-   * Extract title from HTML
+   * Netflix extractors
   */
-  private static extractTitle($: cheerio.CheerioAPI): string {
+  private static extractNetflixTitle($: cheerio.CheerioAPI): string {
    let title = $('h2.default-ltr-iqcdef-cache-tnklrp').first().text().trim();

    if (!title) {
@@ -131,24 +218,12 @@ export class ScraperService {
    return title || 'Unknown Title';
  }

-  /**
-   * Extract year from HTML (first li element)
-   */
-  private static extractYear($: cheerio.CheerioAPI): number | null {
+  private static extractNetflixYear($: cheerio.CheerioAPI): number | null {
    const yearText = $('li.default-ltr-iqcdef-cache-6prs41').first().text().trim();
-    const year = parseInt(yearText, 10);
-
-    if (!isNaN(year) && year >= 1900 && year <= new Date().getFullYear() + 5) {
-      return year;
+    return this.parseYear(yearText);
  }

-    return null;
-  }
-
-  /**
-   * Extract plot/description from HTML
-   */
-  private static extractPlot($: cheerio.CheerioAPI): string | null {
+  private static extractNetflixPlot($: cheerio.CheerioAPI): string | null {
    const plot = $('span.default-ltr-iqcdef-cache-6ukeej').first().text().trim();

    if (!plot) {
@@ -159,91 +234,70 @@ export class ScraperService {
    return plot || null;
  }

-  /**
-   * Extract age rating from HTML (e.g., "18+", "16+")
-   * Searches all li elements (except first which is year)
-   */
-  private static extractAgeRating($: cheerio.CheerioAPI): string | null {
-    let ageRating: string | null = null;
-    const foundTexts: string[] = [];
-
-    $('li.default-ltr-iqcdef-cache-6prs41').each((index, element) => {
-      if (index === 0) return; // Skip year
+  private static extractNetflixAgeRating($: cheerio.CheerioAPI): string | null {
+    const items = $('li.default-ltr-iqcdef-cache-6prs41').toArray();
+    for (let i = 1; i < items.length; i += 1) {
+      const element = items[i];
+      if (!element) continue;

      const text = $(element).text().trim();
-      foundTexts.push(text);
-
-      // Clean Unicode characters first
-      const cleanText = text.replace(/[\u2066-\u2069\u202A-\u202E\u200E-\u200F]/g, '').trim();
+      const cleanText = text
+        .replace(/[\u2066-\u2069\u202A-\u202E\u200E-\u200F]/g, '')
+        .trim();

      if (cleanText && AGE_RATING_PATTERN.test(cleanText)) {
-        ageRating = cleanText;
-        return false; // Break loop
+        return cleanText;
      }
-    });
-
-    // Debug logging
-    if (!ageRating && foundTexts.length > 0) {
-      logger.debug('Age rating not found in elements', {
-        foundTexts,
-        pattern: AGE_RATING_PATTERN.source,
-      });
    }

-    return ageRating;
+    return null;
  }

-  /**
-   * Extract genres from HTML (skip year, age rating, and season info)
-   * Also detects content type (movie/tvshow) based on season presence
-   * Extracts current season number from season text
-   */
-  private static extractGenresTypeAndSeason($: cheerio.CheerioAPI): { genres: string[]; type: ContentType; currentSeason: number | null } {
+  private static extractNetflixGenresTypeAndSeason(
+    $: cheerio.CheerioAPI
+  ): { genres: string[]; type: ContentType; currentSeason: number | null } {
    const genres: string[] = [];
    let type: ContentType = 'movie';
    let currentSeason: number | null = null;
-    const foundTexts: string[] = [];

    $('li.default-ltr-iqcdef-cache-6prs41').each((index, element) => {
-      if (index === 0) return; // Skip year
+      if (index === 0) return;

      const text = $(element).text().trim();
-      const cleanText = text.replace(/[\u2066\u2069\u202A\u202B\u202C\u202D\u202E\u200E\u200F]/g, '').trim();
-      foundTexts.push(cleanText);
+      const cleanText = text
+        .replace(/[\u2066\u2069\u202A\u202B\u202C\u202D\u202E\u200E\u200F]/g, '')
+        .trim();

-      // Check for season pattern - indicates TV show
      const seasonMatch = cleanText.match(SEASON_PATTERN);
      if (cleanText && seasonMatch) {
        type = 'tvshow';
-        // Extract season number from the text
-        const seasonNum = parseInt(seasonMatch[1], 10);
-        if (!isNaN(seasonNum)) {
+        const seasonValue = seasonMatch[1];
+        const seasonNum = seasonValue ? Number.parseInt(seasonValue, 10) : Number.NaN;
+        if (Number.isFinite(seasonNum)) {
          currentSeason = seasonNum;
        }
-        return; // Skip adding to genres
+        return;
+      }
+
+      const episodeMatch = cleanText.match(EPISODE_PATTERN);
+      const hasEpisodeToken = EPISODE_TOKEN_PATTERN.test(cleanText);
+      if (cleanText && (episodeMatch || hasEpisodeToken)) {
+        type = 'tvshow';
+        if (currentSeason == null) {
+          currentSeason = 1;
+        }
+        return;
      }

-      // Skip age rating - only add actual genres
      if (cleanText && !AGE_RATING_PATTERN.test(cleanText)) {
        genres.push(cleanText);
      }
    });

-    // Debug logging
-    logger.debug('extractGenresTypeAndSeason completed', {
-      foundTexts,
-      genres,
-      type,
-      currentSeason,
-    });
-
    return { genres, type, currentSeason };
  }

-  /**
-   * Extract cast members from HTML
-   */
-  private static extractCast($: cheerio.CheerioAPI): string[] {
+  private static extractNetflixCast($: cheerio.CheerioAPI): string[] {
    const castText = $('span.default-ltr-iqcdef-cache-m0886o').first().text().trim();

    if (!castText) {
@@ -256,10 +310,7 @@ export class ScraperService {
      .filter((name) => name.length > 0);
  }

-  /**
-   * Extract backdrop image URL from HTML
-   */
-  private static extractBackdrop($: cheerio.CheerioAPI): string | null {
+  private static extractNetflixBackdrop($: cheerio.CheerioAPI): string | null {
    const backdropDiv = $('div.default-ltr-iqcdef-cache-1wezh7a').first();
    const img = backdropDiv.find('img').first();

@@ -279,6 +330,176 @@ export class ScraperService {

    return null;
  }
+
+  /**
+   * Prime Video extractors
+   */
+
+  /**
+   * Extract the title of a Prime Video detail page.
+   *
+   * Several candidates are collected and each is passed through
+   * `normalizePrimeTitleCandidate`. The first non-empty candidate wins, in
+   * this order: the main heading, the aria-label of a link to this detail
+   * page, the text of such a link, the og:title meta tag, the title meta tag,
+   * the document title, and finally the slug of the canonical URL.
+   *
+   * @param $ Loaded page.
+   * @param detailId Detail ID of the page; used to find links pointing at it.
+   * @returns The title, or 'Unknown Title' when every candidate is empty.
+   */
+  private static extractPrimeTitle($: cheerio.CheerioAPI, detailId: string): string {
+    const primaryTitle = this.normalizePrimeTitleCandidate(
+      $('h1[data-automation-id="title"]').first().text() || ''
+    );
+    // Matches any anchor whose href contains this page's /detail/<id> path.
+    const detailLinkSelector = `a[href*="/detail/${detailId}"]`;
+    const imageLinkAriaTitle = this.normalizePrimeTitleCandidate(
+      $(`a[data-testid="image-link"][aria-label][href*="/detail/${detailId}"]`).first().attr('aria-label') ||
+        $(`${detailLinkSelector}[aria-label]`).first().attr('aria-label') ||
+        ''
+    );
+    const imageLinkTextTitle = this.normalizePrimeTitleCandidate(
+      $(`a[data-testid="image-link"][href*="/detail/${detailId}"]`).first().text() ||
+        $(detailLinkSelector).first().text() ||
+        ''
+    );
+    const metaOgTitle = this.normalizePrimeTitleCandidate(
+      $('meta[property="og:title"]').attr('content') || ''
+    );
+    const metaNameTitle = this.normalizePrimeTitleCandidate(
+      $('meta[name="title"]').attr('content') || ''
+    );
+    const pageTitle = this.normalizePrimeTitleCandidate(
+      $('title').first().text() || ''
+    );
+    const canonicalHref = $('link[rel="canonical"]').attr('href') || '';
+    let canonicalTitle = '';
+    if (canonicalHref) {
+      try {
+        // The base URL lets a relative canonical href be parsed as well.
+        const canonicalUrl = new URL(canonicalHref, 'https://www.primevideo.com');
+        // Expects /detail/<slug>/<id>; the slug is used only when <id> is this page's ID.
+        const canonicalMatch = canonicalUrl.pathname.match(/\/detail\/([^/]+)\/([A-Za-z0-9]+)/i);
+        if (canonicalMatch && canonicalMatch[2] === detailId) {
+          canonicalTitle = this.normalizePrimeTitleCandidate(
+            decodeURIComponent(canonicalMatch[1] || '')
+          );
+        }
+      } catch {
+        // best effort
+      }
+    }
+
+    // First non-empty candidate wins.
+    const title =
+      primaryTitle ||
+      imageLinkAriaTitle ||
+      imageLinkTextTitle ||
+      metaOgTitle ||
+      metaNameTitle ||
+      pageTitle ||
+      canonicalTitle;
+
+    return title || 'Unknown Title';
+  }
+
+  /**
+   * Read the release year from the release-year badge, trying its text first
+   * and its aria-label second.
+   *
+   * @returns The year, or null when neither source yields one.
+   */
+  private static extractPrimeYear($: cheerio.CheerioAPI): number | null {
+    const badge = $('span[data-automation-id="release-year-badge"]').first();
+
+    const fromText = this.parseYear(this.cleanText(badge.text()));
+    if (fromText) return fromText;
+
+    const ariaLabel = badge.attr('aria-label') || '';
+    return this.parseYear(this.cleanText(ariaLabel));
+  }
+
+  /**
+   * Determine whether the page describes a TV show and, if so, which season
+   * is currently selected.
+   *
+   * The page counts as a TV show when the seasons node mentions
+   * "sezon"/"season" or when a season number can be read from the season
+   * selector label. Treating a parsed season number as TV-show evidence
+   * prevents the result from pairing `type: 'movie'` with a season number.
+   *
+   * @returns Content type plus the selected season number (null when unknown).
+   */
+  private static extractPrimeTypeAndSeason(
+    $: cheerio.CheerioAPI
+  ): { type: ContentType; currentSeason: number | null } {
+    const seasonNodeText = this.cleanText(
+      $('div.dv-node-dp-seasons, [data-testid="dp-season-selector"]').text()
+    );
+    const hasSeasonMarker = /\b(sezon|season)\b/i.test(seasonNodeText);
+
+    const seasonLabel =
+      $('input#av-droplist-av-atf-season-selector').attr('aria-label') ||
+      $('label[for="av-droplist-av-atf-season-selector"] ._36qUej').first().text() ||
+      '';
+
+    // Accepts both "Season 2" / "Sezon 2" and "2. Sezon" / "2 Season" orderings.
+    const seasonMatch = this.cleanText(seasonLabel).match(
+      /(?:sezon|season)\s*(\d+)|(\d+)\.?\s*(?:sezon|season)/i
+    );
+    // Exactly one of the two groups participates; both capture digits only,
+    // so parsing can never produce NaN.
+    const seasonDigits = seasonMatch?.[1] ?? seasonMatch?.[2];
+    const currentSeason = seasonDigits ? Number.parseInt(seasonDigits, 10) : null;
+
+    return {
+      type: hasSeasonMarker || currentSeason !== null ? 'tvshow' : 'movie',
+      currentSeason,
+    };
+  }
+
+  /**
+   * Collect the cast names linked in the cast list, cleaned and de-duplicated.
+   */
+  private static extractPrimeCast($: cheerio.CheerioAPI): string[] {
+    const names = $('dd.skJCpF a._1NNx6V')
+      .toArray()
+      .map((el) => $(el).text());
+
+    return this.uniqueTextList(names);
+  }
+
+  /**
+   * Collect genre and mood labels from the genres node, cleaned and
+   * de-duplicated.
+   */
+  private static extractPrimeGenres($: cheerio.CheerioAPI): string[] {
+    const labels = $(
+      'div[data-testid="dv-node-dp-genres"] [data-testid="genre-texts"], div[data-testid="dv-node-dp-genres"] [data-testid="mood-texts"]'
+    )
+      .toArray()
+      .map((el) => $(el).text());
+
+    return this.uniqueTextList(labels);
+  }
+
+  /**
+   * Read the synopsis from the expandable text block, falling back to the
+   * og:description meta tag.
+   *
+   * @returns The cleaned synopsis, or null when none is present.
+   */
+  private static extractPrimePlot($: cheerio.CheerioAPI): string | null {
+    const expandableText = $('span.fbl-expandable-text span._1H6ABQ').first().text();
+    const rawPlot =
+      expandableText || $('meta[property="og:description"]').attr('content') || '';
+
+    const plot = this.cleanText(rawPlot);
+    return plot.length > 0 ? plot : null;
+  }
+
+  /**
+   * Read the age rating badge text, looking it up by automation id first and
+   * by test id second.
+   *
+   * @returns The cleaned badge text, or null when no badge text is found.
+   */
+  private static extractPrimeAgeRating($: cheerio.CheerioAPI): string | null {
+    const byAutomationId = $('span[data-automation-id="age-rating-badge"]').first().text();
+    const rawRating =
+      byAutomationId || $('[data-testid="age-rating-badge"]').first().text() || '';
+
+    const ageRating = this.cleanText(rawRating);
+    return ageRating.length > 0 ? ageRating : null;
+  }
+
+  /**
+   * Pick a backdrop image URL.
+   *
+   * Entries of the first WebP `srcset` that carry a width descriptor are
+   * ranked as follows: an entry exactly 1080 wide, else the narrowest entry
+   * wider than 1080, else the widest entry. Ties keep the entry listed first.
+   * When no usable entry exists, the `src` of the base image is used.
+   *
+   * @returns The chosen URL, or null when nothing is found.
+   */
+  private static extractPrimeBackdrop($: cheerio.CheerioAPI): string | null {
+    const srcset =
+      $('div.Kc5eKF picture source[type="image/webp"]').first().attr('srcset') ||
+      $('picture source[type="image/webp"]').first().attr('srcset') ||
+      '';
+
+    // An empty srcset yields a single empty entry that simply fails to match.
+    const candidates: Array<{ url: string; width: number }> = [];
+    for (const entry of srcset.split(',')) {
+      const match = entry.trim().match(/^(\S+)\s+(\d+)w$/);
+      const url = match?.[1];
+      const widthRaw = match?.[2];
+      if (url && widthRaw) {
+        candidates.push({ url, width: Number.parseInt(widthRaw, 10) });
+      }
+    }
+
+    let narrowestAbove: { url: string; width: number } | undefined;
+    let widest: { url: string; width: number } | undefined;
+    for (const candidate of candidates) {
+      // An exact match outranks everything else, so it can be returned at once.
+      if (candidate.width === 1080) return candidate.url;
+
+      if (candidate.width > 1080 && (!narrowestAbove || candidate.width < narrowestAbove.width)) {
+        narrowestAbove = candidate;
+      }
+      if (!widest || candidate.width > widest.width) {
+        widest = candidate;
+      }
+    }
+
+    const chosen = narrowestAbove ?? widest;
+    if (chosen) return chosen.url;
+
+    const fallbackSrc = $('img[data-testid="base-image"]').first().attr('src');
+    return fallbackSrc ? fallbackSrc : null;
+  }
 }

 export default ScraperService;
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -57,6 +57,7 @@ export interface GetInfoRequest {
 }

 export interface GetInfoResponse {
+  provider: 'netflix' | 'primevideo';
  title: string;
  year: number | null;
  plot: string | null;
@@ -134,7 +135,7 @@ export interface AdminOverviewResponse {
    sourceCounts: {
      cache: number;
      database: number;
-      netflix: number;
+      scraper: number;
    };
  };
 }
@@ -155,7 +156,7 @@ export interface CacheEntry<T> {
  ttl: number;
 }

-export type DataSource = 'cache' | 'database' | 'netflix';
+export type DataSource = 'cache' | 'database' | 'scraper';

 // ============================================
 // Socket Event Types
--- a/src/utils/contentUrl.ts
+++ b/src/utils/contentUrl.ts
@@ -0,0 +1,49 @@
+/** Providers whose content URLs this module can recognize. */
+export type SupportedProvider = 'netflix' | 'primevideo';
+
+/** Hostnames accepted as Netflix; looked up with the lower-cased URL hostname. */
+const NETFLIX_HOSTS = new Set([
+  'www.netflix.com',
+  'netflix.com',
+  'www.netflix.com.tr',
+  'netflix.com.tr',
+]);
+
+/** Hostnames accepted as Prime Video; looked up with the lower-cased URL hostname. */
+const PRIME_HOSTS = new Set([
+  'www.primevideo.com',
+  'primevideo.com',
+]);
+
+/** Result of parsing a supported content URL. */
+export interface ParsedContentUrl {
+  /** Provider the URL belongs to. */
+  provider: SupportedProvider;
+  /** ID taken from the URL path (after /title/ for Netflix, /detail/ for Prime Video). */
+  id: string;
+}
+
+/**
+ * Parse a URL into its provider and provider-specific content ID.
+ *
+ * Netflix URLs must contain `/title/<digits>`. Prime Video URLs must contain
+ * `/detail/<id>` or `/detail/<slug>/<id>`, where a slug is a path segment
+ * holding at least one character that cannot appear in an ID (for example a
+ * hyphen). Without the slug form, "/detail/The-Boys/<id>" would yield "The"
+ * as the ID, and every title sharing that first word would map to one key.
+ *
+ * @param rawUrl URL to inspect.
+ * @returns Provider and ID, or null when the URL is malformed, the host is
+ *   not supported, or the path carries no ID.
+ */
+export function parseSupportedContentUrl(rawUrl: string): ParsedContentUrl | null {
+  let parsedUrl: URL;
+  try {
+    parsedUrl = new URL(rawUrl);
+  } catch {
+    // Not a parseable absolute URL.
+    return null;
+  }
+
+  const hostname = parsedUrl.hostname.toLowerCase();
+
+  if (NETFLIX_HOSTS.has(hostname)) {
+    const id = parsedUrl.pathname.match(/\/title\/(\d+)/)?.[1];
+    return id ? { provider: 'netflix', id } : null;
+  }
+
+  if (PRIME_HOSTS.has(hostname)) {
+    // First alternative: a slug segment followed by a complete ID segment.
+    // Second alternative: the ID directly after /detail/ (previous behavior),
+    // which also remains the fallback when the segment after a slug is not
+    // a plain alphanumeric ID.
+    const match = parsedUrl.pathname.match(
+      /\/detail\/(?:[^/]*[^A-Za-z0-9/][^/]*\/([A-Za-z0-9]+)(?=\/|$)|([A-Za-z0-9]+))/
+    );
+    const id = match?.[1] ?? match?.[2];
+    return id ? { provider: 'primevideo', id } : null;
+  }
+
+  return null;
+}
+
+/**
+ * Report whether the URL is a supported Netflix or Prime Video content URL.
+ */
+export function isSupportedContentUrl(rawUrl: string): boolean {
+  const parsed = parseSupportedContentUrl(rawUrl);
+  return parsed !== null;
+}