Compare commits

...

3 Commits

17 changed files with 656 additions and 179 deletions

View File

@@ -25,6 +25,7 @@ docker compose -f docker-compose.dev.yml up --build
``` ```
API şu adreste çalışacak: `http://localhost:3000` API şu adreste çalışacak: `http://localhost:3000`
Frontend şu adreste çalışacak: `http://localhost:5173`
## API Kullanımı ## API Kullanımı

View File

@@ -48,6 +48,25 @@ services:
networks: networks:
- netflix-scraper-network - netflix-scraper-network
frontend:
image: node:20-alpine
container_name: netflix-scraper-frontend-dev
restart: unless-stopped
working_dir: /app
ports:
- "5173:5173"
environment:
- VITE_API_PROXY_TARGET=http://app:3000
command: sh -c "npm install && npm run dev -- --host 0.0.0.0 --port 5173"
volumes:
- ./frontend:/app:delegated
- frontend_node_modules_data:/app/node_modules
depends_on:
app:
condition: service_healthy
networks:
- netflix-scraper-network
postgres: postgres:
image: postgres:16-alpine image: postgres:16-alpine
container_name: netflix-scraper-postgres-dev container_name: netflix-scraper-postgres-dev
@@ -89,6 +108,7 @@ volumes:
postgres_data_dev: postgres_data_dev:
redis_data_dev: redis_data_dev:
node_modules_data: node_modules_data:
frontend_node_modules_data:
networks: networks:
netflix-scraper-network: netflix-scraper-network:

BIN
frontend/public/prime.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 132 KiB

View File

@@ -32,6 +32,7 @@ import {
import './movies-page.css' import './movies-page.css'
type ContentType = 'movie' | 'tvshow' type ContentType = 'movie' | 'tvshow'
type ContentProvider = 'netflix' | 'primevideo'
type SortBy = 'title' | 'year' type SortBy = 'title' | 'year'
const WEB_API_KEY = import.meta.env.VITE_WEB_API_KEY ?? 'web-dev-key-change-me' const WEB_API_KEY = import.meta.env.VITE_WEB_API_KEY ?? 'web-dev-key-change-me'
const ADMIN_API_KEY = import.meta.env.VITE_ADMIN_API_KEY ?? 'admin-dev-key-change-me' const ADMIN_API_KEY = import.meta.env.VITE_ADMIN_API_KEY ?? 'admin-dev-key-change-me'
@@ -51,6 +52,7 @@ declare global {
} }
interface ContentListItem { interface ContentListItem {
provider?: ContentProvider
title: string title: string
year: number | null year: number | null
plot: string | null plot: string | null
@@ -143,7 +145,7 @@ interface AdminOverview {
sourceCounts: { sourceCounts: {
cache: number cache: number
database: number database: number
netflix: number scraper: number
} }
} }
} }
@@ -195,6 +197,13 @@ function getContentKey(item: Pick<ContentListItem, 'type' | 'title' | 'year'>):
return `${item.type}::${item.title}::${item.year ?? 'na'}` return `${item.type}::${item.title}::${item.year ?? 'na'}`
} }
/**
 * Pick the logo stamped on a card or detail view for a content item.
 *
 * Items whose provider is 'primevideo' get the Prime Video logo; every other
 * value, including a missing provider, falls back to the Netflix logo.
 */
function getProviderBrand(item: Pick<ContentListItem, 'provider'>): { src: string; alt: string } {
  const isPrimeVideo = item.provider === 'primevideo'
  return isPrimeVideo
    ? { src: '/prime.png', alt: 'Prime Video' }
    : { src: '/netflix.png', alt: 'Netflix' }
}
async function loadSocketClient(): Promise<SocketClient | null> { async function loadSocketClient(): Promise<SocketClient | null> {
if (typeof window === 'undefined') return null if (typeof window === 'undefined') return null
if (window.io) { if (window.io) {
@@ -260,6 +269,7 @@ export function MoviesPage() {
const [adminError, setAdminError] = useState<string | null>(null) const [adminError, setAdminError] = useState<string | null>(null)
const [adminActionMessage, setAdminActionMessage] = useState<string | null>(null) const [adminActionMessage, setAdminActionMessage] = useState<string | null>(null)
const [adminActionPending, setAdminActionPending] = useState<string | null>(null) const [adminActionPending, setAdminActionPending] = useState<string | null>(null)
const [purgeConfirmOpened, setPurgeConfirmOpened] = useState(false)
const [typeFilter, setTypeFilter] = useState<'all' | ContentType>('all') const [typeFilter, setTypeFilter] = useState<'all' | ContentType>('all')
const [search, setSearch] = useState('') const [search, setSearch] = useState('')
const [sortBy, setSortBy] = useState<SortBy>('title') const [sortBy, setSortBy] = useState<SortBy>('title')
@@ -401,7 +411,7 @@ export function MoviesPage() {
actionKey: string, actionKey: string,
endpoint: string, endpoint: string,
body?: Record<string, unknown> body?: Record<string, unknown>
) => { ): Promise<boolean> => {
const startedAt = Date.now() const startedAt = Date.now()
setAdminActionPending(actionKey) setAdminActionPending(actionKey)
setAdminActionMessage(null) setAdminActionMessage(null)
@@ -424,21 +434,32 @@ export function MoviesPage() {
if (!result.success || !result.data) { if (!result.success || !result.data) {
setAdminError(result.error?.message || 'Admin aksiyonu basarisiz') setAdminError(result.error?.message || 'Admin aksiyonu basarisiz')
return return false
} }
setAdminActionMessage( setAdminActionMessage(
`${result.data.details ?? 'Aksiyon tamamlandi'} | queued: ${result.data.queued}, skipped: ${result.data.skipped}` `${result.data.details ?? 'Aksiyon tamamlandi'} | queued: ${result.data.queued}, skipped: ${result.data.skipped}`
) )
await loadAdminOverview(undefined, true) await loadAdminOverview(undefined, true)
return true
} catch { } catch {
setAdminError('Admin aksiyonu baglanti hatasi') setAdminError('Admin aksiyonu baglanti hatasi')
return false
} finally { } finally {
await waitForMinimumDuration(startedAt, MIN_BUTTON_LOADING_MS) await waitForMinimumDuration(startedAt, MIN_BUTTON_LOADING_MS)
setAdminActionPending(null) setAdminActionPending(null)
} }
} }
// Runs the purge admin action; only when it reports success does it close the
// confirmation modal, drop the current selection and reload the content list
// using the currently active type filter.
const handlePurgeAllContent = async () => {
  const purged = await runAdminAction('purge-content', '/api/admin/content/purge')
  if (purged) {
    setPurgeConfirmOpened(false)
    setSelectedContentKey(null)
    await loadContent(undefined, true, typeFilterRef.current)
  }
}
const handleAdminRefresh = async () => { const handleAdminRefresh = async () => {
const startedAt = Date.now() const startedAt = Date.now()
await loadAdminOverview(undefined, false) await loadAdminOverview(undefined, false)
@@ -582,6 +603,10 @@ export function MoviesPage() {
() => items.find((item) => getContentKey(item) === selectedContentKey) ?? null, () => items.find((item) => getContentKey(item) === selectedContentKey) ?? null,
[items, selectedContentKey] [items, selectedContentKey]
) )
const selectedContentBrand = useMemo(
() => (selectedContent ? getProviderBrand(selectedContent) : null),
[selectedContent]
)
const movieCount = items.filter((item) => item.type === 'movie').length const movieCount = items.filter((item) => item.type === 'movie').length
const tvCount = items.filter((item) => item.type === 'tvshow').length const tvCount = items.filter((item) => item.type === 'tvshow').length
@@ -725,6 +750,14 @@ export function MoviesPage() {
> >
Stale refresh Stale refresh
</Button> </Button>
<Button
size="compact-sm"
color="red"
variant="light"
onClick={() => setPurgeConfirmOpened(true)}
>
DB tum veriyi sil
</Button>
</Group> </Group>
</Group> </Group>
@@ -904,7 +937,7 @@ export function MoviesPage() {
<Group gap="xs" mt="sm"> <Group gap="xs" mt="sm">
<Badge variant="light">Kaynak Cache: {adminOverview.requestMetrics.sourceCounts.cache}</Badge> <Badge variant="light">Kaynak Cache: {adminOverview.requestMetrics.sourceCounts.cache}</Badge>
<Badge variant="light">Kaynak DB: {adminOverview.requestMetrics.sourceCounts.database}</Badge> <Badge variant="light">Kaynak DB: {adminOverview.requestMetrics.sourceCounts.database}</Badge>
<Badge variant="light">Kaynak Netflix: {adminOverview.requestMetrics.sourceCounts.netflix}</Badge> <Badge variant="light">Kaynak Scraper: {adminOverview.requestMetrics.sourceCounts.scraper}</Badge>
</Group> </Group>
<Stack gap={6} mt="md"> <Stack gap={6} mt="md">
<Text size="sm" fw={600}> <Text size="sm" fw={600}>
@@ -1057,6 +1090,7 @@ export function MoviesPage() {
{visibleItems.map((item, index) => { {visibleItems.map((item, index) => {
const itemKey = getContentKey(item) const itemKey = getContentKey(item)
const isLive = flashItemKeys.includes(itemKey) const isLive = flashItemKeys.includes(itemKey)
const brand = getProviderBrand(item)
return ( return (
<Grid.Col key={itemKey} span={{ base: 12, sm: 6, md: 4 }}> <Grid.Col key={itemKey} span={{ base: 12, sm: 6, md: 4 }}>
<Card <Card
@@ -1099,7 +1133,7 @@ export function MoviesPage() {
</Badge> </Badge>
</Group> </Group>
<Group gap="xs"> <Group gap="xs" className="card-meta-row">
{item.year && <Badge variant="light">{item.year}</Badge>} {item.year && <Badge variant="light">{item.year}</Badge>}
{item.ageRating && <Badge variant="outline">{item.ageRating}</Badge>} {item.ageRating && <Badge variant="outline">{item.ageRating}</Badge>}
{item.currentSeason && item.type === 'tvshow' && ( {item.currentSeason && item.type === 'tvshow' && (
@@ -1109,11 +1143,11 @@ export function MoviesPage() {
)} )}
</Group> </Group>
<Text size="sm" c="dimmed" lineClamp={3}> <Text size="sm" c="dimmed" lineClamp={3} className="card-plot">
{item.plot || 'Açıklama bulunamadı.'} {item.plot || 'Açıklama bulunamadı.'}
</Text> </Text>
<Group gap={6}> <Group gap={6} className="card-genres">
{item.genres.slice(0, 3).map((genre) => ( {item.genres.slice(0, 3).map((genre) => (
<Badge key={genre} size="sm" variant="dot"> <Badge key={genre} size="sm" variant="dot">
{genre} {genre}
@@ -1122,7 +1156,7 @@ export function MoviesPage() {
</Group> </Group>
</Stack> </Stack>
<img src="/netflix.png" alt="Netflix" className="brand-stamp" /> <img src={brand.src} alt={brand.alt} className="brand-stamp" />
</Card> </Card>
</Grid.Col> </Grid.Col>
) )
@@ -1142,6 +1176,39 @@ export function MoviesPage() {
</Stack> </Stack>
)} )}
</Stack> </Stack>
<Modal
opened={purgeConfirmOpened}
onClose={() => {
if (adminActionPending === 'purge-content') return
setPurgeConfirmOpened(false)
}}
centered
lockScroll={false}
radius="md"
title="Veritabani verilerini sil"
>
<Stack gap="md">
<Text size="sm" c="dimmed">
Bu islem geri alinmaz. Icerik tablosundaki tum film/dizi kayitlari silinecek.
</Text>
<Group justify="flex-end">
<Button
variant="default"
onClick={() => setPurgeConfirmOpened(false)}
disabled={adminActionPending === 'purge-content'}
>
Vazgec
</Button>
<Button
color="red"
loading={adminActionPending === 'purge-content'}
onClick={() => void handlePurgeAllContent()}
>
Evet, tumunu sil
</Button>
</Group>
</Stack>
</Modal>
<Modal <Modal
opened={Boolean(selectedContent)} opened={Boolean(selectedContent)}
onClose={closeContentModal} onClose={closeContentModal}
@@ -1159,7 +1226,7 @@ export function MoviesPage() {
title={null} title={null}
> >
{selectedContent && ( {selectedContent && (
<Stack gap="md"> <Stack gap="md" className="detail-content-stack">
<div className="detail-media-wrap"> <div className="detail-media-wrap">
{selectedContent.backdrop ? ( {selectedContent.backdrop ? (
<Image src={selectedContent.backdrop} alt={selectedContent.title} h={230} /> <Image src={selectedContent.backdrop} alt={selectedContent.title} h={230} />
@@ -1220,7 +1287,7 @@ export function MoviesPage() {
</Text> </Text>
<Group gap={6}> <Group gap={6}>
{selectedContent.cast && selectedContent.cast.length > 0 ? ( {selectedContent.cast && selectedContent.cast.length > 0 ? (
selectedContent.cast.slice(0, 14).map((castName) => ( selectedContent.cast.slice(0, 5).map((castName) => (
<Badge key={castName} size="sm" variant="light" color="gray"> <Badge key={castName} size="sm" variant="light" color="gray">
{castName} {castName}
</Badge> </Badge>
@@ -1232,7 +1299,13 @@ export function MoviesPage() {
)} )}
</Group> </Group>
</Stack> </Stack>
<img src="/netflix.png" alt="Netflix" className="detail-brand-stamp" /> {selectedContentBrand && (
<img
src={selectedContentBrand.src}
alt={selectedContentBrand.alt}
className="detail-brand-stamp"
/>
)}
</Stack> </Stack>
)} )}
</Modal> </Modal>

View File

@@ -142,6 +142,10 @@
.catalog-card { .catalog-card {
position: relative; position: relative;
display: flex;
flex-direction: column;
height: 100%;
min-height: 420px;
overflow: hidden; overflow: hidden;
cursor: pointer; cursor: pointer;
background: linear-gradient(165deg, rgba(29, 33, 44, 0.95), rgba(15, 19, 30, 0.96)); background: linear-gradient(165deg, rgba(29, 33, 44, 0.95), rgba(15, 19, 30, 0.96));
@@ -212,6 +216,8 @@
.media-wrap { .media-wrap {
position: relative; position: relative;
height: 190px;
flex: 0 0 190px;
} }
.image-shell { .image-shell {
@@ -305,6 +311,10 @@
line-height: 1.62; line-height: 1.62;
} }
.detail-content-stack {
padding-bottom: 38px;
}
.detail-brand-stamp { .detail-brand-stamp {
position: absolute; position: absolute;
right: 20px; right: 20px;
@@ -319,9 +329,30 @@
.card-content { .card-content {
position: relative; position: relative;
display: flex;
flex-direction: column;
flex: 1;
min-height: 168px;
z-index: 2; z-index: 2;
} }
.card-meta-row {
min-height: 26px;
align-items: center;
}
.card-plot {
min-height: 4.8em;
line-height: 1.6;
}
.card-genres {
margin-top: auto;
min-height: 24px;
padding-right: 36px;
overflow: hidden;
}
.card-title { .card-title {
font-family: 'Bricolage Grotesque', 'IBM Plex Sans', sans-serif; font-family: 'Bricolage Grotesque', 'IBM Plex Sans', sans-serif;
letter-spacing: 0.01em; letter-spacing: 0.01em;

View File

@@ -1,18 +1,21 @@
import { defineConfig } from 'vite' import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react' import react from '@vitejs/plugin-react'
const apiTarget = process.env.VITE_API_PROXY_TARGET || 'http://localhost:3000'
// https://vite.dev/config/ // https://vite.dev/config/
export default defineConfig({ export default defineConfig({
plugins: [react()], plugins: [react()],
server: { server: {
port: 5173, port: 5173,
host: '0.0.0.0',
proxy: { proxy: {
'/api': { '/api': {
target: 'http://localhost:3000', target: apiTarget,
changeOrigin: true, changeOrigin: true,
}, },
'/socket.io': { '/socket.io': {
target: 'http://localhost:3000', target: apiTarget,
changeOrigin: true, changeOrigin: true,
ws: true, ws: true,
}, },

View File

@@ -33,7 +33,7 @@ export interface MetricsRealtimeEvent {
sourceCounts: { sourceCounts: {
cache: number; cache: number;
database: number; database: number;
netflix: number; scraper: number;
}; };
occurredAt: string; occurredAt: string;
} }

View File

@@ -1,27 +1,19 @@
import { Request, Response, NextFunction } from 'express'; import { Request, Response, NextFunction } from 'express';
import { z } from 'zod'; import { z } from 'zod';
import type { ApiResponse, GetInfoRequest } from '../types/index.js'; import type { ApiResponse, GetInfoRequest } from '../types/index.js';
import { isSupportedContentUrl } from '../utils/contentUrl.js';
/** /**
* Validation schema for /api/getinfo endpoint * Validation schema for /api/getinfo endpoint
*/ */
const getInfoSchema = z.object({ const getInfoSchema = z.object({
url: z.string().url('Invalid URL format').refine((url) => { url: z
// Validate Netflix URL .string()
try { .url('Invalid URL format')
const parsedUrl = new URL(url); .refine(
const validHosts = [ (url) => isSupportedContentUrl(url),
'www.netflix.com', 'URL must be Netflix /title/... or PrimeVideo /detail/...'
'netflix.com', ),
'www.netflix.com.tr',
'netflix.com.tr',
];
const hasTitlePath = /\/title\/\d+/.test(url);
return validHosts.includes(parsedUrl.hostname) && hasTitlePath;
} catch {
return false;
}
}, 'URL must be a valid Netflix title URL (e.g., https://www.netflix.com/tr/title/81616256)'),
}); });
/** /**

View File

@@ -286,6 +286,30 @@ router.post(
} }
); );
/**
 * POST /api/admin/content/purge
 * Guarded by adminOnlyMiddleware. Delegates to AdminService.purgeAllContent()
 * and returns its summary; a thrown error is reported as HTTP 500 with code
 * ADMIN_CONTENT_PURGE_ERROR.
 */
router.post(
  '/admin/content/purge',
  adminOnlyMiddleware,
  async (_req: Request, res: Response<ApiResponse<AdminActionResponse>>) => {
    try {
      const data = await AdminService.purgeAllContent();
      res.json({ success: true, data });
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to purge content';
      res.status(500).json({
        success: false,
        error: { code: 'ADMIN_CONTENT_PURGE_ERROR', message },
      });
    }
  }
);
/** /**
* POST /api/getinfo/async * POST /api/getinfo/async
* Create async job for content scraping * Create async job for content scraping

View File

@@ -6,21 +6,29 @@ import { MetricsService } from './metrics.service.js';
import { CacheService } from './cache.service.js'; import { CacheService } from './cache.service.js';
import { ContentService } from './content.service.js'; import { ContentService } from './content.service.js';
import type { AdminActionResponse, AdminOverviewResponse } from '../types/index.js'; import type { AdminActionResponse, AdminOverviewResponse } from '../types/index.js';
import { parseSupportedContentUrl } from '../utils/contentUrl.js';
const CACHE_PREFIX = 'netflix:content:'; const CACHE_PREFIX = 'content:';
const MAX_CACHE_KEYS_FOR_ANALYSIS = 1000; const MAX_CACHE_KEYS_FOR_ANALYSIS = 1000;
function formatCacheKeyLabel(key: string): string { function formatCacheKeyLabel(key: string): string {
return key.replace(CACHE_PREFIX, ''); return key.replace(CACHE_PREFIX, '');
} }
function extractTitleIdFromCacheKey(key: string): string | null { function extractProviderIdFromCacheKey(key: string): { provider: string; id: string } | null {
const normalized = formatCacheKeyLabel(key); const normalized = formatCacheKeyLabel(key);
return /^\d+$/.test(normalized) ? normalized : null; const match = normalized.match(/^(netflix|primevideo):([A-Za-z0-9]+)$/);
if (!match) return null;
const provider = match[1];
const id = match[2];
if (!provider || !id) return null;
return { provider, id };
} }
function extractTitleIdFromUrl(url: string): string | null { function extractProviderIdFromUrl(url: string): { provider: string; id: string } | null {
return url.match(/\/title\/(\d+)/)?.[1] ?? null; const parsed = parseSupportedContentUrl(url);
if (!parsed) return null;
return { provider: parsed.provider, id: parsed.id };
} }
function parseRedisInfoValue(info: string, key: string): number | null { function parseRedisInfoValue(info: string, key: string): number | null {
@@ -144,16 +152,25 @@ export class AdminService {
min30Plus: 0, min30Plus: 0,
}; };
const cacheTitleIds = Array.from( const cacheProviderIds = Array.from(
new Set(cacheKeys.map((key) => extractTitleIdFromCacheKey(key)).filter((id): id is string => Boolean(id))) new Set(
cacheKeys
.map((key) => extractProviderIdFromCacheKey(key))
.filter((item): item is { provider: string; id: string } => Boolean(item))
.map((item) => `${item.provider}:${item.id}`)
)
); );
const relatedContent = cacheTitleIds.length const relatedContent = cacheProviderIds.length
? await prisma.content.findMany({ ? await prisma.content.findMany({
where: { where: {
OR: cacheTitleIds.map((id) => ({ OR: cacheProviderIds.map((providerId) => {
url: { contains: `/title/${id}` }, const [provider, id] = providerId.split(':');
})), if (provider === 'primevideo') {
return { url: { contains: `/detail/${id}` } };
}
return { url: { contains: `/title/${id}` } };
}),
}, },
select: { select: {
url: true, url: true,
@@ -164,9 +181,12 @@ export class AdminService {
const titleMap = new Map<string, string>(); const titleMap = new Map<string, string>();
for (const item of relatedContent) { for (const item of relatedContent) {
const id = extractTitleIdFromUrl(item.url); const parsed = extractProviderIdFromUrl(item.url);
if (id && !titleMap.has(id)) { if (parsed) {
titleMap.set(id, item.title); const key = `${parsed.provider}:${parsed.id}`;
if (!titleMap.has(key)) {
titleMap.set(key, item.title);
}
} }
} }
@@ -196,7 +216,7 @@ export class AdminService {
if (ttlValue > 0) { if (ttlValue > 0) {
const formattedKey = formatCacheKeyLabel(cacheKeys[i] || ''); const formattedKey = formatCacheKeyLabel(cacheKeys[i] || '');
const titleId = extractTitleIdFromCacheKey(cacheKeys[i] || ''); const providerId = extractProviderIdFromCacheKey(cacheKeys[i] || '');
const rawValue = valueResults?.[i]?.[1]; const rawValue = valueResults?.[i]?.[1];
let cachedAt: number | null = null; let cachedAt: number | null = null;
if (typeof rawValue === 'string') { if (typeof rawValue === 'string') {
@@ -209,7 +229,9 @@ export class AdminService {
} }
expiringSoon.push({ expiringSoon.push({
key: formattedKey, key: formattedKey,
mediaTitle: titleId ? titleMap.get(titleId) ?? null : null, mediaTitle: providerId
? titleMap.get(`${providerId.provider}:${providerId.id}`) ?? null
: null,
cachedAt, cachedAt,
ttlSeconds: ttlValue, ttlSeconds: ttlValue,
}); });
@@ -450,6 +472,23 @@ export class AdminService {
details: `Stale content refresh queued for items older than ${days} days`, details: `Stale content refresh queued for items older than ${days} days`,
}; };
} }
/**
 * Delete every content row and every genre row, then clear the content cache.
 *
 * Both deletes run in a single Prisma transaction. The reported number is taken
 * from the delete result itself instead of a separate count query issued
 * beforehand, so it matches the rows actually removed even when content is
 * written concurrently, and one database round-trip is saved.
 *
 * @returns Summary in which `queued` carries the number of deleted content
 *   rows, `skipped` is always 0 and `details` is a fixed status message.
 * @throws Propagates any error raised by the transaction or the cache clear.
 */
static async purgeAllContent(): Promise<AdminActionResponse> {
  const [deletedContent] = await prisma.$transaction([
    prisma.content.deleteMany({}),
    prisma.genre.deleteMany({}),
  ]);

  // The cache is cleared only once the database transaction has resolved.
  await CacheService.clearAll();

  return {
    queued: deletedContent.count,
    skipped: 0,
    details: 'Tum icerik verileri veritabanindan silindi',
  };
}
} }
export default AdminService; export default AdminService;

View File

@@ -3,19 +3,35 @@ import { env } from '../config/env.js';
import { emitCacheEvent } from '../config/socket.js'; import { emitCacheEvent } from '../config/socket.js';
import logger from '../utils/logger.js'; import logger from '../utils/logger.js';
import type { GetInfoResponse, CacheEntry } from '../types/index.js'; import type { GetInfoResponse, CacheEntry } from '../types/index.js';
import { parseSupportedContentUrl } from '../utils/contentUrl.js';
/** /**
* Cache key prefix for Netflix content * Cache key prefix for scraped content
*/ */
const CACHE_PREFIX = 'netflix:content:'; const CACHE_PREFIX = 'content:';
/** /**
* Generate cache key from URL * Generate cache key from URL
*/ */
function getCacheKey(url: string): string { function getCacheKey(url: string): string {
// Use URL hash or title ID as key const parsed = parseSupportedContentUrl(url);
const titleId = url.match(/\/title\/(\d+)/)?.[1] || url;
return `${CACHE_PREFIX}${titleId}`; if (parsed) {
return `${CACHE_PREFIX}${parsed.provider}:${parsed.id}`;
}
return `${CACHE_PREFIX}url:${encodeURIComponent(url)}`;
}
/**
 * Ensure a cached payload carries a recognised provider.
 *
 * Payloads already tagged 'netflix' or 'primevideo' are returned untouched.
 * Otherwise a copy is returned whose provider is derived from the URL, falling
 * back to 'netflix' when the URL cannot be parsed.
 */
function normalizeCachedResponse(url: string, data: GetInfoResponse): GetInfoResponse {
  const hasKnownProvider = data.provider === 'netflix' || data.provider === 'primevideo';
  if (!hasKnownProvider) {
    const provider = parseSupportedContentUrl(url)?.provider ?? 'netflix';
    return { ...data, provider };
  }

  return data;
}
/** /**
@@ -39,7 +55,7 @@ export class CacheService {
logger.debug('Cache hit', { url }); logger.debug('Cache hit', { url });
const entry: CacheEntry<GetInfoResponse> = JSON.parse(cached); const entry: CacheEntry<GetInfoResponse> = JSON.parse(cached);
return entry.data; return normalizeCachedResponse(url, entry.data);
} catch (error) { } catch (error) {
logger.error('Cache get error', { logger.error('Cache get error', {
url, url,
@@ -57,7 +73,7 @@ export class CacheService {
const ttl = env.REDIS_TTL_SECONDS; const ttl = env.REDIS_TTL_SECONDS;
const entry: CacheEntry<GetInfoResponse> = { const entry: CacheEntry<GetInfoResponse> = {
data, data: normalizeCachedResponse(url, data),
cachedAt: Date.now(), cachedAt: Date.now(),
ttl, ttl,
}; };
@@ -137,7 +153,7 @@ export class CacheService {
} }
/** /**
* Clear all Netflix content cache * Clear all scraped content cache
*/ */
static async clearAll(): Promise<void> { static async clearAll(): Promise<void> {
try { try {

View File

@@ -1,6 +1,7 @@
import prisma from '../config/database.js'; import prisma from '../config/database.js';
import { emitContentEvent } from '../config/socket.js'; import { emitContentEvent } from '../config/socket.js';
import type { ContentData, ScraperResult, GetInfoResponse } from '../types/index.js'; import type { ContentData, ScraperResult, GetInfoResponse } from '../types/index.js';
import { parseSupportedContentUrl } from '../utils/contentUrl.js';
/** /**
* Content Service for database operations * Content Service for database operations
@@ -242,7 +243,9 @@ export class ContentService {
* Convert ContentData to API response format * Convert ContentData to API response format
*/ */
static toApiResponse(data: ContentData): GetInfoResponse { static toApiResponse(data: ContentData): GetInfoResponse {
const provider = parseSupportedContentUrl(data.url)?.provider ?? 'netflix';
return { return {
provider,
title: data.title, title: data.title,
year: data.year, year: data.year,
plot: data.plot, plot: data.plot,

View File

@@ -1,4 +1,5 @@
import { v4 as uuidv4 } from 'uuid'; import { v4 as uuidv4 } from 'uuid';
import type { Prisma } from '@prisma/client';
import prisma from '../config/database.js'; import prisma from '../config/database.js';
import { CacheService } from './cache.service.js'; import { CacheService } from './cache.service.js';
import { ContentService } from './content.service.js'; import { ContentService } from './content.service.js';
@@ -60,7 +61,7 @@ export class JobService {
status?: JobStatus; status?: JobStatus;
progress?: number; progress?: number;
step?: string; step?: string;
result?: unknown; result?: Prisma.InputJsonValue;
error?: string; error?: string;
} }
): Promise<ScrapeJob> { ): Promise<ScrapeJob> {
@@ -73,7 +74,7 @@ export class JobService {
} }
/** /**
* Process a scrape job (hybrid: cache -> db -> netflix) * Process a scrape job (hybrid: cache -> db -> scraper)
*/ */
static async process(jobId: string): Promise<void> { static async process(jobId: string): Promise<void> {
const job = await this.getById(jobId); const job = await this.getById(jobId);
@@ -117,11 +118,14 @@ export class JobService {
return; return;
} }
// Update progress const provider = ScraperService.detectProvider(job.url);
await this.update(jobId, { progress: 50, step: 'scraping_netflix' }); const providerLabel = provider === 'primevideo' ? 'Prime Video' : 'Netflix';
emitJobProgress(jobId, 50, 'processing', 'Scraping Netflix');
// Step 3: Scrape from Netflix // Update progress
await this.update(jobId, { progress: 50, step: `scraping_${provider ?? 'source'}` });
emitJobProgress(jobId, 50, 'processing', `Scraping ${providerLabel}`);
// Step 3: Scrape from source URL
const scraperResult = await ScraperService.scrape(job.url); const scraperResult = await ScraperService.scrape(job.url);
// Update progress // Update progress
@@ -136,7 +140,7 @@ export class JobService {
await CacheService.set(job.url, responseData); await CacheService.set(job.url, responseData);
// Complete the job // Complete the job
await this.completeJob(jobId, responseData, 'netflix'); await this.completeJob(jobId, responseData, 'scraper');
} catch (error) { } catch (error) {
const apiError: ApiError = { const apiError: ApiError = {
code: 'SCRAPE_ERROR', code: 'SCRAPE_ERROR',
@@ -168,7 +172,7 @@ export class JobService {
status: 'completed', status: 'completed',
progress: 100, progress: 100,
step: 'completed', step: 'completed',
result: data, result: data as unknown as Prisma.InputJsonValue,
}); });
emitJobCompleted(jobId, data, source); emitJobCompleted(jobId, data, source);
@@ -201,7 +205,7 @@ export class JobService {
return { data: responseData, source: 'database' }; return { data: responseData, source: 'database' };
} }
// Step 3: Scrape from Netflix // Step 3: Scrape from source URL
const scraperResult = await ScraperService.scrape(url); const scraperResult = await ScraperService.scrape(url);
// Step 4: Save to database // Step 4: Save to database
@@ -210,9 +214,9 @@ export class JobService {
// Step 5: Cache the result // Step 5: Cache the result
await CacheService.set(url, responseData); await CacheService.set(url, responseData);
await MetricsService.incrementSource('netflix'); await MetricsService.incrementSource('scraper');
return { data: responseData, source: 'netflix' }; return { data: responseData, source: 'scraper' };
} }
/** /**

View File

@@ -59,7 +59,7 @@ export class MetricsService {
bySource: { bySource: {
cache: number; cache: number;
database: number; database: number;
netflix: number; scraper: number;
}; };
}> { }> {
const [counters, sources] = await Promise.all([ const [counters, sources] = await Promise.all([
@@ -73,7 +73,7 @@ export class MetricsService {
bySource: { bySource: {
cache: toInt(sources.cache), cache: toInt(sources.cache),
database: toInt(sources.database), database: toInt(sources.database),
netflix: toInt(sources.netflix), scraper: toInt(sources.scraper),
}, },
}; };
} }

View File

@@ -1,6 +1,10 @@
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
import type { ScraperResult, ContentType } from '../types/index.js'; import type { ScraperResult, ContentType } from '../types/index.js';
import logger from '../utils/logger.js'; import logger from '../utils/logger.js';
import {
parseSupportedContentUrl,
type SupportedProvider,
} from '../utils/contentUrl.js';
/** /**
* Age rating patterns to detect and exclude from genres * Age rating patterns to detect and exclude from genres
@@ -14,43 +18,55 @@ const AGE_RATING_PATTERN = /^[\u2066-\u2069\u202A-\u202E\u200E-\u200F]*(\d+\+|PG
* Matches patterns like "3 Sezon", "2 Seasons", "1. Sezon", etc. * Matches patterns like "3 Sezon", "2 Seasons", "1. Sezon", etc.
*/ */
const SEASON_PATTERN = /(\d+)\.?\s*(sezon|season|sezonlar|seasons)/i; const SEASON_PATTERN = /(\d+)\.?\s*(sezon|season|sezonlar|seasons)/i;
const EPISODE_PATTERN = /(\d+)\.?\s*(bölüm|bolum|bölümler|bolumler|episode|episodes)/i;
const EPISODE_TOKEN_PATTERN = /\b(bölüm|bolum|bölümler|bolumler|episode|episodes)\b/i;
/** /**
* Netflix HTML Scraper Service * Scraper Service (Netflix + Prime Video)
* Uses Cheerio for parsing HTML content * Uses Cheerio for parsing HTML content
*/ */
export class ScraperService { export class ScraperService {
/**
 * Identify which supported provider a URL belongs to.
 *
 * @returns The provider reported by parseSupportedContentUrl, or null when
 *   the URL is not recognised.
 */
static detectProvider(url: string): SupportedProvider | null {
  const parsed = parseSupportedContentUrl(url);
  return parsed?.provider ?? null;
}
/**
 * Report whether parseSupportedContentUrl recognises the given URL.
 */
static isSupportedUrl(url: string): boolean {
  const parsed = parseSupportedContentUrl(url);
  return !!parsed;
}
/** /**
* Validate if URL is a valid Netflix URL * Validate if URL is a valid Netflix URL
*/ */
static isValidNetflixUrl(url: string): boolean { static isValidNetflixUrl(url: string): boolean {
try { return parseSupportedContentUrl(url)?.provider === 'netflix';
const parsedUrl = new URL(url); }
const validHosts = [
'www.netflix.com', /**
'netflix.com', * Validate if URL is a valid Prime Video URL
'www.netflix.com.tr', */
'netflix.com.tr', static isValidPrimeVideoUrl(url: string): boolean {
]; return parseSupportedContentUrl(url)?.provider === 'primevideo';
return validHosts.includes(parsedUrl.hostname);
} catch {
return false;
}
} }
/** /**
* Extract Netflix title ID from URL * Extract Netflix title ID from URL
*/ */
static extractTitleId(url: string): string | null { static extractTitleId(url: string): string | null {
const match = url.match(/\/title\/(\d+)/); const parsed = parseSupportedContentUrl(url);
return match ? match[1] : null; return parsed?.provider === 'netflix' ? parsed.id : null;
} }
/** /**
* Fetch HTML content from Netflix URL * Fetch HTML content from URL
*/ */
private static async fetchHtml(url: string): Promise<string> { private static async fetchHtml(url: string, provider: SupportedProvider): Promise<string> {
logger.info('Fetching Netflix page', { url }); logger.info('Fetching content page', { provider, url });
const response = await fetch(url, { const response = await fetch(url, {
headers: { headers: {
@@ -63,7 +79,7 @@ export class ScraperService {
}); });
if (!response.ok) { if (!response.ok) {
throw new Error(`Failed to fetch Netflix page: ${response.status}`); throw new Error(`Failed to fetch ${provider} page: ${response.status}`);
} }
return response.text(); return response.text();
@@ -73,22 +89,46 @@ export class ScraperService {
* Parse HTML and extract content data * Parse HTML and extract content data
*/ */
static async scrape(url: string): Promise<ScraperResult> { static async scrape(url: string): Promise<ScraperResult> {
if (!this.isValidNetflixUrl(url)) { const parsed = parseSupportedContentUrl(url);
throw new Error('Invalid Netflix URL');
if (!parsed) {
throw new Error(
'Invalid content URL. Use Netflix /title/... or PrimeVideo /detail/...'
);
} }
const html = await this.fetchHtml(url); const html = await this.fetchHtml(url, parsed.provider);
const $ = cheerio.load(html); const $ = cheerio.load(html);
const title = this.extractTitle($); const result =
const year = this.extractYear($); parsed.provider === 'netflix'
const plot = this.extractPlot($); ? this.scrapeNetflix($)
const ageRating = this.extractAgeRating($); : this.scrapePrimeVideo($, parsed.id);
const { genres, type, currentSeason } = this.extractGenresTypeAndSeason($);
const cast = this.extractCast($);
const backdropUrl = this.extractBackdrop($);
const result: ScraperResult = { logger.info('Scraping completed', {
provider: parsed.provider,
url,
title: result.title,
year: result.year,
ageRating: result.ageRating,
type: result.type,
genresCount: result.genres.length,
castCount: result.cast.length,
});
return result;
}
private static scrapeNetflix($: cheerio.CheerioAPI): ScraperResult {
const title = this.extractNetflixTitle($);
const year = this.extractNetflixYear($);
const plot = this.extractNetflixPlot($);
const ageRating = this.extractNetflixAgeRating($);
const { genres, type, currentSeason } = this.extractNetflixGenresTypeAndSeason($);
const cast = this.extractNetflixCast($);
const backdropUrl = this.extractNetflixBackdrop($);
return {
title, title,
year, year,
plot, plot,
@@ -99,24 +139,71 @@ export class ScraperService {
backdropUrl, backdropUrl,
currentSeason, currentSeason,
}; };
}
logger.info('Scraping completed', { private static scrapePrimeVideo($: cheerio.CheerioAPI, detailId: string): ScraperResult {
url, const title = this.extractPrimeTitle($, detailId);
const year = this.extractPrimeYear($);
const { type, currentSeason } = this.extractPrimeTypeAndSeason($);
const plot = this.extractPrimePlot($);
const cast = this.extractPrimeCast($);
const genres = this.extractPrimeGenres($);
const backdropUrl = this.extractPrimeBackdrop($);
const ageRating = this.extractPrimeAgeRating($);
return {
title, title,
year, year,
plot,
ageRating, ageRating,
type, type,
genresCount: genres.length, genres,
castCount: cast.length, cast,
}); backdropUrl,
currentSeason,
};
}
return result; private static parseYear(text: string): number | null {
const yearMatch = text.match(/(19|20)\d{2}/);
if (!yearMatch) return null;
const year = Number.parseInt(yearMatch[0], 10);
if (Number.isNaN(year)) return null;
if (year < 1900 || year > new Date().getFullYear() + 5) return null;
return year;
}
/**
 * Collapse every run of whitespace into a single space and strip
 * leading/trailing whitespace.
 *
 * @param text Raw text, typically read from a DOM node or attribute.
 * @returns The whitespace-normalised text (may be an empty string).
 */
private static cleanText(text: string): string {
  const collapsed = text.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Strip Prime Video boilerplate from a title candidate.
 *
 * The text is whitespace-normalised first, then each pattern below is
 * removed once, in order. The order matters: the season pattern with the
 * look-ahead must run while the "- Prime Video" suffix is still present.
 *
 * @param text Raw title candidate (heading text, aria-label, meta content, ...).
 * @returns The cleaned title, or an empty string when nothing is left.
 */
private static normalizePrimeTitleCandidate(text: string): string {
  const boilerplatePatterns: RegExp[] = [
    // Leading call-to-action prefixes (Turkish and English).
    /^[İIiı]zle:\s*/i,
    /^canl[ıi]\s+izleyin:\s*/i,
    /^watch\s+now:\s*/i,
    /^prime\s+video:\s*/i,
    // "Season N" immediately before a trailing "- Prime Video".
    /\s*(sezon|season)\s+\d+(?=\s*[-–—]\s*prime\s+video$)/i,
    // Trailing brand suffixes.
    /\s*[-–—]\s*prime\s+video$/i,
    /\s*\|\s*prime\s*video$/i,
    // Trailing "Season N" with no brand suffix.
    /\s+(sezon|season)\s+\d+\s*$/i,
  ];

  let candidate = this.cleanText(text);
  for (const pattern of boilerplatePatterns) {
    candidate = candidate.replace(pattern, '');
  }
  return candidate.trim();
}
private static uniqueTextList(items: string[]): string[] {
const unique = new Set<string>();
for (const item of items) {
const normalized = this.cleanText(item);
if (normalized) unique.add(normalized);
}
return Array.from(unique);
} }
/** /**
* Extract title from HTML * Netflix extractors
*/ */
private static extractTitle($: cheerio.CheerioAPI): string { private static extractNetflixTitle($: cheerio.CheerioAPI): string {
let title = $('h2.default-ltr-iqcdef-cache-tnklrp').first().text().trim(); let title = $('h2.default-ltr-iqcdef-cache-tnklrp').first().text().trim();
if (!title) { if (!title) {
@@ -131,24 +218,12 @@ export class ScraperService {
return title || 'Unknown Title'; return title || 'Unknown Title';
} }
/** private static extractNetflixYear($: cheerio.CheerioAPI): number | null {
* Extract year from HTML (first li element)
*/
private static extractYear($: cheerio.CheerioAPI): number | null {
const yearText = $('li.default-ltr-iqcdef-cache-6prs41').first().text().trim(); const yearText = $('li.default-ltr-iqcdef-cache-6prs41').first().text().trim();
const year = parseInt(yearText, 10); return this.parseYear(yearText);
if (!isNaN(year) && year >= 1900 && year <= new Date().getFullYear() + 5) {
return year;
}
return null;
} }
/** private static extractNetflixPlot($: cheerio.CheerioAPI): string | null {
* Extract plot/description from HTML
*/
private static extractPlot($: cheerio.CheerioAPI): string | null {
const plot = $('span.default-ltr-iqcdef-cache-6ukeej').first().text().trim(); const plot = $('span.default-ltr-iqcdef-cache-6ukeej').first().text().trim();
if (!plot) { if (!plot) {
@@ -159,91 +234,70 @@ export class ScraperService {
return plot || null; return plot || null;
} }
/** private static extractNetflixAgeRating($: cheerio.CheerioAPI): string | null {
* Extract age rating from HTML (e.g., "18+", "16+") const items = $('li.default-ltr-iqcdef-cache-6prs41').toArray();
* Searches all li elements (except first which is year) for (let i = 1; i < items.length; i += 1) {
*/ const element = items[i];
private static extractAgeRating($: cheerio.CheerioAPI): string | null { if (!element) continue;
let ageRating: string | null = null;
const foundTexts: string[] = [];
$('li.default-ltr-iqcdef-cache-6prs41').each((index, element) => {
if (index === 0) return; // Skip year
const text = $(element).text().trim(); const text = $(element).text().trim();
foundTexts.push(text); const cleanText = text
.replace(/[\u2066-\u2069\u202A-\u202E\u200E-\u200F]/g, '')
// Clean Unicode characters first .trim();
const cleanText = text.replace(/[\u2066-\u2069\u202A-\u202E\u200E-\u200F]/g, '').trim();
if (cleanText && AGE_RATING_PATTERN.test(cleanText)) { if (cleanText && AGE_RATING_PATTERN.test(cleanText)) {
ageRating = cleanText; return cleanText;
return false; // Break loop
} }
});
// Debug logging
if (!ageRating && foundTexts.length > 0) {
logger.debug('Age rating not found in elements', {
foundTexts,
pattern: AGE_RATING_PATTERN.source,
});
} }
return ageRating; return null;
} }
/** private static extractNetflixGenresTypeAndSeason(
* Extract genres from HTML (skip year, age rating, and season info) $: cheerio.CheerioAPI
* Also detects content type (movie/tvshow) based on season presence ): { genres: string[]; type: ContentType; currentSeason: number | null } {
* Extracts current season number from season text
*/
private static extractGenresTypeAndSeason($: cheerio.CheerioAPI): { genres: string[]; type: ContentType; currentSeason: number | null } {
const genres: string[] = []; const genres: string[] = [];
let type: ContentType = 'movie'; let type: ContentType = 'movie';
let currentSeason: number | null = null; let currentSeason: number | null = null;
const foundTexts: string[] = [];
$('li.default-ltr-iqcdef-cache-6prs41').each((index, element) => { $('li.default-ltr-iqcdef-cache-6prs41').each((index, element) => {
if (index === 0) return; // Skip year if (index === 0) return;
const text = $(element).text().trim(); const text = $(element).text().trim();
const cleanText = text.replace(/[\u2066\u2069\u202A\u202B\u202C\u202D\u202E\u200E\u200F]/g, '').trim(); const cleanText = text
foundTexts.push(cleanText); .replace(/[\u2066\u2069\u202A\u202B\u202C\u202D\u202E\u200E\u200F]/g, '')
.trim();
// Check for season pattern - indicates TV show
const seasonMatch = cleanText.match(SEASON_PATTERN); const seasonMatch = cleanText.match(SEASON_PATTERN);
if (cleanText && seasonMatch) { if (cleanText && seasonMatch) {
type = 'tvshow'; type = 'tvshow';
// Extract season number from the text const seasonValue = seasonMatch[1];
const seasonNum = parseInt(seasonMatch[1], 10); const seasonNum = seasonValue ? Number.parseInt(seasonValue, 10) : Number.NaN;
if (!isNaN(seasonNum)) { if (Number.isFinite(seasonNum)) {
currentSeason = seasonNum; currentSeason = seasonNum;
} }
return; // Skip adding to genres return;
}
const episodeMatch = cleanText.match(EPISODE_PATTERN);
const hasEpisodeToken = EPISODE_TOKEN_PATTERN.test(cleanText);
if (cleanText && (episodeMatch || hasEpisodeToken)) {
type = 'tvshow';
if (currentSeason == null) {
currentSeason = 1;
}
return;
} }
// Skip age rating - only add actual genres
if (cleanText && !AGE_RATING_PATTERN.test(cleanText)) { if (cleanText && !AGE_RATING_PATTERN.test(cleanText)) {
genres.push(cleanText); genres.push(cleanText);
} }
}); });
// Debug logging
logger.debug('extractGenresTypeAndSeason completed', {
foundTexts,
genres,
type,
currentSeason,
});
return { genres, type, currentSeason }; return { genres, type, currentSeason };
} }
/** private static extractNetflixCast($: cheerio.CheerioAPI): string[] {
* Extract cast members from HTML
*/
private static extractCast($: cheerio.CheerioAPI): string[] {
const castText = $('span.default-ltr-iqcdef-cache-m0886o').first().text().trim(); const castText = $('span.default-ltr-iqcdef-cache-m0886o').first().text().trim();
if (!castText) { if (!castText) {
@@ -256,10 +310,7 @@ export class ScraperService {
.filter((name) => name.length > 0); .filter((name) => name.length > 0);
} }
/** private static extractNetflixBackdrop($: cheerio.CheerioAPI): string | null {
* Extract backdrop image URL from HTML
*/
private static extractBackdrop($: cheerio.CheerioAPI): string | null {
const backdropDiv = $('div.default-ltr-iqcdef-cache-1wezh7a').first(); const backdropDiv = $('div.default-ltr-iqcdef-cache-1wezh7a').first();
const img = backdropDiv.find('img').first(); const img = backdropDiv.find('img').first();
@@ -279,6 +330,176 @@ export class ScraperService {
return null; return null;
} }
/**
* Prime Video extractors
*/
/**
 * Resolve the title of a Prime Video detail page.
 *
 * Several sources are read and normalised with
 * `normalizePrimeTitleCandidate`; the first non-empty one wins, in this
 * order: the `h1` title, the aria-label of a link to this detail id, the
 * text of such a link, `og:title`, `meta[name="title"]`, the `<title>`
 * element and finally the slug of the canonical URL.
 *
 * @param $ Loaded page document.
 * @param detailId Detail id taken from the requested URL; used to select
 *   links that point at this page and to validate the canonical URL.
 * @returns The resolved title, or `'Unknown Title'` when every source is empty.
 */
private static extractPrimeTitle($: cheerio.CheerioAPI, detailId: string): string {
  const normalize = (raw: string | undefined): string =>
    this.normalizePrimeTitleCandidate(raw || '');

  const detailLinkSelector = `a[href*="/detail/${detailId}"]`;

  // Slug of the canonical URL, accepted only when that URL ends in this detail id.
  const readCanonicalTitle = (): string => {
    const canonicalHref = $('link[rel="canonical"]').attr('href') || '';
    if (!canonicalHref) return '';
    try {
      const { pathname } = new URL(canonicalHref, 'https://www.primevideo.com');
      const parts = pathname.match(/\/detail\/([^/]+)\/([A-Za-z0-9]+)/i);
      if (!parts || parts[2] !== detailId) return '';
      return normalize(decodeURIComponent(parts[1] || ''));
    } catch {
      // best effort
      return '';
    }
  };

  const candidates: string[] = [
    normalize($('h1[data-automation-id="title"]').first().text()),
    normalize(
      $(`a[data-testid="image-link"][aria-label][href*="/detail/${detailId}"]`)
        .first()
        .attr('aria-label') ||
        $(`${detailLinkSelector}[aria-label]`).first().attr('aria-label')
    ),
    normalize(
      $(`a[data-testid="image-link"][href*="/detail/${detailId}"]`).first().text() ||
        $(detailLinkSelector).first().text()
    ),
    normalize($('meta[property="og:title"]').attr('content')),
    normalize($('meta[name="title"]').attr('content')),
    normalize($('title').first().text()),
    readCanonicalTitle(),
  ];

  const resolved = candidates.find((candidate) => candidate.length > 0);
  return resolved || 'Unknown Title';
}
/**
 * Read the release year from the release-year badge.
 *
 * The badge text is tried first; its `aria-label` is the fallback.
 *
 * @param $ Loaded page document.
 * @returns The year accepted by `parseYear`, or `null` when neither source yields one.
 */
private static extractPrimeYear($: cheerio.CheerioAPI): number | null {
  const badge = $('span[data-automation-id="release-year-badge"]').first();

  const yearFromText = this.parseYear(this.cleanText(badge.text()));
  if (yearFromText) {
    return yearFromText;
  }

  const ariaLabel = badge.attr('aria-label') || '';
  return this.parseYear(this.cleanText(ariaLabel));
}
/**
 * Decide whether a Prime Video page is a movie or a TV show and read the
 * currently selected season.
 *
 * Two independent signals are used:
 *  - the text of the season container / season selector mentioning
 *    "season" or "sezon";
 *  - the label of the season drop-list, from which the season number is parsed
 *    ("Season 2", "Sezon 2", "2. Sezon", ...).
 *
 * Either signal marks the content as a TV show, so the result can never be
 * a `'movie'` that carries a season number. This matches the Netflix
 * extractor, where any season match implies `'tvshow'`.
 *
 * @param $ Loaded page document.
 * @returns The content type and the selected season number (`null` when no
 *   season number could be read).
 */
private static extractPrimeTypeAndSeason(
  $: cheerio.CheerioAPI
): { type: ContentType; currentSeason: number | null } {
  const seasonNodeText = this.cleanText(
    $('div.dv-node-dp-seasons, [data-testid="dp-season-selector"]').text()
  );
  const hasSeasonMarker = /\b(sezon|season)\b/i.test(seasonNodeText);

  const seasonLabel =
    $('input#av-droplist-av-atf-season-selector').attr('aria-label') ||
    $('label[for="av-droplist-av-atf-season-selector"] ._36qUej').first().text() ||
    '';
  const seasonMatch = this.cleanText(seasonLabel).match(
    /(?:sezon|season)\s*(\d+)|(\d+)\.?\s*(?:sezon|season)/i
  );

  // Exactly one of the two capture groups is set, depending on word order.
  const seasonDigits = seasonMatch ? seasonMatch[1] || seasonMatch[2] : undefined;
  const parsedSeason = seasonDigits ? Number.parseInt(seasonDigits, 10) : Number.NaN;
  const currentSeason = Number.isFinite(parsedSeason) ? parsedSeason : null;

  const type: ContentType =
    hasSeasonMarker || currentSeason !== null ? 'tvshow' : 'movie';

  return { type, currentSeason };
}
/**
 * Collect cast names from the links matched by `dd.skJCpF a._1NNx6V`.
 *
 * @param $ Loaded page document.
 * @returns Names de-duplicated and cleaned by `uniqueTextList`.
 */
private static extractPrimeCast($: cheerio.CheerioAPI): string[] {
  const castNames = $('dd.skJCpF a._1NNx6V')
    .toArray()
    .map((node) => $(node).text());
  return this.uniqueTextList(castNames);
}
/**
 * Collect genre and mood labels from the genres node of the page.
 *
 * @param $ Loaded page document.
 * @returns Labels de-duplicated and cleaned by `uniqueTextList`.
 */
private static extractPrimeGenres($: cheerio.CheerioAPI): string[] {
  const labelNodes = $(
    'div[data-testid="dv-node-dp-genres"] [data-testid="genre-texts"], div[data-testid="dv-node-dp-genres"] [data-testid="mood-texts"]'
  ).toArray();
  const labels = labelNodes.map((node) => $(node).text());
  return this.uniqueTextList(labels);
}
/**
 * Read the synopsis: the expandable text span first, `og:description` as
 * the fallback.
 *
 * @param $ Loaded page document.
 * @returns The whitespace-normalised synopsis, or `null` when both sources are empty.
 */
private static extractPrimePlot($: cheerio.CheerioAPI): string | null {
  const expandableText = $('span.fbl-expandable-text span._1H6ABQ').first().text();
  const rawPlot =
    expandableText || $('meta[property="og:description"]').attr('content') || '';

  const plot = this.cleanText(rawPlot);
  if (!plot) {
    return null;
  }
  return plot;
}
/**
 * Read the age rating badge, looked up by `data-automation-id` first and by
 * `data-testid` second.
 *
 * @param $ Loaded page document.
 * @returns The whitespace-normalised badge text, or `null` when no badge text is found.
 */
private static extractPrimeAgeRating($: cheerio.CheerioAPI): string | null {
  const automationBadge = $('span[data-automation-id="age-rating-badge"]').first().text();
  const rawRating =
    automationBadge || $('[data-testid="age-rating-badge"]').first().text() || '';

  const ageRating = this.cleanText(rawRating);
  if (!ageRating) {
    return null;
  }
  return ageRating;
}
/**
 * Pick a backdrop image URL for a Prime Video page.
 *
 * The WebP `srcset` is parsed into `url <width>w` entries (entries in any
 * other form are ignored). Preference order:
 *  1. the first entry that is exactly 1080 wide;
 *  2. the narrowest entry wider than 1080;
 *  3. the widest entry available.
 * On equal widths the entry that appears first in the `srcset` wins. When
 * no usable entry exists, the `src` of the base image is returned.
 *
 * @param $ Loaded page document.
 * @returns The chosen image URL, or `null` when nothing is found.
 */
private static extractPrimeBackdrop($: cheerio.CheerioAPI): string | null {
  const srcset =
    $('div.Kc5eKF picture source[type="image/webp"]').first().attr('srcset') ||
    $('picture source[type="image/webp"]').first().attr('srcset') ||
    '';

  let narrowestAbove: { url: string; width: number } | null = null;
  let widest: { url: string; width: number } | null = null;

  for (const rawEntry of srcset.split(',')) {
    const parts = rawEntry.trim().match(/^(\S+)\s+(\d+)w$/);
    if (!parts) continue;

    const url = parts[1];
    const widthRaw = parts[2];
    if (!url || !widthRaw) continue;

    const entry = { url, width: Number.parseInt(widthRaw, 10) };

    // An exact match has top priority, so the first one ends the search.
    if (entry.width === 1080) {
      return entry.url;
    }

    // Strict comparisons keep the earliest entry when widths tie.
    if (entry.width > 1080 && (narrowestAbove === null || entry.width < narrowestAbove.width)) {
      narrowestAbove = entry;
    }
    if (widest === null || entry.width > widest.width) {
      widest = entry;
    }
  }

  if (narrowestAbove) return narrowestAbove.url;
  if (widest) return widest.url;

  const baseImageSrc = $('img[data-testid="base-image"]').first().attr('src');
  return baseImageSrc || null;
}
} }
export default ScraperService; export default ScraperService;

View File

@@ -57,6 +57,7 @@ export interface GetInfoRequest {
} }
export interface GetInfoResponse { export interface GetInfoResponse {
provider: 'netflix' | 'primevideo';
title: string; title: string;
year: number | null; year: number | null;
plot: string | null; plot: string | null;
@@ -134,7 +135,7 @@ export interface AdminOverviewResponse {
sourceCounts: { sourceCounts: {
cache: number; cache: number;
database: number; database: number;
netflix: number; scraper: number;
}; };
}; };
} }
@@ -155,7 +156,7 @@ export interface CacheEntry<T> {
ttl: number; ttl: number;
} }
export type DataSource = 'cache' | 'database' | 'netflix'; export type DataSource = 'cache' | 'database' | 'scraper';
// ============================================ // ============================================
// Socket Event Types // Socket Event Types

49
src/utils/contentUrl.ts Normal file
View File

@@ -0,0 +1,49 @@
/** Streaming providers whose detail pages can be parsed. */
export type SupportedProvider = 'netflix' | 'primevideo';

/** Hostnames accepted as Netflix. */
const NETFLIX_HOSTS = new Set([
  'www.netflix.com',
  'netflix.com',
  'www.netflix.com.tr',
  'netflix.com.tr',
]);

/** Hostnames accepted as Prime Video. */
const PRIME_HOSTS = new Set([
  'www.primevideo.com',
  'primevideo.com',
]);

/** Only web URLs are accepted; anything else cannot be fetched as a page. */
const WEB_PROTOCOLS = new Set(['http:', 'https:']);

/** Netflix: `/title/<digits>` anywhere in the path (a locale prefix may precede it). */
const NETFLIX_TITLE_ID = /\/title\/(\d+)/;

/**
 * Prime Video, slug form: `/detail/<slug>/<id>`. The id must fill a whole
 * path segment, so trailing segments such as `ref=...` are not mistaken for it.
 */
const PRIME_SLUG_THEN_ID = /\/detail\/[^/]+\/([A-Za-z0-9]+)(?:\/|$)/;

/** Prime Video, plain form: `/detail/<id>`. */
const PRIME_LEADING_ID = /\/detail\/([A-Za-z0-9]+)/;

/** Result of recognising a supported content URL. */
export interface ParsedContentUrl {
  /** Provider the URL belongs to. */
  provider: SupportedProvider;
  /** Provider-specific content id taken from the URL path. */
  id: string;
}

/**
 * Recognise a Netflix or Prime Video content URL and extract its id.
 *
 * Accepted shapes:
 *  - Netflix: `http(s)://<netflix host>/.../title/<digits>`
 *  - Prime Video: `http(s)://<prime host>/.../detail/<id>` or
 *    `http(s)://<prime host>/.../detail/<slug>/<id>`
 *
 * For Prime Video the slug form is checked first. Otherwise a URL such as
 * `/detail/The-Boys/<id>` would yield the slug fragment `The` as its id.
 *
 * @param rawUrl URL string, typically user supplied.
 * @returns The provider and id, or `null` when the string is not a valid
 *   URL, uses a non-HTTP(S) scheme, has an unknown host, or has no id in its path.
 */
export function parseSupportedContentUrl(rawUrl: string): ParsedContentUrl | null {
  try {
    const parsedUrl = new URL(rawUrl);
    if (!WEB_PROTOCOLS.has(parsedUrl.protocol)) return null;

    const hostname = parsedUrl.hostname.toLowerCase();
    const { pathname } = parsedUrl;

    if (NETFLIX_HOSTS.has(hostname)) {
      const id = pathname.match(NETFLIX_TITLE_ID)?.[1];
      return id ? { provider: 'netflix', id } : null;
    }

    if (PRIME_HOSTS.has(hostname)) {
      const id =
        pathname.match(PRIME_SLUG_THEN_ID)?.[1] ??
        pathname.match(PRIME_LEADING_ID)?.[1];
      return id ? { provider: 'primevideo', id } : null;
    }

    return null;
  } catch {
    // `new URL` throws on strings that are not absolute URLs.
    return null;
  }
}

/**
 * Check whether a URL is a supported Netflix or Prime Video content URL.
 *
 * @param rawUrl URL string, typically user supplied.
 * @returns `true` when `parseSupportedContentUrl` recognises the URL.
 */
export function isSupportedContentUrl(rawUrl: string): boolean {
  return parseSupportedContentUrl(rawUrl) !== null;
}