first commit

This commit is contained in:
2026-02-28 02:44:41 +03:00
commit 97fb289fe7
70 changed files with 11928 additions and 0 deletions

63
src/config/database.ts Normal file
View File

@@ -0,0 +1,63 @@
import { PrismaClient } from '@prisma/client';
import { getDatabaseUrl, env } from './env.js';
import logger from '../utils/logger.js';
// Publish the connection string derived from the validated POSTGRES_* settings
// before the client below is constructed (presumably the Prisma schema reads
// env("DATABASE_URL") — confirm against schema.prisma).
process.env.DATABASE_URL = getDatabaseUrl();

/**
 * Slot on `globalThis` used to keep a single PrismaClient alive outside
 * production, so re-evaluating this module reuses the existing client instead
 * of constructing another one.
 */
const globalForPrisma = globalThis as unknown as {
  prisma: PrismaClient | undefined;
};

/**
 * Builds a client whose log levels depend on NODE_ENV: queries, errors and
 * warnings in development, errors only otherwise.
 */
function createPrismaClient() {
  const isDevelopment = env.NODE_ENV === 'development';
  return new PrismaClient({
    log: isDevelopment ? ['query', 'error', 'warn'] : ['error'],
  });
}

/** Process-wide Prisma client. */
export const prisma = globalForPrisma.prisma ?? createPrismaClient();

if (env.NODE_ENV !== 'production') {
  globalForPrisma.prisma = prisma;
}
/**
 * Opens the Prisma connection, retrying on failure.
 *
 * @param retries - Maximum number of connection attempts.
 * @param delay - Pause in milliseconds between two failed attempts.
 * @throws Error when every attempt has failed.
 */
export async function connectDatabase(retries = 5, delay = 2000): Promise<void> {
  let attempt = 0;

  while (attempt < retries) {
    attempt += 1;

    try {
      await prisma.$connect();
      logger.info('Database connected successfully', {
        host: env.POSTGRES_HOST,
        database: env.POSTGRES_DB,
      });
      return;
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      logger.warn(`Database connection attempt ${attempt}/${retries} failed`, {
        error: reason,
      });

      // No point sleeping after the final attempt; fall through to the throw.
      const hasAttemptsLeft = attempt < retries;
      if (hasAttemptsLeft) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
  }

  throw new Error('Failed to connect to database after maximum retries');
}
/**
 * Closes the Prisma connection and logs once it has been released.
 * Rejects with whatever `$disconnect()` rejects with.
 */
export async function disconnectDatabase(): Promise<void> {
  const closing = prisma.$disconnect();
  await closing;

  logger.info('Database disconnected');
}
export default prisma;

77
src/config/env.ts Normal file
View File

@@ -0,0 +1,77 @@
import { z } from 'zod';
/**
 * Schema for a numeric setting supplied as a string: the value is converted
 * with `Number`, must be a positive number, and falls back to `fallback`
 * (itself a string) when the variable is absent.
 */
const positiveNumber = (fallback: string) =>
  z.string().transform(Number).pipe(z.number().positive()).default(fallback);

/** Schema for a mandatory, non-empty string setting. */
const requiredString = () => z.string().min(1);

/**
 * Shape of the environment this service needs.
 * Validated once at startup by `parseEnv`.
 */
const envSchema = z.object({
  // Server
  NODE_ENV: z.enum(['development', 'production', 'test']).default('development'),
  PORT: positiveNumber('3000'),

  // PostgreSQL
  POSTGRES_HOST: requiredString(),
  POSTGRES_PORT: positiveNumber('5432'),
  POSTGRES_USER: requiredString(),
  POSTGRES_PASSWORD: requiredString(),
  POSTGRES_DB: requiredString(),

  // Redis
  REDIS_HOST: requiredString(),
  REDIS_PORT: positiveNumber('6379'),
  REDIS_TTL_SECONDS: positiveNumber('604800'), // 7 days

  // Rate limiting
  RATE_LIMIT_WINDOW_MS: positiveNumber('60000'), // 1 minute
  RATE_LIMIT_MAX_REQUESTS: positiveNumber('30'),

  // API keys (one named key per frontend)
  API_KEY_WEB: requiredString(),
  API_KEY_MOBILE: requiredString(),
  API_KEY_ADMIN: requiredString(),

  // TMDB API
  TMDB_API_KEY: requiredString(),
  TMDB_ACCESS_TOKEN: requiredString(),
});

export type EnvConfig = z.infer<typeof envSchema>;
/**
 * Validates `process.env` against `envSchema`.
 *
 * @returns The parsed, typed configuration.
 * @throws Error listing every failing variable when validation fails.
 */
function parseEnv(): EnvConfig {
  const parsed = envSchema.safeParse(process.env);

  if (parsed.success) {
    return parsed.data;
  }

  const problems = parsed.error.issues
    .map((issue) => ` - ${issue.path.join('.')}: ${issue.message}`)
    .join('\n');

  throw new Error(
    `Environment validation failed:\n${problems}\n\nPlease check your .env file.`
  );
}

/** Validated configuration, computed once when this module is first loaded. */
export const env = parseEnv();
/**
 * Collects the configured web, mobile and admin API keys.
 *
 * @returns A fresh Set on every call; identical keys collapse to one entry.
 */
export function getValidApiKeys(): Set<string> {
  const { API_KEY_WEB, API_KEY_MOBILE, API_KEY_ADMIN } = env;

  const keys = new Set<string>();
  for (const key of [API_KEY_WEB, API_KEY_MOBILE, API_KEY_ADMIN]) {
    keys.add(key);
  }
  return keys;
}
/**
 * Builds the PostgreSQL connection URL from the validated POSTGRES_* settings.
 *
 * User, password and database name are percent-encoded so that reserved
 * characters (`@`, `:`, `/`, `#`, `?`, spaces, ...) cannot corrupt the URL or
 * be parsed as a different component.
 *
 * NOTE(review): values that were already percent-encoded by hand in the
 * environment would now be encoded twice — confirm none are stored that way.
 *
 * @returns A `postgresql://user:password@host:port/database` URL.
 */
export function getDatabaseUrl(): string {
  const user = encodeURIComponent(env.POSTGRES_USER);
  const password = encodeURIComponent(env.POSTGRES_PASSWORD);
  const database = encodeURIComponent(env.POSTGRES_DB);

  return `postgresql://${user}:${password}@${env.POSTGRES_HOST}:${env.POSTGRES_PORT}/${database}`;
}

66
src/config/redis.ts Normal file
View File

@@ -0,0 +1,66 @@
import Redis from 'ioredis';
import { env } from './env.js';
import logger from '../utils/logger.js';
/**
 * Slot on `globalThis` used to keep a single Redis client alive outside
 * production, so re-evaluating this module reuses the existing connection.
 */
const globalForRedis = globalThis as unknown as {
  redis: Redis | undefined;
};

/**
 * Reconnect policy handed to ioredis.
 *
 * @param times - Attempt counter supplied by ioredis.
 * @returns Milliseconds to wait before the next attempt (1s per attempt,
 *          capped at 5s), or `null` to stop once more than 5 attempts were made.
 */
function reconnectDelay(times: number): number | null {
  if (times > 5) {
    logger.error('Redis connection failed after 5 retries');
    return null;
  }

  const delay = Math.min(times * 1000, 5000);
  logger.warn(`Redis retrying connection in ${delay}ms`, { attempt: times });
  return delay;
}

/** Process-wide Redis client. */
export const redis =
  globalForRedis.redis ??
  new Redis({
    host: env.REDIS_HOST,
    port: env.REDIS_PORT,
    retryStrategy: reconnectDelay,
    maxRetriesPerRequest: 3,
  });

if (env.NODE_ENV !== 'production') {
  globalForRedis.redis = redis;
}

// Connection lifecycle logging.
redis.on('connect', () => {
  const { REDIS_HOST: host, REDIS_PORT: port } = env;
  logger.info('Redis connected successfully', { host, port });
});

redis.on('error', (err) => {
  logger.error('Redis connection error', { error: err.message });
});
/**
 * Probes Redis with PING.
 *
 * @returns `true` only when the reply is exactly `PONG`; any error yields
 *          `false` instead of propagating.
 */
export async function checkRedisConnection(): Promise<boolean> {
  try {
    return (await redis.ping()) === 'PONG';
  } catch {
    return false;
  }
}
/**
 * Sends QUIT to Redis and logs once the call has settled successfully.
 * Rejects with whatever `quit()` rejects with.
 */
export async function disconnectRedis(): Promise<void> {
  const quitting = redis.quit();
  await quitting;

  logger.info('Redis disconnected');
}
export default redis;

131
src/config/socket.ts Normal file
View File

@@ -0,0 +1,131 @@
import { Server as HttpServer } from 'http';
import { Server, Socket } from 'socket.io';
import logger from '../utils/logger.js';
/**
* Socket.IO Server singleton.
* Stays null until initializeSocket() assigns it and is reset to null by
* closeSocket().
*/
let io: Server | null = null;
/**
* Per-connection state stored on `socket.data`.
*/
export interface SocketData {
// Job ids this socket has subscribed to via 'job:subscribe'.
subscribedJobs: Set<string>;
}
/**
 * Returns true when a client-supplied job id is usable as a room suffix.
 * Event payloads are untrusted, so anything that is not a non-empty string is
 * rejected rather than being stringified into a room name.
 */
function isValidJobId(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0;
}

/**
 * Creates the Socket.IO server on top of the given HTTP server, stores it in
 * the module-level singleton and wires the per-connection handlers.
 *
 * Events handled per socket:
 * - `job:subscribe` (jobId): join room `job:<jobId>` and remember the id.
 * - `job:unsubscribe` (jobId): leave that room and forget the id.
 * - `disconnect`: logged only.
 *
 * Subscribe/unsubscribe events whose payload is not a non-empty string are
 * ignored and logged at warn level.
 *
 * @param httpServer - HTTP server the Socket.IO server attaches to.
 * @returns The newly created Socket.IO server.
 */
export function initializeSocket(httpServer: HttpServer): Server {
  io = new Server(httpServer, {
    cors: {
      origin: '*', // Configure based on your frontend domains
      methods: ['GET', 'POST'],
    },
    transports: ['websocket', 'polling'],
  });

  io.on('connection', (socket: Socket) => {
    logger.info('Client connected', { socketId: socket.id });

    // Per-connection bookkeeping of subscribed job ids.
    const data = socket.data as SocketData;
    data.subscribedJobs = new Set();

    socket.on('job:subscribe', (jobId: unknown) => {
      if (!isValidJobId(jobId)) {
        logger.warn('Ignoring job subscription with invalid jobId', {
          socketId: socket.id,
        });
        return;
      }
      data.subscribedJobs.add(jobId);
      void socket.join(`job:${jobId}`);
      logger.debug('Client subscribed to job', { socketId: socket.id, jobId });
    });

    socket.on('job:unsubscribe', (jobId: unknown) => {
      if (!isValidJobId(jobId)) {
        logger.warn('Ignoring job unsubscription with invalid jobId', {
          socketId: socket.id,
        });
        return;
      }
      data.subscribedJobs.delete(jobId);
      void socket.leave(`job:${jobId}`);
      logger.debug('Client unsubscribed from job', { socketId: socket.id, jobId });
    });

    socket.on('disconnect', () => {
      logger.info('Client disconnected', { socketId: socket.id });
    });
  });

  logger.info('Socket.IO server initialized');
  return io;
}
/**
 * Accessor for the Socket.IO singleton.
 *
 * @returns The server created by `initializeSocket`.
 * @throws Error when no server is currently set.
 */
export function getSocketIO(): Server {
  if (io) {
    return io;
  }
  throw new Error('Socket.IO not initialized. Call initializeSocket first.');
}
/**
 * Broadcasts a `job:progress` event to room `job:<jobId>`.
 * Does nothing when the Socket.IO server is not set.
 *
 * @param jobId - Job whose subscribers are notified.
 * @param progress - Progress value forwarded as-is.
 * @param status - Status label forwarded as-is.
 * @param step - Step label forwarded as-is.
 */
export function emitJobProgress(
  jobId: string,
  progress: number,
  status: string,
  step: string
): void {
  if (!io) {
    return;
  }

  const room = `job:${jobId}`;
  const payload = { jobId, progress, status, step };
  io.to(room).emit('job:progress', payload);
}
/**
 * Broadcasts a `job:completed` event to room `job:<jobId>`.
 * Does nothing when the Socket.IO server is not set.
 *
 * @param jobId - Job whose subscribers are notified.
 * @param data - Result payload forwarded as-is.
 * @param source - Source label forwarded as-is.
 */
export function emitJobCompleted(
  jobId: string,
  data: unknown,
  source: string
): void {
  if (!io) {
    return;
  }

  const room = `job:${jobId}`;
  const payload = { jobId, data, source };
  io.to(room).emit('job:completed', payload);
}
/**
 * Broadcasts a `job:error` event to room `job:<jobId>`.
 * Does nothing when the Socket.IO server is not set.
 *
 * @param jobId - Job whose subscribers are notified.
 * @param error - Error code and message forwarded as-is.
 */
export function emitJobError(
  jobId: string,
  error: { code: string; message: string }
): void {
  if (!io) {
    return;
  }

  const room = `job:${jobId}`;
  const payload = { jobId, error };
  io.to(room).emit('job:error', payload);
}
/**
 * Shuts the Socket.IO server down, if one exists, and clears the singleton
 * once the close callback has fired. A no-op when no server is set.
 */
export async function closeSocket(): Promise<void> {
  const server = io;
  if (!server) {
    return;
  }

  await new Promise<void>((resolve) => {
    server.close(() => {
      logger.info('Socket.IO server closed');
      resolve();
    });
  });

  io = null;
}
// NOTE(review): `export default <expression>` captures the value at module
// evaluation time, and `io` is still null at that point — importers of the
// default export therefore appear to always receive null. Prefer getSocketIO();
// confirm nothing relies on this default export before changing it.
export default io;

112
src/index.ts Normal file
View File

@@ -0,0 +1,112 @@
import express from 'express';
import { createServer } from 'http';
import { env, getDatabaseUrl } from './config/env.js';
import { connectDatabase, disconnectDatabase } from './config/database.js';
import { disconnectRedis } from './config/redis.js';
import { initializeSocket, closeSocket } from './config/socket.js';
import { rateLimiter } from './middleware/rateLimit.middleware.js';
import { errorHandler, notFoundHandler } from './middleware/error.middleware.js';
import apiRoutes from './routes/api.routes.js';
import tmdbRoutes from './routes/tmdb.routes.js';
import healthRoutes from './routes/health.routes.js';
import logger from './utils/logger.js';
// Set DATABASE_URL for Prisma.
// NOTE(review): ./config/database.js (imported above) performs the same
// assignment when it is loaded, so this line looks redundant — confirm before
// removing it.
process.env.DATABASE_URL = getDatabaseUrl();
/**
 * Application entry point.
 *
 * Builds the Express app and HTTP server, attaches Socket.IO, registers
 * middleware and routes, connects to the database (with retry), starts
 * listening on `env.PORT`, and installs process-level handlers for signals,
 * uncaught exceptions and unhandled rejections.
 *
 * Rejects when the database connection cannot be established.
 */
async function main(): Promise<void> {
  const app = express();
  const httpServer = createServer(app);

  // Initialize Socket.IO
  initializeSocket(httpServer);

  // Body parsing
  app.use(express.json({ limit: '10mb' }));
  app.use(express.urlencoded({ extended: true }));

  // Apply general rate limiting
  app.use(rateLimiter);

  // Request logging middleware
  app.use((req, _res, next) => {
    logger.info('Incoming request', {
      method: req.method,
      path: req.path,
      ip: req.ip,
      userAgent: req.headers['user-agent'],
    });
    next();
  });

  // Health check routes (no auth required)
  app.use(healthRoutes);

  // API routes
  app.use('/api', apiRoutes);
  app.use('/api/tmdb', tmdbRoutes);

  // 404 handler, then the error handler (must stay last)
  app.use(notFoundHandler);
  app.use(errorHandler);

  // Connect to database with retry
  await connectDatabase();

  // Start server
  httpServer.listen(env.PORT, () => {
    logger.info('Server started', {
      port: env.PORT,
      env: env.NODE_ENV,
    });
  });

  // Guards against a second signal starting a concurrent shutdown.
  let shuttingDown = false;

  /**
   * Closes the HTTP server, Socket.IO, Redis and the database in that order,
   * then exits. Any failure is logged and turns the exit code into 1, so the
   * process never lingers after a signal.
   */
  const gracefulShutdown = async (signal: string): Promise<void> => {
    if (shuttingDown) {
      logger.warn(`Received ${signal} while shutdown is already in progress`);
      return;
    }
    shuttingDown = true;

    logger.info(`Received ${signal}, starting graceful shutdown`);

    let exitCode = 0;
    try {
      // Stop accepting new connections; completion is only logged.
      httpServer.close(() => {
        logger.info('HTTP server closed');
      });

      // Close connections
      await closeSocket();
      await disconnectRedis();
      await disconnectDatabase();

      logger.info('Graceful shutdown completed');
    } catch (error) {
      exitCode = 1;
      logger.error('Graceful shutdown failed', {
        error: error instanceof Error ? error.message : String(error),
      });
    }

    process.exit(exitCode);
  };

  process.on('SIGTERM', () => {
    void gracefulShutdown('SIGTERM');
  });
  process.on('SIGINT', () => {
    void gracefulShutdown('SIGINT');
  });

  // Handle uncaught exceptions
  process.on('uncaughtException', (error) => {
    logger.error('Uncaught exception', {
      error: error.message,
      stack: error.stack,
    });
    process.exit(1);
  });

  // Unhandled rejections are logged only; the process keeps running.
  process.on('unhandledRejection', (reason) => {
    logger.error('Unhandled rejection', {
      reason: reason instanceof Error ? reason.message : String(reason),
    });
  });
}
// Kick off the application; a failed startup is logged and ends the process
// with exit code 1.
main().catch((startupError) => {
  const details = {
    error: startupError.message,
    stack: startupError.stack,
  };
  logger.error('Application startup failed', details);
  process.exit(1);
});

View File

@@ -0,0 +1,73 @@
import { Request, Response, NextFunction } from 'express';
import { env, getValidApiKeys } from '../config/env.js';
import logger from '../utils/logger.js';
import type { ApiResponse } from '../types/index.js';
/**
 * API key authentication middleware.
 *
 * Reads the key from the `X-API-Key` header and:
 * - responds 401 `MISSING_API_KEY` when the header is absent or empty,
 * - responds 403 `INVALID_API_KEY` when the key is not a configured one,
 * - calls `next()` otherwise.
 *
 * Rejections are logged at warn level; for invalid keys only the first eight
 * characters of the supplied key are logged.
 */
export function authMiddleware(
  req: Request,
  res: Response,
  next: NextFunction
): void {
  const apiKey = req.headers['x-api-key'] as string | undefined;

  // Sends the standard error envelope with the given status.
  const deny = (status: number, code: string, message: string): void => {
    const body: ApiResponse<never> = {
      success: false,
      error: { code, message },
    };
    res.status(status).json(body);
  };

  if (!apiKey) {
    logger.warn('Request missing API key', {
      ip: req.ip,
      path: req.path,
    });
    deny(401, 'MISSING_API_KEY', 'API key is required. Include X-API-Key header.');
    return;
  }

  if (getValidApiKeys().has(apiKey)) {
    // Valid API key, proceed
    next();
    return;
  }

  logger.warn('Invalid API key attempt', {
    ip: req.ip,
    path: req.path,
    keyPrefix: apiKey.substring(0, 8) + '...',
  });
  deny(403, 'INVALID_API_KEY', 'Invalid API key provided.');
}
/**
 * Maps an API key to the name of the client it was issued to.
 *
 * Uses the statically imported `env` instead of a runtime `require()`, which
 * is not defined when this file is loaded as an ES module.
 *
 * @param apiKey - Key taken from the request.
 * @returns `'web'`, `'mobile'` or `'admin'` for a configured key (checked in
 *          that order), otherwise `'unknown'`.
 */
export function identifyClient(apiKey: string): string {
  if (apiKey === env.API_KEY_WEB) return 'web';
  if (apiKey === env.API_KEY_MOBILE) return 'mobile';
  if (apiKey === env.API_KEY_ADMIN) return 'admin';
  return 'unknown';
}
export default authMiddleware;

View File

@@ -0,0 +1,50 @@
import { Request, Response, NextFunction } from 'express';
import logger from '../utils/logger.js';
import type { ApiResponse } from '../types/index.js';
/**
 * Global error-handling middleware.
 *
 * Logs the error with request context, then answers with a generic 500
 * `INTERNAL_ERROR` envelope. When the response has already started
 * (`res.headersSent`), status and body can no longer be written, so the error
 * is forwarded to `next` and Express' default handler deals with the
 * connection.
 *
 * @param error - Error raised by an earlier handler.
 * @param req - Request being processed (path and method are logged).
 * @param res - Response to write the error envelope to.
 * @param next - Next error handler; only called when headers were already sent.
 */
export function errorHandler(
  error: Error,
  req: Request,
  res: Response,
  next: NextFunction
): void {
  logger.error('Unhandled error', {
    error: error.message,
    stack: error.stack,
    path: req.path,
    method: req.method,
  });

  if (res.headersSent) {
    next(error);
    return;
  }

  const response: ApiResponse<never> = {
    success: false,
    error: {
      code: 'INTERNAL_ERROR',
      message: 'An unexpected error occurred. Please try again later.',
    },
  };
  res.status(500).json(response);
}
/**
 * Fallback for requests no route matched: answers 404 with a `NOT_FOUND`
 * envelope naming the requested method and path.
 */
export function notFoundHandler(
  req: Request,
  res: Response
): void {
  const { method, path } = req;

  const body: ApiResponse<never> = {
    success: false,
    error: {
      code: 'NOT_FOUND',
      message: `Endpoint ${method} ${path} not found`,
    },
  };

  res.status(404).json(body);
}
export default errorHandler;

View File

@@ -0,0 +1,87 @@
import rateLimit from 'express-rate-limit';
import { env } from '../config/env.js';
import logger from '../utils/logger.js';
import type { ApiResponse } from '../types/index.js';
/**
 * General rate limiter.
 * Limits requests per client IP within the configured time window.
 *
 * No custom `keyGenerator` is set, so the library's default key (the client
 * IP) applies. The previous key also contained the raw `X-API-Key` header;
 * where this limiter is mounted ahead of authentication (as in src/index.ts),
 * that header is unauthenticated and client-controlled, so sending a different
 * value on every request produced a fresh bucket each time and bypassed the
 * limit.
 *
 * Requests to `/health` and `/ready` are never counted.
 */
export const rateLimiter = rateLimit({
  windowMs: env.RATE_LIMIT_WINDOW_MS, // Time window in milliseconds
  max: env.RATE_LIMIT_MAX_REQUESTS, // Max requests per window per IP
  standardHeaders: true, // Return rate limit info in RateLimit-* headers
  legacyHeaders: false, // Disable X-RateLimit-* headers

  // Custom handler for rate limit exceeded: 429 with the standard envelope
  handler: (req, res) => {
    const response: ApiResponse<never> = {
      success: false,
      error: {
        code: 'RATE_LIMIT_EXCEEDED',
        message: `Too many requests. Maximum ${env.RATE_LIMIT_MAX_REQUESTS} requests per ${env.RATE_LIMIT_WINDOW_MS / 1000} seconds.`,
        details: {
          retryAfter: Math.ceil(env.RATE_LIMIT_WINDOW_MS / 1000),
        },
      },
    };
    logger.warn('Rate limit exceeded', {
      ip: req.ip,
      path: req.path,
      maxRequests: env.RATE_LIMIT_MAX_REQUESTS,
      windowMs: env.RATE_LIMIT_WINDOW_MS,
    });
    res.status(429).json(response);
  },

  // Skip rate limiting for health checks
  skip: (req) => {
    return req.path === '/health' || req.path === '/ready';
  },
});
/**
 * Tighter limiter for the scraping endpoints: 10 requests per minute per
 * IP + API key combination, answered with 429 `SCRAPE_RATE_LIMIT_EXCEEDED`
 * once exceeded.
 */
export const scrapeRateLimiter = rateLimit({
  windowMs: 60 * 1000, // 1 minute
  max: 10, // Only 10 scrape requests per minute
  standardHeaders: true,
  legacyHeaders: false,

  // Bucket key: "scrape:<ip>:<api key or 'no-key'>"
  keyGenerator(req) {
    const headerKey = req.headers['x-api-key'] as string | undefined;
    const keyPart = headerKey || 'no-key';
    return `scrape:${req.ip}:${keyPart}`;
  },

  handler(req, res) {
    logger.warn('Scrape rate limit exceeded', {
      ip: req.ip,
      path: req.path,
    });

    const body: ApiResponse<never> = {
      success: false,
      error: {
        code: 'SCRAPE_RATE_LIMIT_EXCEEDED',
        message: 'Too many scrape requests. Please wait before trying again.',
        details: {
          retryAfter: 60,
        },
      },
    };
    res.status(429).json(body);
  },
});
export default rateLimiter;

View File

@@ -0,0 +1,93 @@
import { Request, Response, NextFunction } from 'express';
import { z } from 'zod';
import type { ApiResponse, GetInfoRequest } from '../types/index.js';
/** Hostnames accepted as Netflix title pages. */
const NETFLIX_TITLE_HOSTS = new Set([
  'www.netflix.com',
  'netflix.com',
  'www.netflix.com.tr',
  'netflix.com.tr',
]);

/** Matches a `/title/<numeric id>` segment. */
const NETFLIX_TITLE_PATH = /\/title\/\d+/;

/**
 * Validation schema for the /api/getinfo request body.
 *
 * `url` must be a syntactically valid URL that
 * - uses http or https,
 * - points at one of the accepted Netflix hostnames, and
 * - contains `/title/<numeric id>` in its *path*. The pattern is checked
 *   against `pathname` only, so a matching fragment or query string (e.g.
 *   `?next=/title/1`) no longer satisfies the check.
 */
const getInfoSchema = z.object({
  url: z.string().url('Invalid URL format').refine((url) => {
    try {
      const parsedUrl = new URL(url);
      const isWebScheme =
        parsedUrl.protocol === 'https:' || parsedUrl.protocol === 'http:';
      return (
        isWebScheme &&
        NETFLIX_TITLE_HOSTS.has(parsedUrl.hostname) &&
        NETFLIX_TITLE_PATH.test(parsedUrl.pathname)
      );
    } catch {
      // Not parseable as a URL at all.
      return false;
    }
  }, 'URL must be a valid Netflix title URL (e.g., https://www.netflix.com/tr/title/81616256)'),
});
/**
 * Middleware validating the /api/getinfo request body.
 *
 * On success the parsed body is attached to the request as `validated` and
 * `next()` is called; on failure a 400 `VALIDATION_ERROR` envelope listing
 * each failing field is returned.
 */
export function validateGetInfo(
  req: Request,
  res: Response,
  next: NextFunction
): void {
  const parsed = getInfoSchema.safeParse(req.body);

  if (parsed.success) {
    // Attach validated data to request
    (req as Request & { validated: GetInfoRequest }).validated = parsed.data;
    next();
    return;
  }

  const errors = parsed.error.issues.map(({ path, message }) => ({
    field: path.join('.'),
    message,
  }));

  const body: ApiResponse<never> = {
    success: false,
    error: {
      code: 'VALIDATION_ERROR',
      message: 'Invalid request parameters',
      details: { errors },
    },
  };
  res.status(400).json(body);
}
/**
 * Creates a middleware that validates `req.body` against `schema`.
 *
 * The returned middleware calls `next()` when the body is valid and otherwise
 * answers 400 with a `VALIDATION_ERROR` envelope. The parsed value is not
 * attached to the request.
 *
 * @param schema - Zod schema the body must satisfy.
 */
export function validateBody<T extends z.ZodType>(
  schema: T
): (req: Request, res: Response, next: NextFunction) => void {
  return function validate(req, res, next) {
    const parsed = schema.safeParse(req.body);

    if (parsed.success) {
      next();
      return;
    }

    const errors = parsed.error.issues.map(({ path, message }) => ({
      field: path.join('.'),
      message,
    }));

    const body: ApiResponse<never> = {
      success: false,
      error: {
        code: 'VALIDATION_ERROR',
        message: 'Invalid request parameters',
        details: { errors },
      },
    };
    res.status(400).json(body);
  };
}
export default validateGetInfo;

234
src/routes/api.routes.ts Normal file
View File

@@ -0,0 +1,234 @@
import { Router, Request, Response } from 'express';
import { z } from 'zod';
import { authMiddleware } from '../middleware/auth.middleware.js';
import { scrapeRateLimiter } from '../middleware/rateLimit.middleware.js';
import { validateGetInfo } from '../middleware/validation.middleware.js';
import { JobService } from '../services/job.service.js';
import { ContentService } from '../services/content.service.js';
import type { ApiResponse, GetInfoRequest, GetInfoResponse } from '../types/index.js';
// Router for the /api endpoints defined in this file.
const router = Router();
/**
* Query-string schema for GET /content.
* - type: optional, 'movie' or 'tvshow'
* - limit: optional, coerced to a number; must be an integer from 1 to 100
*/
const listContentSchema = z.object({
type: z.enum(['movie', 'tvshow']).optional(),
limit: z.coerce.number().int().min(1).max(100).optional(),
});
/**
 * POST /api/getinfo
 * Resolve content information for a Netflix URL and return it in the response.
 *
 * Request body: { url: string }
 * Headers: X-API-Key: <api_key>
 *
 * Response: { success: boolean, data?: ContentData, error?: ApiError }
 * Failures while processing are reported as 500 `SCRAPE_ERROR`.
 */
router.post(
  '/getinfo',
  authMiddleware,
  scrapeRateLimiter,
  validateGetInfo,
  async (
    req: Request & { validated: GetInfoRequest },
    res: Response<ApiResponse<GetInfoResponse>>
  ) => {
    const targetUrl = req.validated.url;

    try {
      // Processed inline; per the original author's note the service resolves
      // via cache -> db -> netflix.
      const { data } = await JobService.processSync(targetUrl);
      const body: ApiResponse<GetInfoResponse> = { success: true, data };
      res.json(body);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to scrape content';
      const body: ApiResponse<GetInfoResponse> = {
        success: false,
        error: { code: 'SCRAPE_ERROR', message },
      };
      res.status(500).json(body);
    }
  }
);
/**
 * GET /api/content
 * List content already stored in the database.
 *
 * Query params: type?: movie|tvshow, limit?: 1-100 (100 when omitted)
 * Headers: X-API-Key: <api_key>
 *
 * Invalid query parameters yield 400 `VALIDATION_ERROR`; failures while
 * loading yield 500 `CONTENT_LIST_ERROR`.
 */
router.get(
  '/content',
  authMiddleware,
  async (
    req: Request,
    res: Response<ApiResponse<GetInfoResponse[]>>
  ) => {
    const parsedQuery = listContentSchema.safeParse(req.query);

    if (!parsedQuery.success) {
      const errors = parsedQuery.error.issues.map(({ path, message }) => ({
        field: path.join('.'),
        message,
      }));
      const body: ApiResponse<GetInfoResponse[]> = {
        success: false,
        error: {
          code: 'VALIDATION_ERROR',
          message: 'Invalid query parameters',
          details: { errors },
        },
      };
      res.status(400).json(body);
      return;
    }

    const { type, limit } = parsedQuery.data;

    try {
      const items = await ContentService.list({
        type,
        limit: limit ?? 100,
      });
      const body: ApiResponse<GetInfoResponse[]> = {
        success: true,
        data: items.map((item) => ContentService.toApiResponse(item)),
      };
      res.json(body);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to fetch content';
      const body: ApiResponse<GetInfoResponse[]> = {
        success: false,
        error: { code: 'CONTENT_LIST_ERROR', message },
      };
      res.status(500).json(body);
    }
  }
);
/**
 * POST /api/getinfo/async
 * Create a scraping job and answer immediately with its id.
 *
 * Request body: { url: string }
 * Headers: X-API-Key: <api_key>
 *
 * Response (202): { success: boolean, data?: { jobId, status }, error?: ApiError }
 * Failures while creating the job yield 500 `JOB_CREATE_ERROR`.
 */
router.post(
  '/getinfo/async',
  authMiddleware,
  scrapeRateLimiter,
  validateGetInfo,
  async (
    req: Request & { validated: GetInfoRequest },
    res: Response<ApiResponse<{ jobId: string; status: string }>>
  ) => {
    const targetUrl = req.validated.url;

    try {
      const job = await JobService.create(targetUrl);

      // Fire-and-forget: processing continues after the response is sent;
      // a failure is only written to the console.
      JobService.process(job.id).catch((processingError) => {
        console.error('Job processing error:', processingError);
      });

      const body: ApiResponse<{ jobId: string; status: string }> = {
        success: true,
        data: { jobId: job.id, status: job.status },
      };
      res.status(202).json(body);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to create job';
      const body: ApiResponse<{ jobId: string; status: string }> = {
        success: false,
        error: { code: 'JOB_CREATE_ERROR', message },
      };
      res.status(500).json(body);
    }
  }
);
/**
 * GET /api/jobs/:jobId
 * Look up a job and return it.
 *
 * Headers: X-API-Key: <api_key>
 *
 * Responses: 400 `VALIDATION_ERROR` without a jobId, 404 `JOB_NOT_FOUND` when
 * no job matches, 500 `JOB_FETCH_ERROR` when the lookup fails.
 */
router.get(
  '/jobs/:jobId',
  authMiddleware,
  async (req: Request, res: Response) => {
    const { jobId } = req.params;

    // Sends the standard error envelope with the given status.
    const fail = (status: number, code: string, message: string): void => {
      const body: ApiResponse<never> = {
        success: false,
        error: { code, message },
      };
      res.status(status).json(body);
    };

    if (!jobId) {
      fail(400, 'VALIDATION_ERROR', 'jobId is required');
      return;
    }

    try {
      const job = await JobService.getById(jobId);

      if (!job) {
        fail(404, 'JOB_NOT_FOUND', 'Job not found');
        return;
      }

      res.json({
        success: true,
        data: job,
      });
    } catch (error) {
      fail(
        500,
        'JOB_FETCH_ERROR',
        error instanceof Error ? error.message : 'Failed to fetch job'
      );
    }
  }
);
export default router;

View File

@@ -0,0 +1,54 @@
import { Router, Request, Response } from 'express';
import { checkRedisConnection } from '../config/redis.js';
import prisma from '../config/database.js';
import { env } from '../config/env.js';
// Router for the unauthenticated health endpoints.
const router = Router();

/**
 * GET /health
 * Liveness probe: always answers 200 with the current time and process
 * uptime; no dependency is checked.
 */
router.get('/health', (_req: Request, res: Response) => {
  const snapshot = {
    status: 'ok',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
  };
  res.status(200).json(snapshot);
});
/**
 * GET /ready
 * Readiness probe: runs `SELECT 1` against the database and pings Redis.
 * Answers 200 when both succeed, 503 otherwise, with a per-dependency report.
 */
router.get('/ready', async (_req: Request, res: Response) => {
  const describe = (ok: boolean): string => (ok ? 'healthy' : 'unhealthy');

  // Database probe; a failure is written to the console and reported as unhealthy.
  let databaseUp = false;
  try {
    await prisma.$queryRaw`SELECT 1`;
    databaseUp = true;
  } catch (error) {
    console.error('Database health check failed:', error);
  }

  // Redis probe
  const redisUp = await checkRedisConnection();

  const ready = databaseUp && redisUp;

  res.status(ready ? 200 : 503).json({
    status: ready ? 'ready' : 'not_ready',
    timestamp: new Date().toISOString(),
    checks: {
      database: describe(databaseUp),
      redis: describe(redisUp),
    },
    env: env.NODE_ENV,
  });
});
export default router;

222
src/routes/tmdb.routes.ts Normal file
View File

@@ -0,0 +1,222 @@
import { Router, Request, Response } from 'express';
import { z } from 'zod';
import { authMiddleware } from '../middleware/auth.middleware.js';
import { scrapeRateLimiter } from '../middleware/rateLimit.middleware.js';
import { TmdbService } from '../services/tmdb.service.js';
import type {
ApiResponse,
TmdbSearchResponse,
} from '../types/index.js';
// Router for the TMDB endpoints defined in this file.
const router = Router();
/**
* Validation schema for the TMDB search request body.
* - query: trimmed, 1 to 200 characters
* - year / seasonYear: optional, coerced to an integer between 1900 and the
*   current year + 10 (the upper bound is computed once, when this module loads)
* - type: optional, 'movie' | 'tv' | 'multi'
* - seasonNumber: optional, coerced to an integer from 1 to 100
*/
const tmdbSearchSchema = z.object({
query: z.string().trim().min(1, 'Query must be at least 1 character').max(200, 'Query must be at most 200 characters'),
year: z.coerce.number().int().min(1900).max(new Date().getFullYear() + 10).optional(),
type: z.enum(['movie', 'tv', 'multi']).optional(),
seasonYear: z.coerce.number().int().min(1900).max(new Date().getFullYear() + 10).optional(),
seasonNumber: z.coerce.number().int().min(1).max(100).optional(),
});
/**
 * POST /api/tmdb/search
 * Search movies and TV shows through the TMDB service.
 *
 * Request body: { query: string, year?: number, type?: 'movie' | 'tv' | 'multi',
 *                 seasonYear?: number, seasonNumber?: number }
 * Headers: X-API-Key: <api_key>
 *
 * Response: { success: boolean, data?: TmdbSearchResponse, error?: ApiError }
 * `type` falls back to 'multi' when omitted.
 */
router.post(
  '/search',
  authMiddleware,
  scrapeRateLimiter,
  async (
    req: Request,
    res: Response<ApiResponse<TmdbSearchResponse>>
  ) => {
    const parsed = tmdbSearchSchema.safeParse(req.body);

    if (!parsed.success) {
      const errors = parsed.error.issues.map(({ path, message }) => ({
        field: path.join('.'),
        message,
      }));
      const body: ApiResponse<TmdbSearchResponse> = {
        success: false,
        error: {
          code: 'VALIDATION_ERROR',
          message: 'Invalid request parameters',
          details: { errors },
        },
      };
      res.status(400).json(body);
      return;
    }

    const { query, year, type, seasonYear, seasonNumber } = parsed.data;

    try {
      const data = await TmdbService.search({
        query,
        year,
        type: type || 'multi',
        seasonYear,
        seasonNumber,
      });
      const body: ApiResponse<TmdbSearchResponse> = { success: true, data };
      res.json(body);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to search TMDB';
      const body: ApiResponse<TmdbSearchResponse> = {
        success: false,
        error: { code: 'TMDB_ERROR', message },
      };
      res.status(500).json(body);
    }
  }
);
/**
 * POST /api/tmdb/search/movie
 * Search movies only.
 *
 * Request body: { query: string (1-200 chars, trimmed), year?: number }
 * Invalid bodies yield 400 `VALIDATION_ERROR`; service failures 500 `TMDB_ERROR`.
 */
router.post(
  '/search/movie',
  authMiddleware,
  scrapeRateLimiter,
  async (
    req: Request,
    res: Response<ApiResponse<TmdbSearchResponse>>
  ) => {
    // Built per request, so the upper year bound follows the current date.
    const movieSearchSchema = z.object({
      query: z.string().trim().min(1).max(200),
      year: z.coerce.number().int().min(1900).max(new Date().getFullYear() + 10).optional(),
    });

    const parsed = movieSearchSchema.safeParse(req.body);

    if (!parsed.success) {
      const errors = parsed.error.issues.map(({ path, message }) => ({
        field: path.join('.'),
        message,
      }));
      const body: ApiResponse<TmdbSearchResponse> = {
        success: false,
        error: {
          code: 'VALIDATION_ERROR',
          message: 'Invalid request parameters',
          details: { errors },
        },
      };
      res.status(400).json(body);
      return;
    }

    const { query, year } = parsed.data;

    try {
      const data = await TmdbService.searchMovies(query, year);
      const body: ApiResponse<TmdbSearchResponse> = { success: true, data };
      res.json(body);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to search movies';
      const body: ApiResponse<TmdbSearchResponse> = {
        success: false,
        error: { code: 'TMDB_ERROR', message },
      };
      res.status(500).json(body);
    }
  }
);
/**
 * POST /api/tmdb/search/tv
 * Search TV shows only.
 *
 * Request body: { query: string (1-200 chars, trimmed), year?: number,
 *                 seasonYear?: number, seasonNumber?: number }
 * Invalid bodies yield 400 `VALIDATION_ERROR`; service failures 500 `TMDB_ERROR`.
 */
router.post(
  '/search/tv',
  authMiddleware,
  scrapeRateLimiter,
  async (
    req: Request,
    res: Response<ApiResponse<TmdbSearchResponse>>
  ) => {
    // Built per request, so the upper year bounds follow the current date.
    const tvSearchSchema = z.object({
      query: z.string().trim().min(1).max(200),
      year: z.coerce.number().int().min(1900).max(new Date().getFullYear() + 10).optional(),
      seasonYear: z.coerce.number().int().min(1900).max(new Date().getFullYear() + 10).optional(),
      seasonNumber: z.coerce.number().int().min(1).max(100).optional(),
    });

    const parsed = tvSearchSchema.safeParse(req.body);

    if (!parsed.success) {
      const errors = parsed.error.issues.map(({ path, message }) => ({
        field: path.join('.'),
        message,
      }));
      const body: ApiResponse<TmdbSearchResponse> = {
        success: false,
        error: {
          code: 'VALIDATION_ERROR',
          message: 'Invalid request parameters',
          details: { errors },
        },
      };
      res.status(400).json(body);
      return;
    }

    const { query, year, seasonYear, seasonNumber } = parsed.data;

    try {
      // Note the service's argument order: season number before season year.
      const data = await TmdbService.searchTv(query, year, seasonNumber, seasonYear);
      const body: ApiResponse<TmdbSearchResponse> = { success: true, data };
      res.json(body);
    } catch (error) {
      const message =
        error instanceof Error ? error.message : 'Failed to search TV shows';
      const body: ApiResponse<TmdbSearchResponse> = {
        success: false,
        error: { code: 'TMDB_ERROR', message },
      };
      res.status(500).json(body);
    }
  }
);
export default router;

View File

@@ -0,0 +1,146 @@
import redis from '../config/redis.js';
import { env } from '../config/env.js';
import logger from '../utils/logger.js';
import type { GetInfoResponse, CacheEntry, DataSource } from '../types/index.js';
/**
 * Namespace prepended to every Netflix content cache key.
 */
const CACHE_PREFIX = 'netflix:content:';

/**
 * Derives the Redis key for a URL.
 *
 * The numeric id of the first `/title/<id>` segment is used when present;
 * otherwise the whole URL string becomes the key suffix.
 */
function getCacheKey(url: string): string {
  const match = /\/title\/(\d+)/.exec(url);
  const suffix = (match && match[1]) || url;
  return CACHE_PREFIX + suffix;
}
/**
 * Cache Service for Redis operations on Netflix content entries.
 *
 * Every method is best-effort: Redis errors are logged and turned into a
 * neutral result (`null`, `false`, `-1` or nothing) instead of being thrown,
 * so a cache outage never fails the caller.
 */
export class CacheService {
  /** COUNT hint passed to SCAN for each iteration of `clearAll`. */
  private static readonly SCAN_BATCH_SIZE = 500;

  /**
   * Get cached content by URL.
   *
   * @param url - URL the entry was stored under.
   * @returns The cached payload, or `null` on a miss, on a malformed entry or
   *          on a Redis/parse error.
   */
  static async get(url: string): Promise<GetInfoResponse | null> {
    const key = getCacheKey(url);
    try {
      const cached = await redis.get(key);
      if (!cached) {
        logger.debug('Cache miss', { url });
        return null;
      }
      logger.debug('Cache hit', { url });
      // The stored JSON is not validated; guard against `null` or an entry
      // without `data` so the declared return type holds.
      const entry = JSON.parse(cached) as CacheEntry<GetInfoResponse> | null;
      return entry?.data ?? null;
    } catch (error) {
      logger.error('Cache get error', {
        url,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
      return null;
    }
  }

  /**
   * Set cache entry with TTL.
   *
   * The payload is wrapped with the write timestamp and the TTL taken from
   * `env.REDIS_TTL_SECONDS`, then stored with SETEX.
   *
   * @param url - URL used to derive the key.
   * @param data - Payload to cache.
   */
  static async set(url: string, data: GetInfoResponse): Promise<void> {
    const key = getCacheKey(url);
    const ttl = env.REDIS_TTL_SECONDS;
    const entry: CacheEntry<GetInfoResponse> = {
      data,
      cachedAt: Date.now(),
      ttl,
    };
    try {
      await redis.setex(key, ttl, JSON.stringify(entry));
      logger.debug('Cache set', { url, ttl });
    } catch (error) {
      logger.error('Cache set error', {
        url,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
    }
  }

  /**
   * Delete cached content for a URL.
   */
  static async delete(url: string): Promise<void> {
    const key = getCacheKey(url);
    try {
      await redis.del(key);
      logger.debug('Cache deleted', { url });
    } catch (error) {
      logger.error('Cache delete error', {
        url,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
    }
  }

  /**
   * Check if a cache entry exists.
   *
   * @returns `true` when the key exists; `false` otherwise or on error.
   */
  static async exists(url: string): Promise<boolean> {
    const key = getCacheKey(url);
    try {
      const result = await redis.exists(key);
      return result === 1;
    } catch (error) {
      logger.error('Cache exists check error', {
        url,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
      return false;
    }
  }

  /**
   * Get the remaining TTL of a cache entry.
   *
   * @returns Whatever Redis TTL reports for the key, or `-1` on error.
   */
  static async getTTL(url: string): Promise<number> {
    const key = getCacheKey(url);
    try {
      return await redis.ttl(key);
    } catch (error) {
      logger.error('Cache TTL check error', {
        url,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
      return -1;
    }
  }

  /**
   * Clear all Netflix content cache entries.
   *
   * Walks the keyspace with SCAN in batches instead of a single KEYS call, so
   * Redis is not blocked on large datasets and no DEL call receives an
   * unbounded argument list. The number of keys actually removed is logged
   * when it is greater than zero.
   */
  static async clearAll(): Promise<void> {
    try {
      const pattern = `${CACHE_PREFIX}*`;
      let cursor = '0';
      let removed = 0;

      do {
        const [nextCursor, keys] = await redis.scan(
          cursor,
          'MATCH',
          pattern,
          'COUNT',
          CacheService.SCAN_BATCH_SIZE
        );
        cursor = nextCursor;
        if (keys.length > 0) {
          removed += await redis.del(...keys);
        }
      } while (cursor !== '0'); // SCAN signals completion with cursor "0"

      if (removed > 0) {
        logger.info('Cache cleared', { count: removed });
      }
    } catch (error) {
      logger.error('Cache clear error', {
        error: error instanceof Error ? error.message : 'Unknown error',
      });
    }
  }
}
export default CacheService;

View File

@@ -0,0 +1,239 @@
import prisma from '../config/database.js';
import type { ContentData, ScraperResult, GetInfoResponse } from '../types/index.js';
/**
* Content Service for database operations
*/
export class ContentService {
/**
* List content items from the database, newest first (ordered by createdAt
* descending), including their genres and cast members (cast sorted by name).
*
* @param options.type - When set, only rows of this type are returned.
* @param options.limit - Passed as `take`; when omitted no limit is applied here.
* @returns The rows converted with mapToContentData.
*/
static async list(options?: {
type?: 'movie' | 'tvshow';
limit?: number;
}): Promise<ContentData[]> {
const content = await prisma.content.findMany({
// No filter at all when no type was requested.
where: options?.type ? { type: options.type } : undefined,
include: {
genres: {
include: {
genre: true,
},
},
castMembers: {
orderBy: { name: 'asc' },
},
},
orderBy: { createdAt: 'desc' },
take: options?.limit,
});
return content.map((item) => this.mapToContentData(item));
}
/**
* Find a single content row by its URL, including genres and cast members
* (cast sorted by name).
*
* @param url - Value matched against the `url` column via findUnique.
* @returns The row converted with mapToContentData, or null when none matches.
*/
static async findByUrl(url: string): Promise<ContentData | null> {
const content = await prisma.content.findUnique({
where: { url },
include: {
genres: {
include: {
genre: true,
},
},
castMembers: {
orderBy: { name: 'asc' },
},
},
});
if (!content) {
return null;
}
return this.mapToContentData(content);
}
/**
* Create a new content row from a scraper result.
*
* Each genre name is upserted first (created when missing, left untouched
* otherwise); the content row is then created together with its genre links
* and cast members in one nested create.
*
* NOTE(review): the genre upserts and the content create are separate calls
* with no transaction visible here, and a genre name repeated in
* scraperResult.genres would be upserted concurrently — confirm whether
* either can cause problems.
*
* @param url - URL stored on the new row.
* @param scraperResult - Scraped fields, genre names and cast names.
* @returns The created row converted with mapToContentData.
*/
static async create(
url: string,
scraperResult: ScraperResult
): Promise<ContentData> {
// Create or find genres (all upserts run in parallel)
const genreConnections = await Promise.all(
scraperResult.genres.map(async (genreName) => {
const genre = await prisma.genre.upsert({
where: { name: genreName },
// Empty update: an existing genre is returned unchanged.
update: {},
create: { name: genreName },
});
return { genreId: genre.id };
})
);
// Create content with genres and cast
const content = await prisma.content.create({
data: {
url,
title: scraperResult.title,
year: scraperResult.year,
plot: scraperResult.plot,
backdropUrl: scraperResult.backdropUrl,
ageRating: scraperResult.ageRating,
type: scraperResult.type,
currentSeason: scraperResult.currentSeason,
genres: {
create: genreConnections,
},
castMembers: {
create: scraperResult.cast.map((name) => ({ name })),
},
},
// Return the row with the same relations the read methods load.
include: {
genres: {
include: {
genre: true,
},
},
castMembers: {
orderBy: { name: 'asc' },
},
},
});
return this.mapToContentData(content);
}
/**
* Update existing content
*/
static async update(
url: string,
scraperResult: ScraperResult
): Promise<ContentData> {
// Delete existing genres and cast
const existingContent = await prisma.content.findUnique({
where: { url },
});
if (existingContent) {
await prisma.contentGenre.deleteMany({
where: { contentId: existingContent.id },
});
await prisma.castMember.deleteMany({
where: { contentId: existingContent.id },
});
}
// Create or find genres
const genreConnections = await Promise.all(
scraperResult.genres.map(async (genreName) => {
const genre = await prisma.genre.upsert({
where: { name: genreName },
update: {},
create: { name: genreName },
});
return { genreId: genre.id };
})
);
// Update content
const content = await prisma.content.update({
where: { url },
data: {
title: scraperResult.title,
year: scraperResult.year,
plot: scraperResult.plot,
backdropUrl: scraperResult.backdropUrl,
ageRating: scraperResult.ageRating,
type: scraperResult.type,
currentSeason: scraperResult.currentSeason,
genres: {
create: genreConnections,
},
castMembers: {
create: scraperResult.cast.map((name) => ({ name })),
},
},
include: {
genres: {
include: {
genre: true,
},
},
castMembers: {
orderBy: { name: 'asc' },
},
},
});
return this.mapToContentData(content);
}
/**
* Delete content by URL
*/
static async delete(url: string): Promise<void> {
await prisma.content.delete({
where: { url },
});
}
/**
* Map database result to ContentData type
*/
private static mapToContentData(content: {
id: string;
url: string;
title: string;
year: number | null;
plot: string | null;
backdropUrl: string | null;
ageRating: string | null;
type: string;
currentSeason: number | null;
createdAt: Date;
updatedAt: Date;
genres: { genre: { name: string } }[];
castMembers: { name: string }[];
}): ContentData {
return {
id: content.id,
url: content.url,
title: content.title,
year: content.year,
plot: content.plot,
backdropUrl: content.backdropUrl,
ageRating: content.ageRating,
type: content.type as 'movie' | 'tvshow',
currentSeason: content.currentSeason,
genres: content.genres.map((g) => g.genre.name),
cast: content.castMembers.map((c) => c.name),
createdAt: content.createdAt,
updatedAt: content.updatedAt,
};
}
/**
* Convert ContentData to API response format
*/
static toApiResponse(data: ContentData): GetInfoResponse {
return {
title: data.title,
year: data.year,
plot: data.plot,
ageRating: data.ageRating,
type: data.type,
currentSeason: data.currentSeason,
genres: data.genres,
cast: data.cast,
backdrop: data.backdropUrl,
};
}
}
export default ContentService;

237
src/services/job.service.ts Normal file
View File

@@ -0,0 +1,237 @@
import { v4 as uuidv4 } from 'uuid';
import prisma from '../config/database.js';
import { CacheService } from './cache.service.js';
import { ContentService } from './content.service.js';
import { ScraperService } from './scraper.service.js';
import {
emitJobProgress,
emitJobCompleted,
emitJobError,
} from '../config/socket.js';
import logger from '../utils/logger.js';
import type {
ScrapeJob,
JobStatus,
GetInfoResponse,
DataSource,
ApiError,
} from '../types/index.js';
/**
 * Job Service for async scrape operations.
 *
 * Persists scrape jobs through Prisma, resolves content via the hybrid
 * cache -> database -> Netflix pipeline, and reports progress, completion
 * and failure through the socket emit helpers.
 */
export class JobService {
  /**
   * Create a new scrape job in the `pending` state with a generated UUID.
   */
  static async create(url: string): Promise<ScrapeJob> {
    const job = await prisma.scrapeJob.create({
      data: {
        id: uuidv4(),
        url,
        status: 'pending',
        progress: 0,
        step: 'created',
      },
    });

    logger.info('Job created', { jobId: job.id, url });
    return this.mapToScrapeJob(job);
  }

  /**
   * Get a job by ID.
   *
   * @returns The job, or `null` when no job has that ID.
   */
  static async getById(jobId: string): Promise<ScrapeJob | null> {
    const job = await prisma.scrapeJob.findUnique({
      where: { id: jobId },
    });

    return job ? this.mapToScrapeJob(job) : null;
  }

  /**
   * Update the given fields of a job and return the updated job.
   */
  static async update(
    jobId: string,
    data: {
      status?: JobStatus;
      progress?: number;
      step?: string;
      result?: unknown;
      error?: string;
    }
  ): Promise<ScrapeJob> {
    const job = await prisma.scrapeJob.update({
      where: { id: jobId },
      data,
    });

    return this.mapToScrapeJob(job);
  }

  /**
   * Process a scrape job (hybrid: cache -> db -> netflix).
   *
   * Before each pipeline stage the job row is updated and a progress event is
   * emitted. Any error marks the job `failed` and emits `job:error`; this
   * method itself does not reject for pipeline failures. An unknown job ID is
   * logged and ignored.
   */
  static async process(jobId: string): Promise<void> {
    const job = await this.getById(jobId);
    if (!job) {
      logger.error('Job not found', { jobId });
      return;
    }

    try {
      const { data, source } = await this.resolveContent(
        job.url,
        async (progress, step, message) => {
          await this.update(jobId, { status: 'processing', progress, step });
          emitJobProgress(jobId, progress, 'processing', message);
        }
      );

      await this.completeJob(jobId, data, source);
    } catch (error) {
      const apiError: ApiError = {
        code: 'SCRAPE_ERROR',
        message: error instanceof Error ? error.message : 'Unknown error occurred',
      };

      // Persisting the failure may itself fail (e.g. the database is the
      // reason the job failed). Subscribers must still receive the error
      // event, so this write is not allowed to abort the handler.
      try {
        await this.update(jobId, {
          status: 'failed',
          error: apiError.message,
        });
      } catch (persistError) {
        logger.error('Failed to persist job failure', {
          jobId,
          error: persistError instanceof Error ? persistError.message : 'Unknown error',
        });
      }

      emitJobError(jobId, apiError);
      logger.error('Job failed', {
        jobId,
        error: apiError.message,
      });
    }
  }

  /**
   * Mark a job as completed, store its result and notify subscribers.
   */
  private static async completeJob(
    jobId: string,
    data: GetInfoResponse,
    source: DataSource
  ): Promise<void> {
    await this.update(jobId, {
      status: 'completed',
      progress: 100,
      step: 'completed',
      result: data,
    });

    emitJobCompleted(jobId, data, source);
    logger.info('Job completed', { jobId, source });
  }

  /**
   * Process job synchronously (for direct API calls).
   *
   * Runs the same pipeline as `process` without creating a job record or
   * emitting events; errors propagate to the caller.
   */
  static async processSync(url: string): Promise<{
    data: GetInfoResponse;
    source: DataSource;
  }> {
    return this.resolveContent(url);
  }

  /**
   * Shared lookup pipeline: cache, then database, then a live scrape.
   *
   * A database hit is written back to the cache; a scrape result is saved to
   * the database and then cached.
   *
   * @param url - Content URL to resolve.
   * @param onStep - Optional hook awaited before each stage with the progress
   *   percentage, the machine-readable step name and a display message.
   * @returns The response data and the source that produced it.
   */
  private static async resolveContent(
    url: string,
    onStep?: (progress: number, step: string, message: string) => Promise<void>
  ): Promise<{ data: GetInfoResponse; source: DataSource }> {
    // Step 1: cache
    await onStep?.(10, 'checking_cache', 'Checking cache');
    const cachedData = await CacheService.get(url);
    if (cachedData) {
      return { data: cachedData, source: 'cache' };
    }

    // Step 2: database
    await onStep?.(30, 'checking_database', 'Checking database');
    const dbContent = await ContentService.findByUrl(url);
    if (dbContent) {
      const responseData = ContentService.toApiResponse(dbContent);
      await CacheService.set(url, responseData);
      return { data: responseData, source: 'database' };
    }

    // Step 3: scrape from Netflix
    await onStep?.(50, 'scraping_netflix', 'Scraping Netflix');
    const scraperResult = await ScraperService.scrape(url);

    // Step 4: save to database, then cache
    await onStep?.(80, 'saving_to_database', 'Saving to database');
    const contentData = await ContentService.create(url, scraperResult);
    const responseData = ContentService.toApiResponse(contentData);
    await CacheService.set(url, responseData);

    return { data: responseData, source: 'netflix' };
  }

  /**
   * Map a database row to the `ScrapeJob` type.
   *
   * A missing step becomes an empty string, and a stored error message is
   * wrapped as an `ApiError` with code `JOB_ERROR`.
   */
  private static mapToScrapeJob(job: {
    id: string;
    url: string;
    status: string;
    progress: number;
    step: string | null;
    result: unknown;
    error: string | null;
    createdAt: Date;
    updatedAt: Date;
  }): ScrapeJob {
    return {
      id: job.id,
      url: job.url,
      status: job.status as JobStatus,
      progress: job.progress,
      step: job.step || '',
      result: job.result as ScrapeJob['result'],
      error: job.error ? { code: 'JOB_ERROR', message: job.error } : undefined,
      createdAt: job.createdAt,
      updatedAt: job.updatedAt,
    };
  }
}
export default JobService;

View File

@@ -0,0 +1,284 @@
import * as cheerio from 'cheerio';
import type { ScraperResult, ContentType } from '../types/index.js';
import logger from '../utils/logger.js';
/**
 * Unicode bidirectional control characters that can wrap scraped text:
 * \u2066-\u2069 (isolates), \u202A-\u202E (embeddings/overrides),
 * \u200E-\u200F (direction marks).
 */
const BIDI_CONTROL_CHARS = /[\u2066-\u2069\u202A-\u202E\u200E-\u200F]/g;

/**
 * Age rating patterns to detect and exclude from genres.
 * Matches forms such as "18+", "PG-13", "TV-MA", optionally wrapped in the
 * bidirectional control characters listed above.
 */
const AGE_RATING_PATTERN = /^[\u2066-\u2069\u202A-\u202E\u200E-\u200F]*(\d+\+|PG-?13|PG|NC-?17|R|G|TV-?MA|TV-?14|TV-?PG|TV-?G|TV-?Y7?-?FV?|TV-?Y)[\u2066-\u2069\u202A-\u202E\u200E-\u200F]*$/i;

/**
 * Season pattern to detect TV shows and extract season number.
 * Matches patterns like "3 Sezon", "2 Seasons", "1. Sezon", etc.
 */
const SEASON_PATTERN = /(\d+)\.?\s*(sezon|season|sezonlar|seasons)/i;

/**
 * CSS selectors for the parts of the Netflix title page that are scraped.
 * The class names are generated by Netflix's build, so they are kept in one
 * place to make updates easy when the markup changes.
 */
const SELECTORS = {
  title: 'h2.default-ltr-iqcdef-cache-tnklrp',
  metadataItem: 'li.default-ltr-iqcdef-cache-6prs41',
  plot: 'span.default-ltr-iqcdef-cache-6ukeej',
  cast: 'span.default-ltr-iqcdef-cache-m0886o',
  backdrop: 'div.default-ltr-iqcdef-cache-1wezh7a',
} as const;

/**
 * Netflix HTML Scraper Service.
 * Uses Cheerio for parsing HTML content.
 */
export class ScraperService {
  /**
   * Validate if URL is a valid Netflix URL.
   *
   * The URL must parse, use http or https, and have one of the known Netflix
   * hostnames. Anything else (including unparsable input) yields `false`.
   */
  static isValidNetflixUrl(url: string): boolean {
    try {
      const { protocol, hostname } = new URL(url);
      const validHosts = [
        'www.netflix.com',
        'netflix.com',
        'www.netflix.com.tr',
        'netflix.com.tr',
      ];
      // Only web URLs can be fetched; reject ftp:, file:, etc. up front.
      const isWebUrl = protocol === 'https:' || protocol === 'http:';
      return isWebUrl && validHosts.includes(hostname);
    } catch {
      return false;
    }
  }

  /**
   * Extract Netflix title ID from URL.
   *
   * @returns The digits following `/title/`, or `null` when the URL has none.
   */
  static extractTitleId(url: string): string | null {
    const match = url.match(/\/title\/(\d+)/);
    return match?.[1] ?? null;
  }

  /**
   * Fetch HTML content from Netflix URL.
   *
   * Sends browser-like headers with Turkish as the preferred language.
   *
   * @throws Error when the response status is not OK.
   */
  private static async fetchHtml(url: string): Promise<string> {
    logger.info('Fetching Netflix page', { url });

    const response = await fetch(url, {
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7',
        Accept:
          'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      },
    });

    if (!response.ok) {
      throw new Error(`Failed to fetch Netflix page: ${response.status}`);
    }

    return response.text();
  }

  /**
   * Fetch a Netflix title page, parse it and extract the content data.
   *
   * @throws Error('Invalid Netflix URL') when the URL fails validation, or
   *   whatever `fetchHtml` throws when the page cannot be fetched.
   */
  static async scrape(url: string): Promise<ScraperResult> {
    if (!this.isValidNetflixUrl(url)) {
      throw new Error('Invalid Netflix URL');
    }

    const html = await this.fetchHtml(url);
    const $ = cheerio.load(html);

    const title = this.extractTitle($);
    const year = this.extractYear($);
    const plot = this.extractPlot($);
    const ageRating = this.extractAgeRating($);
    const { genres, type, currentSeason } = this.extractGenresTypeAndSeason($);
    const cast = this.extractCast($);
    const backdropUrl = this.extractBackdrop($);

    const result: ScraperResult = {
      title,
      year,
      plot,
      ageRating,
      type,
      genres,
      cast,
      backdropUrl,
      currentSeason,
    };

    logger.info('Scraping completed', {
      url,
      title,
      year,
      ageRating,
      type,
      genresCount: genres.length,
      castCount: cast.length,
    });

    return result;
  }

  /**
   * Remove bidirectional control characters and surrounding whitespace.
   * Shared by the age-rating and genre extraction so both clean text the same way.
   */
  private static stripBidiControls(text: string): string {
    return text.replace(BIDI_CONTROL_CHARS, '').trim();
  }

  /**
   * Extract title from HTML.
   *
   * Tries the page heading, then the `og:title` meta tag, then the `<title>`
   * element with its " | Netflix" suffix removed; falls back to
   * "Unknown Title".
   */
  private static extractTitle($: cheerio.CheerioAPI): string {
    let title = $(SELECTORS.title).first().text().trim();

    if (!title) {
      title = $('meta[property="og:title"]').attr('content') || '';
    }

    if (!title) {
      const pageTitle = $('title').text();
      title = pageTitle.replace(' | Netflix', '').trim();
    }

    return title || 'Unknown Title';
  }

  /**
   * Extract year from HTML (first metadata list item).
   *
   * @returns The year when it parses to a value between 1900 and five years
   *   from now, otherwise `null`.
   */
  private static extractYear($: cheerio.CheerioAPI): number | null {
    const yearText = $(SELECTORS.metadataItem).first().text().trim();
    const year = parseInt(yearText, 10);

    if (!isNaN(year) && year >= 1900 && year <= new Date().getFullYear() + 5) {
      return year;
    }

    return null;
  }

  /**
   * Extract plot/description from HTML, falling back to the
   * `og:description` meta tag.
   */
  private static extractPlot($: cheerio.CheerioAPI): string | null {
    const plot = $(SELECTORS.plot).first().text().trim();
    if (plot) {
      return plot;
    }

    const metaDesc = $('meta[property="og:description"]').attr('content');
    return metaDesc || null;
  }

  /**
   * Extract age rating from HTML (e.g., "18+", "16+").
   * Searches all metadata list items except the first, which is the year,
   * and returns the first one matching `AGE_RATING_PATTERN`.
   */
  private static extractAgeRating($: cheerio.CheerioAPI): string | null {
    let ageRating: string | null = null;
    const foundTexts: string[] = [];

    $(SELECTORS.metadataItem).each((index, element) => {
      if (index === 0) return; // Skip year

      const text = $(element).text().trim();
      foundTexts.push(text);

      const cleanText = this.stripBidiControls(text);
      if (cleanText && AGE_RATING_PATTERN.test(cleanText)) {
        ageRating = cleanText;
        return false; // Returning false stops the cheerio iteration
      }
    });

    // Debug logging
    if (!ageRating && foundTexts.length > 0) {
      logger.debug('Age rating not found in elements', {
        foundTexts,
        pattern: AGE_RATING_PATTERN.source,
      });
    }

    return ageRating;
  }

  /**
   * Extract genres from HTML (skip year, age rating, and season info).
   * Also detects content type (movie/tvshow) based on season presence and
   * extracts the season number from the season text.
   */
  private static extractGenresTypeAndSeason($: cheerio.CheerioAPI): { genres: string[]; type: ContentType; currentSeason: number | null } {
    const genres: string[] = [];
    let type: ContentType = 'movie';
    let currentSeason: number | null = null;
    const foundTexts: string[] = [];

    $(SELECTORS.metadataItem).each((index, element) => {
      if (index === 0) return; // Skip year

      const cleanText = this.stripBidiControls($(element).text().trim());
      foundTexts.push(cleanText);

      // A season label means this is a TV show; it is never a genre.
      const seasonMatch = cleanText.match(SEASON_PATTERN);
      if (cleanText && seasonMatch) {
        type = 'tvshow';
        const seasonNum = parseInt(seasonMatch[1], 10);
        if (!isNaN(seasonNum)) {
          currentSeason = seasonNum;
        }
        return;
      }

      // Skip age rating - only add actual genres
      if (cleanText && !AGE_RATING_PATTERN.test(cleanText)) {
        genres.push(cleanText);
      }
    });

    // Debug logging
    logger.debug('extractGenresTypeAndSeason completed', {
      foundTexts,
      genres,
      type,
      currentSeason,
    });

    return { genres, type, currentSeason };
  }

  /**
   * Extract cast members from HTML: the cast element holds a comma-separated
   * list of names.
   */
  private static extractCast($: cheerio.CheerioAPI): string[] {
    const castText = $(SELECTORS.cast).first().text().trim();
    if (!castText) {
      return [];
    }

    return castText
      .split(',')
      .map((name) => name.trim())
      .filter((name) => name.length > 0);
  }

  /**
   * Extract backdrop image URL from HTML.
   *
   * Prefers the last candidate listed in the image's `srcset`, then falls
   * back to `src`.
   */
  private static extractBackdrop($: cheerio.CheerioAPI): string | null {
    const img = $(SELECTORS.backdrop).first().find('img').first();

    const srcset = img.attr('srcset');
    if (srcset) {
      const sources = srcset.split(',');
      // Each candidate is "<url> <descriptor>"; keep only the URL part.
      const lastSource = sources[sources.length - 1]?.trim().split(' ')[0];
      if (lastSource) {
        return lastSource;
      }
    }

    return img.attr('src') || null;
  }
}
export default ScraperService;

View File

@@ -0,0 +1,429 @@
import { env } from '../config/env.js';
import type {
TmdbSearchRequest,
TmdbSearchResult,
TmdbSearchResponse,
TmdbRawResponse,
TmdbRawMovie,
TmdbRawTv,
} from '../types/index.js';
import logger from '../utils/logger.js';
/**
 * TMDB Genre ID to Name mapping.
 * Common genres used in movies and TV shows; IDs missing from this table are
 * dropped when results are normalized.
 */
const GENRE_MAP: Record<number, string> = {
  28: 'Action',
  12: 'Adventure',
  16: 'Animation',
  35: 'Comedy',
  80: 'Crime',
  99: 'Documentary',
  18: 'Drama',
  10751: 'Family',
  14: 'Fantasy',
  36: 'History',
  27: 'Horror',
  10402: 'Music',
  9648: 'Mystery',
  10749: 'Romance',
  878: 'Science Fiction',
  10770: 'TV Movie',
  53: 'Thriller',
  10752: 'War',
  37: 'Western',
  10759: 'Action & Adventure',
  10762: 'Kids',
  10763: 'News',
  10764: 'Reality',
  10765: 'Sci-Fi & Fantasy',
  10766: 'Soap',
  10767: 'Talk',
  10768: 'War & Politics',
};

/**
 * TMDB API Base URL
 */
const TMDB_BASE_URL = 'https://api.themoviedb.org/3';

/**
 * TMDB Image Base URL
 */
const TMDB_IMAGE_BASE_URL = 'https://image.tmdb.org/t/p/original';

/**
 * TMDB Service for movie/TV show search.
 *
 * All requests ask for Turkish (`tr-TR`) localisation and authenticate with
 * the bearer token from `env.TMDB_ACCESS_TOKEN`.
 */
export class TmdbService {
  /**
   * Get common headers for TMDB API requests.
   */
  private static getHeaders(): Record<string, string> {
    return {
      Authorization: `Bearer ${env.TMDB_ACCESS_TOKEN}`,
      'Content-Type': 'application/json',
    };
  }

  /**
   * Issue an authenticated GET request against a full TMDB URL.
   */
  private static request(url: string) {
    return fetch(url, {
      method: 'GET',
      headers: this.getHeaders(),
    });
  }

  /**
   * Fetch one page from a `/search/*` endpoint.
   *
   * @throws Error(`TMDB API error: <status>`) on a non-OK response; the
   *   response body is logged first.
   */
  private static async fetchSearchPage(
    endpoint: 'movie' | 'tv' | 'multi',
    params: URLSearchParams
  ): Promise<TmdbRawResponse> {
    const response = await this.request(
      `${TMDB_BASE_URL}/search/${endpoint}?${params.toString()}`
    );

    if (!response.ok) {
      const errorText = await response.text();
      logger.error('TMDB API error', { status: response.status, error: errorText });
      throw new Error(`TMDB API error: ${response.status}`);
    }

    return (await response.json()) as TmdbRawResponse;
  }

  /**
   * Extract the year from a `YYYY-MM-DD` date string.
   *
   * @returns The year, or `null` for missing, empty or malformed dates.
   */
  private static extractYear(dateStr: string | null | undefined): number | null {
    if (!dateStr) return null;
    const year = parseInt(dateStr.split('-')[0] ?? '', 10);
    return Number.isNaN(year) || year <= 0 ? null : year;
  }

  /**
   * Convert genre IDs to genre names, skipping IDs not in `GENRE_MAP`.
   */
  private static mapGenreIds(genreIds: number[]): string[] {
    return genreIds
      .map((id) => GENRE_MAP[id])
      .filter((name): name is string => name !== undefined);
  }

  /**
   * Build full image URL from a TMDB image path, or `null` when there is none.
   */
  private static buildImageUrl(path: string | null): string | null {
    if (!path) return null;
    return `${TMDB_IMAGE_BASE_URL}${path}`;
  }

  /**
   * Normalize raw movie result to TmdbSearchResult.
   */
  private static normalizeMovie(movie: TmdbRawMovie): TmdbSearchResult {
    return {
      id: movie.id,
      title: movie.title,
      originalTitle: movie.original_title,
      overview: movie.overview,
      releaseDate: movie.release_date || null,
      year: this.extractYear(movie.release_date),
      type: 'movie',
      posterPath: this.buildImageUrl(movie.poster_path),
      backdropPath: this.buildImageUrl(movie.backdrop_path),
      voteAverage: movie.vote_average,
      voteCount: movie.vote_count,
      popularity: movie.popularity,
      genres: this.mapGenreIds(movie.genre_ids),
      originalLanguage: movie.original_language,
    };
  }

  /**
   * Normalize raw TV result to TmdbSearchResult.
   * Season fields start as `null`; they are filled in by season filtering.
   */
  private static normalizeTv(tv: TmdbRawTv): TmdbSearchResult {
    return {
      id: tv.id,
      title: tv.name,
      originalTitle: tv.original_name,
      overview: tv.overview,
      releaseDate: tv.first_air_date || null,
      year: this.extractYear(tv.first_air_date),
      type: 'tv',
      posterPath: this.buildImageUrl(tv.poster_path),
      backdropPath: this.buildImageUrl(tv.backdrop_path),
      voteAverage: tv.vote_average,
      voteCount: tv.vote_count,
      popularity: tv.popularity,
      genres: this.mapGenreIds(tv.genre_ids),
      originalLanguage: tv.original_language,
      currentSeason: null,
      totalSeasons: null,
    };
  }

  /**
   * Get TV show details including season count.
   *
   * @returns The season count (0 when the field is absent), or `null` when
   *   the request fails for any reason.
   */
  private static async getTvDetails(tvId: number): Promise<{ numberOfSeasons: number } | null> {
    try {
      const response = await this.request(`${TMDB_BASE_URL}/tv/${tvId}?language=tr-TR`);
      if (!response.ok) {
        return null;
      }

      const data = (await response.json()) as { number_of_seasons?: number };
      return {
        numberOfSeasons: data.number_of_seasons || 0,
      };
    } catch {
      return null;
    }
  }

  /**
   * Get specific season details including air date.
   *
   * @returns The season's air date and its year, or `null` when the request
   *   fails for any reason.
   */
  private static async getSeasonDetails(
    tvId: number,
    seasonNumber: number
  ): Promise<{ airDate: string | null; year: number | null } | null> {
    try {
      const response = await this.request(
        `${TMDB_BASE_URL}/tv/${tvId}/season/${seasonNumber}?language=tr-TR`
      );
      if (!response.ok) {
        return null;
      }

      const data = (await response.json()) as { air_date?: string | null };
      const airDate = data.air_date || null;
      return { airDate, year: this.extractYear(airDate) };
    } catch {
      return null;
    }
  }

  /**
   * Filter and enrich TV results based on season criteria.
   *
   * Keeps only shows that have at least `seasonNumber` seasons and, when
   * `seasonYear` is given, whose requested season aired in that year. Kept
   * results get `totalSeasons` and `currentSeason` filled in. Shows whose
   * details cannot be fetched are dropped.
   */
  private static async filterAndEnrichTvResultsBySeason(
    results: TmdbSearchResult[],
    seasonNumber: number,
    seasonYear?: number
  ): Promise<TmdbSearchResult[]> {
    const enrichedResults: TmdbSearchResult[] = [];

    // Process results sequentially to avoid rate limiting
    for (const result of results) {
      if (result.type !== 'tv') continue;

      const tvDetails = await this.getTvDetails(result.id);
      if (!tvDetails) continue;

      if (tvDetails.numberOfSeasons < seasonNumber) {
        logger.debug('TV show filtered out - not enough seasons', {
          title: result.title,
          totalSeasons: tvDetails.numberOfSeasons,
          requestedSeason: seasonNumber,
        });
        continue;
      }

      if (seasonYear) {
        const seasonDetails = await this.getSeasonDetails(result.id, seasonNumber);
        if (!seasonDetails || seasonDetails.year !== seasonYear) {
          logger.debug('TV show filtered out - season year mismatch', {
            title: result.title,
            requestedSeason: seasonNumber,
            requestedYear: seasonYear,
            actualYear: seasonDetails?.year,
          });
          continue;
        }
      }

      enrichedResults.push({
        ...result,
        totalSeasons: tvDetails.numberOfSeasons,
        currentSeason: seasonNumber,
      });
    }

    return enrichedResults;
  }

  /**
   * Normalize raw result based on media type.
   *
   * When `media_type` is absent the kind is inferred from the presence of a
   * `title` field. Unknown media types yield `null`.
   */
  private static normalizeResult(result: TmdbRawMovie | TmdbRawTv): TmdbSearchResult | null {
    const mediaType = result.media_type || ('title' in result ? 'movie' : 'tv');

    if (mediaType === 'movie') {
      return this.normalizeMovie(result as TmdbRawMovie);
    } else if (mediaType === 'tv') {
      return this.normalizeTv(result as TmdbRawTv);
    }

    return null;
  }

  /**
   * Search for movies.
   *
   * @param query Search query
   * @param year Release year filter (optional)
   * @throws Error when TMDB responds with a non-OK status.
   */
  static async searchMovies(query: string, year?: number): Promise<TmdbSearchResponse> {
    const params = new URLSearchParams({
      query,
      language: 'tr-TR',
    });
    if (year) {
      params.append('year', year.toString());
    }

    logger.info('TMDB: Searching movies', { query, year });

    const data = await this.fetchSearchPage('movie', params);
    const results = data.results.map((r) => this.normalizeMovie(r as TmdbRawMovie));

    return {
      page: data.page,
      results,
      totalPages: data.total_pages,
      totalResults: data.total_results,
    };
  }

  /**
   * Search for TV shows.
   *
   * `totalResults` is TMDB's own total unless season filtering ran, in which
   * case it is the number of results that survived the filter.
   *
   * @param query Search query
   * @param year First air date year (optional - not recommended for accurate results)
   * @param seasonNumber Required season number - only shows with this season will be returned
   * @param seasonYear Required season year - only shows with matching season air year will be returned
   * @throws Error when TMDB responds with a non-OK status.
   */
  static async searchTv(
    query: string,
    year?: number,
    seasonNumber?: number,
    seasonYear?: number
  ): Promise<TmdbSearchResponse> {
    const filterBySeason = seasonNumber !== undefined;

    const params = new URLSearchParams({
      query,
      language: 'tr-TR',
    });

    // Note: We don't use year for TV searches when seasonNumber is provided
    // because the year from Netflix is the season's year, not the show's first air year
    if (year && !filterBySeason) {
      params.append('first_air_date_year', year.toString());
    }

    logger.info('TMDB: Searching TV shows', { query, year, seasonNumber, seasonYear });

    const data = await this.fetchSearchPage('tv', params);
    let results = data.results.map((r) => this.normalizeTv(r as TmdbRawTv));

    if (seasonNumber !== undefined) {
      results = await this.filterAndEnrichTvResultsBySeason(results, seasonNumber, seasonYear);
    }

    return {
      page: data.page,
      results,
      totalPages: data.total_pages,
      // Only a filtered search overrides TMDB's total with the local count.
      totalResults: filterBySeason ? results.length : data.total_results,
    };
  }

  /**
   * Multi search (movies and TV shows; person results are discarded).
   *
   * NOTE(review): `year` is forwarded as a `year` query parameter — confirm
   * that TMDB's /search/multi endpoint actually honours it.
   *
   * @throws Error when TMDB responds with a non-OK status.
   */
  static async searchMulti(query: string, year?: number): Promise<TmdbSearchResponse> {
    const params = new URLSearchParams({
      query,
      language: 'tr-TR',
    });
    if (year) {
      params.append('year', year.toString());
    }

    logger.info('TMDB: Multi search', { query, year });

    const data = await this.fetchSearchPage('multi', params);

    const results = data.results
      // The raw types only declare 'movie' | 'tv', but the multi endpoint
      // also returns people, so compare against the widened string type.
      .filter((r) => (r.media_type as string | undefined) !== 'person')
      .map((r) => this.normalizeResult(r))
      .filter((r): r is TmdbSearchResult => r !== null);

    return {
      page: data.page,
      results,
      totalPages: data.total_pages,
      totalResults: data.total_results,
    };
  }

  /**
   * Search for content based on type.
   *
   * @param request Search request with query, year, type, and optional season
   *   parameters. `type` defaults to `'multi'`; season parameters are only
   *   used for `'tv'`.
   */
  static async search(request: TmdbSearchRequest): Promise<TmdbSearchResponse> {
    const { query, year, type = 'multi', seasonYear, seasonNumber } = request;

    switch (type) {
      case 'movie':
        return this.searchMovies(query, year);
      case 'tv':
        return this.searchTv(query, year, seasonNumber, seasonYear);
      case 'multi':
      default:
        return this.searchMulti(query, year);
    }
  }
}
export default TmdbService;

210
src/types/index.ts Normal file
View File

@@ -0,0 +1,210 @@
/**
* Type definitions for Netflix Scraper API
*/
// ============================================
// Content Types
// ============================================
/**
 * A stored content record as exposed by the service layer: the scalar
 * columns plus genre and cast names flattened to plain string arrays.
 */
export interface ContentData {
id: string;
url: string;
title: string;
year: number | null;
plot: string | null;
backdropUrl: string | null;
ageRating: string | null;
type: 'movie' | 'tvshow';
currentSeason: number | null;
genres: string[];
cast: string[];
createdAt: Date;
updatedAt: Date;
}
/** The two kinds of content the scraper distinguishes. */
export type ContentType = 'movie' | 'tvshow';
/**
 * Fields extracted from a scraped Netflix page, before persistence
 * (no `id`, `url` or timestamps yet).
 */
export interface ScraperResult {
title: string;
year: number | null;
plot: string | null;
ageRating: string | null;
type: ContentType;
genres: string[];
cast: string[];
backdropUrl: string | null;
currentSeason: number | null;
}
// ============================================
// API Types
// ============================================
/**
 * Generic response envelope: a `success` flag with optional `data` and `error`.
 * NOTE(review): presumably `data` is set on success and `error` on failure —
 * confirm against the route handlers.
 */
export interface ApiResponse<T> {
success: boolean;
data?: T;
error?: ApiError;
}
/** Error payload: a machine-readable `code`, a message and optional extra details. */
export interface ApiError {
code: string;
message: string;
details?: Record<string, unknown>;
}
/** Request payload carrying the URL to look up. */
export interface GetInfoRequest {
url: string;
}
/**
 * Content information returned to API clients. Same fields as the scraped
 * data, except that the backdrop image URL is exposed as `backdrop`.
 */
export interface GetInfoResponse {
title: string;
year: number | null;
plot: string | null;
ageRating: string | null;
type: ContentType;
genres: string[];
cast: string[];
backdrop: string | null;
currentSeason: number | null;
}
// ============================================
// Cache Types
// ============================================
/**
 * A cached value together with its caching metadata.
 * NOTE(review): the units of `cachedAt` and `ttl` are not visible here
 * (presumably an epoch timestamp and a duration) — confirm in the cache service.
 */
export interface CacheEntry<T> {
data: T;
cachedAt: number;
ttl: number;
}
/** Where a response's data came from: the cache, the database or a live Netflix scrape. */
export type DataSource = 'cache' | 'database' | 'netflix';
// ============================================
// Socket Event Types
// ============================================
/** Event names and handler signatures exchanged over the socket connection. */
export interface SocketEvents {
// Client -> Server
'job:subscribe': (jobId: string) => void;
'job:unsubscribe': (jobId: string) => void;
// Server -> Client
'job:progress': (data: JobProgress) => void;
'job:completed': (data: JobCompleted) => void;
'job:error': (data: JobError) => void;
}
/** Payload of `job:progress`: how far a job has advanced and what it is doing. */
export interface JobProgress {
jobId: string;
progress: number; // 0-100
status: string;
step: string;
}
/** Payload of `job:completed`: the final data and the source it was served from. */
export interface JobCompleted {
jobId: string;
data: GetInfoResponse;
source: DataSource;
}
/** Payload of `job:error`: the error that made the job fail. */
export interface JobError {
jobId: string;
error: ApiError;
}
// ============================================
// Job Types
// ============================================
/** Lifecycle states of a scrape job. */
export type JobStatus = 'pending' | 'processing' | 'completed' | 'failed';
/**
 * A scrape job as exposed by the job service.
 *
 * NOTE(review): `result` is typed as `ScraperResult`, but the job service
 * stores the API response (`GetInfoResponse`, which has `backdrop` rather
 * than `backdropUrl`) when completing a job — the declared type looks out of
 * sync with the stored value; confirm and align.
 */
export interface ScrapeJob {
id: string;
url: string;
status: JobStatus;
progress: number;
step: string;
result?: ScraperResult;
error?: ApiError;
createdAt: Date;
updatedAt: Date;
}
// ============================================
// TMDB API Types
// ============================================
/**
 * Parameters for a TMDB search. `type` selects the endpoint (the TMDB
 * service defaults it to `'multi'`); the season fields are only used for
 * TV searches.
 */
export interface TmdbSearchRequest {
query: string;
year?: number;
type?: 'movie' | 'tv' | 'multi';
seasonYear?: number;
seasonNumber?: number;
}
/**
 * A normalized TMDB search hit for either a movie or a TV show.
 * `posterPath` and `backdropPath` hold full image URLs (or `null`), and
 * `genres` holds genre names rather than TMDB genre IDs. The season fields
 * are only populated for TV results that went through season filtering.
 */
export interface TmdbSearchResult {
id: number;
title: string;
originalTitle: string;
overview: string | null;
releaseDate: string | null;
year: number | null;
type: 'movie' | 'tv';
posterPath: string | null;
backdropPath: string | null;
voteAverage: number;
voteCount: number;
popularity: number;
genres: string[];
originalLanguage: string;
currentSeason?: number | null;
totalSeasons?: number | null;
}
/** One page of normalized search results plus paging totals. */
export interface TmdbSearchResponse {
page: number;
results: TmdbSearchResult[];
totalPages: number;
totalResults: number;
}
// Raw TMDB API Response Types
/**
 * A movie entry as returned by the TMDB search endpoints (snake_case fields).
 * `media_type` is optional here; the TMDB service infers the kind from the
 * presence of `title` when it is absent.
 */
export interface TmdbRawMovie {
id: number;
title: string;
original_title: string;
overview: string | null;
release_date: string;
poster_path: string | null;
backdrop_path: string | null;
vote_average: number;
vote_count: number;
popularity: number;
genre_ids: number[];
original_language: string;
media_type?: 'movie';
}
/**
 * A TV show entry as returned by the TMDB search endpoints. Uses `name` and
 * `first_air_date` where movies use `title` and `release_date`.
 */
export interface TmdbRawTv {
id: number;
name: string;
original_name: string;
overview: string | null;
first_air_date: string;
poster_path: string | null;
backdrop_path: string | null;
vote_average: number;
vote_count: number;
popularity: number;
genre_ids: number[];
original_language: string;
media_type?: 'tv';
}
/** One raw page of TMDB search results with paging totals. */
export interface TmdbRawResponse {
page: number;
results: (TmdbRawMovie | TmdbRawTv)[];
total_pages: number;
total_results: number;
}

97
src/utils/logger.ts Normal file
View File

@@ -0,0 +1,97 @@
/**
* Structured JSON Logger
* Standardized log levels: debug, info, warn, error
*/
/** Supported log levels, from most to least verbose. */
type LogLevel = 'debug' | 'info' | 'warn' | 'error';

/** One emitted log line. Caller-supplied data is merged in as extra keys. */
interface LogEntry {
  timestamp: string;
  level: LogLevel;
  message: string;
  service: string;
  traceId?: string;
  [key: string]: unknown;
}

/**
 * Minimal structured logger that writes one JSON object per line.
 *
 * Entries at `error` level go to stderr, everything else to stdout. The
 * minimum level comes from the `LOG_LEVEL` environment variable, read when
 * the logger is constructed.
 */
class Logger {
  /** Numeric severity per level; higher means more severe. */
  private static readonly LEVELS: Record<LogLevel, number> = {
    debug: 0,
    info: 1,
    warn: 2,
    error: 3,
  };

  private service: string;
  private level: LogLevel;

  /**
   * @param service - Value written to the `service` field of every entry.
   */
  constructor(service: string = 'netflix-scraper-api') {
    this.service = service;
    this.level = Logger.resolveLevel(process.env.LOG_LEVEL);
  }

  /**
   * Turn the raw `LOG_LEVEL` value into a valid level.
   *
   * Matching is case-insensitive. Missing or unrecognised values fall back to
   * `info`; without this check an unknown value would make every threshold
   * comparison fail and silently disable all logging.
   */
  private static resolveLevel(raw: string | undefined): LogLevel {
    const candidate = raw?.trim().toLowerCase();
    if (
      candidate !== undefined &&
      Object.prototype.hasOwnProperty.call(Logger.LEVELS, candidate)
    ) {
      return candidate as LogLevel;
    }
    return 'info';
  }

  /** Whether an entry at `level` meets the configured minimum level. */
  private shouldLog(level: LogLevel): boolean {
    return Logger.LEVELS[level] >= Logger.LEVELS[this.level];
  }

  /**
   * Build the entry object. Keys in `data` are copied onto the entry and
   * therefore override the standard fields if they share a name.
   */
  private formatEntry(level: LogLevel, message: string, data?: Record<string, unknown>): LogEntry {
    const entry: LogEntry = {
      timestamp: new Date().toISOString(),
      level,
      message,
      service: this.service,
    };

    if (data) {
      Object.assign(entry, data);
    }

    return entry;
  }

  /** Serialize the entry and write it to stderr (errors) or stdout. */
  private output(entry: LogEntry): void {
    const line = JSON.stringify(entry) + '\n';
    const stream = entry.level === 'error' ? process.stderr : process.stdout;
    stream.write(line);
  }

  /** Shared implementation behind the per-level methods. */
  private log(level: LogLevel, message: string, data?: Record<string, unknown>): void {
    if (this.shouldLog(level)) {
      this.output(this.formatEntry(level, message, data));
    }
  }

  debug(message: string, data?: Record<string, unknown>): void {
    this.log('debug', message, data);
  }

  info(message: string, data?: Record<string, unknown>): void {
    this.log('info', message, data);
  }

  warn(message: string, data?: Record<string, unknown>): void {
    this.log('warn', message, data);
  }

  error(message: string, data?: Record<string, unknown>): void {
    this.log('error', message, data);
  }

  /**
   * Create a child logger that adds `context` to every entry.
   *
   * Per-call data wins over the context on key clashes. Contexts nest: a
   * child of a child carries both contexts.
   */
  withContext(context: Record<string, unknown>): Logger {
    const childLogger = new Logger(this.service);
    // Bind to this instance so an already-contextualised parent keeps
    // contributing its own context.
    const parentFormat = this.formatEntry.bind(this);

    childLogger.formatEntry = (level: LogLevel, message: string, data?: Record<string, unknown>) => {
      return parentFormat(level, message, { ...context, ...data });
    };

    return childLogger;
  }
}
export const logger = new Logger();
export default logger;