649 lines
19 KiB
Markdown
649 lines
19 KiB
Markdown
# MetaScraper API Reference
|
||
|
||
## 🎯 Main API
|
||
|
||
### `scraperNetflix(inputUrl, options?)`
|
||
|
||
Netflix metadata extraction function with automatic fallback and Turkish localization.
|
||
|
||
#### Parameters
|
||
|
||
| Parameter | Type | Required | Default | Description |
|
||
|-----------|------|----------|---------|-------------|
|
||
| `inputUrl` | `string` | ✅ | - | Netflix title URL (any format) |
|
||
| `options` | `object` | ❌ | `{}` | Configuration options |
|
||
|
||
#### Options
|
||
|
||
| Option | Type | Default | Description |
|
||
|--------|------|---------|-------------|
|
||
| `headless` | `boolean` | `true` | Enable Playwright fallback for missing data |
|
||
| `timeoutMs` | `number` | `15000` | Request timeout in milliseconds |
|
||
| `userAgent` | `string` | Chrome 118 User-Agent | Custom User-Agent string |
|
||
|
||
#### Returns
|
||
|
||
```typescript
|
||
Promise<{
|
||
url: string; // Normalized Netflix URL
|
||
id: string; // Netflix title ID
|
||
name: string; // Clean title (Turkish UI removed)
|
||
year: string | number | undefined; // Release year
|
||
seasons: string | null; // Season info for TV series
|
||
thumbnail: string | null; // Poster/thumbnail image URL
|
||
info: string | null; // Content description/summary
|
||
genre: string | null; // Genre (Turkish normalized)
|
||
}>
|
||
```
|
||
|
||
#### Examples
|
||
|
||
**Basic Usage**
|
||
```javascript
|
||
import { scraperNetflix } from 'metascraper';
|
||
|
||
const result = await scraperNetflix('https://www.netflix.com/tr/title/82123114');
|
||
console.log(result);
|
||
// {
|
||
// "url": "https://www.netflix.com/title/82123114",
|
||
// "id": "82123114",
|
||
// "name": "ONE SHOT with Ed Sheeran",
|
||
// "year": "2025",
|
||
// "seasons": null,
|
||
// "thumbnail": "https://occ-0-7335-778.1.nflxso.net/dnm/api/v6/6AYY37jfdO6hpXcMjf9Yu5cnmO0/AAAABSkrIGPSyEfSWYQzc8rEFo6EtVV6Ls8WtPpNwR42MSKSNPNomZWV5P_l2MxGuJEkoPm71UT_eBK_SsTEH8pRslQr0sjpdhVHjxh4.jpg",
|
||
// "info": "Ed Sheeran, matematiğin mucizevi gücünü ve müziğin birleştirici gücünü sergileyen benzersiz bir performansla sahneye çıkıyor.",
|
||
// "genre": "Belgesel"
|
||
// }
|
||
```
|
||
|
||
**Advanced Configuration**
|
||
```javascript
|
||
import { scraperNetflix } from 'metascraper';
|
||
|
||
const result = await scraperNetflix(
|
||
'https://www.netflix.com/title/80189685',
|
||
{
|
||
headless: false, // Disable browser fallback
|
||
timeoutMs: 30000, // 30 second timeout
|
||
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||
}
|
||
);
|
||
```
|
||
|
||
**Error Handling**
|
||
```javascript
|
||
import { scraperNetflix } from 'metascraper';
|
||
|
||
try {
|
||
const result = await scraperNetflix('https://www.netflix.com/title/80189685');
|
||
console.log('Success:', result);
|
||
} catch (error) {
|
||
console.error('Scraping failed:', error.message);
|
||
// Turkish error messages for Turkish users
|
||
// "Netflix scraping başarısız: Netflix URL'i gereklidir."
|
||
}
|
||
```
|
||
|
||
### `scraperPrime(inputUrl, options?)`
|
||
|
||
Amazon Prime Video metadata extraction function with automatic fallback and Turkish localization.
|
||
|
||
#### Parameters
|
||
|
||
| Parameter | Type | Required | Default | Description |
|
||
|-----------|------|----------|---------|-------------|
|
||
| `inputUrl` | `string` | ✅ | - | Amazon Prime Video URL (any format) |
|
||
| `options` | `object` | ❌ | `{}` | Configuration options |
|
||
|
||
#### Options
|
||
|
||
| Option | Type | Default | Description |
|
||
|--------|------|---------|-------------|
|
||
| `headless` | `boolean` | `true` | Enable Playwright fallback for missing data |
|
||
| `timeoutMs` | `number` | `15000` | Request timeout in milliseconds |
|
||
| `userAgent` | `string` | Chrome 118 User-Agent | Custom User-Agent string |
|
||
|
||
#### Returns
|
||
|
||
```typescript
|
||
Promise<{
|
||
url: string; // Normalized Prime Video URL
|
||
id: string; // Prime Video content ID
|
||
name: string; // Clean title (Amazon UI removed)
|
||
year: string | number | undefined; // Release year
|
||
seasons: string | null; // Season info for TV series (null for movies)
|
||
thumbnail: string | null; // Poster/thumbnail image URL
|
||
info: string | null; // Content description/summary
|
||
genre: string | null; // Genre (Turkish normalized)
|
||
}>
|
||
```
|
||
|
||
#### Examples
|
||
|
||
**Basic Usage**
|
||
```javascript
|
||
import { scraperPrime } from 'metascraper';
|
||
|
||
const result = await scraperPrime('https://www.primevideo.com/-/tr/detail/0NHIN3TGAI9L7VZ45RS52RHUPL/ref=share_ios_movie');
|
||
console.log(result);
|
||
// {
|
||
// "url": "https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL",
|
||
// "id": "0NHIN3TGAI9L7VZ45RS52RHUPL",
|
||
// "name": "Little Women",
|
||
// "year": "2020",
|
||
// "seasons": null,
|
||
// "thumbnail": "https://m.media-amazon.com/images/S/pv-target-images/c1b08ebea5ba29c47145c623e7d1c586290221ec12fa93850029e581f54049c4.jpg",
|
||
// "info": "In the years after the Civil War, Jo March lives in New York and makes her living as a writer...",
|
||
// "genre": "Dram"
|
||
// }
|
||
```
|
||
|
||
**Advanced Configuration**
|
||
```javascript
|
||
import { scraperPrime } from 'metascraper';
|
||
|
||
const result = await scraperPrime(
|
||
'https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL',
|
||
{
|
||
headless: false, // Disable browser fallback
|
||
timeoutMs: 30000, // 30 second timeout
|
||
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||
}
|
||
);
|
||
```
|
||
|
||
**Error Handling**
|
||
```javascript
|
||
import { scraperPrime } from 'metascraper';
|
||
|
||
try {
|
||
const result = await scraperPrime('https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL');
|
||
console.log('Success:', result);
|
||
} catch (error) {
|
||
console.error('Scraping failed:', error.message);
|
||
// Turkish error messages for Turkish users
|
||
// "Amazon Prime scraping başarısız: Amazon Prime URL'i gereklidir."
|
||
}
|
||
```
|
||
|
||
## 🧩 Internal APIs
|
||
|
||
### `parseNetflixHtml(html)` - Parser API
|
||
|
||
Parse Netflix HTML content to extract metadata without network requests.
|
||
|
||
#### Parameters
|
||
|
||
| Parameter | Type | Required | Description |
|
||
|-----------|------|----------|-------------|
|
||
| `html` | `string` | ✅ | Raw HTML content from Netflix page |
|
||
|
||
#### Returns
|
||
|
||
```typescript
|
||
{
|
||
name?: string; // Clean title
|
||
year?: string | number; // Release year
|
||
seasons?: string | null; // Season information
|
||
thumbnail?: string | null; // Thumbnail image URL
|
||
info?: string | null; // Content description
|
||
genre?: string | null; // Genre information
|
||
}
|
||
```
|
||
|
||
#### Examples
|
||
|
||
```javascript
|
||
import { parseNetflixHtml } from 'metascraper/parser';
|
||
|
||
// With cached HTML
|
||
const fs = await import('node:fs');
|
||
const html = fs.readFileSync('netflix-page.html', 'utf8');
|
||
const metadata = parseNetflixHtml(html);
|
||
|
||
console.log(metadata);
|
||
// {
|
||
// "name": "The Witcher",
|
||
// "year": "2025",
|
||
// "seasons": "4 Sezon",
|
||
// "thumbnail": "https://occ-0-7335-778.1.nflxso.net/dnm/api/v6/6AYY37jfdO6hpXcMjf9Yu5cnmO0/AAAABSkrIGPSyEfSWYQzc8rEFo6EtVV6Ls8WtPpNwR42MSKSNPNomZWV5P_l2MxGuJEkoPm71UT_eBK_SsTEH8pRslQr0sjpdhVHjxh4.jpg",
|
||
// "info": "Mutasyona uğramış bir canavar avcısı olan Rivyalı Geralt, insanların çoğunlukla yaratıklardan daha uğursuz olduğu, karmaşa içindeki bir dünyada kaderine doğru yol alıyor.",
|
||
// "genre": "Aksiyon"
|
||
// }
|
||
```
|
||
|
||
### `fetchPageContentWithPlaywright(url, options)` - Headless API
|
||
|
||
Fetch Netflix page content using Playwright browser automation.
|
||
|
||
#### Parameters
|
||
|
||
| Parameter | Type | Required | Description |
|
||
|-----------|------|----------|-------------|
|
||
| `url` | `string` | ✅ | Complete URL to fetch |
|
||
| `options` | `object` | ✅ | Browser configuration |
|
||
|
||
#### Options
|
||
|
||
| Option | Type | Default | Description |
|
||
|--------|------|---------|-------------|
|
||
| `timeoutMs` | `number` | `15000` | Page load timeout |
|
||
| `userAgent` | `string` | Chrome 118 | Browser User-Agent |
|
||
| `headless` | `boolean` | `true` | Run browser in headless mode |
|
||
|
||
#### Returns
|
||
|
||
```typescript
|
||
Promise<string> // HTML content of the page
|
||
```
|
||
|
||
#### Examples
|
||
|
||
```javascript
|
||
import { fetchPageContentWithPlaywright } from 'metascraper/headless';
import { parseNetflixHtml } from 'metascraper/parser';
|
||
|
||
try {
|
||
const html = await fetchPageContentWithPlaywright(
|
||
'https://www.netflix.com/title/80189685',
|
||
{
|
||
timeoutMs: 30000,
|
||
headless: false // Show browser (useful for debugging)
|
||
}
|
||
);
|
||
|
||
// Process the HTML with parser
|
||
const metadata = parseNetflixHtml(html);
|
||
console.log(metadata);
|
||
} catch (error) {
|
||
console.error('Browser automation failed:', error.message);
|
||
}
|
||
```
|
||
|
||
### `parsePrimeHtml(html)` - Prime Video Parser API
|
||
|
||
Parse Amazon Prime Video HTML content to extract metadata without network requests.
|
||
|
||
#### Parameters
|
||
|
||
| Parameter | Type | Required | Description |
|
||
|-----------|------|----------|-------------|
|
||
| `html` | `string` | ✅ | Raw HTML content from Prime Video page |
|
||
|
||
#### Returns
|
||
|
||
```typescript
|
||
{
|
||
name?: string; // Clean title
|
||
year?: string | number; // Release year
|
||
seasons?: string | null; // Season information
|
||
thumbnail?: string | null; // Thumbnail image URL
|
||
info?: string | null; // Content description
|
||
genre?: string | null; // Genre information
|
||
}
|
||
```
|
||
|
||
#### Examples
|
||
|
||
```javascript
|
||
import { parsePrimeHtml } from 'metascraper/parser';
|
||
|
||
// With cached HTML
|
||
const fs = await import('node:fs');
|
||
const html = fs.readFileSync('prime-page.html', 'utf8');
|
||
const metadata = parsePrimeHtml(html);
|
||
|
||
console.log(metadata);
|
||
// {
|
||
// "name": "Little Women",
|
||
// "year": "2020",
|
||
// "seasons": null,
|
||
// "thumbnail": "https://m.media-amazon.com/images/S/pv-target-images/...",
|
||
// "info": "In the years after the Civil War, Jo March lives in New York...",
|
||
// "genre": "Dram"
|
||
// }
|
||
```
|
||
|
||
## 🔧 URL Processing
|
||
|
||
### Supported URL Formats
|
||
|
||
The `scraperNetflix` function automatically normalizes various Netflix URL formats:
|
||
|
||
| Input Format | Normalized Output | Notes |
|
||
|--------------|-------------------|-------|
|
||
| `https://www.netflix.com/title/80189685` | `https://www.netflix.com/title/80189685` | Standard format |
|
||
| `https://www.netflix.com/tr/title/80189685` | `https://www.netflix.com/title/80189685` | Turkish locale |
|
||
| `https://www.netflix.com/tr/title/80189685?s=i&trkid=264356104&vlang=tr` | `https://www.netflix.com/title/80189685` | With parameters |
|
||
| `https://www.netflix.com/title/80189685?trackId=12345` | `https://www.netflix.com/title/80189685` | With tracking |
|
||
|
||
### URL Validation
|
||
|
||
The function validates URLs with these rules:
|
||
|
||
1. **Format**: Must be a valid URL
|
||
2. **Domain**: Must contain `netflix.com`
|
||
3. **Path**: Must contain `title/` followed by a numeric ID
|
||
4. **ID Extraction**: Uses regex to extract title ID
|
||
|
||
```javascript
|
||
// These will work:
|
||
'https://www.netflix.com/title/80189685'
|
||
'https://www.netflix.com/tr/title/80189685?s=i&vlang=tr'
|
||
|
||
// These will fail:
|
||
'https://google.com' // Wrong domain
|
||
'https://www.netflix.com/browse' // No title ID
|
||
'not-a-url' // Invalid format
|
||
'https://www.netflix.com/title/abc' // Non-numeric ID
|
||
```
|
||
|
||
### Amazon Prime Video URL Formats
|
||
|
||
The `scraperPrime` function automatically normalizes various Prime Video URL formats:
|
||
|
||
| Input Format | Normalized Output | Notes |
|
||
|--------------|-------------------|-------|
|
||
| `https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL` | `https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL` | Standard format |
|
||
| `https://www.primevideo.com/-/tr/detail/0NHIN3TGAI9L7VZ45RS52RHUPL/ref=share_ios_movie` | `https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL` | Turkish locale with tracking |
|
||
| `https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL?ref_=atv_dp` | `https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL` | With parameters |
|
||
|
||
### Prime Video URL Validation
|
||
|
||
The function validates URLs with these rules:
|
||
|
||
1. **Format**: Must be a valid URL
|
||
2. **Domain**: Must contain `primevideo.com`
|
||
3. **Path**: Must contain `detail/` followed by a content ID
|
||
4. **ID Extraction**: Uses path parsing to extract content ID
|
||
|
||
```javascript
|
||
// These will work:
|
||
'https://www.primevideo.com/detail/0NHIN3TGAI9L7VZ45RS52RHUPL'
|
||
'https://www.primevideo.com/-/tr/detail/0NHIN3TGAI9L7VZ45RS52RHUPL/ref=share_ios_movie'
|
||
|
||
// These will fail:
|
||
'https://google.com' // Wrong domain
|
||
'https://www.primevideo.com/browse' // No content ID
|
||
'not-a-url' // Invalid format
|
||
```
|
||
|
||
## 🌍 Localization Features
|
||
|
||
### Turkish UI Text Removal
|
||
|
||
The parser automatically removes Turkish Netflix UI text from titles:
|
||
|
||
| Original Title | Cleaned Title | Removed Pattern |
|
||
|----------------|---------------|-----------------|
|
||
| "The Witcher izlemenizi bekliyor" | "The Witcher" | `izlemenizi bekliyor` |
|
||
| "Stranger Things izleyin" | "Stranger Things" | `izleyin` |
|
||
| "Sezon 4 devam et" | "Sezon 4" | `devam et` |
|
||
| "Dark başla" | "Dark" | `başla` |
|
||
| "The Crown izlemeye devam" | "The Crown" | `izlemeye devam` |
|
||
|
||
### Supported Turkish Patterns
|
||
|
||
```javascript
|
||
const TURKISH_UI_PATTERNS = [
|
||
/\s+izlemenizi bekliyor$/i, // "waiting for you to watch"
|
||
/\s+izleyin$/i, // "watch"
|
||
/\s+devam et$/i, // "continue"
|
||
/\s+başla$/i, // "start"
|
||
/\s+izlemeye devam$/i, // "continue watching"
|
||
/\s+Sezon\s+\d+.*izlemeye devam$/i, // "Sezon X izlemeye devam"
|
||
/\s+Sezon\s+\d+.*başla$/i, // "Sezon X başla"
|
||
];
|
||
```
|
||
|
||
### English UI Pattern Removal
|
||
|
||
The parser also removes common English UI text from titles:
|
||
|
||
| Original Title | Cleaned Title | Removed Pattern |
|
||
|----------------|---------------|-----------------|
|
||
| "Watch Now The Witcher" | "The Witcher" | `Watch Now` |
|
||
| "The Witcher Continue Watching" | "The Witcher" | `Continue Watching` |
|
||
| "Season 4 Play" | "Season 4" | `Season X Play` |
|
||
|
||
## 📊 Data Extraction Patterns
|
||
|
||
### JSON-LD Processing
|
||
|
||
The parser extracts metadata from JSON-LD structured data:
|
||
|
||
```javascript
|
||
// Looks for these JSON-LD fields:
|
||
const YEAR_FIELDS = [
|
||
'datePublished', 'startDate', 'uploadDate',
|
||
'copyrightYear', 'releasedEvent', 'releaseYear', 'dateCreated'
|
||
];
|
||
|
||
const SEASON_TYPES = ['TVSeries', 'TVShow', 'Series'];
|
||
```
|
||
|
||
### Meta Tag Fallbacks
|
||
|
||
If JSON-LD is unavailable, falls back to HTML meta tags:
|
||
|
||
```html
|
||
<meta property="og:title" content="The Witcher izlemenizi bekliyor | Netflix">
|
||
<meta name="title" content="The Witcher | Netflix">
|
||
<title>The Witcher izlemenizi bekliyor | Netflix</title>
|
||
```
|
||
|
||
### Thumbnail Image Extraction
|
||
|
||
The parser automatically extracts poster/thumbnail images from Netflix meta tags:
|
||
|
||
```javascript
|
||
// Thumbnail selectors in priority order:
|
||
const THUMBNAIL_SELECTORS = [
|
||
'meta[property="og:image"]', // Open Graph image (primary)
|
||
'meta[name="twitter:image"]', // Twitter card image
|
||
'meta[property="og:image:secure_url"]', // Secure image URL
|
||
'link[rel="image_src"]', // Image source link
|
||
'meta[itemprop="image"]' // Schema.org image
|
||
];
|
||
```
|
||
|
||
**Example Netflix HTML:**
|
||
```html
|
||
<meta property="og:image" content="https://occ-0-7335-778.1.nflxso.net/dnm/api/v6/6AYY37jfdO6hpXcMjf9Yu5cnmO0/AAAABSkrIGPSyEfSWYQzc8rEFo6EtVV6Ls8WtPpNwR42MSKSNPNomZWV5P_l2MxGuJEkoPm71UT_eBK_SsTEH8pRslQr0sjpdhVHjxh4.jpg">
|
||
```
|
||
|
||
**URL Validation:**
|
||
- Only Netflix CDN domains are accepted (nflxso.net, nflximg.net, etc.)
|
||
- Image file extensions are verified (.jpg, .jpeg, .png, .webp)
|
||
- Query parameters are cleaned for stability
|
||
|
||
**Fallback Strategy:**
|
||
1. Try Open Graph image first (most reliable)
|
||
2. Fall back to Twitter card image
|
||
3. Try other meta tags if needed
|
||
4. Return null if no valid thumbnail found
|
||
|
||
### Season Detection
|
||
|
||
For TV series, extracts season information:
|
||
|
||
```javascript
|
||
// Example JSON-LD for TV series:
|
||
{
|
||
"@type": "TVSeries",
|
||
"name": "The Witcher",
|
||
"numberOfSeasons": 4,
|
||
"datePublished": "2025"
|
||
}
|
||
|
||
// Result: "4 Sezon"
|
||
```
|
||
|
||
## ⚡ Performance Characteristics
|
||
|
||
### Response Times by Mode
|
||
|
||
| Mode | Typical Response | Success Rate | Resource Usage |
|
||
|------|------------------|--------------|----------------|
|
||
| Static Only | 200-500ms | ~85% | Very Low |
|
||
| Static + Headless Fallback | 2-5s | ~95% | Medium |
|
||
| Headless Only | 2-3s | ~90% | High |
|
||
|
||
### Resource Requirements
|
||
|
||
**Static Mode:**
|
||
- CPU: Low (< 5%)
|
||
- Memory: < 20MB
|
||
- Network: 1 HTTP request
|
||
|
||
**Headless Mode:**
|
||
- CPU: Medium (10-20%)
|
||
- Memory: 100-200MB
|
||
- Network: Multiple requests
|
||
- Browser: Chromium instance
|
||
|
||
## 🚨 Error Types & Handling
|
||
|
||
### Common Error Scenarios
|
||
|
||
#### 1. Invalid URL
|
||
```javascript
|
||
await scraperNetflix('invalid-url');
|
||
// Throws: "Geçersiz URL sağlandı."
|
||
```
|
||
|
||
#### 2. Non-Netflix URL
|
||
```javascript
|
||
await scraperNetflix('https://google.com');
|
||
// Throws: "URL netflix.com adresini göstermelidir."
|
||
```
|
||
|
||
#### 3. Missing Title ID
|
||
```javascript
|
||
await scraperNetflix('https://www.netflix.com/browse');
|
||
// Throws: "URL'de Netflix başlık ID'si bulunamadı."
|
||
```
|
||
|
||
#### 4. Network Timeout
|
||
```javascript
|
||
await scraperNetflix('https://www.netflix.com/title/80189685', { timeoutMs: 1 });
|
||
// Throws: "Request timed out while reaching Netflix."
|
||
```
|
||
|
||
#### 5. 404 Not Found
|
||
```javascript
|
||
await scraperNetflix('https://www.netflix.com/title/99999999');
|
||
// Throws: "Netflix title not found (404)."
|
||
```
|
||
|
||
#### 6. Playwright Not Available
|
||
```javascript
|
||
// When headless mode needed but Playwright not installed
|
||
// Throws: "Playwright is not installed. Install the optional dependency..."
|
||
```
|
||
|
||
#### 7. Parsing Failed
|
||
```javascript
|
||
// When HTML cannot be parsed for metadata
|
||
// Throws: "Netflix sayfa meta verisi parse edilemedi."
|
||
```
|
||
|
||
### Error Object Structure
|
||
|
||
```javascript
|
||
{
|
||
name: "Error",
|
||
message: "Netflix scraping başarısız: Geçersiz URL sağlandı.",
|
||
stack: "Error: Netflix scraping başarısız: Geçersiz URL sağlandı.\n at scraperNetflix...",
|
||
// Additional context for debugging
|
||
}
|
||
```
|
||
|
||
## 🔧 Advanced Usage Patterns
|
||
|
||
### Batch Processing
|
||
|
||
```javascript
|
||
import { scraperNetflix } from 'metascraper';
|
||
|
||
const urls = [
|
||
'https://www.netflix.com/title/80189685',
|
||
'https://www.netflix.com/title/82123114',
|
||
'https://www.netflix.com/title/70177057'
|
||
];
|
||
|
||
const results = await Promise.allSettled(
|
||
urls.map(url => scraperNetflix(url))
|
||
);
|
||
|
||
results.forEach((result, index) => {
|
||
if (result.status === 'fulfilled') {
|
||
console.log(`✅ ${urls[index]}:`, result.value.name);
|
||
} else {
|
||
console.log(`❌ ${urls[index]}:`, result.reason.message);
|
||
}
|
||
});
|
||
```
|
||
|
||
### Custom User-Agent Rotation
|
||
|
||
```javascript
|
||
const userAgents = [
|
||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
|
||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36'
|
||
];
|
||
|
||
const getRandomUA = () => userAgents[Math.floor(Math.random() * userAgents.length)];
|
||
|
||
const result = await scraperNetflix(url, {
|
||
userAgent: getRandomUA()
|
||
});
|
||
```
|
||
|
||
### Retry Logic Implementation
|
||
|
||
```javascript
|
||
async function scrapeWithRetry(url, maxRetries = 3) {
|
||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||
try {
|
||
return await scraperNetflix(url);
|
||
} catch (error) {
|
||
if (attempt === maxRetries) throw error;
|
||
|
||
console.log(`Attempt ${attempt} failed, retrying in ${attempt * 1000}ms...`);
|
||
await new Promise(resolve => setTimeout(resolve, attempt * 1000));
|
||
}
|
||
}
|
||
}
|
||
```
|
||
|
||
### Caching Integration
|
||
|
||
```javascript
|
||
const cache = new Map();
|
||
|
||
async function scrapeWithCache(url) {
|
||
const cacheKey = `netflix:${url}`;
|
||
|
||
if (cache.has(cacheKey)) {
|
||
console.log('Cache hit for:', url);
|
||
return cache.get(cacheKey);
|
||
}
|
||
|
||
const result = await scraperNetflix(url);
|
||
cache.set(cacheKey, result);
|
||
|
||
// Optional: Cache expiration
|
||
setTimeout(() => cache.delete(cacheKey), 30 * 60 * 1000); // 30 minutes
|
||
|
||
return result;
|
||
}
|
||
```
|
||
|
||
---
|
||
|
||
*API documentation last updated: 2025-11-23* |