first commit
This commit is contained in:
627
doc/TESTING.md
Normal file
627
doc/TESTING.md
Normal file
@@ -0,0 +1,627 @@
|
||||
# MetaScraper Testing Guide
|
||||
|
||||
## 🧪 Testing Philosophy
|
||||
|
||||
MetaScraper follows a comprehensive testing strategy that ensures reliability, performance, and maintainability:
|
||||
|
||||
- **Integration First**: Focus on end-to-end functionality
|
||||
- **Live Data Testing**: Test against real Netflix pages
|
||||
- **Performance Awareness**: Monitor response times and resource usage
|
||||
- **Error Coverage**: Test failure scenarios and edge cases
|
||||
- **Localization Testing**: Verify Turkish UI text removal
|
||||
|
||||
## 📋 Test Structure
|
||||
|
||||
### Test Categories
|
||||
|
||||
```
|
||||
tests/
|
||||
├── scrape.test.js # Main integration tests
|
||||
├── unit/ # Unit tests (future)
|
||||
│ ├── parser.test.js # Parser function tests
|
||||
│ ├── url-normalizer.test.js # URL normalization tests
|
||||
│ └── title-cleaner.test.js # Title cleaning tests
|
||||
├── integration/ # Integration tests (current)
|
||||
│ ├── live-scraping.test.js # Real Netflix URL tests
|
||||
│ └── headless-fallback.test.js # Browser fallback tests
|
||||
├── performance/ # Performance benchmarks (future)
|
||||
│ ├── response-times.test.js # Timing tests
|
||||
│ └── concurrent.test.js # Multiple request tests
|
||||
├── fixtures/ # Test data
|
||||
│ ├── sample-title.html # Sample Netflix HTML
|
||||
│ ├── turkish-ui.json # Turkish UI patterns
|
||||
│ └── test-urls.json # Test URL collection
|
||||
└── helpers/ # Test utilities (future)
|
||||
├── mock-data.js # Mock HTML generators
|
||||
└── test-utils.js # Common test helpers
|
||||
```
|
||||
|
||||
## 🏗️ Current Test Implementation
|
||||
|
||||
### Main Test Suite: `tests/scrape.test.js`
|
||||
|
||||
```javascript
|
||||
import { beforeAll, describe, expect, it } from 'vitest';
|
||||
import { scraperNetflix } from '../src/index.js';
|
||||
import { parseNetflixHtml } from '../src/parser.js';
|
||||
|
||||
const TEST_URL = 'https://www.netflix.com/title/80189685'; // The Witcher
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36';
|
||||
|
||||
let liveHtml = '';
|
||||
|
||||
beforeAll(async () => {
|
||||
// Fetch real Netflix page for testing
|
||||
const res = await fetch(TEST_URL, {
|
||||
headers: {
|
||||
'User-Agent': UA,
|
||||
Accept: 'text/html,application/xhtml+xml'
|
||||
}
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Live fetch başarısız: ${res.status}`);
|
||||
}
|
||||
|
||||
liveHtml = await res.text();
|
||||
}, 20000); // 20 second timeout for network requests
|
||||
```
|
||||
|
||||
### Test Coverage Areas
|
||||
|
||||
#### 1. HTML Parsing Tests
|
||||
|
||||
```javascript
|
||||
describe('parseNetflixHtml (canlı sayfa)', () => {
|
||||
it(
|
||||
'static HTML\'den en az isim ve yıl bilgisini okur',
|
||||
() => {
|
||||
const meta = parseNetflixHtml(liveHtml);
|
||||
expect(meta.name).toBeTruthy();
|
||||
expect(String(meta.name).toLowerCase()).toContain('witcher');
|
||||
expect(meta.year).toMatch(/\d{4}/);
|
||||
},
|
||||
20000
|
||||
);
|
||||
});
|
||||
```
|
||||
|
||||
#### 2. End-to-End Scraping Tests
|
||||
|
||||
```javascript
|
||||
describe('scraperNetflix (canlı istek)', () => {
|
||||
it(
|
||||
'normalize edilmiş url, id ve meta bilgilerini döner',
|
||||
async () => {
|
||||
const meta = await scraperNetflix(TEST_URL, { headless: false, userAgent: UA });
|
||||
expect(meta.url).toBe('https://www.netflix.com/title/80189685');
|
||||
expect(meta.id).toBe('80189685');
|
||||
expect(meta.name).toBeTruthy();
|
||||
expect(String(meta.name).toLowerCase()).toContain('witcher');
|
||||
expect(meta.year).toMatch(/\d{4}/);
|
||||
},
|
||||
20000
|
||||
);
|
||||
});
|
||||
```
|
||||
|
||||
## 🧪 Running Tests
|
||||
|
||||
### Basic Test Commands
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
npm test
|
||||
|
||||
# Run tests in watch mode
|
||||
npm test -- --watch
|
||||
|
||||
# Run tests once
|
||||
npm test -- --run
|
||||
|
||||
# Run tests with coverage
|
||||
npm test -- --coverage
|
||||
|
||||
# Run specific test file
|
||||
npm test scrape.test.js
|
||||
|
||||
# Run tests matching pattern
|
||||
npm test -- -t "Turkish"
|
||||
```
|
||||
|
||||
### Test Configuration
|
||||
|
||||
```javascript
|
||||
// vitest.config.js (if needed)
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
export default defineConfig({
|
||||
test: {
|
||||
testTimeout: 30000, // 30 second timeout for network tests
|
||||
hookTimeout: 30000, // Timeout for beforeAll hooks
|
||||
environment: 'node', // Node.js environment
|
||||
globals: true, // Use global test functions
|
||||
coverage: {
|
||||
reporter: ['text', 'json', 'lcov'], // lcov produces coverage/lcov.info for the CI upload step
|
||||
exclude: [
|
||||
'node_modules/',
|
||||
'tests/',
|
||||
'doc/'
|
||||
]
|
||||
}
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
## 📊 Test Data Management
|
||||
|
||||
### Live Test URLs
|
||||
|
||||
```javascript
|
||||
// tests/fixtures/test-urls.json
|
||||
[
|
||||
{
|
||||
"name": "The Witcher (TV Series)",
|
||||
"url": "https://www.netflix.com/title/80189685",
|
||||
"expected": {
|
||||
"type": "series",
|
||||
"hasSeasons": true,
|
||||
"titleContains": "witcher"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "ONE SHOT (Movie)",
|
||||
"url": "https://www.netflix.com/title/82123114",
|
||||
"expected": {
|
||||
"type": "movie",
|
||||
"hasSeasons": false,
|
||||
"titleContains": "one shot"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Sample HTML Fixtures
|
||||
|
||||
```html
|
||||
<!-- tests/fixtures/sample-title.html -->
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta property="og:title" content="The Witcher izlemenizi bekliyor | Netflix">
|
||||
<meta name="title" content="The Witcher | Netflix">
|
||||
<title>The Witcher izlemenizi bekliyor | Netflix</title>
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@type": "TVSeries",
|
||||
"name": "The Witcher izlemenizi bekliyor",
|
||||
"numberOfSeasons": 4,
|
||||
"datePublished": "2025"
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<!-- Netflix page content -->
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
### Turkish UI Pattern Tests
|
||||
|
||||
```javascript
|
||||
// tests/fixtures/turkish-ui.json
|
||||
{
|
||||
"title_cleaning_tests": [
|
||||
{
|
||||
"input": "The Witcher izlemenizi bekliyor | Netflix",
|
||||
"expected": "The Witcher",
|
||||
"removed": "izlemenizi bekliyor | Netflix"
|
||||
},
|
||||
{
|
||||
"input": "Stranger Things izleyin",
|
||||
"expected": "Stranger Things",
|
||||
"removed": "izleyin"
|
||||
},
|
||||
{
|
||||
"input": "Sezon 4 devam et",
|
||||
"expected": "Sezon 4",
|
||||
"removed": "devam et"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## 🔧 Test Utilities
|
||||
|
||||
### Custom Test Helpers
|
||||
|
||||
```javascript
|
||||
// tests/helpers/test-utils.js
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
export function loadFixture(filename) {
|
||||
const fixturePath = path.join(__dirname, '../fixtures', filename);
|
||||
return fs.readFileSync(fixturePath, 'utf8');
|
||||
}
|
||||
|
||||
export function loadJSONFixture(filename) {
|
||||
const content = loadFixture(filename);
|
||||
return JSON.parse(content);
|
||||
}
|
||||
|
||||
export async function withTimeout(promise, timeoutMs = 5000) {
|
||||
const timeout = new Promise((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`Test timeout after ${timeoutMs}ms`)), timeoutMs);
|
||||
});
|
||||
|
||||
return Promise.race([promise, timeout]);
|
||||
}
|
||||
|
||||
export function expectTurkishTitleClean(input, expected) {
|
||||
const result = cleanTitle(input);
|
||||
expect(result).toBe(expected);
|
||||
}
|
||||
```
|
||||
|
||||
### Mock Browser Automation
|
||||
|
||||
```javascript
|
||||
// tests/helpers/mock-playwright.js
|
||||
import { vi } from 'vitest';
|
||||
|
||||
export function mockPlaywrightSuccess(html) {
|
||||
vi.doMock('playwright', () => ({
|
||||
chromium: {
|
||||
launch: vi.fn(() => ({
|
||||
newContext: vi.fn(() => ({
|
||||
newPage: vi.fn(() => ({
|
||||
goto: vi.fn().mockResolvedValue(undefined),
|
||||
content: vi.fn().mockResolvedValue(html),
|
||||
waitForLoadState: vi.fn().mockResolvedValue(undefined)
|
||||
}))
|
||||
})),
|
||||
close: vi.fn().mockResolvedValue(undefined)
|
||||
}))
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
export function mockPlaywrightFailure() {
|
||||
vi.doMock('playwright', () => {
|
||||
throw new Error('Playwright not available');
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
## 🎯 Test Scenarios
|
||||
|
||||
### 1. URL Normalization Tests
|
||||
|
||||
```javascript
|
||||
describe('URL Normalization', () => {
|
||||
const testCases = [
|
||||
{
|
||||
input: 'https://www.netflix.com/tr/title/80189685?s=i&vlang=tr',
|
||||
expected: 'https://www.netflix.com/title/80189685',
|
||||
description: 'Turkish URL with parameters'
|
||||
},
|
||||
{
|
||||
input: 'https://www.netflix.com/title/80189685?trackId=12345',
|
||||
expected: 'https://www.netflix.com/title/80189685',
|
||||
description: 'URL with tracking parameters'
|
||||
}
|
||||
];
|
||||
|
||||
testCases.forEach(({ input, expected, description }) => {
|
||||
it(description, () => {
|
||||
const result = normalizeNetflixUrl(input);
|
||||
expect(result).toBe(expected);
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 2. Turkish UI Text Removal Tests
|
||||
|
||||
```javascript
|
||||
describe('Turkish UI Text Cleaning', () => {
|
||||
const turkishCases = [
|
||||
{
|
||||
input: 'The Witcher izlemenizi bekliyor',
|
||||
expected: 'The Witcher',
|
||||
pattern: 'waiting for you to watch'
|
||||
},
|
||||
{
|
||||
input: 'Dark izleyin',
|
||||
expected: 'Dark',
|
||||
pattern: 'watch'
|
||||
},
|
||||
{
|
||||
input: 'Money Heist devam et',
|
||||
expected: 'Money Heist',
|
||||
pattern: 'continue'
|
||||
}
|
||||
];
|
||||
|
||||
turkishCases.forEach(({ input, expected, pattern }) => {
|
||||
it(`removes Turkish UI text: ${pattern}`, () => {
|
||||
expect(cleanTitle(input)).toBe(expected);
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 3. JSON-LD Parsing Tests
|
||||
|
||||
```javascript
|
||||
describe('JSON-LD Metadata Extraction', () => {
|
||||
it('extracts movie metadata correctly', () => {
|
||||
const jsonLd = {
|
||||
'@type': 'Movie',
|
||||
'name': 'Inception',
|
||||
'datePublished': '2010',
|
||||
'copyrightYear': 2010
|
||||
};
|
||||
|
||||
const result = parseJsonLdObject(jsonLd);
|
||||
expect(result.name).toBe('Inception');
|
||||
expect(result.year).toBe(2010);
|
||||
expect(result.seasons).toBeUndefined();
|
||||
});
|
||||
|
||||
it('extracts TV series metadata with seasons', () => {
|
||||
const jsonLd = {
|
||||
'@type': 'TVSeries',
|
||||
'name': 'Stranger Things',
|
||||
'numberOfSeasons': 4,
|
||||
'datePublished': '2016'
|
||||
};
|
||||
|
||||
const result = parseJsonLdObject(jsonLd);
|
||||
expect(result.name).toBe('Stranger Things');
|
||||
expect(result.seasons).toBe('4 Sezon');
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 4. Error Handling Tests
|
||||
|
||||
```javascript
|
||||
describe('Error Handling', () => {
|
||||
it('throws error for invalid URL', async () => {
|
||||
await expect(scraperNetflix('invalid-url')).rejects.toThrow('Geçersiz URL sağlandı');
|
||||
});
|
||||
|
||||
it('throws error for non-Netflix URL', async () => {
|
||||
await expect(scraperNetflix('https://google.com')).rejects.toThrow('URL netflix.com adresini göstermelidir');
|
||||
});
|
||||
|
||||
it('throws error for URL without title ID', async () => {
|
||||
await expect(scraperNetflix('https://www.netflix.com/browse')).rejects.toThrow('URL\'de Netflix başlık ID\'si bulunamadı');
|
||||
});
|
||||
|
||||
it('handles network timeouts gracefully', async () => {
|
||||
await expect(scraperNetflix(TEST_URL, { timeoutMs: 1 })).rejects.toThrow('Request timed out');
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### 5. Performance Tests
|
||||
|
||||
```javascript
|
||||
describe('Performance', () => {
|
||||
it('completes static scraping within 1 second', async () => {
|
||||
const start = performance.now();
|
||||
await scraperNetflix(TEST_URL, { headless: false });
|
||||
const duration = performance.now() - start;
|
||||
|
||||
expect(duration).toBeLessThan(1000);
|
||||
}, 10000);
|
||||
|
||||
it('handles concurrent requests efficiently', async () => {
|
||||
const urls = Array(5).fill(TEST_URL);
|
||||
const start = performance.now();
|
||||
|
||||
const results = await Promise.allSettled(
|
||||
urls.map(url => scraperNetflix(url, { headless: false }))
|
||||
);
|
||||
|
||||
const duration = performance.now() - start;
|
||||
const successful = results.filter(r => r.status === 'fulfilled').length;
|
||||
|
||||
expect(duration).toBeLessThan(3000); // Should be faster than sequential
|
||||
expect(successful).toBeGreaterThan(0); // At least some should succeed
|
||||
}, 30000);
|
||||
});
|
||||
```
|
||||
|
||||
## 🔍 Test Debugging
|
||||
|
||||
### 1. Visual HTML Inspection
|
||||
|
||||
```javascript
|
||||
// Save HTML for manual debugging
|
||||
it('captures HTML for debugging', async () => {
|
||||
const html = await fetchStaticHtml(TEST_URL);
|
||||
fs.writeFileSync('debug-netflix-page.html', html);
|
||||
console.log('HTML saved to debug-netflix-page.html');
|
||||
|
||||
expect(html).toContain('<html');
|
||||
expect(html).toContain('netflix');
|
||||
});
|
||||
```
|
||||
|
||||
### 2. Network Request Debugging
|
||||
|
||||
```javascript
|
||||
// Debug network requests
|
||||
it('logs network request details', async () => {
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
global.fetch = async (url, options) => {
|
||||
console.log('🌐 Request URL:', url);
|
||||
console.log('📋 Headers:', options.headers);
|
||||
console.log('⏰ Time:', new Date().toISOString());
|
||||
|
||||
const response = await originalFetch(url, options);
|
||||
console.log('📊 Response status:', response.status);
|
||||
console.log('📏 Response size:', response.headers.get('content-length'));
|
||||
|
||||
return response;
|
||||
};
|
||||
|
||||
const result = await scraperNetflix(TEST_URL, { headless: false });
|
||||
|
||||
// Restore original fetch
|
||||
global.fetch = originalFetch;
|
||||
|
||||
expect(result.name).toBeTruthy();
|
||||
});
|
||||
```
|
||||
|
||||
### 3. Step-by-Step Processing
|
||||
|
||||
```javascript
|
||||
// Debug each step of the process
|
||||
it('logs processing steps', async () => {
|
||||
console.log('🚀 Starting Netflix scraping test');
|
||||
|
||||
// Step 1: URL normalization
|
||||
const normalized = normalizeNetflixUrl(TEST_URL);
|
||||
console.log('🔗 Normalized URL:', normalized);
|
||||
|
||||
// Step 2: HTML fetch
|
||||
const html = await fetchStaticHtml(normalized);
|
||||
console.log('📄 HTML length:', html.length);
|
||||
|
||||
// Step 3: Parsing
|
||||
const parsed = parseNetflixHtml(html);
|
||||
console.log('📊 Parsed metadata:', parsed);
|
||||
|
||||
// Step 4: Full process
|
||||
const fullResult = await scraperNetflix(TEST_URL);
|
||||
console.log('✅ Full result:', fullResult);
|
||||
|
||||
expect(fullResult.name).toBeTruthy();
|
||||
});
|
||||
```
|
||||
|
||||
## 📈 Continuous Testing
|
||||
|
||||
### GitHub Actions Workflow
|
||||
|
||||
```yaml
|
||||
# .github/workflows/test.yml
|
||||
name: Test Suite
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, develop ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
node-version: [18.x, 20.x, 22.x]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Use Node.js ${{ matrix.node-version }}
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
cache: 'npm'
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Install Playwright
|
||||
run: npx playwright install chromium
|
||||
|
||||
- name: Run tests
|
||||
run: npm test -- --coverage
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage/lcov.info
|
||||
```
|
||||
|
||||
### Pre-commit Hooks
|
||||
|
||||
```json
|
||||
// package.json
|
||||
{
|
||||
"husky": {
|
||||
"hooks": {
|
||||
"pre-commit": "npm test && npm run lint"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 🚨 Test Environment Considerations
|
||||
|
||||
### Network Dependencies
|
||||
|
||||
- **Live Tests**: Require internet connection to Netflix
|
||||
- **Timeouts**: Extended timeouts for network requests (30s+)
|
||||
- **Rate Limiting**: Be respectful to Netflix's servers
|
||||
- **Geographic**: Tests may behave differently by region
|
||||
|
||||
### Browser Dependencies
|
||||
|
||||
- **Playwright**: Optional dependency for headless tests
|
||||
- **Browser Installation**: Requires `npx playwright install`
|
||||
- **Memory**: Browser tests use more memory
|
||||
- **CI/CD**: Need to install browsers in CI environment
|
||||
|
||||
### Test Data Updates
|
||||
|
||||
- **Netflix Changes**: UI changes may break tests
|
||||
- **Pattern Updates**: Turkish UI patterns may change
|
||||
- **JSON-LD Structure**: Netflix may modify structured data
|
||||
- **URL Formats**: New URL patterns may emerge
|
||||
|
||||
## 📊 Test Metrics
|
||||
|
||||
### Success Criteria
|
||||
|
||||
- **Unit Tests**: 90%+ code coverage
|
||||
- **Integration Tests**: 100% API coverage
|
||||
- **Performance**: <1s response time for static mode
|
||||
- **Reliability**: 95%+ success rate for known URLs
|
||||
|
||||
### Test Monitoring
|
||||
|
||||
```javascript
|
||||
// Performance tracking
|
||||
const testMetrics = {
|
||||
staticScrapingTimes: [],
|
||||
headlessScrapingTimes: [],
|
||||
successRates: {},
|
||||
errorCounts: {}
|
||||
};
|
||||
|
||||
function recordMetric(type, value) {
|
||||
if (Array.isArray(testMetrics[type])) {
|
||||
testMetrics[type].push(value);
|
||||
} else {
|
||||
testMetrics[type][value] = (testMetrics[type][value] || 0) + 1;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*Testing guide last updated: 2025-11-23*
|
||||
Reference in New Issue
Block a user