diff --git a/src/parser.js b/src/parser.js index f0b759b..8ca6e02 100644 --- a/src/parser.js +++ b/src/parser.js @@ -368,6 +368,20 @@ const PRIME_THUMBNAIL_SELECTORS = [ 'img[alt*="poster"]' ]; +const PRIME_16x9_STYLE_REGEX = + /aspect-ratio:\s*16\/9[^;{]*[^}]*(?:url|background-image)\((['"]?)(https?:\/\/[^"')]+)\1\)/gi; +const PRIME_IMAGE_REGEX = /https:\/\/m\.media-amazon\.com\/images\/S\/pv-target-images\/[^\s"')]+/gi; + +const scorePrimeImage = (url) => { + let score = 0; + if (/_SX1080_/i.test(url) || /_UX1080_/i.test(url)) score += 60; + if (/_SX\d{3,4}_/i.test(url) || /_UX\d{3,4}_/i.test(url)) score += 30; + if (/_FMjpe?g_/i.test(url)) score += 10; + if (/pv-target-images/.test(url)) score += 10; + if (/16x9|16\/9/i.test(url)) score += 5; + return score; +}; + const PRIME_DESCRIPTION_SELECTORS = [ 'meta[name="description"]', 'meta[property="og:description"]', @@ -444,20 +458,43 @@ function extractPrimeYear($, html) { * Extract thumbnail from Amazon Prime page */ function extractPrimeThumbnail($, html) { + const candidates = []; + for (const selector of PRIME_THUMBNAIL_SELECTORS) { const imageUrl = $(selector).attr('content') || $(selector).attr('src'); if (imageUrl && isValidPrimeThumbnail(imageUrl)) { - return imageUrl; + candidates.push(imageUrl); + } + } + + // Try to extract 16:9 images from inline styles + for (const match of html.matchAll(PRIME_16x9_STYLE_REGEX)) { + if (match[2] && isValidPrimeThumbnail(match[2])) { + candidates.push(match[2]); } } // Try to extract from embedded JSON data const jsonMatch = html.match(/"heroImageUrl":"([^"]+)"/); if (jsonMatch && jsonMatch[1]) { - return jsonMatch[1].replace(/\\u002F/g, '/'); + candidates.push(jsonMatch[1].replace(/\\u002F/g, '/')); } - return undefined; + // Extract any pv-target-images occurrences + for (const match of html.matchAll(PRIME_IMAGE_REGEX)) { + if (match[0]) { + candidates.push(match[0]); + } + } + + if (!candidates.length) return undefined; + + const unique = Array.from(new Set(candidates)); + const best = unique + .map((url) => ({ url, score: scorePrimeImage(url) })) + .sort((a, b) => b.score - a.score)[0]; + + return best?.url; } /**