feat: add browser-based torrent download and package-only Scrapling usage
This commit is contained in:
@@ -5,21 +5,12 @@ import argparse
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
from scrapling.fetchers import DynamicSession
|
||||
except ModuleNotFoundError:
|
||||
local_repo = Path(__file__).resolve().parent / "Scrapling"
|
||||
if local_repo.exists():
|
||||
sys.path.insert(0, str(local_repo))
|
||||
from scrapling.fetchers import DynamicSession
|
||||
else:
|
||||
raise
|
||||
from scrapling.fetchers import DynamicSession
|
||||
|
||||
STOP_TEXT = "You have not bookmarked any torrents."
|
||||
BG_URL_RE = re.compile(r"url\((?:'|\")?(.*?)(?:'|\")?\)")
|
||||
@@ -122,11 +113,13 @@ def extract_background_image(style: str) -> str | None:
|
||||
return value or None
|
||||
|
||||
|
||||
def extract_torrent_cards(response: Any) -> list[dict[str, Any]]:
|
||||
def extract_torrent_cards(response: Any, base_url: str) -> list[dict[str, Any]]:
|
||||
records: list[dict[str, Any]] = []
|
||||
cards = response.css("div.torrent_grid div.torrent_grid__torrent")
|
||||
for card in cards:
|
||||
page_url = (card.css('a[href^="/torrents.php?id="]::attr(href)').get("") or "").strip()
|
||||
if page_url and not page_url.startswith("http"):
|
||||
page_url = f"{base_url.rstrip('/')}{page_url}"
|
||||
category = (card.css("span.torrent_grid__torrent__cat::text").get("") or "").strip()
|
||||
title = (
|
||||
card.css("h3.trim::attr(title)").get("")
|
||||
@@ -220,7 +213,7 @@ def run(args: argparse.Namespace) -> None:
|
||||
if should_stop(response):
|
||||
break
|
||||
|
||||
page_records = extract_torrent_cards(response)
|
||||
page_records = extract_torrent_cards(response, args.base_url)
|
||||
all_records.extend(page_records)
|
||||
print(f"[page={page}] extracted={len(page_records)} total={len(all_records)}")
|
||||
page += 1
|
||||
|
||||
Reference in New Issue
Block a user