feat: add browser-based torrent download and package-only Scrapling usage

This commit is contained in:
2026-03-07 01:04:36 +03:00
parent 690733a224
commit bea3010839
3 changed files with 307 additions and 13 deletions

View File

@@ -5,21 +5,12 @@ import argparse
import json
import random
import re
-import sys
import time
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
-try:
-from scrapling.fetchers import DynamicSession
-except ModuleNotFoundError:
-local_repo = Path(__file__).resolve().parent / "Scrapling"
-if local_repo.exists():
-sys.path.insert(0, str(local_repo))
-from scrapling.fetchers import DynamicSession
-else:
-raise
+from scrapling.fetchers import DynamicSession
STOP_TEXT = "You have not bookmarked any torrents."
BG_URL_RE = re.compile(r"url\((?:'|\")?(.*?)(?:'|\")?\)")
@@ -122,11 +113,13 @@ def extract_background_image(style: str) -> str | None:
return value or None
-def extract_torrent_cards(response: Any) -> list[dict[str, Any]]:
+def extract_torrent_cards(response: Any, base_url: str) -> list[dict[str, Any]]:
records: list[dict[str, Any]] = []
cards = response.css("div.torrent_grid div.torrent_grid__torrent")
for card in cards:
page_url = (card.css('a[href^="/torrents.php?id="]::attr(href)').get("") or "").strip()
+if page_url and not page_url.startswith("http"):
+page_url = f"{base_url.rstrip('/')}{page_url}"
category = (card.css("span.torrent_grid__torrent__cat::text").get("") or "").strip()
title = (
card.css("h3.trim::attr(title)").get("")
@@ -220,7 +213,7 @@ def run(args: argparse.Namespace) -> None:
if should_stop(response):
break
-page_records = extract_torrent_cards(response)
+page_records = extract_torrent_cards(response, args.base_url)
all_records.extend(page_records)
print(f"[page={page}] extracted={len(page_records)} total={len(all_records)}")
page += 1