diff --git a/README.md b/README.md
index dc96e60..50ca743 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 Gitea
 
-Python
+Python E2E Tests
 
 Platform
 
 Layout
@@ -24,7 +24,7 @@ cd
 ### macOS / Linux
 
 ```bash
-python3.12 -m venv .venv
+python3 -m venv .venv
 source .venv/bin/activate
 python -m pip install -U pip
 python -m pip install -e .
@@ -34,7 +34,7 @@ scrapling install
 ### Windows (PowerShell)
 
 ```powershell
-py -3.12 -m venv .venv
+py -3.11 -m venv .venv
 .venv\Scripts\Activate.ps1
 python -m pip install -U pip
 python -m pip install -e .
@@ -44,7 +44,7 @@ scrapling install
 ### Windows (CMD)
 
 ```bat
-py -3.12 -m venv .venv
+py -3.11 -m venv .venv
 .venv\Scripts\activate.bat
 python -m pip install -U pip
 python -m pip install -e .
diff --git a/pyproject.toml b/pyproject.toml
index 4dbca05..fe5f46c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools", "wheel"]
+requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -7,7 +7,7 @@ name = "wscraper"
 version = "0.1.0"
 description = "Multi-site scraper CLI"
 readme = "README.md"
-requires-python = ">=3.12"
+requires-python = ">=3.11"
 dependencies = [
     "scrapling[fetchers]==0.4.1",
 ]
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..6068493
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,3 @@
+from setuptools import setup
+
+setup()
diff --git a/src/wscraper/sites/happyfappy.py b/src/wscraper/sites/happyfappy.py
index 339570a..c01abfb 100644
--- a/src/wscraper/sites/happyfappy.py
+++ b/src/wscraper/sites/happyfappy.py
@@ -4,6 +4,7 @@ import argparse
 import json
 import random
 import re
+import socket
 import time
 from pathlib import Path
 from typing import Any
@@ -101,6 +102,44 @@ def absolute_url(base_url: str, href: str) -> str:
     return f"{base_url.rstrip('/')}/{href}"
 
 
+def ensure_hosts_entry(host: str) -> None:
+    try:
+        ip = socket.gethostbyname(host)
+    except OSError:
+        return
+
+    hosts_path = Path("/etc/hosts")
+    try:
+        current = hosts_path.read_text(encoding="utf-8")
+    except OSError:
+        return
+
+    if re.search(rf"(^|\s){re.escape(host)}(\s|$)", current, flags=re.MULTILINE):
+        return
+
+    try:
+        with hosts_path.open("a", encoding="utf-8") as handle:
+            handle.write(f"\n{ip} {host}\n")
+    except OSError:
+        return
+
+
+def ensure_tracker_hosts(base_url: str) -> None:
+    parsed = urlparse(base_url)
+    host = parsed.hostname
+    if not host:
+        return
+
+    variants = {host}
+    if host.startswith("www."):
+        variants.add(host[4:])
+    else:
+        variants.add(f"www.{host}")
+
+    for candidate in variants:
+        ensure_hosts_entry(candidate)
+
+
 def fetch_dynamic_with_retry(session: Any, url: str, retries: int, backoff_base: float) -> Any:
     last_error: Exception | None = None
     for attempt in range(retries):
@@ -172,6 +211,7 @@ def build_bookmarks_url(base_url: str, page: int) -> str:
 
 def run_get_bookmarks(args: argparse.Namespace) -> None:
     target_host = urlparse(args.base_url).hostname or "www.happyfappy.net"
+    ensure_tracker_hosts(args.base_url)
 
     cookie_value = args.cookie or ""
     if not cookie_value and args.cookie_file:
@@ -316,6 +356,7 @@ def validate_torrent_response(download_url: str, filename: str, data: bytes) ->
 def run_download_torrent_files(args: argparse.Namespace) -> None:
     base_url = args.base_url.rstrip("/")
     target_host = urlparse(base_url).hostname or "www.happyfappy.net"
+    ensure_tracker_hosts(base_url)
 
     cookie_value = args.cookie or ""
     if not cookie_value and args.cookie_file: