feat: q-buffer watcher akisini destekle
This commit is contained in:
@@ -5,7 +5,7 @@
|
|||||||
<a href="https://gitea.wisecolt-panda.net/wisecolt/Bookmark-Tracker">
|
<a href="https://gitea.wisecolt-panda.net/wisecolt/Bookmark-Tracker">
|
||||||
<img src="https://img.shields.io/badge/Gitea-Repository-609926?logo=gitea&logoColor=white" alt="Gitea">
|
<img src="https://img.shields.io/badge/Gitea-Repository-609926?logo=gitea&logoColor=white" alt="Gitea">
|
||||||
</a>
|
</a>
|
||||||
<img src="https://img.shields.io/badge/Python-3.12%2B-3776AB?logo=python&logoColor=white" alt="Python">
|
<img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python">
|
||||||
<img src="https://img.shields.io/badge/E2E_Tests-enabled-22c55e?logo=pytest&logoColor=white" alt="E2E Tests">
|
<img src="https://img.shields.io/badge/E2E_Tests-enabled-22c55e?logo=pytest&logoColor=white" alt="E2E Tests">
|
||||||
<img src="https://img.shields.io/badge/Platform-macOS%20%7C%20Linux%20%7C%20Windows-1f6feb" alt="Platform">
|
<img src="https://img.shields.io/badge/Platform-macOS%20%7C%20Linux%20%7C%20Windows-1f6feb" alt="Platform">
|
||||||
<img src="https://img.shields.io/badge/Layout-src%2F%20package-0A7B83" alt="Layout">
|
<img src="https://img.shields.io/badge/Layout-src%2F%20package-0A7B83" alt="Layout">
|
||||||
@@ -24,7 +24,7 @@ cd <REPO_FOLDER>
|
|||||||
### macOS / Linux
|
### macOS / Linux
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3.12 -m venv .venv
|
python3 -m venv .venv
|
||||||
source .venv/bin/activate
|
source .venv/bin/activate
|
||||||
python -m pip install -U pip
|
python -m pip install -U pip
|
||||||
python -m pip install -e .
|
python -m pip install -e .
|
||||||
@@ -34,7 +34,7 @@ scrapling install
|
|||||||
### Windows (PowerShell)
|
### Windows (PowerShell)
|
||||||
|
|
||||||
```powershell
|
```powershell
|
||||||
py -3.12 -m venv .venv
|
py -3.11 -m venv .venv
|
||||||
.venv\Scripts\Activate.ps1
|
.venv\Scripts\Activate.ps1
|
||||||
python -m pip install -U pip
|
python -m pip install -U pip
|
||||||
python -m pip install -e .
|
python -m pip install -e .
|
||||||
@@ -44,7 +44,7 @@ scrapling install
|
|||||||
### Windows (CMD)
|
### Windows (CMD)
|
||||||
|
|
||||||
```bat
|
```bat
|
||||||
py -3.12 -m venv .venv
|
py -3.11 -m venv .venv
|
||||||
.venv\Scripts\activate.bat
|
.venv\Scripts\activate.bat
|
||||||
python -m pip install -U pip
|
python -m pip install -U pip
|
||||||
python -m pip install -e .
|
python -m pip install -e .
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools", "wheel"]
|
requires = ["setuptools"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
@@ -7,7 +7,7 @@ name = "wscraper"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Multi-site scraper CLI"
|
description = "Multi-site scraper CLI"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"scrapling[fetchers]==0.4.1",
|
"scrapling[fetchers]==0.4.1",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -101,6 +102,44 @@ def absolute_url(base_url: str, href: str) -> str:
|
|||||||
return f"{base_url.rstrip('/')}/{href}"
|
return f"{base_url.rstrip('/')}/{href}"
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_hosts_entry(host: str) -> None:
    """Best-effort: append an ``<ip> <host>`` line for *host* to ``/etc/hosts``.

    The function silently does nothing when *host* is empty, when the hosts
    file already names *host* on a non-comment line, when the file cannot be
    read or written (missing, no permission, not valid UTF-8), or when *host*
    cannot be resolved.

    Args:
        host: Hostname to pin to the IPv4 address it currently resolves to.
    """
    if not host:
        return

    hosts_path = Path("/etc/hosts")
    try:
        current = hosts_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is not an OSError; a hosts file in another
        # encoding must not crash a best-effort helper.
        return

    # Check the file before resolving so an existing entry costs no DNS lookup.
    wanted = host.lower()
    for line in current.splitlines():
        # Everything after "#" is a comment in the hosts format, so a
        # commented-out entry does not count as present.
        tokens = line.split("#", 1)[0].split()
        if any(token.lower() == wanted for token in tokens):
            return

    try:
        ip = socket.gethostbyname(host)
    except (OSError, UnicodeError):
        # UnicodeError is raised for names that cannot be IDNA-encoded.
        return

    # Only add a separating newline when the file does not already end in one,
    # so repeated runs do not accumulate blank lines.
    separator = "" if not current or current.endswith("\n") else "\n"
    try:
        with hosts_path.open("a", encoding="utf-8") as handle:
            handle.write(f"{separator}{ip} {host}\n")
    except OSError:
        return
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_tracker_hosts(base_url: str) -> None:
    """Ensure hosts-file entries for the host in *base_url* and its ``www.`` twin.

    For a host ``example.org`` the candidates are ``example.org`` and
    ``www.example.org``; for ``www.example.org`` they are ``www.example.org``
    and ``example.org``. Each candidate is passed to ``ensure_hosts_entry``,
    the given host first.

    Nothing is done when *base_url* has no hostname or when the hostname is an
    IP address literal, since an address needs no name resolution.

    Args:
        base_url: Tracker base URL, e.g. ``https://www.example.org``.
    """
    # Imported locally so this block does not depend on a file-level import.
    import ipaddress

    host = urlparse(base_url).hostname
    if not host:
        return

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass  # Not an IP literal: a real hostname, carry on.
    else:
        return

    if host.startswith("www."):
        alternate = host[4:]
    else:
        alternate = f"www.{host}"

    # A tuple (not a set) keeps the processing order stable across runs;
    # the filter drops the empty alternate produced by a bare "www." host.
    for candidate in (host, alternate):
        if candidate:
            ensure_hosts_entry(candidate)
|
||||||
|
|
||||||
|
|
||||||
def fetch_dynamic_with_retry(session: Any, url: str, retries: int, backoff_base: float) -> Any:
|
def fetch_dynamic_with_retry(session: Any, url: str, retries: int, backoff_base: float) -> Any:
|
||||||
last_error: Exception | None = None
|
last_error: Exception | None = None
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
@@ -172,6 +211,7 @@ def build_bookmarks_url(base_url: str, page: int) -> str:
|
|||||||
|
|
||||||
def run_get_bookmarks(args: argparse.Namespace) -> None:
|
def run_get_bookmarks(args: argparse.Namespace) -> None:
|
||||||
target_host = urlparse(args.base_url).hostname or "www.happyfappy.net"
|
target_host = urlparse(args.base_url).hostname or "www.happyfappy.net"
|
||||||
|
ensure_tracker_hosts(args.base_url)
|
||||||
|
|
||||||
cookie_value = args.cookie or ""
|
cookie_value = args.cookie or ""
|
||||||
if not cookie_value and args.cookie_file:
|
if not cookie_value and args.cookie_file:
|
||||||
@@ -316,6 +356,7 @@ def validate_torrent_response(download_url: str, filename: str, data: bytes) ->
|
|||||||
def run_download_torrent_files(args: argparse.Namespace) -> None:
|
def run_download_torrent_files(args: argparse.Namespace) -> None:
|
||||||
base_url = args.base_url.rstrip("/")
|
base_url = args.base_url.rstrip("/")
|
||||||
target_host = urlparse(base_url).hostname or "www.happyfappy.net"
|
target_host = urlparse(base_url).hostname or "www.happyfappy.net"
|
||||||
|
ensure_tracker_hosts(base_url)
|
||||||
|
|
||||||
cookie_value = args.cookie or ""
|
cookie_value = args.cookie or ""
|
||||||
if not cookie_value and args.cookie_file:
|
if not cookie_value and args.cookie_file:
|
||||||
|
|||||||
Reference in New Issue
Block a user