docs: add cross-platform setup and wscraper usage examples
This commit is contained in:
61
README.md
61
README.md
@@ -1,6 +1,17 @@
|
||||
# HappyFappy Bookmarks Scraper
|
||||
# wscraper
|
||||
|
||||
## Setup
|
||||
HappyFappy için komutlar `wscraper.py` üzerinden çalışır. Proje ileride başka siteleri de destekleyecek şekilde yapılandırılmıştır.
|
||||
|
||||
## 1) Repo Clone
|
||||
|
||||
```bash
|
||||
git clone <REPO_URL>
|
||||
cd <REPO_FOLDER>
|
||||
```
|
||||
|
||||
## 2) Kurulum
|
||||
|
||||
### macOS / Linux
|
||||
|
||||
```bash
|
||||
python3.12 -m venv .venv
|
||||
@@ -10,17 +21,47 @@ python -m pip install -r requirements.txt
|
||||
scrapling install
|
||||
```
|
||||
|
||||
## Run
|
||||
### Windows (PowerShell)
|
||||
|
||||
```bash
|
||||
.venv/bin/python scrape_happyfappy_bookmarks.py --cookie-file cookies.txt --output bookmarks.json
|
||||
```powershell
|
||||
py -3.12 -m venv .venv
|
||||
.venv\Scripts\Activate.ps1
|
||||
python -m pip install -U pip
|
||||
python -m pip install -r requirements.txt
|
||||
scrapling install
|
||||
```
|
||||
|
||||
## Download Single Torrent
|
||||
### Windows (CMD)
|
||||
|
||||
```bat
|
||||
py -3.12 -m venv .venv
|
||||
.venv\Scripts\activate.bat
|
||||
python -m pip install -U pip
|
||||
python -m pip install -r requirements.txt
|
||||
scrapling install
|
||||
```
|
||||
|
||||
Not: Ortamı aktive ettikten sonra komutları `python ...` şeklinde çalıştırman yeterli; `.venv/bin/python` (Windows'ta `.venv\Scripts\python.exe`) yazmak zorunda değilsin.
|
||||
|
||||
## 3) HappyFappy Komutları
|
||||
|
||||
### Bookmarks Çekme
|
||||
|
||||
```bash
|
||||
.venv/bin/python download_happyfappy_torrent.py \
|
||||
--url "https://www.happyfappy.net/torrents.php?id=110178" \
|
||||
--cookie-file cookies.txt \
|
||||
--output-dir torrent
|
||||
python wscraper.py happyfappy --action get-bookmarks -c cookies.txt -o bookmarks.json
|
||||
```
|
||||
|
||||
### Torrent Dosyası İndirme
|
||||
|
||||
```bash
|
||||
python wscraper.py happyfappy --action download-torrent-files -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
```
|
||||
|
||||
## 4) Kısa Alias Kullanımı
|
||||
|
||||
```bash
|
||||
# site alias: hf
|
||||
# action alias: gb (get-bookmarks), dtf (download-torrent-files)
|
||||
python wscraper.py hf -a gb -c cookies.txt -o bookmarks.json
|
||||
python wscraper.py hf -a dtf -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
```
|
||||
|
||||
131
wscraper.py
Normal file
131
wscraper.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
|
||||
from download_happyfappy_torrent import run as run_happyfappy_download
|
||||
from scrape_happyfappy_bookmarks import run as run_happyfappy_bookmarks
|
||||
|
||||
|
||||
# Every accepted spelling of a site key, mapped to its canonical site name.
SITE_ALIASES = dict.fromkeys(("happyfappy", "hf"), "happyfappy")

# Every accepted spelling of an action, mapped to its canonical action name.
# Key order is kept stable: bookmark spellings first, then download spellings.
ACTION_ALIASES = {
    **dict.fromkeys(("get-bookmarks", "gb", "bookmarks"), "get-bookmarks"),
    **dict.fromkeys(
        ("download-torrent-files", "dtf", "download"),
        "download-torrent-files",
    ),
}
|
||||
|
||||
|
||||
def normalize_site(value: str) -> str:
    """Return the canonical site name for a user-supplied site key.

    The key is stripped of surrounding whitespace and lower-cased before it
    is looked up in ``SITE_ALIASES``.

    Raises:
        ValueError: If the cleaned key is not a known site or alias. The
            message lists every accepted spelling in sorted order.
    """
    canonical = SITE_ALIASES.get(value.strip().lower())
    if canonical is None:
        supported = ", ".join(sorted(SITE_ALIASES))
        raise ValueError(f"Unsupported site: {value!r}. Supported values: {supported}")
    return canonical
|
||||
|
||||
|
||||
def normalize_action(value: str) -> str:
    """Return the canonical action name for a user-supplied action key.

    The key is stripped of surrounding whitespace and lower-cased before it
    is looked up in ``ACTION_ALIASES``.

    Raises:
        ValueError: If the cleaned key is not a known action or alias. The
            message lists every accepted spelling in sorted order.
    """
    canonical = ACTION_ALIASES.get(value.strip().lower())
    if canonical is None:
        supported = ", ".join(sorted(ACTION_ALIASES))
        raise ValueError(f"Unsupported action: {value!r}. Supported values: {supported}")
    return canonical
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the wscraper entry point.

    The parser takes one positional ``site`` argument, a required
    ``-a/--action`` flag, optional cookie / URL / output options, and a set
    of numeric options that all carry defaults.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(description="wscraper: multi-site scraping entrypoint")
    add = cli.add_argument

    # What to run: site selector plus the mandatory action.
    add("site", help="Site key, e.g. happyfappy or hf")
    add("-a", "--action", required=True, help="Action to run")

    # Where to connect and how to authenticate.
    add("--base-url", help="Override site base URL")
    add("--cookie", help='Raw cookie string, e.g. "a=1; b=2"')
    add("-c", "--cookie-file", help="Path to cookie file")

    # Action-specific input and output targets.
    add("-u", "--url", help="Detail page URL (required for download action)")
    add(
        "-o",
        "--output",
        help="Output target: file path for get-bookmarks, directory path for download-torrent-files",
    )

    # Numeric options, registered in the same order as they appear in --help.
    numeric_options = (
        (("-r", "--retries"), int, 3),
        (("--backoff-base",), float, 5.0),
        (("--delay-min",), float, 1.8),
        (("--delay-max",), float, 3.2),
        (("--max-pages",), int, 200),
    )
    for flags, value_type, default in numeric_options:
        add(*flags, type=value_type, default=default)

    return cli
|
||||
|
||||
|
||||
def run_happyfappy(args: argparse.Namespace, action: str) -> None:
    """Run one canonical action against the HappyFappy site.

    A fresh ``argparse.Namespace`` holding only the fields relevant to the
    chosen action is built from ``args`` and handed to the matching runner
    imported at module level.

    Args:
        args: Parsed command-line options.
        action: Canonical action name, either ``"get-bookmarks"`` or
            ``"download-torrent-files"``.

    Raises:
        ValueError: If ``action`` is not one of the two supported names, or
            if the download action is requested without ``args.url``.
    """
    # Fall back to the default site root when no override was supplied.
    site_root = args.base_url or "https://www.happyfappy.net"

    if action == "get-bookmarks":
        bookmark_request = {
            "base_url": site_root,
            "cookie": args.cookie,
            "cookie_file": args.cookie_file,
            "output": args.output or "bookmarks.json",
            "delay_min": args.delay_min,
            "delay_max": args.delay_max,
            "retries": args.retries,
            "backoff_base": args.backoff_base,
            "max_pages": args.max_pages,
        }
        run_happyfappy_bookmarks(argparse.Namespace(**bookmark_request))
    elif action == "download-torrent-files":
        # The download runner needs a concrete detail page to work on.
        if not args.url:
            raise ValueError("--url is required for action=download-torrent-files.")
        download_request = {
            "url": args.url,
            "base_url": site_root,
            "cookie": args.cookie,
            "cookie_file": args.cookie_file,
            "output_dir": args.output or "torrent",
            "retries": args.retries,
            "backoff_base": args.backoff_base,
        }
        run_happyfappy_download(argparse.Namespace(**download_request))
    else:
        raise ValueError(f"Unsupported action for happyfappy: {action}")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> None:
    """Parse command-line options, validate them and run the requested action.

    Args:
        argv: Argument list to parse instead of the process arguments. When
            ``None`` (the default) ``argparse`` reads ``sys.argv[1:]``, so
            calling ``main()`` with no arguments behaves as before.

    Raises:
        ValueError: If a numeric option is out of range, the site or action
            is unknown, or no cookie source was supplied.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Range checks on numeric options come first, before any lookups.
    if args.retries < 1:
        raise ValueError("--retries must be at least 1.")
    if args.backoff_base < 0:
        raise ValueError("--backoff-base must be >= 0.")
    if args.delay_min < 0 or args.delay_max < 0:
        raise ValueError("Delay values must be non-negative.")
    if args.delay_min > args.delay_max:
        raise ValueError("--delay-min cannot be greater than --delay-max.")

    # Resolve aliases (e.g. "hf", "gb") to canonical names.
    site = normalize_site(args.site)
    action = normalize_action(args.action)

    # Either a raw cookie string or a cookie file must be provided.
    if not args.cookie and not args.cookie_file:
        raise ValueError("Cookie is required. Use --cookie or --cookie-file/-c.")

    if site == "happyfappy":
        run_happyfappy(args, action)
        return

    # Reached only if an alias maps to a site that has no runner wired up here.
    raise ValueError(f"Unsupported site: {site}")
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when this file is executed directly.
# main() returns None, so the process exits with status 0 on success.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user