169 lines
6.2 KiB
Python
169 lines
6.2 KiB
Python
from __future__ import annotations

import argparse
import base64
import hmac
import json
import os
import sys
import tempfile
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from urllib.parse import urlparse
|
|
|
|
# REPO_ROOT is the third ancestor directory of this (resolved) file path.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory expected to hold the ``wscraper`` package sources.
WSCRAPER_SRC = REPO_ROOT.joinpath("bin", "wscraper", "src")

# Put the wscraper sources at the front of the import path (once) so the
# ``wscraper`` import further down this module can be resolved.
if sys.path.count(str(WSCRAPER_SRC)) == 0:
    sys.path.insert(0, str(WSCRAPER_SRC))
|
|
|
|
from wscraper.sites.happyfappy import run_download_torrent_files, run_get_bookmarks
|
|
|
|
# Runtime configuration, read once from the environment at import time.
# Interface the server binds to; defaults to all interfaces.
HOST = os.getenv("WSCRAPER_SERVICE_HOST", "0.0.0.0")
# TCP port the server listens on; a non-numeric value raises ValueError here.
PORT = int(os.getenv("WSCRAPER_SERVICE_PORT", "8787"))
# Shared bearer token; an empty value disables the auth check in require_auth.
TOKEN = os.getenv("WSCRAPER_SERVICE_TOKEN", "")
|
|
|
|
|
|
def json_response(handler: BaseHTTPRequestHandler, status: int, payload: dict) -> None:
    """Send *payload* to the client as a complete JSON HTTP response.

    Args:
        handler: Request handler whose response stream is written to.
        status: HTTP status code for the response line.
        payload: JSON-serializable mapping used as the response body.
    """
    # Non-ASCII characters are kept as-is and encoded as UTF-8.
    encoded = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    handler.send_response(status)
    for header_name, header_value in (
        ("Content-Type", "application/json; charset=utf-8"),
        # Length is the number of encoded bytes, not characters.
        ("Content-Length", str(len(encoded))),
    ):
        handler.send_header(header_name, header_value)
    handler.end_headers()
    handler.wfile.write(encoded)
|
|
|
|
|
|
def parse_json_body(handler: BaseHTTPRequestHandler) -> dict:
    """Read the request body and decode it as a JSON object.

    Args:
        handler: Request handler providing ``headers`` and ``rfile``.

    Returns:
        The decoded JSON object, or an empty dict when the request declares
        no body (missing, zero or negative ``Content-Length``).

    Raises:
        ValueError: If ``Content-Length`` is not an integer, the body is not
            valid UTF-8 / JSON (``UnicodeDecodeError`` and
            ``json.JSONDecodeError`` are ``ValueError`` subclasses), or the
            decoded JSON value is not an object.
    """
    declared_length = int(handler.headers.get("Content-Length", "0"))
    if declared_length <= 0:
        return {}

    raw = handler.rfile.read(declared_length)
    parsed = json.loads(raw.decode("utf-8"))
    # Callers use ``.get`` on the result; reject arrays/scalars here with a
    # clear message instead of failing later with an AttributeError.
    if not isinstance(parsed, dict):
        raise ValueError("JSON body must be an object")
    return parsed
|
|
|
|
|
|
def require_auth(handler: BaseHTTPRequestHandler) -> bool:
    """Check the request's bearer token, answering 401 when it is wrong.

    Authentication is skipped entirely when ``TOKEN`` is empty.

    Args:
        handler: Request handler whose ``Authorization`` header is checked.

    Returns:
        True when the request may proceed. False when it was rejected; in
        that case a 401 JSON response has already been sent and the caller
        must not write another response.
    """
    if not TOKEN:
        return True

    supplied = handler.headers.get("Authorization", "")
    expected = f"Bearer {TOKEN}"
    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Compare bytes: compare_digest rejects non-ASCII str.
    if hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8")):
        return True

    json_response(handler, HTTPStatus.UNAUTHORIZED, {"error": "Unauthorized"})
    return False
|
|
|
|
|
|
def normalize_tracker(payload: dict) -> str:
    """Return the canonical tracker key requested in *payload*.

    The ``tracker`` value is stringified, stripped and lower-cased before
    being matched against the accepted spellings.

    Args:
        payload: Decoded request body.

    Returns:
        ``"happyfappy"`` for either accepted spelling.

    Raises:
        ValueError: If the tracker is missing or not supported.
    """
    requested = payload.get("tracker", "")
    key = str(requested).strip().lower()
    if key in ("happyfappy", "hf"):
        return "happyfappy"
    raise ValueError("Unsupported tracker")
|
|
|
|
|
|
class Handler(BaseHTTPRequestHandler):
    """JSON-over-HTTP front end for the wscraper scraping functions.

    Routes:
        GET  /health     -- fixed ``{"ok": true, ...}`` payload.
        GET  /trackers   -- list of supported trackers.
        POST /bookmarks  -- run the bookmark scraper and return its items.
        POST /download   -- run the torrent downloader and return the file
                            base64-encoded.

    Known routes are guarded by ``require_auth``. Errors are reported as
    ``{"error": <message>}`` JSON bodies.
    """

    server_version = "wscraper-service/1.0"

    # Base URL handed to both scraper entry points.
    _BASE_URL = "https://www.happyfappy.net"

    def do_GET(self) -> None:  # noqa: N802
        """Serve ``/health`` and ``/trackers``; any other path is a 404."""
        route = urlparse(self.path).path

        if route == "/health":
            if not require_auth(self):
                return
            json_response(self, HTTPStatus.OK, {"ok": True, "service": "wscraper-service"})
            return

        if route == "/trackers":
            if not require_auth(self):
                return
            json_response(
                self,
                HTTPStatus.OK,
                {"items": [{"key": "happyfappy", "label": "HappyFappy"}]},
            )
            return

        # Unknown paths are answered without an auth check.
        json_response(self, HTTPStatus.NOT_FOUND, {"error": "Not found"})

    def do_POST(self) -> None:  # noqa: N802
        """Dispatch ``/bookmarks`` and ``/download`` requests.

        The body is parsed before routing, so a malformed body yields 400
        even on an unknown path. ``ValueError`` (validation failures,
        malformed JSON, or one raised by the scraper) is reported as 400;
        any other exception is a server-side failure and is reported as 500.
        """
        if not require_auth(self):
            return

        route = urlparse(self.path).path
        try:
            payload = parse_json_body(self)
            # The endpoint helpers use ``.get``; reject arrays/scalars early.
            if not isinstance(payload, dict):
                raise ValueError("JSON body must be an object")

            if route == "/bookmarks":
                status, body = HTTPStatus.OK, self._fetch_bookmarks(payload)
            elif route == "/download":
                status, body = HTTPStatus.OK, self._download_torrent(payload)
            else:
                status, body = HTTPStatus.NOT_FOUND, {"error": "Not found"}
        except ValueError as error:
            status, body = HTTPStatus.BAD_REQUEST, {"error": str(error)}
        except Exception as error:  # noqa: BLE001
            status, body = HTTPStatus.INTERNAL_SERVER_ERROR, {"error": str(error)}

        # Written exactly once, outside the try: a failure while sending can
        # no longer trigger a second, corrupt response from the error path.
        json_response(self, status, body)

    def _fetch_bookmarks(self, payload: dict) -> dict:
        """Run the bookmark scraper and return the ``/bookmarks`` response body.

        Raises:
            ValueError: If the tracker is unsupported or ``cookie`` is empty.
        """
        tracker = normalize_tracker(payload)
        cookie = str(payload.get("cookie", "")).strip()
        if not cookie:
            raise ValueError("Cookie is required")

        # The scraper writes its result to a file; use a throwaway directory
        # and read the JSON back before it is removed.
        with tempfile.TemporaryDirectory(prefix="wscraper-bookmarks-") as tmpdir:
            output_path = Path(tmpdir) / "bookmarks.json"
            run_get_bookmarks(
                argparse.Namespace(
                    base_url=self._BASE_URL,
                    cookie=cookie,
                    cookie_file=None,
                    output=str(output_path),
                    delay_min=1.8,
                    delay_max=3.2,
                    retries=3,
                    backoff_base=5.0,
                    max_pages=200,
                )
            )
            items = json.loads(output_path.read_text(encoding="utf-8"))

        return {"tracker": tracker, "items": items}

    def _download_torrent(self, payload: dict) -> dict:
        """Run the torrent downloader and return the ``/download`` response body.

        Raises:
            ValueError: If the tracker is unsupported, or ``cookie`` or
                ``url`` is empty.
            RuntimeError: If the downloader produced no ``*.torrent`` file.
        """
        tracker = normalize_tracker(payload)
        cookie = str(payload.get("cookie", "")).strip()
        detail_url = str(payload.get("url", "")).strip()
        # NOTE(review): bool() of any non-empty string, including "false",
        # is True; this is only reliable when clients send a JSON boolean.
        remove_bookmark = bool(payload.get("removeBookmark", True))
        if not cookie:
            raise ValueError("Cookie is required")
        if not detail_url:
            raise ValueError("Detail url is required")

        with tempfile.TemporaryDirectory(prefix="wscraper-download-") as tmpdir:
            output_dir = Path(tmpdir) / "torrent"
            run_download_torrent_files(
                argparse.Namespace(
                    url=detail_url,
                    base_url=self._BASE_URL,
                    cookie=cookie,
                    cookie_file=None,
                    output_dir=str(output_dir),
                    rm_bookmark=remove_bookmark,
                    retries=3,
                    backoff_base=5.0,
                )
            )
            files = sorted(output_dir.glob("*.torrent"))
            if not files:
                raise RuntimeError("No torrent file produced")
            # Only the first file (in sorted order) is returned.
            torrent_path = files[0]
            content = base64.b64encode(torrent_path.read_bytes()).decode("ascii")

        return {
            "tracker": tracker,
            "filename": torrent_path.name,
            "contentBase64": content,
        }

    def log_message(self, fmt: str, *args) -> None:
        """Print request log lines to stdout with a service prefix."""
        print(f"[wscraper-service] {self.address_string()} - {fmt % args}")
|
|
|
|
|
|
def main() -> None:
    """Start the HTTP service on ``HOST``/``PORT`` and serve until interrupted.

    The server is used as a context manager so its listening socket is
    closed on the way out, and Ctrl-C ends the process without a traceback.
    """
    with ThreadingHTTPServer((HOST, PORT), Handler) as server:
        print(f"wscraper-service listening on http://{HOST}:{PORT}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("wscraper-service shutting down")


if __name__ == "__main__":
    main()
|