From 55459373e513bbd6c2c5c6edcd42abde24018d24 Mon Sep 17 00:00:00 2001 From: wisecolt Date: Sun, 8 Mar 2026 02:58:41 +0300 Subject: [PATCH] feat: add optional bookmark removal to torrent download action --- README.md | 23 +++ src/wscraper/cli.py | 7 + src/wscraper/sites/happyfappy.py | 277 +++++++++++++++++++++++++++++++ 3 files changed, 307 insertions(+) diff --git a/README.md b/README.md index dc1494d..dc96e60 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,27 @@ wscraper happyfappy --action get-bookmarks -c cookies.txt -o bookmarks.json wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent ``` +### Torrent İndir + Bookmark Kaldır + +`--rm-bookmark` (kisa alias: `-rmb`) flag'i verildiginde, torrent dosyasi basariyla indirildikten sonra ayni torrent detay sayfasindaki bookmark remove mekanizmasi dinamik olarak bulunur ve tetiklenir. + +```bash +wscraper happyfappy --action download-torrent-files --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent +``` + +Davranis kurallari: + +- Torrent indirme basarisizsa bookmark silme adimina gecilmez. +- `--rm-bookmark` verilmis ve bookmark silme basarisizsa komut `exit code 1` ile biter. +- `--rm-bookmark` verilmemisse yalnizca indirme akisi calisir. + +Bookmark remove tespiti tek bir sabit HTML selector'ine bagli degildir: + +- oncelik `onclick`/event sinyalleri (or. 
`Unbookmark(...)`) +- `id/class/href` icinde bookmark semantigi +- remove/delete/unbookmark benzeri metin ve attribute sinyalleri +- torrent id ile iliskili kontrol eslestirmesi + ## 4) Kısa Alias Kullanımı ```bash @@ -74,6 +95,8 @@ wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.n # action alias: gb (get-bookmarks), dtf (download-torrent-files) wscraper hf -a gb -c cookies.txt -o bookmarks.json wscraper hf -a dtf -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent +wscraper hf -a dtf --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent +wscraper hf -a dtf -rmb -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent ``` ## 5) Proje Dizini diff --git a/src/wscraper/cli.py b/src/wscraper/cli.py index 2e86b70..dbeab10 100644 --- a/src/wscraper/cli.py +++ b/src/wscraper/cli.py @@ -50,6 +50,12 @@ def build_parser() -> argparse.ArgumentParser: "--output", help="Output target: file path for get-bookmarks, directory path for download-torrent-files", ) + parser.add_argument( + "-rmb", + "--rm-bookmark", + action="store_true", + help="When used with download-torrent-files, remove bookmark after successful torrent download", + ) parser.add_argument("-r", "--retries", type=int, default=3) parser.add_argument("--backoff-base", type=float, default=5.0) @@ -88,6 +94,7 @@ def run_happyfappy(args: argparse.Namespace, action: str) -> None: cookie=args.cookie, cookie_file=args.cookie_file, output_dir=args.output or "torrent", + rm_bookmark=args.rm_bookmark, retries=args.retries, backoff_base=args.backoff_base, ) diff --git a/src/wscraper/sites/happyfappy.py b/src/wscraper/sites/happyfappy.py index 4f4fa51..339570a 100644 --- a/src/wscraper/sites/happyfappy.py +++ b/src/wscraper/sites/happyfappy.py @@ -353,3 +353,280 @@ def run_download_torrent_files(args: argparse.Namespace) -> None: output_path = output_dir / filename output_path.write_bytes(data) 
print(f"Saved torrent to {output_path}") + + if getattr(args, "rm_bookmark", False): + torrent_id = extract_torrent_id(args.url) + removed = remove_bookmark_with_retry( + session=session, + detail_url=args.url, + torrent_id=torrent_id, + retries=args.retries, + backoff_base=args.backoff_base, + ) + if not removed: + raise RuntimeError("Torrent downloaded but bookmark removal could not be verified.") + print("Bookmark removed successfully.")
+
+
+def extract_torrent_id(detail_url: str) -> str | None:
+    """Return the numeric torrent id embedded in *detail_url*.
+
+    The ``id`` query parameter is checked first; a ``/torrents.php/<id>``
+    path segment is used as a fallback.  Returns ``None`` when neither form
+    is present.
+    """
+    parsed = urlparse(detail_url)
+    query = parsed.query or ""
+    match = re.search(r"(?:^|&)id=(\d+)(?:&|$)", query)
+    if match:
+        return match.group(1)
+    path_match = re.search(r"/torrents\.php/(\d+)", parsed.path or "")
+    if path_match:
+        return path_match.group(1)
+    return None
+
+
+def _click_remove_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
+    """Score clickable elements on *page* and click the best bookmark-remove candidate.
+
+    An element is only eligible when its text/attributes contain both
+    "bookmark" and one of the removal tokens, so a generic "delete"/"remove"
+    control or a link that merely contains the torrent id is never clicked.
+
+    Returns a dict with ``clicked``, ``score``, ``reasons`` and ``snapshot``
+    (the first 220 characters of the chosen element's outer HTML).  When no
+    eligible element exists the result is ``clicked=False`` with score ``-1``.
+    """
+    return page.evaluate(
+        """
+        ({ torrentId }) => {
+          const normalize = (v) => (v || "").toString().toLowerCase();
+          const hasAny = (source, tokens) => tokens.some((t) => source.includes(t));
+          const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
+
+          const nodes = Array.from(document.querySelectorAll(
+            "a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
+          ));
+          let best = null;
+
+          for (const node of nodes) {
+            const text = normalize(node.textContent || "");
+            const title = normalize(node.getAttribute("title"));
+            const aria = normalize(node.getAttribute("aria-label"));
+            const id = normalize(node.id);
+            const cls = normalize(node.className);
+            const href = normalize(node.getAttribute("href"));
+            const onclick = normalize(node.getAttribute("onclick"));
+            const attrs = normalize(node.outerHTML);
+            const all = [text, title, aria, id, cls, href, onclick, attrs].join(" ");
+
+            // Require both signals: a removal token or the torrent id on its
+            // own also matches controls that have nothing to do with bookmarks.
+            if (!hasAny(all, ["bookmark"]) || !hasAny(all, removeTokens)) {
+              continue;
+            }
+
+            let score = 0;
+            const reasons = [];
+            if (hasAny(onclick, ["unbookmark"])) {
+              score += 60;
+              reasons.push("onclick:unbookmark");
+            }
+            // Both base signals are guaranteed by the eligibility check above.
+            score += 16 + 26;
+            reasons.push("bookmark-signal", "remove-signal");
+            if (torrentId && all.includes(torrentId)) {
+              score += 30;
+              reasons.push("torrent-id");
+            }
+            if (hasAny(href, ["javascript", "#"])) {
+              score += 4;
+            }
+
+            if (!best || score > best.score) {
+              best = { node, score, reasons, snapshot: (node.outerHTML || "").slice(0, 220) };
+            }
+          }
+
+          if (!best) {
+            return { clicked: false, score: -1, reasons: [], snapshot: "" };
+          }
+
+          best.node.click();
+          return { clicked: true, score: best.score, reasons: best.reasons, snapshot: best.snapshot };
+        }
+        """,
+        {"torrentId": torrent_id},
+    )
+
+
+def _remove_control_exists(page: Any, torrent_id: str | None) -> bool:
+    """Return whether *page* contains a control that looks like "remove bookmark".
+
+    A control matches when its text/attributes contain "bookmark" together
+    with a removal token and, if *torrent_id* is given, the torrent id.
+    """
+    return bool(
+        page.evaluate(
+            """
+            ({ torrentId }) => {
+              const normalize = (v) => (v || "").toString().toLowerCase();
+              const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
+              const nodes = Array.from(document.querySelectorAll(
+                "a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
+              ));
+
+              for (const node of nodes) {
+                const text = normalize(node.textContent || "");
+                const title = normalize(node.getAttribute("title"));
+                const aria = normalize(node.getAttribute("aria-label"));
+                const id = normalize(node.id);
+                const cls = normalize(node.className);
+                const href = normalize(node.getAttribute("href"));
+                const onclick = normalize(node.getAttribute("onclick"));
+                const all = [text, title, aria, id, cls, href, onclick].join(" ");
+                // "remove"/"delete" alone also matches non-bookmark controls,
+                // so the bookmark signal is mandatory.
+                const looksLikeRemove =
+                  all.includes("bookmark") && removeTokens.some((t) => all.includes(t));
+                const matchesId = torrentId ? all.includes(torrentId) : true;
+                if (looksLikeRemove && matchesId) {
+                  return true;
+                }
+              }
+              return false;
+            }
+            """,
+            {"torrentId": torrent_id},
+        )
+    )
+
+
+def _bookmark_control_state(page: Any, torrent_id: str | None) -> dict[str, Any]:
+    """Inspect *page* and describe the most likely bookmark toggle control.
+
+    Returns ``{"best": <candidate or None>, "total": <candidate count>}``.
+    The candidate's ``action`` is ``"remove"`` when its ``onclick`` contains
+    "unbookmark", ``"add"`` when it contains "bookmark(" only, and
+    ``"unknown"`` otherwise.
+    """
+    return page.evaluate(
+        """
+        ({ torrentId }) => {
+          const normalize = (v) => (v || "").toString().toLowerCase();
+          // A Set keeps insertion order and de-duplicates in constant time.
+          const candidates = new Set();
+
+          if (torrentId) {
+            const direct = document.getElementById(`bookmarklink_torrent_${torrentId}`);
+            if (direct) candidates.add(direct);
+          }
+
+          const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
+          for (const node of nodes) {
+            candidates.add(node);
+          }
+
+          const scored = [];
+          for (const node of candidates) {
+            const text = normalize(node.textContent || "");
+            const id = normalize(node.id);
+            const href = normalize(node.getAttribute("href"));
+            const onclick = normalize(node.getAttribute("onclick"));
+            const title = normalize(node.getAttribute("title"));
+            const all = [text, id, href, onclick, title].join(" ");
+
+            let score = 0;
+            if (all.includes("bookmark")) score += 12;
+            if (torrentId && all.includes(torrentId)) score += 28;
+            if (onclick.includes("unbookmark")) score += 45;
+            if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) score += 20;
+            if (id.includes("bookmarklink_torrent")) score += 35;
+
+            const action = onclick.includes("unbookmark")
+              ? "remove"
+              : (onclick.includes("bookmark(") ? "add" : "unknown");
+
+            scored.push({
+              score,
+              action,
+              text,
+              id,
+              onclick,
+              snapshot: (node.outerHTML || "").slice(0, 220),
+            });
+          }
+
+          scored.sort((a, b) => b.score - a.score);
+          const best = scored[0] || null;
+          return { best, total: scored.length };
+        }
+        """,
+        {"torrentId": torrent_id},
+    )
+
+
+def _click_bookmark_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
+    """Click the bookmark toggle on *page* unless it already offers "add".
+
+    The element ``bookmarklink_torrent_<id>`` is used when present; otherwise
+    the best-scoring element that mentions "bookmark" is chosen.  Elements
+    without any bookmark signal are never clicked.
+
+    Returns ``{"clicked": bool, "reason": str}`` where ``reason`` is
+    ``"no-target"``, ``"already-removed"`` or ``"clicked"`` (the last one
+    also carries a ``snapshot`` of the clicked element).
+    """
+    return page.evaluate(
+        """
+        ({ torrentId }) => {
+          const normalize = (v) => (v || "").toString().toLowerCase();
+          let target = null;
+
+          if (torrentId) {
+            target = document.getElementById(`bookmarklink_torrent_${torrentId}`);
+          }
+
+          if (!target) {
+            const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
+            let best = null;
+            for (const node of nodes) {
+              const text = normalize(node.textContent || "");
+              const id = normalize(node.id);
+              const href = normalize(node.getAttribute("href"));
+              const onclick = normalize(node.getAttribute("onclick"));
+              const title = normalize(node.getAttribute("title"));
+              const all = [text, id, href, onclick, title].join(" ");
+              // Without this guard the first link on the page would win with
+              // score 0 and be clicked.
+              if (!all.includes("bookmark")) continue;
+              let score = 12;
+              if (torrentId && all.includes(torrentId)) score += 28;
+              if (onclick.includes("unbookmark")) score += 45;
+              if (id.includes("bookmarklink_torrent")) score += 35;
+              if (!best || score > best.score) best = { node, score };
+            }
+            if (best) target = best.node;
+          }
+
+          if (!target) return { clicked: false, reason: "no-target" };
+
+          const onclick = normalize(target.getAttribute("onclick"));
+          if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) {
+            return { clicked: false, reason: "already-removed" };
+          }
+
+          target.click();
+          return { clicked: true, reason: "clicked", snapshot: (target.outerHTML || "").slice(0, 220) };
+        }
+        """,
+        {"torrentId": torrent_id},
+    )
+
+
+def _attempt_bookmark_removal(page: Any, detail_url: str, torrent_id: str | None) -> bool:
+    """Run a single remove attempt on an already opened *page*.
+
+    Returns ``True`` when the page already offers "add bookmark" or when the
+    remove control is no longer reported after clicking it and reloading.
+
+    Raises:
+        RuntimeError: if no remove control is detected, it cannot be clicked,
+            or it is still present after the click.
+    """
+    page.goto(detail_url, wait_until="domcontentloaded", timeout=45_000)
+    state_before = _bookmark_control_state(page, torrent_id)
+    best_before = (state_before or {}).get("best") or {}
+    action_before = best_before.get("action")
+    if action_before == "add":
+        # The control already offers "add": nothing left to remove.
+        return True
+    if action_before != "remove":
+        raise RuntimeError("Bookmark remove control not detected on detail page.")
+
+    click_result = _click_bookmark_control(page, torrent_id)
+    if not click_result.get("clicked"):
+        if click_result.get("reason") == "already-removed":
+            return True
+        raise RuntimeError(
+            "Bookmark remove action could not be clicked. "
+            f"reason={click_result.get('reason')}"
+        )
+
+    # Pause before reloading (presumably so the request triggered by the click
+    # can complete), then re-read the control from a fresh page load.
+    page.wait_for_timeout(2200)
+    page.reload(wait_until="domcontentloaded", timeout=45_000)
+    state_after = _bookmark_control_state(page, torrent_id)
+    best_after = (state_after or {}).get("best") or {}
+    # Any state other than a remaining remove control counts as success.
+    if best_after.get("action") == "remove":
+        raise RuntimeError("Bookmark remove control still present after click; remove likely failed.")
+    return True
+
+
+def remove_bookmark_with_retry(
+    session: DynamicSession,
+    detail_url: str,
+    torrent_id: str | None,
+    retries: int,
+    backoff_base: float,
+) -> bool:
+    """Remove the bookmark for *detail_url*, retrying with exponential backoff.
+
+    Each attempt runs on a fresh page from ``session.context`` which is closed
+    before the backoff sleep.  At least one attempt is always made, even when
+    *retries* is zero or negative.  The delay after attempt ``n`` (0-based) is
+    ``backoff_base * 2**n`` seconds.
+
+    Returns:
+        ``True`` once an attempt succeeds; the function never returns ``False``.
+
+    Raises:
+        RuntimeError: when every attempt failed; the last error is chained.
+    """
+    attempts = max(1, retries)
+    last_error: Exception | None = None
+    for attempt in range(attempts):
+        page = session.context.new_page()
+        try:
+            return _attempt_bookmark_removal(page, detail_url, torrent_id)
+        except Exception as err:  # noqa: BLE001
+            last_error = err
+        finally:
+            page.close()
+        # Sleep only after the page is closed and only if another attempt follows.
+        if attempt < attempts - 1:
+            time.sleep(backoff_base * (2**attempt))
+
+    raise RuntimeError(f"Bookmark remove failed for {detail_url}: {last_error}") from last_error