feat: add optional bookmark removal to torrent download action

This commit is contained in:
2026-03-08 02:58:41 +03:00
parent 7d307c31f9
commit 55459373e5
3 changed files with 307 additions and 0 deletions

View File

@@ -67,6 +67,27 @@ wscraper happyfappy --action get-bookmarks -c cookies.txt -o bookmarks.json
wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
```
### Torrent İndir + Bookmark Kaldır
`--rm-bookmark` (kısa alias: `-rmb`) flag'i verildiğinde, torrent dosyası başarıyla indirildikten sonra aynı torrent detay sayfasındaki bookmark remove mekanizması dinamik olarak bulunur ve tetiklenir.
```bash
wscraper happyfappy --action download-torrent-files --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
```
Davranış kuralları:
- Torrent indirme başarısızsa bookmark silme adımına geçilmez.
- `--rm-bookmark` verilmiş ve bookmark silme başarısızsa komut `exit code 1` ile biter.
- `--rm-bookmark` verilmemişse yalnızca indirme akışı çalışır.
Bookmark remove tespiti tek bir sabit HTML selector'ine bağlı değildir:
- öncelik `onclick`/event sinyalleri (ör. `Unbookmark(...)`)
- `id/class/href` içinde bookmark semantiği
- remove/delete/unbookmark benzeri metin ve attribute sinyalleri
- torrent id ile ilişkili kontrol eşleştirmesi
## 4) Kısa Alias Kullanımı
```bash
@@ -74,6 +95,8 @@ wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.n
# action alias: gb (get-bookmarks), dtf (download-torrent-files)
wscraper hf -a gb -c cookies.txt -o bookmarks.json
wscraper hf -a dtf -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
wscraper hf -a dtf --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
wscraper hf -a dtf -rmb -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
```
## 5) Proje Dizini

View File

@@ -50,6 +50,12 @@ def build_parser() -> argparse.ArgumentParser:
"--output",
help="Output target: file path for get-bookmarks, directory path for download-torrent-files",
)
parser.add_argument(
"-rmb",
"--rm-bookmark",
action="store_true",
help="When used with download-torrent-files, remove bookmark after successful torrent download",
)
parser.add_argument("-r", "--retries", type=int, default=3)
parser.add_argument("--backoff-base", type=float, default=5.0)
@@ -88,6 +94,7 @@ def run_happyfappy(args: argparse.Namespace, action: str) -> None:
cookie=args.cookie,
cookie_file=args.cookie_file,
output_dir=args.output or "torrent",
rm_bookmark=args.rm_bookmark,
retries=args.retries,
backoff_base=args.backoff_base,
)

View File

@@ -353,3 +353,280 @@ def run_download_torrent_files(args: argparse.Namespace) -> None:
output_path = output_dir / filename
output_path.write_bytes(data)
print(f"Saved torrent to {output_path}")
if getattr(args, "rm_bookmark", False):
torrent_id = extract_torrent_id(args.url)
removed = remove_bookmark_with_retry(
session=session,
detail_url=args.url,
torrent_id=torrent_id,
retries=args.retries,
backoff_base=args.backoff_base,
)
if not removed:
raise RuntimeError("Torrent downloaded but bookmark removal could not be verified.")
print("Bookmark removed successfully.")
def extract_torrent_id(detail_url: str) -> str | None:
parsed = urlparse(detail_url)
query = parsed.query or ""
match = re.search(r"(?:^|&)id=(\d+)(?:&|$)", query)
if match:
return match.group(1)
path_match = re.search(r"/torrents\.php/(\d+)", parsed.path or "")
if path_match:
return path_match.group(1)
return None
def _click_remove_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
    """Score clickable controls on the page and click the likeliest bookmark-remove one.

    The scoring runs inside the page via ``page.evaluate``. Every
    ``a``/``button``/``[role='button']``/``[onclick]``/button-like ``input``
    is scored from its text, ``title``, ``aria-label``, ``id``, ``class``,
    ``href``, ``onclick`` and ``outerHTML``.

    A candidate is only ever clicked when it carries bookmark semantics AND
    a remove-like token AND its ``onclick`` is not an add call
    (``bookmark(`` without ``unbookmark``). Without that gate a control that
    merely mentions the torrent id, or a generic "delete"/"remove" button,
    could pass the score threshold and be clicked.

    Args:
        page: Browser page object exposing ``evaluate(script, arg)``.
        torrent_id: Torrent id used as an extra matching signal, or ``None``.

    Returns:
        The value produced by the in-page script: an object with ``clicked``
        (bool), ``score`` (``-1`` when no candidate exists at all),
        ``reasons`` (list of signal names) and ``snapshot`` (first 220
        characters of the reported element's ``outerHTML``).
    """
    return page.evaluate(
        """
        ({ torrentId }) => {
            const normalize = (v) => (v || "").toString().toLowerCase();
            const hasAny = (source, tokens) => tokens.some((t) => source.includes(t));
            const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
            const addTokens = ["bookmark", "add", "mark", "save"];
            const nodes = Array.from(document.querySelectorAll(
                "a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
            ));
            // best: highest score overall, kept only for diagnostics.
            // bestClickable: highest score among candidates that are safe to click.
            let best = null;
            let bestClickable = null;
            for (const node of nodes) {
                const text = normalize(node.textContent || "");
                const title = normalize(node.getAttribute("title"));
                const aria = normalize(node.getAttribute("aria-label"));
                const id = normalize(node.id);
                const cls = normalize(node.className);
                const href = normalize(node.getAttribute("href"));
                const onclick = normalize(node.getAttribute("onclick"));
                const attrs = normalize(node.outerHTML);
                const all = [text, title, aria, id, cls, href, onclick, attrs].join(" ");
                const bookmarkSignal = hasAny(all, ["bookmark"]);
                const removeSignal = hasAny(all, removeTokens);
                // An onclick that calls bookmark(...) but not unbookmark(...) adds a bookmark.
                const addCall = onclick.includes("bookmark(") && !onclick.includes("unbookmark");
                let score = 0;
                const reasons = [];
                if (hasAny(onclick, ["unbookmark"])) {
                    score += 60;
                    reasons.push("onclick:unbookmark");
                }
                if (bookmarkSignal) {
                    score += 16;
                    reasons.push("bookmark-signal");
                }
                if (removeSignal) {
                    score += 26;
                    reasons.push("remove-signal");
                }
                if (!removeSignal && hasAny(all, addTokens)) {
                    score -= 20;
                    reasons.push("add-like-signal");
                }
                if (torrentId && all.includes(torrentId)) {
                    score += 30;
                    reasons.push("torrent-id");
                }
                if (hasAny(href, ["javascript", "#"])) {
                    score += 4;
                }
                const candidate = { node, score, reasons, snapshot: (node.outerHTML || "").slice(0, 220) };
                if (!best || score > best.score) {
                    best = candidate;
                }
                const safeToClick = bookmarkSignal && removeSignal && !addCall;
                if (safeToClick && (!bestClickable || score > bestClickable.score)) {
                    bestClickable = candidate;
                }
            }
            if (!bestClickable || bestClickable.score < 20) {
                const report = bestClickable || best;
                return {
                    clicked: false,
                    score: report ? report.score : -1,
                    reasons: report ? report.reasons : [],
                    snapshot: report ? report.snapshot : "",
                };
            }
            bestClickable.node.click();
            return {
                clicked: true,
                score: bestClickable.score,
                reasons: bestClickable.reasons,
                snapshot: bestClickable.snapshot,
            };
        }
        """,
        {"torrentId": torrent_id},
    )
def _remove_control_exists(page: Any, torrent_id: str | None) -> bool:
    """Report whether the page still shows a bookmark-remove control.

    The check runs inside the page via ``page.evaluate`` over every
    ``a``/``button``/``[role='button']``/``[onclick]``/button-like ``input``.
    A control counts only when its text/attributes carry bookmark semantics
    together with a remove-like token, and its ``onclick`` is not an add
    call (``bookmark(`` without ``unbookmark``). Requiring bookmark
    semantics keeps unrelated "delete"/"remove" controls on the page from
    being reported as a bookmark remove control.

    Args:
        page: Browser page object exposing ``evaluate(script, arg)``.
        torrent_id: When given, the control must also mention this id;
            when ``None``, any matching control on the page counts.

    Returns:
        ``True`` if at least one matching control is found, else ``False``.
    """
    return bool(
        page.evaluate(
            """
            ({ torrentId }) => {
                const normalize = (v) => (v || "").toString().toLowerCase();
                const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
                const nodes = Array.from(document.querySelectorAll(
                    "a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
                ));
                for (const node of nodes) {
                    const text = normalize(node.textContent || "");
                    const title = normalize(node.getAttribute("title"));
                    const aria = normalize(node.getAttribute("aria-label"));
                    const id = normalize(node.id);
                    const cls = normalize(node.className);
                    const href = normalize(node.getAttribute("href"));
                    const onclick = normalize(node.getAttribute("onclick"));
                    const all = [text, title, aria, id, cls, href, onclick].join(" ");
                    // "unbookmark" is itself a remove token and contains "bookmark",
                    // so an Unbookmark(...) handler satisfies both conditions.
                    const bookmarkSignal = all.includes("bookmark");
                    const removeSignal = removeTokens.some((t) => all.includes(t));
                    const addCall = onclick.includes("bookmark(") && !onclick.includes("unbookmark");
                    const looksLikeRemove = bookmarkSignal && removeSignal && !addCall;
                    const matchesId = torrentId ? all.includes(torrentId) : true;
                    if (looksLikeRemove && matchesId) {
                        return true;
                    }
                }
                return false;
            }
            """,
            {"torrentId": torrent_id},
        )
    )
def _bookmark_control_state(page: Any, torrent_id: str | None) -> dict[str, Any]:
    """Inspect the page and describe the most likely bookmark control.

    The in-page script gathers candidates: the element whose id is
    ``bookmarklink_torrent_<torrent_id>`` (when ``torrent_id`` is given and
    the element exists) plus every ``a``, ``button``, ``[onclick]`` and
    ``[id*='bookmark']`` element. Each candidate is scored from its text,
    ``id``, ``href``, ``onclick`` and ``title``, and the highest-scoring one
    is reported. Nothing is clicked.

    The reported ``action`` is derived from the ``onclick`` attribute only:
    ``"remove"`` when it contains ``unbookmark``, ``"add"`` when it contains
    ``bookmark(``, otherwise ``"unknown"``.

    Args:
        page: Browser page object exposing ``evaluate(script, arg)``.
        torrent_id: Torrent id used for the direct id lookup and as a
            scoring signal, or ``None``.

    Returns:
        The value produced by the in-page script: an object with ``best``
        (``null`` when there are no candidates, otherwise an object with
        ``score``, ``action``, ``text``, ``id``, ``onclick`` and a
        220-character ``snapshot`` of ``outerHTML``) and ``total`` (number
        of candidates scored).
    """
    # The script text is passed to the browser as-is; the second argument is
    # handed to the arrow function as its single destructured parameter.
    return page.evaluate(
        """
({ torrentId }) => {
const normalize = (v) => (v || "").toString().toLowerCase();
const candidates = [];
if (torrentId) {
const direct = document.getElementById(`bookmarklink_torrent_${torrentId}`);
if (direct) candidates.push(direct);
}
const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
for (const node of nodes) {
if (!candidates.includes(node)) candidates.push(node);
}
const scored = [];
for (const node of candidates) {
const text = normalize(node.textContent || "");
const id = normalize(node.id);
const href = normalize(node.getAttribute("href"));
const onclick = normalize(node.getAttribute("onclick"));
const title = normalize(node.getAttribute("title"));
const all = [text, id, href, onclick, title].join(" ");
let score = 0;
if (all.includes("bookmark")) score += 12;
if (torrentId && all.includes(torrentId)) score += 28;
if (onclick.includes("unbookmark")) score += 45;
if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) score += 20;
if (id.includes("bookmarklink_torrent")) score += 35;
const action = onclick.includes("unbookmark")
? "remove"
: (onclick.includes("bookmark(") ? "add" : "unknown");
scored.push({
score,
action,
text,
id,
onclick,
snapshot: (node.outerHTML || "").slice(0, 220),
});
}
scored.sort((a, b) => b.score - a.score);
const best = scored[0] || null;
return { best, total: scored.length };
}
""",
        {"torrentId": torrent_id},
    )
def _click_bookmark_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
    """Click the bookmark control for the torrent unless it is already an add control.

    Target selection, performed inside the page via ``page.evaluate``:

    1. The element with id ``bookmarklink_torrent_<torrent_id>`` when
       ``torrent_id`` is given and the element exists.
    2. Otherwise the highest-scoring ``a``/``button``/``[onclick]``/
       ``[id*='bookmark']`` element. Only elements whose text or attributes
       mention "bookmark" are eligible, so an unrelated link (for example
       one that merely contains the torrent id) is never picked and clicked.

    If the chosen element's ``onclick`` calls ``bookmark(`` without
    ``unbookmark`` it is treated as an add control and left alone.

    Args:
        page: Browser page object exposing ``evaluate(script, arg)``.
        torrent_id: Torrent id for the direct lookup and scoring, or ``None``.

    Returns:
        The value produced by the in-page script: an object with ``clicked``
        (bool) and ``reason`` (``"clicked"``, ``"already-removed"`` or
        ``"no-target"``); a 220-character ``snapshot`` of the element's
        ``outerHTML`` is included when a click was issued.
    """
    return page.evaluate(
        """
        ({ torrentId }) => {
            const normalize = (v) => (v || "").toString().toLowerCase();
            let target = null;
            if (torrentId) {
                target = document.getElementById(`bookmarklink_torrent_${torrentId}`);
            }
            if (!target) {
                const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
                let best = null;
                for (const node of nodes) {
                    const text = normalize(node.textContent || "");
                    const id = normalize(node.id);
                    const href = normalize(node.getAttribute("href"));
                    const onclick = normalize(node.getAttribute("onclick"));
                    const title = normalize(node.getAttribute("title"));
                    const all = [text, id, href, onclick, title].join(" ");
                    // Never consider a control with no bookmark semantics at all;
                    // otherwise the first element on the page (score 0) or a link
                    // that only mentions the torrent id could end up being clicked.
                    if (!all.includes("bookmark")) continue;
                    let score = 12;
                    if (torrentId && all.includes(torrentId)) score += 28;
                    if (onclick.includes("unbookmark")) score += 45;
                    if (id.includes("bookmarklink_torrent")) score += 35;
                    if (!best || score > best.score) best = { node, score };
                }
                if (best) target = best.node;
            }
            if (!target) return { clicked: false, reason: "no-target" };
            const onclick = normalize(target.getAttribute("onclick"));
            if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) {
                return { clicked: false, reason: "already-removed" };
            }
            target.click();
            return { clicked: true, reason: "clicked", snapshot: (target.outerHTML || "").slice(0, 220) };
        }
        """,
        {"torrentId": torrent_id},
    )
def remove_bookmark_with_retry(
    session: DynamicSession,
    detail_url: str,
    torrent_id: str | None,
    retries: int,
    backoff_base: float,
) -> bool:
    """Open the torrent detail page and remove its bookmark, retrying on failure.

    Each attempt opens a fresh page from ``session.context``, loads
    ``detail_url``, inspects the bookmark control, clicks it if it is a
    remove control, reloads the page and checks that a remove control is no
    longer the best match.

    Args:
        session: Session whose ``context.new_page()`` yields a browser page.
        detail_url: Torrent detail page URL.
        torrent_id: Torrent id used to locate the control, or ``None``.
        retries: Number of attempts. Values below 1 are treated as 1 so the
            removal is always tried at least once.
        backoff_base: Base delay in seconds; the wait before attempt ``k+1``
            is ``backoff_base * 2**k``.

    Returns:
        ``True`` when the bookmark is already absent (the control is an add
        control) or when the click was issued and no remove control is
        detected after the reload. This function never returns ``False``;
        failure is signalled by the exception below.

    Raises:
        RuntimeError: When every attempt failed. The last underlying error
            is attached as the cause.
    """
    attempts = max(1, retries)
    last_error: Exception | None = None

    for attempt in range(attempts):
        page = None
        try:
            # Created inside the try so a transient page-creation failure is
            # retried like any other step instead of escaping the loop.
            page = session.context.new_page()
            page.goto(detail_url, wait_until="domcontentloaded", timeout=45_000)

            state_before = _bookmark_control_state(page, torrent_id)
            best_before = (state_before or {}).get("best") or {}
            action_before = best_before.get("action")
            if action_before == "add":
                # The page offers to add a bookmark, so there is nothing to remove.
                return True
            if action_before != "remove":
                raise RuntimeError("Bookmark remove control not detected on detail page.")

            click_result = _click_bookmark_control(page, torrent_id)
            if not click_result.get("clicked"):
                if click_result.get("reason") == "already-removed":
                    return True
                raise RuntimeError(
                    "Bookmark remove action could not be clicked. "
                    f"reason={click_result.get('reason')}"
                )

            # Give the click's request time to complete before reloading to verify.
            page.wait_for_timeout(2200)
            page.reload(wait_until="domcontentloaded", timeout=45_000)

            state_after = _bookmark_control_state(page, torrent_id)
            best_after = (state_after or {}).get("best") or {}
            action_after = best_after.get("action")
            if action_after == "remove":
                raise RuntimeError("Bookmark remove control still present after click; remove likely failed.")
            return True
        except Exception as err:  # noqa: BLE001
            last_error = err
        finally:
            if page is not None:
                page.close()

        # Back off only after the page has been closed, and not after the
        # final attempt.
        if attempt < attempts - 1:
            time.sleep(backoff_base * (2**attempt))

    raise RuntimeError(f"Bookmark remove failed for {detail_url}: {last_error}") from last_error