feat: add optional bookmark removal to torrent download action
This commit is contained in:
23
README.md
23
README.md
@@ -67,6 +67,27 @@ wscraper happyfappy --action get-bookmarks -c cookies.txt -o bookmarks.json
|
||||
wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
```
|
||||
|
||||
### Torrent İndir + Bookmark Kaldır
|
||||
|
||||
`--rm-bookmark` (kısa alias: `-rmb`) flag'i verildiğinde, torrent dosyası başarıyla indirildikten sonra aynı torrent detay sayfasındaki bookmark remove mekanizması dinamik olarak bulunur ve tetiklenir.
|
||||
|
||||
```bash
|
||||
wscraper happyfappy --action download-torrent-files --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
```
|
||||
|
||||
Davranış kuralları:
|
||||
|
||||
- Torrent indirme başarısızsa bookmark silme adımına geçilmez.
|
||||
- `--rm-bookmark` verilmiş ve bookmark silme başarısızsa komut `exit code 1` ile biter.
|
||||
- `--rm-bookmark` verilmemişse yalnızca indirme akışı çalışır.
|
||||
|
||||
Bookmark remove tespiti tek bir sabit HTML selector'ine bağlı değildir:
|
||||
|
||||
- öncelik `onclick`/event sinyalleri (ör. `Unbookmark(...)`)
|
||||
- `id/class/href` içinde bookmark semantiği
|
||||
- remove/delete/unbookmark benzeri metin ve attribute sinyalleri
|
||||
- torrent id ile ilişkili kontrol eşleştirmesi
|
||||
|
||||
## 4) Kısa Alias Kullanımı
|
||||
|
||||
```bash
|
||||
@@ -74,6 +95,8 @@ wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.n
|
||||
# action alias: gb (get-bookmarks), dtf (download-torrent-files)
|
||||
wscraper hf -a gb -c cookies.txt -o bookmarks.json
|
||||
wscraper hf -a dtf -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
wscraper hf -a dtf --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
wscraper hf -a dtf -rmb -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||
```
|
||||
|
||||
## 5) Proje Dizini
|
||||
|
||||
@@ -50,6 +50,12 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
"--output",
|
||||
help="Output target: file path for get-bookmarks, directory path for download-torrent-files",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-rmb",
|
||||
"--rm-bookmark",
|
||||
action="store_true",
|
||||
help="When used with download-torrent-files, remove bookmark after successful torrent download",
|
||||
)
|
||||
|
||||
parser.add_argument("-r", "--retries", type=int, default=3)
|
||||
parser.add_argument("--backoff-base", type=float, default=5.0)
|
||||
@@ -88,6 +94,7 @@ def run_happyfappy(args: argparse.Namespace, action: str) -> None:
|
||||
cookie=args.cookie,
|
||||
cookie_file=args.cookie_file,
|
||||
output_dir=args.output or "torrent",
|
||||
rm_bookmark=args.rm_bookmark,
|
||||
retries=args.retries,
|
||||
backoff_base=args.backoff_base,
|
||||
)
|
||||
|
||||
@@ -353,3 +353,280 @@ def run_download_torrent_files(args: argparse.Namespace) -> None:
|
||||
output_path = output_dir / filename
|
||||
output_path.write_bytes(data)
|
||||
print(f"Saved torrent to {output_path}")
|
||||
|
||||
if getattr(args, "rm_bookmark", False):
|
||||
torrent_id = extract_torrent_id(args.url)
|
||||
removed = remove_bookmark_with_retry(
|
||||
session=session,
|
||||
detail_url=args.url,
|
||||
torrent_id=torrent_id,
|
||||
retries=args.retries,
|
||||
backoff_base=args.backoff_base,
|
||||
)
|
||||
if not removed:
|
||||
raise RuntimeError("Torrent downloaded but bookmark removal could not be verified.")
|
||||
print("Bookmark removed successfully.")
|
||||
|
||||
|
||||
def extract_torrent_id(detail_url: str) -> str | None:
|
||||
parsed = urlparse(detail_url)
|
||||
query = parsed.query or ""
|
||||
match = re.search(r"(?:^|&)id=(\d+)(?:&|$)", query)
|
||||
if match:
|
||||
return match.group(1)
|
||||
path_match = re.search(r"/torrents\.php/(\d+)", parsed.path or "")
|
||||
if path_match:
|
||||
return path_match.group(1)
|
||||
return None
|
||||
|
||||
|
||||
def _click_remove_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
    """Score clickable elements in the page and click the most remove-like one.

    The whole heuristic runs in the page via ``page.evaluate``. Every
    ``a``, ``button``, ``[role='button']``, ``[onclick]`` and button/submit
    ``input`` element is scored from its lower-cased text, ``title``,
    ``aria-label``, ``id``, ``class``, ``href``, ``onclick`` and ``outerHTML``:

    * ``onclick`` contains ``unbookmark``: +60
    * any field contains ``bookmark``: +16
    * any field contains a remove token (``unbookmark``, ``remove``,
      ``delete``, ``forget``, ``unmark``): +26
    * no remove token but an add-like token (``bookmark``, ``add``, ``mark``,
      ``save``): -20
    * any field contains ``torrent_id`` (only when it is truthy): +30
    * ``href`` contains ``javascript`` or ``#``: +4

    The first element with the strictly highest score wins and is clicked
    only when its score is at least 20.

    Args:
        page: Object exposing ``evaluate(script, arg)``.
            Presumably a Playwright page; confirm against the caller.
        torrent_id: Torrent id used as an extra matching signal, or ``None``
            to skip that signal.

    Returns:
        The value produced by the script: a mapping with ``clicked`` (bool),
        ``score``, ``reasons`` (list of signal names) and ``snapshot`` (first
        220 characters of the winner's ``outerHTML``). When no element was
        found, ``score`` is ``-1`` and ``reasons``/``snapshot`` are empty.
    """
    # NOTE(review): the remove-signal alone (+26) already exceeds the click
    # threshold (20), so a control with no bookmark signal at all can be
    # clicked. Confirm this is intended.
    return page.evaluate(
        """
        ({ torrentId }) => {
          const normalize = (v) => (v || "").toString().toLowerCase();
          const hasAny = (source, tokens) => tokens.some((t) => source.includes(t));
          const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
          const addTokens = ["bookmark", "add", "mark", "save"];

          const nodes = Array.from(document.querySelectorAll(
            "a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
          ));
          let best = null;

          for (const node of nodes) {
            const text = normalize(node.textContent || "");
            const title = normalize(node.getAttribute("title"));
            const aria = normalize(node.getAttribute("aria-label"));
            const id = normalize(node.id);
            const cls = normalize(node.className);
            const href = normalize(node.getAttribute("href"));
            const onclick = normalize(node.getAttribute("onclick"));
            const attrs = normalize(node.outerHTML);
            const all = [text, title, aria, id, cls, href, onclick, attrs].join(" ");

            let score = 0;
            const reasons = [];
            if (hasAny(onclick, ["unbookmark"])) {
              score += 60;
              reasons.push("onclick:unbookmark");
            }
            if (hasAny(all, ["bookmark"])) {
              score += 16;
              reasons.push("bookmark-signal");
            }
            if (hasAny(all, removeTokens)) {
              score += 26;
              reasons.push("remove-signal");
            }
            if (!hasAny(all, removeTokens) && hasAny(all, addTokens)) {
              score -= 20;
              reasons.push("add-like-signal");
            }
            if (torrentId && all.includes(torrentId)) {
              score += 30;
              reasons.push("torrent-id");
            }
            if (hasAny(href, ["javascript", "#"])) {
              score += 4;
            }

            if (!best || score > best.score) {
              best = { node, score, reasons, snapshot: (node.outerHTML || "").slice(0, 220) };
            }
          }

          if (!best || best.score < 20) {
            return { clicked: false, score: best ? best.score : -1, reasons: best ? best.reasons : [], snapshot: best ? best.snapshot : "" };
          }

          best.node.click();
          return { clicked: true, score: best.score, reasons: best.reasons, snapshot: best.snapshot };
        }
        """,
        {"torrentId": torrent_id},
    )
|
||||
|
||||
|
||||
def _remove_control_exists(page: Any, torrent_id: str | None) -> bool:
    """Report whether the page still contains a remove-like control.

    The check runs in the page via ``page.evaluate``. It scans every ``a``,
    ``button``, ``[role='button']``, ``[onclick]`` and button/submit ``input``
    element and concatenates its lower-cased text, ``title``, ``aria-label``,
    ``id``, ``class``, ``href`` and ``onclick``. An element matches when that
    text contains one of the remove tokens (``unbookmark``, ``remove``,
    ``delete``, ``forget``, ``unmark``) and, if ``torrent_id`` is truthy,
    also contains the torrent id.

    Args:
        page: Object exposing ``evaluate(script, arg)``.
            Presumably a Playwright page; confirm against the caller.
        torrent_id: Torrent id the control must mention, or ``None`` to accept
            any remove-like control.

    Returns:
        ``True`` if at least one element matches, otherwise ``False``.
    """
    return bool(
        page.evaluate(
            """
            ({ torrentId }) => {
              const normalize = (v) => (v || "").toString().toLowerCase();
              const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
              const nodes = Array.from(document.querySelectorAll(
                "a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
              ));

              for (const node of nodes) {
                const text = normalize(node.textContent || "");
                const title = normalize(node.getAttribute("title"));
                const aria = normalize(node.getAttribute("aria-label"));
                const id = normalize(node.id);
                const cls = normalize(node.className);
                const href = normalize(node.getAttribute("href"));
                const onclick = normalize(node.getAttribute("onclick"));
                const all = [text, title, aria, id, cls, href, onclick].join(" ");
                const looksLikeRemove = removeTokens.some((t) => all.includes(t)) || onclick.includes("unbookmark");
                const matchesId = torrentId ? all.includes(torrentId) : true;
                if (looksLikeRemove && matchesId) {
                  return true;
                }
              }
              return false;
            }
            """,
            {"torrentId": torrent_id},
        )
    )
|
||||
|
||||
|
||||
def _bookmark_control_state(page: Any, torrent_id: str | None) -> dict[str, Any]:
    """Inspect the page and classify the most likely bookmark control.

    The inspection runs in the page via ``page.evaluate`` and clicks nothing.
    Candidates are the element with id ``bookmarklink_torrent_<torrent_id>``
    (when ``torrent_id`` is truthy and the element exists), followed by every
    ``a``, ``button``, ``[onclick]`` and ``[id*='bookmark']`` element. Each
    candidate is scored from its lower-cased text, ``id``, ``href``,
    ``onclick`` and ``title``:

    * any field contains ``bookmark``: +12
    * any field contains ``torrent_id`` (only when it is truthy): +28
    * ``onclick`` contains ``unbookmark``: +45
    * ``onclick`` contains ``bookmark(`` but not ``unbookmark``: +20
    * ``id`` contains ``bookmarklink_torrent``: +35

    Each candidate's ``action`` is ``"remove"`` when its ``onclick`` contains
    ``unbookmark``, ``"add"`` when it contains ``bookmark(`` only, and
    ``"unknown"`` otherwise.

    Args:
        page: Object exposing ``evaluate(script, arg)``.
            Presumably a Playwright page; confirm against the caller.
        torrent_id: Torrent id used for the direct lookup and as a scoring
            signal, or ``None``.

    Returns:
        The value produced by the script: a mapping with ``best`` (the
        highest-scored candidate as a mapping with ``score``, ``action``,
        ``text``, ``id``, ``onclick`` and a 220-character ``snapshot``, or
        ``None`` when there were no candidates) and ``total`` (the number of
        candidates scored).
    """
    return page.evaluate(
        """
        ({ torrentId }) => {
          const normalize = (v) => (v || "").toString().toLowerCase();
          const candidates = [];

          if (torrentId) {
            const direct = document.getElementById(`bookmarklink_torrent_${torrentId}`);
            if (direct) candidates.push(direct);
          }

          const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
          for (const node of nodes) {
            if (!candidates.includes(node)) candidates.push(node);
          }

          const scored = [];
          for (const node of candidates) {
            const text = normalize(node.textContent || "");
            const id = normalize(node.id);
            const href = normalize(node.getAttribute("href"));
            const onclick = normalize(node.getAttribute("onclick"));
            const title = normalize(node.getAttribute("title"));
            const all = [text, id, href, onclick, title].join(" ");

            let score = 0;
            if (all.includes("bookmark")) score += 12;
            if (torrentId && all.includes(torrentId)) score += 28;
            if (onclick.includes("unbookmark")) score += 45;
            if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) score += 20;
            if (id.includes("bookmarklink_torrent")) score += 35;

            const action = onclick.includes("unbookmark")
              ? "remove"
              : (onclick.includes("bookmark(") ? "add" : "unknown");

            scored.push({
              score,
              action,
              text,
              id,
              onclick,
              snapshot: (node.outerHTML || "").slice(0, 220),
            });
          }

          scored.sort((a, b) => b.score - a.score);
          const best = scored[0] || null;
          return { best, total: scored.length };
        }
        """,
        {"torrentId": torrent_id},
    )
|
||||
|
||||
|
||||
def _click_bookmark_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
    """Locate the bookmark control in the page and click it unless it is an add control.

    The logic runs in the page via ``page.evaluate``:

    1. When ``torrent_id`` is truthy, the element with id
       ``bookmarklink_torrent_<torrent_id>`` is used as the target.
    2. Otherwise, or when that element does not exist, every ``a``,
       ``button``, ``[onclick]`` and ``[id*='bookmark']`` element is scored
       (``bookmark`` anywhere +12, torrent id +28, ``unbookmark`` in
       ``onclick`` +45, ``bookmarklink_torrent`` in ``id`` +35). The first
       element with the strictly highest score becomes the target.
    3. If the target's ``onclick`` contains ``bookmark(`` but not
       ``unbookmark``, nothing is clicked and ``"already-removed"`` is
       reported. Otherwise the target is clicked.

    Args:
        page: Object exposing ``evaluate(script, arg)``.
            Presumably a Playwright page; confirm against the caller.
        torrent_id: Torrent id used for the direct lookup and as a scoring
            signal, or ``None``.

    Returns:
        The value produced by the script: ``{"clicked": False, "reason":
        "no-target"}``, ``{"clicked": False, "reason": "already-removed"}``,
        or ``{"clicked": True, "reason": "clicked", "snapshot": ...}`` where
        ``snapshot`` is the first 220 characters of the target's ``outerHTML``.
    """
    # NOTE(review): the fallback has no minimum score, so when nothing on the
    # page looks bookmark-related the first scanned element (score 0) is still
    # clicked. Confirm callers always pre-check the state before calling this.
    return page.evaluate(
        """
        ({ torrentId }) => {
          const normalize = (v) => (v || "").toString().toLowerCase();
          let target = null;

          if (torrentId) {
            target = document.getElementById(`bookmarklink_torrent_${torrentId}`);
          }

          if (!target) {
            const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
            let best = null;
            for (const node of nodes) {
              const text = normalize(node.textContent || "");
              const id = normalize(node.id);
              const href = normalize(node.getAttribute("href"));
              const onclick = normalize(node.getAttribute("onclick"));
              const title = normalize(node.getAttribute("title"));
              const all = [text, id, href, onclick, title].join(" ");
              let score = 0;
              if (all.includes("bookmark")) score += 12;
              if (torrentId && all.includes(torrentId)) score += 28;
              if (onclick.includes("unbookmark")) score += 45;
              if (id.includes("bookmarklink_torrent")) score += 35;
              if (!best || score > best.score) best = { node, score, all, onclick };
            }
            if (best) target = best.node;
          }

          if (!target) return { clicked: false, reason: "no-target" };

          const onclick = normalize(target.getAttribute("onclick"));
          if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) {
            return { clicked: false, reason: "already-removed" };
          }

          target.click();
          return { clicked: true, reason: "clicked", snapshot: (target.outerHTML || "").slice(0, 220) };
        }
        """,
        {"torrentId": torrent_id},
    )
|
||||
|
||||
|
||||
def remove_bookmark_with_retry(
    session: DynamicSession,
    detail_url: str,
    torrent_id: str | None,
    retries: int,
    backoff_base: float,
) -> bool:
    """Remove the bookmark for a torrent via its detail page, retrying on failure.

    Each attempt opens a fresh page from ``session.context``, loads
    ``detail_url`` and inspects the bookmark control:

    * control action is ``"add"``: the bookmark is treated as already absent
      and ``True`` is returned without clicking;
    * control action is anything other than ``"remove"``: the attempt fails;
    * otherwise the control is clicked, the page is reloaded after a short
      wait, and the attempt fails if the control still reports ``"remove"``.

    Failed attempts are retried with exponential backoff
    (``backoff_base * 2**attempt`` seconds). The page is closed before the
    backoff sleep so no browser page stays open while waiting.

    Args:
        session: Browser session whose ``context.new_page()`` yields a page.
        detail_url: URL of the torrent detail page.
        torrent_id: Torrent id used to locate the control, or ``None``.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the removal is always tried at least once.
        backoff_base: Base delay in seconds for the exponential backoff.

    Returns:
        ``True`` when the bookmark is absent or was removed. This function
        never returns ``False``; failure is signalled by an exception.

    Raises:
        RuntimeError: If every attempt failed; chained from the last error.
    """
    # With ``retries <= 0`` the loop used to run zero times and raise
    # "failed ...: None"; guarantee one real attempt instead.
    attempts = max(1, retries)
    last_error: Exception | None = None

    for attempt in range(attempts):
        page = session.context.new_page()
        try:
            page.goto(detail_url, wait_until="domcontentloaded", timeout=45_000)
            state_before = _bookmark_control_state(page, torrent_id)
            best_before = (state_before or {}).get("best") or {}
            action_before = best_before.get("action")
            if action_before == "add":
                # The control offers "add", so there is nothing to remove.
                return True
            if action_before != "remove":
                raise RuntimeError("Bookmark remove control not detected on detail page.")

            click_result = _click_bookmark_control(page, torrent_id)
            if not click_result.get("clicked"):
                if click_result.get("reason") == "already-removed":
                    return True
                raise RuntimeError(
                    "Bookmark remove action could not be clicked. "
                    f"reason={click_result.get('reason')}"
                )

            # Give the click's request time to finish, then reload so the
            # verification reads the control as freshly rendered.
            page.wait_for_timeout(2200)
            page.reload(wait_until="domcontentloaded", timeout=45_000)
            state_after = _bookmark_control_state(page, torrent_id)
            best_after = (state_after or {}).get("best") or {}
            action_after = best_after.get("action")
            if action_after == "remove":
                raise RuntimeError("Bookmark remove control still present after click; remove likely failed.")
            return True
        except Exception as err:  # noqa: BLE001
            # Retry boundary: any failure in the attempt is recorded and retried.
            last_error = err
        finally:
            page.close()

        # Back off only between attempts, after the page has been closed.
        if attempt < attempts - 1:
            time.sleep(backoff_base * (2**attempt))

    raise RuntimeError(f"Bookmark remove failed for {detail_url}: {last_error}") from last_error
|
||||
|
||||
Reference in New Issue
Block a user