feat: add optional bookmark removal to torrent download action
This commit is contained in:
23
README.md
23
README.md
@@ -67,6 +67,27 @@ wscraper happyfappy --action get-bookmarks -c cookies.txt -o bookmarks.json
|
|||||||
wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Torrent İndir + Bookmark Kaldır
|
||||||
|
|
||||||
|
`--rm-bookmark` (kısa alias: `-rmb`) flag'i verildiğinde, torrent dosyası başarıyla indirildikten sonra aynı torrent detay sayfasındaki bookmark remove mekanizması dinamik olarak bulunur ve tetiklenir.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wscraper happyfappy --action download-torrent-files --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||||
|
```
|
||||||
|
|
||||||
|
Davranış kuralları:
|
||||||
|
|
||||||
|
- Torrent indirme başarısızsa bookmark silme adımına geçilmez.
|
||||||
|
- `--rm-bookmark` verilmiş ve bookmark silme başarısızsa komut `exit code 1` ile biter.
|
||||||
|
- `--rm-bookmark` verilmemişse yalnızca indirme akışı çalışır.
|
||||||
|
|
||||||
|
Bookmark remove tespiti tek bir sabit HTML selector'ine bağlı değildir:
|
||||||
|
|
||||||
|
- öncelik `onclick`/event sinyalleri (ör. `Unbookmark(...)`)
|
||||||
|
- `id/class/href` içinde bookmark semantiği
|
||||||
|
- remove/delete/unbookmark benzeri metin ve attribute sinyalleri
|
||||||
|
- torrent id ile ilişkili kontrol eşleştirmesi
|
||||||
|
|
||||||
## 4) Kısa Alias Kullanımı
|
## 4) Kısa Alias Kullanımı
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -74,6 +95,8 @@ wscraper happyfappy --action download-torrent-files -u "https://www.happyfappy.n
|
|||||||
# action alias: gb (get-bookmarks), dtf (download-torrent-files)
|
# action alias: gb (get-bookmarks), dtf (download-torrent-files)
|
||||||
wscraper hf -a gb -c cookies.txt -o bookmarks.json
|
wscraper hf -a gb -c cookies.txt -o bookmarks.json
|
||||||
wscraper hf -a dtf -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
wscraper hf -a dtf -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||||
|
wscraper hf -a dtf --rm-bookmark -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||||
|
wscraper hf -a dtf -rmb -u "https://www.happyfappy.net/torrents.php?id=110178" -c cookies.txt -o torrent
|
||||||
```
|
```
|
||||||
|
|
||||||
## 5) Proje Dizini
|
## 5) Proje Dizini
|
||||||
|
|||||||
@@ -50,6 +50,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|||||||
"--output",
|
"--output",
|
||||||
help="Output target: file path for get-bookmarks, directory path for download-torrent-files",
|
help="Output target: file path for get-bookmarks, directory path for download-torrent-files",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-rmb",
|
||||||
|
"--rm-bookmark",
|
||||||
|
action="store_true",
|
||||||
|
help="When used with download-torrent-files, remove bookmark after successful torrent download",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument("-r", "--retries", type=int, default=3)
|
parser.add_argument("-r", "--retries", type=int, default=3)
|
||||||
parser.add_argument("--backoff-base", type=float, default=5.0)
|
parser.add_argument("--backoff-base", type=float, default=5.0)
|
||||||
@@ -88,6 +94,7 @@ def run_happyfappy(args: argparse.Namespace, action: str) -> None:
|
|||||||
cookie=args.cookie,
|
cookie=args.cookie,
|
||||||
cookie_file=args.cookie_file,
|
cookie_file=args.cookie_file,
|
||||||
output_dir=args.output or "torrent",
|
output_dir=args.output or "torrent",
|
||||||
|
rm_bookmark=args.rm_bookmark,
|
||||||
retries=args.retries,
|
retries=args.retries,
|
||||||
backoff_base=args.backoff_base,
|
backoff_base=args.backoff_base,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -353,3 +353,280 @@ def run_download_torrent_files(args: argparse.Namespace) -> None:
|
|||||||
output_path = output_dir / filename
|
output_path = output_dir / filename
|
||||||
output_path.write_bytes(data)
|
output_path.write_bytes(data)
|
||||||
print(f"Saved torrent to {output_path}")
|
print(f"Saved torrent to {output_path}")
|
||||||
|
|
||||||
|
if getattr(args, "rm_bookmark", False):
|
||||||
|
torrent_id = extract_torrent_id(args.url)
|
||||||
|
removed = remove_bookmark_with_retry(
|
||||||
|
session=session,
|
||||||
|
detail_url=args.url,
|
||||||
|
torrent_id=torrent_id,
|
||||||
|
retries=args.retries,
|
||||||
|
backoff_base=args.backoff_base,
|
||||||
|
)
|
||||||
|
if not removed:
|
||||||
|
raise RuntimeError("Torrent downloaded but bookmark removal could not be verified.")
|
||||||
|
print("Bookmark removed successfully.")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_torrent_id(detail_url: str) -> str | None:
|
||||||
|
parsed = urlparse(detail_url)
|
||||||
|
query = parsed.query or ""
|
||||||
|
match = re.search(r"(?:^|&)id=(\d+)(?:&|$)", query)
|
||||||
|
if match:
|
||||||
|
return match.group(1)
|
||||||
|
path_match = re.search(r"/torrents\.php/(\d+)", parsed.path or "")
|
||||||
|
if path_match:
|
||||||
|
return path_match.group(1)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _click_remove_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
|
||||||
|
return page.evaluate(
|
||||||
|
"""
|
||||||
|
({ torrentId }) => {
|
||||||
|
const normalize = (v) => (v || "").toString().toLowerCase();
|
||||||
|
const hasAny = (source, tokens) => tokens.some((t) => source.includes(t));
|
||||||
|
const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
|
||||||
|
const addTokens = ["bookmark", "add", "mark", "save"];
|
||||||
|
|
||||||
|
const nodes = Array.from(document.querySelectorAll(
|
||||||
|
"a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
|
||||||
|
));
|
||||||
|
let best = null;
|
||||||
|
|
||||||
|
for (const node of nodes) {
|
||||||
|
const text = normalize(node.textContent || "");
|
||||||
|
const title = normalize(node.getAttribute("title"));
|
||||||
|
const aria = normalize(node.getAttribute("aria-label"));
|
||||||
|
const id = normalize(node.id);
|
||||||
|
const cls = normalize(node.className);
|
||||||
|
const href = normalize(node.getAttribute("href"));
|
||||||
|
const onclick = normalize(node.getAttribute("onclick"));
|
||||||
|
const attrs = normalize(node.outerHTML);
|
||||||
|
const all = [text, title, aria, id, cls, href, onclick, attrs].join(" ");
|
||||||
|
|
||||||
|
let score = 0;
|
||||||
|
const reasons = [];
|
||||||
|
if (hasAny(onclick, ["unbookmark"])) {
|
||||||
|
score += 60;
|
||||||
|
reasons.push("onclick:unbookmark");
|
||||||
|
}
|
||||||
|
if (hasAny(all, ["bookmark"])) {
|
||||||
|
score += 16;
|
||||||
|
reasons.push("bookmark-signal");
|
||||||
|
}
|
||||||
|
if (hasAny(all, removeTokens)) {
|
||||||
|
score += 26;
|
||||||
|
reasons.push("remove-signal");
|
||||||
|
}
|
||||||
|
if (!hasAny(all, removeTokens) && hasAny(all, addTokens)) {
|
||||||
|
score -= 20;
|
||||||
|
reasons.push("add-like-signal");
|
||||||
|
}
|
||||||
|
if (torrentId && all.includes(torrentId)) {
|
||||||
|
score += 30;
|
||||||
|
reasons.push("torrent-id");
|
||||||
|
}
|
||||||
|
if (hasAny(href, ["javascript", "#"])) {
|
||||||
|
score += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!best || score > best.score) {
|
||||||
|
best = { node, score, reasons, snapshot: (node.outerHTML || "").slice(0, 220) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!best || best.score < 20) {
|
||||||
|
return { clicked: false, score: best ? best.score : -1, reasons: best ? best.reasons : [], snapshot: best ? best.snapshot : "" };
|
||||||
|
}
|
||||||
|
|
||||||
|
best.node.click();
|
||||||
|
return { clicked: true, score: best.score, reasons: best.reasons, snapshot: best.snapshot };
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
{"torrentId": torrent_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_control_exists(page: Any, torrent_id: str | None) -> bool:
|
||||||
|
return bool(
|
||||||
|
page.evaluate(
|
||||||
|
"""
|
||||||
|
({ torrentId }) => {
|
||||||
|
const normalize = (v) => (v || "").toString().toLowerCase();
|
||||||
|
const removeTokens = ["unbookmark", "remove", "delete", "forget", "unmark"];
|
||||||
|
const nodes = Array.from(document.querySelectorAll(
|
||||||
|
"a,button,[role='button'],[onclick],input[type='button'],input[type='submit']"
|
||||||
|
));
|
||||||
|
|
||||||
|
for (const node of nodes) {
|
||||||
|
const text = normalize(node.textContent || "");
|
||||||
|
const title = normalize(node.getAttribute("title"));
|
||||||
|
const aria = normalize(node.getAttribute("aria-label"));
|
||||||
|
const id = normalize(node.id);
|
||||||
|
const cls = normalize(node.className);
|
||||||
|
const href = normalize(node.getAttribute("href"));
|
||||||
|
const onclick = normalize(node.getAttribute("onclick"));
|
||||||
|
const all = [text, title, aria, id, cls, href, onclick].join(" ");
|
||||||
|
const looksLikeRemove = removeTokens.some((t) => all.includes(t)) || onclick.includes("unbookmark");
|
||||||
|
const matchesId = torrentId ? all.includes(torrentId) : true;
|
||||||
|
if (looksLikeRemove && matchesId) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
{"torrentId": torrent_id},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _bookmark_control_state(page: Any, torrent_id: str | None) -> dict[str, Any]:
|
||||||
|
return page.evaluate(
|
||||||
|
"""
|
||||||
|
({ torrentId }) => {
|
||||||
|
const normalize = (v) => (v || "").toString().toLowerCase();
|
||||||
|
const candidates = [];
|
||||||
|
|
||||||
|
if (torrentId) {
|
||||||
|
const direct = document.getElementById(`bookmarklink_torrent_${torrentId}`);
|
||||||
|
if (direct) candidates.push(direct);
|
||||||
|
}
|
||||||
|
|
||||||
|
const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
|
||||||
|
for (const node of nodes) {
|
||||||
|
if (!candidates.includes(node)) candidates.push(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
const scored = [];
|
||||||
|
for (const node of candidates) {
|
||||||
|
const text = normalize(node.textContent || "");
|
||||||
|
const id = normalize(node.id);
|
||||||
|
const href = normalize(node.getAttribute("href"));
|
||||||
|
const onclick = normalize(node.getAttribute("onclick"));
|
||||||
|
const title = normalize(node.getAttribute("title"));
|
||||||
|
const all = [text, id, href, onclick, title].join(" ");
|
||||||
|
|
||||||
|
let score = 0;
|
||||||
|
if (all.includes("bookmark")) score += 12;
|
||||||
|
if (torrentId && all.includes(torrentId)) score += 28;
|
||||||
|
if (onclick.includes("unbookmark")) score += 45;
|
||||||
|
if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) score += 20;
|
||||||
|
if (id.includes("bookmarklink_torrent")) score += 35;
|
||||||
|
|
||||||
|
const action = onclick.includes("unbookmark")
|
||||||
|
? "remove"
|
||||||
|
: (onclick.includes("bookmark(") ? "add" : "unknown");
|
||||||
|
|
||||||
|
scored.push({
|
||||||
|
score,
|
||||||
|
action,
|
||||||
|
text,
|
||||||
|
id,
|
||||||
|
onclick,
|
||||||
|
snapshot: (node.outerHTML || "").slice(0, 220),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
scored.sort((a, b) => b.score - a.score);
|
||||||
|
const best = scored[0] || null;
|
||||||
|
return { best, total: scored.length };
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
{"torrentId": torrent_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _click_bookmark_control(page: Any, torrent_id: str | None) -> dict[str, Any]:
|
||||||
|
return page.evaluate(
|
||||||
|
"""
|
||||||
|
({ torrentId }) => {
|
||||||
|
const normalize = (v) => (v || "").toString().toLowerCase();
|
||||||
|
let target = null;
|
||||||
|
|
||||||
|
if (torrentId) {
|
||||||
|
target = document.getElementById(`bookmarklink_torrent_${torrentId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!target) {
|
||||||
|
const nodes = Array.from(document.querySelectorAll("a,button,[onclick],[id*='bookmark']"));
|
||||||
|
let best = null;
|
||||||
|
for (const node of nodes) {
|
||||||
|
const text = normalize(node.textContent || "");
|
||||||
|
const id = normalize(node.id);
|
||||||
|
const href = normalize(node.getAttribute("href"));
|
||||||
|
const onclick = normalize(node.getAttribute("onclick"));
|
||||||
|
const title = normalize(node.getAttribute("title"));
|
||||||
|
const all = [text, id, href, onclick, title].join(" ");
|
||||||
|
let score = 0;
|
||||||
|
if (all.includes("bookmark")) score += 12;
|
||||||
|
if (torrentId && all.includes(torrentId)) score += 28;
|
||||||
|
if (onclick.includes("unbookmark")) score += 45;
|
||||||
|
if (id.includes("bookmarklink_torrent")) score += 35;
|
||||||
|
if (!best || score > best.score) best = { node, score, all, onclick };
|
||||||
|
}
|
||||||
|
if (best) target = best.node;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!target) return { clicked: false, reason: "no-target" };
|
||||||
|
|
||||||
|
const onclick = normalize(target.getAttribute("onclick"));
|
||||||
|
if (onclick.includes("bookmark(") && !onclick.includes("unbookmark")) {
|
||||||
|
return { clicked: false, reason: "already-removed" };
|
||||||
|
}
|
||||||
|
|
||||||
|
target.click();
|
||||||
|
return { clicked: true, reason: "clicked", snapshot: (target.outerHTML || "").slice(0, 220) };
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
{"torrentId": torrent_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_bookmark_with_retry(
    session: DynamicSession,
    detail_url: str,
    torrent_id: str | None,
    retries: int,
    backoff_base: float,
) -> bool:
    """Remove the bookmark for a torrent via its detail page, retrying on failure.

    Each attempt opens a fresh page from ``session.context``, loads
    ``detail_url``, inspects the bookmark control, clicks it when it is in
    the "remove" state, then reloads the page to check that the control is
    no longer in that state.

    Args:
        session: Session whose ``context.new_page()`` yields a browser page.
        detail_url: URL of the torrent detail page.
        torrent_id: Torrent id used to locate the control, or ``None``.
        retries: Number of attempts to make.
        backoff_base: Base delay in seconds; the wait after attempt ``k``
            (0-based) is ``backoff_base * 2**k``.

    Returns:
        ``True`` when the bookmark is already absent (the control is in the
        "add" state) or was removed. The function never returns ``False``.

    Raises:
        RuntimeError: When every attempt failed; the last underlying error
            is chained as the cause.
    """
    last_error: Exception | None = None
    for attempt in range(retries):
        page = session.context.new_page()
        try:
            page.goto(detail_url, wait_until="domcontentloaded", timeout=45_000)
            state_before = _bookmark_control_state(page, torrent_id)
            best_before = (state_before or {}).get("best") or {}
            action_before = best_before.get("action")
            if action_before == "add":
                # The control offers "add", so the bookmark is already gone.
                return True
            if action_before != "remove":
                raise RuntimeError("Bookmark remove control not detected on detail page.")

            click_result = _click_bookmark_control(page, torrent_id)
            if not click_result.get("clicked"):
                if click_result.get("reason") == "already-removed":
                    return True
                raise RuntimeError(
                    "Bookmark remove action could not be clicked. "
                    f"reason={click_result.get('reason')}"
                )

            # Give the click's request time to complete, then reload to
            # re-read the control's state.
            page.wait_for_timeout(2200)
            page.reload(wait_until="domcontentloaded", timeout=45_000)
            state_after = _bookmark_control_state(page, torrent_id)
            best_after = (state_after or {}).get("best") or {}
            action_after = best_after.get("action")
            if action_after == "remove":
                raise RuntimeError("Bookmark remove control still present after click; remove likely failed.")
            # NOTE(review): any state other than "remove" (including
            # "unknown" or no control at all) is accepted as success here.
            return True
        except Exception as err:  # noqa: BLE001
            last_error = err
        finally:
            page.close()

        # Back off only after the page has been closed, so no browser page
        # is kept open while sleeping; skip the wait after the final attempt.
        if attempt < retries - 1:
            time.sleep(backoff_base * (2**attempt))

    raise RuntimeError(f"Bookmark remove failed for {detail_url}: {last_error}") from last_error
|
||||||
|
|||||||
Reference in New Issue
Block a user