feat: backend orkestrasyonunu ve arac entegrasyonlarini genislet

2026-03-22 04:45:43 +03:00
parent d07bc365f5
commit 5f4c19a18d
25 changed files with 3750 additions and 82 deletions
--- a/backend/app/tools/browser_use.py
+++ b/backend/app/tools/browser_use.py
@@ -0,0 +1,296 @@
+import asyncio
+import json
+import os
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+
+import httpx
+
+from app.config import Settings
+from app.models import RuntimeSettings
+from app.tools.base import Tool
+
+
+class BrowserUseTool(Tool):
+    name = "browser_use"
+    description = (
+        "Use the browser-use agent for higher-level real browser tasks such as navigating sites, "
+        "extracting lists, comparing items, and completing multi-step browsing workflows."
+    )
+
+    def __init__(self, workspace_root: Path, runtime: RuntimeSettings, settings: Settings, api_key: str) -> None:
+        """Bind the tool to a workspace and derive its browser settings.
+
+        Args:
+            workspace_root: Workspace directory; stored as a resolved path.
+            runtime: Runtime settings (model provider, model names, local
+                base URL are read from it elsewhere in this class).
+            settings: Application settings (the Z.AI base URL is read from it).
+            api_key: API key passed to the Z.AI-backed chat model.
+        """
+        # Function-scope import so this method stays self-contained.
+        import zlib
+
+        self.workspace_root = workspace_root.resolve()
+        self.runtime = runtime
+        self.settings = settings
+        self.api_key = api_key
+        # The builtin hash() of a str is salted per interpreter process, so it
+        # would yield a different port after every restart. CRC32 of the path
+        # bytes gives the same port for the same workspace every time.
+        workspace_digest = zlib.crc32(os.fsencode(self.workspace_root))
+        # Ports land in the range 9223..9422.
+        self.debug_port = 9223 + (workspace_digest % 200)
+        # Playwright's per-user cache location on macOS, pinned to the
+        # chromium-1194 build directory.
+        self.chromium_path = (
+            Path.home()
+            / "Library"
+            / "Caches"
+            / "ms-playwright"
+            / "chromium-1194"
+            / "chrome-mac"
+            / "Chromium.app"
+            / "Contents"
+            / "MacOS"
+            / "Chromium"
+        )
+
+    def parameters_schema(self) -> dict[str, Any]:
+        """Return the JSON-schema object describing this tool's arguments.
+
+        Only ``task`` is required; unknown properties are rejected.
+        """
+        task_field = {
+            "type": "string",
+            "description": "The high-level browser task to complete.",
+        }
+        start_url_field = {
+            "type": "string",
+            "description": "Optional URL to open first before the agent starts.",
+        }
+        max_steps_field = {
+            "type": "integer",
+            "description": "Maximum browser-use steps before stopping. Defaults to 20.",
+        }
+        keep_alive_field = {
+            "type": "boolean",
+            "description": "Keep the browser open after the run finishes.",
+        }
+        allowed_domains_field = {
+            "type": "array",
+            "items": {"type": "string"},
+            "description": "Optional list of allowed domains for the run.",
+        }
+
+        schema: dict[str, Any] = {"type": "object"}
+        schema["properties"] = {
+            "task": task_field,
+            "start_url": start_url_field,
+            "max_steps": max_steps_field,
+            "keep_alive": keep_alive_field,
+            "allowed_domains": allowed_domains_field,
+        }
+        schema["required"] = ["task"]
+        schema["additionalProperties"] = False
+        return schema
+
+    async def run(self, payload: dict[str, Any]) -> dict[str, Any]:
+        """Validate ``payload`` and run one browser-use task.
+
+        Args:
+            payload: Tool arguments. Keys read here: ``task`` (required),
+                ``start_url``, ``max_steps`` (default 20), ``keep_alive``
+                and ``allowed_domains``.
+
+        Returns:
+            A dict with ``tool`` and ``status`` (``"ok"`` or ``"error"``).
+            Validation and runtime failures carry a ``message``; a completed
+            run carries the fields produced by ``_run_agent``.
+        """
+        # `or ""` maps an explicit null to empty text; str(None) would
+        # otherwise smuggle the literal word "None" in as a task or URL.
+        task = str(payload.get("task") or "").strip()
+        if not task:
+            return {"tool": self.name, "status": "error", "message": "task is required."}
+
+        start_url = str(payload.get("start_url") or "").strip()
+
+        raw_max_steps = payload.get("max_steps")
+        try:
+            max_steps = 20 if raw_max_steps is None else int(raw_max_steps)
+        except (TypeError, ValueError, OverflowError):
+            max_steps = 0
+        if max_steps < 1:
+            # Report bad input in the tool's own error shape instead of
+            # letting the conversion error escape to the caller.
+            return {
+                "tool": self.name,
+                "status": "error",
+                "message": "max_steps must be a positive integer.",
+            }
+
+        keep_alive = bool(payload.get("keep_alive", False))
+        allowed_domains = self._normalize_domains(payload.get("allowed_domains"))
+
+        if start_url and not allowed_domains:
+            # Restrict the run to the starting site. `hostname` drops any
+            # port/userinfo and lower-cases the host, unlike `netloc`.
+            try:
+                host = urlparse(start_url).hostname
+            except ValueError:
+                # urlparse rejects some malformed URLs (e.g. broken IPv6
+                # brackets); run without a derived allow-list in that case.
+                host = None
+            if host:
+                allowed_domains = [host]
+
+        llm_error = self._provider_readiness_error()
+        if llm_error is not None:
+            return {"tool": self.name, "status": "error", "message": llm_error}
+
+        try:
+            result = await self._run_agent(
+                task=self._compose_task(task, start_url),
+                max_steps=max_steps,
+                keep_alive=keep_alive,
+                allowed_domains=allowed_domains,
+            )
+        except Exception as exc:
+            # Tool boundary: every failure is reported as a structured error.
+            return {
+                "tool": self.name,
+                "status": "error",
+                "message": str(exc),
+            }
+
+        return {
+            "tool": self.name,
+            "status": "ok" if result["success"] else "error",
+            **result,
+        }
+
+    async def _run_agent(
+        self,
+        task: str,
+        max_steps: int,
+        keep_alive: bool,
+        allowed_domains: list[str],
+    ) -> dict[str, Any]:
+        """Drive a browser-use agent against the persistent browser.
+
+        Args:
+            task: Full instruction text handed to the agent.
+            max_steps: Step limit passed to ``agent.run``.
+            keep_alive: Accepted for interface symmetry; this method does not
+                read it and always attaches with ``keep_alive=True``.
+            allowed_domains: Domain allow-list; an empty list is passed on
+                as ``None``.
+
+        Returns:
+            A summary dict with ``success``, ``final_result``, and the
+            trailing slices of extracted content, errors, URLs and action
+            names, plus the step count.
+        """
+        # Imported lazily, inside the method, as in the rest of this block.
+        from browser_use import Agent, Browser, ChatAnthropic, ChatOpenAI
+
+        endpoint = await self._ensure_persistent_browser()
+        session = Browser(
+            cdp_url=endpoint,
+            is_local=True,
+            keep_alive=True,
+            allowed_domains=allowed_domains if allowed_domains else None,
+        )
+        chat_model = self._build_llm(ChatAnthropic=ChatAnthropic, ChatOpenAI=ChatOpenAI)
+        runner = Agent(
+            task=task,
+            llm=chat_model,
+            browser=session,
+            use_vision=True,
+            enable_planning=False,
+            max_actions_per_step=3,
+            display_files_in_done_text=False,
+        )
+
+        try:
+            history = await runner.run(max_steps=max_steps)
+            answer = history.final_result() or ""
+            harvested = history.extracted_content()
+            # filter(None, ...) keeps only truthy entries.
+            problems = list(filter(None, history.errors()))
+            visited = list(filter(None, history.urls()))
+            # Only the most recent entries are returned to keep the payload small.
+            return {
+                "success": bool(history.is_successful()),
+                "final_result": answer,
+                "extracted_content": harvested[-10:],
+                "errors": problems[-5:],
+                "urls": visited[-10:],
+                "steps": history.number_of_steps(),
+                "actions": history.action_names()[-20:],
+            }
+        finally:
+            # Runs whether the agent finished or raised.
+            await runner.close()
+
+    def _build_llm(self, ChatAnthropic: Any, ChatOpenAI: Any) -> Any:
+        """Construct the chat model for the configured provider.
+
+        Args:
+            ChatAnthropic: Factory used when the provider is ``"zai"``.
+            ChatOpenAI: Factory used for every other provider value.
+
+        Returns:
+            Whatever the selected factory returns.
+        """
+        request_timeout = 180.0
+
+        if self.runtime.model_provider != "zai":
+            # Non-Z.AI providers use the local server's "/v1" endpoint with
+            # a fixed placeholder key.
+            local_model = self.runtime.local_model
+            local_endpoint = f"{self.runtime.local_base_url.rstrip('/')}/v1"
+            return ChatOpenAI(
+                model=local_model,
+                api_key="lm-studio",
+                base_url=local_endpoint,
+                timeout=request_timeout,
+            )
+
+        return ChatAnthropic(
+            model=self.runtime.zai_model,
+            api_key=self.api_key,
+            base_url=self.settings.zai_base_url,
+            timeout=request_timeout,
+        )
+
+    def _provider_readiness_error(self) -> str | None:
+        """Return a message when the selected provider is unconfigured.
+
+        Returns:
+            An error string if the ``"zai"`` provider has a blank API key or
+            the ``"local"`` provider has a blank base URL; otherwise ``None``.
+        """
+        provider = self.runtime.model_provider
+        if provider == "zai":
+            return None if self.api_key.strip() else "Z.AI API key is not configured."
+        if provider == "local":
+            has_base_url = bool(self.runtime.local_base_url.strip())
+            return None if has_base_url else "Local model base URL is not configured."
+        return None
+
+    def _compose_task(self, task: str, start_url: str) -> str:
+        """Wrap ``task`` in the fixed guidance lines given to the agent.
+
+        Args:
+            task: The user's task text; always the last line of the result.
+            start_url: When non-empty, adds a "start here" line before the task.
+
+        Returns:
+            All lines joined with newlines.
+        """
+        guardrails = (
+            "Work in a real browser on macOS.",
+            "If the task asks for list extraction, return concise structured text.",
+            "If a captcha or login wall blocks progress, stop immediately and say that user action is required.",
+            "Do not click third-party sign-in buttons such as Google, Apple, or GitHub OAuth buttons.",
+            "Do not open or interact with login popups or OAuth consent windows.",
+            "If authentication is required, leave the page open in the persistent browser and tell the user to complete login manually, then retry the task.",
+            "Do not submit irreversible forms or purchases unless the user explicitly asked for it.",
+        )
+        opening_hint = [f"Start at this URL first: {start_url}"] if start_url else []
+        return "\n".join([*guardrails, *opening_hint, task])
+
+    def _normalize_domains(self, value: object) -> list[str]:
+        """Coerce ``value`` into a list of non-blank, stripped strings.
+
+        Anything that is not a ``list`` yields an empty list; entries are
+        converted with ``str`` and dropped when blank after stripping.
+        """
+        if isinstance(value, list):
+            cleaned = (str(entry).strip() for entry in value)
+            return [domain for domain in cleaned if domain]
+        return []
+
+    def _profile_root(self) -> Path:
+        """Create (if needed) and return the browser profile directory.
+
+        Ensures both ``<workspace>/.wiseclaw/browser-use-profile`` and its
+        ``WiseClaw`` subdirectory exist; the parent of the two is returned.
+        """
+        root = self.workspace_root.joinpath(".wiseclaw", "browser-use-profile")
+        for directory in (root, root / "WiseClaw"):
+            directory.mkdir(parents=True, exist_ok=True)
+        return root
+
+    async def _ensure_persistent_browser(self) -> str:
+        """Return a CDP URL for the persistent browser, launching it if needed.
+
+        A previously recorded browser is reused when its state is readable,
+        its PID is still running and its debug port answers. Otherwise a
+        new browser is launched on ``self.debug_port`` and its PID and port
+        are saved.
+
+        Returns:
+            The browser's CDP websocket URL.
+
+        Raises:
+            RuntimeError: Propagated from ``_wait_for_cdp_url`` when the new
+                browser never exposes its endpoint.
+        """
+        state = self._load_browser_state()
+        pid, port = 0, 0
+        if isinstance(state, dict):
+            # The state file is plain JSON on disk and may be stale or
+            # hand-edited; unusable values simply mean "relaunch".
+            try:
+                pid = int(state.get("pid", 0))
+                port = int(state.get("port", 0))
+            except (TypeError, ValueError):
+                pid, port = 0, 0
+
+        if port > 0 and self._pid_is_running(pid):
+            cdp_url = await self._fetch_cdp_url(port)
+            if cdp_url:
+                return cdp_url
+
+        await self._launch_persistent_browser()
+        cdp_url = await self._wait_for_cdp_url()
+        self._save_browser_state({"pid": self._read_pid_file(), "port": self.debug_port})
+        return cdp_url
+
+    async def _launch_persistent_browser(self) -> None:
+        """Start Chromium with remote debugging and record its PID.
+
+        Uses ``self.chromium_path`` when that file exists, otherwise the
+        bare command name ``Chromium``. The process is started in its own
+        session with output discarded, and its PID goes to the PID file.
+        """
+        if self.chromium_path.exists():
+            executable = str(self.chromium_path)
+        else:
+            executable = "Chromium"
+        profile_dir = self._profile_root()
+        flags = (
+            f"--remote-debugging-port={self.debug_port}",
+            f"--user-data-dir={profile_dir}",
+            "--profile-directory=WiseClaw",
+            "--no-first-run",
+            "--no-default-browser-check",
+            "--start-maximized",
+        )
+        devnull = asyncio.subprocess.DEVNULL
+        browser_process = await asyncio.create_subprocess_exec(
+            executable,
+            *flags,
+            "about:blank",
+            stdout=devnull,
+            stderr=devnull,
+            start_new_session=True,
+        )
+        self._write_pid_file(browser_process.pid)
+
+    async def _wait_for_cdp_url(self, attempts: int = 40, interval: float = 0.5) -> str:
+        """Poll the debug port until the browser reports its CDP URL.
+
+        Args:
+            attempts: Maximum number of probes (default 40).
+            interval: Seconds to sleep between probes (default 0.5).
+
+        Returns:
+            The first non-empty URL returned by ``_fetch_cdp_url``.
+
+        Raises:
+            RuntimeError: If every probe comes back empty.
+        """
+        for attempt in range(attempts):
+            cdp_url = await self._fetch_cdp_url(self.debug_port)
+            if cdp_url:
+                return cdp_url
+            # No point sleeping after the final probe; fail right away.
+            if attempt < attempts - 1:
+                await asyncio.sleep(interval)
+        raise RuntimeError("Persistent Chromium browser did not expose a CDP endpoint in time.")
+
+    async def _fetch_cdp_url(self, port: int) -> str:
+        """Ask the local debug endpoint on ``port`` for its websocket URL.
+
+        Args:
+            port: Port probed on 127.0.0.1.
+
+        Returns:
+            The ``webSocketDebuggerUrl`` value from ``/json/version``, or an
+            empty string when the request fails, the body is not a JSON
+            object, or the field is missing or empty.
+        """
+        try:
+            async with httpx.AsyncClient(timeout=2.0) as client:
+                response = await client.get(f"http://127.0.0.1:{port}/json/version")
+                response.raise_for_status()
+                payload = response.json()
+        except (httpx.HTTPError, ValueError):
+            # ValueError covers a 200 response whose body is not valid JSON,
+            # e.g. an unrelated service listening on the same port.
+            return ""
+        if not isinstance(payload, dict):
+            return ""
+        # `or ""` keeps a null field from turning into the text "None".
+        return str(payload.get("webSocketDebuggerUrl") or "")
+
+    def _browser_state_path(self) -> Path:
+        """Return the path of the JSON browser-state file in the workspace."""
+        return self.workspace_root.joinpath(".wiseclaw", "browser-use-browser.json")
+
+    def _browser_pid_path(self) -> Path:
+        """Return the path of the browser PID file in the workspace."""
+        return self.workspace_root.joinpath(".wiseclaw", "browser-use-browser.pid")
+
+    def _load_browser_state(self) -> dict[str, int] | None:
+        """Read the saved browser state, if a usable one exists.
+
+        Returns:
+            The decoded JSON object, or ``None`` when the file is missing,
+            unreadable, not valid UTF-8 JSON, or not a JSON object.
+        """
+        path = self._browser_state_path()
+        # Read directly rather than checking exists() first: the file can
+        # disappear between the check and the read.
+        try:
+            state = json.loads(path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+            return None
+        # Callers index the result like a dict; reject lists, numbers, etc.
+        return state if isinstance(state, dict) else None
+
+    def _save_browser_state(self, payload: dict[str, int]) -> None:
+        """Write ``payload`` as JSON to the browser-state file.
+
+        The containing directory is created first when it does not exist.
+        """
+        state_file = self._browser_state_path()
+        state_file.parent.mkdir(parents=True, exist_ok=True)
+        serialized = json.dumps(payload)
+        state_file.write_text(serialized, encoding="utf-8")
+
+    def _write_pid_file(self, pid: int) -> None:
+        """Store ``pid`` as decimal text in the browser PID file.
+
+        The containing directory is created first when it does not exist.
+        """
+        pid_file = self._browser_pid_path()
+        pid_file.parent.mkdir(parents=True, exist_ok=True)
+        pid_text = str(pid)
+        pid_file.write_text(pid_text, encoding="utf-8")
+
+    def _read_pid_file(self) -> int:
+        """Return the PID stored in the browser PID file.
+
+        Returns:
+            The parsed integer, or ``0`` when the file is missing,
+            unreadable, or does not contain an integer.
+        """
+        path = self._browser_pid_path()
+        # Read directly rather than checking exists() first: the file can
+        # disappear between the check and the read.
+        try:
+            return int(path.read_text(encoding="utf-8").strip())
+        except (OSError, ValueError):
+            # OSError: missing/unreadable file; ValueError: bad text or bytes.
+            return 0
+
+    def _pid_is_running(self, pid: int) -> bool:
+        """Report whether signal 0 can be delivered to ``pid``.
+
+        Non-positive PIDs are rejected without probing. Any ``OSError``
+        from the probe is treated as "not running".
+        """
+        if pid > 0:
+            try:
+                # Signal 0 only checks that the process can be signalled.
+                os.kill(pid, 0)
+            except OSError:
+                return False
+            else:
+                return True
+        return False