feat: backend orkestrasyonunu ve arac entegrasyonlarini genislet
This commit is contained in:
296
backend/app/tools/browser_use.py
Normal file
296
backend/app/tools/browser_use.py
Normal file
@@ -0,0 +1,296 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import Settings
|
||||
from app.models import RuntimeSettings
|
||||
from app.tools.base import Tool
|
||||
|
||||
|
||||
class BrowserUseTool(Tool):
    """Tool that hands multi-step browsing tasks to a browser-use agent."""

    # Identifier of this tool.
    name = "browser_use"

    # Summary of what the tool is meant to be used for.
    description = (
        "Use the browser-use agent for higher-level real browser tasks such as navigating sites, "
        "extracting lists, comparing items, and completing multi-step browsing workflows."
    )
|
||||
|
||||
def __init__(self, workspace_root: Path, runtime: RuntimeSettings, settings: Settings, api_key: str) -> None:
|
||||
self.workspace_root = workspace_root.resolve()
|
||||
self.runtime = runtime
|
||||
self.settings = settings
|
||||
self.api_key = api_key
|
||||
self.debug_port = 9223 + (abs(hash(str(self.workspace_root))) % 200)
|
||||
self.chromium_path = (
|
||||
Path.home()
|
||||
/ "Library"
|
||||
/ "Caches"
|
||||
/ "ms-playwright"
|
||||
/ "chromium-1194"
|
||||
/ "chrome-mac"
|
||||
/ "Chromium.app"
|
||||
/ "Contents"
|
||||
/ "MacOS"
|
||||
/ "Chromium"
|
||||
)
|
||||
|
||||
def parameters_schema(self) -> dict[str, Any]:
    """Describe the payload accepted by ``run`` as a JSON-schema object."""
    # Properties are declared one by one, in the order they are exposed.
    properties: dict[str, Any] = {}
    properties["task"] = {
        "type": "string",
        "description": "The high-level browser task to complete.",
    }
    properties["start_url"] = {
        "type": "string",
        "description": "Optional URL to open first before the agent starts.",
    }
    properties["max_steps"] = {
        "type": "integer",
        "description": "Maximum browser-use steps before stopping. Defaults to 20.",
    }
    properties["keep_alive"] = {
        "type": "boolean",
        "description": "Keep the browser open after the run finishes.",
    }
    properties["allowed_domains"] = {
        "type": "array",
        "items": {"type": "string"},
        "description": "Optional list of allowed domains for the run.",
    }

    # Only "task" is mandatory; unknown keys are rejected by the schema.
    schema: dict[str, Any] = {
        "type": "object",
        "properties": properties,
        "required": ["task"],
        "additionalProperties": False,
    }
    return schema
|
||||
|
||||
async def run(self, payload: dict[str, Any]) -> dict[str, Any]:
    """Execute one browser-use task described by ``payload``.

    Args:
        payload: Tool arguments. ``task`` is required; ``start_url``,
            ``max_steps``, ``keep_alive`` and ``allowed_domains`` are optional.

    Returns:
        A dict that always carries ``tool`` and ``status``. On failure it has
        ``status == "error"`` and a ``message``; otherwise the keys produced
        by the agent run are merged in and ``status`` mirrors ``success``.
    """
    # ``or ""`` keeps an explicit null from being stringified into "None".
    task = str(payload.get("task") or "").strip()
    if not task:
        return {"tool": self.name, "status": "error", "message": "task is required."}

    start_url = str(payload.get("start_url") or "").strip()

    # A malformed max_steps (None, "abc", ...) must not raise here, outside
    # the error-reporting try block below; fall back to the documented default.
    try:
        max_steps = int(payload.get("max_steps", 20))
    except (TypeError, ValueError):
        max_steps = 20
    max_steps = max(1, max_steps)

    keep_alive = bool(payload.get("keep_alive", False))
    allowed_domains = self._normalize_domains(payload.get("allowed_domains"))

    # Without an explicit allow-list, restrict the run to the start URL's host.
    # ``hostname`` (unlike ``netloc``) excludes the port and any user:password@.
    if start_url and not allowed_domains:
        try:
            host = urlparse(start_url).hostname
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. broken IPv6 brackets).
            host = None
        if host:
            allowed_domains = [host]

    llm_error = self._provider_readiness_error()
    if llm_error is not None:
        return {"tool": self.name, "status": "error", "message": llm_error}

    try:
        result = await self._run_agent(
            task=self._compose_task(task, start_url),
            max_steps=max_steps,
            keep_alive=keep_alive,
            allowed_domains=allowed_domains,
        )
    except Exception as exc:
        # Tool boundary: report any failure as an error payload, never raise.
        return {
            "tool": self.name,
            "status": "error",
            "message": str(exc),
        }

    return {
        "tool": self.name,
        "status": "ok" if result["success"] else "error",
        **result,
    }
|
||||
|
||||
async def _run_agent(
    self,
    task: str,
    max_steps: int,
    keep_alive: bool,
    allowed_domains: list[str],
) -> dict[str, Any]:
    """Run a browser-use agent against the persistent browser and summarise it.

    Args:
        task: Full task text handed to the agent.
        max_steps: Step budget passed to ``agent.run``.
        keep_alive: Accepted for interface parity.
            NOTE(review): not consulted below; the browser is always created
            with ``keep_alive=True`` - confirm this is intended.
        allowed_domains: Domain allow-list; an empty list is passed as ``None``.

    Returns:
        A summary dict with ``success``, ``final_result``, ``steps`` and the
        trailing slices of extracted content, errors, URLs and action names.
    """
    # Deferred import: browser_use is only loaded when a run is requested.
    from browser_use import Agent, Browser, ChatAnthropic, ChatOpenAI

    endpoint = await self._ensure_persistent_browser()
    session = Browser(
        cdp_url=endpoint,
        is_local=True,
        keep_alive=True,
        allowed_domains=allowed_domains or None,
    )
    model = self._build_llm(ChatAnthropic=ChatAnthropic, ChatOpenAI=ChatOpenAI)
    agent = Agent(
        task=task,
        llm=model,
        browser=session,
        use_vision=True,
        enable_planning=False,
        max_actions_per_step=3,
        display_files_in_done_text=False,
    )

    try:
        history = await agent.run(max_steps=max_steps)

        answer = history.final_result() or ""
        extracted_items = history.extracted_content()
        # Drop empty entries before trimming to the most recent ones.
        failure_notes = [note for note in history.errors() if note]
        visited = [link for link in history.urls() if link]

        summary: dict[str, Any] = {
            "success": bool(history.is_successful()),
            "final_result": answer,
            "extracted_content": extracted_items[-10:],
            "errors": failure_notes[-5:],
            "urls": visited[-10:],
            "steps": history.number_of_steps(),
            "actions": history.action_names()[-20:],
        }
        return summary
    finally:
        # The agent is closed whether or not the run succeeded.
        await agent.close()
|
||||
|
||||
def _build_llm(self, ChatAnthropic: Any, ChatOpenAI: Any) -> Any:
|
||||
if self.runtime.model_provider == "zai":
|
||||
return ChatAnthropic(
|
||||
model=self.runtime.zai_model,
|
||||
api_key=self.api_key,
|
||||
base_url=self.settings.zai_base_url,
|
||||
timeout=180.0,
|
||||
)
|
||||
|
||||
return ChatOpenAI(
|
||||
model=self.runtime.local_model,
|
||||
api_key="lm-studio",
|
||||
base_url=f"{self.runtime.local_base_url.rstrip('/')}/v1",
|
||||
timeout=180.0,
|
||||
)
|
||||
|
||||
def _provider_readiness_error(self) -> str | None:
|
||||
if self.runtime.model_provider == "zai" and not self.api_key.strip():
|
||||
return "Z.AI API key is not configured."
|
||||
if self.runtime.model_provider == "local" and not self.runtime.local_base_url.strip():
|
||||
return "Local model base URL is not configured."
|
||||
return None
|
||||
|
||||
def _compose_task(self, task: str, start_url: str) -> str:
|
||||
instructions = [
|
||||
"Work in a real browser on macOS.",
|
||||
"If the task asks for list extraction, return concise structured text.",
|
||||
"If a captcha or login wall blocks progress, stop immediately and say that user action is required.",
|
||||
"Do not click third-party sign-in buttons such as Google, Apple, or GitHub OAuth buttons.",
|
||||
"Do not open or interact with login popups or OAuth consent windows.",
|
||||
"If authentication is required, leave the page open in the persistent browser and tell the user to complete login manually, then retry the task.",
|
||||
"Do not submit irreversible forms or purchases unless the user explicitly asked for it.",
|
||||
]
|
||||
if start_url:
|
||||
instructions.append(f"Start at this URL first: {start_url}")
|
||||
instructions.append(task)
|
||||
return "\n".join(instructions)
|
||||
|
||||
def _normalize_domains(self, value: object) -> list[str]:
|
||||
if not isinstance(value, list):
|
||||
return []
|
||||
return [str(item).strip() for item in value if str(item).strip()]
|
||||
|
||||
def _profile_root(self) -> Path:
|
||||
profile_root = self.workspace_root / ".wiseclaw" / "browser-use-profile"
|
||||
profile_root.mkdir(parents=True, exist_ok=True)
|
||||
(profile_root / "WiseClaw").mkdir(parents=True, exist_ok=True)
|
||||
return profile_root
|
||||
|
||||
async def _ensure_persistent_browser(self) -> str:
|
||||
state = self._load_browser_state()
|
||||
if state and self._pid_is_running(int(state.get("pid", 0))):
|
||||
cdp_url = await self._fetch_cdp_url(int(state["port"]))
|
||||
if cdp_url:
|
||||
return cdp_url
|
||||
|
||||
await self._launch_persistent_browser()
|
||||
cdp_url = await self._wait_for_cdp_url()
|
||||
self._save_browser_state({"pid": self._read_pid_file(), "port": self.debug_port})
|
||||
return cdp_url
|
||||
|
||||
async def _launch_persistent_browser(self) -> None:
    """Start Chromium with remote debugging enabled and record its pid."""
    # Use the configured binary when present, else the bare name "Chromium".
    if self.chromium_path.exists():
        binary = str(self.chromium_path)
    else:
        binary = "Chromium"

    user_data_dir = self._profile_root()
    command = [
        binary,
        f"--remote-debugging-port={self.debug_port}",
        f"--user-data-dir={user_data_dir}",
        "--profile-directory=WiseClaw",
        "--no-first-run",
        "--no-default-browser-check",
        "--start-maximized",
        "about:blank",
    ]

    # Output is discarded and the child is started in its own session.
    discard = asyncio.subprocess.DEVNULL
    browser_process = await asyncio.create_subprocess_exec(
        *command,
        stdout=discard,
        stderr=discard,
        start_new_session=True,
    )
    self._write_pid_file(browser_process.pid)
|
||||
|
||||
async def _wait_for_cdp_url(self) -> str:
|
||||
for _ in range(40):
|
||||
cdp_url = await self._fetch_cdp_url(self.debug_port)
|
||||
if cdp_url:
|
||||
return cdp_url
|
||||
await asyncio.sleep(0.5)
|
||||
raise RuntimeError("Persistent Chromium browser did not expose a CDP endpoint in time.")
|
||||
|
||||
async def _fetch_cdp_url(self, port: int) -> str:
    """Ask the browser on ``port`` for its CDP WebSocket URL.

    Args:
        port: Local remote-debugging port to query.

    Returns:
        The ``webSocketDebuggerUrl`` value from ``/json/version``, or ``""``
        when the request fails, the body is not valid JSON, the body is not
        a JSON object, or the key is missing or null.
    """
    try:
        async with httpx.AsyncClient(timeout=2.0) as client:
            response = await client.get(f"http://127.0.0.1:{port}/json/version")
            response.raise_for_status()
            # Decode inside the try: a JSON decode error is a ValueError,
            # not an httpx.HTTPError, and must not escape.
            payload = response.json()
    except (httpx.HTTPError, ValueError):
        return ""

    # Something other than the browser may answer on this port.
    if not isinstance(payload, dict):
        return ""
    # ``or ""`` keeps a null value from being stringified into "None".
    return str(payload.get("webSocketDebuggerUrl") or "")
|
||||
|
||||
def _browser_state_path(self) -> Path:
|
||||
return self.workspace_root / ".wiseclaw" / "browser-use-browser.json"
|
||||
|
||||
def _browser_pid_path(self) -> Path:
|
||||
return self.workspace_root / ".wiseclaw" / "browser-use-browser.pid"
|
||||
|
||||
def _load_browser_state(self) -> dict[str, int] | None:
|
||||
path = self._browser_state_path()
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
|
||||
def _save_browser_state(self, payload: dict[str, int]) -> None:
|
||||
path = self._browser_state_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
||||
|
||||
def _write_pid_file(self, pid: int) -> None:
|
||||
path = self._browser_pid_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(str(pid), encoding="utf-8")
|
||||
|
||||
def _read_pid_file(self) -> int:
|
||||
path = self._browser_pid_path()
|
||||
if not path.exists():
|
||||
return 0
|
||||
try:
|
||||
return int(path.read_text(encoding="utf-8").strip())
|
||||
except ValueError:
|
||||
return 0
|
||||
|
||||
def _pid_is_running(self, pid: int) -> bool:
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except OSError:
|
||||
return False
|
||||
return True
|
||||
Reference in New Issue
Block a user