feat: backend orkestrasyonunu ve arac entegrasyonlarini genislet

This commit is contained in:
2026-03-22 04:45:43 +03:00
parent d07bc365f5
commit 5f4c19a18d
25 changed files with 3750 additions and 82 deletions

View File

@@ -0,0 +1,296 @@
import asyncio
import json
import os
import zlib
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

import httpx

from app.config import Settings
from app.models import RuntimeSettings
from app.tools.base import Tool
class BrowserUseTool(Tool):
    """Tool that hands multi-step browsing tasks to a ``browser_use`` agent.

    The agent is attached over CDP to a Chromium process that this tool
    launches itself with a profile directory under the workspace. The PID and
    debug port of that process are recorded in ``<workspace>/.wiseclaw`` so a
    later run can reattach to the same browser instead of starting a new one.
    """

    name = "browser_use"
    description = (
        "Use the browser-use agent for higher-level real browser tasks such as navigating sites, "
        "extracting lists, comparing items, and completing multi-step browsing workflows."
    )

    # Step budget used when the payload does not provide ``max_steps``.
    _DEFAULT_MAX_STEPS = 20
    # Debug ports are picked from [_DEBUG_PORT_BASE, _DEBUG_PORT_BASE + _DEBUG_PORT_SPAN).
    _DEBUG_PORT_BASE = 9223
    _DEBUG_PORT_SPAN = 200
    # Polling budget while waiting for a freshly launched browser: 40 * 0.5 s.
    _CDP_POLL_ATTEMPTS = 40
    _CDP_POLL_INTERVAL = 0.5

    def __init__(self, workspace_root: Path, runtime: RuntimeSettings, settings: Settings, api_key: str) -> None:
        """Store configuration and derive per-workspace browser settings.

        Args:
            workspace_root: Workspace directory; resolved to an absolute path.
            runtime: Runtime settings (model provider, model names, local base URL).
            settings: Application settings (read here for ``zai_base_url``).
            api_key: API key used when the provider is ``"zai"``.
        """
        self.workspace_root = workspace_root.resolve()
        self.runtime = runtime
        self.settings = settings
        self.api_key = api_key
        # The built-in hash() of a str is salted per interpreter process, so it
        # would yield a different port after every restart. CRC32 keeps the
        # port stable for a given workspace path.
        workspace_digest = zlib.crc32(str(self.workspace_root).encode("utf-8"))
        self.debug_port = self._DEBUG_PORT_BASE + workspace_digest % self._DEBUG_PORT_SPAN
        # Hard-coded Chromium build under the user's ms-playwright cache
        # directory; _launch_persistent_browser falls back to "Chromium" on
        # PATH when this file does not exist.
        self.chromium_path = (
            Path.home()
            / "Library"
            / "Caches"
            / "ms-playwright"
            / "chromium-1194"
            / "chrome-mac"
            / "Chromium.app"
            / "Contents"
            / "MacOS"
            / "Chromium"
        )

    def parameters_schema(self) -> dict[str, Any]:
        """Return the JSON schema describing the payload accepted by :meth:`run`."""
        return {
            "type": "object",
            "properties": {
                "task": {
                    "type": "string",
                    "description": "The high-level browser task to complete.",
                },
                "start_url": {
                    "type": "string",
                    "description": "Optional URL to open first before the agent starts.",
                },
                "max_steps": {
                    "type": "integer",
                    "description": "Maximum browser-use steps before stopping. Defaults to 20.",
                },
                "keep_alive": {
                    "type": "boolean",
                    "description": "Keep the browser open after the run finishes.",
                },
                "allowed_domains": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Optional list of allowed domains for the run.",
                },
            },
            "required": ["task"],
            "additionalProperties": False,
        }

    async def run(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Validate the payload, run the agent, and return a result dict.

        Args:
            payload: Tool arguments as described by :meth:`parameters_schema`.

        Returns:
            A dict that always contains ``tool`` and ``status``. On failure it
            carries ``message``; on a completed run it carries the keys
            produced by :meth:`_run_agent`. Invalid input and agent failures
            are reported through this dict rather than raised.
        """
        # ``or ""`` keeps an explicit null from turning into the string "None".
        task = str(payload.get("task") or "").strip()
        if not task:
            return self._error_result("task is required.")

        start_url = str(payload.get("start_url") or "").strip()

        raw_max_steps = payload.get("max_steps")
        if raw_max_steps is None:
            max_steps = self._DEFAULT_MAX_STEPS
        else:
            try:
                max_steps = int(raw_max_steps)
            except (TypeError, ValueError):
                return self._error_result("max_steps must be a positive integer.")
            if max_steps < 1:
                return self._error_result("max_steps must be a positive integer.")

        keep_alive = bool(payload.get("keep_alive", False))
        allowed_domains = self._normalize_domains(payload.get("allowed_domains"))

        if start_url and not allowed_domains:
            # With no explicit allow-list, restrict the run to the start URL's
            # host. ``hostname`` (unlike ``netloc``) excludes userinfo and port.
            try:
                host = urlparse(start_url).hostname
            except ValueError:
                # Malformed URL (e.g. unbalanced IPv6 brackets): derive nothing.
                host = None
            if host:
                allowed_domains = [host]

        llm_error = self._provider_readiness_error()
        if llm_error is not None:
            return self._error_result(llm_error)

        try:
            result = await self._run_agent(
                task=self._compose_task(task, start_url),
                max_steps=max_steps,
                keep_alive=keep_alive,
                allowed_domains=allowed_domains,
            )
        except Exception as exc:
            # Tool boundary: every failure (missing dependency, browser launch,
            # agent error) is converted into a structured error result.
            return self._error_result(str(exc) or type(exc).__name__)

        return {
            "tool": self.name,
            "status": "ok" if result["success"] else "error",
            **result,
        }

    def _error_result(self, message: str) -> dict[str, Any]:
        """Build the error dict shape returned by :meth:`run`."""
        return {"tool": self.name, "status": "error", "message": message}

    async def _run_agent(
        self,
        task: str,
        max_steps: int,
        keep_alive: bool,
        allowed_domains: list[str],
    ) -> dict[str, Any]:
        """Attach a ``browser_use`` agent to the persistent browser and run it.

        Args:
            task: Full task prompt (see :meth:`_compose_task`).
            max_steps: Step limit passed to ``agent.run``.
            keep_alive: Accepted for interface compatibility. NOTE(review): it
                is not consulted; the ``Browser`` is always created with
                ``keep_alive=True``. Confirm whether that is intended.
            allowed_domains: Domain allow-list; an empty list is passed on as
                ``None``.

        Returns:
            A summary dict with ``success``, ``final_result``, and the tail of
            the agent history: the last 10 ``extracted_content`` and ``urls``
            entries, the last 5 ``errors``, the last 20 ``actions``, plus the
            ``steps`` count.
        """
        # Imported lazily so the module can be loaded without browser_use.
        from browser_use import Agent, Browser, ChatAnthropic, ChatOpenAI

        cdp_url = await self._ensure_persistent_browser()
        browser = Browser(
            cdp_url=cdp_url,
            is_local=True,
            keep_alive=True,
            allowed_domains=allowed_domains or None,
        )
        llm = self._build_llm(ChatAnthropic=ChatAnthropic, ChatOpenAI=ChatOpenAI)
        agent = Agent(
            task=task,
            llm=llm,
            browser=browser,
            use_vision=True,
            enable_planning=False,
            max_actions_per_step=3,
            display_files_in_done_text=False,
        )
        try:
            history = await agent.run(max_steps=max_steps)
            final_result = history.final_result() or ""
            extracted = history.extracted_content()
            errors = [error for error in history.errors() if error]
            urls = [url for url in history.urls() if url]
            return {
                "success": bool(history.is_successful()),
                "final_result": final_result,
                "extracted_content": extracted[-10:],
                "errors": errors[-5:],
                "urls": urls[-10:],
                "steps": history.number_of_steps(),
                "actions": history.action_names()[-20:],
            }
        finally:
            await agent.close()

    def _build_llm(self, ChatAnthropic: Any, ChatOpenAI: Any) -> Any:
        """Instantiate the chat model matching ``runtime.model_provider``.

        The model classes are injected by the caller so that ``browser_use``
        only needs to be imported in :meth:`_run_agent`.

        Args:
            ChatAnthropic: Class used when the provider is ``"zai"``.
            ChatOpenAI: Class used for every other provider value; it is
                pointed at ``<local_base_url>/v1`` with the placeholder key
                ``"lm-studio"``.

        Returns:
            The constructed chat model instance.
        """
        if self.runtime.model_provider == "zai":
            return ChatAnthropic(
                model=self.runtime.zai_model,
                api_key=self.api_key,
                base_url=self.settings.zai_base_url,
                timeout=180.0,
            )
        return ChatOpenAI(
            model=self.runtime.local_model,
            api_key="lm-studio",
            base_url=f"{self.runtime.local_base_url.rstrip('/')}/v1",
            timeout=180.0,
        )

    def _provider_readiness_error(self) -> str | None:
        """Return a message if the selected provider is not configured, else ``None``."""
        provider = self.runtime.model_provider
        # ``or ""`` tolerates unset (None) values instead of raising on .strip().
        if provider == "zai" and not (self.api_key or "").strip():
            return "Z.AI API key is not configured."
        if provider == "local" and not (self.runtime.local_base_url or "").strip():
            return "Local model base URL is not configured."
        return None

    def _compose_task(self, task: str, start_url: str) -> str:
        """Prefix the user task with fixed safety and behaviour instructions.

        Args:
            task: The user-supplied task; always the last line of the prompt.
            start_url: Optional URL; when non-empty a "start here" line is
                inserted just before the task.

        Returns:
            The newline-joined prompt.
        """
        instructions = [
            "Work in a real browser on macOS.",
            "If the task asks for list extraction, return concise structured text.",
            "If a captcha or login wall blocks progress, stop immediately and say that user action is required.",
            "Do not click third-party sign-in buttons such as Google, Apple, or GitHub OAuth buttons.",
            "Do not open or interact with login popups or OAuth consent windows.",
            "If authentication is required, leave the page open in the persistent browser and tell the user to complete login manually, then retry the task.",
            "Do not submit irreversible forms or purchases unless the user explicitly asked for it.",
        ]
        if start_url:
            instructions.append(f"Start at this URL first: {start_url}")
        instructions.append(task)
        return "\n".join(instructions)

    def _normalize_domains(self, value: object) -> list[str]:
        """Coerce a raw ``allowed_domains`` value into a list of non-empty strings.

        Anything that is not a list yields ``[]``. ``None`` items and items
        that are blank after stripping are dropped.
        """
        if not isinstance(value, list):
            return []
        stripped = (str(item).strip() for item in value if item is not None)
        return [domain for domain in stripped if domain]

    def _profile_root(self) -> Path:
        """Return the Chromium user-data dir, creating it and its ``WiseClaw`` profile dir."""
        profile_root = self.workspace_root / ".wiseclaw" / "browser-use-profile"
        # parents=True on the child also creates profile_root itself.
        (profile_root / "WiseClaw").mkdir(parents=True, exist_ok=True)
        return profile_root

    async def _ensure_persistent_browser(self) -> str:
        """Return a CDP WebSocket URL, reusing the recorded browser when possible.

        If the saved state names a live PID whose port answers with a CDP URL,
        that URL is returned. Otherwise a new browser is launched, polled until
        its endpoint is up, and its PID and port are saved.

        Raises:
            RuntimeError: If a newly launched browser never exposes CDP.
        """
        state = self._load_browser_state()
        if state is not None and self._pid_is_running(state["pid"]):
            cdp_url = await self._fetch_cdp_url(state["port"])
            if cdp_url:
                return cdp_url
        await self._launch_persistent_browser()
        cdp_url = await self._wait_for_cdp_url()
        self._save_browser_state({"pid": self._read_pid_file(), "port": self.debug_port})
        return cdp_url

    async def _launch_persistent_browser(self) -> None:
        """Start Chromium with remote debugging enabled and record its PID.

        The process is started in a new session with stdout/stderr discarded
        and is not awaited here.
        """
        executable = str(self.chromium_path if self.chromium_path.exists() else "Chromium")
        profile_root = self._profile_root()
        args = [
            executable,
            f"--remote-debugging-port={self.debug_port}",
            f"--user-data-dir={profile_root}",
            "--profile-directory=WiseClaw",
            "--no-first-run",
            "--no-default-browser-check",
            "--start-maximized",
            "about:blank",
        ]
        process = await asyncio.create_subprocess_exec(
            *args,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
            start_new_session=True,
        )
        self._write_pid_file(process.pid)

    async def _wait_for_cdp_url(self) -> str:
        """Poll ``self.debug_port`` until a CDP URL is available.

        Raises:
            RuntimeError: If no URL is obtained within the polling budget.
        """
        for _ in range(self._CDP_POLL_ATTEMPTS):
            cdp_url = await self._fetch_cdp_url(self.debug_port)
            if cdp_url:
                return cdp_url
            await asyncio.sleep(self._CDP_POLL_INTERVAL)
        raise RuntimeError("Persistent Chromium browser did not expose a CDP endpoint in time.")

    async def _fetch_cdp_url(self, port: int) -> str:
        """Query ``/json/version`` on localhost and return ``webSocketDebuggerUrl``.

        Args:
            port: Local debugging port to probe.

        Returns:
            The WebSocket URL, or ``""`` when the port is invalid, the request
            fails, or the reply is not a JSON object containing the URL.
        """
        # The port may come from a state file on disk; reject impossible values
        # before they are interpolated into a URL.
        if not 0 < port < 65536:
            return ""
        try:
            async with httpx.AsyncClient(timeout=2.0) as client:
                response = await client.get(f"http://127.0.0.1:{port}/json/version")
                response.raise_for_status()
                # Another service may own the port and answer with non-JSON.
                payload = response.json()
        except (httpx.HTTPError, ValueError):
            return ""
        if not isinstance(payload, dict):
            return ""
        return str(payload.get("webSocketDebuggerUrl") or "")

    def _browser_state_path(self) -> Path:
        """Return the path of the JSON file holding the browser PID and port."""
        return self.workspace_root / ".wiseclaw" / "browser-use-browser.json"

    def _browser_pid_path(self) -> Path:
        """Return the path of the file holding the launched browser's PID."""
        return self.workspace_root / ".wiseclaw" / "browser-use-browser.pid"

    def _load_browser_state(self) -> dict[str, int] | None:
        """Load the saved browser state.

        Returns:
            ``{"pid": int, "port": int}``, or ``None`` when the file is
            missing, unreadable, not valid JSON, not an object, or lacks
            integer-coercible ``pid``/``port`` values.
        """
        try:
            raw = json.loads(self._browser_state_path().read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            return None
        if not isinstance(raw, dict):
            return None
        try:
            return {"pid": int(raw["pid"]), "port": int(raw["port"])}
        except (KeyError, TypeError, ValueError):
            return None

    def _save_browser_state(self, payload: dict[str, int]) -> None:
        """Write ``payload`` as JSON to the state file, creating its directory."""
        path = self._browser_state_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json.dumps(payload), encoding="utf-8")

    def _write_pid_file(self, pid: int) -> None:
        """Write ``pid`` to the PID file, creating its directory."""
        path = self._browser_pid_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(str(pid), encoding="utf-8")

    def _read_pid_file(self) -> int:
        """Return the PID stored in the PID file, or ``0`` if it is missing or invalid."""
        try:
            return int(self._browser_pid_path().read_text(encoding="utf-8").strip())
        except (OSError, ValueError):
            return 0

    def _pid_is_running(self, pid: int) -> bool:
        """Return whether signal 0 can be delivered to ``pid``.

        Non-positive PIDs are rejected up front. Any ``OSError`` from
        ``os.kill`` is treated as "not running".
        """
        if pid <= 0:
            return False
        try:
            os.kill(pid, 0)
        except (OSError, OverflowError):
            # OverflowError: the PID read from disk does not fit a C integer.
            return False
        return True