import html
import re
from typing import Any

import httpx

from app.tools.base import Tool

# Maximum number of characters of simplified text returned in one result.
_MAX_CONTENT_CHARS = 12000

# Seconds allowed for the HTTP request before httpx gives up.
_REQUEST_TIMEOUT_SECONDS = 15.0

# <script> and <style> elements are dropped together with their bodies,
# because their contents are code/CSS rather than readable page text.
_SCRIPT_RE = re.compile(r"(?is)<script\b[^>]*>.*?</script\s*>")
_STYLE_RE = re.compile(r"(?is)<style\b[^>]*>.*?</style\s*>")
# Any remaining tag (including multi-line ones) is replaced by a space.
_TAG_RE = re.compile(r"(?s)<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


class WebFetchTool(Tool):
    """Tool that downloads a web page and returns a plain-text rendition."""

    name = "web_fetch"
    description = "Fetch a webpage and return simplified content."

    def parameters_schema(self) -> dict[str, Any]:
        """Return the JSON schema for the payload: one required ``url`` string."""
        return {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The http or https URL to fetch.",
                }
            },
            "required": ["url"],
            "additionalProperties": False,
        }

    async def run(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Fetch ``payload["url"]`` and return its simplified text.

        Args:
            payload: Mapping holding the ``url`` to fetch. A missing key is
                treated as an empty URL and rejected by the scheme check.

        Returns:
            A dict with ``tool``, ``status`` and ``url``. On failure
            ``status`` is ``"error"`` and ``message`` describes the problem.
            On success ``status`` is ``"ok"`` and the dict also carries
            ``content_type``, ``content`` (at most 12000 characters) and
            ``truncated`` (whether text was cut off).
        """
        url = str(payload.get("url", "")).strip()
        if not url.startswith(("http://", "https://")):
            return {
                "tool": self.name,
                "status": "error",
                "url": url,
                "message": "Only http and https URLs are allowed.",
            }

        # NOTE(review): the URL is caller-supplied and no host filtering is
        # applied here, and redirects are followed — confirm that reaching
        # internal addresses is acceptable for this deployment.
        try:
            async with httpx.AsyncClient(
                timeout=_REQUEST_TIMEOUT_SECONDS,
                follow_redirects=True,
            ) as client:
                response = await client.get(url)
                response.raise_for_status()
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # InvalidURL is not part of the HTTPError hierarchy; catching it
            # keeps malformed URLs on the structured-error path.
            return {
                "tool": self.name,
                "status": "error",
                "url": url,
                "message": str(exc),
            }

        text = self._simplify_content(response.text)
        return {
            "tool": self.name,
            "status": "ok",
            "url": url,
            "content_type": response.headers.get("content-type", ""),
            "content": text[:_MAX_CONTENT_CHARS],
            "truncated": len(text) > _MAX_CONTENT_CHARS,
        }

    def _simplify_content(self, content: str) -> str:
        """Reduce HTML markup to whitespace-normalised plain text.

        Script and style elements are removed with their bodies, every other
        tag is replaced by a space, character references are decoded, and
        runs of whitespace are collapsed to a single space.

        Args:
            content: Raw response body; may or may not be HTML.

        Returns:
            The simplified text with no leading or trailing whitespace.
        """
        text = _SCRIPT_RE.sub(" ", content)
        text = _STYLE_RE.sub(" ", text)
        text = _TAG_RE.sub(" ", text)
        # Decode entities only after tags are gone, so escaped markup such as
        # "&lt;b&gt;" survives as literal text instead of being stripped.
        # "&nbsp;" decodes to U+00A0, which the \s+ pass below folds to " ".
        text = html.unescape(text)
        text = _WHITESPACE_RE.sub(" ", text)
        return text.strip()