web_search.py (3313B)
from io import BytesIO

import requests
from bs4 import BeautifulSoup
from pypdf import PdfReader
from smolagents import Tool


class WebSearchTool(Tool):
    """Tool that runs a web search and formats the hits as markdown.

    The query is sent to the JSON search endpoint at ``searx.laack.co`` and
    every entry of the response's ``results`` list is rendered as a markdown
    heading link followed by its description.
    """

    name = "web_search"
    description = "Performs a web search for a query and returns a string of the top search results formatted as markdown with titles, links, and descriptions."
    inputs = {"query": {"type": "string", "description": "The search query to perform."}}
    output_type = "string"

    def forward(self, query: str) -> str:
        """Search for *query* and return the results as a markdown string.

        Args:
            query: The search query to perform.

        Returns:
            One ``### [title](url)`` section per result, ``"No results
            found."`` when the result list is empty, or a
            ``"Search failed: ..."`` message when the request or the
            decoding of its response fails.
        """
        src_params = {
            'q': query,
            'format': 'json',
        }
        search_url = 'https://searx.laack.co/search'

        try:
            # Bound the request so an unresponsive search host cannot block
            # the caller forever (same limit as WebVisitTool uses).
            response = requests.get(search_url, params=src_params, timeout=15)
            response.raise_for_status()
            res_list = response.json()['results']
        except (requests.RequestException, KeyError, ValueError, TypeError) as e:
            # ValueError: the body is not valid JSON (older ``requests``
            # versions raise a plain ValueError rather than a
            # RequestException subclass).
            # TypeError: the JSON payload is not a mapping, so it cannot be
            # indexed with 'results'.
            return f"Search failed: {e}"

        markdown_results = [
            "### [{title}]({url})\n{content}\n".format(
                title=result.get('title', 'No title'),
                url=result.get('url', ''),
                content=result.get('content', 'No description'),
            )
            for result in res_list
        ]

        return "\n".join(markdown_results) if markdown_results else "No results found."


class WebVisitTool(Tool):
    """Tool that downloads a URL and returns its readable text.

    PDF responses are read with ``pypdf``; anything else is treated as HTML
    and reduced to text with BeautifulSoup. Long output is truncated.
    """

    name = "visit_webpage"
    description = "Visits a webpage or PDF at the given URL and returns its text content. Supports HTML pages and PDF documents."
    inputs = {"url": {"type": "string", "description": "The URL of the webpage or PDF to visit."}}
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch *url* and return its text content.

        Args:
            url: The URL of the webpage or PDF to visit.

        Returns:
            The extracted (possibly truncated) text, or a
            ``"Failed to fetch URL: ..."`` message when the request fails.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        try:
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()
        except requests.RequestException as e:
            return f"Failed to fetch URL: {e}"

        content_type = response.headers.get('Content-Type', '').lower()

        # Ignore the query string and fragment when checking the extension so
        # that links such as ".../paper.pdf?download=1" are still recognised
        # as PDFs even when the server sends a generic Content-Type.
        url_path = url.split('#', 1)[0].split('?', 1)[0].lower()

        if 'application/pdf' in content_type or url_path.endswith('.pdf'):
            return self._parse_pdf(response.content)

        return self._parse_html(response.text)

    def _parse_pdf(self, content: bytes) -> str:
        """Extract the text of every page of a PDF given as raw bytes.

        Returns the page texts joined by newlines (truncated if needed), or a
        ``"Failed to parse PDF: ..."`` message if reading the PDF raises.
        """
        # Broad catch is deliberate: any failure while reading the document
        # is reported back as text instead of crashing the tool.
        try:
            reader = PdfReader(BytesIO(content))
            # extract_text() may yield nothing for a page; substitute "".
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
            return self._truncate(text)
        except Exception as e:
            return f"Failed to parse PDF: {e}"

    def _parse_html(self, html: str) -> str:
        """Reduce an HTML document to its visible text (truncated if needed)."""
        soup = BeautifulSoup(html, 'html.parser')
        # Drop scripts, styles and page chrome before extracting the text.
        for element in soup(['script', 'style', 'nav', 'footer', 'header']):
            element.decompose()
        text = soup.get_text(separator='\n', strip=True)
        return self._truncate(text)

    def _truncate(self, text: str, max_chars: int = 15000) -> str:
        """Cap *text* at *max_chars* characters.

        Returns the first *max_chars* characters plus a truncation marker
        when the text is longer, ``"No readable content found."`` when the
        text is empty, and the text unchanged otherwise.
        """
        if len(text) > max_chars:
            return text[:max_chars] + "\n\n[Content truncated...]"
        return text if text else "No readable content found."