package tools
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
)
const (
	// userAgent is a desktop-Chrome User-Agent string sent with outgoing
	// requests so scrape-oriented endpoints (e.g. DuckDuckGo's HTML page)
	// serve the same markup they would serve a regular browser.
	userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
// SearchProvider abstracts a web-search backend. Implementations return a
// human-readable, newline-separated listing of at most count results for
// the given query, or an error if the search could not be performed.
type SearchProvider interface {
	Search(ctx context.Context, query string, count int) (string, error)
}
// BraveSearchProvider implements SearchProvider against the Brave Search
// REST API, authenticating with the subscription token.
type BraveSearchProvider struct {
	apiKey string
}

// Search queries Brave's web-search endpoint and formats up to count
// results as a numbered, newline-separated listing of title, URL and
// (when present) description.
func (p *BraveSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
	searchURL := fmt.Sprintf("https://api.search.brave.com/res/v1/web/search?q=%s&count=%d",
		url.QueryEscape(query), count)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Accept", "application/json")
	req.Header.Set("X-Subscription-Token", p.apiKey)
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}
	// Surface API-level failures (invalid key, rate limiting, ...) directly
	// instead of attempting to parse an error payload as search results.
	if resp.StatusCode != http.StatusOK {
		snippet := string(body)
		if len(snippet) > 256 {
			snippet = snippet[:256]
		}
		return "", fmt.Errorf("brave API returned status %d: %s", resp.StatusCode, snippet)
	}
	var searchResp struct {
		Web struct {
			Results []struct {
				Title       string `json:"title"`
				URL         string `json:"url"`
				Description string `json:"description"`
			} `json:"results"`
		} `json:"web"`
	}
	if err := json.Unmarshal(body, &searchResp); err != nil {
		// Previously the raw body was printed to stdout here; library code
		// should not write to stdout, so the payload is no longer printed.
		return "", fmt.Errorf("failed to parse response: %w", err)
	}
	results := searchResp.Web.Results
	if len(results) == 0 {
		return fmt.Sprintf("No results for: %s", query), nil
	}
	var lines []string
	lines = append(lines, fmt.Sprintf("Results for: %s", query))
	for i, item := range results {
		// The API may return more entries than requested; honor count.
		if i >= count {
			break
		}
		lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL))
		if item.Description != "" {
			lines = append(lines, fmt.Sprintf(" %s", item.Description))
		}
	}
	return strings.Join(lines, "\n"), nil
}
// DuckDuckGoSearchProvider implements SearchProvider by scraping the
// JavaScript-free DuckDuckGo HTML endpoint. No API key is required.
type DuckDuckGoSearchProvider struct{}

// Compiled once at package scope; regexp compilation is not cheap.
// NOTE(review): the two anchor patterns were reconstructed from a corrupted
// source region. They target DDG's result__a (title/link) and
// result__snippet anchors - confirm against VCS history.
var (
	// <a ... class="...result__a..." ... href="TARGET">TITLE</a>
	ddgLinkRe = regexp.MustCompile(`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`)
	// <a ... class="...result__snippet..." ...>SNIPPET</a>
	ddgSnippetRe = regexp.MustCompile(`<a[^>]*class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)</a>`)
	// Any HTML tag, used by stripTags.
	htmlTagRe = regexp.MustCompile(`<[^>]+>`)
)

// Search fetches the DuckDuckGo HTML results page for query and extracts
// up to count entries from it.
func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
	searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query))
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	// A browser-like UA avoids being served a block page.
	req.Header.Set("User-Agent", userAgent)
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()
	// Fail fast on block/error pages rather than scraping them.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("duckduckgo returned status %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}
	return p.extractResults(string(body), count, query)
}

// extractResults pulls result titles, target URLs and snippets out of the
// DDG HTML via regex matching and formats them as a numbered list.
// Snippets are matched globally and paired with links by index, which
// assumes both appear in the same document order (true for DDG's page,
// but inherently fragile for regex-based scraping).
func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query string) (string, error) {
	matches := ddgLinkRe.FindAllStringSubmatch(html, count+5)
	if len(matches) == 0 {
		return fmt.Sprintf("No results found or extraction failed. Query: %s", query), nil
	}
	var lines []string
	lines = append(lines, fmt.Sprintf("Results for: %s (via DuckDuckGo)", query))
	snippetMatches := ddgSnippetRe.FindAllStringSubmatch(html, count+5)
	maxItems := min(len(matches), count)
	for i := 0; i < maxItems; i++ {
		urlStr := matches[i][1]
		title := strings.TrimSpace(stripTags(matches[i][2]))
		// DDG wraps targets in a redirect: //duckduckgo.com/l/?uddg=<target>&rut=...
		// Parse the query string properly so trailing parameters (rut=...)
		// are not glued onto the decoded target URL.
		if strings.Contains(urlStr, "uddg=") {
			if u, err := url.Parse(urlStr); err == nil {
				if target := u.Query().Get("uddg"); target != "" {
					urlStr = target
				}
			}
		}
		lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, title, urlStr))
		// Attach the snippet when one exists at the matching index.
		if i < len(snippetMatches) {
			snippet := strings.TrimSpace(stripTags(snippetMatches[i][1]))
			if snippet != "" {
				lines = append(lines, fmt.Sprintf(" %s", snippet))
			}
		}
	}
	return strings.Join(lines, "\n"), nil
}

// min returns the smaller of a and b. (Kept for compatibility with Go
// versions predating the 1.21 builtin of the same name.)
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// stripTags removes all HTML tags from content, leaving only inner text.
func stripTags(content string) string {
	return htmlTagRe.ReplaceAllString(content, "")
}
// WebSearchTool exposes web search as an agent tool. It delegates to a
// SearchProvider: Brave when an API key is configured, DuckDuckGo otherwise.
type WebSearchTool struct {
	provider   SearchProvider
	maxResults int
}

// NewWebSearchTool builds a WebSearchTool. maxResults outside (0, 10]
// falls back to 5. A non-empty apiKey selects the Brave provider;
// otherwise the keyless DuckDuckGo scraper is used.
func NewWebSearchTool(apiKey string, maxResults int) *WebSearchTool {
	if maxResults <= 0 || maxResults > 10 {
		maxResults = 5
	}
	var provider SearchProvider
	if apiKey != "" {
		provider = &BraveSearchProvider{apiKey: apiKey}
	} else {
		provider = &DuckDuckGoSearchProvider{}
	}
	return &WebSearchTool{
		provider:   provider,
		maxResults: maxResults,
	}
}

// Name returns the tool identifier used by the agent runtime.
func (t *WebSearchTool) Name() string {
	return "web_search"
}

// Description returns the human/LLM-readable summary of the tool.
func (t *WebSearchTool) Description() string {
	return "Search the web for current information. Returns titles, URLs, and snippets from search results."
}

// Parameters returns the JSON-Schema fragment describing the tool arguments.
func (t *WebSearchTool) Parameters() map[string]interface{} {
	return map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"query": map[string]interface{}{
				"type":        "string",
				"description": "Search query",
			},
			"count": map[string]interface{}{
				"type":        "integer",
				"description": "Number of results (1-10)",
				"minimum":     1.0,
				"maximum":     10.0,
			},
		},
		"required": []string{"query"},
	}
}

// Execute runs a search. args["query"] (non-empty string) is required;
// args["count"] optionally overrides the default result count when it is
// a number in 1..10.
func (t *WebSearchTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
	query, ok := args["query"].(string)
	if !ok || query == "" {
		return "", fmt.Errorf("query is required")
	}
	count := t.maxResults
	// JSON decoding yields float64; accept plain int too for direct Go callers.
	switch c := args["count"].(type) {
	case float64:
		if n := int(c); n > 0 && n <= 10 {
			count = n
		}
	case int:
		if c > 0 && c <= 10 {
			count = c
		}
	}
	return t.provider.Search(ctx, query, count)
}
// WebFetchTool exposes URL fetching as an agent tool; fetched pages are
// reduced to readable text, capped at maxChars characters.
type WebFetchTool struct {
	maxChars int
}

// NewWebFetchTool builds a WebFetchTool. A non-positive maxChars selects
// the default cap of 50000 characters.
func NewWebFetchTool(maxChars int) *WebFetchTool {
	limit := maxChars
	if limit <= 0 {
		limit = 50000 // default extraction cap
	}
	return &WebFetchTool{maxChars: limit}
}

// Name returns the tool identifier used by the agent runtime.
func (t *WebFetchTool) Name() string {
	return "web_fetch"
}

// Description returns the human/LLM-readable summary of the tool.
func (t *WebFetchTool) Description() string {
	return "Fetch a URL and extract readable content (HTML to text). Use this to get weather info, news, articles, or any web content."
}

// Parameters returns the JSON-Schema fragment describing the tool arguments.
func (t *WebFetchTool) Parameters() map[string]interface{} {
	urlProp := map[string]interface{}{
		"type":        "string",
		"description": "URL to fetch",
	}
	maxCharsProp := map[string]interface{}{
		"type":        "integer",
		"description": "Maximum characters to extract",
		"minimum":     100.0,
	}
	return map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"url":      urlProp,
			"maxChars": maxCharsProp,
		},
		"required": []string{"url"},
	}
}
func (t *WebFetchTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
urlStr, ok := args["url"].(string)
if !ok {
return "", fmt.Errorf("url is required")
}
parsedURL, err := url.Parse(urlStr)
if err != nil {
return "", fmt.Errorf("invalid URL: %w", err)
}
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
return "", fmt.Errorf("only http/https URLs are allowed")
}
if parsedURL.Host == "" {
return "", fmt.Errorf("missing domain in URL")
}
maxChars := t.maxChars
if mc, ok := args["maxChars"].(float64); ok {
if int(mc) > 100 {
maxChars = int(mc)
}
}
req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil)
if err != nil {
return "", fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("User-Agent", userAgent)
client := &http.Client{
Timeout: 60 * time.Second,
Transport: &http.Transport{
MaxIdleConns: 10,
IdleConnTimeout: 30 * time.Second,
DisableCompression: false,
TLSHandshakeTimeout: 15 * time.Second,
},
CheckRedirect: func(req *http.Request, via []*http.Request) error {
if len(via) >= 5 {
return fmt.Errorf("stopped after 5 redirects")
}
return nil
},
}
resp, err := client.Do(req)
if err != nil {
return "", fmt.Errorf("request failed: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response: %w", err)
}
contentType := resp.Header.Get("Content-Type")
var text, extractor string
if strings.Contains(contentType, "application/json") {
var jsonData interface{}
if err := json.Unmarshal(body, &jsonData); err == nil {
formatted, _ := json.MarshalIndent(jsonData, "", " ")
text = string(formatted)
extractor = "json"
} else {
text = string(body)
extractor = "raw"
}
} else if strings.Contains(contentType, "text/html") || len(body) > 0 &&
(strings.HasPrefix(string(body), " maxChars
if truncated {
text = text[:maxChars]
}
result := map[string]interface{}{
"url": urlStr,
"status": resp.StatusCode,
"extractor": extractor,
"truncated": truncated,
"length": len(text),
"text": text,
}
resultJSON, _ := json.MarshalIndent(result, "", " ")
return string(resultJSON), nil
}
func (t *WebFetchTool) extractText(htmlContent string) string {
re := regexp.MustCompile(`