feat: Add DuckDuckGo search fallback.
- Refactor web tool to use Provider pattern (Brave/DuckDuckGo) - Add robust HTML scraping for keyless DuckDuckGo search - Update README with search provider guidelines
This commit is contained in:
24
README.md
24
README.md
@@ -52,7 +52,7 @@
|
|||||||
🤖 **AI-Bootstrapped**: Autonomous Go-native implementation — 95% Agent-generated core with human-in-the-loop refinement.
|
🤖 **AI-Bootstrapped**: Autonomous Go-native implementation — 95% Agent-generated core with human-in-the-loop refinement.
|
||||||
|
|
||||||
| | OpenClaw | NanoBot | **PicoClaw** |
|
| | OpenClaw | NanoBot | **PicoClaw** |
|
||||||
| --- | --- | --- |--- |
|
| ----------------------------- | ------------- | ------------------------ | ----------------------------------------- |
|
||||||
| **Language** | TypeScript | Python | **Go** |
|
| **Language** | TypeScript | Python | **Go** |
|
||||||
| **RAM** | >1GB | >100MB | **< 10MB** |
|
| **RAM** | >1GB | >100MB | **< 10MB** |
|
||||||
| **Startup**</br>(0.8GHz core) | >500s | >30s | **<1s** |
|
| **Startup**</br>(0.8GHz core) | >500s | >30s | **<1s** |
|
||||||
@@ -123,7 +123,7 @@ make install
|
|||||||
> [!TIP]
|
> [!TIP]
|
||||||
> Set your API key in `~/.picoclaw/config.json`.
|
> Set your API key in `~/.picoclaw/config.json`.
|
||||||
> Get API keys: [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM)
|
> Get API keys: [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM)
|
||||||
> Web search is **optional** - get free [Brave Search API](https://brave.com/search/api) (2000 free queries/month)
|
> Web search is **optional** — get a free [Brave Search API](https://brave.com/search/api) key (2000 free queries/month), or leave it unset to use the built-in DuckDuckGo fallback, which needs no key.
|
||||||
|
|
||||||
**1. Initialize**
|
**1. Initialize**
|
||||||
|
|
||||||
@@ -183,7 +183,7 @@ That's it! You have a working AI assistant in 2 minutes.
|
|||||||
Talk to your picoclaw through Telegram, Discord, or DingTalk
|
Talk to your picoclaw through Telegram, Discord, or DingTalk
|
||||||
|
|
||||||
| Channel | Setup |
|
| Channel | Setup |
|
||||||
|---------|-------|
|
| ------------ | -------------------------- |
|
||||||
| **Telegram** | Easy (just a token) |
|
| **Telegram** | Easy (just a token) |
|
||||||
| **Discord** | Easy (bot token + intents) |
|
| **Discord** | Easy (bot token + intents) |
|
||||||
| **QQ** | Easy (AppID + AppSecret) |
|
| **QQ** | Easy (AppID + AppSecret) |
|
||||||
@@ -343,8 +343,6 @@ Connect Picoclaw to the Agent Social Network simply by sending a single message
|
|||||||
|
|
||||||
**Read `https://clawdchat.ai/skill.md` and follow the instructions to join [ClawdChat.ai](https://clawdchat.ai)**
|
**Read `https://clawdchat.ai/skill.md` and follow the instructions to join [ClawdChat.ai](https://clawdchat.ai)**
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## ⚙️ Configuration
|
## ⚙️ Configuration
|
||||||
|
|
||||||
Config file: `~/.picoclaw/config.json`
|
Config file: `~/.picoclaw/config.json`
|
||||||
@@ -372,7 +370,7 @@ PicoClaw stores data in your configured workspace (default: `~/.picoclaw/workspa
|
|||||||
> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
|
> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
|
||||||
|
|
||||||
| Provider | Purpose | Get API Key |
|
| Provider | Purpose | Get API Key |
|
||||||
|----------|---------|-------------|
|
| -------------------------- | --------------------------------------- | ------------------------------------------------------ |
|
||||||
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
|
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
|
||||||
| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](bigmodel.cn) |
|
| `zhipu` | LLM (Zhipu direct) | [bigmodel.cn](https://bigmodel.cn) |
|
||||||
| `openrouter(To be tested)` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
|
| `openrouter(To be tested)` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
|
||||||
@@ -405,8 +403,8 @@ PicoClaw stores data in your configured workspace (default: `~/.picoclaw/workspa
|
|||||||
"zhipu": {
|
"zhipu": {
|
||||||
"api_key": "Your API Key",
|
"api_key": "Your API Key",
|
||||||
"api_base": "https://open.bigmodel.cn/api/paas/v4"
|
"api_base": "https://open.bigmodel.cn/api/paas/v4"
|
||||||
},
|
}
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -480,7 +478,7 @@ picoclaw agent -m "Hello"
|
|||||||
## CLI Reference
|
## CLI Reference
|
||||||
|
|
||||||
| Command | Description |
|
| Command | Description |
|
||||||
|---------|-------------|
|
| ------------------------- | ----------------------------- |
|
||||||
| `picoclaw onboard` | Initialize config & workspace |
|
| `picoclaw onboard` | Initialize config & workspace |
|
||||||
| `picoclaw agent -m "..."` | Chat with the agent |
|
| `picoclaw agent -m "..."` | Chat with the agent |
|
||||||
| `picoclaw agent` | Interactive chat mode |
|
| `picoclaw agent` | Interactive chat mode |
|
||||||
@@ -515,8 +513,10 @@ This is normal if you haven't configured a search API key yet. PicoClaw will pro
|
|||||||
|
|
||||||
To enable web search:
|
To enable web search:
|
||||||
|
|
||||||
1. Get a free API key at [https://brave.com/search/api](https://brave.com/search/api) (2000 free queries/month)
|
1. **Option 1 (Recommended)**: Get a free API key at [https://brave.com/search/api](https://brave.com/search/api) (2000 free queries/month) for the best results.
|
||||||
2. Add to `~/.picoclaw/config.json`:
|
2. **Option 2 (No API Key)**: If no Brave key is configured, PicoClaw automatically falls back to **DuckDuckGo** (no key required).
|
||||||
|
|
||||||
|
Add the key to `~/.picoclaw/config.json` if using Brave:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
@@ -544,7 +544,7 @@ This happens when another instance of the bot is running. Make sure only one `pi
|
|||||||
## 📝 API Key Comparison
|
## 📝 API Key Comparison
|
||||||
|
|
||||||
| Service | Free Tier | Use Case |
|
| Service | Free Tier | Use Case |
|
||||||
|---------|-----------|-----------|
|
| ---------------- | ------------------- | ------------------------------------- |
|
||||||
| **OpenRouter** | 200K tokens/month | Multiple models (Claude, GPT-4, etc.) |
|
| **OpenRouter** | 200K tokens/month | Multiple models (Claude, GPT-4, etc.) |
|
||||||
| **Zhipu** | 200K tokens/month | Best for Chinese users |
|
| **Zhipu** | 200K tokens/month | Best for Chinese users |
|
||||||
| **Brave Search** | 2000 queries/month | Web search functionality |
|
| **Brave Search** | 2000 queries/month | Web search functionality |
|
||||||
|
|||||||
241
pkg/tools/web.go
241
pkg/tools/web.go
@@ -13,11 +13,178 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
userAgent = "Mozilla/5.0 (compatible; picoclaw/1.0)"
|
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
)
|
)
|
||||||
|
|
||||||
type WebSearchTool struct {
|
// SearchProvider is the contract a web-search backend must satisfy so that it
// can be plugged into the web search tool.
type SearchProvider interface {
	// Search runs query against the backend and returns the results as
	// plain text; count is the number of results the caller asked for.
	Search(ctx context.Context, query string, count int) (string, error)
}
|
||||||
|
|
||||||
|
// BraveSearchProvider is a SearchProvider backed by the Brave Search web API.
// apiKey is sent with every request as the X-Subscription-Token header.
type BraveSearchProvider struct {
	apiKey string
}

// Search queries the Brave web search endpoint for query and formats at most
// count results as a numbered plain-text list: title and URL for every result,
// followed by the description when the API supplied one.
//
// An empty result set is not an error; the returned text then states that
// nothing was found. An error is returned when the request cannot be built or
// sent (each request is limited to 10 seconds), when the body cannot be read,
// when the API answers with a status other than 200, or when the body is not
// valid JSON.
func (p *BraveSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
	searchURL := fmt.Sprintf("https://api.search.brave.com/res/v1/web/search?q=%s&count=%d",
		url.QueryEscape(query), count)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}

	req.Header.Set("Accept", "application/json")
	req.Header.Set("X-Subscription-Token", p.apiKey)

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}

	// Error responses are JSON too, but without a "web" section. Without this
	// check they would decode cleanly and be reported as "No results".
	if resp.StatusCode != http.StatusOK {
		excerpt := strings.TrimSpace(string(body))
		if len(excerpt) > 200 {
			// Cut on a byte boundary, then drop any rune split by the cut.
			excerpt = strings.ToValidUTF8(excerpt[:200], "")
		}
		return "", fmt.Errorf("brave search returned status %d: %s", resp.StatusCode, excerpt)
	}

	var searchResp struct {
		Web struct {
			Results []struct {
				Title       string `json:"title"`
				URL         string `json:"url"`
				Description string `json:"description"`
			} `json:"results"`
		} `json:"web"`
	}

	if err := json.Unmarshal(body, &searchResp); err != nil {
		return "", fmt.Errorf("failed to parse response: %w", err)
	}

	results := searchResp.Web.Results
	if len(results) == 0 {
		return fmt.Sprintf("No results for: %s", query), nil
	}

	var lines []string
	lines = append(lines, fmt.Sprintf("Results for: %s", query))
	for i, item := range results {
		// The API may return more entries than requested.
		if i >= count {
			break
		}
		lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL))
		if item.Description != "" {
			lines = append(lines, fmt.Sprintf(" %s", item.Description))
		}
	}

	return strings.Join(lines, "\n"), nil
}
|
||||||
|
|
||||||
|
type DuckDuckGoSearchProvider struct{}
|
||||||
|
|
||||||
|
func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, count int) (string, error) {
|
||||||
|
searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query))
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("User-Agent", userAgent)
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 10 * time.Second}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("request failed: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return p.extractResults(string(body), count, query)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query string) (string, error) {
|
||||||
|
// Simple regex based extraction for DDG HTML
|
||||||
|
// Strategy: Find all result containers or key anchors directly
|
||||||
|
|
||||||
|
// Try finding the result links directly first, as they are the most critical
|
||||||
|
// Pattern: <a class="result__a" href="...">Title</a>
|
||||||
|
// The previous regex was a bit strict. Let's make it more flexible for attributes order/content
|
||||||
|
reLink := regexp.MustCompile(`<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>`)
|
||||||
|
matches := reLink.FindAllStringSubmatch(html, count+5)
|
||||||
|
|
||||||
|
if len(matches) == 0 {
|
||||||
|
return fmt.Sprintf("No results found or extraction failed. Query: %s", query), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var lines []string
|
||||||
|
lines = append(lines, fmt.Sprintf("Results for: %s (via DuckDuckGo)", query))
|
||||||
|
|
||||||
|
// Pre-compile snippet regex to run inside the loop
|
||||||
|
// We'll search for snippets relative to the link position or just globally if needed
|
||||||
|
// But simple global search for snippets might mismatch order.
|
||||||
|
// Since we only have the raw HTML string, let's just extract snippets globally and assume order matches (risky but simple for regex)
|
||||||
|
// Or better: Let's assume the snippet follows the link in the HTML
|
||||||
|
|
||||||
|
// A better regex approach: iterate through text and find matches in order
|
||||||
|
// But for now, let's grab all snippets too
|
||||||
|
reSnippet := regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
|
||||||
|
snippetMatches := reSnippet.FindAllStringSubmatch(html, count+5)
|
||||||
|
|
||||||
|
maxItems := min(len(matches), count)
|
||||||
|
|
||||||
|
for i := 0; i < maxItems; i++ {
|
||||||
|
urlStr := matches[i][1]
|
||||||
|
title := stripTags(matches[i][2])
|
||||||
|
title = strings.TrimSpace(title)
|
||||||
|
|
||||||
|
// URL decoding if needed
|
||||||
|
if strings.Contains(urlStr, "uddg=") {
|
||||||
|
if u, err := url.QueryUnescape(urlStr); err == nil {
|
||||||
|
idx := strings.Index(u, "uddg=")
|
||||||
|
if idx != -1 {
|
||||||
|
urlStr = u[idx+5:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, title, urlStr))
|
||||||
|
|
||||||
|
// Attempt to attach snippet if available and index aligns
|
||||||
|
if i < len(snippetMatches) {
|
||||||
|
snippet := stripTags(snippetMatches[i][1])
|
||||||
|
snippet = strings.TrimSpace(snippet)
|
||||||
|
if snippet != "" {
|
||||||
|
lines = append(lines, fmt.Sprintf(" %s", snippet))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(lines, "\n"), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// min returns the smaller of the two integers a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||||
|
|
||||||
|
// stripTagsPattern matches a "<", one or more characters other than ">", and
// the closing ">". It is compiled once rather than on every stripTags call.
var stripTagsPattern = regexp.MustCompile(`<[^>]+>`)

// stripTags returns content with every substring matching stripTagsPattern
// removed. Text outside such matches, including character entities, is left
// untouched.
func stripTags(content string) string {
	return stripTagsPattern.ReplaceAllString(content, "")
}
|
||||||
|
|
||||||
|
type WebSearchTool struct {
|
||||||
|
provider SearchProvider
|
||||||
maxResults int
|
maxResults int
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -25,8 +192,16 @@ func NewWebSearchTool(apiKey string, maxResults int) *WebSearchTool {
|
|||||||
if maxResults <= 0 || maxResults > 10 {
|
if maxResults <= 0 || maxResults > 10 {
|
||||||
maxResults = 5
|
maxResults = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var provider SearchProvider
|
||||||
|
if apiKey != "" {
|
||||||
|
provider = &BraveSearchProvider{apiKey: apiKey}
|
||||||
|
} else {
|
||||||
|
provider = &DuckDuckGoSearchProvider{}
|
||||||
|
}
|
||||||
|
|
||||||
return &WebSearchTool{
|
return &WebSearchTool{
|
||||||
apiKey: apiKey,
|
provider: provider,
|
||||||
maxResults: maxResults,
|
maxResults: maxResults,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -59,10 +234,6 @@ func (t *WebSearchTool) Parameters() map[string]interface{} {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (t *WebSearchTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
|
func (t *WebSearchTool) Execute(ctx context.Context, args map[string]interface{}) (string, error) {
|
||||||
if t.apiKey == "" {
|
|
||||||
return "Error: BRAVE_API_KEY not configured", nil
|
|
||||||
}
|
|
||||||
|
|
||||||
query, ok := args["query"].(string)
|
query, ok := args["query"].(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return "", fmt.Errorf("query is required")
|
return "", fmt.Errorf("query is required")
|
||||||
@@ -75,61 +246,7 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]interface{}
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
searchURL := fmt.Sprintf("https://api.search.brave.com/res/v1/web/search?q=%s&count=%d",
|
return t.provider.Search(ctx, query, count)
|
||||||
url.QueryEscape(query), count)
|
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("failed to create request: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
req.Header.Set("Accept", "application/json")
|
|
||||||
req.Header.Set("X-Subscription-Token", t.apiKey)
|
|
||||||
|
|
||||||
client := &http.Client{Timeout: 10 * time.Second}
|
|
||||||
resp, err := client.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("request failed: %w", err)
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("failed to read response: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var searchResp struct {
|
|
||||||
Web struct {
|
|
||||||
Results []struct {
|
|
||||||
Title string `json:"title"`
|
|
||||||
URL string `json:"url"`
|
|
||||||
Description string `json:"description"`
|
|
||||||
} `json:"results"`
|
|
||||||
} `json:"web"`
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := json.Unmarshal(body, &searchResp); err != nil {
|
|
||||||
return "", fmt.Errorf("failed to parse response: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
results := searchResp.Web.Results
|
|
||||||
if len(results) == 0 {
|
|
||||||
return fmt.Sprintf("No results for: %s", query), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var lines []string
|
|
||||||
lines = append(lines, fmt.Sprintf("Results for: %s", query))
|
|
||||||
for i, item := range results {
|
|
||||||
if i >= count {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, item.Title, item.URL))
|
|
||||||
if item.Description != "" {
|
|
||||||
lines = append(lines, fmt.Sprintf(" %s", item.Description))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return strings.Join(lines, "\n"), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type WebFetchTool struct {
|
type WebFetchTool struct {
|
||||||
|
|||||||
Reference in New Issue
Block a user