merge: resolve conflicts with upstream/main

Merge upstream/main into bugfix/fix-duplicate-telegram-messages. Conflict resolutions: - pkg/agent/loop.go: Adopt upstream's processSystemMessage, which removes the runAgentLoop call entirely (subagents now communicate via the message tool directly). Keep the PR's HasSentInRound() check in Run() for the normal message processing path. - pkg/tools/message.go: Merge both changes: keep the sentInRound tracking from the PR and adopt upstream's *ToolResult return type with Silent: true.
2026-02-13 16:52:33 +08:00
parent 132fe7db51 c0d1346b5c
commit cf5a84aac1
37 changed files with 5029 additions and 386 deletions
--- a/pkg/agent/loop.go
+++ b/pkg/agent/loop.go
@@ -19,9 +19,11 @@ import (

 	"github.com/sipeed/picoclaw/pkg/bus"
 	"github.com/sipeed/picoclaw/pkg/config"
+	"github.com/sipeed/picoclaw/pkg/constants"
 	"github.com/sipeed/picoclaw/pkg/logger"
 	"github.com/sipeed/picoclaw/pkg/providers"
 	"github.com/sipeed/picoclaw/pkg/session"
+	"github.com/sipeed/picoclaw/pkg/state"
 	"github.com/sipeed/picoclaw/pkg/tools"
 	"github.com/sipeed/picoclaw/pkg/utils"
 )
@@ -34,6 +36,7 @@ type AgentLoop struct {
 	contextWindow  int           // Maximum context window size in tokens
 	maxIterations  int
 	sessions       *session.SessionManager
+	state          *state.Manager
 	contextBuilder *ContextBuilder
 	tools          *tools.ToolRegistry
 	running        atomic.Bool
@@ -49,25 +52,31 @@ type processOptions struct {
 	DefaultResponse string // Response when LLM returns empty
 	EnableSummary   bool   // Whether to trigger summarization
 	SendResponse    bool   // Whether to send response via bus
+	NoHistory       bool   // If true, don't load session history (for heartbeat)
 }

-func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers.LLMProvider) *AgentLoop {
-	workspace := cfg.WorkspacePath()
-	os.MkdirAll(workspace, 0755)
+// createToolRegistry creates a tool registry with common tools.
+// This is shared between main agent and subagents.
+func createToolRegistry(workspace string, restrict bool, cfg *config.Config, msgBus *bus.MessageBus) *tools.ToolRegistry {
+	registry := tools.NewToolRegistry()

-	restrict := cfg.Agents.Defaults.RestrictToWorkspace
+	// File system tools
+	registry.Register(tools.NewReadFileTool(workspace, restrict))
+	registry.Register(tools.NewWriteFileTool(workspace, restrict))
+	registry.Register(tools.NewListDirTool(workspace, restrict))
+	registry.Register(tools.NewEditFileTool(workspace, restrict))
+	registry.Register(tools.NewAppendFileTool(workspace, restrict))

-	toolsRegistry := tools.NewToolRegistry()
-	toolsRegistry.Register(tools.NewReadFileTool(workspace, restrict))
-	toolsRegistry.Register(tools.NewWriteFileTool(workspace, restrict))
-	toolsRegistry.Register(tools.NewListDirTool(workspace, restrict))
-	toolsRegistry.Register(tools.NewExecTool(workspace, restrict))
+	// Shell execution
+	registry.Register(tools.NewExecTool(workspace, restrict))

+	// Web tools
 	braveAPIKey := cfg.Tools.Web.Search.APIKey
-	toolsRegistry.Register(tools.NewWebSearchTool(braveAPIKey, cfg.Tools.Web.Search.MaxResults))
-	toolsRegistry.Register(tools.NewWebFetchTool(50000))
+	registry.Register(tools.NewWebSearchTool(braveAPIKey, cfg.Tools.Web.Search.MaxResults))
+	registry.Register(tools.NewWebFetchTool(50000))

-	// Register message tool
+	// Message tool - available to both agent and subagent
+	// Subagent uses it to communicate directly with user
 	messageTool := tools.NewMessageTool()
 	messageTool.SetSendCallback(func(channel, chatID, content string) error {
 		msgBus.PublishOutbound(bus.OutboundMessage{
@@ -77,20 +86,39 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers
 		})
 		return nil
 	})
-	toolsRegistry.Register(messageTool)
+	registry.Register(messageTool)

-	// Register spawn tool
-	subagentManager := tools.NewSubagentManager(provider, workspace, msgBus)
+	return registry
+}
+
+func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers.LLMProvider) *AgentLoop {
+	workspace := cfg.WorkspacePath()
+	os.MkdirAll(workspace, 0755)
+
+	restrict := cfg.Agents.Defaults.RestrictToWorkspace
+
+	// Create tool registry for main agent
+	toolsRegistry := createToolRegistry(workspace, restrict, cfg, msgBus)
+
+	// Create subagent manager with its own tool registry
+	subagentManager := tools.NewSubagentManager(provider, cfg.Agents.Defaults.Model, workspace, msgBus)
+	subagentTools := createToolRegistry(workspace, restrict, cfg, msgBus)
+	// Subagent doesn't need spawn/subagent tools to avoid recursion
+	subagentManager.SetTools(subagentTools)
+
+	// Register spawn tool (for main agent)
 	spawnTool := tools.NewSpawnTool(subagentManager)
 	toolsRegistry.Register(spawnTool)

-	// Register edit file tool
-	editFileTool := tools.NewEditFileTool(workspace, restrict)
-	toolsRegistry.Register(editFileTool)
-	toolsRegistry.Register(tools.NewAppendFileTool(workspace, restrict))
+	// Register subagent tool (synchronous execution)
+	subagentTool := tools.NewSubagentTool(subagentManager)
+	toolsRegistry.Register(subagentTool)

 	sessionsManager := session.NewSessionManager(filepath.Join(workspace, "sessions"))

+	// Create state manager for atomic state persistence
+	stateManager := state.NewManager(workspace)
+
 	// Create context builder and set tools registry
 	contextBuilder := NewContextBuilder(workspace)
 	contextBuilder.SetToolsRegistry(toolsRegistry)
@@ -103,6 +131,7 @@ func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers
 		contextWindow:  cfg.Agents.Defaults.MaxTokens, // Restore context window for summarization
 		maxIterations:  cfg.Agents.Defaults.MaxToolIterations,
 		sessions:       sessionsManager,
+		state:          stateManager,
 		contextBuilder: contextBuilder,
 		tools:          toolsRegistry,
 		summarizing:    sync.Map{},
@@ -159,6 +188,18 @@ func (al *AgentLoop) RegisterTool(tool tools.Tool) {
 	al.tools.Register(tool)
 }

+// RecordLastChannel records channel as the last active channel for this workspace and returns any error from al.state.SetLastChannel.
+// NOTE(review): relies on the state manager's atomic save to prevent data loss on crash; confirm in pkg/state.
+func (al *AgentLoop) RecordLastChannel(channel string) error {
+	return al.state.SetLastChannel(channel)
+}
+
+// RecordLastChatID records chatID as the last active chat ID for this workspace and returns any error from al.state.SetLastChatID.
+// NOTE(review): relies on the state manager's atomic save to prevent data loss on crash; confirm in pkg/state.
+func (al *AgentLoop) RecordLastChatID(chatID string) error {
+	return al.state.SetLastChatID(chatID)
+}
+
 func (al *AgentLoop) ProcessDirect(ctx context.Context, content, sessionKey string) (string, error) {
 	return al.ProcessDirectWithChannel(ctx, content, sessionKey, "cli", "direct")
 }
@@ -175,10 +216,30 @@ func (al *AgentLoop) ProcessDirectWithChannel(ctx context.Context, content, sess
 	return al.processMessage(ctx, msg)
 }

+// ProcessHeartbeat runs content through runAgentLoop under the fixed session key "heartbeat" and returns the loop's result and error.
+// Session history is not loaded (NoHistory), and summarization and sending the response via the bus are both disabled.
+func (al *AgentLoop) ProcessHeartbeat(ctx context.Context, content, channel, chatID string) (string, error) {
+	return al.runAgentLoop(ctx, processOptions{
+		SessionKey:      "heartbeat",
+		Channel:         channel,
+		ChatID:          chatID,
+		UserMessage:     content,
+		DefaultResponse: "I've completed processing but have no response to give.",
+		EnableSummary:   false,
+		SendResponse:    false,
+		NoHistory:       true, // Don't load session history for heartbeat
+	})
+}
+
 func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) (string, error) {
-	// Add message preview to log
-	preview := utils.Truncate(msg.Content, 80)
-	logger.InfoCF("agent", fmt.Sprintf("Processing message from %s:%s: %s", msg.Channel, msg.SenderID, preview),
+	// Add message preview to log (show full content for error messages)
+	var logContent string
+	if strings.Contains(msg.Content, "Error:") || strings.Contains(msg.Content, "error") {
+		logContent = msg.Content // Full content for errors
+	} else {
+		logContent = utils.Truncate(msg.Content, 80)
+	}
+	logger.InfoCF("agent", fmt.Sprintf("Processing message from %s:%s: %s", msg.Channel, msg.SenderID, logContent),
 		map[string]interface{}{
 			"channel":     msg.Channel,
 			"chat_id":     msg.ChatID,
@@ -215,45 +276,70 @@ func (al *AgentLoop) processSystemMessage(ctx context.Context, msg bus.InboundMe
 			"chat_id":   msg.ChatID,
 		})

-	// Parse origin from chat_id (format: "channel:chat_id")
-	var originChannel, originChatID string
+	// Parse origin channel from chat_id (format: "channel:chat_id")
+	var originChannel string
 	if idx := strings.Index(msg.ChatID, ":"); idx > 0 {
 		originChannel = msg.ChatID[:idx]
-		originChatID = msg.ChatID[idx+1:]
 	} else {
 		// Fallback
 		originChannel = "cli"
-		originChatID = msg.ChatID
 	}

-	// Use the origin session for context
-	sessionKey := fmt.Sprintf("%s:%s", originChannel, originChatID)
+	// Extract subagent result from message content
+	// Format: "Task 'label' completed.\n\nResult:\n<actual content>"
+	content := msg.Content
+	if idx := strings.Index(content, "Result:\n"); idx >= 0 {
+		content = content[idx+8:] // Extract just the result part
+	}

-	// Process as system message with routing back to origin.
-	// SendResponse: true means runAgentLoop will publish the outbound message itself,
-	// so we return empty string to prevent Run() from publishing a duplicate.
-	_, err := al.runAgentLoop(ctx, processOptions{
-		SessionKey:      sessionKey,
-		Channel:         originChannel,
-		ChatID:          originChatID,
-		UserMessage:     fmt.Sprintf("[System: %s] %s", msg.SenderID, msg.Content),
-		DefaultResponse: "Background task completed.",
-		EnableSummary:   false,
-		SendResponse:    true, // Send response back to original channel
-	})
-	// Return empty string: response was already sent via bus in runAgentLoop
-	return "", err
+	// Skip internal channels - only log, don't send to user
+	if constants.IsInternalChannel(originChannel) {
+		logger.InfoCF("agent", "Subagent completed (internal channel)",
+			map[string]interface{}{
+				"sender_id":    msg.SenderID,
+				"content_len":  len(content),
+				"channel":      originChannel,
+			})
+		return "", nil
+	}
+
+	// Agent acts as dispatcher only - subagent handles user interaction via message tool
+	// Don't forward result here, subagent should use message tool to communicate with user
+	logger.InfoCF("agent", "Subagent completed",
+		map[string]interface{}{
+			"sender_id":    msg.SenderID,
+			"channel":      originChannel,
+			"content_len":  len(content),
+		})
+
+	// Agent only logs, does not respond to user
+	return "", nil
 }

 // runAgentLoop is the core message processing logic.
 // It handles context building, LLM calls, tool execution, and response handling.
 func (al *AgentLoop) runAgentLoop(ctx context.Context, opts processOptions) (string, error) {
+	// 0. Record last channel for heartbeat notifications (skip internal channels)
+	if opts.Channel != "" && opts.ChatID != "" {
+		// Don't record internal channels (cli, system, subagent)
+		if !constants.IsInternalChannel(opts.Channel) {
+			channelKey := fmt.Sprintf("%s:%s", opts.Channel, opts.ChatID)
+			if err := al.RecordLastChannel(channelKey); err != nil {
+				logger.WarnCF("agent", "Failed to record last channel: %v", map[string]interface{}{"error": err.Error()})
+			}
+		}
+	}
+
 	// 1. Update tool contexts
 	al.updateToolContexts(opts.Channel, opts.ChatID)

-	// 2. Build messages
-	history := al.sessions.GetHistory(opts.SessionKey)
-	summary := al.sessions.GetSummary(opts.SessionKey)
+	// 2. Build messages (skip history for heartbeat)
+	var history []providers.Message
+	var summary string
+	if !opts.NoHistory {
+		history = al.sessions.GetHistory(opts.SessionKey)
+		summary = al.sessions.GetSummary(opts.SessionKey)
+	}
 	messages := al.contextBuilder.BuildMessages(
 		history,
 		summary,
@@ -272,6 +358,9 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, opts processOptions) (str
 		return "", err
 	}

+	// If last tool had ForUser content and we already sent it, we might not need to send final response
+	// This is controlled by the tool's Silent flag and ForUser content
+
 	// 5. Handle empty response
 	if finalContent == "" {
 		finalContent = opts.DefaultResponse
@@ -323,18 +412,7 @@ func (al *AgentLoop) runLLMIteration(ctx context.Context, messages []providers.M
 			})

 		// Build tool definitions
-		toolDefs := al.tools.GetDefinitions()
-		providerToolDefs := make([]providers.ToolDefinition, 0, len(toolDefs))
-		for _, td := range toolDefs {
-			providerToolDefs = append(providerToolDefs, providers.ToolDefinition{
-				Type: td["type"].(string),
-				Function: providers.ToolFunctionDefinition{
-					Name:        td["function"].(map[string]interface{})["name"].(string),
-					Description: td["function"].(map[string]interface{})["description"].(string),
-					Parameters:  td["function"].(map[string]interface{})["parameters"].(map[string]interface{}),
-				},
-			})
-		}
+		providerToolDefs := al.tools.ToProviderDefs()

 		// Log LLM request details
 		logger.DebugCF("agent", "LLM request",
@@ -390,7 +468,7 @@ func (al *AgentLoop) runLLMIteration(ctx context.Context, messages []providers.M
 		logger.InfoCF("agent", "LLM requested tool calls",
 			map[string]interface{}{
 				"tools":     toolNames,
-				"count":     len(toolNames),
+				"count":     len(response.ToolCalls),
 				"iteration": iteration,
 			})

@@ -426,14 +504,47 @@ func (al *AgentLoop) runLLMIteration(ctx context.Context, messages []providers.M
 					"iteration": iteration,
 				})

-			result, err := al.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, opts.Channel, opts.ChatID)
-			if err != nil {
-				result = fmt.Sprintf("Error: %v", err)
+			// Create async callback for tools that implement AsyncTool
+			// NOTE: Following openclaw's design, async tools do NOT send results directly to users.
+			// Instead, they notify the agent via PublishInbound; processSystemMessage only logs the
+			// completion, and the subagent reports to the user itself through the message tool.
+			asyncCallback := func(callbackCtx context.Context, result *tools.ToolResult) {
+				// Log the async completion but don't send directly to user
+				// User notification is left to the message tool; processSystemMessage only logs
+				if !result.Silent && result.ForUser != "" {
+					logger.InfoCF("agent", "Async tool completed, agent will handle notification",
+						map[string]interface{}{
+							"tool":        tc.Name,
+							"content_len": len(result.ForUser),
+						})
+				}
+			}
+
+			toolResult := al.tools.ExecuteWithContext(ctx, tc.Name, tc.Arguments, opts.Channel, opts.ChatID, asyncCallback)
+
+			// Send ForUser content to user immediately if not Silent
+			if !toolResult.Silent && toolResult.ForUser != "" && opts.SendResponse {
+				al.bus.PublishOutbound(bus.OutboundMessage{
+					Channel: opts.Channel,
+					ChatID:  opts.ChatID,
+					Content: toolResult.ForUser,
+				})
+				logger.DebugCF("agent", "Sent tool result to user",
+					map[string]interface{}{
+						"tool":        tc.Name,
+						"content_len": len(toolResult.ForUser),
+					})
+			}
+
+			// Determine content for LLM based on tool result
+			contentForLLM := toolResult.ForLLM
+			if contentForLLM == "" && toolResult.Err != nil {
+				contentForLLM = toolResult.Err.Error()
 			}

 			toolResultMsg := providers.Message{
 				Role:       "tool",
-				Content:    result,
+				Content:    contentForLLM,
 				ToolCallID: tc.ID,
 			}
 			messages = append(messages, toolResultMsg)
@@ -448,13 +559,19 @@ func (al *AgentLoop) runLLMIteration(ctx context.Context, messages []providers.M

 // updateToolContexts updates the context for tools that need channel/chatID info.
 func (al *AgentLoop) updateToolContexts(channel, chatID string) {
+	// Assert against the ContextualTool interface rather than concrete tool types
 	if tool, ok := al.tools.Get("message"); ok {
-		if mt, ok := tool.(*tools.MessageTool); ok {
+		if mt, ok := tool.(tools.ContextualTool); ok {
 			mt.SetContext(channel, chatID)
 		}
 	}
 	if tool, ok := al.tools.Get("spawn"); ok {
-		if st, ok := tool.(*tools.SpawnTool); ok {
+		if st, ok := tool.(tools.ContextualTool); ok {
+			st.SetContext(channel, chatID)
+		}
+	}
+	if tool, ok := al.tools.Get("subagent"); ok {
+		if st, ok := tool.(tools.ContextualTool); ok {
 			st.SetContext(channel, chatID)
 		}
 	}