From e77b0a6755587d200db8980c6ee37b93be5bab62 Mon Sep 17 00:00:00 2001 From: Leandro Barbosa Date: Mon, 16 Feb 2026 00:40:17 -0300 Subject: [PATCH] feat: add Codex CLI provider for subprocess integration (#80) * feat: add Codex CLI provider for OpenAI subprocess integration Add CodexCliProvider that wraps `codex exec --json` as a subprocess, analogous to the existing ClaudeCliProvider pattern. This enables using OpenAI's Codex CLI tool as a local LLM backend. - CodexCliProvider: subprocess wrapper parsing JSONL event stream - Credential reader for ~/.codex/auth.json with token expiry detection - Factory integration: provider "codex-cli" and auth_method "codex-cli" - Fix tilde expansion in workspace path for CLI providers - 37 unit tests covering parsing, prompt building, credentials, and mocks * fix: add tool call extraction to Codex CLI provider - Extract shared tool call parsing into tool_call_extract.go (extractToolCallsFromText, stripToolCallsFromText, findMatchingBrace) - Both ClaudeCliProvider and CodexCliProvider now share the same tool call extraction logic for PicoClaw-specific tools - Fix cache token accounting: include cached_input_tokens in total - Add 2 new tests for tool call extraction from JSONL events - Update existing tests for corrected token calculations * fix(docker): update Go version to match go.mod requirement Dockerfile used golang:1.24-alpine but go.mod requires go >= 1.25.7. This caused Docker builds to fail on all branches with: "go: go.mod requires go >= 1.25.7 (running go 1.24.13)" Update to golang:1.25-alpine to match the project requirement. * fix: handle codex CLI stderr noise without losing valid stdout Codex writes diagnostic messages to stderr (e.g. rollout errors) which cause non-zero exit codes even when valid JSONL output exists on stdout. Parse stdout first before checking exit code to avoid false errors. 
* style: fix gofmt formatting and update web search API in tests - Remove trailing whitespace in web.go and base_test.go - Update config_test.go and web_test.go for WebSearchToolOptions API --- pkg/providers/claude_cli_provider.go | 62 +-- pkg/providers/codex_cli_credentials.go | 79 +++ pkg/providers/codex_cli_credentials_test.go | 181 ++++++ pkg/providers/codex_cli_provider.go | 251 +++++++++ pkg/providers/codex_cli_provider_test.go | 585 ++++++++++++++++++++ pkg/providers/http_provider.go | 11 +- pkg/providers/tool_call_extract.go | 72 +++ pkg/tools/web_test.go | 16 +- 8 files changed, 1192 insertions(+), 65 deletions(-) create mode 100644 pkg/providers/codex_cli_credentials.go create mode 100644 pkg/providers/codex_cli_credentials_test.go create mode 100644 pkg/providers/codex_cli_provider.go create mode 100644 pkg/providers/codex_cli_provider_test.go create mode 100644 pkg/providers/tool_call_extract.go diff --git a/pkg/providers/claude_cli_provider.go b/pkg/providers/claude_cli_provider.go index a917957..58ba364 100644 --- a/pkg/providers/claude_cli_provider.go +++ b/pkg/providers/claude_cli_provider.go @@ -171,68 +171,14 @@ func (p *ClaudeCliProvider) parseClaudeCliResponse(output string) (*LLMResponse, }, nil } -// extractToolCalls parses tool call JSON from the response text. +// extractToolCalls delegates to the shared extractToolCallsFromText function. 
func (p *ClaudeCliProvider) extractToolCalls(text string) []ToolCall { - start := strings.Index(text, `{"tool_calls"`) - if start == -1 { - return nil - } - - end := findMatchingBrace(text, start) - if end == start { - return nil - } - - jsonStr := text[start:end] - - var wrapper struct { - ToolCalls []struct { - ID string `json:"id"` - Type string `json:"type"` - Function struct { - Name string `json:"name"` - Arguments string `json:"arguments"` - } `json:"function"` - } `json:"tool_calls"` - } - - if err := json.Unmarshal([]byte(jsonStr), &wrapper); err != nil { - return nil - } - - var result []ToolCall - for _, tc := range wrapper.ToolCalls { - var args map[string]interface{} - json.Unmarshal([]byte(tc.Function.Arguments), &args) - - result = append(result, ToolCall{ - ID: tc.ID, - Type: tc.Type, - Name: tc.Function.Name, - Arguments: args, - Function: &FunctionCall{ - Name: tc.Function.Name, - Arguments: tc.Function.Arguments, - }, - }) - } - - return result + return extractToolCallsFromText(text) } -// stripToolCallsJSON removes tool call JSON from response text. +// stripToolCallsJSON delegates to the shared stripToolCallsFromText function. func (p *ClaudeCliProvider) stripToolCallsJSON(text string) string { - start := strings.Index(text, `{"tool_calls"`) - if start == -1 { - return text - } - - end := findMatchingBrace(text, start) - if end == start { - return text - } - - return strings.TrimSpace(text[:start] + text[end:]) + return stripToolCallsFromText(text) } // findMatchingBrace finds the index after the closing brace matching the opening brace at pos. diff --git a/pkg/providers/codex_cli_credentials.go b/pkg/providers/codex_cli_credentials.go new file mode 100644 index 0000000..7ad39ce --- /dev/null +++ b/pkg/providers/codex_cli_credentials.go @@ -0,0 +1,79 @@ +package providers + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" +) + +// CodexCliAuth represents the ~/.codex/auth.json file structure. 
+type CodexCliAuth struct { + Tokens struct { + AccessToken string `json:"access_token"` + RefreshToken string `json:"refresh_token"` + AccountID string `json:"account_id"` + } `json:"tokens"` +} + +// ReadCodexCliCredentials reads OAuth tokens from the Codex CLI's auth.json file. +// Expiry is estimated as file modification time + 1 hour (same approach as moltbot). +func ReadCodexCliCredentials() (accessToken, accountID string, expiresAt time.Time, err error) { + authPath, err := resolveCodexAuthPath() + if err != nil { + return "", "", time.Time{}, err + } + + data, err := os.ReadFile(authPath) + if err != nil { + return "", "", time.Time{}, fmt.Errorf("reading %s: %w", authPath, err) + } + + var auth CodexCliAuth + if err := json.Unmarshal(data, &auth); err != nil { + return "", "", time.Time{}, fmt.Errorf("parsing %s: %w", authPath, err) + } + + if auth.Tokens.AccessToken == "" { + return "", "", time.Time{}, fmt.Errorf("no access_token in %s", authPath) + } + + stat, err := os.Stat(authPath) + if err != nil { + expiresAt = time.Now().Add(time.Hour) + } else { + expiresAt = stat.ModTime().Add(time.Hour) + } + + return auth.Tokens.AccessToken, auth.Tokens.AccountID, expiresAt, nil +} + +// CreateCodexCliTokenSource creates a token source that reads from ~/.codex/auth.json. +// This allows the existing CodexProvider to reuse Codex CLI credentials. +func CreateCodexCliTokenSource() func() (string, string, error) { + return func() (string, string, error) { + token, accountID, expiresAt, err := ReadCodexCliCredentials() + if err != nil { + return "", "", fmt.Errorf("reading codex cli credentials: %w", err) + } + + if time.Now().After(expiresAt) { + return "", "", fmt.Errorf("codex cli credentials expired (auth.json last modified > 1h ago). 
Run: codex login") + } + + return token, accountID, nil + } +} + +func resolveCodexAuthPath() (string, error) { + codexHome := os.Getenv("CODEX_HOME") + if codexHome == "" { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("getting home dir: %w", err) + } + codexHome = filepath.Join(home, ".codex") + } + return filepath.Join(codexHome, "auth.json"), nil +} diff --git a/pkg/providers/codex_cli_credentials_test.go b/pkg/providers/codex_cli_credentials_test.go new file mode 100644 index 0000000..3267f2d --- /dev/null +++ b/pkg/providers/codex_cli_credentials_test.go @@ -0,0 +1,181 @@ +package providers + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +func TestReadCodexCliCredentials_Valid(t *testing.T) { + tmpDir := t.TempDir() + authPath := filepath.Join(tmpDir, "auth.json") + + authJSON := `{ + "tokens": { + "access_token": "test-access-token", + "refresh_token": "test-refresh-token", + "account_id": "org-test123" + } + }` + if err := os.WriteFile(authPath, []byte(authJSON), 0600); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", tmpDir) + + token, accountID, expiresAt, err := ReadCodexCliCredentials() + if err != nil { + t.Fatalf("ReadCodexCliCredentials() error: %v", err) + } + if token != "test-access-token" { + t.Errorf("token = %q, want %q", token, "test-access-token") + } + if accountID != "org-test123" { + t.Errorf("accountID = %q, want %q", accountID, "org-test123") + } + // Expiry should be within ~1 hour from now (file was just written) + if expiresAt.Before(time.Now()) { + t.Errorf("expiresAt = %v, should be in the future", expiresAt) + } + if expiresAt.After(time.Now().Add(2 * time.Hour)) { + t.Errorf("expiresAt = %v, should be within ~1 hour", expiresAt) + } +} + +func TestReadCodexCliCredentials_MissingFile(t *testing.T) { + tmpDir := t.TempDir() + t.Setenv("CODEX_HOME", tmpDir) + + _, _, _, err := ReadCodexCliCredentials() + if err == nil { + t.Fatal("expected error for missing auth.json") + } +} + 
+func TestReadCodexCliCredentials_EmptyToken(t *testing.T) { + tmpDir := t.TempDir() + authPath := filepath.Join(tmpDir, "auth.json") + + authJSON := `{"tokens": {"access_token": "", "refresh_token": "r", "account_id": "a"}}` + if err := os.WriteFile(authPath, []byte(authJSON), 0600); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", tmpDir) + + _, _, _, err := ReadCodexCliCredentials() + if err == nil { + t.Fatal("expected error for empty access_token") + } +} + +func TestReadCodexCliCredentials_InvalidJSON(t *testing.T) { + tmpDir := t.TempDir() + authPath := filepath.Join(tmpDir, "auth.json") + + if err := os.WriteFile(authPath, []byte("not json"), 0600); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", tmpDir) + + _, _, _, err := ReadCodexCliCredentials() + if err == nil { + t.Fatal("expected error for invalid JSON") + } +} + +func TestReadCodexCliCredentials_NoAccountID(t *testing.T) { + tmpDir := t.TempDir() + authPath := filepath.Join(tmpDir, "auth.json") + + authJSON := `{"tokens": {"access_token": "tok123", "refresh_token": "ref456"}}` + if err := os.WriteFile(authPath, []byte(authJSON), 0600); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", tmpDir) + + token, accountID, _, err := ReadCodexCliCredentials() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "tok123" { + t.Errorf("token = %q, want %q", token, "tok123") + } + if accountID != "" { + t.Errorf("accountID = %q, want empty", accountID) + } +} + +func TestReadCodexCliCredentials_CodexHomeEnv(t *testing.T) { + tmpDir := t.TempDir() + customDir := filepath.Join(tmpDir, "custom-codex") + if err := os.MkdirAll(customDir, 0755); err != nil { + t.Fatal(err) + } + + authJSON := `{"tokens": {"access_token": "custom-token", "refresh_token": "r"}}` + if err := os.WriteFile(filepath.Join(customDir, "auth.json"), []byte(authJSON), 0600); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", customDir) + + token, _, _, err := ReadCodexCliCredentials() 
+ if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "custom-token" { + t.Errorf("token = %q, want %q", token, "custom-token") + } +} + +func TestCreateCodexCliTokenSource_Valid(t *testing.T) { + tmpDir := t.TempDir() + authPath := filepath.Join(tmpDir, "auth.json") + + authJSON := `{"tokens": {"access_token": "fresh-token", "refresh_token": "r", "account_id": "acc"}}` + if err := os.WriteFile(authPath, []byte(authJSON), 0600); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", tmpDir) + + source := CreateCodexCliTokenSource() + token, accountID, err := source() + if err != nil { + t.Fatalf("token source error: %v", err) + } + if token != "fresh-token" { + t.Errorf("token = %q, want %q", token, "fresh-token") + } + if accountID != "acc" { + t.Errorf("accountID = %q, want %q", accountID, "acc") + } +} + +func TestCreateCodexCliTokenSource_Expired(t *testing.T) { + tmpDir := t.TempDir() + authPath := filepath.Join(tmpDir, "auth.json") + + authJSON := `{"tokens": {"access_token": "old-token", "refresh_token": "r"}}` + if err := os.WriteFile(authPath, []byte(authJSON), 0600); err != nil { + t.Fatal(err) + } + + // Set file modification time to 2 hours ago + oldTime := time.Now().Add(-2 * time.Hour) + if err := os.Chtimes(authPath, oldTime, oldTime); err != nil { + t.Fatal(err) + } + + t.Setenv("CODEX_HOME", tmpDir) + + source := CreateCodexCliTokenSource() + _, _, err := source() + if err == nil { + t.Fatal("expected error for expired credentials") + } +} diff --git a/pkg/providers/codex_cli_provider.go b/pkg/providers/codex_cli_provider.go new file mode 100644 index 0000000..8886406 --- /dev/null +++ b/pkg/providers/codex_cli_provider.go @@ -0,0 +1,251 @@ +package providers + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" +) + +// CodexCliProvider implements LLMProvider by wrapping the codex CLI as a subprocess. 
+type CodexCliProvider struct { + command string + workspace string +} + +// NewCodexCliProvider creates a new Codex CLI provider. +func NewCodexCliProvider(workspace string) *CodexCliProvider { + return &CodexCliProvider{ + command: "codex", + workspace: workspace, + } +} + +// Chat implements LLMProvider.Chat by executing the codex CLI in non-interactive mode. +func (p *CodexCliProvider) Chat(ctx context.Context, messages []Message, tools []ToolDefinition, model string, options map[string]interface{}) (*LLMResponse, error) { + if p.command == "" { + return nil, fmt.Errorf("codex command not configured") + } + + prompt := p.buildPrompt(messages, tools) + + args := []string{ + "exec", + "--json", + "--dangerously-bypass-approvals-and-sandbox", + "--skip-git-repo-check", + "--color", "never", + } + if model != "" && model != "codex-cli" { + args = append(args, "-m", model) + } + if p.workspace != "" { + args = append(args, "-C", p.workspace) + } + args = append(args, "-") // read prompt from stdin + + cmd := exec.CommandContext(ctx, p.command, args...) + cmd.Stdin = bytes.NewReader([]byte(prompt)) + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + + // Parse JSONL from stdout even if exit code is non-zero, + // because codex writes diagnostic noise to stderr (e.g. rollout errors) + // but still produces valid JSONL output. + if stdoutStr := stdout.String(); stdoutStr != "" { + resp, parseErr := p.parseJSONLEvents(stdoutStr) + if parseErr == nil && resp != nil && (resp.Content != "" || len(resp.ToolCalls) > 0) { + return resp, nil + } + } + + if err != nil { + if ctx.Err() == context.Canceled { + return nil, ctx.Err() + } + if stderrStr := stderr.String(); stderrStr != "" { + return nil, fmt.Errorf("codex cli error: %s", stderrStr) + } + return nil, fmt.Errorf("codex cli error: %w", err) + } + + return p.parseJSONLEvents(stdout.String()) +} + +// GetDefaultModel returns the default model identifier. 
+func (p *CodexCliProvider) GetDefaultModel() string { + return "codex-cli" +} + +// buildPrompt converts messages to a prompt string for the Codex CLI. +// System messages are prepended as instructions since Codex CLI has no --system-prompt flag. +func (p *CodexCliProvider) buildPrompt(messages []Message, tools []ToolDefinition) string { + var systemParts []string + var conversationParts []string + + for _, msg := range messages { + switch msg.Role { + case "system": + systemParts = append(systemParts, msg.Content) + case "user": + conversationParts = append(conversationParts, msg.Content) + case "assistant": + conversationParts = append(conversationParts, "Assistant: "+msg.Content) + case "tool": + conversationParts = append(conversationParts, + fmt.Sprintf("[Tool Result for %s]: %s", msg.ToolCallID, msg.Content)) + } + } + + var sb strings.Builder + + if len(systemParts) > 0 { + sb.WriteString("## System Instructions\n\n") + sb.WriteString(strings.Join(systemParts, "\n\n")) + sb.WriteString("\n\n## Task\n\n") + } + + if len(tools) > 0 { + sb.WriteString(p.buildToolsPrompt(tools)) + sb.WriteString("\n\n") + } + + // Simplify single user message (no prefix) + if len(conversationParts) == 1 && len(systemParts) == 0 && len(tools) == 0 { + return conversationParts[0] + } + + sb.WriteString(strings.Join(conversationParts, "\n")) + return sb.String() +} + +// buildToolsPrompt creates a tool definitions section for the prompt. 
+func (p *CodexCliProvider) buildToolsPrompt(tools []ToolDefinition) string { + var sb strings.Builder + + sb.WriteString("## Available Tools\n\n") + sb.WriteString("When you need to use a tool, respond with ONLY a JSON object:\n\n") + sb.WriteString("```json\n") + sb.WriteString(`{"tool_calls":[{"id":"call_xxx","type":"function","function":{"name":"tool_name","arguments":"{...}"}}]}`) + sb.WriteString("\n```\n\n") + sb.WriteString("CRITICAL: The 'arguments' field MUST be a JSON-encoded STRING.\n\n") + sb.WriteString("### Tool Definitions:\n\n") + + for _, tool := range tools { + if tool.Type != "function" { + continue + } + sb.WriteString(fmt.Sprintf("#### %s\n", tool.Function.Name)) + if tool.Function.Description != "" { + sb.WriteString(fmt.Sprintf("Description: %s\n", tool.Function.Description)) + } + if len(tool.Function.Parameters) > 0 { + paramsJSON, _ := json.Marshal(tool.Function.Parameters) + sb.WriteString(fmt.Sprintf("Parameters:\n```json\n%s\n```\n", string(paramsJSON))) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// codexEvent represents a single JSONL event from `codex exec --json`. 
+type codexEvent struct { + Type string `json:"type"` + ThreadID string `json:"thread_id,omitempty"` + Message string `json:"message,omitempty"` + Item *codexEventItem `json:"item,omitempty"` + Usage *codexUsage `json:"usage,omitempty"` + Error *codexEventErr `json:"error,omitempty"` +} + +type codexEventItem struct { + ID string `json:"id"` + Type string `json:"type"` + Text string `json:"text,omitempty"` + Command string `json:"command,omitempty"` + Status string `json:"status,omitempty"` + ExitCode *int `json:"exit_code,omitempty"` + Output string `json:"output,omitempty"` +} + +type codexUsage struct { + InputTokens int `json:"input_tokens"` + CachedInputTokens int `json:"cached_input_tokens"` + OutputTokens int `json:"output_tokens"` +} + +type codexEventErr struct { + Message string `json:"message"` +} + +// parseJSONLEvents processes the JSONL output from codex exec --json. +func (p *CodexCliProvider) parseJSONLEvents(output string) (*LLMResponse, error) { + var contentParts []string + var usage *UsageInfo + var lastError string + + scanner := bufio.NewScanner(strings.NewReader(output)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + + var event codexEvent + if err := json.Unmarshal([]byte(line), &event); err != nil { + continue // skip malformed lines + } + + switch event.Type { + case "item.completed": + if event.Item != nil && event.Item.Type == "agent_message" && event.Item.Text != "" { + contentParts = append(contentParts, event.Item.Text) + } + case "turn.completed": + if event.Usage != nil { + promptTokens := event.Usage.InputTokens + event.Usage.CachedInputTokens + usage = &UsageInfo{ + PromptTokens: promptTokens, + CompletionTokens: event.Usage.OutputTokens, + TotalTokens: promptTokens + event.Usage.OutputTokens, + } + } + case "error": + lastError = event.Message + case "turn.failed": + if event.Error != nil { + lastError = event.Error.Message + } + } + } + + if lastError != "" && 
len(contentParts) == 0 { + return nil, fmt.Errorf("codex cli: %s", lastError) + } + + content := strings.Join(contentParts, "\n") + + // Extract tool calls from response text (same pattern as ClaudeCliProvider) + toolCalls := extractToolCallsFromText(content) + + finishReason := "stop" + if len(toolCalls) > 0 { + finishReason = "tool_calls" + content = stripToolCallsFromText(content) + } + + return &LLMResponse{ + Content: strings.TrimSpace(content), + ToolCalls: toolCalls, + FinishReason: finishReason, + Usage: usage, + }, nil +} diff --git a/pkg/providers/codex_cli_provider_test.go b/pkg/providers/codex_cli_provider_test.go new file mode 100644 index 0000000..7e4e1bc --- /dev/null +++ b/pkg/providers/codex_cli_provider_test.go @@ -0,0 +1,585 @@ +package providers + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +// --- JSONL Event Parsing Tests --- + +func TestParseJSONLEvents_AgentMessage(t *testing.T) { + p := &CodexCliProvider{} + events := `{"type":"thread.started","thread_id":"abc-123"} +{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Hello from Codex!"}} +{"type":"turn.completed","usage":{"input_tokens":100,"cached_input_tokens":50,"output_tokens":20}}` + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("parseJSONLEvents() error: %v", err) + } + if resp.Content != "Hello from Codex!" 
{ + t.Errorf("Content = %q, want %q", resp.Content, "Hello from Codex!") + } + if resp.FinishReason != "stop" { + t.Errorf("FinishReason = %q, want %q", resp.FinishReason, "stop") + } + if resp.Usage == nil { + t.Fatal("Usage should not be nil") + } + if resp.Usage.PromptTokens != 150 { + t.Errorf("PromptTokens = %d, want 150", resp.Usage.PromptTokens) + } + if resp.Usage.CompletionTokens != 20 { + t.Errorf("CompletionTokens = %d, want 20", resp.Usage.CompletionTokens) + } + if resp.Usage.TotalTokens != 170 { + t.Errorf("TotalTokens = %d, want 170", resp.Usage.TotalTokens) + } + if len(resp.ToolCalls) != 0 { + t.Errorf("ToolCalls should be empty, got %d", len(resp.ToolCalls)) + } +} + +func TestParseJSONLEvents_ToolCallExtraction(t *testing.T) { + p := &CodexCliProvider{} + toolCallText := `Let me read that file. +{"tool_calls":[{"id":"call_1","type":"function","function":{"name":"read_file","arguments":"{\"path\":\"/tmp/test.txt\"}"}}]}` + // Build valid JSONL by marshaling the event + item := codexEvent{ + Type: "item.completed", + Item: &codexEventItem{ID: "item_1", Type: "agent_message", Text: toolCallText}, + } + itemJSON, _ := json.Marshal(item) + usageEvt := `{"type":"turn.completed","usage":{"input_tokens":50,"cached_input_tokens":0,"output_tokens":20}}` + events := `{"type":"turn.started"}` + "\n" + string(itemJSON) + "\n" + usageEvt + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("parseJSONLEvents() error: %v", err) + } + if resp.FinishReason != "tool_calls" { + t.Errorf("FinishReason = %q, want %q", resp.FinishReason, "tool_calls") + } + if len(resp.ToolCalls) != 1 { + t.Fatalf("ToolCalls count = %d, want 1", len(resp.ToolCalls)) + } + if resp.ToolCalls[0].Name != "read_file" { + t.Errorf("ToolCalls[0].Name = %q, want %q", resp.ToolCalls[0].Name, "read_file") + } + if resp.ToolCalls[0].ID != "call_1" { + t.Errorf("ToolCalls[0].ID = %q, want %q", resp.ToolCalls[0].ID, "call_1") + } + if resp.ToolCalls[0].Function.Arguments != 
`{"path":"/tmp/test.txt"}` { + t.Errorf("ToolCalls[0].Function.Arguments = %q", resp.ToolCalls[0].Function.Arguments) + } + // Content should have the tool call JSON stripped + if strings.Contains(resp.Content, "tool_calls") { + t.Errorf("Content should not contain tool_calls JSON, got: %q", resp.Content) + } +} + +func TestParseJSONLEvents_MultipleToolCalls(t *testing.T) { + p := &CodexCliProvider{} + toolCallText := `{"tool_calls":[{"id":"call_1","type":"function","function":{"name":"read_file","arguments":"{\"path\":\"a.txt\"}"}},{"id":"call_2","type":"function","function":{"name":"write_file","arguments":"{\"path\":\"b.txt\",\"content\":\"hello\"}"}}]}` + item := codexEvent{ + Type: "item.completed", + Item: &codexEventItem{ID: "item_1", Type: "agent_message", Text: toolCallText}, + } + itemJSON, _ := json.Marshal(item) + events := `{"type":"turn.started"}` + "\n" + string(itemJSON) + "\n" + `{"type":"turn.completed"}` + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("parseJSONLEvents() error: %v", err) + } + if len(resp.ToolCalls) != 2 { + t.Fatalf("ToolCalls count = %d, want 2", len(resp.ToolCalls)) + } + if resp.ToolCalls[0].Name != "read_file" { + t.Errorf("ToolCalls[0].Name = %q, want %q", resp.ToolCalls[0].Name, "read_file") + } + if resp.ToolCalls[1].Name != "write_file" { + t.Errorf("ToolCalls[1].Name = %q, want %q", resp.ToolCalls[1].Name, "write_file") + } + if resp.FinishReason != "tool_calls" { + t.Errorf("FinishReason = %q, want %q", resp.FinishReason, "tool_calls") + } +} + +func TestParseJSONLEvents_MultipleMessages(t *testing.T) { + p := &CodexCliProvider{} + events := `{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"First part."}} +{"type":"item.completed","item":{"id":"item_2","type":"command_execution","command":"ls","status":"completed"}} +{"type":"item.completed","item":{"id":"item_3","type":"agent_message","text":"Second part."}} +{"type":"turn.completed"}` + + 
resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("parseJSONLEvents() error: %v", err) + } + if resp.Content != "First part.\nSecond part." { + t.Errorf("Content = %q, want %q", resp.Content, "First part.\nSecond part.") + } +} + +func TestParseJSONLEvents_ErrorEvent(t *testing.T) { + p := &CodexCliProvider{} + events := `{"type":"thread.started","thread_id":"abc"} +{"type":"turn.started"} +{"type":"error","message":"token expired"} +{"type":"turn.failed","error":{"message":"token expired"}}` + + _, err := p.parseJSONLEvents(events) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "token expired") { + t.Errorf("error = %q, want to contain 'token expired'", err.Error()) + } +} + +func TestParseJSONLEvents_TurnFailed(t *testing.T) { + p := &CodexCliProvider{} + events := `{"type":"turn.started"} +{"type":"turn.failed","error":{"message":"rate limit exceeded"}}` + + _, err := p.parseJSONLEvents(events) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "rate limit exceeded") { + t.Errorf("error = %q, want to contain 'rate limit exceeded'", err.Error()) + } +} + +func TestParseJSONLEvents_ErrorWithContent(t *testing.T) { + p := &CodexCliProvider{} + // If there's an error but also content, return the content (partial success) + events := `{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Partial result."}} +{"type":"error","message":"connection reset"} +{"type":"turn.failed","error":{"message":"connection reset"}}` + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("should not error when content exists: %v", err) + } + if resp.Content != "Partial result." 
{ + t.Errorf("Content = %q, want %q", resp.Content, "Partial result.") + } +} + +func TestParseJSONLEvents_EmptyOutput(t *testing.T) { + p := &CodexCliProvider{} + resp, err := p.parseJSONLEvents("") + if err != nil { + t.Fatalf("empty output should not error: %v", err) + } + if resp.Content != "" { + t.Errorf("Content = %q, want empty", resp.Content) + } +} + +func TestParseJSONLEvents_MalformedLines(t *testing.T) { + p := &CodexCliProvider{} + events := `not json at all +{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Good line."}} +another bad line +{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}` + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("should skip malformed lines: %v", err) + } + if resp.Content != "Good line." { + t.Errorf("Content = %q, want %q", resp.Content, "Good line.") + } + if resp.Usage == nil || resp.Usage.TotalTokens != 15 { + t.Errorf("Usage.TotalTokens = %v, want 15", resp.Usage) + } +} + +func TestParseJSONLEvents_CommandExecution(t *testing.T) { + p := &CodexCliProvider{} + events := `{"type":"turn.started"} +{"type":"item.started","item":{"id":"item_1","type":"command_execution","command":"bash -lc ls","status":"in_progress"}} +{"type":"item.completed","item":{"id":"item_1","type":"command_execution","command":"bash -lc ls","status":"completed","exit_code":0,"output":"file1.go\nfile2.go"}} +{"type":"item.completed","item":{"id":"item_2","type":"agent_message","text":"Found 2 files."}} +{"type":"turn.completed"}` + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("parseJSONLEvents() error: %v", err) + } + // command_execution items should be skipped; only agent_message text is returned + if resp.Content != "Found 2 files." 
{ + t.Errorf("Content = %q, want %q", resp.Content, "Found 2 files.") + } +} + +func TestParseJSONLEvents_NoUsage(t *testing.T) { + p := &CodexCliProvider{} + events := `{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"No usage info."}} +{"type":"turn.completed"}` + + resp, err := p.parseJSONLEvents(events) + if err != nil { + t.Fatalf("parseJSONLEvents() error: %v", err) + } + if resp.Usage != nil { + t.Errorf("Usage should be nil when turn.completed has no usage, got %+v", resp.Usage) + } +} + +// --- Prompt Building Tests --- + +func TestBuildPrompt_SystemAsInstructions(t *testing.T) { + p := &CodexCliProvider{} + messages := []Message{ + {Role: "system", Content: "You are helpful."}, + {Role: "user", Content: "Hi there"}, + } + + prompt := p.buildPrompt(messages, nil) + + if !strings.Contains(prompt, "## System Instructions") { + t.Error("prompt should contain '## System Instructions'") + } + if !strings.Contains(prompt, "You are helpful.") { + t.Error("prompt should contain system content") + } + if !strings.Contains(prompt, "## Task") { + t.Error("prompt should contain '## Task'") + } + if !strings.Contains(prompt, "Hi there") { + t.Error("prompt should contain user message") + } +} + +func TestBuildPrompt_NoSystem(t *testing.T) { + p := &CodexCliProvider{} + messages := []Message{ + {Role: "user", Content: "Just a question"}, + } + + prompt := p.buildPrompt(messages, nil) + + if strings.Contains(prompt, "## System Instructions") { + t.Error("prompt should not contain system instructions header") + } + if prompt != "Just a question" { + t.Errorf("prompt = %q, want %q", prompt, "Just a question") + } +} + +func TestBuildPrompt_WithTools(t *testing.T) { + p := &CodexCliProvider{} + messages := []Message{ + {Role: "user", Content: "Get weather"}, + } + tools := []ToolDefinition{ + { + Type: "function", + Function: ToolFunctionDefinition{ + Name: "get_weather", + Description: "Get current weather", + 
Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "city": map[string]interface{}{"type": "string"}, + }, + }, + }, + }, + } + + prompt := p.buildPrompt(messages, tools) + + if !strings.Contains(prompt, "## Available Tools") { + t.Error("prompt should contain tools section") + } + if !strings.Contains(prompt, "get_weather") { + t.Error("prompt should contain tool name") + } + if !strings.Contains(prompt, "Get current weather") { + t.Error("prompt should contain tool description") + } +} + +func TestBuildPrompt_MultipleMessages(t *testing.T) { + p := &CodexCliProvider{} + messages := []Message{ + {Role: "user", Content: "Hello"}, + {Role: "assistant", Content: "Hi! How can I help?"}, + {Role: "user", Content: "Tell me about Go"}, + } + + prompt := p.buildPrompt(messages, nil) + + if !strings.Contains(prompt, "Hello") { + t.Error("prompt should contain first user message") + } + if !strings.Contains(prompt, "Assistant: Hi! How can I help?") { + t.Error("prompt should contain assistant message with prefix") + } + if !strings.Contains(prompt, "Tell me about Go") { + t.Error("prompt should contain second user message") + } +} + +func TestBuildPrompt_ToolResults(t *testing.T) { + p := &CodexCliProvider{} + messages := []Message{ + {Role: "user", Content: "Weather?"}, + {Role: "tool", Content: `{"temp": 72}`, ToolCallID: "call_1"}, + } + + prompt := p.buildPrompt(messages, nil) + + if !strings.Contains(prompt, "[Tool Result for call_1]") { + t.Error("prompt should contain tool result") + } + if !strings.Contains(prompt, `{"temp": 72}`) { + t.Error("prompt should contain tool result content") + } +} + +func TestBuildPrompt_SystemAndTools(t *testing.T) { + p := &CodexCliProvider{} + messages := []Message{ + {Role: "system", Content: "Be concise."}, + {Role: "user", Content: "Do something"}, + } + tools := []ToolDefinition{ + { + Type: "function", + Function: ToolFunctionDefinition{ + Name: "my_tool", + Description: "A tool", + 
}, + }, + } + + prompt := p.buildPrompt(messages, tools) + + // System instructions should come first + sysIdx := strings.Index(prompt, "## System Instructions") + toolIdx := strings.Index(prompt, "## Available Tools") + taskIdx := strings.Index(prompt, "## Task") + + if sysIdx == -1 || toolIdx == -1 || taskIdx == -1 { + t.Fatal("prompt should contain all sections") + } + if sysIdx >= taskIdx { + t.Error("system instructions should come before task") + } + if taskIdx >= toolIdx { + t.Error("task section should come before tools in the output") + } +} + +// --- CLI Argument Tests --- + +func TestCodexCliProvider_GetDefaultModel(t *testing.T) { + p := NewCodexCliProvider("") + if got := p.GetDefaultModel(); got != "codex-cli" { + t.Errorf("GetDefaultModel() = %q, want %q", got, "codex-cli") + } +} + +// --- Mock CLI Integration Test --- + +func createMockCodexCLI(t *testing.T, events []string) string { + t.Helper() + tmpDir := t.TempDir() + scriptPath := filepath.Join(tmpDir, "codex") + + var sb strings.Builder + sb.WriteString("#!/bin/bash\n") + for _, event := range events { + sb.WriteString(fmt.Sprintf("echo '%s'\n", event)) + } + + if err := os.WriteFile(scriptPath, []byte(sb.String()), 0755); err != nil { + t.Fatal(err) + } + return scriptPath +} + +func TestCodexCliProvider_MockCLI_Success(t *testing.T) { + scriptPath := createMockCodexCLI(t, []string{ + `{"type":"thread.started","thread_id":"test-123"}`, + `{"type":"turn.started"}`, + `{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Mock response from Codex CLI"}}`, + `{"type":"turn.completed","usage":{"input_tokens":50,"cached_input_tokens":10,"output_tokens":15}}`, + }) + + p := &CodexCliProvider{ + command: scriptPath, + workspace: "", + } + + messages := []Message{{Role: "user", Content: "Hello"}} + resp, err := p.Chat(context.Background(), messages, nil, "", nil) + if err != nil { + t.Fatalf("Chat() error: %v", err) + } + if resp.Content != "Mock response from Codex CLI" { + 
t.Errorf("Content = %q, want %q", resp.Content, "Mock response from Codex CLI") + } + if resp.Usage == nil { + t.Fatal("Usage should not be nil") + } + if resp.Usage.PromptTokens != 60 { + t.Errorf("PromptTokens = %d, want 60", resp.Usage.PromptTokens) + } + if resp.Usage.CompletionTokens != 15 { + t.Errorf("CompletionTokens = %d, want 15", resp.Usage.CompletionTokens) + } +} + +func TestCodexCliProvider_MockCLI_Error(t *testing.T) { + scriptPath := createMockCodexCLI(t, []string{ + `{"type":"thread.started","thread_id":"test-err"}`, + `{"type":"turn.started"}`, + `{"type":"error","message":"auth token expired"}`, + `{"type":"turn.failed","error":{"message":"auth token expired"}}`, + }) + + p := &CodexCliProvider{ + command: scriptPath, + workspace: "", + } + + messages := []Message{{Role: "user", Content: "Hello"}} + _, err := p.Chat(context.Background(), messages, nil, "", nil) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "auth token expired") { + t.Errorf("error = %q, want to contain 'auth token expired'", err.Error()) + } +} + +func TestCodexCliProvider_MockCLI_WithModel(t *testing.T) { + // Mock script that captures args to verify model flag is passed + tmpDir := t.TempDir() + scriptPath := filepath.Join(tmpDir, "codex") + script := `#!/bin/bash +# Write args to a file for verification +echo "$@" > "` + filepath.Join(tmpDir, "args.txt") + `" +echo '{"type":"item.completed","item":{"id":"1","type":"agent_message","text":"ok"}}' +echo '{"type":"turn.completed"}'` + + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatal(err) + } + + p := &CodexCliProvider{ + command: scriptPath, + workspace: "/tmp/test-workspace", + } + + messages := []Message{{Role: "user", Content: "test"}} + _, err := p.Chat(context.Background(), messages, nil, "gpt-5.2-codex", nil) + if err != nil { + t.Fatalf("Chat() error: %v", err) + } + + // Verify the args + argsData, err := os.ReadFile(filepath.Join(tmpDir, 
"args.txt")) + if err != nil { + t.Fatalf("reading args: %v", err) + } + args := string(argsData) + + if !strings.Contains(args, "-m gpt-5.2-codex") { + t.Errorf("args should contain model flag, got: %s", args) + } + if !strings.Contains(args, "-C /tmp/test-workspace") { + t.Errorf("args should contain workspace flag, got: %s", args) + } + if !strings.Contains(args, "--json") { + t.Errorf("args should contain --json, got: %s", args) + } + if !strings.Contains(args, "--dangerously-bypass-approvals-and-sandbox") { + t.Errorf("args should contain bypass flag, got: %s", args) + } +} + +func TestCodexCliProvider_MockCLI_ContextCancel(t *testing.T) { + // Script that sleeps forever + tmpDir := t.TempDir() + scriptPath := filepath.Join(tmpDir, "codex") + script := "#!/bin/bash\nsleep 60" + + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatal(err) + } + + p := &CodexCliProvider{ + command: scriptPath, + workspace: "", + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + + messages := []Message{{Role: "user", Content: "test"}} + _, err := p.Chat(ctx, messages, nil, "", nil) + if err == nil { + t.Fatal("expected error on canceled context") + } +} + +func TestCodexCliProvider_EmptyCommand(t *testing.T) { + p := &CodexCliProvider{command: ""} + + messages := []Message{{Role: "user", Content: "test"}} + _, err := p.Chat(context.Background(), messages, nil, "", nil) + if err == nil { + t.Fatal("expected error for empty command") + } +} + +// --- Integration Test (requires real codex CLI with valid auth) --- + +func TestCodexCliProvider_Integration(t *testing.T) { + if os.Getenv("PICOCLAW_INTEGRATION_TESTS") == "" { + t.Skip("skipping integration test (set PICOCLAW_INTEGRATION_TESTS=1 to enable)") + } + + // Verify codex is available + codexPath, err := exec.LookPath("codex") + if err != nil { + t.Skip("codex CLI not found in PATH") + } + + p := &CodexCliProvider{ + command: codexPath, + workspace: "", + 
} + + messages := []Message{ + {Role: "user", Content: "Respond with just the word 'hello' and nothing else."}, + } + + resp, err := p.Chat(context.Background(), messages, nil, "", nil) + if err != nil { + t.Fatalf("Chat() error: %v", err) + } + + lower := strings.ToLower(strings.TrimSpace(resp.Content)) + if !strings.Contains(lower, "hello") { + t.Errorf("Content = %q, expected to contain 'hello'", resp.Content) + } +} diff --git a/pkg/providers/http_provider.go b/pkg/providers/http_provider.go index 60294c4..4cf2c6d 100644 --- a/pkg/providers/http_provider.go +++ b/pkg/providers/http_provider.go @@ -240,6 +240,9 @@ func CreateProvider(cfg *config.Config) (LLMProvider, error) { } case "openai", "gpt": if cfg.Providers.OpenAI.APIKey != "" || cfg.Providers.OpenAI.AuthMethod != "" { + if cfg.Providers.OpenAI.AuthMethod == "codex-cli" { + return NewCodexProviderWithTokenSource("", "", CreateCodexCliTokenSource()), nil + } if cfg.Providers.OpenAI.AuthMethod == "oauth" || cfg.Providers.OpenAI.AuthMethod == "token" { return createCodexAuthProvider() } @@ -299,11 +302,17 @@ func CreateProvider(cfg *config.Config) (LLMProvider, error) { } } case "claude-cli", "claudecode", "claude-code": - workspace := cfg.Agents.Defaults.Workspace + workspace := cfg.WorkspacePath() if workspace == "" { workspace = "." } return NewClaudeCliProvider(workspace), nil + case "codex-cli", "codex-code": + workspace := cfg.WorkspacePath() + if workspace == "" { + workspace = "." + } + return NewCodexCliProvider(workspace), nil case "deepseek": if cfg.Providers.DeepSeek.APIKey != "" { apiKey = cfg.Providers.DeepSeek.APIKey diff --git a/pkg/providers/tool_call_extract.go b/pkg/providers/tool_call_extract.go new file mode 100644 index 0000000..97a2192 --- /dev/null +++ b/pkg/providers/tool_call_extract.go @@ -0,0 +1,72 @@ +package providers + +import ( + "encoding/json" + "strings" +) + +// extractToolCallsFromText parses tool call JSON from response text. 
+// Both ClaudeCliProvider and CodexCliProvider use this to extract
+// tool calls that the model outputs in its response text.
+// It returns nil when no `{"tool_calls"` object is found or it does not decode.
+func extractToolCallsFromText(text string) []ToolCall {
+	start := strings.Index(text, `{"tool_calls"`)
+	if start == -1 {
+		return nil
+	}
+	end := findMatchingBrace(text, start)
+	if end == start { // presumably "no matching brace" — confirm in findMatchingBrace
+		return nil
+	}
+
+	var wrapper struct {
+		ToolCalls []struct {
+			ID       string `json:"id"`
+			Type     string `json:"type"`
+			Function struct {
+				Name      string `json:"name"`
+				Arguments string `json:"arguments"`
+			} `json:"function"`
+		} `json:"tool_calls"`
+	}
+	if err := json.Unmarshal([]byte(text[start:end]), &wrapper); err != nil {
+		return nil
+	}
+
+	var result []ToolCall
+	for _, tc := range wrapper.ToolCalls {
+		// Arguments is a JSON-encoded string. Decoding is best-effort: on failure
+		// the call is kept with nil Arguments; Function.Arguments keeps the raw text.
+		var args map[string]interface{}
+		if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil {
+			args = nil
+		}
+		result = append(result, ToolCall{
+			ID:        tc.ID,
+			Type:      tc.Type,
+			Name:      tc.Function.Name,
+			Arguments: args,
+			Function: &FunctionCall{
+				Name:      tc.Function.Name,
+				Arguments: tc.Function.Arguments,
+			},
+		})
+	}
+
+	return result
+}
+
+// stripToolCallsFromText removes tool call JSON from response text.
+func stripToolCallsFromText(text string) string { + start := strings.Index(text, `{"tool_calls"`) + if start == -1 { + return text + } + + end := findMatchingBrace(text, start) + if end == start { + return text + } + + return strings.TrimSpace(text[:start] + text[end:]) +} diff --git a/pkg/tools/web_test.go b/pkg/tools/web_test.go index 988eada..a526ea3 100644 --- a/pkg/tools/web_test.go +++ b/pkg/tools/web_test.go @@ -173,19 +173,23 @@ func TestWebTool_WebFetch_Truncation(t *testing.T) { } } -// TestWebTool_WebSearch_NoApiKey verifies that nil is returned when no provider is configured +// TestWebTool_WebSearch_NoApiKey verifies that no tool is created when API key is missing func TestWebTool_WebSearch_NoApiKey(t *testing.T) { - tool := NewWebSearchTool(WebSearchToolOptions{BraveAPIKey: "", BraveMaxResults: 5}) - - // Should return nil when no provider is enabled + tool := NewWebSearchTool(WebSearchToolOptions{BraveEnabled: true, BraveAPIKey: ""}) if tool != nil { - t.Errorf("Expected nil when no search provider is configured") + t.Errorf("Expected nil tool when Brave API key is empty") + } + + // Also nil when nothing is enabled + tool = NewWebSearchTool(WebSearchToolOptions{}) + if tool != nil { + t.Errorf("Expected nil tool when no provider is enabled") } } // TestWebTool_WebSearch_MissingQuery verifies error handling for missing query func TestWebTool_WebSearch_MissingQuery(t *testing.T) { - tool := NewWebSearchTool(WebSearchToolOptions{BraveAPIKey: "test-key", BraveMaxResults: 5, BraveEnabled: true}) + tool := NewWebSearchTool(WebSearchToolOptions{BraveEnabled: true, BraveAPIKey: "test-key", BraveMaxResults: 5}) ctx := context.Background() args := map[string]interface{}{}