fix: resolve multiple bugs from code review #116

Fixes four issues identified in the community code review:

- Session persistence broken on Windows: session keys like
  "telegram:123456" contain ':', which is illegal in Windows
  filenames. filepath.Base() strips drive-letter prefixes on Windows,
  causing Save() to silently fail. Added sanitizeFilename() to
  replace invalid chars in the filename while keeping the original
  key in the JSON payload.

- HTTP client with no timeout: HTTPProvider used Timeout: 0 (infinite
  wait), which can hang the entire agent if an API endpoint becomes
  unresponsive. Set a 120s safety timeout.

- Slack AllowFrom type mismatch: SlackConfig used plain []string
  while every other channel uses FlexibleStringSlice, so numeric
  user IDs in Slack config would fail to parse.

- Token estimation wrong for CJK: estimateTokens() divided byte
  length by 4, but CJK characters are 3 bytes each, causing ~3x
  overestimation and premature summarization. Switched to
  utf8.RuneCountInString() / 3 for better cross-language accuracy.

Also added unit tests for the session filename sanitization.

Ref #116
This commit is contained in:
xiaoen
2026-02-15 00:28:36 +08:00
parent 1cff7d4e37
commit 0a88ff0817
5 changed files with 103 additions and 10 deletions

View File

@@ -16,6 +16,7 @@ import (
"sync"
"sync/atomic"
"time"
"unicode/utf8"
"github.com/sipeed/picoclaw/pkg/bus"
"github.com/sipeed/picoclaw/pkg/config"
@@ -768,10 +769,13 @@ func (al *AgentLoop) summarizeBatch(ctx context.Context, batch []providers.Messa
}
// estimateTokens estimates the number of tokens in a message list.
// Uses rune count instead of byte length so that CJK and other multi-byte
// characters are not over-counted (a Chinese character is 3 bytes but roughly
// one token).
func (al *AgentLoop) estimateTokens(messages []providers.Message) int {
total := 0
for _, m := range messages {
total += len(m.Content) / 4 // Simple heuristic: 4 chars per token
total += utf8.RuneCountInString(m.Content) / 3
}
return total
}

View File

@@ -130,10 +130,10 @@ type DingTalkConfig struct {
}
type SlackConfig struct {
Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_SLACK_ENABLED"`
BotToken string `json:"bot_token" env:"PICOCLAW_CHANNELS_SLACK_BOT_TOKEN"`
AppToken string `json:"app_token" env:"PICOCLAW_CHANNELS_SLACK_APP_TOKEN"`
AllowFrom []string `json:"allow_from" env:"PICOCLAW_CHANNELS_SLACK_ALLOW_FROM"`
Enabled bool `json:"enabled" env:"PICOCLAW_CHANNELS_SLACK_ENABLED"`
BotToken string `json:"bot_token" env:"PICOCLAW_CHANNELS_SLACK_BOT_TOKEN"`
AppToken string `json:"app_token" env:"PICOCLAW_CHANNELS_SLACK_APP_TOKEN"`
AllowFrom FlexibleStringSlice `json:"allow_from" env:"PICOCLAW_CHANNELS_SLACK_ALLOW_FROM"`
}
type LINEConfig struct {
@@ -261,7 +261,7 @@ func DefaultConfig() *Config {
Enabled: false,
BotToken: "",
AppToken: "",
AllowFrom: []string{},
AllowFrom: FlexibleStringSlice{},
},
LINE: LINEConfig{
Enabled: false,

View File

@@ -15,6 +15,7 @@ import (
"net/http"
"net/url"
"strings"
"time"
"github.com/sipeed/picoclaw/pkg/auth"
"github.com/sipeed/picoclaw/pkg/config"
@@ -28,7 +29,7 @@ type HTTPProvider struct {
func NewHTTPProvider(apiKey, apiBase, proxy string) *HTTPProvider {
client := &http.Client{
Timeout: 0,
Timeout: 120 * time.Second,
}
if proxy != "" {

View File

@@ -145,13 +145,27 @@ func (sm *SessionManager) TruncateHistory(key string, keepLast int) {
session.Updated = time.Now()
}
// sanitizeFilename converts a session key into a cross-platform safe filename.
// Session keys use "channel:chatID" (e.g. "telegram:123456") but ':' is the
// volume separator on Windows, so filepath.Base would misinterpret the key.
// We replace it with '_'. The original key is preserved inside the JSON file,
// so loadSessions still maps back to the right in-memory key.
func sanitizeFilename(key string) string {
return strings.ReplaceAll(key, ":", "_")
}
func (sm *SessionManager) Save(key string) error {
if sm.storage == "" {
return nil
}
// Validate key to avoid invalid filenames and path traversal.
if key == "" || key == "." || key == ".." || key != filepath.Base(key) || strings.Contains(key, "/") || strings.Contains(key, "\\") {
filename := sanitizeFilename(key)
// filepath.IsLocal rejects empty names, "..", absolute paths, and
// OS-reserved device names (NUL, COM1 … on Windows).
// The extra checks reject "." and any directory separators so that
// the session file is always written directly inside sm.storage.
if filename == "." || !filepath.IsLocal(filename) || strings.ContainsAny(filename, `/\`) {
return os.ErrInvalid
}
@@ -182,7 +196,7 @@ func (sm *SessionManager) Save(key string) error {
return err
}
sessionPath := filepath.Join(sm.storage, key+".json")
sessionPath := filepath.Join(sm.storage, filename+".json")
tmpFile, err := os.CreateTemp(sm.storage, "session-*.tmp")
if err != nil {
return err

View File

@@ -0,0 +1,74 @@
package session
import (
"os"
"path/filepath"
"testing"
)
func TestSanitizeFilename(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"simple", "simple"},
{"telegram:123456", "telegram_123456"},
{"discord:987654321", "discord_987654321"},
{"slack:C01234", "slack_C01234"},
{"no-colons-here", "no-colons-here"},
{"multiple:colons:here", "multiple_colons_here"},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
got := sanitizeFilename(tt.input)
if got != tt.expected {
t.Errorf("sanitizeFilename(%q) = %q, want %q", tt.input, got, tt.expected)
}
})
}
}
func TestSave_WithColonInKey(t *testing.T) {
tmpDir := t.TempDir()
sm := NewSessionManager(tmpDir)
// Create a session with a key containing colon (typical channel session key).
key := "telegram:123456"
sm.GetOrCreate(key)
sm.AddMessage(key, "user", "hello")
// Save should succeed even though the key contains ':'
if err := sm.Save(key); err != nil {
t.Fatalf("Save(%q) failed: %v", key, err)
}
// The file on disk should use sanitized name.
expectedFile := filepath.Join(tmpDir, "telegram_123456.json")
if _, err := os.Stat(expectedFile); os.IsNotExist(err) {
t.Fatalf("expected session file %s to exist", expectedFile)
}
// Load into a fresh manager and verify the session round-trips.
sm2 := NewSessionManager(tmpDir)
history := sm2.GetHistory(key)
if len(history) != 1 {
t.Fatalf("expected 1 message after reload, got %d", len(history))
}
if history[0].Content != "hello" {
t.Errorf("expected message content %q, got %q", "hello", history[0].Content)
}
}
func TestSave_RejectsPathTraversal(t *testing.T) {
tmpDir := t.TempDir()
sm := NewSessionManager(tmpDir)
badKeys := []string{"", ".", "..", "foo/bar", "foo\\bar"}
for _, key := range badKeys {
sm.GetOrCreate(key)
if err := sm.Save(key); err == nil {
t.Errorf("Save(%q) should have failed but didn't", key)
}
}
}