From 341dbd30073270117a1f3fd1e019e519781613e3 Mon Sep 17 00:00:00 2001
From: fahadahmadansari111 <57217561+fahadahmadansari111@users.noreply.github.com>
Date: Mon, 16 Feb 2026 09:23:11 +0530
Subject: [PATCH] Add health check endpoints (/health and /ready) for container
 orchestration liveness and readiness probes. (#104)

* added health endpoint

* formatted files

---------

Co-authored-by: lxowalle <83055338+lxowalle@users.noreply.github.com>
---
 Dockerfile           |   4 ++
 cmd/picoclaw/main.go |  11 +++
 pkg/health/server.go | 193 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 208 insertions(+)
 create mode 100644 pkg/health/server.go

diff --git a/Dockerfile b/Dockerfile
index 433d962..dd98ec0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,6 +22,10 @@ FROM alpine:3.23
 
 RUN apk add --no-cache ca-certificates tzdata curl
 
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+  CMD wget -q --spider http://localhost:18790/health || exit 1
+
 # Copy binary
 COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw
 
diff --git a/cmd/picoclaw/main.go b/cmd/picoclaw/main.go
index 2129662..cd3637a 100644
--- a/cmd/picoclaw/main.go
+++ b/cmd/picoclaw/main.go
@@ -13,6 +13,7 @@ import (
 	"fmt"
 	"io"
 	"io/fs"
+	"net/http"
 	"os"
 	"os/signal"
 	"path/filepath"
@@ -28,6 +29,7 @@ import (
 	"github.com/sipeed/picoclaw/pkg/config"
 	"github.com/sipeed/picoclaw/pkg/cron"
 	"github.com/sipeed/picoclaw/pkg/devices"
+	"github.com/sipeed/picoclaw/pkg/health"
 	"github.com/sipeed/picoclaw/pkg/heartbeat"
 	"github.com/sipeed/picoclaw/pkg/logger"
 	"github.com/sipeed/picoclaw/pkg/migrate"
@@ -658,6 +660,14 @@ func gatewayCmd() {
 		fmt.Printf("Error starting channels: %v\n", err)
 	}
 
+	healthServer := health.NewServer(cfg.Gateway.Host, cfg.Gateway.Port)
+	go func() {
+		if err := healthServer.Start(); err != nil && err != http.ErrServerClosed {
+			logger.ErrorCF("health", "Health server error", map[string]interface{}{"error": err.Error()})
+		}
+	}()
+	fmt.Printf("✓ Health endpoints available at http://%s:%d/health and /ready\n", cfg.Gateway.Host, cfg.Gateway.Port)
+
 	go agentLoop.Run(ctx)
 
 	sigChan := make(chan os.Signal, 1)
@@ -666,6 +676,7 @@ func gatewayCmd() {
 
 	fmt.Println("\nShutting down...")
 	cancel()
+	healthServer.Stop(context.Background())
 	deviceService.Stop()
 	heartbeatService.Stop()
 	cronService.Stop()
diff --git a/pkg/health/server.go b/pkg/health/server.go
new file mode 100644
index 0000000..77b3603
--- /dev/null
+++ b/pkg/health/server.go
@@ -0,0 +1,193 @@
+// Package health provides a small HTTP server exposing liveness (/health)
+// and readiness (/ready) endpoints for container orchestration probes.
+package health
+
+import (
+	"context"
+	"encoding/json"
+	"net"
+	"net/http"
+	"strconv"
+	"sync"
+	"time"
+)
+
+// shutdownTimeout bounds the graceful shutdown StartContext performs once
+// its context is canceled, so a stuck connection cannot block exit forever.
+const shutdownTimeout = 5 * time.Second
+
+// Server serves the /health and /ready endpoints and holds the readiness
+// flag and check results that /ready reports.
+type Server struct {
+	server    *http.Server
+	mu        sync.RWMutex // guards ready and checks
+	ready     bool
+	checks    map[string]Check
+	startTime time.Time
+}
+
+// Check is the recorded outcome of one named readiness check.
+type Check struct {
+	Name      string    `json:"name"`
+	Status    string    `json:"status"`
+	Message   string    `json:"message,omitempty"`
+	Timestamp time.Time `json:"timestamp"`
+}
+
+// StatusResponse is the JSON body written by both endpoints.
+type StatusResponse struct {
+	Status string           `json:"status"`
+	Uptime string           `json:"uptime"`
+	Checks map[string]Check `json:"checks,omitempty"`
+}
+
+// NewServer returns a Server configured to listen on host:port. Nothing is
+// bound until Start or StartContext is called.
+func NewServer(host string, port int) *Server {
+	s := &Server{
+		checks:    make(map[string]Check),
+		startTime: time.Now(),
+	}
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/health", s.healthHandler)
+	mux.HandleFunc("/ready", s.readyHandler)
+
+	s.server = &http.Server{
+		// JoinHostPort brackets IPv6 literals; "%s:%d" would not.
+		Addr:         net.JoinHostPort(host, strconv.Itoa(port)),
+		Handler:      mux,
+		ReadTimeout:  5 * time.Second,
+		WriteTimeout: 5 * time.Second,
+	}
+
+	return s
+}
+
+// Start binds the listen address, marks the server ready and serves until
+// the server is shut down. The returned error is never nil; after Stop it is
+// http.ErrServerClosed.
+func (s *Server) Start() error {
+	ln, err := net.Listen("tcp", s.server.Addr)
+	if err != nil {
+		return err
+	}
+
+	// Report ready only once the port is bound, and never after serving ends.
+	s.SetReady(true)
+	defer s.SetReady(false)
+	return s.server.Serve(ln)
+}
+
+// StartContext runs Start in the background and returns its error if serving
+// ends first. If ctx is canceled first, it shuts the server down, waiting at
+// most shutdownTimeout, and returns the shutdown result.
+func (s *Server) StartContext(ctx context.Context) error {
+	errCh := make(chan error, 1) // buffered: the goroutine must never block
+	go func() {
+		errCh <- s.Start()
+	}()
+
+	select {
+	case err := <-errCh:
+		return err
+	case <-ctx.Done():
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
+		defer cancel()
+		return s.Stop(shutdownCtx)
+	}
+}
+
+// Stop marks the server not ready and gracefully shuts it down, waiting for
+// in-flight requests until ctx is done.
+func (s *Server) Stop(ctx context.Context) error {
+	s.SetReady(false)
+	return s.server.Shutdown(ctx)
+}
+
+// SetReady sets the readiness flag reported by /ready.
+func (s *Server) SetReady(ready bool) {
+	s.mu.Lock()
+	s.ready = ready
+	s.mu.Unlock()
+}
+
+// RegisterCheck runs checkFn once, immediately, and stores its result under
+// name, replacing any earlier result with that name. The check is not re-run
+// on later requests; call RegisterCheck again to refresh it.
+func (s *Server) RegisterCheck(name string, checkFn func() (bool, string)) {
+	// Run the callback before locking: a slow check must not stall the
+	// handlers, and one that calls back into Server must not deadlock.
+	ok, msg := checkFn()
+	check := Check{
+		Name:      name,
+		Status:    statusString(ok),
+		Message:   msg,
+		Timestamp: time.Now(),
+	}
+
+	s.mu.Lock()
+	s.checks[name] = check
+	s.mu.Unlock()
+}
+
+// healthHandler is the liveness probe: it answers 200 whenever the process
+// can serve HTTP at all.
+func (s *Server) healthHandler(w http.ResponseWriter, r *http.Request) {
+	writeJSON(w, http.StatusOK, StatusResponse{
+		Status: "ok",
+		Uptime: time.Since(s.startTime).String(),
+	})
+}
+
+// readyHandler is the readiness probe: it answers 200 only when the ready
+// flag is set and no registered check has failed, and 503 otherwise.
+func (s *Server) readyHandler(w http.ResponseWriter, r *http.Request) {
+	// Snapshot under the lock so encoding never races with RegisterCheck.
+	s.mu.RLock()
+	ready := s.ready
+	checks := make(map[string]Check, len(s.checks))
+	for name, check := range s.checks {
+		checks[name] = check
+	}
+	s.mu.RUnlock()
+
+	resp := StatusResponse{
+		Status: "ready",
+		Uptime: time.Since(s.startTime).String(),
+		Checks: checks,
+	}
+	code := http.StatusOK
+	if !ready || anyFailed(checks) {
+		resp.Status = "not ready"
+		code = http.StatusServiceUnavailable
+	}
+	writeJSON(w, code, resp)
+}
+
+// anyFailed reports whether at least one check has the "fail" status.
+func anyFailed(checks map[string]Check) bool {
+	for _, check := range checks {
+		if check.Status == "fail" {
+			return true
+		}
+	}
+	return false
+}
+
+// writeJSON sends resp as a JSON body with the given HTTP status code.
+func writeJSON(w http.ResponseWriter, code int, resp StatusResponse) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(code)
+	// The status line is already sent, so an encode failure (typically a
+	// client that hung up) has nowhere to be reported.
+	_ = json.NewEncoder(w).Encode(resp)
+}
+
+// statusString maps a check result to its wire value, "ok" or "fail".
+func statusString(ok bool) string {
+	if ok {
+		return "ok"
+	}
+	return "fail"
+}