Add health check endpoints (/health and /ready) for container orchestration liveness and readiness probes. (#104)
* added health endpoint * formatted files --------- Co-authored-by: lxowalle <83055338+lxowalle@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
0cb9387cf8
commit
341dbd3007
@@ -22,6 +22,10 @@ FROM alpine:3.23
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata curl
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||
CMD wget -q --spider http://localhost:18790/health || exit 1
|
||||
|
||||
# Copy binary
|
||||
COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -28,6 +29,7 @@ import (
|
||||
"github.com/sipeed/picoclaw/pkg/config"
|
||||
"github.com/sipeed/picoclaw/pkg/cron"
|
||||
"github.com/sipeed/picoclaw/pkg/devices"
|
||||
"github.com/sipeed/picoclaw/pkg/health"
|
||||
"github.com/sipeed/picoclaw/pkg/heartbeat"
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
"github.com/sipeed/picoclaw/pkg/migrate"
|
||||
@@ -658,6 +660,14 @@ func gatewayCmd() {
|
||||
fmt.Printf("Error starting channels: %v\n", err)
|
||||
}
|
||||
|
||||
healthServer := health.NewServer(cfg.Gateway.Host, cfg.Gateway.Port)
|
||||
go func() {
|
||||
if err := healthServer.Start(); err != nil && err != http.ErrServerClosed {
|
||||
logger.ErrorCF("health", "Health server error", map[string]interface{}{"error": err.Error()})
|
||||
}
|
||||
}()
|
||||
fmt.Printf("✓ Health endpoints available at http://%s:%d/health and /ready\n", cfg.Gateway.Host, cfg.Gateway.Port)
|
||||
|
||||
go agentLoop.Run(ctx)
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
@@ -666,6 +676,7 @@ func gatewayCmd() {
|
||||
|
||||
fmt.Println("\nShutting down...")
|
||||
cancel()
|
||||
healthServer.Stop(context.Background())
|
||||
deviceService.Stop()
|
||||
heartbeatService.Stop()
|
||||
cronService.Stop()
|
||||
|
||||
164
pkg/health/server.go
Normal file
164
pkg/health/server.go
Normal file
@@ -0,0 +1,164 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Server struct {
|
||||
server *http.Server
|
||||
mu sync.RWMutex
|
||||
ready bool
|
||||
checks map[string]Check
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
type Check struct {
|
||||
Name string `json:"name"`
|
||||
Status string `json:"status"`
|
||||
Message string `json:"message,omitempty"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
}
|
||||
|
||||
type StatusResponse struct {
|
||||
Status string `json:"status"`
|
||||
Uptime string `json:"uptime"`
|
||||
Checks map[string]Check `json:"checks,omitempty"`
|
||||
}
|
||||
|
||||
func NewServer(host string, port int) *Server {
|
||||
mux := http.NewServeMux()
|
||||
s := &Server{
|
||||
ready: false,
|
||||
checks: make(map[string]Check),
|
||||
startTime: time.Now(),
|
||||
}
|
||||
|
||||
mux.HandleFunc("/health", s.healthHandler)
|
||||
mux.HandleFunc("/ready", s.readyHandler)
|
||||
|
||||
addr := fmt.Sprintf("%s:%d", host, port)
|
||||
s.server = &http.Server{
|
||||
Addr: addr,
|
||||
Handler: mux,
|
||||
ReadTimeout: 5 * time.Second,
|
||||
WriteTimeout: 5 * time.Second,
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *Server) Start() error {
|
||||
s.mu.Lock()
|
||||
s.ready = true
|
||||
s.mu.Unlock()
|
||||
return s.server.ListenAndServe()
|
||||
}
|
||||
|
||||
func (s *Server) StartContext(ctx context.Context) error {
|
||||
s.mu.Lock()
|
||||
s.ready = true
|
||||
s.mu.Unlock()
|
||||
|
||||
errCh := make(chan error, 1)
|
||||
go func() {
|
||||
errCh <- s.server.ListenAndServe()
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-errCh:
|
||||
return err
|
||||
case <-ctx.Done():
|
||||
return s.server.Shutdown(context.Background())
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) Stop(ctx context.Context) error {
|
||||
s.mu.Lock()
|
||||
s.ready = false
|
||||
s.mu.Unlock()
|
||||
return s.server.Shutdown(ctx)
|
||||
}
|
||||
|
||||
func (s *Server) SetReady(ready bool) {
|
||||
s.mu.Lock()
|
||||
s.ready = ready
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
func (s *Server) RegisterCheck(name string, checkFn func() (bool, string)) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
status, msg := checkFn()
|
||||
s.checks[name] = Check{
|
||||
Name: name,
|
||||
Status: statusString(status),
|
||||
Message: msg,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) healthHandler(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
uptime := time.Since(s.startTime)
|
||||
resp := StatusResponse{
|
||||
Status: "ok",
|
||||
Uptime: uptime.String(),
|
||||
}
|
||||
|
||||
json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
func (s *Server) readyHandler(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
||||
s.mu.RLock()
|
||||
ready := s.ready
|
||||
checks := make(map[string]Check)
|
||||
for k, v := range s.checks {
|
||||
checks[k] = v
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
|
||||
if !ready {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
json.NewEncoder(w).Encode(StatusResponse{
|
||||
Status: "not ready",
|
||||
Checks: checks,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
for _, check := range checks {
|
||||
if check.Status == "fail" {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
json.NewEncoder(w).Encode(StatusResponse{
|
||||
Status: "not ready",
|
||||
Checks: checks,
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
uptime := time.Since(s.startTime)
|
||||
json.NewEncoder(w).Encode(StatusResponse{
|
||||
Status: "ready",
|
||||
Uptime: uptime.String(),
|
||||
Checks: checks,
|
||||
})
|
||||
}
|
||||
|
||||
func statusString(ok bool) string {
|
||||
if ok {
|
||||
return "ok"
|
||||
}
|
||||
return "fail"
|
||||
}
|
||||
Reference in New Issue
Block a user