Add health check endpoints (/health and /ready) for container orchestration liveness and readiness probes. (#104)
* added health endpoint
* formatted files

---------

Co-authored-by: lxowalle <83055338+lxowalle@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
0cb9387cf8
commit
341dbd3007
@@ -22,6 +22,10 @@ FROM alpine:3.23
|
|||||||
|
|
||||||
RUN apk add --no-cache ca-certificates tzdata curl
|
RUN apk add --no-cache ca-certificates tzdata curl
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||||
|
CMD wget -q --spider http://localhost:18790/health || exit 1
|
||||||
|
|
||||||
# Copy binary
|
# Copy binary
|
||||||
COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw
|
COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -28,6 +29,7 @@ import (
|
|||||||
"github.com/sipeed/picoclaw/pkg/config"
|
"github.com/sipeed/picoclaw/pkg/config"
|
||||||
"github.com/sipeed/picoclaw/pkg/cron"
|
"github.com/sipeed/picoclaw/pkg/cron"
|
||||||
"github.com/sipeed/picoclaw/pkg/devices"
|
"github.com/sipeed/picoclaw/pkg/devices"
|
||||||
|
"github.com/sipeed/picoclaw/pkg/health"
|
||||||
"github.com/sipeed/picoclaw/pkg/heartbeat"
|
"github.com/sipeed/picoclaw/pkg/heartbeat"
|
||||||
"github.com/sipeed/picoclaw/pkg/logger"
|
"github.com/sipeed/picoclaw/pkg/logger"
|
||||||
"github.com/sipeed/picoclaw/pkg/migrate"
|
"github.com/sipeed/picoclaw/pkg/migrate"
|
||||||
@@ -658,6 +660,14 @@ func gatewayCmd() {
|
|||||||
fmt.Printf("Error starting channels: %v\n", err)
|
fmt.Printf("Error starting channels: %v\n", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
healthServer := health.NewServer(cfg.Gateway.Host, cfg.Gateway.Port)
|
||||||
|
go func() {
|
||||||
|
if err := healthServer.Start(); err != nil && err != http.ErrServerClosed {
|
||||||
|
logger.ErrorCF("health", "Health server error", map[string]interface{}{"error": err.Error()})
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
fmt.Printf("✓ Health endpoints available at http://%s:%d/health and /ready\n", cfg.Gateway.Host, cfg.Gateway.Port)
|
||||||
|
|
||||||
go agentLoop.Run(ctx)
|
go agentLoop.Run(ctx)
|
||||||
|
|
||||||
sigChan := make(chan os.Signal, 1)
|
sigChan := make(chan os.Signal, 1)
|
||||||
@@ -666,6 +676,7 @@ func gatewayCmd() {
|
|||||||
|
|
||||||
fmt.Println("\nShutting down...")
|
fmt.Println("\nShutting down...")
|
||||||
cancel()
|
cancel()
|
||||||
|
healthServer.Stop(context.Background())
|
||||||
deviceService.Stop()
|
deviceService.Stop()
|
||||||
heartbeatService.Stop()
|
heartbeatService.Stop()
|
||||||
cronService.Stop()
|
cronService.Stop()
|
||||||
|
|||||||
164
pkg/health/server.go
Normal file
164
pkg/health/server.go
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
package health
|
||||||
|
|
||||||
|
import (
	"context"
	"encoding/json"
	"fmt"
	"net"
	"net/http"
	"sync"
	"time"
)
|
||||||
|
|
||||||
|
type Server struct {
|
||||||
|
server *http.Server
|
||||||
|
mu sync.RWMutex
|
||||||
|
ready bool
|
||||||
|
checks map[string]Check
|
||||||
|
startTime time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check is the recorded result of one named check, as it appears in the
// "checks" object of a probe response.
type Check struct {
	// Name identifies the check.
	Name string `json:"name"`
	// Status is the textual outcome of the check.
	Status string `json:"status"`
	// Message is optional detail; it is left out of the JSON when empty.
	Message string `json:"message,omitempty"`
	// Timestamp records when the result was captured.
	Timestamp time.Time `json:"timestamp"`
}
|
||||||
|
|
||||||
|
type StatusResponse struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Uptime string `json:"uptime"`
|
||||||
|
Checks map[string]Check `json:"checks,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewServer(host string, port int) *Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
s := &Server{
|
||||||
|
ready: false,
|
||||||
|
checks: make(map[string]Check),
|
||||||
|
startTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
mux.HandleFunc("/health", s.healthHandler)
|
||||||
|
mux.HandleFunc("/ready", s.readyHandler)
|
||||||
|
|
||||||
|
addr := fmt.Sprintf("%s:%d", host, port)
|
||||||
|
s.server = &http.Server{
|
||||||
|
Addr: addr,
|
||||||
|
Handler: mux,
|
||||||
|
ReadTimeout: 5 * time.Second,
|
||||||
|
WriteTimeout: 5 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) Start() error {
|
||||||
|
s.mu.Lock()
|
||||||
|
s.ready = true
|
||||||
|
s.mu.Unlock()
|
||||||
|
return s.server.ListenAndServe()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) StartContext(ctx context.Context) error {
|
||||||
|
s.mu.Lock()
|
||||||
|
s.ready = true
|
||||||
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
errCh := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
errCh <- s.server.ListenAndServe()
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case err := <-errCh:
|
||||||
|
return err
|
||||||
|
case <-ctx.Done():
|
||||||
|
return s.server.Shutdown(context.Background())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) Stop(ctx context.Context) error {
|
||||||
|
s.mu.Lock()
|
||||||
|
s.ready = false
|
||||||
|
s.mu.Unlock()
|
||||||
|
return s.server.Shutdown(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) SetReady(ready bool) {
|
||||||
|
s.mu.Lock()
|
||||||
|
s.ready = ready
|
||||||
|
s.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) RegisterCheck(name string, checkFn func() (bool, string)) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
status, msg := checkFn()
|
||||||
|
s.checks[name] = Check{
|
||||||
|
Name: name,
|
||||||
|
Status: statusString(status),
|
||||||
|
Message: msg,
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) healthHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
|
||||||
|
uptime := time.Since(s.startTime)
|
||||||
|
resp := StatusResponse{
|
||||||
|
Status: "ok",
|
||||||
|
Uptime: uptime.String(),
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(w).Encode(resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) readyHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
s.mu.RLock()
|
||||||
|
ready := s.ready
|
||||||
|
checks := make(map[string]Check)
|
||||||
|
for k, v := range s.checks {
|
||||||
|
checks[k] = v
|
||||||
|
}
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !ready {
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
json.NewEncoder(w).Encode(StatusResponse{
|
||||||
|
Status: "not ready",
|
||||||
|
Checks: checks,
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, check := range checks {
|
||||||
|
if check.Status == "fail" {
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
json.NewEncoder(w).Encode(StatusResponse{
|
||||||
|
Status: "not ready",
|
||||||
|
Checks: checks,
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
uptime := time.Since(s.startTime)
|
||||||
|
json.NewEncoder(w).Encode(StatusResponse{
|
||||||
|
Status: "ready",
|
||||||
|
Uptime: uptime.String(),
|
||||||
|
Checks: checks,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// statusString converts a check outcome into its textual status:
// "ok" for true, "fail" for false.
func statusString(ok bool) string {
	if !ok {
		return "fail"
	}
	return "ok"
}
|
||||||
Reference in New Issue
Block a user