Better server manager

This commit is contained in:
Hein
2025-12-29 17:19:16 +02:00
parent 8f83e8fdc1
commit d4a6f9c4c2
8 changed files with 1475 additions and 857 deletions

View File

@@ -4,26 +4,173 @@ import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"os"
"os/signal"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/bitechdev/ResolveSpec/pkg/logger"
"github.com/bitechdev/ResolveSpec/pkg/middleware"
"github.com/klauspost/compress/gzhttp"
"golang.org/x/net/http2"
)
// serverManager manages a collection of server instances.
// gracefulServer wraps http.Server with graceful shutdown capabilities (internal type).
// It counts requests that are currently being served, records that shutdown
// has started, and signals the end of the shutdown sequence by closing
// shutdownComplete.
type gracefulServer struct {
// server is the wrapped HTTP server; shutdown calls its Shutdown method.
server *http.Server
// shutdownTimeout bounds the whole shutdown sequence (drain plus server stop).
shutdownTimeout time.Duration
// drainTimeout bounds only the wait for in-flight requests. The drain context
// is derived from the shutdown context, so it cannot outlast shutdownTimeout.
drainTimeout time.Duration
// inFlightRequests is the number of requests currently inside the wrapped handler.
inFlightRequests atomic.Int64
// isShuttingDown is set to true at the start of shutdown.
isShuttingDown atomic.Bool
// shutdownOnce ensures the shutdown sequence runs at most once.
shutdownOnce sync.Once
// shutdownComplete is closed as the last step of shutdown; wait blocks on it.
shutdownComplete chan struct{}
}
// trackRequestsMiddleware wraps next so that every request is counted as
// in-flight while it is being served, and so that requests arriving after
// shutdown has begun are rejected with 503 Service Unavailable and a JSON body.
//
// The request is counted BEFORE the shutdown flag is inspected. shutdown sets
// the flag and then polls the counter; if the order here were check-then-count,
// a request could pass the check, shutdown could observe a zero counter and
// finish draining, and only then would the request be counted and served.
// Counting first guarantees that any request which sees the flag as false is
// already visible to the drain loop. Rejected requests are therefore counted
// for the brief moment it takes to write the 503.
func (gs *gracefulServer) trackRequestsMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		gs.inFlightRequests.Add(1)
		defer gs.inFlightRequests.Add(-1)

		if gs.isShuttingDown.Load() {
			// The body is JSON, so label it as such instead of the text/plain
			// that http.Error would force.
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(http.StatusServiceUnavailable)
			// Best effort: the server is going away and the client may already
			// have disconnected, so a failed write is not actionable.
			_, _ = w.Write([]byte(`{"error":"service_unavailable","message":"Server is shutting down"}`))
			return
		}

		next.ServeHTTP(w, r)
	})
}
// shutdown runs the graceful shutdown sequence at most once: it flags the
// server as shutting down, waits for in-flight requests to finish, stops the
// HTTP server, and finally closes shutdownComplete.
//
// The whole sequence is bounded by shutdownTimeout (derived from ctx); the
// drain phase is additionally bounded by drainTimeout. The returned error is
// the drain error if draining failed, otherwise the error from stopping the
// server. Calls after the first do no work and return nil.
func (gs *gracefulServer) shutdown(ctx context.Context) error {
	var result error

	gs.shutdownOnce.Do(func() {
		logger.Info("Starting graceful shutdown...")

		// From this point trackRequestsMiddleware turns new requests away.
		gs.isShuttingDown.Store(true)

		// overallCtx caps the full sequence; drainCtx is nested inside it so the
		// drain phase can never outlive the overall budget.
		overallCtx, releaseOverall := context.WithTimeout(ctx, gs.shutdownTimeout)
		defer releaseOverall()
		drainCtx, releaseDrain := context.WithTimeout(overallCtx, gs.drainTimeout)
		defer releaseDrain()

		drainErr := gs.drainRequests(drainCtx)
		if drainErr != nil {
			logger.Error("Error draining requests: %v", drainErr)
		}

		// The server is stopped even when draining failed.
		logger.Info("Shutting down HTTP server...")
		stopErr := gs.server.Shutdown(overallCtx)
		if stopErr != nil {
			logger.Error("Error shutting down server: %v", stopErr)
		}

		// A drain failure takes precedence over a server-stop failure.
		switch {
		case drainErr != nil:
			result = drainErr
		case stopErr != nil:
			result = stopErr
		}

		logger.Info("Graceful shutdown complete")
		close(gs.shutdownComplete)
	})

	return result
}
// drainRequests polls the in-flight counter every 100ms until it reaches zero
// or ctx is done. It returns nil once no requests remain, and an error naming
// the last observed count when ctx ends first. The counter is read before ctx
// is consulted, so an already-drained server succeeds even with an expired ctx.
func (gs *gracefulServer) drainRequests(ctx context.Context) error {
	poll := time.NewTicker(100 * time.Millisecond)
	defer poll.Stop()

	began := time.Now()

	for pending := gs.inFlightRequests.Load(); pending != 0; pending = gs.inFlightRequests.Load() {
		select {
		case <-ctx.Done():
			logger.Warn("Drain timeout exceeded with %d requests still in flight", pending)
			return fmt.Errorf("drain timeout exceeded: %d requests still in flight", pending)
		case <-poll.C:
			logger.Debug("Waiting for %d in-flight requests to complete...", pending)
		}
	}

	logger.Info("All requests drained in %v", time.Since(began))
	return nil
}
// inFlightRequestsCount returns the current value of the in-flight request
// counter, i.e. the number of requests presently inside the tracking middleware.
func (gs *gracefulServer) inFlightRequestsCount() int64 {
return gs.inFlightRequests.Load()
}
// isShutdown reports whether shutdown has been initiated. The flag is set at
// the start of shutdown; no code visible here ever resets it, so it stays true
// once shutdown has begun.
func (gs *gracefulServer) isShutdown() bool {
return gs.isShuttingDown.Load()
}
// wait blocks until shutdownComplete is closed, which shutdown does as its
// final step. If shutdown is never invoked, wait never returns.
func (gs *gracefulServer) wait() {
<-gs.shutdownComplete
}
// healthCheckHandler returns a handler that responds to health checks.
//
// It answers 200 with {"status":"healthy"} while the server is running and
// 503 with {"status":"shutting_down"} once shutdown has begun. Both responses
// are sent as application/json (the 503 was previously emitted via http.Error,
// which labelled the JSON body as text/plain), and a failed write is logged on
// either path.
func (gs *gracefulServer) healthCheckHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		status, body := http.StatusOK, `{"status":"healthy"}`
		if gs.isShutdown() {
			status, body = http.StatusServiceUnavailable, `{"status":"shutting_down"}`
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		if _, err := w.Write([]byte(body)); err != nil {
			logger.Warn("Failed to write health check response: %v", err)
		}
	}
}
// readinessHandler returns a handler for readiness checks.
//
// While the server is running it answers 200 with
// {"ready":true,"in_flight_requests":N}; once shutdown has begun it answers
// 503 with {"ready":false,"reason":"shutting_down"}. Both responses are sent
// as application/json (the 503 was previously emitted via http.Error, which
// labelled the JSON body as text/plain). Write failures are logged rather
// than silently dropped, matching healthCheckHandler.
func (gs *gracefulServer) readinessHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")

		if gs.isShutdown() {
			w.WriteHeader(http.StatusServiceUnavailable)
			if _, err := w.Write([]byte(`{"ready":false,"reason":"shutting_down"}`)); err != nil {
				logger.Warn("Failed to write readiness response: %v", err)
			}
			return
		}

		inFlight := gs.inFlightRequestsCount()
		w.WriteHeader(http.StatusOK)
		if _, err := fmt.Fprintf(w, `{"ready":true,"in_flight_requests":%d}`, inFlight); err != nil {
			logger.Warn("Failed to write readiness response: %v", err)
		}
	}
}
// serverManager manages a collection of server instances with graceful shutdown support.
// NOTE(review): this listing looks like a rendered diff without +/- markers, so
// the instances/mu pair appears twice (removed and re-added lines) — presumably
// the real file declares each field once; confirm against the repository.
type serverManager struct {
instances map[string]Instance
mu sync.RWMutex
// instances holds the managed servers, keyed by name.
instances map[string]Instance
// mu guards instances.
mu sync.RWMutex
// shutdownCallbacks are run, in registration order, before the instances are stopped.
shutdownCallbacks []ShutdownCallback
// callbacksMu guards shutdownCallbacks.
callbacksMu sync.Mutex
}
// NewManager creates a new server manager with an empty instance map and an
// empty list of shutdown callbacks.
// NOTE(review): the duplicated `instances:` line below looks like the removed
// and re-added line of a rendered diff — presumably only one exists in the real
// file; confirm against the repository.
func NewManager() Manager {
return &serverManager{
instances: make(map[string]Instance),
instances: make(map[string]Instance),
shutdownCallbacks: make([]ShutdownCallback, 0),
}
}
@@ -74,7 +221,7 @@ func (sm *serverManager) Remove(name string) error {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := instance.Stop(ctx); err != nil {
logger.Warn("Failed to gracefully stop server '%s' on remove: %v", name, err, context.Background())
logger.Warn("Failed to gracefully stop server '%s' on remove: %v", name, err)
}
delete(sm.instances, name)
@@ -94,7 +241,6 @@ func (sm *serverManager) StartAll() error {
}
if len(startErrors) > 0 {
// In a real-world scenario, you might want a more sophisticated error handling strategy
return fmt.Errorf("encountered errors while starting servers: %v", startErrors)
}
return nil
@@ -102,6 +248,11 @@ func (sm *serverManager) StartAll() error {
// StopAll gracefully shuts down all running server instances.
// It delegates to StopAllWithContext with context.Background(), so the caller
// imposes no deadline or cancellation of its own.
func (sm *serverManager) StopAll() error {
return sm.StopAllWithContext(context.Background())
}
// StopAllWithContext gracefully shuts down all running server instances with a context.
func (sm *serverManager) StopAllWithContext(ctx context.Context) error {
sm.mu.RLock()
instancesToStop := make([]Instance, 0, len(sm.instances))
for _, instance := range sm.instances {
@@ -109,19 +260,38 @@ func (sm *serverManager) StopAll() error {
}
sm.mu.RUnlock()
logger.Info("Shutting down all servers...", context.Background())
logger.Info("Shutting down all servers...")
// Execute shutdown callbacks first
sm.callbacksMu.Lock()
callbacks := make([]ShutdownCallback, len(sm.shutdownCallbacks))
copy(callbacks, sm.shutdownCallbacks)
sm.callbacksMu.Unlock()
if len(callbacks) > 0 {
logger.Info("Executing %d shutdown callbacks...", len(callbacks))
for i, cb := range callbacks {
if err := cb(ctx); err != nil {
logger.Error("Shutdown callback %d failed: %v", i+1, err)
}
}
}
// Stop all instances in parallel
var shutdownErrors []error
var wg sync.WaitGroup
var errorsMu sync.Mutex
for _, instance := range instancesToStop {
wg.Add(1)
go func(inst Instance) {
defer wg.Done()
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
shutdownCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
defer cancel()
if err := inst.Stop(ctx); err != nil {
shutdownErrors = append(shutdownErrors, fmt.Errorf("failed to stop server '%s': %w", inst.Addr(), err))
if err := inst.Stop(shutdownCtx); err != nil {
errorsMu.Lock()
shutdownErrors = append(shutdownErrors, fmt.Errorf("failed to stop server '%s': %w", inst.Name(), err))
errorsMu.Unlock()
}
}(instance)
}
@@ -131,13 +301,13 @@ func (sm *serverManager) StopAll() error {
if len(shutdownErrors) > 0 {
return fmt.Errorf("encountered errors while stopping servers: %v", shutdownErrors)
}
logger.Info("All servers stopped gracefully.", context.Background())
logger.Info("All servers stopped gracefully.")
return nil
}
// RestartAll gracefully restarts all running server instances.
func (sm *serverManager) RestartAll() error {
logger.Info("Restarting all servers...", context.Background())
logger.Info("Restarting all servers...")
if err := sm.StopAll(); err != nil {
return fmt.Errorf("failed to stop servers during restart: %w", err)
}
@@ -148,7 +318,7 @@ func (sm *serverManager) RestartAll() error {
if err := sm.StartAll(); err != nil {
return fmt.Errorf("failed to start servers during restart: %w", err)
}
logger.Info("All servers restarted successfully.", context.Background())
logger.Info("All servers restarted successfully.")
return nil
}
@@ -164,13 +334,46 @@ func (sm *serverManager) List() []Instance {
return instances
}
// RegisterShutdownCallback registers a callback to be called during shutdown.
// Callbacks are appended under callbacksMu, so registration is safe from
// multiple goroutines, and they are kept in registration order.
//
// A nil callback is ignored (with a warning): storing it would make the
// shutdown path panic when it later invokes every registered callback.
func (sm *serverManager) RegisterShutdownCallback(cb ShutdownCallback) {
	if cb == nil {
		logger.Warn("Ignoring nil shutdown callback")
		return
	}

	sm.callbacksMu.Lock()
	defer sm.callbacksMu.Unlock()
	sm.shutdownCallbacks = append(sm.shutdownCallbacks, cb)
}
// ServeWithGracefulShutdown starts all servers and blocks until a shutdown
// signal (SIGINT or SIGTERM) is received, then stops all servers with a
// 30-second overall deadline.
//
// It returns an error if the servers fail to start, or whatever
// StopAllWithContext returns. The signal registration is released before
// returning, so default signal handling is restored once this call ends.
func (sm *serverManager) ServeWithGracefulShutdown() error {
	if err := sm.StartAll(); err != nil {
		return fmt.Errorf("failed to start servers: %w", err)
	}

	logger.Info("All servers started. Waiting for shutdown signal...")

	// Buffered so a signal delivered before the receive below is not dropped.
	// os.Interrupt is SIGINT, so it is listed only once.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	defer signal.Stop(sigChan)

	sig := <-sigChan
	logger.Info("Received signal: %v, initiating graceful shutdown", sig)

	// Bound the whole shutdown so a stuck server cannot block exit forever.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	return sm.StopAllWithContext(ctx)
}
// serverInstance is a concrete implementation of the Instance interface.
// It wraps gracefulServer to provide graceful shutdown capabilities.
// NOTE(review): this listing looks like a rendered diff without +/- markers;
// the first five fields (cfg, httpServer, mu, running, stopCh) appear to be the
// removed version of the struct and the remaining fields the replacement —
// confirm against the repository.
type serverInstance struct {
cfg Config
httpServer *http.Server
mu sync.RWMutex
running bool
stopCh chan struct{}
// cfg is the configuration this instance was created from.
cfg Config
// gracefulServer owns the underlying http.Server and its shutdown state.
gracefulServer *gracefulServer
certFile string // Path to certificate file (may be temporary for self-signed)
keyFile string // Path to key file (may be temporary for self-signed)
// mu guards running.
mu sync.RWMutex
// running is set when the server is started and cleared when it stops.
running bool
// serverErr carries a serve error other than http.ErrServerClosed from the
// serving goroutine; it is created with capacity 1.
serverErr chan error
}
// newInstance creates a new, unstarted server instance from a config.
@@ -179,12 +382,29 @@ func newInstance(cfg Config) (*serverInstance, error) {
return nil, fmt.Errorf("handler cannot be nil")
}
// Set default timeouts
if cfg.ShutdownTimeout == 0 {
cfg.ShutdownTimeout = 30 * time.Second
}
if cfg.DrainTimeout == 0 {
cfg.DrainTimeout = 25 * time.Second
}
if cfg.ReadTimeout == 0 {
cfg.ReadTimeout = 15 * time.Second
}
if cfg.WriteTimeout == 0 {
cfg.WriteTimeout = 15 * time.Second
}
if cfg.IdleTimeout == 0 {
cfg.IdleTimeout = 60 * time.Second
}
addr := fmt.Sprintf("%s:%d", cfg.Host, cfg.Port)
var handler http.Handler = cfg.Handler
// Wrap with GZIP handler if enabled
if cfg.GZIP {
gz, err := gzhttp.NewWrapper(gzhttp.BestSpeed)
gz, err := gzhttp.NewWrapper()
if err != nil {
return nil, fmt.Errorf("failed to create GZIP wrapper: %w", err)
}
@@ -194,20 +414,33 @@ func newInstance(cfg Config) (*serverInstance, error) {
// Wrap with the panic recovery middleware
handler = middleware.PanicRecovery(handler)
// Here you could add other default middleware like request logging, metrics, etc.
// Configure TLS if any TLS option is enabled
tlsConfig, certFile, keyFile, err := configureTLS(cfg)
if err != nil {
return nil, fmt.Errorf("failed to configure TLS: %w", err)
}
httpServer := &http.Server{
Addr: addr,
Handler: handler,
ReadTimeout: 15 * time.Second,
WriteTimeout: 15 * time.Second,
IdleTimeout: 60 * time.Second,
// Create gracefulServer
gracefulSrv := &gracefulServer{
server: &http.Server{
Addr: addr,
Handler: handler,
ReadTimeout: cfg.ReadTimeout,
WriteTimeout: cfg.WriteTimeout,
IdleTimeout: cfg.IdleTimeout,
TLSConfig: tlsConfig,
},
shutdownTimeout: cfg.ShutdownTimeout,
drainTimeout: cfg.DrainTimeout,
shutdownComplete: make(chan struct{}),
}
return &serverInstance{
cfg: cfg,
httpServer: httpServer,
stopCh: make(chan struct{}),
cfg: cfg,
gracefulServer: gracefulSrv,
certFile: certFile,
keyFile: keyFile,
serverErr: make(chan error, 1),
}, nil
}
@@ -220,42 +453,69 @@ func (s *serverInstance) Start() error {
return fmt.Errorf("server '%s' is already running", s.cfg.Name)
}
hasSSL := s.cfg.SSLCert != "" && s.cfg.SSLKey != ""
// Determine if we're using TLS
useTLS := s.cfg.SSLCert != "" || s.cfg.SSLKey != "" || s.cfg.SelfSignedSSL || s.cfg.AutoTLS
// Wrap handler with request tracking
s.gracefulServer.server.Handler = s.gracefulServer.trackRequestsMiddleware(s.gracefulServer.server.Handler)
go func() {
defer func() {
s.mu.Lock()
s.running = false
s.mu.Unlock()
logger.Info("Server '%s' stopped.", s.cfg.Name, context.Background())
logger.Info("Server '%s' stopped.", s.cfg.Name)
}()
var err error
protocol := "HTTP"
if hasSSL {
if useTLS {
protocol = "HTTPS"
// Configure TLS + HTTP/2
s.httpServer.TLSConfig = &tls.Config{
MinVersion: tls.VersionTLS12,
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr())
// For AutoTLS, we need to use a TLS listener
if s.cfg.AutoTLS {
// Create listener
ln, lnErr := net.Listen("tcp", s.gracefulServer.server.Addr)
if lnErr != nil {
err = fmt.Errorf("failed to create listener: %w", lnErr)
} else {
// Wrap with TLS
tlsListener := tls.NewListener(ln, s.gracefulServer.server.TLSConfig)
err = s.gracefulServer.server.Serve(tlsListener)
}
} else {
// Use certificate files (regular SSL or self-signed)
err = s.gracefulServer.server.ListenAndServeTLS(s.certFile, s.keyFile)
}
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr(), context.Background()) err = s.httpServer.ListenAndServeTLS(s.cfg.SSLCert, s.cfg.SSLKey)
} else {
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr(), context.Background())
err = s.httpServer.ListenAndServe()
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr())
err = s.gracefulServer.server.ListenAndServe()
}
// If the server stopped for a reason other than a graceful shutdown, log the error.
// If the server stopped for a reason other than a graceful shutdown, log and report the error.
if err != nil && err != http.ErrServerClosed {
logger.Error("Server '%s' failed: %v", s.cfg.Name, err, context.Background())
logger.Error("Server '%s' failed: %v", s.cfg.Name, err)
select {
case s.serverErr <- err:
default:
}
}
}()
s.running = true
// A small delay to allow the goroutine to start and potentially fail on binding.
// A more robust solution might involve a channel signal.
time.Sleep(50 * time.Millisecond)
// Check if the server failed to start
select {
case err := <-s.serverErr:
s.running = false
return err
default:
}
return nil
}
@@ -269,7 +529,7 @@ func (s *serverInstance) Stop(ctx context.Context) error {
}
logger.Info("Gracefully shutting down server '%s'...", s.cfg.Name)
err := s.httpServer.Shutdown(ctx)
err := s.gracefulServer.shutdown(ctx)
if err == nil {
s.running = false
}
@@ -278,5 +538,35 @@ func (s *serverInstance) Stop(ctx context.Context) error {
// Addr returns the network address the server is listening on, as configured
// on the underlying http.Server (its Addr field).
// NOTE(review): the two return statements look like the removed and added lines
// of a rendered diff — presumably only the gracefulServer form exists in the
// real file; confirm against the repository.
func (s *serverInstance) Addr() string {
return s.httpServer.Addr
return s.gracefulServer.server.Addr
}
// Name returns the server instance name, taken from its configuration (cfg.Name).
func (s *serverInstance) Name() string {
return s.cfg.Name
}
// HealthCheckHandler returns a handler that responds to health checks.
// It exposes the wrapped gracefulServer's health handler, which reports 503
// once shutdown has begun and 200 otherwise.
func (s *serverInstance) HealthCheckHandler() http.HandlerFunc {
return s.gracefulServer.healthCheckHandler()
}
// ReadinessHandler returns a handler for readiness checks.
// It exposes the wrapped gracefulServer's readiness handler, which reports 503
// once shutdown has begun and otherwise 200 with the in-flight request count.
func (s *serverInstance) ReadinessHandler() http.HandlerFunc {
return s.gracefulServer.readinessHandler()
}
// InFlightRequests returns the current number of in-flight requests, as
// counted by the wrapped gracefulServer.
func (s *serverInstance) InFlightRequests() int64 {
return s.gracefulServer.inFlightRequestsCount()
}
// IsShuttingDown returns true if the server is shutting down, i.e. graceful
// shutdown has been initiated on the wrapped gracefulServer.
func (s *serverInstance) IsShuttingDown() bool {
return s.gracefulServer.isShutdown()
}
// Wait blocks until shutdown is complete, i.e. until the wrapped
// gracefulServer has finished its shutdown sequence. It does not itself
// trigger a shutdown, so it blocks indefinitely if none is ever started.
func (s *serverInstance) Wait() {
s.gracefulServer.wait()
}