mirror of
https://github.com/bitechdev/ResolveSpec.git
synced 2026-02-16 13:26:12 +00:00
Better server manager
This commit is contained in:
@@ -4,26 +4,173 @@ import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/bitechdev/ResolveSpec/pkg/logger"
|
||||
"github.com/bitechdev/ResolveSpec/pkg/middleware"
|
||||
"github.com/klauspost/compress/gzhttp"
|
||||
"golang.org/x/net/http2"
|
||||
)
|
||||
|
||||
// serverManager manages a collection of server instances.
|
||||
// gracefulServer wraps http.Server with graceful shutdown capabilities (internal type)
|
||||
type gracefulServer struct {
|
||||
server *http.Server
|
||||
shutdownTimeout time.Duration
|
||||
drainTimeout time.Duration
|
||||
inFlightRequests atomic.Int64
|
||||
isShuttingDown atomic.Bool
|
||||
shutdownOnce sync.Once
|
||||
shutdownComplete chan struct{}
|
||||
}
|
||||
|
||||
// trackRequestsMiddleware tracks in-flight requests and blocks new requests during shutdown
|
||||
func (gs *gracefulServer) trackRequestsMiddleware(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Check if shutting down
|
||||
if gs.isShuttingDown.Load() {
|
||||
http.Error(w, `{"error":"service_unavailable","message":"Server is shutting down"}`, http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
|
||||
// Increment in-flight counter
|
||||
gs.inFlightRequests.Add(1)
|
||||
defer gs.inFlightRequests.Add(-1)
|
||||
|
||||
// Serve the request
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
||||
// shutdown performs graceful shutdown with request draining
|
||||
func (gs *gracefulServer) shutdown(ctx context.Context) error {
|
||||
var shutdownErr error
|
||||
|
||||
gs.shutdownOnce.Do(func() {
|
||||
logger.Info("Starting graceful shutdown...")
|
||||
|
||||
// Mark as shutting down (new requests will be rejected)
|
||||
gs.isShuttingDown.Store(true)
|
||||
|
||||
// Create context with timeout
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, gs.shutdownTimeout)
|
||||
defer cancel()
|
||||
|
||||
// Wait for in-flight requests to complete (with drain timeout)
|
||||
drainCtx, drainCancel := context.WithTimeout(shutdownCtx, gs.drainTimeout)
|
||||
defer drainCancel()
|
||||
|
||||
shutdownErr = gs.drainRequests(drainCtx)
|
||||
if shutdownErr != nil {
|
||||
logger.Error("Error draining requests: %v", shutdownErr)
|
||||
}
|
||||
|
||||
// Shutdown the server
|
||||
logger.Info("Shutting down HTTP server...")
|
||||
if err := gs.server.Shutdown(shutdownCtx); err != nil {
|
||||
logger.Error("Error shutting down server: %v", err)
|
||||
if shutdownErr == nil {
|
||||
shutdownErr = err
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("Graceful shutdown complete")
|
||||
close(gs.shutdownComplete)
|
||||
})
|
||||
|
||||
return shutdownErr
|
||||
}
|
||||
|
||||
// drainRequests waits for in-flight requests to complete
|
||||
func (gs *gracefulServer) drainRequests(ctx context.Context) error {
|
||||
ticker := time.NewTicker(100 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
for {
|
||||
inFlight := gs.inFlightRequests.Load()
|
||||
|
||||
if inFlight == 0 {
|
||||
logger.Info("All requests drained in %v", time.Since(startTime))
|
||||
return nil
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger.Warn("Drain timeout exceeded with %d requests still in flight", inFlight)
|
||||
return fmt.Errorf("drain timeout exceeded: %d requests still in flight", inFlight)
|
||||
case <-ticker.C:
|
||||
logger.Debug("Waiting for %d in-flight requests to complete...", inFlight)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// inFlightRequests returns the current number of in-flight requests
|
||||
func (gs *gracefulServer) inFlightRequestsCount() int64 {
|
||||
return gs.inFlightRequests.Load()
|
||||
}
|
||||
|
||||
// isShutdown returns true if the server is shutting down
|
||||
func (gs *gracefulServer) isShutdown() bool {
|
||||
return gs.isShuttingDown.Load()
|
||||
}
|
||||
|
||||
// wait blocks until shutdown is complete
|
||||
func (gs *gracefulServer) wait() {
|
||||
<-gs.shutdownComplete
|
||||
}
|
||||
|
||||
// healthCheckHandler returns a handler that responds to health checks
|
||||
func (gs *gracefulServer) healthCheckHandler() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
if gs.isShutdown() {
|
||||
http.Error(w, `{"status":"shutting_down"}`, http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, err := w.Write([]byte(`{"status":"healthy"}`))
|
||||
if err != nil {
|
||||
logger.Warn("Failed to write health check response: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readinessHandler returns a handler for readiness checks
|
||||
func (gs *gracefulServer) readinessHandler() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
if gs.isShutdown() {
|
||||
http.Error(w, `{"ready":false,"reason":"shutting_down"}`, http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
|
||||
inFlight := gs.inFlightRequestsCount()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
fmt.Fprintf(w, `{"ready":true,"in_flight_requests":%d}`, inFlight)
|
||||
}
|
||||
}
|
||||
|
||||
// serverManager manages a collection of server instances with graceful shutdown support.
|
||||
type serverManager struct {
|
||||
instances map[string]Instance
|
||||
mu sync.RWMutex
|
||||
instances map[string]Instance
|
||||
mu sync.RWMutex
|
||||
shutdownCallbacks []ShutdownCallback
|
||||
callbacksMu sync.Mutex
|
||||
}
|
||||
|
||||
// NewManager creates a new server manager.
|
||||
func NewManager() Manager {
|
||||
return &serverManager{
|
||||
instances: make(map[string]Instance),
|
||||
instances: make(map[string]Instance),
|
||||
shutdownCallbacks: make([]ShutdownCallback, 0),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,7 +221,7 @@ func (sm *serverManager) Remove(name string) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if err := instance.Stop(ctx); err != nil {
|
||||
logger.Warn("Failed to gracefully stop server '%s' on remove: %v", name, err, context.Background())
|
||||
logger.Warn("Failed to gracefully stop server '%s' on remove: %v", name, err)
|
||||
}
|
||||
|
||||
delete(sm.instances, name)
|
||||
@@ -94,7 +241,6 @@ func (sm *serverManager) StartAll() error {
|
||||
}
|
||||
|
||||
if len(startErrors) > 0 {
|
||||
// In a real-world scenario, you might want a more sophisticated error handling strategy
|
||||
return fmt.Errorf("encountered errors while starting servers: %v", startErrors)
|
||||
}
|
||||
return nil
|
||||
@@ -102,6 +248,11 @@ func (sm *serverManager) StartAll() error {
|
||||
|
||||
// StopAll gracefully shuts down all running server instances.
|
||||
func (sm *serverManager) StopAll() error {
|
||||
return sm.StopAllWithContext(context.Background())
|
||||
}
|
||||
|
||||
// StopAllWithContext gracefully shuts down all running server instances with a context.
|
||||
func (sm *serverManager) StopAllWithContext(ctx context.Context) error {
|
||||
sm.mu.RLock()
|
||||
instancesToStop := make([]Instance, 0, len(sm.instances))
|
||||
for _, instance := range sm.instances {
|
||||
@@ -109,19 +260,38 @@ func (sm *serverManager) StopAll() error {
|
||||
}
|
||||
sm.mu.RUnlock()
|
||||
|
||||
logger.Info("Shutting down all servers...", context.Background())
|
||||
logger.Info("Shutting down all servers...")
|
||||
|
||||
// Execute shutdown callbacks first
|
||||
sm.callbacksMu.Lock()
|
||||
callbacks := make([]ShutdownCallback, len(sm.shutdownCallbacks))
|
||||
copy(callbacks, sm.shutdownCallbacks)
|
||||
sm.callbacksMu.Unlock()
|
||||
|
||||
if len(callbacks) > 0 {
|
||||
logger.Info("Executing %d shutdown callbacks...", len(callbacks))
|
||||
for i, cb := range callbacks {
|
||||
if err := cb(ctx); err != nil {
|
||||
logger.Error("Shutdown callback %d failed: %v", i+1, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stop all instances in parallel
|
||||
var shutdownErrors []error
|
||||
var wg sync.WaitGroup
|
||||
var errorsMu sync.Mutex
|
||||
|
||||
for _, instance := range instancesToStop {
|
||||
wg.Add(1)
|
||||
go func(inst Instance) {
|
||||
defer wg.Done()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
|
||||
defer cancel()
|
||||
if err := inst.Stop(ctx); err != nil {
|
||||
shutdownErrors = append(shutdownErrors, fmt.Errorf("failed to stop server '%s': %w", inst.Addr(), err))
|
||||
if err := inst.Stop(shutdownCtx); err != nil {
|
||||
errorsMu.Lock()
|
||||
shutdownErrors = append(shutdownErrors, fmt.Errorf("failed to stop server '%s': %w", inst.Name(), err))
|
||||
errorsMu.Unlock()
|
||||
}
|
||||
}(instance)
|
||||
}
|
||||
@@ -131,13 +301,13 @@ func (sm *serverManager) StopAll() error {
|
||||
if len(shutdownErrors) > 0 {
|
||||
return fmt.Errorf("encountered errors while stopping servers: %v", shutdownErrors)
|
||||
}
|
||||
logger.Info("All servers stopped gracefully.", context.Background())
|
||||
logger.Info("All servers stopped gracefully.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartAll gracefully restarts all running server instances.
|
||||
func (sm *serverManager) RestartAll() error {
|
||||
logger.Info("Restarting all servers...", context.Background())
|
||||
logger.Info("Restarting all servers...")
|
||||
if err := sm.StopAll(); err != nil {
|
||||
return fmt.Errorf("failed to stop servers during restart: %w", err)
|
||||
}
|
||||
@@ -148,7 +318,7 @@ func (sm *serverManager) RestartAll() error {
|
||||
if err := sm.StartAll(); err != nil {
|
||||
return fmt.Errorf("failed to start servers during restart: %w", err)
|
||||
}
|
||||
logger.Info("All servers restarted successfully.", context.Background())
|
||||
logger.Info("All servers restarted successfully.")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -164,13 +334,46 @@ func (sm *serverManager) List() []Instance {
|
||||
return instances
|
||||
}
|
||||
|
||||
// RegisterShutdownCallback registers a callback to be called during shutdown.
|
||||
func (sm *serverManager) RegisterShutdownCallback(cb ShutdownCallback) {
|
||||
sm.callbacksMu.Lock()
|
||||
defer sm.callbacksMu.Unlock()
|
||||
sm.shutdownCallbacks = append(sm.shutdownCallbacks, cb)
|
||||
}
|
||||
|
||||
// ServeWithGracefulShutdown starts all servers and blocks until a shutdown signal is received.
|
||||
func (sm *serverManager) ServeWithGracefulShutdown() error {
|
||||
// Start all servers
|
||||
if err := sm.StartAll(); err != nil {
|
||||
return fmt.Errorf("failed to start servers: %w", err)
|
||||
}
|
||||
|
||||
logger.Info("All servers started. Waiting for shutdown signal...")
|
||||
|
||||
// Wait for interrupt signal
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM, syscall.SIGINT)
|
||||
|
||||
sig := <-sigChan
|
||||
logger.Info("Received signal: %v, initiating graceful shutdown", sig)
|
||||
|
||||
// Create context with timeout for shutdown
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
return sm.StopAllWithContext(ctx)
|
||||
}
|
||||
|
||||
// serverInstance is a concrete implementation of the Instance interface.
|
||||
// It wraps gracefulServer to provide graceful shutdown capabilities.
|
||||
type serverInstance struct {
|
||||
cfg Config
|
||||
httpServer *http.Server
|
||||
mu sync.RWMutex
|
||||
running bool
|
||||
stopCh chan struct{}
|
||||
cfg Config
|
||||
gracefulServer *gracefulServer
|
||||
certFile string // Path to certificate file (may be temporary for self-signed)
|
||||
keyFile string // Path to key file (may be temporary for self-signed)
|
||||
mu sync.RWMutex
|
||||
running bool
|
||||
serverErr chan error
|
||||
}
|
||||
|
||||
// newInstance creates a new, unstarted server instance from a config.
|
||||
@@ -179,12 +382,29 @@ func newInstance(cfg Config) (*serverInstance, error) {
|
||||
return nil, fmt.Errorf("handler cannot be nil")
|
||||
}
|
||||
|
||||
// Set default timeouts
|
||||
if cfg.ShutdownTimeout == 0 {
|
||||
cfg.ShutdownTimeout = 30 * time.Second
|
||||
}
|
||||
if cfg.DrainTimeout == 0 {
|
||||
cfg.DrainTimeout = 25 * time.Second
|
||||
}
|
||||
if cfg.ReadTimeout == 0 {
|
||||
cfg.ReadTimeout = 15 * time.Second
|
||||
}
|
||||
if cfg.WriteTimeout == 0 {
|
||||
cfg.WriteTimeout = 15 * time.Second
|
||||
}
|
||||
if cfg.IdleTimeout == 0 {
|
||||
cfg.IdleTimeout = 60 * time.Second
|
||||
}
|
||||
|
||||
addr := fmt.Sprintf("%s:%d", cfg.Host, cfg.Port)
|
||||
var handler http.Handler = cfg.Handler
|
||||
|
||||
// Wrap with GZIP handler if enabled
|
||||
if cfg.GZIP {
|
||||
gz, err := gzhttp.NewWrapper(gzhttp.BestSpeed)
|
||||
gz, err := gzhttp.NewWrapper()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create GZIP wrapper: %w", err)
|
||||
}
|
||||
@@ -194,20 +414,33 @@ func newInstance(cfg Config) (*serverInstance, error) {
|
||||
// Wrap with the panic recovery middleware
|
||||
handler = middleware.PanicRecovery(handler)
|
||||
|
||||
// Here you could add other default middleware like request logging, metrics, etc.
|
||||
// Configure TLS if any TLS option is enabled
|
||||
tlsConfig, certFile, keyFile, err := configureTLS(cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to configure TLS: %w", err)
|
||||
}
|
||||
|
||||
httpServer := &http.Server{
|
||||
Addr: addr,
|
||||
Handler: handler,
|
||||
ReadTimeout: 15 * time.Second,
|
||||
WriteTimeout: 15 * time.Second,
|
||||
IdleTimeout: 60 * time.Second,
|
||||
// Create gracefulServer
|
||||
gracefulSrv := &gracefulServer{
|
||||
server: &http.Server{
|
||||
Addr: addr,
|
||||
Handler: handler,
|
||||
ReadTimeout: cfg.ReadTimeout,
|
||||
WriteTimeout: cfg.WriteTimeout,
|
||||
IdleTimeout: cfg.IdleTimeout,
|
||||
TLSConfig: tlsConfig,
|
||||
},
|
||||
shutdownTimeout: cfg.ShutdownTimeout,
|
||||
drainTimeout: cfg.DrainTimeout,
|
||||
shutdownComplete: make(chan struct{}),
|
||||
}
|
||||
|
||||
return &serverInstance{
|
||||
cfg: cfg,
|
||||
httpServer: httpServer,
|
||||
stopCh: make(chan struct{}),
|
||||
cfg: cfg,
|
||||
gracefulServer: gracefulSrv,
|
||||
certFile: certFile,
|
||||
keyFile: keyFile,
|
||||
serverErr: make(chan error, 1),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -220,42 +453,69 @@ func (s *serverInstance) Start() error {
|
||||
return fmt.Errorf("server '%s' is already running", s.cfg.Name)
|
||||
}
|
||||
|
||||
hasSSL := s.cfg.SSLCert != "" && s.cfg.SSLKey != ""
|
||||
// Determine if we're using TLS
|
||||
useTLS := s.cfg.SSLCert != "" || s.cfg.SSLKey != "" || s.cfg.SelfSignedSSL || s.cfg.AutoTLS
|
||||
|
||||
// Wrap handler with request tracking
|
||||
s.gracefulServer.server.Handler = s.gracefulServer.trackRequestsMiddleware(s.gracefulServer.server.Handler)
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
s.mu.Lock()
|
||||
s.running = false
|
||||
s.mu.Unlock()
|
||||
logger.Info("Server '%s' stopped.", s.cfg.Name, context.Background())
|
||||
logger.Info("Server '%s' stopped.", s.cfg.Name)
|
||||
}()
|
||||
|
||||
var err error
|
||||
protocol := "HTTP"
|
||||
|
||||
if hasSSL {
|
||||
if useTLS {
|
||||
protocol = "HTTPS"
|
||||
// Configure TLS + HTTP/2
|
||||
s.httpServer.TLSConfig = &tls.Config{
|
||||
MinVersion: tls.VersionTLS12,
|
||||
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr())
|
||||
|
||||
// For AutoTLS, we need to use a TLS listener
|
||||
if s.cfg.AutoTLS {
|
||||
// Create listener
|
||||
ln, lnErr := net.Listen("tcp", s.gracefulServer.server.Addr)
|
||||
if lnErr != nil {
|
||||
err = fmt.Errorf("failed to create listener: %w", lnErr)
|
||||
} else {
|
||||
// Wrap with TLS
|
||||
tlsListener := tls.NewListener(ln, s.gracefulServer.server.TLSConfig)
|
||||
err = s.gracefulServer.server.Serve(tlsListener)
|
||||
}
|
||||
} else {
|
||||
// Use certificate files (regular SSL or self-signed)
|
||||
err = s.gracefulServer.server.ListenAndServeTLS(s.certFile, s.keyFile)
|
||||
}
|
||||
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr(), context.Background()) err = s.httpServer.ListenAndServeTLS(s.cfg.SSLCert, s.cfg.SSLKey)
|
||||
} else {
|
||||
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr(), context.Background())
|
||||
err = s.httpServer.ListenAndServe()
|
||||
logger.Info("Starting %s server '%s' on %s", protocol, s.cfg.Name, s.Addr())
|
||||
err = s.gracefulServer.server.ListenAndServe()
|
||||
}
|
||||
|
||||
// If the server stopped for a reason other than a graceful shutdown, log the error.
|
||||
// If the server stopped for a reason other than a graceful shutdown, log and report the error.
|
||||
if err != nil && err != http.ErrServerClosed {
|
||||
logger.Error("Server '%s' failed: %v", s.cfg.Name, err, context.Background())
|
||||
logger.Error("Server '%s' failed: %v", s.cfg.Name, err)
|
||||
select {
|
||||
case s.serverErr <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
s.running = true
|
||||
// A small delay to allow the goroutine to start and potentially fail on binding.
|
||||
// A more robust solution might involve a channel signal.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
// Check if the server failed to start
|
||||
select {
|
||||
case err := <-s.serverErr:
|
||||
s.running = false
|
||||
return err
|
||||
default:
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -269,7 +529,7 @@ func (s *serverInstance) Stop(ctx context.Context) error {
|
||||
}
|
||||
|
||||
logger.Info("Gracefully shutting down server '%s'...", s.cfg.Name)
|
||||
err := s.httpServer.Shutdown(ctx)
|
||||
err := s.gracefulServer.shutdown(ctx)
|
||||
if err == nil {
|
||||
s.running = false
|
||||
}
|
||||
@@ -278,5 +538,35 @@ func (s *serverInstance) Stop(ctx context.Context) error {
|
||||
|
||||
// Addr returns the network address the server is listening on.
|
||||
func (s *serverInstance) Addr() string {
|
||||
return s.httpServer.Addr
|
||||
return s.gracefulServer.server.Addr
|
||||
}
|
||||
|
||||
// Name returns the server instance name.
|
||||
func (s *serverInstance) Name() string {
|
||||
return s.cfg.Name
|
||||
}
|
||||
|
||||
// HealthCheckHandler returns a handler that responds to health checks.
|
||||
func (s *serverInstance) HealthCheckHandler() http.HandlerFunc {
|
||||
return s.gracefulServer.healthCheckHandler()
|
||||
}
|
||||
|
||||
// ReadinessHandler returns a handler for readiness checks.
|
||||
func (s *serverInstance) ReadinessHandler() http.HandlerFunc {
|
||||
return s.gracefulServer.readinessHandler()
|
||||
}
|
||||
|
||||
// InFlightRequests returns the current number of in-flight requests.
|
||||
func (s *serverInstance) InFlightRequests() int64 {
|
||||
return s.gracefulServer.inFlightRequestsCount()
|
||||
}
|
||||
|
||||
// IsShuttingDown returns true if the server is shutting down.
|
||||
func (s *serverInstance) IsShuttingDown() bool {
|
||||
return s.gracefulServer.isShutdown()
|
||||
}
|
||||
|
||||
// Wait blocks until shutdown is complete.
|
||||
func (s *serverInstance) Wait() {
|
||||
s.gracefulServer.wait()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user