ResolveSpec/pkg/eventbroker/broker.go
2025-12-12 09:23:54 +02:00

454 lines
12 KiB
Go

package eventbroker
import (
"context"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/bitechdev/ResolveSpec/pkg/logger"
)
// Broker is the main interface for event publishing and subscription
type Broker interface {
// Publish publishes an event (mode-dependent: sync or async)
Publish(ctx context.Context, event *Event) error
// PublishSync publishes an event synchronously (blocks until all handlers complete)
PublishSync(ctx context.Context, event *Event) error
// PublishAsync publishes an event asynchronously (returns immediately)
PublishAsync(ctx context.Context, event *Event) error
// Subscribe registers a handler for events matching the pattern
Subscribe(pattern string, handler EventHandler) (SubscriptionID, error)
// Unsubscribe removes a subscription
Unsubscribe(id SubscriptionID) error
// Start starts the broker (begins processing events)
Start(ctx context.Context) error
// Stop stops the broker gracefully (flushes pending events)
Stop(ctx context.Context) error
// Stats returns broker statistics
Stats(ctx context.Context) (*BrokerStats, error)
// InstanceID returns the instance ID of this broker
InstanceID() string
}
// ProcessingMode determines how events are processed
type ProcessingMode string
const (
ProcessingModeSync ProcessingMode = "sync"
ProcessingModeAsync ProcessingMode = "async"
)
// BrokerStats contains broker statistics
type BrokerStats struct {
InstanceID string `json:"instance_id"`
Mode ProcessingMode `json:"mode"`
IsRunning bool `json:"is_running"`
TotalPublished int64 `json:"total_published"`
TotalProcessed int64 `json:"total_processed"`
TotalFailed int64 `json:"total_failed"`
ActiveSubscribers int `json:"active_subscribers"`
QueueSize int `json:"queue_size,omitempty"` // For async mode
ActiveWorkers int `json:"active_workers,omitempty"` // For async mode
ProviderStats *ProviderStats `json:"provider_stats,omitempty"`
AdditionalStats map[string]interface{} `json:"additional_stats,omitempty"`
}
// EventBroker implements the Broker interface
type EventBroker struct {
provider Provider
subscriptions *subscriptionManager
mode ProcessingMode
instanceID string
retryPolicy *RetryPolicy
// Async mode fields (initialized in Phase 4)
workerPool *workerPool
// Runtime state
isRunning atomic.Bool
stopOnce sync.Once
stopCh chan struct{}
wg sync.WaitGroup
// Statistics
statsPublished atomic.Int64
statsProcessed atomic.Int64
statsFailed atomic.Int64
}
// RetryPolicy defines how failed events should be retried
type RetryPolicy struct {
MaxRetries int
InitialDelay time.Duration
MaxDelay time.Duration
BackoffFactor float64
}
// DefaultRetryPolicy returns a sensible default retry policy
func DefaultRetryPolicy() *RetryPolicy {
return &RetryPolicy{
MaxRetries: 3,
InitialDelay: 1 * time.Second,
MaxDelay: 30 * time.Second,
BackoffFactor: 2.0,
}
}
// Options for creating a new broker
type Options struct {
Provider Provider
Mode ProcessingMode
WorkerCount int // For async mode
BufferSize int // For async mode
RetryPolicy *RetryPolicy
InstanceID string
}
// NewBroker creates a new event broker with the given options
func NewBroker(opts Options) (*EventBroker, error) {
if opts.Provider == nil {
return nil, fmt.Errorf("provider is required")
}
if opts.InstanceID == "" {
return nil, fmt.Errorf("instance ID is required")
}
if opts.Mode == "" {
opts.Mode = ProcessingModeAsync // Default to async
}
if opts.RetryPolicy == nil {
opts.RetryPolicy = DefaultRetryPolicy()
}
broker := &EventBroker{
provider: opts.Provider,
subscriptions: newSubscriptionManager(),
mode: opts.Mode,
instanceID: opts.InstanceID,
retryPolicy: opts.RetryPolicy,
stopCh: make(chan struct{}),
}
// Worker pool will be initialized in Phase 4 for async mode
if opts.Mode == ProcessingModeAsync {
if opts.WorkerCount == 0 {
opts.WorkerCount = 10 // Default
}
if opts.BufferSize == 0 {
opts.BufferSize = 1000 // Default
}
broker.workerPool = newWorkerPool(opts.WorkerCount, opts.BufferSize, broker.processEvent)
}
return broker, nil
}
// Functional option pattern helpers
func WithProvider(p Provider) func(*Options) {
return func(o *Options) { o.Provider = p }
}
func WithMode(m ProcessingMode) func(*Options) {
return func(o *Options) { o.Mode = m }
}
func WithWorkerCount(count int) func(*Options) {
return func(o *Options) { o.WorkerCount = count }
}
func WithBufferSize(size int) func(*Options) {
return func(o *Options) { o.BufferSize = size }
}
func WithRetryPolicy(policy *RetryPolicy) func(*Options) {
return func(o *Options) { o.RetryPolicy = policy }
}
func WithInstanceID(id string) func(*Options) {
return func(o *Options) { o.InstanceID = id }
}
// Start starts the broker
func (b *EventBroker) Start(ctx context.Context) error {
if b.isRunning.Load() {
return fmt.Errorf("broker already running")
}
b.isRunning.Store(true)
// Start worker pool for async mode
if b.mode == ProcessingModeAsync && b.workerPool != nil {
b.workerPool.Start()
}
logger.Info("Event broker started (mode: %s, instance: %s)", b.mode, b.instanceID)
return nil
}
// Stop stops the broker gracefully
func (b *EventBroker) Stop(ctx context.Context) error {
var stopErr error
b.stopOnce.Do(func() {
logger.Info("Stopping event broker...")
// Mark as not running
b.isRunning.Store(false)
// Close the stop channel
close(b.stopCh)
// Stop worker pool for async mode
if b.mode == ProcessingModeAsync && b.workerPool != nil {
if err := b.workerPool.Stop(ctx); err != nil {
logger.Error("Error stopping worker pool: %v", err)
stopErr = err
}
}
// Wait for all goroutines
b.wg.Wait()
// Close provider
if err := b.provider.Close(); err != nil {
logger.Error("Error closing provider: %v", err)
if stopErr == nil {
stopErr = err
}
}
logger.Info("Event broker stopped")
})
return stopErr
}
// Publish publishes an event based on the broker's mode
func (b *EventBroker) Publish(ctx context.Context, event *Event) error {
if b.mode == ProcessingModeSync {
return b.PublishSync(ctx, event)
}
return b.PublishAsync(ctx, event)
}
// PublishSync publishes an event synchronously
func (b *EventBroker) PublishSync(ctx context.Context, event *Event) error {
if !b.isRunning.Load() {
return fmt.Errorf("broker is not running")
}
// Validate event
if err := event.Validate(); err != nil {
return fmt.Errorf("invalid event: %w", err)
}
// Store event in provider
if err := b.provider.Publish(ctx, event); err != nil {
return fmt.Errorf("failed to publish event: %w", err)
}
b.statsPublished.Add(1)
// Record metrics
recordEventPublished(event)
// Process event synchronously
if err := b.processEvent(ctx, event); err != nil {
logger.Error("Failed to process event %s: %v", event.ID, err)
b.statsFailed.Add(1)
return err
}
b.statsProcessed.Add(1)
return nil
}
// PublishAsync publishes an event asynchronously
func (b *EventBroker) PublishAsync(ctx context.Context, event *Event) error {
if !b.isRunning.Load() {
return fmt.Errorf("broker is not running")
}
// Validate event
if err := event.Validate(); err != nil {
return fmt.Errorf("invalid event: %w", err)
}
// Store event in provider
if err := b.provider.Publish(ctx, event); err != nil {
return fmt.Errorf("failed to publish event: %w", err)
}
b.statsPublished.Add(1)
// Record metrics
recordEventPublished(event)
// Queue for async processing
if b.mode == ProcessingModeAsync && b.workerPool != nil {
// Update queue size metrics
updateQueueSize(int64(b.workerPool.QueueSize()))
return b.workerPool.Submit(ctx, event)
}
// Fallback to sync if async not configured
return b.processEvent(ctx, event)
}
// Subscribe adds a subscription for events matching the pattern
func (b *EventBroker) Subscribe(pattern string, handler EventHandler) (SubscriptionID, error) {
return b.subscriptions.Subscribe(pattern, handler)
}
// Unsubscribe removes a subscription
func (b *EventBroker) Unsubscribe(id SubscriptionID) error {
return b.subscriptions.Unsubscribe(id)
}
// processEvent processes an event by calling all matching handlers
func (b *EventBroker) processEvent(ctx context.Context, event *Event) error {
startTime := time.Now()
// Get all handlers matching this event type
handlers := b.subscriptions.GetMatching(event.Type)
if len(handlers) == 0 {
logger.Debug("No handlers for event type: %s", event.Type)
return nil
}
logger.Debug("Processing event %s with %d handler(s)", event.ID, len(handlers))
// Mark event as processing
event.MarkProcessing()
if err := b.provider.UpdateStatus(ctx, event.ID, EventStatusProcessing, ""); err != nil {
logger.Warn("Failed to update event status: %v", err)
}
// Execute all handlers
var lastErr error
for i, handler := range handlers {
if err := b.executeHandlerWithRetry(ctx, handler, event); err != nil {
logger.Error("Handler %d failed for event %s: %v", i+1, event.ID, err)
lastErr = err
// Continue processing other handlers
}
}
// Update final status
if lastErr != nil {
event.MarkFailed(lastErr)
if err := b.provider.UpdateStatus(ctx, event.ID, EventStatusFailed, lastErr.Error()); err != nil {
logger.Warn("Failed to update event status: %v", err)
}
// Record metrics
recordEventProcessed(event, time.Since(startTime))
return lastErr
}
event.MarkCompleted()
if err := b.provider.UpdateStatus(ctx, event.ID, EventStatusCompleted, ""); err != nil {
logger.Warn("Failed to update event status: %v", err)
}
// Record metrics
recordEventProcessed(event, time.Since(startTime))
return nil
}
// executeHandlerWithRetry executes a handler with retry logic
func (b *EventBroker) executeHandlerWithRetry(ctx context.Context, handler EventHandler, event *Event) error {
var lastErr error
for attempt := 0; attempt <= b.retryPolicy.MaxRetries; attempt++ {
if attempt > 0 {
// Calculate backoff delay
delay := b.calculateBackoff(attempt)
logger.Debug("Retrying event %s (attempt %d/%d) after %v",
event.ID, attempt, b.retryPolicy.MaxRetries, delay)
select {
case <-time.After(delay):
case <-ctx.Done():
return ctx.Err()
}
event.IncrementRetry()
}
// Execute handler
if err := handler.Handle(ctx, event); err != nil {
lastErr = err
logger.Warn("Handler failed for event %s (attempt %d): %v", event.ID, attempt+1, err)
continue
}
// Success
return nil
}
return fmt.Errorf("handler failed after %d attempts: %w", b.retryPolicy.MaxRetries+1, lastErr)
}
// calculateBackoff calculates the backoff delay for a retry attempt
func (b *EventBroker) calculateBackoff(attempt int) time.Duration {
delay := float64(b.retryPolicy.InitialDelay) * pow(b.retryPolicy.BackoffFactor, float64(attempt-1))
if delay > float64(b.retryPolicy.MaxDelay) {
delay = float64(b.retryPolicy.MaxDelay)
}
return time.Duration(delay)
}
// pow is a simple integer power function
func pow(base float64, exp float64) float64 {
result := 1.0
for i := 0.0; i < exp; i++ {
result *= base
}
return result
}
// Stats returns broker statistics
func (b *EventBroker) Stats(ctx context.Context) (*BrokerStats, error) {
providerStats, err := b.provider.Stats(ctx)
if err != nil {
logger.Warn("Failed to get provider stats: %v", err)
}
stats := &BrokerStats{
InstanceID: b.instanceID,
Mode: b.mode,
IsRunning: b.isRunning.Load(),
TotalPublished: b.statsPublished.Load(),
TotalProcessed: b.statsProcessed.Load(),
TotalFailed: b.statsFailed.Load(),
ActiveSubscribers: b.subscriptions.Count(),
ProviderStats: providerStats,
}
// Add async-specific stats
if b.mode == ProcessingModeAsync && b.workerPool != nil {
stats.QueueSize = b.workerPool.QueueSize()
stats.ActiveWorkers = b.workerPool.ActiveWorkers()
}
return stats, nil
}
// InstanceID returns the instance ID
func (b *EventBroker) InstanceID() string {
return b.instanceID
}