mirror of
https://github.com/bitechdev/ResolveSpec.git
synced 2026-01-10 05:04:25 +00:00
Code sanity fixes, added middlewares
This commit is contained in:
533
pkg/tracing/README.md
Normal file
533
pkg/tracing/README.md
Normal file
@@ -0,0 +1,533 @@
|
||||
# Tracing Package
|
||||
|
||||
OpenTelemetry distributed tracing for ResolveSpec.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```go
|
||||
import "github.com/bitechdev/ResolveSpec/pkg/tracing"
|
||||
|
||||
// Initialize tracer
|
||||
config := tracing.Config{
|
||||
ServiceName: "my-api",
|
||||
ServiceVersion: "1.0.0",
|
||||
Endpoint: "localhost:4317", // OTLP collector
|
||||
Enabled: true,
|
||||
}
|
||||
|
||||
shutdown, err := tracing.InitTracer(config)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer shutdown(context.Background())
|
||||
|
||||
// Apply middleware
|
||||
router.Use(tracing.Middleware)
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
```go
|
||||
type Config struct {
|
||||
ServiceName string // Service identifier
|
||||
ServiceVersion string // Version for tracking deployments
|
||||
Endpoint string // OTLP collector endpoint (e.g., "localhost:4317")
|
||||
Enabled bool // Enable/disable tracing
|
||||
}
|
||||
```
|
||||
|
||||
### Environment-based Configuration
|
||||
|
||||
```go
|
||||
import "os"
|
||||
|
||||
config := tracing.Config{
|
||||
ServiceName: os.Getenv("SERVICE_NAME"),
|
||||
ServiceVersion: os.Getenv("VERSION"),
|
||||
Endpoint: getEnv("OTEL_ENDPOINT", "localhost:4317"),
|
||||
Enabled: getEnv("TRACING_ENABLED", "true") == "true",
|
||||
}
|
||||
```
|
||||
|
||||
## Automatic HTTP Tracing
|
||||
|
||||
The middleware automatically creates spans for all HTTP requests:
|
||||
|
||||
```go
|
||||
router.Use(tracing.Middleware)
|
||||
```
|
||||
|
||||
**Captured attributes:**
|
||||
- HTTP method
|
||||
- HTTP URL
|
||||
- HTTP path
|
||||
- HTTP scheme
|
||||
- Host name
|
||||
- Span kind (server)
|
||||
|
||||
## Manual Span Creation
|
||||
|
||||
### Basic Span
|
||||
|
||||
```go
|
||||
import "go.opentelemetry.io/otel/attribute"
|
||||
|
||||
func processOrder(ctx context.Context, orderID string) error {
|
||||
ctx, span := tracing.StartSpan(ctx, "process-order",
|
||||
attribute.String("order.id", orderID),
|
||||
)
|
||||
defer span.End()
|
||||
|
||||
// Your logic here...
|
||||
return nil
|
||||
}
|
||||
```
|
||||
|
||||
### Nested Spans
|
||||
|
||||
```go
|
||||
func handleRequest(ctx context.Context) error {
|
||||
ctx, span := tracing.StartSpan(ctx, "handle-request")
|
||||
defer span.End()
|
||||
|
||||
// Child span 1
|
||||
if err := validateInput(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Child span 2
|
||||
if err := processData(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateInput(ctx context.Context) error {
|
||||
ctx, span := tracing.StartSpan(ctx, "validate-input")
|
||||
defer span.End()
|
||||
|
||||
// Validation logic...
|
||||
return nil
|
||||
}
|
||||
|
||||
func processData(ctx context.Context) error {
|
||||
ctx, span := tracing.StartSpan(ctx, "process-data")
|
||||
defer span.End()
|
||||
|
||||
// Processing logic...
|
||||
return nil
|
||||
}
|
||||
```
|
||||
|
||||
## Adding Attributes
|
||||
|
||||
```go
|
||||
import "go.opentelemetry.io/otel/attribute"
|
||||
|
||||
ctx, span := tracing.StartSpan(ctx, "database-query",
|
||||
attribute.String("db.table", "users"),
|
||||
attribute.String("db.operation", "SELECT"),
|
||||
attribute.Int("user.id", 123),
|
||||
)
|
||||
defer span.End()
|
||||
```
|
||||
|
||||
**Or add attributes later:**
|
||||
|
||||
```go
|
||||
tracing.SetAttributes(ctx,
|
||||
attribute.String("result.status", "success"),
|
||||
attribute.Int("result.count", 42),
|
||||
)
|
||||
```
|
||||
|
||||
## Recording Events
|
||||
|
||||
```go
|
||||
tracing.AddEvent(ctx, "cache-miss",
|
||||
attribute.String("cache.key", cacheKey),
|
||||
)
|
||||
|
||||
tracing.AddEvent(ctx, "retry-attempt",
|
||||
attribute.Int("attempt", 2),
|
||||
attribute.String("reason", "timeout"),
|
||||
)
|
||||
```
|
||||
|
||||
## Error Recording
|
||||
|
||||
```go
|
||||
result, err := someOperation()
|
||||
if err != nil {
|
||||
tracing.RecordError(ctx, err)
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
**With additional context:**
|
||||
|
||||
```go
|
||||
if err != nil {
|
||||
span := tracing.SpanFromContext(ctx)
|
||||
span.RecordError(err)
|
||||
span.SetAttributes(
|
||||
attribute.String("error.type", "database"),
|
||||
attribute.Bool("error.retriable", true),
|
||||
)
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
## Complete Example
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/bitechdev/ResolveSpec/pkg/tracing"
|
||||
"github.com/gorilla/mux"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Initialize tracing
|
||||
config := tracing.Config{
|
||||
ServiceName: "user-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
Endpoint: "localhost:4317",
|
||||
Enabled: true,
|
||||
}
|
||||
|
||||
shutdown, err := tracing.InitTracer(config)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer shutdown(context.Background())
|
||||
|
||||
// Create router
|
||||
router := mux.NewRouter()
|
||||
|
||||
// Apply tracing middleware
|
||||
router.Use(tracing.Middleware)
|
||||
|
||||
// Routes
|
||||
router.HandleFunc("/users/{id}", getUserHandler)
|
||||
|
||||
log.Fatal(http.ListenAndServe(":8080", router))
|
||||
}
|
||||
|
||||
func getUserHandler(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
|
||||
// Extract user ID from path
|
||||
vars := mux.Vars(r)
|
||||
userID := vars["id"]
|
||||
|
||||
// Create span for this operation
|
||||
ctx, span := tracing.StartSpan(ctx, "get-user",
|
||||
attribute.String("user.id", userID),
|
||||
)
|
||||
defer span.End()
|
||||
|
||||
// Fetch user
|
||||
user, err := fetchUser(ctx, userID)
|
||||
if err != nil {
|
||||
tracing.RecordError(ctx, err)
|
||||
http.Error(w, "Internal Server Error", 500)
|
||||
return
|
||||
}
|
||||
|
||||
// Record success
|
||||
tracing.SetAttributes(ctx,
|
||||
attribute.String("user.name", user.Name),
|
||||
attribute.Bool("user.active", user.Active),
|
||||
)
|
||||
|
||||
// Return user...
|
||||
}
|
||||
|
||||
func fetchUser(ctx context.Context, userID string) (*User, error) {
|
||||
// Create database span
|
||||
ctx, span := tracing.StartSpan(ctx, "db.query",
|
||||
attribute.String("db.system", "postgresql"),
|
||||
attribute.String("db.operation", "SELECT"),
|
||||
attribute.String("db.table", "users"),
|
||||
)
|
||||
defer span.End()
|
||||
|
||||
start := time.Now()
|
||||
|
||||
// Execute query
|
||||
user, err := queryUser(ctx, userID)
|
||||
|
||||
// Record duration
|
||||
duration := time.Since(start)
|
||||
span.SetAttributes(
|
||||
attribute.Int64("db.duration_ms", duration.Milliseconds()),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
tracing.RecordError(ctx, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
```
|
||||
|
||||
## OpenTelemetry Collector Setup
|
||||
|
||||
### Docker Compose
|
||||
|
||||
```yaml
|
||||
version: '3'
|
||||
services:
|
||||
app:
|
||||
build: .
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- OTEL_ENDPOINT=otel-collector:4317
|
||||
depends_on:
|
||||
- otel-collector
|
||||
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector:latest
|
||||
command: ["--config=/etc/otel-collector-config.yaml"]
|
||||
volumes:
|
||||
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
|
||||
ports:
|
||||
- "4317:4317" # OTLP gRPC
|
||||
- "4318:4318" # OTLP HTTP
|
||||
|
||||
jaeger:
|
||||
image: jaegertracing/all-in-one:latest
|
||||
ports:
|
||||
- "16686:16686" # Jaeger UI
|
||||
- "14250:14250" # Jaeger gRPC
|
||||
```
|
||||
|
||||
### Collector Configuration
|
||||
|
||||
**otel-collector-config.yaml:**
|
||||
|
||||
```yaml
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
exporters:
|
||||
jaeger:
|
||||
endpoint: jaeger:14250
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
logging:
|
||||
loglevel: debug
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 10s
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [jaeger, logging]
|
||||
```
|
||||
|
||||
## Viewing Traces
|
||||
|
||||
### Jaeger UI
|
||||
|
||||
Access at `http://localhost:16686`
|
||||
|
||||
**Finding traces:**
|
||||
1. Select service: "my-api"
|
||||
2. Select operation: "GET /users/123" (spans are named `<METHOD> <request path>`, so the concrete path appears, not the route template)
|
||||
3. Click "Find Traces"
|
||||
|
||||
### Sample Trace
|
||||
|
||||
```
|
||||
GET /users/123 (200ms)
|
||||
├── get-user (180ms)
|
||||
│ ├── validate-permissions (20ms)
|
||||
│ ├── db.query (150ms)
|
||||
│ │ └── SELECT FROM users WHERE id = 123
|
||||
│ └── transform-response (10ms)
|
||||
└── send-response (20ms)
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Span Naming
|
||||
|
||||
**Good:**
|
||||
```go
|
||||
tracing.StartSpan(ctx, "database.query.users")
|
||||
tracing.StartSpan(ctx, "http.request.external-api")
|
||||
tracing.StartSpan(ctx, "cache.get")
|
||||
```
|
||||
|
||||
**Bad:**
|
||||
```go
|
||||
tracing.StartSpan(ctx, "DoStuff") // Too vague
|
||||
tracing.StartSpan(ctx, "user_123_query") // User-specific (high cardinality)
|
||||
```
|
||||
|
||||
### 2. Attribute Keys
|
||||
|
||||
Follow OpenTelemetry semantic conventions:
|
||||
|
||||
```go
|
||||
// HTTP
|
||||
attribute.String("http.method", "GET")
|
||||
attribute.String("http.url", url)
|
||||
attribute.Int("http.status_code", 200)
|
||||
|
||||
// Database
|
||||
attribute.String("db.system", "postgresql")
|
||||
attribute.String("db.table", "users")
|
||||
attribute.String("db.operation", "SELECT")
|
||||
|
||||
// Custom
|
||||
attribute.String("user.id", userID)
|
||||
attribute.String("order.status", "pending")
|
||||
```
|
||||
|
||||
### 3. Error Handling
|
||||
|
||||
Always record errors:
|
||||
|
||||
```go
|
||||
if err != nil {
|
||||
tracing.RecordError(ctx, err)
|
||||
// Also add context
|
||||
tracing.SetAttributes(ctx,
|
||||
attribute.Bool("error.retriable", isRetriable(err)),
|
||||
attribute.String("error.type", errorType(err)),
|
||||
)
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Sampling
|
||||
|
||||
For high-traffic services, configure sampling:
|
||||
|
||||
```go
|
||||
// In production: sample 10% of traces
|
||||
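// InitTracer currently hard-codes sdktrace.AlwaysSample(); to sample a fraction of traces,
// change the sampler in tracing.go, e.g. sdktrace.ParentBased(sdktrace.TraceIDRatioBased(0.1))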
|
||||
```
|
||||
|
||||
### 5. Context Propagation
|
||||
|
||||
Always pass context through the call chain:
|
||||
|
||||
```go
|
||||
func handler(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context() // Get context from request
|
||||
processRequest(ctx) // Pass it down
|
||||
}
|
||||
|
||||
func processRequest(ctx context.Context) {
|
||||
// Context carries trace information
|
||||
ctx, span := tracing.StartSpan(ctx, "process")
|
||||
defer span.End()
|
||||
|
||||
// Pass to next function
|
||||
saveData(ctx)
|
||||
}
|
||||
```
|
||||
|
||||
## Performance Impact
|
||||
|
||||
- **Overhead**: <1% CPU, <5MB memory
|
||||
- **Latency**: <100μs per span
|
||||
- **Safe for production** at high throughput
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Traces Not Appearing
|
||||
|
||||
1. **Check collector is running:**
|
||||
```bash
|
||||
docker-compose ps
|
||||
```
|
||||
|
||||
2. **Verify endpoint:**
|
||||
```go
|
||||
Endpoint: "localhost:4317" // Correct
|
||||
Endpoint: "http://localhost:4317" // Wrong: use host:port only, without the http:// scheme
|
||||
```
|
||||
|
||||
3. **Check logs:**
|
||||
```bash
|
||||
docker-compose logs otel-collector
|
||||
```
|
||||
|
||||
### Disable Tracing
|
||||
|
||||
```go
|
||||
config := tracing.Config{
|
||||
Enabled: false, // Tracing disabled
|
||||
}
|
||||
```
|
||||
|
||||
### TLS in Production
|
||||
|
||||
In `tracing.go`, replace the `otlptracegrpc.WithInsecure()` option with TLS credentials:
|
||||
|
||||
```go
|
||||
client := otlptracegrpc.NewClient(
|
||||
otlptracegrpc.WithEndpoint(config.Endpoint),
|
||||
otlptracegrpc.WithTLSCredentials(credentials.NewClientTLSFromCert(nil, "")),
|
||||
)
|
||||
```
|
||||
|
||||
## Integration with Metrics
|
||||
|
||||
Combine with metrics for full observability:
|
||||
|
||||
```go
|
||||
import (
|
||||
"github.com/bitechdev/ResolveSpec/pkg/metrics"
|
||||
"github.com/bitechdev/ResolveSpec/pkg/tracing"
|
||||
)
|
||||
|
||||
// Apply both
|
||||
router.Use(metrics.GetProvider().Middleware)
|
||||
router.Use(tracing.Middleware)
|
||||
```
|
||||
|
||||
## Distributed Tracing
|
||||
|
||||
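Trace context travels between services in HTTP headers (W3C Trace Context and Baggage). The middleware extracts these headers from incoming requests; for outgoing requests the context must be injected into the request headers (for example with `otel.GetTextMapPropagator().Inject(ctx, propagation.HeaderCarrier(req.Header))`, or by using an instrumented HTTP client):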
|
||||
|
||||
**Service A:**
|
||||
```go
|
||||
// Create request with trace context
|
||||
req, _ := http.NewRequestWithContext(ctx, "GET", "http://service-b/api", nil)
|
||||
resp, _ := client.Do(req)
|
||||
```
|
||||
|
||||
**Service B:**
|
||||
```go
|
||||
// Trace context automatically extracted by middleware
|
||||
router.Use(tracing.Middleware)
|
||||
```
|
||||
|
||||
The trace ID propagates across both services, creating a unified trace.
|
||||
146
pkg/tracing/tracing.go
Normal file
146
pkg/tracing/tracing.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/propagation"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// tracer is the package-wide tracer. It is nil until InitTracer completes
// with tracing enabled; Middleware and StartSpan check for nil before use.
var tracer trace.Tracer
|
||||
|
||||
// Config describes how InitTracer sets up tracing.
type Config struct {
	// ServiceName is reported as the service name resource attribute and is
	// also used as the tracer name.
	ServiceName string
	// ServiceVersion is reported as the service version resource attribute.
	ServiceVersion string
	// Endpoint is the OTLP gRPC collector address (e.g., "localhost:4317").
	Endpoint string
	// Enabled switches tracing on; when false InitTracer configures nothing.
	Enabled bool
}
|
||||
|
||||
// InitTracer initializes the OpenTelemetry tracer
|
||||
func InitTracer(config Config) (func(context.Context) error, error) {
|
||||
if !config.Enabled {
|
||||
// Return no-op shutdown function
|
||||
return func(context.Context) error { return nil }, nil
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Create OTLP exporter
|
||||
client := otlptracegrpc.NewClient(
|
||||
otlptracegrpc.WithEndpoint(config.Endpoint),
|
||||
otlptracegrpc.WithInsecure(), // Use WithTLSCredentials in production
|
||||
)
|
||||
|
||||
exporter, err := otlptrace.New(ctx, client)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create OTLP exporter: %w", err)
|
||||
}
|
||||
|
||||
// Create resource
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceNameKey.String(config.ServiceName),
|
||||
semconv.ServiceVersionKey.String(config.ServiceVersion),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create resource: %w", err)
|
||||
}
|
||||
|
||||
// Create trace provider
|
||||
tp := sdktrace.NewTracerProvider(
|
||||
sdktrace.WithBatcher(exporter),
|
||||
sdktrace.WithResource(res),
|
||||
sdktrace.WithSampler(sdktrace.AlwaysSample()),
|
||||
)
|
||||
|
||||
// Set global trace provider
|
||||
otel.SetTracerProvider(tp)
|
||||
|
||||
// Set global propagator
|
||||
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
||||
propagation.TraceContext{},
|
||||
propagation.Baggage{},
|
||||
))
|
||||
|
||||
// Get tracer
|
||||
tracer = tp.Tracer(config.ServiceName)
|
||||
|
||||
// Return shutdown function
|
||||
return tp.Shutdown, nil
|
||||
}
|
||||
|
||||
// Middleware returns an HTTP middleware that creates spans for requests
|
||||
func Middleware(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if tracer == nil {
|
||||
next.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// Extract context from request headers
|
||||
ctx := otel.GetTextMapPropagator().Extract(r.Context(), propagation.HeaderCarrier(r.Header))
|
||||
|
||||
// Start span
|
||||
ctx, span := tracer.Start(ctx, r.Method+" "+r.URL.Path,
|
||||
trace.WithSpanKind(trace.SpanKindServer),
|
||||
trace.WithAttributes(
|
||||
semconv.HTTPMethodKey.String(r.Method),
|
||||
semconv.HTTPURLKey.String(r.URL.String()),
|
||||
semconv.HTTPTargetKey.String(r.URL.Path),
|
||||
semconv.HTTPSchemeKey.String(r.URL.Scheme),
|
||||
semconv.NetHostNameKey.String(r.Host),
|
||||
),
|
||||
)
|
||||
defer span.End()
|
||||
|
||||
// Create new request with updated context
|
||||
r = r.WithContext(ctx)
|
||||
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
||||
// StartSpan starts a new span with the given name
|
||||
func StartSpan(ctx context.Context, name string, attrs ...attribute.KeyValue) (context.Context, trace.Span) {
|
||||
if tracer == nil {
|
||||
return ctx, trace.SpanFromContext(ctx)
|
||||
}
|
||||
return tracer.Start(ctx, name, trace.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// SpanFromContext returns the current span from the context
|
||||
func SpanFromContext(ctx context.Context) trace.Span {
|
||||
return trace.SpanFromContext(ctx)
|
||||
}
|
||||
|
||||
// AddEvent adds an event to the current span
|
||||
func AddEvent(ctx context.Context, name string, attrs ...attribute.KeyValue) {
|
||||
span := trace.SpanFromContext(ctx)
|
||||
span.AddEvent(name, trace.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// SetAttributes sets attributes on the current span
|
||||
func SetAttributes(ctx context.Context, attrs ...attribute.KeyValue) {
|
||||
span := trace.SpanFromContext(ctx)
|
||||
span.SetAttributes(attrs...)
|
||||
}
|
||||
|
||||
// RecordError records an error on the current span
|
||||
func RecordError(ctx context.Context, err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
span := trace.SpanFromContext(ctx)
|
||||
span.RecordError(err)
|
||||
}
|
||||
Reference in New Issue
Block a user