Tracing Package
OpenTelemetry distributed tracing for ResolveSpec.
Quick Start
import "github.com/bitechdev/ResolveSpec/pkg/tracing"
// Initialize tracer
config := tracing.Config{
ServiceName: "my-api",
ServiceVersion: "1.0.0",
Endpoint: "localhost:4317", // OTLP collector
Enabled: true,
}
shutdown, err := tracing.InitTracer(config)
if err != nil {
log.Fatal(err)
}
defer shutdown(context.Background())
// Apply middleware
router.Use(tracing.Middleware)
Configuration
type Config struct {
    ServiceName    string // Service identifier
    ServiceVersion string // Version for tracking deployments
    Endpoint       string // OTLP collector endpoint (e.g., "localhost:4317")
    Enabled        bool   // Enable/disable tracing
}
Environment-based Configuration
import "os"
config := tracing.Config{
    ServiceName:    os.Getenv("SERVICE_NAME"),
    ServiceVersion: os.Getenv("VERSION"),
    Endpoint:       getEnv("OTEL_ENDPOINT", "localhost:4317"),
    Enabled:        getEnv("TRACING_ENABLED", "true") == "true",
}
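The getEnv helper used above is not part of the tracing package; a minimal sketch of such a helper (it only needs the "os" import already shown):

func getEnv(key, fallback string) string {
    // Return the environment variable if set, otherwise the fallback value.
    if v, ok := os.LookupEnv(key); ok {
        return v
    }
    return fallback
}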
Automatic HTTP Tracing
The middleware automatically creates spans for all HTTP requests:
router.Use(tracing.Middleware)
Captured attributes:
- HTTP method
- HTTP URL
- HTTP path
- HTTP scheme
- Host name
- Span kind (server)
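For illustration, a hand-rolled middleware recording equivalent data with OpenTelemetry-style attribute keys might look roughly like this. This is a sketch, not the package's actual implementation; use tracing.Middleware in real code.

import (
    "net/http"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

// sketchMiddleware shows roughly what a tracing middleware records per request.
func sketchMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        scheme := "http"
        if r.TLS != nil {
            scheme = "https"
        }

        tracer := otel.Tracer("my-api")
        ctx, span := tracer.Start(r.Context(), r.Method+" "+r.URL.Path,
            trace.WithSpanKind(trace.SpanKindServer), // span kind: server
            trace.WithAttributes(
                attribute.String("http.method", r.Method),    // HTTP method
                attribute.String("http.url", r.URL.String()), // HTTP URL
                attribute.String("http.target", r.URL.Path),  // HTTP path
                attribute.String("http.scheme", scheme),      // HTTP scheme
                attribute.String("net.host.name", r.Host),    // Host name
            ),
        )
        defer span.End()

        next.ServeHTTP(w, r.WithContext(ctx))
    })
}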
Manual Span Creation
Basic Span
import "go.opentelemetry.io/otel/attribute"
func processOrder(ctx context.Context, orderID string) error {
    ctx, span := tracing.StartSpan(ctx, "process-order",
        attribute.String("order.id", orderID),
    )
    defer span.End()

    // Your logic here...
    return nil
}
Nested Spans
func handleRequest(ctx context.Context) error {
    ctx, span := tracing.StartSpan(ctx, "handle-request")
    defer span.End()

    // Child span 1
    if err := validateInput(ctx); err != nil {
        return err
    }

    // Child span 2
    if err := processData(ctx); err != nil {
        return err
    }

    return nil
}

func validateInput(ctx context.Context) error {
    ctx, span := tracing.StartSpan(ctx, "validate-input")
    defer span.End()

    // Validation logic...
    return nil
}

func processData(ctx context.Context) error {
    ctx, span := tracing.StartSpan(ctx, "process-data")
    defer span.End()

    // Processing logic...
    return nil
}
Adding Attributes
import "go.opentelemetry.io/otel/attribute"
ctx, span := tracing.StartSpan(ctx, "database-query",
attribute.String("db.table", "users"),
attribute.String("db.operation", "SELECT"),
attribute.Int("user.id", 123),
)
defer span.End()
Or add attributes later:
tracing.SetAttributes(ctx,
attribute.String("result.status", "success"),
attribute.Int("result.count", 42),
)
Recording Events
tracing.AddEvent(ctx, "cache-miss",
attribute.String("cache.key", cacheKey),
)
tracing.AddEvent(ctx, "retry-attempt",
attribute.Int("attempt", 2),
attribute.String("reason", "timeout"),
)
Error Recording
result, err := someOperation()
if err != nil {
    tracing.RecordError(ctx, err)
    return err
}

With additional context:

if err != nil {
    span := tracing.SpanFromContext(ctx)
    span.RecordError(err)
    span.SetAttributes(
        attribute.String("error.type", "database"),
        attribute.Bool("error.retriable", true),
    )
    return err
}
Complete Example
package main

import (
    "context"
    "log"
    "net/http"
    "time"

    "github.com/bitechdev/ResolveSpec/pkg/tracing"
    "github.com/gorilla/mux"
    "go.opentelemetry.io/otel/attribute"
)

func main() {
    // Initialize tracing
    config := tracing.Config{
        ServiceName:    "user-service",
        ServiceVersion: "1.0.0",
        Endpoint:       "localhost:4317",
        Enabled:        true,
    }

    shutdown, err := tracing.InitTracer(config)
    if err != nil {
        log.Fatal(err)
    }
    defer shutdown(context.Background())

    // Create router
    router := mux.NewRouter()

    // Apply tracing middleware
    router.Use(tracing.Middleware)

    // Routes
    router.HandleFunc("/users/{id}", getUserHandler)

    log.Fatal(http.ListenAndServe(":8080", router))
}

func getUserHandler(w http.ResponseWriter, r *http.Request) {
    ctx := r.Context()

    // Extract user ID from path
    vars := mux.Vars(r)
    userID := vars["id"]

    // Create span for this operation
    ctx, span := tracing.StartSpan(ctx, "get-user",
        attribute.String("user.id", userID),
    )
    defer span.End()

    // Fetch user
    user, err := fetchUser(ctx, userID)
    if err != nil {
        tracing.RecordError(ctx, err)
        http.Error(w, "Internal Server Error", http.StatusInternalServerError)
        return
    }

    // Record success
    tracing.SetAttributes(ctx,
        attribute.String("user.name", user.Name),
        attribute.Bool("user.active", user.Active),
    )

    // Return user...
}

// fetchUser queries the database for a user. The User type and queryUser
// helper are application code, not shown here.
func fetchUser(ctx context.Context, userID string) (*User, error) {
    // Create database span
    ctx, span := tracing.StartSpan(ctx, "db.query",
        attribute.String("db.system", "postgresql"),
        attribute.String("db.operation", "SELECT"),
        attribute.String("db.table", "users"),
    )
    defer span.End()

    start := time.Now()

    // Execute query
    user, err := queryUser(ctx, userID)

    // Record duration
    duration := time.Since(start)
    span.SetAttributes(
        attribute.Int64("db.duration_ms", duration.Milliseconds()),
    )

    if err != nil {
        tracing.RecordError(ctx, err)
        return nil, err
    }

    return user, nil
}
OpenTelemetry Collector Setup
Docker Compose
version: '3'
services:
  app:
    build: .
    ports:
      - "8080:8080"
    environment:
      - OTEL_ENDPOINT=otel-collector:4317
    depends_on:
      - otel-collector

  otel-collector:
    image: otel/opentelemetry-collector:latest
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      - "4317:4317"   # OTLP gRPC
      - "4318:4318"   # OTLP HTTP

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686" # Jaeger UI
      - "14250:14250" # Jaeger gRPC
Collector Configuration
otel-collector-config.yaml:
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

exporters:
  jaeger:
    endpoint: jaeger:14250
    tls:
      insecure: true
  logging:
    loglevel: debug

processors:
  batch:
    timeout: 10s

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [jaeger, logging]
Viewing Traces
Jaeger UI
Access at http://localhost:16686
Finding traces:
- Select service: "my-api"
- Select operation: "GET /users/:id"
- Click "Find Traces"
Sample Trace
GET /users/123 (200ms)
├── get-user (180ms)
│ ├── validate-permissions (20ms)
│ ├── db.query (150ms)
│   │   └── SELECT * FROM users WHERE id = 123
│ └── transform-response (10ms)
└── send-response (20ms)
Best Practices
1. Span Naming
Good:
tracing.StartSpan(ctx, "database.query.users")
tracing.StartSpan(ctx, "http.request.external-api")
tracing.StartSpan(ctx, "cache.get")
Bad:
tracing.StartSpan(ctx, "DoStuff") // Too vague
tracing.StartSpan(ctx, "user_123_query") // User-specific (high cardinality)
2. Attribute Keys
Follow OpenTelemetry semantic conventions:
// HTTP
attribute.String("http.method", "GET")
attribute.String("http.url", url)
attribute.Int("http.status_code", 200)
// Database
attribute.String("db.system", "postgresql")
attribute.String("db.table", "users")
attribute.String("db.operation", "SELECT")
// Custom
attribute.String("user.id", userID)
attribute.String("order.status", "pending")
3. Error Handling
Always record errors:
if err != nil {
    tracing.RecordError(ctx, err)

    // Also add context
    tracing.SetAttributes(ctx,
        attribute.Bool("error.retriable", isRetriable(err)),
        attribute.String("error.type", errorType(err)),
    )

    return err
}
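The isRetriable and errorType helpers above are application code, not part of the tracing package; a minimal sketch of what they might look like, assuming you classify errors by timeout and a few well-known sentinel values:

import (
    "context"
    "database/sql"
    "errors"
    "net"
)

// isRetriable reports whether an error is worth retrying (timeouts here).
func isRetriable(err error) bool {
    var netErr net.Error
    return errors.As(err, &netErr) && netErr.Timeout()
}

// errorType buckets errors into a low-cardinality label suitable for a span attribute.
func errorType(err error) string {
    switch {
    case errors.Is(err, sql.ErrNoRows):
        return "not_found"
    case errors.Is(err, context.DeadlineExceeded):
        return "timeout"
    default:
        return "internal"
    }
}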
4. Sampling
For high-traffic services, configure sampling. The tracer currently uses AlwaysSample(); to record only a fraction of traces (for example 10%) in production, update the sampler in tracing.go.
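A minimal sketch of a ratio-based sampler using the OpenTelemetry SDK; the exporter and resource setup are assumed to already live in tracing.go:

import sdktrace "go.opentelemetry.io/otel/sdk/trace"

// Sample 10% of new traces, but respect the parent's sampling decision
// so distributed traces stay consistent across services.
sampler := sdktrace.ParentBased(sdktrace.TraceIDRatioBased(0.1))

tp := sdktrace.NewTracerProvider(
    sdktrace.WithSampler(sampler),
    // ...existing exporter and resource options...
)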
5. Context Propagation
Always pass context through the call chain:
func handler(w http.ResponseWriter, r *http.Request) {
    ctx := r.Context() // Get context from request
    processRequest(ctx) // Pass it down
}

func processRequest(ctx context.Context) {
    // Context carries trace information
    ctx, span := tracing.StartSpan(ctx, "process")
    defer span.End()

    // Pass to next function
    saveData(ctx)
}
Performance Impact
- Overhead: <1% CPU, <5MB memory
- Latency: <100μs per span
- Safe for production at high throughput
Troubleshooting
Traces Not Appearing
1. Check that the collector is running:
   docker-compose ps

2. Verify the endpoint (host:port only, without a scheme):
   Endpoint: "localhost:4317"        // Correct
   Endpoint: "http://localhost:4317" // Wrong (do not include http://)

3. Check the collector logs:
   docker-compose logs otel-collector
Disable Tracing
config := tracing.Config{
    Enabled: false, // Tracing disabled
}
TLS in Production
Update the OTLP client setup in tracing.go to use TLS credentials (requires the "google.golang.org/grpc/credentials" package):

client := otlptracegrpc.NewClient(
    otlptracegrpc.WithEndpoint(config.Endpoint),
    otlptracegrpc.WithTLSCredentials(credentials.NewClientTLSFromCert(nil, "")),
)
Integration with Metrics
Combine with metrics for full observability:
import (
    "github.com/bitechdev/ResolveSpec/pkg/metrics"
    "github.com/bitechdev/ResolveSpec/pkg/tracing"
)

// Apply both
router.Use(metrics.GetProvider().Middleware)
router.Use(tracing.Middleware)
Distributed Tracing
Traces automatically propagate across services via HTTP headers:
Service A:
// Create request with trace context
req, _ := http.NewRequestWithContext(ctx, "GET", "http://service-b/api", nil)
resp, _ := client.Do(req)
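Outgoing requests only carry trace headers if the HTTP client injects them. If the plain client in the snippet above does not do this in your setup, one common approach is to wrap the transport with the standard otelhttp contrib package; a sketch under that assumption:

import "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"

// Wrap the default transport so W3C trace context headers
// (traceparent/tracestate) are injected into outgoing requests.
client := &http.Client{
    Transport: otelhttp.NewTransport(http.DefaultTransport),
}

req, _ := http.NewRequestWithContext(ctx, "GET", "http://service-b/api", nil)
resp, _ := client.Do(req)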
Service B:
// Trace context automatically extracted by middleware
router.Use(tracing.Middleware)
The trace ID propagates across both services, creating a unified trace.