package middleware
import (
"html"
"net/http"
"regexp"
"strings"
)
// Sanitizer provides input sanitization beyond SQL injection protection
type Sanitizer struct {
// StripHTML removes HTML tags from input
StripHTML bool
// EscapeHTML escapes HTML entities
EscapeHTML bool
// RemoveNullBytes removes null bytes from input
RemoveNullBytes bool
// RemoveControlChars removes control characters (except newline, carriage return, tab)
RemoveControlChars bool
// MaxStringLength limits individual string field length (0 = no limit)
MaxStringLength int
// BlockPatterns are regex patterns to block (e.g., script tags, SQL keywords)
BlockPatterns []*regexp.Regexp
// Custom sanitization function
CustomSanitizer func(string) string
}
// DefaultSanitizer returns a sanitizer with secure defaults
func DefaultSanitizer() *Sanitizer {
return &Sanitizer{
StripHTML: false, // Don't strip by default (breaks legitimate HTML content)
EscapeHTML: true, // Escape HTML entities to prevent XSS
RemoveNullBytes: true, // Remove null bytes (security best practice)
RemoveControlChars: true, // Remove dangerous control characters
MaxStringLength: 0, // No limit by default
// Block common XSS and injection patterns
BlockPatterns: []*regexp.Regexp{
regexp.MustCompile(`(?i)`), // Script tags
regexp.MustCompile(`(?i)javascript:`), // JavaScript protocol
regexp.MustCompile(`(?i)on\w+\s*=`), // Event handlers (onclick, onerror, etc.)
regexp.MustCompile(`(?i)