security: harden against SAST findings (criticals through mediums)

Critical:
- Validate baseURL/sourceURL/upstreamURL at config load time (prevents XML injection, XSS, SSRF via config/env manipulation)
- Use xml.Escape for OpenSearch XML template interpolation

High:
- Add security headers middleware (CSP, X-Frame-Options, HSTS, etc.)
- Sanitize result URLs to reject javascript:/data: schemes
- Sanitize infobox img_src against dangerous URL schemes
- Default CORS to deny-all (was wildcard *)

Medium:
- Rate limiter: X-Forwarded-For only trusted from configured proxies
- Validate engine names against known registry allowlist
- Add 1024-char max query length
- Sanitize upstream error messages (strip raw response bodies)
- Upstream client validates URL scheme (http/https only)

Test updates:
- Update extractIP tests for new trusted proxy behavior
2026-03-22 16:22:27 +00:00 · 2026-03-22 16:22:27 +00:00 · da367a1bfd
commit da367a1bfd
parent 4b0cde91ed
23 changed files with 399 additions and 41 deletions
--- a/internal/middleware/ratelimit.go
+++ b/internal/middleware/ratelimit.go
@@ -27,10 +27,14 @@ import (
 	"log/slog"
 )

+// RateLimitConfig controls per-IP rate limiting.
 type RateLimitConfig struct {
 	Requests        int
 	Window          time.Duration
 	CleanupInterval time.Duration
+	// TrustedProxies is a list of CIDR ranges that are allowed to set
+	// X-Forwarded-For / X-Real-IP. If empty, only r.RemoteAddr is used.
+	TrustedProxies []string
 }

 func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http.Handler {
@@ -53,18 +57,30 @@ func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http
 		logger = slog.Default()
 	}

+	// Parse trusted proxy CIDRs.
+	var trustedNets []*net.IPNet
+	for _, cidr := range cfg.TrustedProxies {
+		_, network, err := net.ParseCIDR(cidr)
+		if err != nil {
+			logger.Warn("invalid trusted proxy CIDR, skipping", "cidr", cidr, "error", err)
+			continue
+		}
+		trustedNets = append(trustedNets, network)
+	}
+
 	limiter := &ipLimiter{
 		requests: requests,
 		window:   window,
 		clients:  make(map[string]*bucket),
 		logger:   logger,
+		trusted:  trustedNets,
 	}

 	go limiter.cleanup(cleanup)

 	return func(next http.Handler) http.Handler {
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			ip := extractIP(r)
+			ip := limiter.extractIP(r)

 			if !limiter.allow(ip) {
 				retryAfter := int(limiter.window.Seconds())
@@ -92,6 +108,7 @@ type ipLimiter struct {
 	clients  map[string]*bucket
 	mu       sync.Mutex
 	logger   *slog.Logger
+	trusted  []*net.IPNet
 }

 func (l *ipLimiter) allow(ip string) bool {
@@ -129,18 +146,48 @@ func (l *ipLimiter) cleanup(interval time.Duration) {
 	}
 }

-func extractIP(r *http.Request) string {
-	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
-		parts := strings.SplitN(xff, ",", 2)
-		return strings.TrimSpace(parts[0])
-	}
-	if rip := r.Header.Get("X-Real-IP"); rip != "" {
-		return strings.TrimSpace(rip)
+// extractIP extracts the client IP from the request.
+// X-Forwarded-For / X-Real-IP are honored only when the direct connection comes from a
+// configured trusted proxy and the value parses as an IP; otherwise only RemoteAddr is used.
+func (l *ipLimiter) extractIP(r *http.Request) string {
+	return extractIP(r, l.trusted...)
+}
+
+func extractIP(r *http.Request, trusted ...*net.IPNet) string {
+	remoteIP, _, err := net.SplitHostPort(r.RemoteAddr)
+	if err != nil {
+		remoteIP = r.RemoteAddr
 	}

-	host, _, err := net.SplitHostPort(r.RemoteAddr)
-	if err != nil {
-		return r.RemoteAddr
+	// Check if the direct connection is from a trusted proxy.
+	isTrusted := false
+	if len(trusted) > 0 {
+		ip := net.ParseIP(remoteIP)
+		if ip != nil {
+			for _, network := range trusted {
+				if network.Contains(ip) {
+					isTrusted = true
+					break
+				}
+			}
+		}
 	}
-	return host
+
+	if isTrusted {
+		if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+			parts := strings.SplitN(xff, ",", 2)
+			candidate := strings.TrimSpace(parts[0])
+			if net.ParseIP(candidate) != nil {
+				return candidate
+			}
+		}
+		if rip := r.Header.Get("X-Real-IP"); rip != "" {
+			candidate := strings.TrimSpace(rip)
+			if net.ParseIP(candidate) != nil {
+				return candidate
+			}
+		}
+	}
+
+	return remoteIP
 }