security: harden against SAST findings (criticals through mediums)

Critical:
- Validate baseURL/sourceURL/upstreamURL at config load time
  (prevents XML injection, XSS, SSRF via config/env manipulation)
- Use xml.Escape for OpenSearch XML template interpolation

High:
- Add security headers middleware (CSP, X-Frame-Options, HSTS, etc.)
- Sanitize result URLs to reject javascript:/data: schemes
- Sanitize infobox img_src against dangerous URL schemes
- Default CORS to deny-all (was wildcard *)

Medium:
- Rate limiter: X-Forwarded-For only trusted from configured proxies
- Validate engine names against known registry allowlist
- Add 1024-char max query length
- Sanitize upstream error messages (strip raw response bodies)
- Upstream client validates URL scheme (http/https only)

Test updates:
- Update extractIP tests for new trusted proxy behavior
This commit is contained in:
Franz Kafka 2026-03-22 16:22:27 +00:00
parent 4b0cde91ed
commit da367a1bfd
23 changed files with 399 additions and 41 deletions

View file

@ -42,7 +42,8 @@ type CORSConfig struct {
func CORS(cfg CORSConfig) func(http.Handler) http.Handler {
origins := cfg.AllowedOrigins
if len(origins) == 0 {
origins = []string{"*"}
// Default: no CORS headers. Explicitly configure origins to enable.
origins = nil
}
methods := cfg.AllowedMethods
@ -70,6 +71,7 @@ func CORS(cfg CORSConfig) func(http.Handler) http.Handler {
origin := r.Header.Get("Origin")
// Determine the allowed origin for this request.
// If no origins are configured, CORS is disabled entirely — no headers are set.
allowedOrigin := ""
for _, o := range origins {
if o == "*" {

View file

@ -27,10 +27,14 @@ import (
"log/slog"
)
// RateLimitConfig controls per-IP rate limiting.
type RateLimitConfig struct {
Requests int
Window time.Duration
CleanupInterval time.Duration
// TrustedProxies is a list of CIDR ranges that are allowed to set
// X-Forwarded-For / X-Real-IP. If empty, only r.RemoteAddr is used.
TrustedProxies []string
}
func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http.Handler {
@ -53,18 +57,30 @@ func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http
logger = slog.Default()
}
// Parse trusted proxy CIDRs.
var trustedNets []*net.IPNet
for _, cidr := range cfg.TrustedProxies {
_, network, err := net.ParseCIDR(cidr)
if err != nil {
logger.Warn("invalid trusted proxy CIDR, skipping", "cidr", cidr, "error", err)
continue
}
trustedNets = append(trustedNets, network)
}
limiter := &ipLimiter{
requests: requests,
window: window,
clients: make(map[string]*bucket),
logger: logger,
trusted: trustedNets,
}
go limiter.cleanup(cleanup)
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ip := extractIP(r)
ip := l.extractIP(r)
if !limiter.allow(ip) {
retryAfter := int(limiter.window.Seconds())
@ -92,6 +108,7 @@ type ipLimiter struct {
clients map[string]*bucket
mu sync.Mutex
logger *slog.Logger
trusted []*net.IPNet
}
func (l *ipLimiter) allow(ip string) bool {
@ -129,18 +146,48 @@ func (l *ipLimiter) cleanup(interval time.Duration) {
}
}
func extractIP(r *http.Request) string {
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
parts := strings.SplitN(xff, ",", 2)
return strings.TrimSpace(parts[0])
}
if rip := r.Header.Get("X-Real-IP"); rip != "" {
return strings.TrimSpace(rip)
// extractIP extracts the client IP from the request.
// If trusted proxy CIDRs are configured, X-Forwarded-For is only used when
// the direct connection comes from a trusted proxy. Otherwise, only RemoteAddr is used.
func (l *ipLimiter) extractIP(r *http.Request) string {
return extractIP(r, l.trusted...)
}
func extractIP(r *http.Request, trusted ...*net.IPNet) string {
remoteIP, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
remoteIP = r.RemoteAddr
}
host, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
return r.RemoteAddr
// Check if the direct connection is from a trusted proxy.
isTrusted := false
if len(trusted) > 0 {
ip := net.ParseIP(remoteIP)
if ip != nil {
for _, network := range trusted {
if network.Contains(ip) {
isTrusted = true
break
}
}
}
}
return host
if isTrusted {
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
parts := strings.SplitN(xff, ",", 2)
candidate := strings.TrimSpace(parts[0])
if net.ParseIP(candidate) != nil {
return candidate
}
}
if rip := r.Header.Get("X-Real-IP"); rip != "" {
candidate := strings.TrimSpace(rip)
if net.ParseIP(candidate) != nil {
return candidate
}
}
}
return remoteIP
}

View file

@ -71,7 +71,7 @@ func GlobalRateLimit(cfg GlobalRateLimitConfig, logger *slog.Logger) func(http.H
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
w.WriteHeader(http.StatusServiceUnavailable)
_, _ = w.Write([]byte("503 Service Unavailable — global rate limit exceeded\n"))
logger.Warn("global rate limit exceeded", "ip", extractIP(r))
logger.Warn("global rate limit exceeded", "remote", r.RemoteAddr)
return
}

View file

@ -92,9 +92,11 @@ func TestRateLimit_DifferentIPs(t *testing.T) {
}
func TestRateLimit_XForwardedFor(t *testing.T) {
privateNet := mustParseCIDR("10.0.0.0/8")
h := RateLimit(RateLimitConfig{
Requests: 1,
Window: 10 * time.Second,
Requests: 1,
Window: 10 * time.Second,
TrustedProxies: []string{"10.0.0.0/8"},
}, nil)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
@ -143,17 +145,27 @@ func TestRateLimit_WindowExpires(t *testing.T) {
}
func TestExtractIP(t *testing.T) {
// Trusted proxy: loopback
loopback := mustParseCIDR("127.0.0.0/8")
privateNet := mustParseCIDR("10.0.0.0/8")
tests := []struct {
name string
xff string
realIP string
remote string
trusted []*net.IPNet
expected string
}{
{"xff", "203.0.113.50, 10.0.0.1", "", "10.0.0.1:1234", "203.0.113.50"},
{"real_ip", "", "203.0.113.50", "10.0.0.1:1234", "203.0.113.50"},
{"remote", "", "", "1.2.3.4:5678", "1.2.3.4"},
{"xff_over_real", "203.0.113.50", "10.0.0.1", "10.0.0.1:1234", "203.0.113.50"},
// No trusted proxies → always use RemoteAddr.
{"no_trusted_xff", "203.0.113.50, 10.0.0.1", "", "10.0.0.1:1234", nil, "10.0.0.1"},
{"no_trusted_real", "", "203.0.113.50", "10.0.0.1:1234", nil, "10.0.0.1"},
{"no_trusted_remote", "", "", "1.2.3.4:5678", nil, "1.2.3.4"},
// Trusted proxy → XFF is respected.
{"trusted_xff", "203.0.113.50, 10.0.0.1", "", "10.0.0.1:1234", []*net.IPNet{privateNet}, "203.0.113.50"},
{"trusted_real_ip", "", "203.0.113.50", "10.0.0.1:1234", []*net.IPNet{privateNet}, "203.0.113.50"},
// Untrusted remote → XFF ignored even if present.
{"untrusted_xff", "203.0.113.50, 10.0.0.1", "", "1.2.3.4:5678", []*net.IPNet{loopback}, "1.2.3.4"},
}
for _, tt := range tests {
@ -167,9 +179,17 @@ func TestExtractIP(t *testing.T) {
}
req.RemoteAddr = tt.remote
if got := extractIP(req); got != tt.expected {
if got := extractIP(req, tt.trusted...); got != tt.expected {
t.Errorf("extractIP() = %q, want %q", got, tt.expected)
}
})
}
}
func mustParseCIDR(s string) *net.IPNet {
_, network, err := net.ParseCIDR(s)
if err != nil {
panic(err)
}
return network
}

View file

@ -0,0 +1,92 @@
// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
package middleware
import (
"net/http"
"strconv"
"strings"
)
// SecurityHeadersConfig controls which security headers are set.
type SecurityHeadersConfig struct {
// FrameOptions controls X-Frame-Options. Default: "DENY".
FrameOptions string
// HSTSMaxAge controls the max-age for Strict-Transport-Security.
// Set to 0 to disable HSTS (useful for local dev). Default: 31536000 (1 year).
HSTSMaxAge int
// HSTSPreloadDomains adds "includeSubDomains; preload" to HSTS.
HSTSPreloadDomains bool
// ReferrerPolicy controls the Referrer-Policy header. Default: "no-referrer".
ReferrerPolicy string
// CSP controls Content-Security-Policy. Default: a restrictive policy.
// Set to "" to disable CSP entirely.
CSP string
}
// SecurityHeaders returns middleware that sets standard HTTP security headers
// on every response.
func SecurityHeaders(cfg SecurityHeadersConfig) func(http.Handler) http.Handler {
frameOpts := cfg.FrameOptions
if frameOpts == "" {
frameOpts = "DENY"
}
hstsAge := cfg.HSTSMaxAge
if hstsAge == 0 {
hstsAge = 31536000 // 1 year
}
refPol := cfg.ReferrerPolicy
if refPol == "" {
refPol = "no-referrer"
}
csp := cfg.CSP
if csp == "" {
csp = defaultCSP()
}
hstsValue := "max-age=" + strconv.Itoa(hstsAge)
if cfg.HSTSPreloadDomains {
hstsValue += "; includeSubDomains; preload"
}
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("X-Content-Type-Options", "nosniff")
w.Header().Set("X-Frame-Options", frameOpts)
w.Header().Set("Referrer-Policy", refPol)
w.Header().Set("Permissions-Policy", "camera=(), microphone=(), geolocation=()")
w.Header().Set("Content-Security-Policy", csp)
if hstsAge > 0 {
w.Header().Set("Strict-Transport-Security", hstsValue)
}
next.ServeHTTP(w, r)
})
}
}
// defaultCSP returns a restrictive Content-Security-Policy for the
// metasearch engine.
func defaultCSP() string {
return strings.Join([]string{
"default-src 'self'",
"script-src 'self'",
"style-src 'self' 'unsafe-inline'",
"img-src 'self' https: data:",
"connect-src 'self'",
"font-src 'self'",
"frame-ancestors 'none'",
"base-uri 'self'",
"form-action 'self'",
}, "; ")
}