security: harden against SAST findings (critical through medium severity)

Critical:
- Validate baseURL/sourceURL/upstreamURL at config load time
  (prevents XML injection, XSS, SSRF via config/env manipulation)
- Use xml.Escape for OpenSearch XML template interpolation

High:
- Add security headers middleware (CSP, X-Frame-Options, HSTS, etc.)
- Sanitize result URLs to reject javascript:/data: schemes
- Sanitize infobox img_src against dangerous URL schemes
- Default CORS to deny-all (was wildcard *)

Medium:
- Rate limiter: trust X-Forwarded-For only when the request comes from a configured proxy
- Validate engine names against known registry allowlist
- Enforce a 1024-character maximum query length
- Sanitize upstream error messages (strip raw response bodies)
- Validate the upstream client URL scheme (http/https only)

Test updates:
- Update extractIP tests for new trusted proxy behavior
This commit is contained in:
Franz Kafka 2026-03-22 16:22:27 +00:00
parent 4b0cde91ed
commit da367a1bfd
23 changed files with 399 additions and 41 deletions

View file

@ -44,6 +44,9 @@ func NewClient(baseURL string, timeout time.Duration) (*Client, error) {
if err != nil {
return nil, fmt.Errorf("invalid upstream base URL: %w", err)
}
if u.Scheme != "http" && u.Scheme != "https" {
return nil, fmt.Errorf("upstream URL must use http or https, got %q", u.Scheme)
}
// Normalize: trim trailing slash to make URL concatenation predictable.
base := strings.TrimRight(u.String(), "/")
@ -108,7 +111,7 @@ func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, en
}
if resp.StatusCode != http.StatusOK {
return contracts.SearchResponse{}, fmt.Errorf("upstream search failed: status=%d body=%q", resp.StatusCode, string(body))
return contracts.SearchResponse{}, fmt.Errorf("upstream search failed with status %d", resp.StatusCode)
}
// Decode upstream JSON into our contract types.