security: harden against SAST findings (criticals through mediums)
Critical:
- Validate baseURL/sourceURL/upstreamURL at config load time (prevents XML injection, XSS, SSRF via config/env manipulation)
- Use xml.Escape for OpenSearch XML template interpolation

High:
- Add security headers middleware (CSP, X-Frame-Options, HSTS, etc.)
- Sanitize result URLs to reject javascript:/data: schemes
- Sanitize infobox img_src against dangerous URL schemes
- Default CORS to deny-all (was wildcard *)

Medium:
- Rate limiter: X-Forwarded-For only trusted from configured proxies
- Validate engine names against known registry allowlist
- Add 1024-char max query length
- Sanitize upstream error messages (strip raw response bodies)
- Upstream client validates URL scheme (http/https only)

Test updates:
- Update extractIP tests for new trusted proxy behavior
This commit is contained in:
parent
4b0cde91ed
commit
da367a1bfd
23 changed files with 399 additions and 41 deletions
|
|
@ -26,6 +26,28 @@ import (
|
|||
|
||||
// languageCodeRe matches a language code made of two or three lowercase ASCII
// letters, optionally followed by a hyphen and exactly two ASCII letters of
// either case (for example "en", "fil", or "pt-BR"). The pattern is anchored,
// so the whole string must match.
var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`)
|
||||
|
||||
// maxQueryLength is the maximum allowed length of the search query, in bytes
// (it is compared against len(q), not a rune count).
|
||||
const maxQueryLength = 1024
|
||||
|
||||
// knownEngineNames is the set of engine identifiers that validateEngines
// accepts. Keys are lower-case; looking up any other name yields false.
var knownEngineNames = map[string]bool{
	"arxiv":      true,
	"bing":       true,
	"brave":      true,
	"braveapi":   true,
	"crossref":   true,
	"duckduckgo": true,
	"github":     true,
	"google":     true,
	"qwant":      true,
	"reddit":     true,
	"wikipedia":  true,
	"youtube":    true,
}
|
||||
|
||||
// validateEngines filters engine names against the known registry.
|
||||
func validateEngines(engines []string) []string {
|
||||
out := make([]string, 0, len(engines))
|
||||
for _, e := range engines {
|
||||
if knownEngineNames[strings.ToLower(e)] {
|
||||
out = append(out, strings.ToLower(e))
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
|
||||
// Supports both GET and POST and relies on form values for routing.
|
||||
if err := r.ParseForm(); err != nil {
|
||||
|
|
@ -50,6 +72,9 @@ func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
|
|||
if strings.TrimSpace(q) == "" {
|
||||
return SearchRequest{}, errors.New("missing required parameter: q")
|
||||
}
|
||||
if len(q) > maxQueryLength {
|
||||
return SearchRequest{}, errors.New("query exceeds maximum length")
|
||||
}
|
||||
|
||||
pageno := 1
|
||||
if s := strings.TrimSpace(r.FormValue("pageno")); s != "" {
|
||||
|
|
@ -105,6 +130,8 @@ func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
|
|||
|
||||
// engines is an explicit list of engine names.
|
||||
engines := splitCSV(strings.TrimSpace(r.FormValue("engines")))
|
||||
// Validate engine names against known registry to prevent injection.
|
||||
engines = validateEngines(engines)
|
||||
|
||||
// categories and category_<name> params mirror the webadapter parsing.
|
||||
// We don't validate against a registry here; we just preserve the requested values.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue