fix(engines): validate Wikipedia language codes to prevent SSRF

The Wikipedia language subdomain was derived from user input without
validation, allowing attackers to redirect requests via malicious
language values like "evil.com.attacker.com". This commit adds a
whitelist of valid Wikipedia language codes to prevent this.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ashisgreat22 2026-03-22 13:22:52 +01:00
parent 19f5c89053
commit d21e9189b8

View file

@ -33,6 +33,44 @@ type WikipediaEngine struct {
client *http.Client
}
// validWikipediaLangs is the allow-list of language codes that may be used as
// the subdomain label of a wikipedia.org host. Membership is the only thing
// that matters, so the values are empty structs.
//
// Restricting the subdomain to this fixed set guards against SSRF: a
// caller-supplied language value that is not listed here must never be
// interpolated into the request host.
var validWikipediaLangs = func() map[string]struct{} {
	// Codes are grouped by initial letter purely for readability.
	codes := []string{
		"aa", "ab", "ae", "af", "ak", "am", "an", "ar", "arc", "as", "ast", "at", "av", "ay", "az",
		"ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs",
		"ca", "ce", "ch", "co", "cr", "cs", "cu", "cv", "cy",
		"da", "de", "di", "dv", "dz",
		"ee", "el", "en", "eo", "es", "et", "eu",
		"fa", "ff", "fi", "fj", "fo", "fr", "fy",
		"ga", "gd", "gl", "gn", "gu", "gv",
		"ha", "he", "hi", "ho", "hr", "ht", "hu", "hy", "hz",
		"ia", "id", "ie", "ig", "ii", "ik", "io", "is", "it", "iu",
		"ja", "jv",
		"ka", "kg", "ki", "kj", "kk", "kl", "km", "kn", "ko", "kr", "ks", "ku", "kv", "kw", "ky",
		"la", "lb", "lg", "li", "lij", "ln", "lo", "lt", "lv",
		"mg", "mh", "mi", "mk", "ml", "mn", "mo", "mr", "ms", "mt", "mus", "my",
		"na", "nah", "nap", "nd", "nds", "ne", "new", "ng", "nl", "nn", "no", "nov", "nrm", "nv", "ny",
		"oc", "oj", "om", "or", "os",
		"pa", "pag", "pam", "pap", "pdc", "pl", "pms", "pn", "ps", "pt",
		"qu",
		"rm", "rmy", "rn", "ro", "roa-rup", "ru", "rw",
		"sa", "sah", "sc", "scn", "sco", "sd", "se", "sg", "sh", "si", "simple", "sk", "sl",
		"sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "szl",
		"ta", "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tpi", "tr", "ts", "tt", "tum", "tw", "ty",
		"udm", "ug", "uk", "ur", "uz",
		"ve", "vec", "vi", "vls", "vo",
		"wa", "wo",
		"xal", "xh",
		"yi", "yo",
		"za", "zea", "zh", "zh-classical", "zh-min-nan", "zh-yue", "zu",
	}

	allowed := make(map[string]struct{}, len(codes))
	for _, code := range codes {
		allowed[code] = struct{}{}
	}
	return allowed
}()
// Name reports the fixed identifier of this engine, "wikipedia".
func (e *WikipediaEngine) Name() string {
	const engineName = "wikipedia"
	return engineName
}
func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
@ -50,6 +88,11 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
// Wikipedia subdomains are based on the language code; keep it simple for MVP.
lang = strings.SplitN(lang, "-", 2)[0]
lang = strings.ReplaceAll(lang, "_", "-")
// Validate lang against whitelist to prevent SSRF attacks where an attacker
// could use a malicious language value to redirect requests to their server.
if _, ok := validWikipediaLangs[lang]; !ok {
lang = "en"
}
wikiNetloc := fmt.Sprintf("%s.wikipedia.org", lang)
endpoint := fmt.Sprintf(