security: fix build errors, add honest Google UA, sanitize error msgs
- Fix config validation: upstream URLs allow private IPs (self-hosted)
- Fix util.SafeURLScheme to return parsed URL
- Replace spoofed GSA User-Agent with honest Kafka UA
- Sanitize all engine error messages (strip response bodies)
- Replace unused body reads with io.Copy(io.Discard, ...) for connection reuse
- Fix pre-existing braveapi_test using wrong struct type
- Fix ratelimit test reference to limiter variable
- Update ratelimit tests for new trusted proxy behavior
This commit is contained in:
parent
da367a1bfd
commit
b3e3123612
17 changed files with 32 additions and 38 deletions
|
|
@@ -28,20 +28,10 @@ import (
|
|||
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||
)
|
||||
|
||||
// GSA User-Agent pool — these are Google Search Appliance identifiers
|
||||
// that Google trusts for enterprise search appliance traffic.
|
||||
var gsaUserAgents = []string{
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
||||
}
|
||||
|
||||
func gsaUA() string {
|
||||
return gsaUserAgents[0] // deterministic for now; could rotate
|
||||
}
|
||||
// googleUserAgent is an honest User-Agent identifying the metasearch engine.
|
||||
// Using a spoofed GSA User-Agent violates Google's Terms of Service and
|
||||
// risks permanent IP blocking.
|
||||
var googleUserAgent = "Kafka/0.1 (compatible; +https://github.com/metamorphosis-dev/kafka)"
|
||||
|
||||
type GoogleEngine struct {
|
||||
client *http.Client
|
||||
|
|
@@ -70,7 +60,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
|||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
httpReq.Header.Set("User-Agent", gsaUA())
|
||||
httpReq.Header.Set("User-Agent", googleUserAgent)
|
||||
httpReq.Header.Set("Accept", "*/*")
|
||||
httpReq.AddCookie(&http.Cookie{Name: "CONSENT", Value: "YES+"})
|
||||
|
||||
|
|
@@ -95,7 +85,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
|||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||
return contracts.SearchResponse{}, fmt.Errorf("google error: status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue