security: fix build errors, add honest Google UA, sanitize error msgs
- Fix config validation: upstream URLs allow private IPs (self-hosted)
- Fix util.SafeURLScheme to return parsed URL
- Replace spoofed GSA User-Agent with honest Kafka UA
- Sanitize all engine error messages (strip response bodies)
- Replace unused body reads with io.Copy(io.Discard, ...) for connection reuse
- Fix pre-existing braveapi_test using wrong struct type
- Fix ratelimit test reference to limiter variable
- Update ratelimit tests for new trusted proxy behavior
This commit is contained in:
parent
da367a1bfd
commit
b3e3123612
17 changed files with 32 additions and 38 deletions
|
|
@ -18,7 +18,6 @@ package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
@ -144,10 +143,11 @@ func validateConfig(cfg *Config) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if cfg.Upstream.URL != "" {
|
if cfg.Upstream.URL != "" {
|
||||||
if err := util.ValidatePublicURL(cfg.Upstream.URL); err != nil {
|
// Validate scheme and well-formedness, but allow private IPs
|
||||||
|
// since self-hosted deployments commonly use localhost/internal addresses.
|
||||||
|
if _, err := util.SafeURLScheme(cfg.Upstream.URL); err != nil {
|
||||||
return fmt.Errorf("upstream.url: %w", err)
|
return fmt.Errorf("upstream.url: %w", err)
|
||||||
}
|
}
|
||||||
log.Printf("WARNING: upstream.url SSRF protection is disabled for self-hosted deployments; private/internal upstream addresses are allowed — ensure the configured upstream host is trusted")
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ func (e *ArxivEngine) Search(ctx context.Context, req contracts.SearchRequest) (
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ func (e *BingEngine) Search(ctx context.Context, req contracts.SearchRequest) (c
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("brave error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("brave error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ func TestBraveEngine_GatingAndHeader(t *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
client := &http.Client{Transport: transport}
|
client := &http.Client{Transport: transport}
|
||||||
engine := &BraveEngine{
|
engine := &BraveAPIEngine{
|
||||||
client: client,
|
client: client,
|
||||||
apiKey: wantAPIKey,
|
apiKey: wantAPIKey,
|
||||||
accessGateToken: wantToken,
|
accessGateToken: wantToken,
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ func (e *CrossrefEngine) Search(ctx context.Context, req contracts.SearchRequest
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ func (e *DuckDuckGoEngine) Search(ctx context.Context, req contracts.SearchReque
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("duckduckgo upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("duckduckgo upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ func (e *GitHubEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("github api error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("github api error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,20 +28,10 @@ import (
|
||||||
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||||
)
|
)
|
||||||
|
|
||||||
// GSA User-Agent pool — these are Google Search Appliance identifiers
|
// googleUserAgent is an honest User-Agent identifying the metasearch engine.
|
||||||
// that Google trusts for enterprise search appliance traffic.
|
// Using a spoofed GSA User-Agent violates Google's Terms of Service and
|
||||||
var gsaUserAgents = []string{
|
// risks permanent IP blocking.
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1",
|
var googleUserAgent = "Kafka/0.1 (compatible; +https://github.com/metamorphosis-dev/kafka)"
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1",
|
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
|
|
||||||
}
|
|
||||||
|
|
||||||
func gsaUA() string {
|
|
||||||
return gsaUserAgents[0] // deterministic for now; could rotate
|
|
||||||
}
|
|
||||||
|
|
||||||
type GoogleEngine struct {
|
type GoogleEngine struct {
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
|
@ -70,7 +60,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return contracts.SearchResponse{}, err
|
return contracts.SearchResponse{}, err
|
||||||
}
|
}
|
||||||
httpReq.Header.Set("User-Agent", gsaUA())
|
httpReq.Header.Set("User-Agent", googleUserAgent)
|
||||||
httpReq.Header.Set("Accept", "*/*")
|
httpReq.Header.Set("Accept", "*/*")
|
||||||
httpReq.AddCookie(&http.Cookie{Name: "CONSENT", Value: "YES+"})
|
httpReq.AddCookie(&http.Cookie{Name: "CONSENT", Value: "YES+"})
|
||||||
|
|
||||||
|
|
@ -95,7 +85,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("google error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("google error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ func (e *QwantEngine) searchWebAPI(ctx context.Context, req contracts.SearchRequ
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("qwant upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("qwant upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -253,7 +253,7 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("qwant lite upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("qwant lite upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ func (e *RedditEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("reddit api error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("reddit api error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("wikipedia upstream error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("wikipedia upstream error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ func (e *YouTubeEngine) Search(ctx context.Context, req contracts.SearchRequest)
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||||
return contracts.SearchResponse{}, fmt.Errorf("youtube api error: status %d", resp.StatusCode)
|
return contracts.SearchResponse{}, fmt.Errorf("youtube api error: status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http
|
||||||
|
|
||||||
return func(next http.Handler) http.Handler {
|
return func(next http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
ip := l.extractIP(r)
|
ip := limiter.extractIP(r)
|
||||||
|
|
||||||
if !limiter.allow(ip) {
|
if !limiter.allow(ip) {
|
||||||
retryAfter := int(limiter.window.Seconds())
|
retryAfter := int(limiter.window.Seconds())
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package middleware
|
package middleware
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
@ -92,7 +93,6 @@ func TestRateLimit_DifferentIPs(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRateLimit_XForwardedFor(t *testing.T) {
|
func TestRateLimit_XForwardedFor(t *testing.T) {
|
||||||
privateNet := mustParseCIDR("10.0.0.0/8")
|
|
||||||
h := RateLimit(RateLimitConfig{
|
h := RateLimit(RateLimitConfig{
|
||||||
Requests: 1,
|
Requests: 1,
|
||||||
Window: 10 * time.Second,
|
Window: 10 * time.Second,
|
||||||
|
|
|
||||||
|
|
@ -14,13 +14,17 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SafeURLScheme returns true if the URL uses an acceptable scheme (http or https).
|
// SafeURLScheme validates that a URL is well-formed and uses an acceptable scheme.
|
||||||
func SafeURLScheme(raw string) bool {
|
// Returns the parsed URL on success, or an error.
|
||||||
|
func SafeURLScheme(raw string) (*url.URL, error) {
|
||||||
u, err := url.Parse(raw)
|
u, err := url.Parse(raw)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return nil, err
|
||||||
}
|
}
|
||||||
return u.Scheme == "http" || u.Scheme == "https"
|
if u.Scheme != "http" && u.Scheme != "https" {
|
||||||
|
return nil, fmt.Errorf("URL must use http or https, got %q", u.Scheme)
|
||||||
|
}
|
||||||
|
return u, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsPrivateIP returns true if the IP address is in a private, loopback,
|
// IsPrivateIP returns true if the IP address is in a private, loopback,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue