diff --git a/cmd/searxng-go/main.go b/cmd/searxng-go/main.go index fb50d22..386014d 100644 --- a/cmd/searxng-go/main.go +++ b/cmd/searxng-go/main.go @@ -11,6 +11,7 @@ import ( "github.com/ashie/gosearch/internal/cache" "github.com/ashie/gosearch/internal/config" "github.com/ashie/gosearch/internal/httpapi" + "github.com/ashie/gosearch/internal/middleware" "github.com/ashie/gosearch/internal/search" ) @@ -61,7 +62,26 @@ func main() { mux.HandleFunc("/healthz", h.Healthz) mux.HandleFunc("/search", h.Search) + // Apply middleware: rate limiter → CORS → handler. + var handler http.Handler = mux + handler = middleware.CORS(middleware.CORSConfig{ + AllowedOrigins: cfg.CORS.AllowedOrigins, + AllowedMethods: cfg.CORS.AllowedMethods, + AllowedHeaders: cfg.CORS.AllowedHeaders, + ExposedHeaders: cfg.CORS.ExposedHeaders, + MaxAge: cfg.CORS.MaxAge, + })(handler) + handler = middleware.RateLimit(middleware.RateLimitConfig{ + Requests: cfg.RateLimit.Requests, + Window: cfg.RateLimitWindow(), + CleanupInterval: cfg.RateLimitCleanupInterval(), + }, logger)(handler) + addr := fmt.Sprintf(":%d", cfg.Server.Port) - logger.Info("searxng-go starting", "addr", addr, "cache", searchCache.Enabled()) - log.Fatal(http.ListenAndServe(addr, mux)) + logger.Info("searxng-go starting", + "addr", addr, + "cache", searchCache.Enabled(), + "rate_limit", cfg.RateLimit.Requests > 0, + ) + log.Fatal(http.ListenAndServe(addr, handler)) } diff --git a/config.example.toml b/config.example.toml index 143ee32..4a5ebe9 100644 --- a/config.example.toml +++ b/config.example.toml @@ -41,3 +41,23 @@ password = "" db = 0 # Cache TTL for search results (env: VALKEY_CACHE_TTL) default_ttl = "5m" + +[cors] +# CORS configuration for browser-based clients. +# Allowed origins: use "*" for all, or specific domains (env: CORS_ALLOWED_ORIGINS) +allowed_origins = ["*"] +# Allowed methods (default: GET, POST, OPTIONS) +# allowed_methods = ["GET", "POST", "OPTIONS"] +# Allowed headers (default: Content-Type, Authorization, X-Search-Token, X-Brave-Access-Token) +# allowed_headers = ["Content-Type", "Authorization"] +# Preflight cache duration in seconds (default: 3600) +# max_age = 3600 + +[rate_limit] +# Per-IP rate limiting. Set requests to 0 to disable. +# Env: RATE_LIMIT_REQUESTS +requests = 30 +# Time window for rate limit (env: RATE_LIMIT_WINDOW) +window = "1m" +# How often to clean up stale IP entries (env: RATE_LIMIT_CLEANUP_INTERVAL) +cleanup_interval = "5m" diff --git a/internal/config/config.go b/internal/config/config.go index 6e4f7cc..21db2d9 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -11,10 +11,12 @@ import ( // Config is the top-level configuration for the gosearch service. type Config struct { - Server ServerConfig `toml:"server"` - Upstream UpstreamConfig `toml:"upstream"` - Engines EnginesConfig `toml:"engines"` - Cache CacheConfig `toml:"cache"` + Server ServerConfig `toml:"server"` + Upstream UpstreamConfig `toml:"upstream"` + Engines EnginesConfig `toml:"engines"` + Cache CacheConfig `toml:"cache"` + CORS CORSConfig `toml:"cors"` + RateLimit RateLimitConfig `toml:"rate_limit"` } type ServerConfig struct { @@ -40,6 +42,22 @@ type CacheConfig struct { DefaultTTL string `toml:"default_ttl"` // Cache TTL (e.g. "5m", default "5m") } +// CORSConfig holds CORS middleware settings. +type CORSConfig struct { + AllowedOrigins []string `toml:"allowed_origins"` + AllowedMethods []string `toml:"allowed_methods"` + AllowedHeaders []string `toml:"allowed_headers"` + ExposedHeaders []string `toml:"exposed_headers"` + MaxAge int `toml:"max_age"` +} + +// RateLimitConfig holds per-IP rate limiting settings. +type RateLimitConfig struct { + Requests int `toml:"requests"` // Max requests per window (default: 30) + Window string `toml:"window"` // Time window (e.g. "1m", default: "1m") + CleanupInterval string `toml:"cleanup_interval"` // Stale entry cleanup interval (default: "5m") +} + type BraveConfig struct { APIKey string `toml:"api_key"` AccessToken string `toml:"access_token"` @@ -84,6 +102,10 @@ func defaultConfig() *Config { DB: 0, DefaultTTL: "5m", }, + RateLimit: RateLimitConfig{ + Window: "1m", + CleanupInterval: "5m", + }, } } @@ -124,6 +146,18 @@ func applyEnvOverrides(cfg *Config) { if v := os.Getenv("VALKEY_CACHE_TTL"); v != "" { cfg.Cache.DefaultTTL = v } + if v := os.Getenv("CORS_ALLOWED_ORIGINS"); v != "" { + cfg.CORS.AllowedOrigins = splitCSV(v) + } + if v := os.Getenv("RATE_LIMIT_REQUESTS"); v != "" { + fmt.Sscanf(v, "%d", &cfg.RateLimit.Requests) + } + if v := os.Getenv("RATE_LIMIT_WINDOW"); v != "" { + cfg.RateLimit.Window = v + } + if v := os.Getenv("RATE_LIMIT_CLEANUP_INTERVAL"); v != "" { + cfg.RateLimit.CleanupInterval = v + } } // HTTPTimeout parses the configured timeout string into a time.Duration. @@ -147,6 +181,22 @@ func (c *Config) CacheTTL() time.Duration { return 5 * time.Minute } +// RateLimitWindow parses the rate limit window into a time.Duration. +func (c *Config) RateLimitWindow() time.Duration { + if d, err := time.ParseDuration(c.RateLimit.Window); err == nil && d > 0 { + return d + } + return time.Minute +} + +// RateLimitCleanupInterval parses the cleanup interval into a time.Duration. +func (c *Config) RateLimitCleanupInterval() time.Duration { + if d, err := time.ParseDuration(c.RateLimit.CleanupInterval); err == nil && d > 0 { + return d + } + return 5 * time.Minute +} + func splitCSV(s string) []string { if s == "" { return nil diff --git a/internal/middleware/cors.go b/internal/middleware/cors.go new file mode 100644 index 0000000..7b4000e --- /dev/null +++ b/internal/middleware/cors.go @@ -0,0 +1,88 @@ +package middleware + +import ( + "net/http" + "strconv" + "strings" +) + +// CORSConfig controls Cross-Origin Resource Sharing headers. +type CORSConfig struct { + // AllowedOrigins is a list of allowed origin patterns. + // Use "*" to allow all origins, or specific domains like "https://example.com". + AllowedOrigins []string + // AllowedMethods defaults to GET, POST, OPTIONS if empty. + AllowedMethods []string + // AllowedHeaders defaults to Content-Type, Authorization if empty. + AllowedHeaders []string + // ExposedHeaders lists headers the browser can access from the response. + ExposedHeaders []string + // MaxAge is the preflight cache duration in seconds (default: 3600). + MaxAge int +} + +// CORS returns a middleware that sets CORS headers on all responses +// and handles OPTIONS preflight requests. +func CORS(cfg CORSConfig) func(http.Handler) http.Handler { + origins := cfg.AllowedOrigins + if len(origins) == 0 { + origins = []string{"*"} + } + + methods := cfg.AllowedMethods + if len(methods) == 0 { + methods = []string{"GET", "POST", "OPTIONS"} + } + + headers := cfg.AllowedHeaders + if len(headers) == 0 { + headers = []string{"Content-Type", "Authorization", "X-Search-Token", "X-Brave-Access-Token"} + } + + maxAge := cfg.MaxAge + if maxAge <= 0 { + maxAge = 3600 + } + + methodsStr := strings.Join(methods, ", ") + headersStr := strings.Join(headers, ", ") + exposedStr := strings.Join(cfg.ExposedHeaders, ", ") + maxAgeStr := strconv.Itoa(maxAge) + + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + origin := r.Header.Get("Origin") + + // Determine the allowed origin for this request. + allowedOrigin := "" + for _, o := range origins { + if o == "*" { + allowedOrigin = "*" + break + } + if o == origin { + allowedOrigin = origin + break + } + } + + if allowedOrigin != "" { + w.Header().Set("Access-Control-Allow-Origin", allowedOrigin) + w.Header().Set("Access-Control-Allow-Methods", methodsStr) + w.Header().Set("Access-Control-Allow-Headers", headersStr) + if exposedStr != "" { + w.Header().Set("Access-Control-Expose-Headers", exposedStr) + } + w.Header().Set("Access-Control-Max-Age", maxAgeStr) + } + + // Handle preflight. + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusNoContent) + return + } + + next.ServeHTTP(w, r) + }) + } +} diff --git a/internal/middleware/cors_test.go b/internal/middleware/cors_test.go new file mode 100644 index 0000000..4f3f6c2 --- /dev/null +++ b/internal/middleware/cors_test.go @@ -0,0 +1,115 @@ +package middleware + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestCORS_WildcardOrigin(t *testing.T) { + h := CORS(CORSConfig{AllowedOrigins: []string{"*"}})(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + req := httptest.NewRequest("GET", "/search?q=test", nil) + req.Header.Set("Origin", "https://evil.com") + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Errorf("expected 200, got %d", rec.Code) + } + if rec.Header().Get("Access-Control-Allow-Origin") != "*" { + t.Errorf("expected wildcard origin, got %s", rec.Header().Get("Access-Control-Allow-Origin")) + } +} + +func TestCORS_SpecificOrigin(t *testing.T) { + h := CORS(CORSConfig{AllowedOrigins: []string{"https://example.com"}})(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + // Allowed origin. + req := httptest.NewRequest("GET", "/search?q=test", nil) + req.Header.Set("Origin", "https://example.com") + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if rec.Header().Get("Access-Control-Allow-Origin") != "https://example.com" { + t.Errorf("expected https://example.com, got %s", rec.Header().Get("Access-Control-Allow-Origin")) + } + + // Disallowed origin — header should not be set. + req2 := httptest.NewRequest("GET", "/search?q=test", nil) + req2.Header.Set("Origin", "https://evil.com") + rec2 := httptest.NewRecorder() + h.ServeHTTP(rec2, req2) + + if rec2.Header().Get("Access-Control-Allow-Origin") != "" { + t.Errorf("expected no CORS header for disallowed origin, got %s", rec2.Header().Get("Access-Control-Allow-Origin")) + } +} + +func TestCORS_Preflight(t *testing.T) { + h := CORS(CORSConfig{})(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Error("handler should not be called for preflight") + })) + + req := httptest.NewRequest("OPTIONS", "/search", nil) + req.Header.Set("Origin", "https://example.com") + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusNoContent { + t.Errorf("expected 204 for preflight, got %d", rec.Code) + } + if rec.Header().Get("Access-Control-Allow-Methods") == "" { + t.Error("expected Access-Control-Allow-Methods header") + } + if rec.Header().Get("Access-Control-Max-Age") != "3600" { + t.Errorf("expected Max-Age 3600, got %s", rec.Header().Get("Access-Control-Max-Age")) + } +} + +func TestCORS_NoOriginHeader(t *testing.T) { + called := false + h := CORS(CORSConfig{AllowedOrigins: []string{"https://example.com"}})(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + })) + + // No Origin header — should pass through without CORS headers. + req := httptest.NewRequest("GET", "/search?q=test", nil) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if !called { + t.Error("handler should be called") + } + if rec.Header().Get("Access-Control-Allow-Origin") != "" { + t.Errorf("expected no CORS header without Origin, got %s", rec.Header().Get("Access-Control-Allow-Origin")) + } +} + +func TestCORS_CustomMethodsAndHeaders(t *testing.T) { + h := CORS(CORSConfig{ + AllowedOrigins: []string{"*"}, + AllowedMethods: []string{"GET"}, + AllowedHeaders: []string{"X-Custom"}, + MaxAge: 7200, + })(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + + req := httptest.NewRequest("OPTIONS", "/search", nil) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if rec.Header().Get("Access-Control-Allow-Methods") != "GET" { + t.Errorf("expected 'GET', got %s", rec.Header().Get("Access-Control-Allow-Methods")) + } + if rec.Header().Get("Access-Control-Allow-Headers") != "X-Custom" { + t.Errorf("expected 'X-Custom', got %s", rec.Header().Get("Access-Control-Allow-Headers")) + } + if rec.Header().Get("Access-Control-Max-Age") != "7200" { + t.Errorf("expected '7200', got %s", rec.Header().Get("Access-Control-Max-Age")) + } +} diff --git a/internal/middleware/ratelimit.go b/internal/middleware/ratelimit.go new file mode 100644 index 0000000..f1a181d --- /dev/null +++ b/internal/middleware/ratelimit.go @@ -0,0 +1,142 @@ +package middleware + +import ( + "net" + "net/http" + "strconv" + "strings" + "sync" + "time" + + "log/slog" +) + +// RateLimitConfig controls per-IP rate limiting using a sliding window counter. +type RateLimitConfig struct { + // Requests is the max number of requests allowed per window. + Requests int + // Window is the time window duration (e.g. "1m"). + Window time.Duration + // CleanupInterval is how often stale entries are purged (default: 5m). + CleanupInterval time.Duration +} + +// RateLimit returns a middleware that limits requests per IP address. +// Uses an in-memory sliding window counter. When the limit is exceeded, +// responds with HTTP 429 and a Retry-After header. +func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http.Handler { + requests := cfg.Requests + if requests <= 0 { + requests = 30 + } + + window := cfg.Window + if window <= 0 { + window = time.Minute + } + + cleanup := cfg.CleanupInterval + if cleanup <= 0 { + cleanup = 5 * time.Minute + } + + if logger == nil { + logger = slog.Default() + } + + limiter := &ipLimiter{ + requests: requests, + window: window, + clients: make(map[string]*bucket), + logger: logger, + } + + // Background cleanup of stale buckets. + go limiter.cleanup(cleanup) + + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ip := extractIP(r) + + if !limiter.allow(ip) { + retryAfter := int(limiter.window.Seconds()) + w.Header().Set("Retry-After", strconv.Itoa(retryAfter)) + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte("429 Too Many Requests\n")) + logger.Debug("rate limited", "ip", ip) + return + } + + next.ServeHTTP(w, r) + }) + } +} + +type bucket struct { + count int + expireAt time.Time +} + +type ipLimiter struct { + requests int + window time.Duration + clients map[string]*bucket + mu sync.Mutex + logger *slog.Logger +} + +func (l *ipLimiter) allow(ip string) bool { + l.mu.Lock() + defer l.mu.Unlock() + + now := time.Now() + b, ok := l.clients[ip] + + if !ok || now.After(b.expireAt) { + l.clients[ip] = &bucket{ + count: 1, + expireAt: now.Add(l.window), + } + return true + } + + b.count++ + return b.count <= l.requests +} + +func (l *ipLimiter) cleanup(interval time.Duration) { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for range ticker.C { + l.mu.Lock() + now := time.Now() + for ip, b := range l.clients { + if now.After(b.expireAt) { + delete(l.clients, ip) + } + } + l.mu.Unlock() + } +} + +func extractIP(r *http.Request) string { + // Trust X-Forwarded-For / X-Real-IP if behind a proxy. + if xff := r.Header.Get("X-Forwarded-For"); xff != "" { + // First IP in the chain is the client. + if idx := len(xff); idx > 0 { + parts := strings.SplitN(xff, ",", 2) + return strings.TrimSpace(parts[0]) + } + } + if rip := r.Header.Get("X-Real-IP"); rip != "" { + return strings.TrimSpace(rip) + } + + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err != nil { + return r.RemoteAddr + } + return host +} diff --git a/internal/middleware/ratelimit_test.go b/internal/middleware/ratelimit_test.go new file mode 100644 index 0000000..987d014 --- /dev/null +++ b/internal/middleware/ratelimit_test.go @@ -0,0 +1,175 @@ +package middleware + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestRateLimit_AllowsUnderLimit(t *testing.T) { + h := RateLimit(RateLimitConfig{ + Requests: 5, + Window: 10 * time.Second, + }, nil)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + for i := 0; i < 5; i++ { + req := httptest.NewRequest("GET", "/search?q=test", nil) + req.RemoteAddr = "1.2.3.4:1234" + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("request %d: expected 200, got %d", i+1, rec.Code) + } + } +} + +func TestRateLimit_BlocksOverLimit(t *testing.T) { + h := RateLimit(RateLimitConfig{ + Requests: 3, + Window: 10 * time.Second, + }, nil)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + for i := 0; i < 3; i++ { + req := httptest.NewRequest("GET", "/search?q=test", nil) + req.RemoteAddr = "1.2.3.4:1234" + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("request %d: expected 200, got %d", i+1, rec.Code) + } + } + + // 4th request should be blocked. + req := httptest.NewRequest("GET", "/search?q=test", nil) + req.RemoteAddr = "1.2.3.4:1234" + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusTooManyRequests { + t.Errorf("expected 429, got %d", rec.Code) + } +} + +func TestRateLimit_DifferentIPs(t *testing.T) { + h := RateLimit(RateLimitConfig{ + Requests: 1, + Window: 10 * time.Second, + }, nil)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + // IP A: allowed + req := httptest.NewRequest("GET", "/search", nil) + req.RemoteAddr = "1.1.1.1:1234" + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Errorf("IP A first request: expected 200, got %d", rec.Code) + } + + // IP A: blocked + req = httptest.NewRequest("GET", "/search", nil) + req.RemoteAddr = "1.1.1.1:1234" + rec = httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusTooManyRequests { + t.Errorf("IP A second request: expected 429, got %d", rec.Code) + } + + // IP B: allowed (separate bucket) + req = httptest.NewRequest("GET", "/search", nil) + req.RemoteAddr = "2.2.2.2:1234" + rec = httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Errorf("IP B first request: expected 200, got %d", rec.Code) + } +} + +func TestRateLimit_XForwardedFor(t *testing.T) { + h := RateLimit(RateLimitConfig{ + Requests: 1, + Window: 10 * time.Second, + }, nil)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + // Request via proxy — should use X-Forwarded-For. + req := httptest.NewRequest("GET", "/search", nil) + req.RemoteAddr = "10.0.0.1:1234" + req.Header.Set("X-Forwarded-For", "203.0.113.50, 10.0.0.1") + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Errorf("first XFF request: expected 200, got %d", rec.Code) + } + + // Different proxy, same client IP — should be blocked. + req = httptest.NewRequest("GET", "/search", nil) + req.RemoteAddr = "10.0.0.2:1234" + req.Header.Set("X-Forwarded-For", "203.0.113.50, 10.0.0.2") + rec = httptest.NewRecorder() + h.ServeHTTP(rec, req) + if rec.Code != http.StatusTooManyRequests { + t.Errorf("same XFF client: expected 429, got %d", rec.Code) + } +} + +func TestRateLimit_WindowExpires(t *testing.T) { + limiter := &ipLimiter{ + requests: 1, + window: 50 * time.Millisecond, + clients: make(map[string]*bucket), + } + + if !limiter.allow("1.1.1.1") { + t.Error("first request should be allowed") + } + if limiter.allow("1.1.1.1") { + t.Error("second request should be blocked") + } + + // Wait for window to expire. + time.Sleep(60 * time.Millisecond) + + if !limiter.allow("1.1.1.1") { + t.Error("request after window expiry should be allowed") + } +} + +func TestExtractIP(t *testing.T) { + tests := []struct { + name string + xff string + realIP string + remote string + expected string + }{ + {"xff", "203.0.113.50, 10.0.0.1", "", "10.0.0.1:1234", "203.0.113.50"}, + {"real_ip", "", "203.0.113.50", "10.0.0.1:1234", "203.0.113.50"}, + {"remote", "", "", "1.2.3.4:5678", "1.2.3.4"}, + {"xff_over_real", "203.0.113.50", "10.0.0.1", "10.0.0.1:1234", "203.0.113.50"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := httptest.NewRequest("GET", "/", nil) + if tt.xff != "" { + req.Header.Set("X-Forwarded-For", tt.xff) + } + if tt.realIP != "" { + req.Header.Set("X-Real-IP", tt.realIP) + } + req.RemoteAddr = tt.remote + + if got := extractIP(req); got != tt.expected { + t.Errorf("extractIP() = %q, want %q", got, tt.expected) + } + }) + } +} diff --git a/searxng-go b/searxng-go deleted file mode 100755 index 4f75db2..0000000 Binary files a/searxng-go and /dev/null differ