diff --git a/cmd/searxng-go/main.go b/cmd/searxng-go/main.go index bc82387..fb50d22 100644 --- a/cmd/searxng-go/main.go +++ b/cmd/searxng-go/main.go @@ -4,9 +4,11 @@ import ( "flag" "fmt" "log" + "log/slog" "net/http" "os" + "github.com/ashie/gosearch/internal/cache" "github.com/ashie/gosearch/internal/config" "github.com/ashie/gosearch/internal/httpapi" "github.com/ashie/gosearch/internal/search" @@ -16,11 +18,24 @@ func main() { configPath := flag.String("config", "config.toml", "path to config.toml") flag.Parse() + // Initialize structured logging. + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})) + slog.SetDefault(logger) + cfg, err := config.Load(*configPath) if err != nil { log.Fatalf("failed to load config: %v", err) } + // Initialize Valkey cache. + searchCache := cache.New(cache.Config{ + Address: cfg.Cache.Address, + Password: cfg.Cache.Password, + DB: cfg.Cache.DB, + DefaultTTL: cfg.CacheTTL(), + }, logger) + defer searchCache.Close() + // Seed env vars from config so existing engine/factory/planner code // picks them up without changes. The config layer is the single source // of truth; env vars remain as overrides via applyEnvOverrides. 
@@ -37,6 +52,7 @@ func main() { svc := search.NewService(search.ServiceConfig{ UpstreamURL: cfg.Upstream.URL, HTTPTimeout: cfg.HTTPTimeout(), + Cache: searchCache, }) h := httpapi.NewHandler(svc) @@ -46,6 +62,6 @@ func main() { mux.HandleFunc("/search", h.Search) addr := fmt.Sprintf(":%d", cfg.Server.Port) - log.Printf("searxng-go listening on %s", addr) + logger.Info("searxng-go starting", "addr", addr, "cache", searchCache.Enabled()) log.Fatal(http.ListenAndServe(addr, mux)) } diff --git a/config.example.toml b/config.example.toml index e3e9556..143ee32 100644 --- a/config.example.toml +++ b/config.example.toml @@ -29,3 +29,15 @@ access_token = "" # Qwant category: "web" or "web-lite" (default: "web-lite") category = "web-lite" results_per_page = 10 + +[cache] +# Valkey/Redis cache for search results. +# Leave address empty to disable caching entirely. +# Env: VALKEY_ADDRESS +address = "" +# Env: VALKEY_PASSWORD +password = "" +# Database index (env: VALKEY_DB) +db = 0 +# Cache TTL for search results (env: VALKEY_CACHE_TTL) +default_ttl = "5m" diff --git a/go.mod b/go.mod index 5346e09..81efe4a 100644 --- a/go.mod +++ b/go.mod @@ -5,10 +5,13 @@ go 1.25.0 require ( github.com/BurntSushi/toml v1.5.0 github.com/PuerkitoBio/goquery v1.12.0 + github.com/redis/go-redis/v9 v9.18.0 ) require ( github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + go.uber.org/atomic v1.11.0 // indirect golang.org/x/net v0.52.0 // indirect - golang.org/x/sync v0.20.0 // indirect ) diff --git a/go.sum b/go.sum index d6a1c83..0aad3f0 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,30 @@ github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= 
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs= +github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= 
+github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= @@ -35,8 +57,6 @@ golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/cache/cache.go b/internal/cache/cache.go new file mode 100644 index 0000000..4e86efb --- /dev/null +++ b/internal/cache/cache.go @@ -0,0 +1,162 @@ +package cache + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "log/slog" + "time" + + "github.com/ashie/gosearch/internal/contracts" + "github.com/redis/go-redis/v9" +) + +// Config holds Valkey/Redis connection settings. +type Config struct { + // Address is the Valkey server address (e.g. "localhost:6379"). + Address string + // Password for authentication (empty = no auth). 
+	Password string
+	// Database index (default 0).
+	DB int
+	// Default TTL for cached search results.
+	DefaultTTL time.Duration
+}
+
+// Cache provides a Valkey-backed cache for search responses.
+// It is safe for concurrent use.
+// If the Valkey connection is nil or fails, cache operations are no-ops.
+type Cache struct {
+	client *redis.Client
+	ttl    time.Duration
+	logger *slog.Logger
+}
+
+// New connects to cfg.Address; an empty address or a failed ping yields a no-op cache.
+func New(cfg Config, logger *slog.Logger) *Cache {
+	if logger == nil {
+		logger = slog.Default()
+	}
+
+	if cfg.Address == "" {
+		logger.Debug("cache disabled: no valkey address configured")
+		return &Cache{logger: logger}
+	}
+
+	ttl := cfg.DefaultTTL
+	if ttl <= 0 {
+		ttl = 5 * time.Minute
+	}
+
+	client := redis.NewClient(&redis.Options{
+		Addr:     cfg.Address,
+		Password: cfg.Password,
+		DB:       cfg.DB,
+	})
+
+	// Verify connectivity with a short timeout; on failure fall back to a no-op cache.
+	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+	defer cancel()
+	if err := client.Ping(ctx).Err(); err != nil {
+		logger.Warn("cache disabled: valkey ping failed", "addr", cfg.Address, "error", err)
+		_ = client.Close() // best effort: release the resources held by the unused client
+		return &Cache{logger: logger}
+	}
+
+	logger.Info("cache connected", "addr", cfg.Address, "db", cfg.DB, "ttl", ttl)
+	return &Cache{client: client, ttl: ttl, logger: logger}
+}
+
+// Enabled returns true if the cache has a live Valkey connection.
+func (c *Cache) Enabled() bool {
+	return c.client != nil
+}
+
+// Get retrieves a cached search response. Returns (response, true) on hit,
+// (zero, false) on miss or error.
+func (c *Cache) Get(ctx context.Context, key string) (contracts.SearchResponse, bool) {
+	if !c.Enabled() {
+		return contracts.SearchResponse{}, false
+	}
+
+	fullKey := "gosearch:" + key
+	data, err := c.client.Get(ctx, fullKey).Bytes()
+	if err != nil {
+		if err != redis.Nil {
+			c.logger.Debug("cache miss (error)", "key", fullKey, "error", err)
+		}
+		return contracts.SearchResponse{}, false
+	}
+
+	var resp contracts.SearchResponse
+	if err := json.Unmarshal(data, &resp); err != nil {
+		c.logger.Warn("cache hit but unmarshal failed", "key", fullKey, "error", err)
+		return contracts.SearchResponse{}, false
+	}
+
+	c.logger.Debug("cache hit", "key", fullKey)
+	return resp, true
+}
+
+// Set stores a search response under key with the default TTL; failures are logged only.
+func (c *Cache) Set(ctx context.Context, key string, resp contracts.SearchResponse) {
+	if !c.Enabled() {
+		return
+	}
+
+	data, err := json.Marshal(resp)
+	if err != nil {
+		c.logger.Warn("cache set: marshal failed", "key", key, "error", err)
+		return
+	}
+	fullKey := "gosearch:" + key
+	if err := c.client.Set(ctx, fullKey, data, c.ttl).Err(); err != nil {
+		c.logger.Warn("cache set failed", "key", fullKey, "error", err)
+	}
+}
+
+// Invalidate removes a specific key from the cache; failures are logged, not returned.
+func (c *Cache) Invalidate(ctx context.Context, key string) {
+	if !c.Enabled() {
+		return
+	}
+	fullKey := "gosearch:" + key
+	if err := c.client.Del(ctx, fullKey).Err(); err != nil {
+		c.logger.Warn("cache invalidate failed", "key", fullKey, "error", err)
+	}
+}
+
+// Close closes the Valkey connection. It is a no-op for a disabled cache.
+func (c *Cache) Close() error {
+	if c.client == nil {
+		return nil
+	}
+	return c.client.Close()
+}
+
+// Key derives a deterministic cache key from the search parameters: the first 32
+// hex characters of a SHA-256 digest over the request fields (no "gosearch:" prefix).
+func Key(req contracts.SearchRequest) string {
+	h := sha256.New()
+	// %q quotes free-form strings so a value containing "|" cannot imitate a field boundary.
+	fmt.Fprintf(h, "q=%q|", req.Query)
+	fmt.Fprintf(h, "format=%s|", req.Format)
+	fmt.Fprintf(h, "pageno=%d|", req.Pageno)
+	fmt.Fprintf(h, "safesearch=%d|", req.Safesearch)
+	fmt.Fprintf(h, "lang=%q|", req.Language)
+
+	if req.TimeRange != nil {
+		fmt.Fprintf(h, "tr=%q|", *req.TimeRange)
+	}
+
+	for _, e := range req.Engines {
+		fmt.Fprintf(h, "e=%q|", e)
+	}
+	for _, cat := range req.Categories {
+		fmt.Fprintf(h, "c=%q|", cat)
+	}
+
+	return hex.EncodeToString(h.Sum(nil))[:32]
+}
diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go
new file mode 100644
index 0000000..e2e9594
--- /dev/null
+++ b/internal/cache/cache_test.go
@@ -0,0 +1,77 @@
+package cache
+
+import (
+	"testing"
+
+	"github.com/ashie/gosearch/internal/contracts"
+)
+
+func TestKey_Deterministic(t *testing.T) {
+	req := contracts.SearchRequest{
+		Format:     contracts.FormatJSON,
+		Query:      "kafka metamorphosis",
+		Pageno:     1,
+		Safesearch: 0,
+		Language:   "auto",
+		Engines:    []string{"wikipedia", "braveapi"},
+		Categories: []string{"general"},
+	}
+
+	key1 := Key(req)
+	key2 := Key(req)
+
+	if key1 != key2 {
+		t.Errorf("Key should be deterministic: %q != %q", key1, key2)
+	}
+	if len(key1) != 32 {
+		t.Errorf("expected 32-char key, got %d", len(key1))
+	}
+}
+
+func TestKey_DifferentQueries(t *testing.T) {
+	reqA := contracts.SearchRequest{Query: "kafka", Format: contracts.FormatJSON}
+	reqB := contracts.SearchRequest{Query: "orwell", Format: contracts.FormatJSON}
+
+	if Key(reqA) == Key(reqB) {
+		t.Error("different queries should produce different keys")
+	}
+}
+
+func TestKey_DifferentPageno(t *testing.T) {
+	req1 := contracts.SearchRequest{Query: "test", Pageno: 1}
+	req2 := contracts.SearchRequest{Query: "test", Pageno: 2}
+
+	if Key(req1) == Key(req2) {
+		t.Error("different pageno should produce different keys")
+	}
+}
+
+func TestKey_DifferentEngines(t *testing.T) {
+	req1 := contracts.SearchRequest{Query:
"test", Engines: []string{"wikipedia"}} + req2 := contracts.SearchRequest{Query: "test", Engines: []string{"braveapi"}} + + if Key(req1) == Key(req2) { + t.Error("different engines should produce different keys") + } +} + +func TestKey_TimeRange(t *testing.T) { + req1 := contracts.SearchRequest{Query: "test"} + req2 := contracts.SearchRequest{Query: "test", TimeRange: strPtr("week")} + + if Key(req1) == Key(req2) { + t.Error("with/without time_range should produce different keys") + } +} + +func TestNew_NopWithoutAddress(t *testing.T) { + c := New(Config{}, nil) + if c.Enabled() { + t.Error("cache should be disabled when no address is configured") + } + if err := c.Close(); err != nil { + t.Errorf("Close on nop cache should not error: %v", err) + } +} + +func strPtr(s string) *string { return &s } diff --git a/internal/config/config.go b/internal/config/config.go index aeca2af..6e4f7cc 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -11,9 +11,10 @@ import ( // Config is the top-level configuration for the gosearch service. type Config struct { - Server ServerConfig `toml:"server"` + Server ServerConfig `toml:"server"` Upstream UpstreamConfig `toml:"upstream"` - Engines EnginesConfig `toml:"engines"` + Engines EnginesConfig `toml:"engines"` + Cache CacheConfig `toml:"cache"` } type ServerConfig struct { @@ -26,11 +27,19 @@ type UpstreamConfig struct { } type EnginesConfig struct { - LocalPorted []string `toml:"local_ported"` + LocalPorted []string `toml:"local_ported"` Brave BraveConfig `toml:"brave"` Qwant QwantConfig `toml:"qwant"` } +// CacheConfig holds Valkey/Redis cache settings. +type CacheConfig struct { + Address string `toml:"address"` // Valkey server address (e.g. "localhost:6379") + Password string `toml:"password"` // Auth password (empty = none) + DB int `toml:"db"` // Database index (default 0) + DefaultTTL string `toml:"default_ttl"` // Cache TTL (e.g. 
"5m", default "5m") +} + type BraveConfig struct { APIKey string `toml:"api_key"` AccessToken string `toml:"access_token"` @@ -71,6 +80,10 @@ func defaultConfig() *Config { ResultsPerPage: 10, }, }, + Cache: CacheConfig{ + DB: 0, + DefaultTTL: "5m", + }, } } @@ -99,6 +112,18 @@ func applyEnvOverrides(cfg *Config) { if v := os.Getenv("BRAVE_ACCESS_TOKEN"); v != "" { cfg.Engines.Brave.AccessToken = v } + if v := os.Getenv("VALKEY_ADDRESS"); v != "" { + cfg.Cache.Address = v + } + if v := os.Getenv("VALKEY_PASSWORD"); v != "" { + cfg.Cache.Password = v + } + if v := os.Getenv("VALKEY_DB"); v != "" { + fmt.Sscanf(v, "%d", &cfg.Cache.DB) + } + if v := os.Getenv("VALKEY_CACHE_TTL"); v != "" { + cfg.Cache.DefaultTTL = v + } } // HTTPTimeout parses the configured timeout string into a time.Duration. @@ -114,6 +139,14 @@ func (c *Config) LocalPortedCSV() string { return strings.Join(c.Engines.LocalPorted, ",") } +// CacheTTL parses the configured cache TTL string into a time.Duration. +func (c *Config) CacheTTL() time.Duration { + if d, err := time.ParseDuration(c.Cache.DefaultTTL); err == nil && d > 0 { + return d + } + return 5 * time.Minute +} + func splitCSV(s string) []string { if s == "" { return nil diff --git a/internal/search/service.go b/internal/search/service.go index f7cf83b..f6ca4fa 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/ashie/gosearch/internal/cache" "github.com/ashie/gosearch/internal/contracts" "github.com/ashie/gosearch/internal/engines" "github.com/ashie/gosearch/internal/upstream" @@ -14,12 +15,14 @@ import ( type ServiceConfig struct { UpstreamURL string HTTPTimeout time.Duration + Cache *cache.Cache } type Service struct { upstreamClient *upstream.Client planner *engines.Planner localEngines map[string]engines.Engine + cache *cache.Cache } func NewService(cfg ServiceConfig) *Service { @@ -42,6 +45,7 @@ func NewService(cfg ServiceConfig) *Service { upstreamClient: up, 
planner: engines.NewPlannerFromEnv(), localEngines: engines.NewDefaultPortedEngines(httpClient), + cache: cfg.Cache, } } 
@@ -50,7 +54,38 @@ func NewService(cfg ServiceConfig) *Service {
 //
 // Individual engine failures are reported as unresponsive_engines rather
 // than aborting the entire search.
+//
+// If a Valkey cache is configured and contains a cached response for this
+// request, the cached result is returned without hitting any engines.
+// Fresh responses are stored in the cache only when they carry at least one
+// result, answer, or infobox.
 func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
+	// Derive the key once; it serves both the lookup and the store below.
+	var cacheKey string
+	if s.cache != nil {
+		cacheKey = cache.Key(req)
+		if cached, hit := s.cache.Get(ctx, cacheKey); hit {
+			return cached, nil
+		}
+	}
+
+	merged, err := s.executeSearch(ctx, req)
+	if err != nil {
+		return SearchResponse{}, err
+	}
+
+	// Skip the store for empty responses: one produced while engines were
+	// unresponsive would otherwise be replayed until its TTL expires.
+	empty := len(merged.Results) == 0 && len(merged.Answers) == 0 && len(merged.Infoboxes) == 0
+	if s.cache != nil && !empty {
+		s.cache.Set(ctx, cacheKey, merged)
+	}
+
+	return merged, nil
+}
+
+// executeSearch runs the actual engine queries and merges results.
+func (s *Service) executeSearch(ctx context.Context, req SearchRequest) (SearchResponse, error) {
 	localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req)
 
 	// Run all local engines concurrently.
@@ -176,5 +211,3 @@ func shouldFallbackToUpstream(engineName string, r contracts.SearchResponse) boo
 	}
 	return len(r.Results) == 0 && len(r.Answers) == 0 && len(r.Infoboxes) == 0
 }
-
-
diff --git a/searxng-go b/searxng-go
deleted file mode 100755
index e286897..0000000
Binary files a/searxng-go and /dev/null differ