search: wire per-engine cache with tier-aware TTLs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ashisgreat22 2026-03-24 01:14:31 +01:00
parent b710aec798
commit 26f8e4855b

View file

@@ -18,14 +18,16 @@ package search
import ( import (
"context" "context"
"encoding/json"
"fmt"
"sync" "sync"
"time" "time"
"github.com/metamorphosis-dev/samsa/internal/cache" "github.com/metamorphosis-dev/samsa/internal/cache"
"github.com/metamorphosis-dev/samsa/internal/httpclient"
"github.com/metamorphosis-dev/samsa/internal/config" "github.com/metamorphosis-dev/samsa/internal/config"
"github.com/metamorphosis-dev/samsa/internal/contracts" "github.com/metamorphosis-dev/samsa/internal/contracts"
"github.com/metamorphosis-dev/samsa/internal/engines" "github.com/metamorphosis-dev/samsa/internal/engines"
"github.com/metamorphosis-dev/samsa/internal/httpclient"
"github.com/metamorphosis-dev/samsa/internal/upstream" "github.com/metamorphosis-dev/samsa/internal/upstream"
) )
@@ -33,6 +35,7 @@ type ServiceConfig struct {
UpstreamURL string UpstreamURL string
HTTPTimeout time.Duration HTTPTimeout time.Duration
Cache *cache.Cache Cache *cache.Cache
CacheTTLOverrides map[string]time.Duration
EnginesConfig *config.Config EnginesConfig *config.Config
} }
@@ -40,7 +43,7 @@ type Service struct {
upstreamClient *upstream.Client upstreamClient *upstream.Client
planner *engines.Planner planner *engines.Planner
localEngines map[string]engines.Engine localEngines map[string]engines.Engine
cache *cache.Cache engineCache *cache.EngineCache
} }
func NewService(cfg ServiceConfig) *Service { func NewService(cfg ServiceConfig) *Service {
@@ -59,118 +62,177 @@ func NewService(cfg ServiceConfig) *Service {
} }
} }
var engineCache *cache.EngineCache
if cfg.Cache != nil {
engineCache = cache.NewEngineCache(cfg.Cache, cfg.CacheTTLOverrides)
}
return &Service{ return &Service{
upstreamClient: up, upstreamClient: up,
planner: engines.NewPlannerFromEnv(), planner: engines.NewPlannerFromEnv(),
localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig), localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig),
cache: cfg.Cache, engineCache: engineCache,
} }
} }
// derefString returns the string value of a *string, or "" if nil.
func derefString(s *string) string {
if s == nil {
return ""
}
return *s
}
// Search executes the request against local engines (in parallel) and // Search executes the request against local engines (in parallel) and
// optionally the upstream instance for unported engines. // optionally the upstream instance for unported engines.
//
// Individual engine failures are reported as unresponsive_engines rather
// than aborting the entire search.
//
// If a Valkey cache is configured and contains a cached response for this
// request, the cached result is returned without hitting any engines.
func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) { func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
// Check cache first. queryHash := cache.QueryHash(
if s.cache != nil { req.Query,
cacheKey := cache.Key(req) int(req.Pageno),
if cached, hit := s.cache.Get(ctx, cacheKey); hit { int(req.Safesearch),
return cached, nil req.Language,
} derefString(req.TimeRange),
} )
merged, err := s.executeSearch(ctx, req)
if err != nil {
return SearchResponse{}, err
}
// Store in cache.
if s.cache != nil {
cacheKey := cache.Key(req)
s.cache.Set(ctx, cacheKey, merged)
}
return merged, nil
}
// executeSearch runs the actual engine queries and merges results.
func (s *Service) executeSearch(ctx context.Context, req SearchRequest) (SearchResponse, error) {
localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req) localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req)
// Run all local engines concurrently. // Phase 1: Parallel cache lookups — classify each engine as fresh/stale/miss
type engineResult struct { type cacheResult struct {
name string engine string
resp contracts.SearchResponse cached cache.CachedEngineResponse
err error hit bool
fresh *contracts.SearchResponse // nil if no fresh response
fetchErr error
unmarshalErr bool // true if hit but unmarshal failed (treat as miss)
} }
localResults := make([]engineResult, 0, len(localEngineNames)) cacheResults := make([]cacheResult, len(localEngineNames))
var wg sync.WaitGroup var lookupWg sync.WaitGroup
var mu sync.Mutex for i, name := range localEngineNames {
lookupWg.Add(1)
go func(i int, name string) {
defer lookupWg.Done()
for _, name := range localEngineNames { result := cacheResult{engine: name}
eng, ok := s.localEngines[name]
if !ok { if s.engineCache != nil {
mu.Lock() cached, ok := s.engineCache.Get(ctx, name, queryHash)
localResults = append(localResults, engineResult{ if ok {
name: name, result.hit = true
resp: unresponsiveResponse(req.Query, name, "engine_not_registered"), result.cached = cached
}) if !s.engineCache.IsStale(cached, name) {
mu.Unlock() // Fresh cache hit — deserialize and use directly
var resp contracts.SearchResponse
if err := json.Unmarshal(cached.Response, &resp); err == nil {
result.fresh = &resp
} else {
// Unmarshal failed — treat as cache miss (will fetch fresh synchronously)
result.unmarshalErr = true
result.hit = false // treat as miss
}
}
// If stale: result.fresh stays zero, result.cached has stale data
}
}
cacheResults[i] = result
}(i, name)
}
lookupWg.Wait()
// Phase 2: Fetch fresh for misses and stale entries
var fetchWg sync.WaitGroup
for i, name := range localEngineNames {
cr := cacheResults[i]
// Fresh hit — nothing to do in phase 2
if cr.hit && cr.fresh != nil {
continue continue
} }
wg.Add(1) // Stale hit — return stale immediately, refresh in background
go func(name string, eng engines.Engine) { if cr.hit && len(cr.cached.Response) > 0 && s.engineCache != nil && s.engineCache.IsStale(cr.cached, name) {
defer wg.Done() fetchWg.Add(1)
go func(name string) {
r, err := eng.Search(ctx, req) defer fetchWg.Done()
eng, ok := s.localEngines[name]
mu.Lock() if !ok {
defer mu.Unlock()
if err != nil {
localResults = append(localResults, engineResult{
name: name,
resp: unresponsiveResponse(req.Query, name, err.Error()),
})
return return
} }
localResults = append(localResults, engineResult{name: name, resp: r}) freshResp, err := eng.Search(ctx, req)
}(name, eng) if err != nil {
s.engineCache.Logger().Debug("background refresh failed", "engine", name, "error", err)
return
}
s.engineCache.Set(ctx, name, queryHash, freshResp)
}(name)
continue
} }
wg.Wait() // Cache miss — fetch fresh synchronously
if !cr.hit {
fetchWg.Add(1)
go func(i int, name string) {
defer fetchWg.Done()
// Collect successful responses and determine upstream fallbacks. eng, ok := s.localEngines[name]
responses := make([]contracts.SearchResponse, 0, len(localResults)+1) if !ok {
upstreamSet := map[string]bool{} cacheResults[i] = cacheResult{
for _, e := range upstreamEngineNames { engine: name,
upstreamSet[e] = true fetchErr: fmt.Errorf("engine not registered: %s", name),
}
return
} }
for _, lr := range localResults { freshResp, err := eng.Search(ctx, req)
responses = append(responses, lr.resp) if err != nil {
cacheResults[i] = cacheResult{
engine: name,
fetchErr: err,
}
return
}
// If a local engine returned nothing (e.g. qwant anti-bot), fall back // Cache the fresh response
// to upstream if available. if s.engineCache != nil {
if shouldFallbackToUpstream(lr.name, lr.resp) && !upstreamSet[lr.name] { s.engineCache.Set(ctx, name, queryHash, freshResp)
upstreamEngineNames = append(upstreamEngineNames, lr.name) }
upstreamSet[lr.name] = true
cacheResults[i] = cacheResult{
engine: name,
fresh: &freshResp,
hit: false,
}
}(i, name)
}
}
fetchWg.Wait()
// Phase 3: Collect responses for merge
responses := make([]contracts.SearchResponse, 0, len(cacheResults))
for _, cr := range cacheResults {
if cr.fetchErr != nil {
responses = append(responses, unresponsiveResponse(req.Query, cr.engine, cr.fetchErr.Error()))
continue
}
// Use fresh data if available (fresh hit or freshly fetched), otherwise use stale cached
if cr.fresh != nil {
responses = append(responses, *cr.fresh)
} else if cr.hit && len(cr.cached.Response) > 0 {
var resp contracts.SearchResponse
if err := json.Unmarshal(cr.cached.Response, &resp); err == nil {
responses = append(responses, resp)
}
} }
} }
// Upstream proxy for unported (or fallback) engines. // Upstream proxy for unported (or fallback) engines.
// ... rest of the existing code is UNCHANGED ...
if s.upstreamClient != nil && len(upstreamEngineNames) > 0 { if s.upstreamClient != nil && len(upstreamEngineNames) > 0 {
r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngineNames) r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngineNames)
if err != nil { if err != nil {
// Upstream failure is treated as a single unresponsive engine entry.
responses = append(responses, contracts.SearchResponse{ responses = append(responses, contracts.SearchResponse{
Query: req.Query, Query: req.Query,
UnresponsiveEngines: [][2]string{{"upstream", err.Error()}}, UnresponsiveEngines: [][2]string{{"upstream", err.Error()}},