search: wire per-engine cache with tier-aware TTLs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ashisgreat22 2026-03-24 01:14:31 +01:00
parent b710aec798
commit 26f8e4855b

View file

@@ -18,14 +18,16 @@ package search
import ( import (
"context" "context"
"encoding/json"
"fmt"
"sync" "sync"
"time" "time"
"github.com/metamorphosis-dev/samsa/internal/cache" "github.com/metamorphosis-dev/samsa/internal/cache"
"github.com/metamorphosis-dev/samsa/internal/httpclient"
"github.com/metamorphosis-dev/samsa/internal/config" "github.com/metamorphosis-dev/samsa/internal/config"
"github.com/metamorphosis-dev/samsa/internal/contracts" "github.com/metamorphosis-dev/samsa/internal/contracts"
"github.com/metamorphosis-dev/samsa/internal/engines" "github.com/metamorphosis-dev/samsa/internal/engines"
"github.com/metamorphosis-dev/samsa/internal/httpclient"
"github.com/metamorphosis-dev/samsa/internal/upstream" "github.com/metamorphosis-dev/samsa/internal/upstream"
) )
@@ -33,6 +35,7 @@ type ServiceConfig struct {
UpstreamURL string UpstreamURL string
HTTPTimeout time.Duration HTTPTimeout time.Duration
Cache *cache.Cache Cache *cache.Cache
CacheTTLOverrides map[string]time.Duration
EnginesConfig *config.Config EnginesConfig *config.Config
} }
@@ -40,7 +43,7 @@ type Service struct {
upstreamClient *upstream.Client upstreamClient *upstream.Client
planner *engines.Planner planner *engines.Planner
localEngines map[string]engines.Engine localEngines map[string]engines.Engine
cache *cache.Cache engineCache *cache.EngineCache
} }
func NewService(cfg ServiceConfig) *Service { func NewService(cfg ServiceConfig) *Service {
@@ -59,118 +62,177 @@ func NewService(cfg ServiceConfig) *Service {
} }
} }
var engineCache *cache.EngineCache
if cfg.Cache != nil {
engineCache = cache.NewEngineCache(cfg.Cache, cfg.CacheTTLOverrides)
}
return &Service{ return &Service{
upstreamClient: up, upstreamClient: up,
planner: engines.NewPlannerFromEnv(), planner: engines.NewPlannerFromEnv(),
localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig), localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig),
cache: cfg.Cache, engineCache: engineCache,
} }
} }
// derefString returns the string value of a *string, or "" if nil.
func derefString(s *string) string {
if s == nil {
return ""
}
return *s
}
// Search executes the request against local engines (in parallel) and // Search executes the request against local engines (in parallel) and
// optionally the upstream instance for unported engines. // optionally the upstream instance for unported engines.
//
// Individual engine failures are reported as unresponsive_engines rather
// than aborting the entire search.
//
// If a Valkey cache is configured and contains a cached response for this
// request, the cached result is returned without hitting any engines.
func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) { func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
// Check cache first. queryHash := cache.QueryHash(
if s.cache != nil { req.Query,
cacheKey := cache.Key(req) int(req.Pageno),
if cached, hit := s.cache.Get(ctx, cacheKey); hit { int(req.Safesearch),
return cached, nil req.Language,
} derefString(req.TimeRange),
} )
merged, err := s.executeSearch(ctx, req)
if err != nil {
return SearchResponse{}, err
}
// Store in cache.
if s.cache != nil {
cacheKey := cache.Key(req)
s.cache.Set(ctx, cacheKey, merged)
}
return merged, nil
}
// executeSearch runs the actual engine queries and merges results.
func (s *Service) executeSearch(ctx context.Context, req SearchRequest) (SearchResponse, error) {
localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req) localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req)
// Run all local engines concurrently. // Phase 1: Parallel cache lookups — classify each engine as fresh/stale/miss
type engineResult struct { type cacheResult struct {
name string engine string
resp contracts.SearchResponse cached cache.CachedEngineResponse
err error hit bool
fresh *contracts.SearchResponse // nil if no fresh response
fetchErr error
unmarshalErr bool // true if hit but unmarshal failed (treat as miss)
} }
localResults := make([]engineResult, 0, len(localEngineNames)) cacheResults := make([]cacheResult, len(localEngineNames))
var wg sync.WaitGroup var lookupWg sync.WaitGroup
var mu sync.Mutex for i, name := range localEngineNames {
lookupWg.Add(1)
go func(i int, name string) {
defer lookupWg.Done()
for _, name := range localEngineNames { result := cacheResult{engine: name}
eng, ok := s.localEngines[name]
if !ok { if s.engineCache != nil {
mu.Lock() cached, ok := s.engineCache.Get(ctx, name, queryHash)
localResults = append(localResults, engineResult{ if ok {
name: name, result.hit = true
resp: unresponsiveResponse(req.Query, name, "engine_not_registered"), result.cached = cached
}) if !s.engineCache.IsStale(cached, name) {
mu.Unlock() // Fresh cache hit — deserialize and use directly
var resp contracts.SearchResponse
if err := json.Unmarshal(cached.Response, &resp); err == nil {
result.fresh = &resp
} else {
// Unmarshal failed — treat as cache miss (will fetch fresh synchronously)
result.unmarshalErr = true
result.hit = false // treat as miss
}
}
// If stale: result.fresh stays zero, result.cached has stale data
}
}
cacheResults[i] = result
}(i, name)
}
lookupWg.Wait()
// Phase 2: Fetch fresh for misses and stale entries
var fetchWg sync.WaitGroup
for i, name := range localEngineNames {
cr := cacheResults[i]
// Fresh hit — nothing to do in phase 2
if cr.hit && cr.fresh != nil {
continue continue
} }
wg.Add(1) // Stale hit — return stale immediately, refresh in background
go func(name string, eng engines.Engine) { if cr.hit && len(cr.cached.Response) > 0 && s.engineCache != nil && s.engineCache.IsStale(cr.cached, name) {
defer wg.Done() fetchWg.Add(1)
go func(name string) {
r, err := eng.Search(ctx, req) defer fetchWg.Done()
eng, ok := s.localEngines[name]
mu.Lock() if !ok {
defer mu.Unlock()
if err != nil {
localResults = append(localResults, engineResult{
name: name,
resp: unresponsiveResponse(req.Query, name, err.Error()),
})
return return
} }
localResults = append(localResults, engineResult{name: name, resp: r}) freshResp, err := eng.Search(ctx, req)
}(name, eng) if err != nil {
s.engineCache.Logger().Debug("background refresh failed", "engine", name, "error", err)
return
}
s.engineCache.Set(ctx, name, queryHash, freshResp)
}(name)
continue
} }
wg.Wait() // Cache miss — fetch fresh synchronously
if !cr.hit {
fetchWg.Add(1)
go func(i int, name string) {
defer fetchWg.Done()
// Collect successful responses and determine upstream fallbacks. eng, ok := s.localEngines[name]
responses := make([]contracts.SearchResponse, 0, len(localResults)+1) if !ok {
upstreamSet := map[string]bool{} cacheResults[i] = cacheResult{
for _, e := range upstreamEngineNames { engine: name,
upstreamSet[e] = true fetchErr: fmt.Errorf("engine not registered: %s", name),
}
return
} }
for _, lr := range localResults { freshResp, err := eng.Search(ctx, req)
responses = append(responses, lr.resp) if err != nil {
cacheResults[i] = cacheResult{
engine: name,
fetchErr: err,
}
return
}
// If a local engine returned nothing (e.g. qwant anti-bot), fall back // Cache the fresh response
// to upstream if available. if s.engineCache != nil {
if shouldFallbackToUpstream(lr.name, lr.resp) && !upstreamSet[lr.name] { s.engineCache.Set(ctx, name, queryHash, freshResp)
upstreamEngineNames = append(upstreamEngineNames, lr.name) }
upstreamSet[lr.name] = true
cacheResults[i] = cacheResult{
engine: name,
fresh: &freshResp,
hit: false,
}
}(i, name)
}
}
fetchWg.Wait()
// Phase 3: Collect responses for merge
responses := make([]contracts.SearchResponse, 0, len(cacheResults))
for _, cr := range cacheResults {
if cr.fetchErr != nil {
responses = append(responses, unresponsiveResponse(req.Query, cr.engine, cr.fetchErr.Error()))
continue
}
// Use fresh data if available (fresh hit or freshly fetched), otherwise use stale cached
if cr.fresh != nil {
responses = append(responses, *cr.fresh)
} else if cr.hit && len(cr.cached.Response) > 0 {
var resp contracts.SearchResponse
if err := json.Unmarshal(cr.cached.Response, &resp); err == nil {
responses = append(responses, resp)
}
} }
} }
// Upstream proxy for unported (or fallback) engines. // Upstream proxy for unported (or fallback) engines.
// ... rest of the existing code is UNCHANGED ...
if s.upstreamClient != nil && len(upstreamEngineNames) > 0 { if s.upstreamClient != nil && len(upstreamEngineNames) > 0 {
r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngineNames) r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngineNames)
if err != nil { if err != nil {
// Upstream failure is treated as a single unresponsive engine entry.
responses = append(responses, contracts.SearchResponse{ responses = append(responses, contracts.SearchResponse{
Query: req.Query, Query: req.Query,
UnresponsiveEngines: [][2]string{{"upstream", err.Error()}}, UnresponsiveEngines: [][2]string{{"upstream", err.Error()}},