// samsa — a privacy-respecting metasearch engine // Copyright (C) 2026-present metamorphosis-dev // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package search import ( "context" "encoding/json" "fmt" "sync" "time" "github.com/metamorphosis-dev/samsa/internal/cache" "github.com/metamorphosis-dev/samsa/internal/config" "github.com/metamorphosis-dev/samsa/internal/contracts" "github.com/metamorphosis-dev/samsa/internal/engines" "github.com/metamorphosis-dev/samsa/internal/httpclient" "github.com/metamorphosis-dev/samsa/internal/upstream" ) type ServiceConfig struct { UpstreamURL string HTTPTimeout time.Duration Cache *cache.Cache CacheTTLOverrides map[string]time.Duration EnginesConfig *config.Config } type Service struct { upstreamClient *upstream.Client planner *engines.Planner localEngines map[string]engines.Engine engineCache *cache.EngineCache } func NewService(cfg ServiceConfig) *Service { timeout := cfg.HTTPTimeout if timeout <= 0 { timeout = 10 * time.Second } httpClient := httpclient.NewClient(timeout) var up *upstream.Client if cfg.UpstreamURL != "" { c, err := upstream.NewClient(cfg.UpstreamURL, timeout) if err == nil { up = c } } var engineCache *cache.EngineCache if cfg.Cache != nil { engineCache = cache.NewEngineCache(cfg.Cache, cfg.CacheTTLOverrides) } return &Service{ upstreamClient: up, planner: engines.NewPlannerFromEnv(), localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig), engineCache: engineCache, } } // derefString returns the string value of a *string, or "" if nil. func derefString(s *string) string { if s == nil { return "" } return *s } // Search executes the request against local engines (in parallel) and // optionally the upstream instance for unported engines. func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) { queryHash := cache.QueryHash( req.Query, int(req.Pageno), int(req.Safesearch), req.Language, derefString(req.TimeRange), ) localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req) // Phase 1: Parallel cache lookups — classify each engine as fresh/stale/miss type cacheResult struct { engine string cached cache.CachedEngineResponse hit bool fresh *contracts.SearchResponse // nil if no fresh response fetchErr error unmarshalErr bool // true if hit but unmarshal failed (treat as miss) } cacheResults := make([]cacheResult, len(localEngineNames)) var lookupWg sync.WaitGroup for i, name := range localEngineNames { lookupWg.Add(1) go func(i int, name string) { defer lookupWg.Done() result := cacheResult{engine: name} if s.engineCache != nil { cached, ok := s.engineCache.Get(ctx, name, queryHash) if ok { result.hit = true result.cached = cached if !s.engineCache.IsStale(cached, name) { // Fresh cache hit — deserialize and use directly var resp contracts.SearchResponse if err := json.Unmarshal(cached.Response, &resp); err == nil { result.fresh = &resp } else { // Unmarshal failed — treat as cache miss (will fetch fresh synchronously) result.unmarshalErr = true result.hit = false // treat as miss } } // If stale: result.fresh stays zero, result.cached has stale data } } cacheResults[i] = result }(i, name) } lookupWg.Wait() // Phase 2: Fetch fresh for misses and stale entries var fetchWg sync.WaitGroup for i, name := range localEngineNames { cr := cacheResults[i] // Fresh hit — nothing to do in phase 2 if cr.hit && cr.fresh != nil { continue } // Stale hit — return stale immediately, refresh in background if cr.hit && len(cr.cached.Response) > 0 && s.engineCache != nil && s.engineCache.IsStale(cr.cached, name) { fetchWg.Add(1) go func(name string) { defer fetchWg.Done() eng, ok := s.localEngines[name] if !ok { return } freshResp, err := eng.Search(ctx, req) if err != nil { s.engineCache.Logger().Debug("background refresh failed", "engine", name, "error", err) return } s.engineCache.Set(ctx, name, queryHash, freshResp) }(name) continue } // Cache miss — fetch fresh synchronously if !cr.hit { fetchWg.Add(1) go func(i int, name string) { defer fetchWg.Done() eng, ok := s.localEngines[name] if !ok { cacheResults[i] = cacheResult{ engine: name, fetchErr: fmt.Errorf("engine not registered: %s", name), } return } freshResp, err := eng.Search(ctx, req) if err != nil { cacheResults[i] = cacheResult{ engine: name, fetchErr: err, } return } // Cache the fresh response if s.engineCache != nil { s.engineCache.Set(ctx, name, queryHash, freshResp) } cacheResults[i] = cacheResult{ engine: name, fresh: &freshResp, hit: false, } }(i, name) } } fetchWg.Wait() // Phase 3: Collect responses for merge responses := make([]contracts.SearchResponse, 0, len(cacheResults)) for _, cr := range cacheResults { if cr.fetchErr != nil { responses = append(responses, unresponsiveResponse(req.Query, cr.engine, cr.fetchErr.Error())) continue } // Use fresh data if available (fresh hit or freshly fetched), otherwise use stale cached if cr.fresh != nil { responses = append(responses, *cr.fresh) } else if cr.hit && len(cr.cached.Response) > 0 { var resp contracts.SearchResponse if err := json.Unmarshal(cr.cached.Response, &resp); err == nil { responses = append(responses, resp) } } } // Upstream proxy for unported (or fallback) engines. // ... rest of the existing code is UNCHANGED ... if s.upstreamClient != nil && len(upstreamEngineNames) > 0 { r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngineNames) if err != nil { responses = append(responses, contracts.SearchResponse{ Query: req.Query, UnresponsiveEngines: [][2]string{{"upstream", err.Error()}}, }) } else { responses = append(responses, r) } } if len(responses) == 0 { return emptyResponse(req.Query), nil } merged := MergeResponses(responses) if merged.Query == "" { merged.Query = req.Query } return merged, nil } // unresponsiveResponse returns a zero-result response marking the engine as unresponsive. func unresponsiveResponse(query, engine, reason string) contracts.SearchResponse { return contracts.SearchResponse{ Query: query, NumberOfResults: 0, Results: []contracts.MainResult{}, Answers: []map[string]any{}, Corrections: []string{}, Infoboxes: []map[string]any{}, Suggestions: []string{}, UnresponsiveEngines: [][2]string{{engine, reason}}, } } // emptyResponse returns a valid empty response with stable empty slices. func emptyResponse(query string) contracts.SearchResponse { return contracts.SearchResponse{ Query: query, NumberOfResults: 0, Results: []contracts.MainResult{}, Answers: []map[string]any{}, Corrections: []string{}, Infoboxes: []map[string]any{}, Suggestions: []string{}, UnresponsiveEngines: [][2]string{}, } } func shouldFallbackToUpstream(engineName string, r contracts.SearchResponse) bool { if engineName != "qwant" { return false } return len(r.Results) == 0 && len(r.Answers) == 0 && len(r.Infoboxes) == 0 }