package engines

import (
	"os"
	"strings"

	"github.com/metamorphosis-dev/kafka/internal/contracts"
)

// defaultPortedEngines is the engine list used when the LOCAL_PORTED_ENGINES
// environment variable is empty or contains no usable names.
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"}

// Planner holds the set of engines configured as ported.
//
// PortedSet answers membership queries; PortedList keeps the same names in
// first-seen order. Both are filled by NewPlanner with lower-cased, trimmed,
// de-duplicated names.
type Planner struct {
	PortedSet  map[string]bool
	PortedList []string
}

// NewPlannerFromEnv builds a Planner from the comma-separated
// LOCAL_PORTED_ENGINES environment variable. When the variable is unset,
// blank, or contains only empty entries, defaultPortedEngines is used.
func NewPlannerFromEnv() *Planner {
	engines := defaultPortedEngines
	// splitCSV returns nil for an empty string, so one length check covers
	// both the "unset" and the "only separators" cases.
	if names := splitCSV(strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))); len(names) > 0 {
		engines = names
	}
	return NewPlanner(engines)
}

// NewPlanner builds a Planner from portedEngines. Each name is lower-cased
// and trimmed; empty names and repeats are dropped, and the first occurrence
// of a name decides its position in PortedList.
func NewPlanner(portedEngines []string) *Planner {
	planner := &Planner{
		PortedSet:  make(map[string]bool, len(portedEngines)),
		PortedList: make([]string, 0, len(portedEngines)),
	}
	for _, name := range portedEngines {
		key := strings.TrimSpace(strings.ToLower(name))
		if key == "" || planner.PortedSet[key] {
			continue
		}
		planner.PortedSet[key] = true
		planner.PortedList = append(planner.PortedList, key)
	}
	return planner
}

// Plan returns:
//   - localEngines: engines that are configured as ported for this service
//   - upstreamEngines: engines that should be executed by the upstream instance
//   - requestedEngines: the (possibly inferred) requested engines list
//
// If the request provides an explicit `engines` parameter, we use it.
// Otherwise we infer a small subset from `categories` for the starter set.
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) { if p == nil { p = NewPlannerFromEnv() } requestedEngines = nil if len(req.Engines) > 0 { requestedEngines = normalizeList(req.Engines) } else { requestedEngines = inferFromCategories(req.Categories) } localEngines = make([]string, 0, len(requestedEngines)) upstreamEngines = make([]string, 0, len(requestedEngines)) for _, e := range requestedEngines { if p.PortedSet[e] { localEngines = append(localEngines, e) } else { upstreamEngines = append(upstreamEngines, e) } } return localEngines, upstreamEngines, requestedEngines } func inferFromCategories(categories []string) []string { // Minimal mapping for the initial porting subset. // This mirrors the idea of selecting from engine categories without // embedding the whole engine registry. set := map[string]bool{} for _, c := range categories { switch strings.TrimSpace(strings.ToLower(c)) { case "general": set["wikipedia"] = true set["braveapi"] = true set["qwant"] = true set["duckduckgo"] = true set["bing"] = true case "science", "scientific publications": set["arxiv"] = true set["crossref"] = true case "it": set["github"] = true case "social media": set["reddit"] = true } } out := make([]string, 0, len(set)) for e := range set { out = append(out, e) } // stable order order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "arxiv": 5, "crossref": 6, "github": 7, "reddit": 8} sortByOrder(out, order) return out } func sortByOrder(list []string, order map[string]int) { // simple insertion sort (list is tiny) for i := 1; i < len(list); i++ { j := i for j > 0 && order[list[j-1]] > order[list[j]] { list[j-1], list[j] = list[j], list[j-1] j-- } } } func normalizeList(in []string) []string { out := make([]string, 0, len(in)) seen := map[string]bool{} for _, e := range in { e = strings.TrimSpace(strings.ToLower(e)) if e == "" || seen[e] { continue } seen[e] = true out = 
append(out, e) } return out } func splitCSV(s string) []string { if s == "" { return nil } parts := strings.Split(s, ",") out := make([]string, 0, len(parts)) for _, p := range parts { p = strings.TrimSpace(p) if p == "" { continue } out = append(out, p) } return out }