duckduckgo, github, reddit, and bing were registered in factory.go and config.go but were missing from planner.go, so they were silently skipped when LOCAL_PORTED_ENGINES was not set.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
154 lines
3.7 KiB
Go
154 lines
3.7 KiB
Go
package engines
|
|
|
|
import (
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
|
)
|
|
|
|
// defaultPortedEngines lists the engines treated as ported when the
// LOCAL_PORTED_ENGINES environment variable is unset or yields no entries.
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"}
|
|
|
|
// Planner records which engines are configured as ported and is used to
// split a request's engines into locally handled and upstream-handled ones.
type Planner struct {
	// PortedSet holds the ported engine names for membership tests.
	PortedSet map[string]bool
	// PortedList holds the ported engine names as an ordered list.
	PortedList []string
}
|
|
|
|
func NewPlannerFromEnv() *Planner {
|
|
raw := strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))
|
|
if raw == "" {
|
|
return NewPlanner(defaultPortedEngines)
|
|
}
|
|
parts := splitCSV(raw)
|
|
if len(parts) == 0 {
|
|
return NewPlanner(defaultPortedEngines)
|
|
}
|
|
return NewPlanner(parts)
|
|
}
|
|
|
|
func NewPlanner(portedEngines []string) *Planner {
|
|
set := make(map[string]bool, len(portedEngines))
|
|
out := make([]string, 0, len(portedEngines))
|
|
for _, e := range portedEngines {
|
|
e = strings.TrimSpace(strings.ToLower(e))
|
|
if e == "" {
|
|
continue
|
|
}
|
|
if set[e] {
|
|
continue
|
|
}
|
|
set[e] = true
|
|
out = append(out, e)
|
|
}
|
|
return &Planner{
|
|
PortedSet: set,
|
|
PortedList: out,
|
|
}
|
|
}
|
|
|
|
// Plan returns:
|
|
// - localEngines: engines that are configured as ported for this service
|
|
// - upstreamEngines: engines that should be executed by upstream SearXNG
|
|
// - requestedEngines: the (possibly inferred) requested engines list
|
|
//
|
|
// If the request provides an explicit `engines` parameter, we use it.
|
|
// Otherwise we infer a small subset from `categories` for the starter set.
|
|
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
|
|
if p == nil {
|
|
p = NewPlannerFromEnv()
|
|
}
|
|
|
|
requestedEngines = nil
|
|
if len(req.Engines) > 0 {
|
|
requestedEngines = normalizeList(req.Engines)
|
|
} else {
|
|
requestedEngines = inferFromCategories(req.Categories)
|
|
}
|
|
|
|
localEngines = make([]string, 0, len(requestedEngines))
|
|
upstreamEngines = make([]string, 0, len(requestedEngines))
|
|
for _, e := range requestedEngines {
|
|
if p.PortedSet[e] {
|
|
localEngines = append(localEngines, e)
|
|
} else {
|
|
upstreamEngines = append(upstreamEngines, e)
|
|
}
|
|
}
|
|
|
|
return localEngines, upstreamEngines, requestedEngines
|
|
}
|
|
|
|
// inferFromCategories maps request categories to engines of the initial
// porting subset. This mirrors the idea of selecting from SearXNG categories
// without embedding the whole engine registry.
//
// Category names are matched case-insensitively after trimming; unrecognized
// categories are ignored. The result is never nil, contains no duplicates, and
// always lists engines in the same fixed order regardless of the order in
// which categories were given.
func inferFromCategories(categories []string) []string {
	var general, science, it, social bool
	for _, c := range categories {
		switch strings.TrimSpace(strings.ToLower(c)) {
		case "general":
			general = true
		case "science", "scientific publications":
			science = true
		case "it":
			it = true
		case "social media":
			social = true
		}
	}

	// Emit the groups in a fixed sequence so the output order is stable by
	// construction; there is no separate rank table to keep in sync with
	// the switch above. The capacity fits every engine listed below.
	out := make([]string, 0, 9)
	if general {
		out = append(out, "wikipedia", "braveapi", "qwant", "duckduckgo", "bing")
	}
	if science {
		out = append(out, "arxiv", "crossref")
	}
	if it {
		out = append(out, "github")
	}
	if social {
		out = append(out, "reddit")
	}
	return out
}
|
|
|
|
// sortByOrder sorts list in place by the rank each name has in order.
//
// Names present in order are sorted by ascending rank; names with equal rank
// keep their relative position. Names absent from order are placed after all
// ranked names and sorted alphabetically among themselves, instead of being
// read as rank 0 and colliding with the first-ranked entry.
func sortByOrder(list []string, order map[string]int) {
	// less reports whether a must be placed before b.
	less := func(a, b string) bool {
		ra, okA := order[a]
		rb, okB := order[b]
		switch {
		case okA && okB:
			return ra < rb
		case okA != okB:
			return okA // ranked names precede unranked ones
		default:
			return a < b
		}
	}

	// simple insertion sort (list is tiny); swapping only on a strict
	// "less" keeps it stable.
	for i := 1; i < len(list); i++ {
		for j := i; j > 0 && less(list[j], list[j-1]); j-- {
			list[j-1], list[j] = list[j], list[j-1]
		}
	}
}
|
|
|
|
// normalizeList lower-cases and trims every entry of in, drops entries that
// are empty after trimming, and removes duplicates while keeping the position
// of the first occurrence. The result is never nil.
func normalizeList(in []string) []string {
	out := make([]string, 0, len(in))
	seen := make(map[string]struct{}, len(in))
	for _, e := range in {
		e = strings.TrimSpace(strings.ToLower(e))
		if e == "" {
			continue
		}
		if _, dup := seen[e]; dup {
			continue
		}
		seen[e] = struct{}{}
		out = append(out, e)
	}
	return out
}
|
|
|
|
// splitCSV splits s on commas, trims surrounding whitespace from each field,
// and drops fields that are empty after trimming. Case is left untouched.
//
// It returns nil for an empty input string; any other input yields a non-nil
// slice, which is empty when every field was blank.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	fields := strings.Split(s, ",")
	out := make([]string, 0, len(fields))
	for _, f := range fields {
		if f = strings.TrimSpace(f); f != "" {
			out = append(out, f)
		}
	}
	return out
}
|
|
|