kafka/internal/engines/planner.go
Franz Kafka dc44837219 feat: build Go-based SearXNG-compatible search service
Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling so searches can run without matching the original UI while preserving response compatibility.

Made-with: Cursor
2026-03-20 20:34:08 +01:00

148 lines
3.5 KiB
Go

package engines
import (
"os"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// defaultPortedEngines lists the engine names that NewPlannerFromEnv hands to
// NewPlanner when the LOCAL_PORTED_ENGINES environment variable is unset,
// blank, or contains no non-empty comma-separated entries.
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant"}
// Planner records which engine names are configured as ported. Plan uses it
// to split a request's engines into a local list and an upstream list.
type Planner struct {
// PortedSet is the membership lookup consulted by Plan; NewPlanner stores
// keys lowercased and whitespace-trimmed.
PortedSet map[string]bool
// PortedList holds the same names as PortedSet, in the first-seen order
// of the list given to NewPlanner.
PortedList []string
}
// NewPlannerFromEnv builds a Planner from the LOCAL_PORTED_ENGINES environment
// variable, which is read as a comma-separated list of engine names.
//
// When the variable is unset, blank, or yields no non-empty entries, the
// planner is built from defaultPortedEngines instead.
func NewPlannerFromEnv() *Planner {
	configured := strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))
	// splitCSV returns nil for an empty string, so one length check covers
	// both the "not set" and the "only separators/whitespace" cases.
	if engines := splitCSV(configured); len(engines) > 0 {
		return NewPlanner(engines)
	}
	return NewPlanner(defaultPortedEngines)
}
// NewPlanner returns a Planner for the given ported engine names.
//
// Each name is lowercased and whitespace-trimmed. Names that end up empty and
// repeats of an already-seen name are dropped, so PortedList keeps first-seen
// order without duplicates and PortedSet holds exactly the same names.
func NewPlanner(portedEngines []string) *Planner {
	planner := &Planner{
		PortedSet:  make(map[string]bool, len(portedEngines)),
		PortedList: make([]string, 0, len(portedEngines)),
	}
	for _, raw := range portedEngines {
		name := strings.TrimSpace(strings.ToLower(raw))
		if name == "" || planner.PortedSet[name] {
			continue
		}
		planner.PortedSet[name] = true
		planner.PortedList = append(planner.PortedList, name)
	}
	return planner
}
// Plan splits the engines relevant to req into three lists:
//   - localEngines: requested engines present in the planner's PortedSet
//   - upstreamEngines: requested engines absent from PortedSet, i.e. the ones
//     that should be executed by upstream SearXNG
//   - requestedEngines: the full list that was partitioned
//
// A non-empty req.Engines is used after normalization (lowercase, trimmed,
// de-duplicated). Otherwise a small starter subset is inferred from
// req.Categories. A nil receiver falls back to a planner built from the
// environment via NewPlannerFromEnv.
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
	planner := p
	if planner == nil {
		planner = NewPlannerFromEnv()
	}

	if len(req.Engines) == 0 {
		requestedEngines = inferFromCategories(req.Categories)
	} else {
		requestedEngines = normalizeList(req.Engines)
	}

	total := len(requestedEngines)
	localEngines = make([]string, 0, total)
	upstreamEngines = make([]string, 0, total)
	for _, name := range requestedEngines {
		if !planner.PortedSet[name] {
			upstreamEngines = append(upstreamEngines, name)
			continue
		}
		localEngines = append(localEngines, name)
	}
	return localEngines, upstreamEngines, requestedEngines
}
// inferFromCategories maps request categories onto the starter set of engines.
//
// Categories are matched case-insensitively after trimming whitespace:
// "general" selects wikipedia, braveapi and qwant; "science" and
// "scientific publications" select arxiv and crossref. Unrecognized
// categories select nothing. The result has no duplicates and is always in
// the fixed order wikipedia, braveapi, qwant, arxiv, crossref.
func inferFromCategories(categories []string) []string {
	// Deliberately minimal mapping for the initial porting subset: it follows
	// the idea of picking engines by SearXNG category without embedding the
	// whole engine registry.
	generalEngines := []string{"wikipedia", "braveapi", "qwant"}
	scienceEngines := []string{"arxiv", "crossref"}

	chosen := make(map[string]bool)
	for _, raw := range categories {
		var group []string
		switch strings.TrimSpace(strings.ToLower(raw)) {
		case "general":
			group = generalEngines
		case "science", "scientific publications":
			group = scienceEngines
		}
		for _, name := range group {
			chosen[name] = true
		}
	}

	engines := make([]string, 0, len(chosen))
	for name := range chosen {
		engines = append(engines, name)
	}

	// Map iteration order is random, so impose a stable order on the output.
	rank := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "arxiv": 3, "crossref": 4}
	sortByOrder(engines, rank)
	return engines
}
// sortByOrder sorts list in place by ascending order[name].
//
// It is a stable insertion sort, which is adequate because the lists passed
// in are tiny. Names missing from order are compared using the map's zero
// value (0), and elements with equal rank keep their relative positions.
func sortByOrder(list []string, order map[string]int) {
	for next := 1; next < len(list); next++ {
		item := list[next]
		rank := order[item]

		// Shift every strictly higher-ranked predecessor one slot to the
		// right, then drop the held item into the gap that opens up.
		slot := next
		for ; slot > 0 && order[list[slot-1]] > rank; slot-- {
			list[slot] = list[slot-1]
		}
		list[slot] = item
	}
}
// normalizeList returns the entries of in lowercased and whitespace-trimmed,
// with empty entries and repeats removed. The first occurrence of each name
// determines its position. The result is never nil, even for empty input.
func normalizeList(in []string) []string {
	unique := make([]string, 0, len(in))
	known := make(map[string]struct{})
	for _, raw := range in {
		name := strings.TrimSpace(strings.ToLower(raw))
		if name == "" {
			continue
		}
		if _, dup := known[name]; dup {
			continue
		}
		known[name] = struct{}{}
		unique = append(unique, name)
	}
	return unique
}
// splitCSV breaks s on commas and returns the whitespace-trimmed, non-empty
// fields in their original order; letter case is left untouched.
//
// An empty s yields nil. Any other input yields a non-nil slice, which may be
// empty when every field is blank.
func splitCSV(s string) []string {
	if len(s) == 0 {
		return nil
	}
	fields := strings.Split(s, ",")
	kept := make([]string, 0, len(fields))
	for i := range fields {
		if field := strings.TrimSpace(fields[i]); field != "" {
			kept = append(kept, field)
		}
	}
	return kept
}