samsa/internal/engines/planner.go
Franz Kafka 8e9aae062b
Some checks failed
Build and Push Docker Image / build-and-push (push) Failing after 11s
Mirror to GitHub / mirror (push) Failing after 5s
Tests / test (push) Successful in 42s
rename: kafka → samsa
Full project rename from kafka to samsa (after Gregor Samsa, who
woke one morning from uneasy dreams to find himself transformed).

- Module: github.com/metamorphosis-dev/kafka → samsa
- Binary: cmd/kafka/ → cmd/samsa/
- CSS: kafka.css → samsa.css
- UI: all 'kafka' product names, titles, localStorage keys → samsa
- localStorage keys: kafka-theme → samsa-theme, kafka-engines → samsa-engines
- OpenSearch: ShortName, LongName, description, URLs updated
- AGPL headers: 'kafka' → 'samsa'
- Docs, configs, examples updated
- Cache key prefix: kafka: → samsa:
2026-03-22 23:44:55 +00:00

184 lines
4.7 KiB
Go

// samsa — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"os"
"strings"
"github.com/metamorphosis-dev/samsa/internal/contracts"
)
// defaultPortedEngines is the engine list handed to NewPlanner when the
// LOCAL_PORTED_ENGINES environment variable is empty or contains no usable
// entries.
var defaultPortedEngines = []string{
	"wikipedia",
	"arxiv",
	"crossref",
	"braveapi",
	"brave",
	"qwant",
	"duckduckgo",
	"github",
	"reddit",
	"bing",
	"google",
	"youtube",
	"stackoverflow",

	// Image engines
	"bing_images",
	"ddg_images",
	"qwant_images",
}
// Planner splits a request's engines into those handled locally ("ported")
// and those left to the upstream instance.
type Planner struct {
	// PortedSet holds the ported engine names as keys for membership checks.
	PortedSet map[string]bool
	// PortedList holds the same names as a slice; NewPlanner fills it in
	// first-seen order.
	PortedList []string
}
// NewPlannerFromEnv builds a Planner from the comma-separated engine names in
// the LOCAL_PORTED_ENGINES environment variable. When the variable is unset,
// blank, or yields no non-empty entries, defaultPortedEngines is used instead.
func NewPlannerFromEnv() *Planner {
	configured := splitCSV(strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES")))
	if len(configured) > 0 {
		return NewPlanner(configured)
	}
	// An empty value and a value made only of separators both end up here.
	return NewPlanner(defaultPortedEngines)
}
// NewPlanner returns a Planner for the given ported engine names. Each name is
// lower-cased and trimmed; blank names and repeats are dropped, so PortedList
// keeps the first occurrence of every name in input order.
func NewPlanner(portedEngines []string) *Planner {
	planner := &Planner{
		PortedSet:  make(map[string]bool, len(portedEngines)),
		PortedList: make([]string, 0, len(portedEngines)),
	}
	for _, raw := range portedEngines {
		name := strings.TrimSpace(strings.ToLower(raw))
		if name == "" || planner.PortedSet[name] {
			continue
		}
		planner.PortedSet[name] = true
		planner.PortedList = append(planner.PortedList, name)
	}
	return planner
}
// Plan partitions the engines of a search request. It returns:
//   - localEngines: requested engines present in the planner's PortedSet
//   - upstreamEngines: every other requested engine, to be run upstream
//   - requestedEngines: the full list the partition was computed from
//
// A non-empty req.Engines is normalized and used as the requested list;
// otherwise the list is inferred from req.Categories. A nil receiver falls
// back to a planner built by NewPlannerFromEnv.
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
	planner := p
	if planner == nil {
		planner = NewPlannerFromEnv()
	}

	switch {
	case len(req.Engines) > 0:
		requestedEngines = normalizeList(req.Engines)
	default:
		requestedEngines = inferFromCategories(req.Categories)
	}

	capacity := len(requestedEngines)
	localEngines = make([]string, 0, capacity)
	upstreamEngines = make([]string, 0, capacity)
	for _, name := range requestedEngines {
		if !planner.PortedSet[name] {
			upstreamEngines = append(upstreamEngines, name)
			continue
		}
		localEngines = append(localEngines, name)
	}
	return localEngines, upstreamEngines, requestedEngines
}
// inferFromCategories maps category names (compared case-insensitively, with
// surrounding whitespace ignored) to a fixed set of engines. Unrecognized
// categories contribute nothing. The result has no duplicates and follows a
// fixed ranking, so the same categories always produce the same order.
func inferFromCategories(categories []string) []string {
	chosen := map[string]bool{}
	pick := func(names ...string) {
		for _, name := range names {
			chosen[name] = true
		}
	}

	for _, category := range categories {
		switch strings.TrimSpace(strings.ToLower(category)) {
		case "general":
			pick("wikipedia", "braveapi", "qwant", "duckduckgo", "bing", "google")
		case "science", "scientific publications":
			pick("arxiv", "crossref")
		case "it":
			pick("github", "stackoverflow")
		case "social media":
			pick("reddit")
		case "videos":
			pick("youtube")
		case "images":
			pick("bing_images", "ddg_images", "qwant_images")
		}
	}

	engines := make([]string, 0, len(chosen))
	for name := range chosen {
		engines = append(engines, name)
	}

	// Map iteration order is random, so sort by each engine's position in
	// this ranking to get a stable result.
	ranking := []string{
		"wikipedia", "braveapi", "brave", "qwant", "duckduckgo", "bing", "google",
		"arxiv", "crossref", "github", "stackoverflow", "reddit", "youtube",
		"bing_images", "ddg_images", "qwant_images",
	}
	rank := make(map[string]int, len(ranking))
	for position, name := range ranking {
		rank[name] = position
	}
	sortByOrder(engines, rank)
	return engines
}
// sortByOrder sorts list in place by ascending order[name] using an insertion
// sort. Names missing from order rank as 0 (the map's zero value), and entries
// with equal rank keep their relative positions.
func sortByOrder(list []string, order map[string]int) {
	for next := 1; next < len(list); next++ {
		item := list[next]
		itemRank := order[item]

		// Shift strictly higher-ranked entries right to open a slot for item.
		slot := next
		for ; slot > 0 && order[list[slot-1]] > itemRank; slot-- {
			list[slot] = list[slot-1]
		}
		list[slot] = item
	}
}
// normalizeList returns the entries of in lower-cased and trimmed, skipping
// blank entries and keeping only the first occurrence of each name. The result
// is always a non-nil slice.
func normalizeList(in []string) []string {
	result := make([]string, 0, len(in))
	known := make(map[string]bool)
	for _, raw := range in {
		name := strings.TrimSpace(strings.ToLower(raw))
		if name != "" && !known[name] {
			known[name] = true
			result = append(result, name)
		}
	}
	return result
}
// splitCSV splits s on commas and returns the trimmed, non-blank fields in
// their original order and case. An empty s yields nil; a non-empty s whose
// fields are all blank yields an empty, non-nil slice.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	fields := strings.Split(s, ",")
	kept := make([]string, 0, len(fields))
	for i := range fields {
		if field := strings.TrimSpace(fields[i]); field != "" {
			kept = append(kept, field)
		}
	}
	return kept
}