kafka/internal/engines/planner.go
Franz Kafka 7be03b4017 license: change from MIT to AGPLv3
Update LICENSE file and add AGPL header to all source files.

AGPLv3 ensures that if someone runs Kafka as a network service and
modifies it, they must release their source code under the same license.
2026-03-22 08:27:23 +00:00

173 lines
4.6 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"os"
"strings"
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
// defaultPortedEngines lists the engine names NewPlannerFromEnv falls back to
// when LOCAL_PORTED_ENGINES is unset, blank, or contains no usable entries.
var defaultPortedEngines = []string{
	"wikipedia",
	"arxiv",
	"crossref",
	"braveapi",
	"qwant",
	"duckduckgo",
	"github",
	"reddit",
	"bing",
	"google",
	"youtube",
}
// Planner splits a request's engines into those handled locally ("ported")
// and those that are not.
type Planner struct {
	// PortedSet answers "is this engine ported?" by name. NewPlanner stores
	// names lower-cased and trimmed, each mapped to true.
	PortedSet map[string]bool

	// PortedList holds the same names as PortedSet when built by NewPlanner,
	// de-duplicated and kept in first-seen order.
	PortedList []string
}
// NewPlannerFromEnv builds a Planner from the LOCAL_PORTED_ENGINES environment
// variable, which is read as a comma-separated list of engine names.
//
// When the variable is unset, blank, or contains no non-empty entries, the
// planner is built from defaultPortedEngines instead.
func NewPlannerFromEnv() *Planner {
	engines := defaultPortedEngines
	// splitCSV returns nothing for an empty string, so one length check covers
	// both the "unset/blank" and the "only separators" cases.
	if configured := splitCSV(strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))); len(configured) > 0 {
		engines = configured
	}
	return NewPlanner(engines)
}
// NewPlanner returns a Planner whose ported engines are the given names,
// lower-cased and trimmed of surrounding whitespace. Empty names and repeated
// names are dropped; PortedList keeps the first-seen order.
func NewPlanner(portedEngines []string) *Planner {
	planner := &Planner{
		PortedSet:  make(map[string]bool, len(portedEngines)),
		PortedList: make([]string, 0, len(portedEngines)),
	}
	for _, raw := range portedEngines {
		name := strings.ToLower(raw)
		name = strings.TrimSpace(name)
		if name == "" || planner.PortedSet[name] {
			continue
		}
		planner.PortedSet[name] = true
		planner.PortedList = append(planner.PortedList, name)
	}
	return planner
}
// Plan partitions the engines of a search request. It returns:
//   - localEngines: requested engines that are in this planner's PortedSet
//   - upstreamEngines: requested engines that are not, i.e. the ones that
//     should be executed by the upstream instance
//   - requestedEngines: the full requested list, possibly inferred
//
// When the request carries a non-empty Engines list it is normalized and used
// as-is. Otherwise the list is inferred from req.Categories. Both result
// slices preserve the order of requestedEngines.
//
// A nil receiver is allowed: a planner is then built from the environment via
// NewPlannerFromEnv for the duration of the call.
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
	planner := p
	if planner == nil {
		planner = NewPlannerFromEnv()
	}

	if len(req.Engines) == 0 {
		requestedEngines = inferFromCategories(req.Categories)
	} else {
		requestedEngines = normalizeList(req.Engines)
	}

	total := len(requestedEngines)
	localEngines, upstreamEngines = make([]string, 0, total), make([]string, 0, total)
	for _, name := range requestedEngines {
		if !planner.PortedSet[name] {
			upstreamEngines = append(upstreamEngines, name)
			continue
		}
		localEngines = append(localEngines, name)
	}
	return localEngines, upstreamEngines, requestedEngines
}
// inferFromCategories maps request categories to a fixed starter set of
// engines. Category names are matched case-insensitively after trimming
// surrounding whitespace; unknown categories are ignored and repeated
// categories contribute their engines only once.
//
// This is a minimal mapping for the initial porting subset: it mirrors the
// idea of selecting from engine categories without embedding the whole engine
// registry.
//
// The result is always non-nil and always in the same order regardless of the
// order of the input: general engines first, then science, it, social media
// and videos. Each engine name appears in exactly one place below, so the
// output order is defined by the append sequence alone and cannot drift out of
// sync with a separate ranking table.
func inferFromCategories(categories []string) []string {
	// maxInferred is only a capacity hint: the number of engines appended
	// below when every category group is requested.
	const maxInferred = 11

	var general, science, it, social, videos bool
	for _, c := range categories {
		switch strings.TrimSpace(strings.ToLower(c)) {
		case "general":
			general = true
		case "science", "scientific publications":
			science = true
		case "it":
			it = true
		case "social media":
			social = true
		case "videos":
			videos = true
		}
	}

	out := make([]string, 0, maxInferred)
	if general {
		out = append(out, "wikipedia", "braveapi", "qwant", "duckduckgo", "bing", "google")
	}
	if science {
		out = append(out, "arxiv", "crossref")
	}
	if it {
		out = append(out, "github")
	}
	if social {
		out = append(out, "reddit")
	}
	if videos {
		out = append(out, "youtube")
	}
	return out
}
// sortByOrder sorts list in place by ascending rank, where an element's rank
// is order[element]. Elements absent from order get rank 0, the map's zero
// value. Elements of equal rank keep their relative order.
//
// It is an insertion sort, which is adequate because the lists it is used on
// are tiny.
func sortByOrder(list []string, order map[string]int) {
	for next := 1; next < len(list); next++ {
		item := list[next]
		rank := order[item]

		// Shift larger-ranked elements one slot right, then drop item into
		// the gap that opens up.
		slot := next
		for ; slot > 0; slot-- {
			if order[list[slot-1]] <= rank {
				break
			}
			list[slot] = list[slot-1]
		}
		list[slot] = item
	}
}
// normalizeList lower-cases and trims every entry of in, discarding entries
// that end up empty as well as repeats of an earlier entry. First-seen order
// is preserved and the result is never nil.
func normalizeList(in []string) []string {
	result := make([]string, 0, len(in))
	known := make(map[string]bool)
	for i := range in {
		name := strings.ToLower(in[i])
		name = strings.TrimSpace(name)
		if name != "" && !known[name] {
			known[name] = true
			result = append(result, name)
		}
	}
	return result
}
// splitCSV splits s on commas and returns the fields with surrounding
// whitespace removed, skipping fields that are empty after trimming.
//
// An empty s yields nil. A non-empty s with no usable fields (for example
// ",") yields an empty, non-nil slice.
func splitCSV(s string) []string {
	if len(s) == 0 {
		return nil
	}
	fields := strings.Split(s, ",")
	kept := make([]string, 0, len(fields))
	for i := range fields {
		if field := strings.TrimSpace(fields[i]); field != "" {
			kept = append(kept, field)
		}
	}
	return kept
}