feat: build Go-based SearXNG-compatible search service

Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling, so searches can run without reproducing the original UI while preserving SearXNG response compatibility.

Made-with: Cursor
Franz Kafka 2026-03-20 20:34:08 +01:00
parent 7783367c71
commit dc44837219
32 changed files with 3330 additions and 0 deletions
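For context, here is a minimal client-side sketch of the contract this change targets (not part of the commit; the listen address, port, and the fields selected for decoding are assumptions): a GET to /search with format=json is expected to return a SearXNG-shaped JSON body.

package main

import (
    "encoding/json"
    "fmt"
    "net/http"
    "net/url"
)

func main() {
    params := url.Values{"q": {"golang"}, "format": {"json"}}
    resp, err := http.Get("http://localhost:8080/search?" + params.Encode())
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    // Decode only the fields this sketch cares about; the full shape is
    // defined by the internal/contracts package added in this commit.
    var body struct {
        Query   string `json:"query"`
        Results []struct {
            Title  string `json:"title"`
            URL    string `json:"url"`
            Engine string `json:"engine"`
        } `json:"results"`
    }
    if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
        panic(err)
    }
    fmt.Printf("%d results for %q\n", len(body.Results), body.Query)
}

The field names mirror SearXNG's JSON output, which the contracts package below encodes.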

121
internal/search/merge.go Normal file

@@ -0,0 +1,121 @@
package search
import (
"encoding/json"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// MergeResponses merges multiple SearXNG-compatible JSON responses.
//
// MVP merge semantics:
// - results are concatenated with a simple de-dup key (engine|title|url)
// - suggestions/corrections are de-duplicated as sets
// - answers/infoboxes/unresponsive_engines are concatenated (best-effort)
func MergeResponses(responses []contracts.SearchResponse) contracts.SearchResponse {
var merged contracts.SearchResponse
mergedResultSeen := map[string]struct{}{}
mergedAnswerSeen := map[string]struct{}{}
mergedCorrectionsSeen := map[string]struct{}{}
mergedSuggestionsSeen := map[string]struct{}{}
for _, r := range responses {
if merged.Query == "" {
merged.Query = r.Query
}
merged.NumberOfResults = maxInt(merged.NumberOfResults, r.NumberOfResults)
for _, mr := range r.Results {
key := resultDedupKey(mr)
if _, ok := mergedResultSeen[key]; ok {
continue
}
mergedResultSeen[key] = struct{}{}
merged.Results = append(merged.Results, mr)
}
for _, ans := range r.Answers {
// De-dup by normalized JSON when possible.
b, err := json.Marshal(ans)
if err != nil {
merged.Answers = append(merged.Answers, ans)
continue
}
key := string(b)
if _, ok := mergedAnswerSeen[key]; ok {
continue
}
mergedAnswerSeen[key] = struct{}{}
merged.Answers = append(merged.Answers, ans)
}
merged.Corrections = unionStrings(merged.Corrections, r.Corrections, &mergedCorrectionsSeen)
merged.Suggestions = unionStrings(merged.Suggestions, r.Suggestions, &mergedSuggestionsSeen)
merged.Infoboxes = append(merged.Infoboxes, r.Infoboxes...)
merged.UnresponsiveEngines = append(merged.UnresponsiveEngines, r.UnresponsiveEngines...)
}
// Ensure non-nil slices to keep JSON shape stable.
if merged.Results == nil {
merged.Results = []contracts.MainResult{}
}
if merged.Answers == nil {
merged.Answers = []map[string]any{}
}
if merged.Corrections == nil {
merged.Corrections = []string{}
}
if merged.Infoboxes == nil {
merged.Infoboxes = []map[string]any{}
}
if merged.Suggestions == nil {
merged.Suggestions = []string{}
}
if merged.UnresponsiveEngines == nil {
merged.UnresponsiveEngines = [][2]string{}
}
return merged
}
func resultDedupKey(r contracts.MainResult) string {
urlStr := ""
if r.URL != nil {
urlStr = *r.URL
}
// Normalize host to reduce duplicates.
if u, err := url.Parse(urlStr); err == nil {
if u.Host != "" {
urlStr = u.Host + u.Path
}
}
return strings.ToLower(r.Engine) + "|" + strings.ToLower(r.Title) + "|" + urlStr
}
func unionStrings(dst []string, src []string, seen *map[string]struct{}) []string {
if *seen == nil {
*seen = map[string]struct{}{}
}
out := dst
for _, s := range src {
if _, ok := (*seen)[s]; ok {
continue
}
(*seen)[s] = struct{}{}
out = append(out, s)
}
return out
}
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
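One nuance of resultDedupKey worth spelling out: the URL is reduced to host+path, so results that differ only in query parameters are merged. A hypothetical test sketch (not part of this commit) illustrating that:

package search

import (
    "testing"

    "github.com/ashie/gosearch/internal/contracts"
)

func TestMergeResponses_DedupIgnoresQueryString(t *testing.T) {
    u1 := "https://example.com/a?x=1"
    u2 := "https://example.com/a?x=2"
    r := contracts.SearchResponse{
        Results: []contracts.MainResult{
            {Title: "Title1", URL: &u1, Engine: "wikipedia"},
            {Title: "Title1", URL: &u2, Engine: "wikipedia"},
        },
    }
    merged := MergeResponses([]contracts.SearchResponse{r})
    if len(merged.Results) != 1 {
        t.Fatalf("expected 1 result after dedup, got %d", len(merged.Results))
    }
}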


@@ -0,0 +1,80 @@
package search
import (
"strings"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
func TestMergeResponses_DedupResultsAndSets(t *testing.T) {
url1 := "https://example.com/a?x=1"
uPtr := &url1
r1 := contracts.SearchResponse{
Query: "q",
NumberOfResults: 1,
Results: []contracts.MainResult{
{
Template: "default.html",
Title: "Title1",
Content: "C1",
URL: uPtr,
Engine: "wikipedia",
Score: 1.0,
},
},
Answers: []map[string]any{{"title": "A1", "url": url1}},
Corrections: []string{"corr1", "corr2"},
Suggestions: []string{"s1", "s2"},
Infoboxes: []map[string]any{},
UnresponsiveEngines: [][2]string{},
}
r2 := contracts.SearchResponse{
Query: "q",
NumberOfResults: 1,
Results: []contracts.MainResult{
{
Template: "default.html",
Title: "Title1",
Content: "C2",
URL: uPtr,
Engine: "wikipedia",
Score: 2.0,
},
},
Answers: []map[string]any{{"title": "A1", "url": url1}},
Corrections: []string{"corr2", "corr3"},
Suggestions: []string{"s2", "s3"},
Infoboxes: []map[string]any{},
UnresponsiveEngines: [][2]string{},
}
merged := MergeResponses([]contracts.SearchResponse{r1, r2})
if merged.Query != "q" {
t.Fatalf("expected query q, got %q", merged.Query)
}
if merged.NumberOfResults != 1 {
t.Fatalf("expected number_of_results max=1, got %d", merged.NumberOfResults)
}
if len(merged.Results) != 1 {
t.Fatalf("expected 1 merged result, got %d", len(merged.Results))
}
// Corrections/suggestions should be unioned.
joinedCorr := strings.Join(merged.Corrections, ",")
if !strings.Contains(joinedCorr, "corr1") || !strings.Contains(joinedCorr, "corr2") || !strings.Contains(joinedCorr, "corr3") {
t.Fatalf("expected unioned corrections, got %v", merged.Corrections)
}
joinedSug := strings.Join(merged.Suggestions, ",")
if !strings.Contains(joinedSug, "s1") || !strings.Contains(joinedSug, "s2") || !strings.Contains(joinedSug, "s3") {
t.Fatalf("expected unioned suggestions, got %v", merged.Suggestions)
}
if len(merged.Answers) != 1 {
t.Fatalf("expected 1 merged answer, got %d", len(merged.Answers))
}
}


@@ -0,0 +1,206 @@
package search
import (
"errors"
"net/http"
"regexp"
"strconv"
"strings"
)
var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`)
func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
// SearXNG supports both GET and POST; all search parameters are read from form values.
if err := r.ParseForm(); err != nil {
return SearchRequest{}, errors.New("invalid request: cannot parse form")
}
format := strings.ToLower(r.FormValue("format"))
switch OutputFormat(format) {
case FormatJSON, FormatCSV, FormatRSS:
default:
// MVP: treat everything else as json, except `html`, which is accepted for
// compatibility but not yet implemented by the server.
if format != string(FormatHTML) {
format = string(FormatJSON)
}
}
q := r.FormValue("q")
if strings.TrimSpace(q) == "" {
return SearchRequest{}, errors.New("missing required parameter: q")
}
pageno := 1
if s := strings.TrimSpace(r.FormValue("pageno")); s != "" {
n, err := strconv.Atoi(s)
if err != nil || n < 1 {
return SearchRequest{}, errors.New("invalid parameter: pageno")
}
pageno = n
}
// MVP defaults.
safesearch := 0
if s := strings.TrimSpace(r.FormValue("safesearch")); s != "" {
n, err := strconv.Atoi(s)
if err != nil || n < 0 || n > 2 {
return SearchRequest{}, errors.New("invalid parameter: safesearch")
}
safesearch = n
}
var timeRange *string
if tr := strings.TrimSpace(r.FormValue("time_range")); tr != "" && tr != "None" {
switch tr {
case "day", "week", "month", "year":
tt := tr
timeRange = &tt
default:
return SearchRequest{}, errors.New("invalid parameter: time_range")
}
}
var timeoutLimit *float64
if s := strings.TrimSpace(r.FormValue("timeout_limit")); s != "" && s != "None" {
v, err := strconv.ParseFloat(s, 64)
if err != nil || v <= 0 {
return SearchRequest{}, errors.New("invalid parameter: timeout_limit")
}
timeoutLimit = &v
}
language := strings.TrimSpace(r.FormValue("language"))
if language == "" {
language = "auto"
}
switch language {
case "auto", "all":
// ok
default:
if !languageCodeRe.MatchString(language) {
return SearchRequest{}, errors.New("invalid parameter: language")
}
}
// engines is an explicit list of engine names.
engines := splitCSV(strings.TrimSpace(r.FormValue("engines")))
// categories and category_<name> params mirror SearXNG's webadapter parsing.
// We don't validate against a registry here; we just preserve the requested values.
catSet := map[string]bool{}
if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" {
for _, cat := range splitCSV(catsParam) {
catSet[cat] = true
}
}
for k, v := range r.Form {
if !strings.HasPrefix(k, "category_") {
continue
}
category := strings.TrimPrefix(k, "category_")
if category == "" {
continue
}
val := ""
if len(v) > 0 {
val = strings.TrimSpace(v[0])
}
if val == "" || val != "off" {
catSet[category] = true
} else {
delete(catSet, category)
}
}
categories := make([]string, 0, len(catSet))
for c := range catSet {
categories = append(categories, c)
}
if len(categories) == 0 {
categories = []string{"general"}
}
// Parse engine_data-<engine>-<key>=<value> parameters.
engineData := map[string]map[string]string{}
for k, v := range r.Form {
if !strings.HasPrefix(k, "engine_data-") {
continue
}
parts := strings.SplitN(k, "-", 3) // engine_data-<engine>-<key>
if len(parts) != 3 {
continue
}
engine := parts[1]
key := parts[2]
// For HTML forms, r.Form[k] can contain multiple values; keep first.
val := ""
if len(v) > 0 {
val = v[0]
}
if _, ok := engineData[engine]; !ok {
engineData[engine] = map[string]string{}
}
engineData[engine][key] = val
}
accessToken := parseAccessToken(r)
return SearchRequest{
Format: OutputFormat(format),
Query: q,
Pageno: pageno,
Safesearch: safesearch,
TimeRange: timeRange,
TimeoutLimit: timeoutLimit,
Language: language,
Engines: engines,
Categories: categories,
EngineData: engineData,
AccessToken: accessToken,
}, nil
}
func splitCSV(s string) []string {
if s == "" {
return nil
}
raw := strings.Split(s, ",")
out := make([]string, 0, len(raw))
for _, item := range raw {
item = strings.TrimSpace(item)
if item == "" {
continue
}
out = append(out, item)
}
return out
}
func parseAccessToken(r *http.Request) string {
// Supported sources (first non-empty wins):
// - `Authorization: Bearer <token>`
// - `X-Search-Token` / `X-Brave-Access-Token`
// - `token` form value
if auth := r.Header.Get("Authorization"); auth != "" {
const prefix = "Bearer "
if len(auth) > len(prefix) && auth[:len(prefix)] == prefix {
return strings.TrimSpace(auth[len(prefix):])
}
}
if v := strings.TrimSpace(r.Header.Get("X-Search-Token")); v != "" {
return v
}
if v := strings.TrimSpace(r.Header.Get("X-Brave-Access-Token")); v != "" {
return v
}
if v := strings.TrimSpace(r.FormValue("token")); v != "" {
return v
}
return ""
}
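The precedence order documented in parseAccessToken is not exercised by the tests that follow; a hypothetical test sketch (the header and token values are made up):

package search

import (
    "net/http"
    "net/http/httptest"
    "testing"
)

// The Authorization header should take precedence over the `token` form value.
func TestParseSearchRequest_AccessTokenPrecedence(t *testing.T) {
    r := httptest.NewRequest(http.MethodGet, "/search?q=hi&token=from-form", nil)
    r.Header.Set("Authorization", "Bearer from-header")

    req, err := ParseSearchRequest(r)
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    if req.AccessToken != "from-header" {
        t.Fatalf("expected header token to win, got %q", req.AccessToken)
    }
}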


@@ -0,0 +1,74 @@
package search
import (
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
)
func TestParseSearchRequest_MissingQ(t *testing.T) {
r := httptest.NewRequest(http.MethodGet, "/search?format=json", nil)
_, err := ParseSearchRequest(r)
if err == nil {
t.Fatalf("expected error, got nil")
}
}
func TestParseSearchRequest_InvalidPageno(t *testing.T) {
r := httptest.NewRequest(http.MethodGet, "/search?q=hi&pageno=0", nil)
_, err := ParseSearchRequest(r)
if err == nil {
t.Fatalf("expected error for pageno, got nil")
}
}
func TestParseSearchRequest_InvalidLanguage(t *testing.T) {
r := httptest.NewRequest(http.MethodGet, "/search?q=hi&language=bad!", nil)
_, err := ParseSearchRequest(r)
if err == nil {
t.Fatalf("expected error for language, got nil")
}
}
func TestParseSearchRequest_CategoriesAndEngineData(t *testing.T) {
values := url.Values{}
values.Set("q", "hello")
values.Set("format", "json")
values.Set("categories", "general,science")
values.Set("category_science", "off")
values.Set("engines", "wikipedia,arxiv")
values.Set("engine_data-wikipedia-timeout", "123")
r := httptest.NewRequest(http.MethodPost, "/search", strings.NewReader(values.Encode()))
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req, err := ParseSearchRequest(r)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// categories should drop `science` due to category_science=off
wantCats := map[string]bool{"general": true}
gotCats := map[string]bool{}
for _, c := range req.Categories {
gotCats[c] = true
}
for c := range wantCats {
if !gotCats[c] {
t.Fatalf("expected category %q in result, got %v", c, req.Categories)
}
}
if gotCats["science"] {
t.Fatalf("expected category science to be removed, got %v", req.Categories)
}
if len(req.Engines) != 2 {
t.Fatalf("expected 2 engines, got %v", req.Engines)
}
if req.EngineData["wikipedia"]["timeout"] != "123" {
t.Fatalf("expected engine_data parsed, got %#v", req.EngineData)
}
}

223
internal/search/response.go Normal file

@@ -0,0 +1,223 @@
package search
import (
"bytes"
"encoding/csv"
"encoding/json"
"encoding/xml"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
)
func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp SearchResponse) error {
switch format {
case FormatJSON:
w.Header().Set("Content-Type", "application/json; charset=utf-8")
return json.NewEncoder(w).Encode(resp)
case FormatCSV:
w.Header().Set("Content-Type", "text/csv; charset=utf-8")
if err := writeCSV(w, resp); err != nil {
return err
}
return nil
case FormatRSS:
w.Header().Set("Content-Type", "text/xml; charset=utf-8")
if err := writeRSS(w, resp); err != nil {
return err
}
return nil
case FormatHTML:
w.WriteHeader(http.StatusNotImplemented)
_, _ = w.Write([]byte("format=html not implemented yet"))
return nil
default:
return fmt.Errorf("unsupported format: %s", format)
}
}
// csvRowHeader matches the SearXNG CSV writer key order.
var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"}
func writeCSV(w http.ResponseWriter, resp SearchResponse) error {
cw := csv.NewWriter(w)
defer cw.Flush()
if err := cw.Write(csvRowHeader); err != nil {
return err
}
for _, r := range resp.Results {
urlStr := ""
if r.URL != nil {
urlStr = *r.URL
}
host := hostFromURL(urlStr)
scoreStr := strconv.FormatFloat(r.Score, 'f', -1, 64)
row := []string{
r.Title,
urlStr,
r.Content,
host,
r.Engine,
scoreStr,
"result",
}
if err := cw.Write(row); err != nil {
return err
}
}
for _, ans := range resp.Answers {
title := asString(ans["title"])
urlStr := asString(ans["url"])
content := asString(ans["content"])
engine := asString(ans["engine"])
scoreStr := scoreString(ans["score"])
host := hostFromURL(urlStr)
row := []string{
title,
urlStr,
content,
host,
engine,
scoreStr,
"answer",
}
if err := cw.Write(row); err != nil {
return err
}
}
for _, s := range resp.Suggestions {
row := []string{s, "", "", "", "", "", "suggestion"}
if err := cw.Write(row); err != nil {
return err
}
}
for _, c := range resp.Corrections {
row := []string{c, "", "", "", "", "", "correction"}
if err := cw.Write(row); err != nil {
return err
}
}
return nil
}
func writeRSS(w http.ResponseWriter, resp SearchResponse) error {
q := resp.Query
escapedTitle := xmlEscape("SearXNG search: " + q)
escapedDesc := xmlEscape("Search results for \"" + q + "\" - SearXNG")
escapedQueryTerms := xmlEscape(q)
link := "/search?q=" + url.QueryEscape(q)
opensearchQuery := fmt.Sprintf(`<opensearch:Query role="request" searchTerms="%s" startPage="1" />`, escapedQueryTerms)
// SearXNG template uses the number of results for both totalResults and itemsPerPage.
nr := resp.NumberOfResults
var items bytes.Buffer
for _, r := range resp.Results {
title := xmlEscape(r.Title)
urlStr := ""
if r.URL != nil {
urlStr = *r.URL
}
linkEsc := xmlEscape(urlStr)
desc := xmlEscape(r.Content)
pub := ""
if r.Pubdate != nil && strings.TrimSpace(*r.Pubdate) != "" {
pub = "<pubDate>" + xmlEscape(*r.Pubdate) + "</pubDate>"
}
items.WriteString(
fmt.Sprintf(
`<item><title>%s</title><type>result</type><link>%s</link><description>%s</description>%s</item>`,
title,
linkEsc,
desc,
pub,
),
)
}
rssBody := fmt.Sprintf(
`<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="/rss.xsl" type="text/xsl"?>
<rss version="2.0"
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
<opensearch:totalResults>%d</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>%d</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml"/>
%s
%s
</channel>
</rss>
`,
escapedTitle,
xmlEscape(link),
escapedDesc,
nr,
nr,
opensearchQuery,
items.String(),
)
_, err := w.Write([]byte(rssBody))
return err
}
func xmlEscape(s string) string {
var b bytes.Buffer
_ = xml.EscapeText(&b, []byte(s))
return b.String()
}
func hostFromURL(urlStr string) string {
if strings.TrimSpace(urlStr) == "" {
return ""
}
u, err := url.Parse(urlStr)
if err != nil {
return ""
}
return u.Host
}
func asString(v any) string {
s, _ := v.(string)
return s
}
func scoreString(v any) string {
switch t := v.(type) {
case float64:
return strconv.FormatFloat(t, 'f', -1, 64)
case float32:
return strconv.FormatFloat(float64(t), 'f', -1, 64)
case int:
return strconv.Itoa(t)
case int64:
return strconv.FormatInt(t, 10)
case json.Number:
if f, err := t.Float64(); err == nil {
return strconv.FormatFloat(f, 'f', -1, 64)
}
return ""
default:
return ""
}
}
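No test for response.go appears in this section, so here is a hypothetical sketch of the CSV path against an httptest recorder (the sample result values are made up); the expected rows follow the csvRowHeader order above:

package search

import (
    "net/http/httptest"
    "testing"
)

func TestWriteSearchResponse_CSV(t *testing.T) {
    u := "https://example.com/a"
    resp := SearchResponse{
        Query:           "q",
        NumberOfResults: 1,
        Results: []MainResult{
            {Title: "Title1", Content: "C1", URL: &u, Engine: "wikipedia", Score: 1.5},
        },
        Suggestions: []string{"s1"},
    }
    rec := httptest.NewRecorder()
    if err := WriteSearchResponse(rec, FormatCSV, resp); err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    // One header row, one result row, one suggestion row.
    want := "title,url,content,host,engine,score,type\n" +
        "Title1,https://example.com/a,C1,example.com,wikipedia,1.5,result\n" +
        "s1,,,,,,suggestion\n"
    if got := rec.Body.String(); got != want {
        t.Fatalf("unexpected CSV output:\n%s", got)
    }
}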

111
internal/search/service.go Normal file

@@ -0,0 +1,111 @@
package search
import (
"context"
"net/http"
"time"
"github.com/ashie/gosearch/internal/engines"
"github.com/ashie/gosearch/internal/contracts"
"github.com/ashie/gosearch/internal/upstream"
)
type ServiceConfig struct {
UpstreamURL string
HTTPTimeout time.Duration
}
type Service struct {
upstreamClient *upstream.Client
planner *engines.Planner
localEngines map[string]engines.Engine
}
func NewService(cfg ServiceConfig) *Service {
timeout := cfg.HTTPTimeout
if timeout <= 0 {
timeout = 10 * time.Second
}
httpClient := &http.Client{Timeout: timeout}
var up *upstream.Client
if cfg.UpstreamURL != "" {
c, err := upstream.NewClient(cfg.UpstreamURL, timeout)
if err == nil {
up = c
}
}
return &Service{
upstreamClient: up,
planner: engines.NewPlannerFromEnv(),
localEngines: engines.NewDefaultPortedEngines(httpClient),
}
}
func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
localEngines, upstreamEngines, _ := s.planner.Plan(req)
responses := make([]contracts.SearchResponse, 0, 2)
upstreamSet := map[string]bool{}
for _, e := range upstreamEngines {
upstreamSet[e] = true
}
for _, engineName := range localEngines {
eng, ok := s.localEngines[engineName]
if !ok {
continue
}
r, err := eng.Search(ctx, req)
if err != nil {
// MVP: fail fast so the client sees a real error.
return SearchResponse{}, err
}
responses = append(responses, r)
// Some engines (notably qwant due to anti-bot protections) can return
// zero local results depending on client/IP. If upstream SearXNG is
// configured, let it attempt the same engine as a fallback.
if shouldFallbackToUpstream(engineName, r) && !upstreamSet[engineName] {
upstreamEngines = append(upstreamEngines, engineName)
upstreamSet[engineName] = true
}
}
if s.upstreamClient != nil && len(upstreamEngines) > 0 {
r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngines)
if err != nil {
return SearchResponse{}, err
}
responses = append(responses, r)
}
if len(responses) == 0 {
return SearchResponse{
Query: req.Query,
NumberOfResults: 0,
Results: []MainResult{},
Answers: []map[string]any{},
Corrections: []string{},
Infoboxes: []map[string]any{},
Suggestions: []string{},
UnresponsiveEngines: [][2]string{},
}, nil
}
merged := MergeResponses(responses)
if merged.Query == "" {
merged.Query = req.Query
}
return merged, nil
}
func shouldFallbackToUpstream(engineName string, r contracts.SearchResponse) bool {
if engineName != "qwant" {
return false
}
return len(r.Results) == 0 && len(r.Answers) == 0 && len(r.Infoboxes) == 0
}
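For completeness, a hypothetical sketch of wiring the service behind an HTTP handler, tying ParseSearchRequest, Service.Search, and WriteSearchResponse together (the real entrypoint is not shown in this section; the upstream URL and listen port are placeholders):

package main

import (
    "log"
    "net/http"
    "time"

    "github.com/ashie/gosearch/internal/search"
)

func main() {
    svc := search.NewService(search.ServiceConfig{
        UpstreamURL: "https://searx.example.org", // assumed upstream; optional
        HTTPTimeout: 10 * time.Second,
    })

    http.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
        req, err := search.ParseSearchRequest(r)
        if err != nil {
            http.Error(w, err.Error(), http.StatusBadRequest)
            return
        }
        resp, err := svc.Search(r.Context(), req)
        if err != nil {
            http.Error(w, err.Error(), http.StatusBadGateway)
            return
        }
        if err := search.WriteSearchResponse(w, req.Format, resp); err != nil {
            log.Printf("write response: %v", err)
        }
    })

    log.Fatal(http.ListenAndServe(":8080", nil))
}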

20
internal/search/types.go Normal file

@@ -0,0 +1,20 @@
package search
import "github.com/ashie/gosearch/internal/contracts"
// Re-export the JSON contract types so the rest of the code can stay in the
// `internal/search` namespace without creating an import cycle.
type OutputFormat = contracts.OutputFormat
const (
FormatHTML = contracts.FormatHTML // accepted for compatibility (not yet implemented)
FormatJSON = contracts.FormatJSON
FormatCSV = contracts.FormatCSV
FormatRSS = contracts.FormatRSS
)
type SearchRequest = contracts.SearchRequest
type SearchResponse = contracts.SearchResponse
type MainResult = contracts.MainResult