feat: build Go-based SearXNG-compatible search service

Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling so searches can run without matching the original UI while preserving response compatibility.

Made-with: Cursor
This commit is contained in:
Franz Kafka 2026-03-20 20:34:08 +01:00
parent 7783367c71
commit dc44837219
32 changed files with 3330 additions and 0 deletions

View file

@ -0,0 +1,193 @@
package contracts
import (
"bytes"
"encoding/json"
)
// MainResult represents one element of SearXNG's `results` array.
//
// SearXNG returns many additional keys beyond what templates use. To keep the
// contract stable for proxying/merging, we preserve all unknown keys in
// `raw` and re-emit them via MarshalJSON.
type MainResult struct {
	// raw holds the full upstream JSON object (set by UnmarshalJSON).
	// When non-nil, MarshalJSON re-emits it verbatim so unknown keys survive
	// a proxy/merge round trip.
	raw map[string]any

	// Common fields used by SearXNG templates (RSS uses: title, url, content, pubdate).
	Template string  `json:"template"`
	Title    string  `json:"title"`
	Content  string  `json:"content"`
	URL      *string `json:"url"`     // nil when upstream omitted/nulled the key
	Pubdate  *string `json:"pubdate"` // nil when upstream omitted/nulled the key
	Engine   string  `json:"engine"`
	Score    float64 `json:"score"`
	Category string  `json:"category"`
	Priority string  `json:"priority"`
	Positions []int    `json:"positions"`
	Engines   []string `json:"engines"`
	// These fields exist in SearXNG's MainResult base; keep them so downstream
	// callers can generate richer output later.
	OpenGroup  bool `json:"open_group"`
	CloseGroup bool `json:"close_group"`
	// parsed_url in SearXNG is emitted as a tuple; we preserve it as-is.
	ParsedURL any `json:"parsed_url"`
}
// UnmarshalJSON decodes an upstream SearXNG result object.
//
// The whole object is retained in `raw` so MarshalJSON can re-emit unknown
// keys verbatim; the typed fields are then filled best-effort. A value whose
// JSON type does not match the expected Go type is skipped silently rather
// than producing an error.
func (mr *MainResult) UnmarshalJSON(data []byte) error {
	// Preserve the full object.
	dec := json.NewDecoder(bytes.NewReader(data))
	// UseNumber keeps numeric precision; floatOrZero understands json.Number.
	dec.UseNumber()
	var m map[string]any
	if err := dec.Decode(&m); err != nil {
		return err
	}
	mr.raw = m
	// Fill the typed/common fields (best-effort; don't fail if types differ).
	mr.Template = stringOrEmpty(m["template"])
	mr.Title = stringOrEmpty(m["title"])
	mr.Content = stringOrEmpty(m["content"])
	mr.Engine = stringOrEmpty(m["engine"])
	mr.Category = stringOrEmpty(m["category"])
	mr.Priority = stringOrEmpty(m["priority"])
	// Pointer fields stay nil when the key is absent, null, or non-string.
	if s, ok := stringOrNullable(m["url"]); ok {
		mr.URL = &s
	}
	if s, ok := stringOrNullable(m["pubdate"]); ok {
		mr.Pubdate = &s
	}
	mr.Score = floatOrZero(m["score"])
	if v, ok := sliceOfStrings(m["engines"]); ok {
		mr.Engines = v
	}
	if v, ok := sliceOfInts(m["positions"]); ok {
		mr.Positions = v
	}
	if v, ok := boolOrFalse(m["open_group"]); ok {
		mr.OpenGroup = v
	}
	if v, ok := boolOrFalse(m["close_group"]); ok {
		mr.CloseGroup = v
	}
	// parsed_url is a SearXNG tuple; kept opaque and re-emitted unchanged.
	mr.ParsedURL = m["parsed_url"]
	return nil
}
// MarshalJSON re-emits the original upstream object verbatim when this result
// was decoded from upstream JSON; otherwise it serializes the typed fields.
func (mr MainResult) MarshalJSON() ([]byte, error) {
	// Round-tripped upstream object: keep every key exactly as received.
	if mr.raw != nil {
		return json.Marshal(mr.raw)
	}
	// Locally-built result: emit only the known SearXNG keys.
	fields := map[string]any{
		"title":       mr.Title,
		"url":         mr.URL,
		"content":     mr.Content,
		"pubdate":     mr.Pubdate,
		"template":    mr.Template,
		"engine":      mr.Engine,
		"engines":     mr.Engines,
		"score":       mr.Score,
		"category":    mr.Category,
		"priority":    mr.Priority,
		"positions":   mr.Positions,
		"open_group":  mr.OpenGroup,
		"close_group": mr.CloseGroup,
		"parsed_url":  mr.ParsedURL,
	}
	return json.Marshal(fields)
}
// stringOrEmpty returns v when it is a string, or "" for any other type
// (including nil).
func stringOrEmpty(v any) string {
	if s, ok := v.(string); ok {
		return s
	}
	return ""
}
// stringOrNullable reports whether v holds a string, returning its value.
// JSON null (nil) and non-string values both report false.
func stringOrNullable(v any) (string, bool) {
	switch s := v.(type) {
	case string:
		return s, true
	default:
		return "", false
	}
}
func floatOrZero(v any) float64 {
switch t := v.(type) {
case float64:
return t
case float32:
return float64(t)
case int:
return float64(t)
case int64:
return float64(t)
case json.Number:
f, _ := t.Float64()
return f
default:
return 0
}
}
// boolOrFalse returns (value, true) when v is a bool, (false, false) otherwise.
func boolOrFalse(v any) (bool, bool) {
	if b, ok := v.(bool); ok {
		return b, true
	}
	return false, false
}
// sliceOfStrings converts a decoded JSON array into []string. It reports
// false when v is not an array or when any element is not a string.
func sliceOfStrings(v any) ([]string, bool) {
	items, ok := v.([]any)
	if !ok {
		return nil, false
	}
	strs := make([]string, 0, len(items))
	for _, item := range items {
		s, isStr := item.(string)
		if !isStr {
			return nil, false
		}
		strs = append(strs, s)
	}
	return strs, true
}
func sliceOfInts(v any) ([]int, bool) {
raw, ok := v.([]any)
if !ok {
return nil, false
}
out := make([]int, 0, len(raw))
for _, item := range raw {
switch t := item.(type) {
case float64:
out = append(out, int(t))
case int:
out = append(out, t)
case json.Number:
i64, err := t.Int64()
if err != nil {
return nil, false
}
out = append(out, int(i64))
default:
return nil, false
}
}
return out, true
}

View file

@ -0,0 +1,50 @@
package contracts
// OutputFormat matches SearXNG's `/search?format=...` values.
type OutputFormat string

// Supported output formats. JSON/CSV/RSS mirror SearXNG's machine-readable
// formats; HTML is accepted so existing clients don't break.
const (
	FormatHTML OutputFormat = "html" // accepted for compatibility (not yet implemented)
	FormatJSON OutputFormat = "json"
	FormatCSV  OutputFormat = "csv"
	FormatRSS  OutputFormat = "rss"
)
// SearchRequest is the normalized form of an incoming `/search` request.
type SearchRequest struct {
	// Format is what the client requested via `format=...`.
	Format OutputFormat
	// Query is the raw search string.
	Query string
	// Pageno is the 1-based page number; engines clamp non-positive values.
	Pageno int
	// Safesearch: 0 disables filtering; engines treat non-zero as filtered
	// (Brave maps any non-zero value to "strict").
	Safesearch int
	// TimeRange, when non-nil, is one of day/week/month/year.
	TimeRange *string
	// TimeoutLimit optionally caps the overall search time.
	// NOTE(review): assumed to be seconds, matching SearXNG's `timeout_limit`
	// form parameter — confirm where it is consumed.
	TimeoutLimit *float64
	// Language is the requested locale (e.g. "en").
	Language string
	// Engines and categories are used for deciding which engines run locally vs are proxied.
	// For now, engines can be supplied directly via the `engines` form parameter.
	Engines    []string
	Categories []string
	// EngineData matches SearXNG's `engine_data-<engine>-<key>=<value>` parameters.
	EngineData map[string]map[string]string
	// AccessToken is an optional request token used to gate paid/limited engines.
	// It is not part of the upstream JSON schema; it only influences local engines.
	AccessToken string
}
// SearchResponse matches the JSON schema returned by SearXNG's `webutils.get_json_response()`.
type SearchResponse struct {
	Query string `json:"query"`
	// NumberOfResults: local engines fill this with len(Results);
	// NOTE(review): upstream SearXNG may report an estimated total instead —
	// confirm before relying on it for pagination.
	NumberOfResults int          `json:"number_of_results"`
	Results         []MainResult `json:"results"`
	// Auxiliary result channels; local engines construct these as empty
	// slices so they serialize as [] rather than null.
	Answers     []map[string]any `json:"answers"`
	Corrections []string         `json:"corrections"`
	Infoboxes   []map[string]any `json:"infoboxes"`
	Suggestions []string         `json:"suggestions"`
	// UnresponsiveEngines lists (engine name, reason) pairs for engines that
	// failed, were misconfigured, or were gated off.
	UnresponsiveEngines [][2]string `json:"unresponsive_engines"`
}

191
internal/engines/arxiv.go Normal file
View file

@ -0,0 +1,191 @@
package engines
import (
"bytes"
"context"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
const (
	// arxivSearchPrefix scopes the query to all arXiv fields ("all:<query>").
	arxivSearchPrefix = "all"
	// arxivMaxResults is the page size requested from the arXiv API.
	arxivMaxResults = 10
)

// ArxivEngine queries the arXiv Atom API (export.arxiv.org).
type ArxivEngine struct {
	client *http.Client
}

// Name returns the SearXNG-compatible engine identifier.
func (e *ArxivEngine) Name() string { return "arxiv" }
// Search queries the arXiv Atom API and maps feed entries onto the SearXNG
// results contract. An empty (or whitespace-only) query returns an empty
// response without contacting upstream.
func (e *ArxivEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("arxiv engine not initialized")
	}
	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	// arXiv paginates via start/max_results; clamp so pageno <= 0 maps to
	// the first page instead of a negative offset.
	start := (req.Pageno - 1) * arxivMaxResults
	if start < 0 {
		start = 0
	}
	args := url.Values{}
	args.Set("search_query", fmt.Sprintf("%s:%s", arxivSearchPrefix, q))
	args.Set("start", fmt.Sprintf("%d", start))
	args.Set("max_results", fmt.Sprintf("%d", arxivMaxResults))
	endpoint := "https://export.arxiv.org/api/query?" + args.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Include a bounded body excerpt to aid debugging upstream failures.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	results, err := parseArxivAtom(raw)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	// Auxiliary channels are empty slices (not nil) so JSON emits [].
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// arxivEntry collects the fields extracted from one Atom <entry>.
type arxivEntry struct {
	Title     string
	ID        string // the entry's <id> URL, used as the result URL
	Summary   string
	Published string // raw timestamp text, parsed later by parseArxivPublished
}

// parseArxivAtom extracts entries from an arXiv Atom feed using a streaming
// token scan. Only <title>/<id>/<summary>/<published> elements inside an
// <entry> are read — `cur` is nil outside an entry, so feed-level elements of
// the same name are ignored. Entries missing a title or id are dropped.
func parseArxivAtom(xmlBytes []byte) ([]contracts.MainResult, error) {
	dec := xml.NewDecoder(bytes.NewReader(xmlBytes))
	var entries []arxivEntry
	var cur *arxivEntry
	for {
		tok, err := dec.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		switch t := tok.(type) {
		case xml.StartElement:
			// Match on the local name only, ignoring the Atom namespace.
			switch strings.ToLower(t.Name.Local) {
			case "entry":
				cur = &arxivEntry{}
			case "title":
				if cur != nil {
					var v string
					// Decode errors are deliberately ignored: a malformed
					// field leaves the entry value empty instead of aborting.
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.Title = strings.TrimSpace(v)
					}
				}
			case "id":
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.ID = strings.TrimSpace(v)
					}
				}
			case "summary":
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.Summary = strings.TrimSpace(v)
					}
				}
			case "published":
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.Published = strings.TrimSpace(v)
					}
				}
			}
		case xml.EndElement:
			if strings.ToLower(t.Name.Local) == "entry" && cur != nil {
				// Keep only entries that have both a title and an id.
				if cur.Title != "" && cur.ID != "" {
					entries = append(entries, *cur)
				}
				cur = nil
			}
		}
	}
	// Map parsed entries onto the SearXNG result contract.
	out := make([]contracts.MainResult, 0, len(entries))
	for _, e := range entries {
		urlPtr := e.ID
		content := e.Summary
		pubdate := parseArxivPublished(e.Published)
		out = append(out, contracts.MainResult{
			Template:  "default.html",
			Title:     e.Title,
			Content:   content,
			URL:       &urlPtr,
			Pubdate:   pubdate,
			Engine:    "arxiv",
			Category:  "science",
			Score:     0,
			Positions: nil,
			Engines:   []string{"arxiv"},
		})
	}
	return out, nil
}
// parseArxivPublished converts an arXiv RFC3339 timestamp (e.g.
// "2024-06-03T00:00:00Z") into SearXNG's pubdate format
// "2006-01-02 15:04:05-0700". Empty or unparseable input yields nil.
func parseArxivPublished(s string) *string {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" {
		return nil
	}
	parsed, err := time.Parse(time.RFC3339, trimmed)
	if err != nil {
		return nil
	}
	out := parsed.Format("2006-01-02 15:04:05-0700")
	return &out
}

View file

@ -0,0 +1,66 @@
package engines
import (
"context"
"net/http"
"strings"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestArxivEngine_Search stubs the arXiv API with a canned Atom feed and
// verifies the engine maps entries into the SearXNG result contract
// (title/content/url/pubdate).
func TestArxivEngine_Search(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		// Any unexpected request shape returns a non-2xx status, which
		// surfaces as an error from Search.
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "export.arxiv.org" || r.URL.Path != "/api/query" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		// The engine must prefix the query with "all:".
		q := r.URL.Query().Get("search_query")
		if q != "all:quantum" {
			return httpResponse(http.StatusBadRequest, "", ""), nil
		}
		atom := `<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>Quantum Test</title>
<id>http://arxiv.org/abs/1234.5678</id>
<summary>Abstract here</summary>
<published>2024-06-03T00:00:00Z</published>
</entry>
</feed>`
		return httpResponse(http.StatusOK, atom, "application/atom+xml"), nil
	})
	client := &http.Client{Transport: transport}
	engine := &ArxivEngine{client: client}
	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:  "quantum",
		Pageno: 1,
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	r := resp.Results[0]
	if r.Title != "Quantum Test" {
		t.Fatalf("unexpected title: %q", r.Title)
	}
	if r.Content != "Abstract here" {
		t.Fatalf("unexpected content: %q", r.Content)
	}
	if r.URL == nil || !strings.Contains(*r.URL, "1234.5678") {
		t.Fatalf("unexpected url: %v", r.URL)
	}
	if r.Pubdate == nil || !strings.Contains(*r.Pubdate, "2024-06-03") {
		t.Fatalf("expected pubdate around 2024-06-03, got %v", r.Pubdate)
	}
}

View file

@ -0,0 +1,195 @@
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// BraveEngine implements the SearXNG `braveapi` engine (Brave Web Search API).
//
// Config / gating:
//   - BRAVE_API_KEY: required to call Brave
//   - BRAVE_ACCESS_TOKEN (optional): if set, the request must include a token
//     that matches the env var (via Authorization Bearer, X-Search-Token,
//     X-Brave-Access-Token, or form field `token`).
type BraveEngine struct {
	client *http.Client
	// apiKey is sent as the X-Subscription-Token header on every request.
	apiKey string
	// accessGateToken, when non-empty, must match SearchRequest.AccessToken
	// before any upstream call is made.
	accessGateToken string
	// resultsPerPage is Brave's `count` parameter (registry default: 20).
	resultsPerPage int
}

// Name returns the SearXNG-compatible engine identifier.
func (e *BraveEngine) Name() string { return "braveapi" }
// Search queries the Brave Web Search API and maps the response onto the
// SearXNG results contract.
//
// Misconfiguration (missing API key) and a failed access-token gate are not
// errors: SearXNG treats misconfigured engines as unresponsive, so an empty
// response with an unresponsive_engines entry is returned instead.
func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("brave engine not initialized")
	}
	// Gate / config checks should not be treated as fatal errors; SearXNG
	// treats misconfigured engines as unresponsive.
	if strings.TrimSpace(e.apiKey) == "" {
		return contracts.SearchResponse{
			Query:               req.Query,
			NumberOfResults:     0,
			Results:             []contracts.MainResult{},
			Answers:             []map[string]any{},
			Corrections:         []string{},
			Infoboxes:           []map[string]any{},
			Suggestions:         []string{},
			UnresponsiveEngines: [][2]string{{e.Name(), "missing_api_key"}},
		}, nil
	}
	if gate := strings.TrimSpace(e.accessGateToken); gate != "" {
		if strings.TrimSpace(req.AccessToken) == "" || req.AccessToken != gate {
			return contracts.SearchResponse{
				Query:               req.Query,
				NumberOfResults:     0,
				Results:             []contracts.MainResult{},
				Answers:             []map[string]any{},
				Corrections:         []string{},
				Infoboxes:           []map[string]any{},
				Suggestions:         []string{},
				UnresponsiveEngines: [][2]string{{e.Name(), "unauthorized"}},
			}, nil
		}
	}
	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	// Defensive default so a zero-value engine doesn't request count=0.
	count := e.resultsPerPage
	if count <= 0 {
		count = 20
	}
	offset := 0
	if req.Pageno > 1 {
		offset = (req.Pageno - 1) * count
	}
	args := url.Values{}
	args.Set("q", q)
	args.Set("count", fmt.Sprintf("%d", count))
	args.Set("offset", fmt.Sprintf("%d", offset))
	if req.TimeRange != nil {
		// Brave's Web Search API expresses recency via the `freshness`
		// parameter with pd/pw/pm/py values; the previously-sent
		// `time_range=past_*` parameter is not part of the Brave API.
		switch *req.TimeRange {
		case "day":
			args.Set("freshness", "pd")
		case "week":
			args.Set("freshness", "pw")
		case "month":
			args.Set("freshness", "pm")
		case "year":
			args.Set("freshness", "py")
		}
	}
	// SearXNG's python checks `if params["safesearch"]:` which treats any
	// non-zero (moderate/strict) as strict.
	if req.Safesearch > 0 {
		args.Set("safesearch", "strict")
	}
	endpoint := "https://api.search.brave.com/res/v1/web/search?" + args.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	// Brave authenticates via this subscription header.
	httpReq.Header.Set("X-Subscription-Token", e.apiKey)
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Include a bounded body excerpt to aid debugging upstream failures.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	// Only the web.results subtree is consumed.
	var api struct {
		Web struct {
			Results []struct {
				URL         string `json:"url"`
				Title       string `json:"title"`
				Description string `json:"description"`
				Age         string `json:"age"`
				Thumbnail   struct {
					Src string `json:"src"`
				} `json:"thumbnail"`
			} `json:"results"`
		} `json:"web"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
		return contracts.SearchResponse{}, err
	}
	results := make([]contracts.MainResult, 0, len(api.Web.Results))
	for _, r := range api.Web.Results {
		urlPtr := strings.TrimSpace(r.URL)
		if urlPtr == "" {
			continue // a result without a URL is useless downstream
		}
		pub := parseBraveAge(r.Age)
		results = append(results, contracts.MainResult{
			Template:  "default.html",
			Title:     r.Title,
			Content:   r.Description,
			URL:       &urlPtr,
			Pubdate:   pub,
			Engine:    e.Name(),
			Score:     0,
			Category:  "general",
			Priority:  "",
			Positions: nil,
			Engines:   []string{e.Name()},
		})
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// parseBraveAge converts Brave's `age` field into SearXNG's pubdate format
// "2006-01-02 15:04:05-0700". Brave sometimes returns RFC3339-like timestamps
// for `age`; anything that doesn't parse (e.g. relative text) yields nil.
func parseBraveAge(ageRaw string) *string {
	ageRaw = strings.TrimSpace(ageRaw)
	if ageRaw == "" {
		return nil
	}
	// Most specific first. The original list also contained the literal
	// "2006-01-02T15:04:05Z07:00", which is exactly time.RFC3339 — a dead
	// duplicate entry that has been removed.
	layouts := []string{
		time.RFC3339Nano,
		time.RFC3339,
		"2006-01-02",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, ageRaw); err == nil {
			s := t.Format("2006-01-02 15:04:05-0700")
			return &s
		}
	}
	return nil
}

View file

@ -0,0 +1,92 @@
package engines
import (
"context"
"net/http"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestBraveEngine_GatingAndHeader covers both sides of the access-token gate:
// a wrong token must short-circuit without hitting the upstream stub (and
// report the engine unresponsive), while a correct token must reach the stub
// with the X-Subscription-Token header set.
func TestBraveEngine_GatingAndHeader(t *testing.T) {
	wantToken := "letmein"
	wantAPIKey := "api-key"
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		// This transport only runs in the authorized case; any request that
		// reaches it must already carry the API key and correct endpoint.
		if r.Header.Get("X-Subscription-Token") != wantAPIKey {
			t.Fatalf("missing/incorrect X-Subscription-Token header: got %q", r.Header.Get("X-Subscription-Token"))
		}
		if r.URL.Host != "api.search.brave.com" {
			t.Fatalf("unexpected host: %s", r.URL.Host)
		}
		if r.URL.Path != "/res/v1/web/search" {
			t.Fatalf("unexpected path: %s", r.URL.Path)
		}
		// basic query assertions
		q := r.URL.Query().Get("q")
		if q != "hugo" {
			t.Fatalf("unexpected q: %q", q)
		}
		body := `{
"web": {
"results": [
{"url":"https://example.com/a","title":"A","description":"B","age":"2024-06-03T00:00:00Z","thumbnail":{"src":"x"}}
]
}
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})
	client := &http.Client{Transport: transport}
	engine := &BraveEngine{
		client:          client,
		apiKey:          wantAPIKey,
		accessGateToken: wantToken,
		resultsPerPage:  20,
	}
	// Wrong token => no upstream call / unresponsive engine.
	{
		resp, err := engine.Search(context.Background(), contracts.SearchRequest{
			Query:       "hugo",
			Pageno:      1,
			Safesearch:  0,
			Language:    "en",
			AccessToken: "wrong",
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(resp.Results) != 0 {
			t.Fatalf("expected no results on unauthorized, got %d", len(resp.Results))
		}
		if len(resp.UnresponsiveEngines) != 1 {
			t.Fatalf("expected 1 unresponsive engine entry, got %v", resp.UnresponsiveEngines)
		}
	}
	// Correct token => upstream call.
	{
		resp, err := engine.Search(context.Background(), contracts.SearchRequest{
			Query:       "hugo",
			Pageno:      1,
			Safesearch:  0,
			Language:    "en",
			AccessToken: wantToken,
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(resp.Results) != 1 {
			t.Fatalf("expected 1 result, got %d", len(resp.Results))
		}
		if resp.Results[0].Title != "A" {
			t.Fatalf("unexpected title: %q", resp.Results[0].Title)
		}
		if resp.Results[0].URL == nil || *resp.Results[0].URL != "https://example.com/a" {
			t.Fatalf("unexpected url: %v", resp.Results[0].URL)
		}
	}
}

View file

@ -0,0 +1,144 @@
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// CrossrefEngine queries the Crossref REST API (api.crossref.org/works).
type CrossrefEngine struct {
	client *http.Client
}

// Name returns the SearXNG-compatible engine identifier.
func (e *CrossrefEngine) Name() string { return "crossref" }
// Search queries the Crossref `/works` endpoint and maps items onto the
// SearXNG results contract. An empty (or whitespace-only) query returns an
// empty response without contacting upstream.
func (e *CrossrefEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("crossref engine not initialized")
	}
	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	// Crossref pages via offset at 20 results per page. Clamp so a pageno of
	// 0 (uninitialized request) can't send a negative offset upstream — the
	// arxiv engine already clamps the same way.
	offset := 20 * (req.Pageno - 1)
	if offset < 0 {
		offset = 0
	}
	args := url.Values{}
	args.Set("query", q)
	args.Set("offset", fmt.Sprintf("%d", offset))
	endpoint := "https://api.crossref.org/works?" + args.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Include a bounded body excerpt to aid debugging upstream failures.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	var api struct {
		Message struct {
			Items []crossrefItem `json:"items"`
		} `json:"message"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
		return contracts.SearchResponse{}, err
	}
	results := make([]contracts.MainResult, 0, len(api.Message.Items))
	for _, item := range api.Message.Items {
		// Crossref titles arrive as a list; use the first entry when present.
		title := ""
		if len(item.Title) > 0 {
			title = strings.TrimSpace(item.Title[0])
		}
		content := strings.TrimSpace(item.Abstract)
		// Prefer the resolver URL; fall back to the bare DOI when absent.
		urlStr := strings.TrimSpace(item.URL)
		if urlStr == "" {
			urlStr = strings.TrimSpace(item.DOI)
		}
		pub := parseCrossrefDateParts(item.Published.DateParts)
		urlPtr := urlStr
		results = append(results, contracts.MainResult{
			Template:  "default.html",
			Title:     title,
			Content:   content,
			URL:       &urlPtr,
			Pubdate:   pub,
			Engine:    "crossref",
			Score:     0,
			Category:  "science",
			Priority:  "",
			Positions: nil,
			Engines:   []string{"crossref"},
		})
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// crossrefItem is the subset of a Crossref `/works` item that this engine
// reads. Note the uppercase "URL"/"DOI" JSON keys used by Crossref.
type crossrefItem struct {
	Type string `json:"type"`
	// Title is an array in Crossref's schema; only the first entry is used.
	Title     []string          `json:"title"`
	URL       string            `json:"URL"`
	DOI       string            `json:"DOI"`
	Abstract  string            `json:"abstract"`
	Page      string            `json:"page"`
	Publisher string            `json:"publisher"`
	Subject   []string          `json:"subject"`
	Published crossrefPublished `json:"published"`
}

// crossrefPublished wraps Crossref's nested publication-date structure.
type crossrefPublished struct {
	// DateParts is [[year, month, day]]; month and day may be omitted.
	DateParts [][]int `json:"date-parts"`
}
// parseCrossrefDateParts converts Crossref "date-parts" ([[year, month, day]],
// with month/day optional) into SearXNG's pubdate format. Returns nil when no
// date is present.
func parseCrossrefDateParts(parts [][]int) *string {
	if len(parts) == 0 || len(parts[0]) == 0 {
		return nil
	}
	dp := parts[0]
	year := dp[0]
	// Crossref may omit month/day; default both to 1 (January 1st).
	month := 1
	day := 1
	if len(dp) >= 2 {
		month = dp[1]
	}
	if len(dp) >= 3 {
		day = dp[2]
	}
	t := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC)
	// The previous layout "2006-01-02 00:00:00+0000" only worked by accident:
	// "00" and "+0000" are not layout tokens, so they were emitted literally.
	// Use real tokens; for a midnight-UTC time the output is byte-identical.
	formatted := t.Format("2006-01-02 15:04:05-0700")
	return &formatted
}

View file

@ -0,0 +1,71 @@
package engines
import (
"context"
"net/http"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestCrossrefEngine_Search stubs the Crossref `/works` endpoint and verifies
// the engine maps a work item (title list, abstract, DOI, date-parts) onto
// the SearXNG result contract.
func TestCrossrefEngine_Search(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		// Any unexpected request shape returns a non-2xx status, which
		// surfaces as an error from Search.
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "api.crossref.org" || r.URL.Path != "/works" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		q := r.URL.Query().Get("query")
		if q != "hugo" {
			return httpResponse(http.StatusBadRequest, "", ""), nil
		}
		body := `{
"message": {
"items": [
{
"type": "journal-article",
"title": ["Paper B"],
"URL": "https://example.com/paperb",
"abstract": "Abstract B",
"DOI": "10.1234/b",
"published": {
"date-parts": [[2020, 5, 1]]
}
}
]
}
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})
	client := &http.Client{Transport: transport}
	engine := &CrossrefEngine{client: client}
	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:  "hugo",
		Pageno: 1,
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	r := resp.Results[0]
	if r.Title != "Paper B" {
		t.Fatalf("expected title Paper B, got %q", r.Title)
	}
	if r.Content != "Abstract B" {
		t.Fatalf("expected content, got %q", r.Content)
	}
	if r.Pubdate == nil || *r.Pubdate == "" {
		t.Fatalf("expected pubdate, got nil/empty")
	}
	if r.Engine != "crossref" {
		t.Fatalf("expected engine crossref, got %q", r.Engine)
	}
}

View file

@ -0,0 +1,17 @@
package engines
import (
"context"
"github.com/ashie/gosearch/internal/contracts"
)
// Engine is a Go-native implementation of a SearXNG engine.
//
// Implementations should return a SearchResponse containing only the results
// for that engine subset; the caller will merge multiple engine responses.
type Engine interface {
	// Name returns the SearXNG-compatible engine identifier (e.g. "arxiv").
	Name() string
	// Search executes one query. Engine-level misconfiguration is reported
	// via the response's unresponsive_engines list rather than an error.
	Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error)
}

View file

@ -0,0 +1,33 @@
package engines
import (
"net/http"
"os"
"time"
)
// NewDefaultPortedEngines returns the starter set of Go-native engines.
// The service can swap/extend this registry later as more engines are ported.
//
// A nil client is replaced with a 10s-timeout default. Brave credentials are
// read from BRAVE_API_KEY / BRAVE_ACCESS_TOKEN at construction time.
func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
	if client == nil {
		client = &http.Client{Timeout: 10 * time.Second}
	}
	return map[string]Engine{
		"wikipedia": &WikipediaEngine{client: client},
		"arxiv":     &ArxivEngine{client: client},
		"crossref":  &CrossrefEngine{client: client},
		"braveapi": &BraveEngine{
			client:          client,
			apiKey:          os.Getenv("BRAVE_API_KEY"),
			accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"),
			resultsPerPage:  20,
		},
		// qwant defaults to the HTML "web-lite" mode because the JSON API is
		// frequently captcha/JS-blocked (see qwant.go).
		"qwant": &QwantEngine{
			client:         client,
			category:       "web-lite",
			resultsPerPage: 10,
		},
	}
}

View file

@ -0,0 +1,26 @@
package engines
import (
"io"
"net/http"
"strings"
)
// roundTripperFunc adapts a plain function to http.RoundTripper so tests can
// stub an *http.Client's transport.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// RoundTrip implements http.RoundTripper by invoking f.
func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	return f(r)
}
func httpResponse(status int, body string, contentType string) *http.Response {
h := make(http.Header)
if contentType != "" {
h.Set("Content-Type", contentType)
}
return &http.Response{
StatusCode: status,
Header: h,
Body: io.NopCloser(strings.NewReader(body)),
}
}

148
internal/engines/planner.go Normal file
View file

@ -0,0 +1,148 @@
package engines
import (
"os"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// defaultPortedEngines is the built-in locally-served engine set, used when
// LOCAL_PORTED_ENGINES is unset or yields no names.
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant"}

// Planner decides which requested engines run locally versus upstream.
type Planner struct {
	// PortedSet is the membership set of locally ported engine names
	// (lowercased).
	PortedSet map[string]bool
	// PortedList holds the same names in configured order, de-duplicated.
	PortedList []string
}
// NewPlannerFromEnv builds a Planner from the LOCAL_PORTED_ENGINES env var
// (comma-separated engine names). It falls back to defaultPortedEngines when
// the variable is unset, empty, or contains only separators/whitespace.
func NewPlannerFromEnv() *Planner {
	raw := strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))
	if raw == "" {
		return NewPlanner(defaultPortedEngines)
	}
	parts := splitCSV(raw)
	if len(parts) == 0 {
		return NewPlanner(defaultPortedEngines)
	}
	return NewPlanner(parts)
}
// NewPlanner builds a Planner from a list of engine names. Names are trimmed,
// lowercased, and de-duplicated; PortedList preserves first-seen order while
// skipping empty entries.
func NewPlanner(portedEngines []string) *Planner {
	seen := make(map[string]bool, len(portedEngines))
	ordered := make([]string, 0, len(portedEngines))
	for _, name := range portedEngines {
		name = strings.ToLower(strings.TrimSpace(name))
		if name == "" || seen[name] {
			continue
		}
		seen[name] = true
		ordered = append(ordered, name)
	}
	return &Planner{
		PortedSet:  seen,
		PortedList: ordered,
	}
}
// Plan returns:
//   - localEngines: engines that are configured as ported for this service
//   - upstreamEngines: engines that should be executed by upstream SearXNG
//   - requestedEngines: the (possibly inferred) requested engines list
//
// If the request provides an explicit `engines` parameter, we use it.
// Otherwise we infer a small subset from `categories` for the starter set.
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
	// Plan tolerates a nil receiver by lazily building a Planner from env.
	if p == nil {
		p = NewPlannerFromEnv()
	}
	requestedEngines = nil
	if len(req.Engines) > 0 {
		requestedEngines = normalizeList(req.Engines)
	} else {
		requestedEngines = inferFromCategories(req.Categories)
	}
	// Partition requested engines by ported membership, preserving order.
	localEngines = make([]string, 0, len(requestedEngines))
	upstreamEngines = make([]string, 0, len(requestedEngines))
	for _, e := range requestedEngines {
		if p.PortedSet[e] {
			localEngines = append(localEngines, e)
		} else {
			upstreamEngines = append(upstreamEngines, e)
		}
	}
	return localEngines, upstreamEngines, requestedEngines
}
// inferFromCategories maps SearXNG categories onto the starter engine subset
// when the request names no engines explicitly. The result order is fixed:
// wikipedia, braveapi, qwant, arxiv, crossref.
func inferFromCategories(categories []string) []string {
	// Minimal mapping for the initial porting subset; this mirrors selecting
	// from SearXNG categories without embedding the whole engine registry.
	want := map[string]bool{}
	for _, category := range categories {
		normalized := strings.TrimSpace(strings.ToLower(category))
		if normalized == "general" {
			want["wikipedia"] = true
			want["braveapi"] = true
			want["qwant"] = true
		} else if normalized == "science" || normalized == "scientific publications" {
			want["arxiv"] = true
			want["crossref"] = true
		}
	}
	engines := make([]string, 0, len(want))
	for engine := range want {
		engines = append(engines, engine)
	}
	// Map iteration order is random; impose a stable, deterministic order.
	sortByOrder(engines, map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "arxiv": 3, "crossref": 4})
	return engines
}
// sortByOrder sorts list in place by each element's rank in order (missing
// keys rank 0). Lists here are tiny, so a simple stable bubble sort suffices.
func sortByOrder(list []string, order map[string]int) {
	for swapped := true; swapped; {
		swapped = false
		for i := 1; i < len(list); i++ {
			if order[list[i-1]] > order[list[i]] {
				list[i-1], list[i] = list[i], list[i-1]
				swapped = true
			}
		}
	}
}
// normalizeList trims, lowercases, and de-duplicates engine names, dropping
// empty entries and preserving first-seen order.
func normalizeList(in []string) []string {
	seen := make(map[string]bool, len(in))
	result := make([]string, 0, len(in))
	for _, name := range in {
		name = strings.ToLower(strings.TrimSpace(name))
		if name == "" || seen[name] {
			continue
		}
		seen[name] = true
		result = append(result, name)
	}
	return result
}
// splitCSV splits a comma-separated string, trimming surrounding whitespace
// and dropping empty segments. An empty input returns nil.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	parts := strings.Split(s, ",")
	fields := make([]string, 0, len(parts))
	for _, part := range parts {
		if trimmed := strings.TrimSpace(part); trimmed != "" {
			fields = append(fields, trimmed)
		}
	}
	return fields
}

467
internal/engines/qwant.go Normal file
View file

@ -0,0 +1,467 @@
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
"github.com/PuerkitoBio/goquery"
)
// QwantEngine implements a SearXNG-like `qwant` (web) adapter using
// Qwant v3 endpoint: https://api.qwant.com/v3/search/web.
//
// Qwant's API is not fully documented; this mirrors SearXNG's parsing logic
// for the `web` category from `.agent/searxng/searx/engines/qwant.py`.
type QwantEngine struct {
	client *http.Client
	// category selects the fetch mode: "web" (JSON API) or "web-lite"
	// (HTML fallback). Empty defaults to "web".
	category string // "web" (JSON API) or "web-lite" (HTML fallback)
	// resultsPerPage is the `count` value for the JSON API; <= 0 means 10.
	resultsPerPage int
}

// Name returns the SearXNG-compatible engine identifier.
func (e *QwantEngine) Name() string { return "qwant" }
// Search dispatches to the JSON API ("web") or HTML fallback ("web-lite")
// path based on the configured category. An empty query returns an empty
// response without contacting upstream; an unknown mode is reported as an
// unresponsive engine rather than an error.
func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("qwant engine not initialized")
	}
	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	// For API parity we use SearXNG web defaults: count=10, offset=(pageno-1)*count.
	// The engine's config field exists so we can expand to news/images/videos later.
	// NOTE(review): count/offset only feed the "web" JSON path below; the
	// web-lite path presumably handles pagination internally — confirm.
	count := e.resultsPerPage
	if count <= 0 {
		count = 10
	}
	offset := 0
	if req.Pageno > 1 {
		offset = (req.Pageno - 1) * count
	}
	mode := strings.TrimSpace(strings.ToLower(e.category))
	if mode == "" {
		mode = "web"
	}
	switch mode {
	case "web-lite":
		return e.searchWebLite(ctx, req)
	case "web":
		return e.searchWebAPI(ctx, req, count, offset)
	default:
		// Unknown mode: treat as unresponsive.
		return contracts.SearchResponse{
			Query: req.Query,
			UnresponsiveEngines: [][2]string{
				{e.Name(), "unknown_qwant_mode"},
			},
			Results:     []contracts.MainResult{},
			Answers:     []map[string]any{},
			Corrections: []string{},
			Infoboxes:   []map[string]any{},
			Suggestions: []string{},
		}, nil
	}
}
// searchWebAPI queries Qwant's v3 JSON endpoint and maps `mainline` web rows
// onto the SearXNG results contract.
//
// Known Qwant failure modes are reported as an unresponsive engine instead of
// an error: HTTP 403 (captcha/JS challenge) and a non-"success" API status.
func (e *QwantEngine) searchWebAPI(ctx context.Context, req contracts.SearchRequest, count, offset int) (contracts.SearchResponse, error) {
	qLocale := qwantLocale(req.Language)
	args := url.Values{}
	args.Set("q", req.Query)
	args.Set("count", fmt.Sprintf("%d", count))
	args.Set("locale", qLocale)
	args.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
	args.Set("llm", "false")
	args.Set("tgp", "3")
	args.Set("offset", fmt.Sprintf("%d", offset))
	endpoint := "https://api.qwant.com/v3/search/web?" + args.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch-go/0.1 (+https://github.com/ashie/gosearch)")
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	// Qwant often returns a 403 captcha/JS block for the JSON API.
	if resp.StatusCode == http.StatusForbidden {
		return contracts.SearchResponse{
			Query: req.Query,
			UnresponsiveEngines: [][2]string{
				{e.Name(), "captcha_or_js_block"},
			},
			Results:     []contracts.MainResult{},
			Answers:     []map[string]any{},
			Corrections: []string{},
			Infoboxes:   []map[string]any{},
			Suggestions: []string{},
		}, nil
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Include a bounded body excerpt to aid debugging upstream failures.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("qwant upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	// Bound the read: the response is untrusted remote input.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	var top map[string]any
	if err := json.Unmarshal(body, &top); err != nil {
		return contracts.SearchResponse{}, err
	}
	status, _ := top["status"].(string)
	if status != "success" {
		return contracts.SearchResponse{
			Query: req.Query,
			UnresponsiveEngines: [][2]string{
				{e.Name(), "api_error"},
			},
			Results:     []contracts.MainResult{},
			Answers:     []map[string]any{},
			Corrections: []string{},
			Infoboxes:   []map[string]any{},
			Suggestions: []string{},
		}, nil
	}
	// Walk data.result.items.mainline; each step tolerates missing keys by
	// degrading to a nil map/slice.
	data, _ := top["data"].(map[string]any)
	result, _ := data["result"].(map[string]any)
	items, _ := result["items"].(map[string]any)
	mainline := items["mainline"]
	rows := toSlice(mainline)
	if len(rows) == 0 {
		return contracts.SearchResponse{
			Query:               req.Query,
			NumberOfResults:     0,
			Results:             []contracts.MainResult{},
			Answers:             []map[string]any{},
			Corrections:         []string{},
			Infoboxes:           []map[string]any{},
			Suggestions:         []string{},
			UnresponsiveEngines: [][2]string{},
		}, nil
	}
	results := make([]contracts.MainResult, 0, len(rows))
	for _, row := range rows {
		rowMap, ok := row.(map[string]any)
		if !ok {
			continue
		}
		rowType, _ := rowMap["type"].(string)
		if rowType == "" {
			rowType = "web"
		}
		// Keep only organic "web" rows. This guard also discards "ads" and
		// any other mainline row type, so the separate `rowType == "ads"`
		// check that used to follow here was unreachable and is removed.
		if rowType != "web" {
			continue
		}
		rowItems := toSlice(rowMap["items"])
		for _, it := range rowItems {
			itemMap, ok := it.(map[string]any)
			if !ok {
				continue
			}
			title := toString(itemMap["title"])
			resURL := toString(itemMap["url"])
			desc := toString(itemMap["desc"])
			if resURL == "" {
				continue // a result without a URL is useless downstream
			}
			urlPtr := resURL
			results = append(results, contracts.MainResult{
				Template: "default.html",
				Title:    title,
				Content:  desc,
				URL:      &urlPtr,
				Engine:   e.Name(),
				Score:    0,
				Category: "general",
				Engines:  []string{e.Name()},
			})
		}
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// searchWebLite scrapes lite.qwant.com (the JS-free HTML frontend) as a
// fallback for the JSON API. Two extraction passes run over the document:
//
//  1. the known legacy markup (section > article with a "url partner" span);
//  2. a conservative generic pass over article/li/div blocks that keeps only
//     clearly external, non-sponsored, non-navigation anchors.
//
// Both passes share one `seen` set, so each URL is emitted at most once.
func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	qLocale := qwantLocale(req.Language)
	// The lite frontend wants a lowercase locale plus a bare language code.
	langBase := strings.SplitN(qLocale, "_", 2)[0]
	args := url.Values{}
	args.Set("q", req.Query)
	args.Set("locale", strings.ToLower(qLocale))
	args.Set("l", langBase)
	args.Set("s", fmt.Sprintf("%d", req.Safesearch))
	args.Set("p", fmt.Sprintf("%d", req.Pageno))
	endpoint := "https://lite.qwant.com/?" + args.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch-go/0.1 (+https://github.com/ashie/gosearch)")
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("qwant lite upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	results := make([]contracts.MainResult, 0)
	seen := map[string]bool{}
	// Pattern 1: legacy/known qwant-lite structure.
	doc.Find("section article").Each(func(_ int, item *goquery.Selection) {
		// ignore randomly interspersed advertising adds
		if item.Find("span.tooltip").Length() > 0 {
			return
		}
		// In SearXNG: "./span[contains(@class, 'url partner')]"
		urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text())
		if urlText == "" {
			// fallback: any span with class containing both 'url' and 'partner'
			urlText = strings.TrimSpace(item.Find("span[class*='url'][class*='partner']").First().Text())
		}
		title := strings.TrimSpace(item.Find("h2 a").First().Text())
		content := strings.TrimSpace(item.Find("p").First().Text())
		if urlText == "" {
			return
		}
		if seen[urlText] {
			return
		}
		seen[urlText] = true
		// Copy before taking the address: MainResult.URL is a pointer.
		u := urlText
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &u,
			Engine:   e.Name(),
			Score:    0,
			Category: "general",
			Engines:  []string{e.Name()},
		})
	})
	// Pattern 2: broader fallback for updated lite markup:
	// any article/list item/div block containing an external anchor.
	// We keep this conservative by requiring non-empty title + URL.
	doc.Find("article, li, div").Each(func(_ int, item *goquery.Selection) {
		// Cap the fallback pass at 20 results. Note: `return` here only
		// skips this element; Each keeps iterating the remaining nodes.
		if len(results) >= 20 {
			return
		}
		// Skip ad-like blocks in fallback pass too.
		if item.Find("span.tooltip").Length() > 0 {
			return
		}
		// Skip obvious nav/footer blocks.
		classAttr, _ := item.Attr("class")
		classLower := strings.ToLower(classAttr)
		if strings.Contains(classLower, "nav") || strings.Contains(classLower, "footer") {
			return
		}
		a := item.Find("a[href]").First()
		if a.Length() == 0 {
			return
		}
		href, ok := a.Attr("href")
		if !ok {
			return
		}
		href = strings.TrimSpace(href)
		if href == "" {
			return
		}
		// Ignore in-page and relative links.
		if strings.HasPrefix(href, "/") || strings.HasPrefix(href, "#") {
			return
		}
		// Require an absolute http(s) URL; this drops mailto:, javascript:, etc.
		if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
			return
		}
		// Skip known sponsored partner links surfaced in lite pages.
		if isKnownSponsoredURL(href) {
			return
		}
		if isQwantInternalURL(href) {
			// Ignore qwant nav/house links.
			return
		}
		title := strings.TrimSpace(a.Text())
		if title == "" {
			return
		}
		if isLikelyNavTitle(title) {
			return
		}
		if seen[href] {
			return
		}
		seen[href] = true
		// Best-effort snippet extraction from nearby paragraph/span text.
		content := strings.TrimSpace(item.Find("p").First().Text())
		if content == "" {
			content = strings.TrimSpace(item.Find("span").First().Text())
		}
		// If there is no snippet, still keep clearly external result links.
		// Qwant-lite frequently omits rich snippets for some entries.
		u := href
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &u,
			Engine:   e.Name(),
			Score:    0,
			Category: "general",
			Engines:  []string{e.Name()},
		})
	})
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// qwantLocale converts a request language tag into the en_US-style locale
// string Qwant expects. Empty or "auto" inputs default to en_US; a tag
// without a region (or with a blank one) falls back to the US region.
func qwantLocale(lang string) string {
	tag := strings.TrimSpace(lang)
	if tag == "" || tag == "auto" {
		return "en_US"
	}
	tag = strings.ReplaceAll(tag, "-", "_")
	base, region, hasRegion := strings.Cut(tag, "_")
	region = strings.TrimSpace(region)
	if !hasRegion || region == "" {
		region = "US"
	}
	// Qwant expects locales like en_US.
	return strings.ToLower(base) + "_" + strings.ToUpper(region)
}
// toSlice normalizes a decoded JSON value into a slice: arrays pass through
// unchanged, a single object is wrapped in a one-element slice, and every
// other value yields nil.
func toSlice(v any) []any {
	if arr, ok := v.([]any); ok {
		return arr
	}
	if obj, ok := v.(map[string]any); ok {
		return []any{obj}
	}
	return nil
}
func toString(v any) string {
switch t := v.(type) {
case string:
return t
case json.Number:
return t.String()
default:
return ""
}
}
// isQwantInternalURL reports whether raw points at qwant.com itself (any
// subdomain included), i.e. a navigation/house link rather than a result.
// Unparseable or host-less URLs are treated as external.
func isQwantInternalURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	if host == "" {
		return false
	}
	return host == "qwant.com" || strings.HasSuffix(host, ".qwant.com")
}
// isLikelyNavTitle reports whether an anchor title looks like Qwant site
// chrome (navigation or promo copy) rather than a search result title.
// Matching is case-insensitive on the trimmed text.
func isLikelyNavTitle(title string) bool {
	normalized := strings.ToLower(strings.TrimSpace(title))
	if strings.HasPrefix(normalized, "get 20gb of free storage") {
		return true
	}
	navTitles := map[string]bool{
		"qwant search":         true,
		"search":               true,
		"privacy":              true,
		"discover the service": true,
		"better web":           true,
		"discover":             true,
	}
	return navTitles[normalized]
}
// isKnownSponsoredURL reports whether raw matches known sponsored/partner
// link patterns that lite.qwant.com intersperses with organic results:
// the shdw.me redirector host, or any URL mentioning "qwant-tool".
// Unparseable URLs are treated as not sponsored.
func isKnownSponsoredURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	if host == "shdw.me" || host == "www.shdw.me" {
		return true
	}
	return strings.Contains(strings.ToLower(raw), "qwant-tool")
}

View file

@ -0,0 +1,89 @@
package engines
import (
"context"
"net/http"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestQwantEngine_WebLite exercises the lite.qwant.com HTML-scraping path
// against a canned response: it checks the outgoing query parameters
// (lowercased locale plus l/s/p fields) and that articles carrying a
// span.tooltip (ad marker) are filtered out of the results.
func TestQwantEngine_WebLite(t *testing.T) {
	// Fake transport: validates the request, then serves a fixed HTML page
	// with one organic article and one ad-flagged article.
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "lite.qwant.com" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		if r.URL.Path != "/" {
			// goquery request URL parsing should normalize to "/"
			t.Fatalf("unexpected path: %s", r.URL.Path)
		}
		q := r.URL.Query().Get("q")
		if q != "hugo" {
			t.Fatalf("unexpected q: %q", q)
		}
		if r.URL.Query().Get("locale") != "en_us" {
			t.Fatalf("unexpected locale: %q", r.URL.Query().Get("locale"))
		}
		if r.URL.Query().Get("l") != "en" {
			t.Fatalf("unexpected l: %q", r.URL.Query().Get("l"))
		}
		if r.URL.Query().Get("s") != "0" {
			t.Fatalf("unexpected s: %q", r.URL.Query().Get("s"))
		}
		if r.URL.Query().Get("p") != "1" {
			t.Fatalf("unexpected p: %q", r.URL.Query().Get("p"))
		}
		body := `
<!doctype html>
<html>
<body>
<section>
<article>
<span class="url partner">https://example.com/q</span>
<h2><a href="https://example.com/q">Qwant Title</a></h2>
<p>Qwant description</p>
</article>
<article>
<span class="tooltip">ad</span>
<span class="url partner">https://example.com/ad</span>
<h2><a href="https://example.com/ad">Ad Title</a></h2>
<p>Ad description</p>
</article>
</section>
</body>
</html>`
		return httpResponse(http.StatusOK, body, "text/html"), nil
	})
	client := &http.Client{Transport: transport}
	engine := &QwantEngine{client: client, category: "web-lite", resultsPerPage: 10}
	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:      "hugo",
		Pageno:     1,
		Safesearch: 0,
		Language:   "en",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// Only the non-ad article should survive filtering.
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result (non-ad), got %d", len(resp.Results))
	}
	if resp.Results[0].Title != "Qwant Title" {
		t.Fatalf("unexpected title: %q", resp.Results[0].Title)
	}
	if resp.Results[0].Content != "Qwant description" {
		t.Fatalf("unexpected content: %q", resp.Results[0].Content)
	}
	if resp.Results[0].URL == nil || *resp.Results[0].URL != "https://example.com/q" {
		t.Fatalf("unexpected url: %v", resp.Results[0].URL)
	}
}

View file

@ -0,0 +1,94 @@
package engines
import (
"context"
"net/http"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestQwantEngine_Web exercises the api.qwant.com JSON path against a canned
// payload: it checks every outgoing query parameter the adapter sets
// (count/locale/safesearch/llm/tgp/offset) and the mapping of a mainline
// "web" item onto a SearXNG-compatible result.
func TestQwantEngine_Web(t *testing.T) {
	// Fake transport: validates the request, then serves a minimal
	// status=success payload with one web item.
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "api.qwant.com" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		if r.URL.Path != "/v3/search/web" {
			t.Fatalf("unexpected path: %s", r.URL.Path)
		}
		q := r.URL.Query().Get("q")
		if q != "hugo" {
			t.Fatalf("unexpected q: %q", q)
		}
		if r.URL.Query().Get("count") != "10" {
			t.Fatalf("unexpected count: %q", r.URL.Query().Get("count"))
		}
		if r.URL.Query().Get("locale") != "en_US" {
			t.Fatalf("unexpected locale: %q", r.URL.Query().Get("locale"))
		}
		if r.URL.Query().Get("safesearch") != "0" {
			t.Fatalf("unexpected safesearch: %q", r.URL.Query().Get("safesearch"))
		}
		if r.URL.Query().Get("llm") != "false" {
			t.Fatalf("unexpected llm: %q", r.URL.Query().Get("llm"))
		}
		if r.URL.Query().Get("tgp") != "3" {
			t.Fatalf("unexpected tgp: %q", r.URL.Query().Get("tgp"))
		}
		if r.URL.Query().Get("offset") != "0" {
			t.Fatalf("unexpected offset: %q", r.URL.Query().Get("offset"))
		}
		body := `{
"status": "success",
"data": {
"result": {
"items": {
"mainline": [
{
"type": "web",
"items": [
{ "title": "Qwant Title", "url": "https://example.com/q", "desc": "Qwant description" }
]
}
]
}
}
}
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})
	client := &http.Client{Transport: transport}
	engine := &QwantEngine{client: client, category: "web", resultsPerPage: 10}
	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:      "hugo",
		Pageno:     1,
		Safesearch: 0,
		Language:   "en",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	if resp.Results[0].Title != "Qwant Title" {
		t.Fatalf("unexpected title: %q", resp.Results[0].Title)
	}
	if resp.Results[0].Content != "Qwant description" {
		t.Fatalf("unexpected content: %q", resp.Results[0].Content)
	}
	if resp.Results[0].URL == nil || *resp.Results[0].URL != "https://example.com/q" {
		t.Fatalf("unexpected url: %v", resp.Results[0].URL)
	}
	if resp.Results[0].Engine != "qwant" {
		t.Fatalf("unexpected engine: %q", resp.Results[0].Engine)
	}
}

View file

@ -0,0 +1,151 @@
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// WikipediaEngine resolves queries against the Wikipedia REST summary API.
// Its zero value is unusable; Search guards against a nil client.
type WikipediaEngine struct {
	client *http.Client // HTTP client used for all upstream requests
}
// Name returns the engine identifier ("wikipedia") used for result attribution.
func (e *WikipediaEngine) Name() string { return "wikipedia" }
// Search resolves req.Query against the Wikipedia REST summary endpoint
// (`/api/rest_v1/page/summary/<title>`) of the language-matching wiki and
// maps the summary to at most one SearXNG-compatible result.
//
// A 404 (unknown page) and a payload missing the desktop page URL are
// treated as an empty result set rather than an error; other non-2xx
// statuses are returned as errors. An empty query yields an empty response.
func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("wikipedia engine not initialized")
	}
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	lang := strings.TrimSpace(req.Language)
	if lang == "" || lang == "auto" {
		lang = "en"
	}
	// Wikipedia subdomains use the bare language code. Normalize "_"
	// separators to "-" BEFORE trimming the region so tags like "en_US"
	// reduce to "en"; the previous order split on "-" first, producing
	// non-existent hosts such as "en-US.wikipedia.org" for underscore tags.
	lang = strings.ReplaceAll(lang, "_", "-")
	lang = strings.SplitN(lang, "-", 2)[0]
	wikiNetloc := fmt.Sprintf("%s.wikipedia.org", lang)
	endpoint := fmt.Sprintf(
		"https://%s/api/rest_v1/page/summary/%s",
		wikiNetloc,
		url.PathEscape(req.Query),
	)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	// Wikimedia APIs require a descriptive User-Agent.
	httpReq.Header.Set(
		"User-Agent",
		"gosearch-go/0.1 (compatible; +https://github.com/ashie/gosearch)",
	)
	// Best-effort: hint content language.
	if req.Language != "" && req.Language != "auto" {
		httpReq.Header.Set("Accept-Language", req.Language)
	}
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	// 404 means "no such page" — an empty result set, not a failure.
	if resp.StatusCode == http.StatusNotFound {
		return contracts.SearchResponse{
			Query:               req.Query,
			NumberOfResults:     0,
			Results:             []contracts.MainResult{},
			Answers:             []map[string]any{},
			Corrections:         []string{},
			Infoboxes:           []map[string]any{},
			Suggestions:         []string{},
			UnresponsiveEngines: [][2]string{},
		}, nil
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	// Only the fields we map are decoded; everything else is ignored.
	var api struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Titles      struct {
			Display string `json:"display"`
		} `json:"titles"`
		ContentURLs struct {
			Desktop struct {
				Page string `json:"page"`
			} `json:"desktop"`
		} `json:"content_urls"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
		return contracts.SearchResponse{}, err
	}
	pageURL := api.ContentURLs.Desktop.Page
	if pageURL == "" {
		// API returned a non-standard payload; treat as no result.
		return contracts.SearchResponse{
			Query:               req.Query,
			NumberOfResults:     0,
			Results:             []contracts.MainResult{},
			Answers:             []map[string]any{},
			Corrections:         []string{},
			Infoboxes:           []map[string]any{},
			Suggestions:         []string{},
			UnresponsiveEngines: [][2]string{},
		}, nil
	}
	// Prefer the display title; fall back to the canonical title.
	title := api.Titles.Display
	if title == "" {
		title = api.Title
	}
	content := api.Description
	urlPtr := pageURL
	var pub *string // Wikipedia summaries carry no publication date
	results := []contracts.MainResult{
		{
			Template:  "default.html",
			Title:     title,
			Content:   content,
			URL:       &urlPtr,
			Pubdate:   pub,
			Engine:    "wikipedia",
			Score:     0,
			Category:  "general",
			Priority:  "",
			Positions: nil,
			Engines:   []string{"wikipedia"},
		},
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}

View file

@ -0,0 +1,61 @@
package engines
import (
"context"
"net/http"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestWikipediaEngine_Search checks the happy path against a canned REST
// summary payload: correct host/path derivation for language "en" and the
// mapping of title/description/page URL onto a single result.
func TestWikipediaEngine_Search(t *testing.T) {
	// Fake transport serving the summary endpoint for "Taxi" only.
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "en.wikipedia.org" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		if r.URL.Path != "/api/rest_v1/page/summary/Taxi" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		body := `{
"title": "Taxi",
"description": "A car",
"titles": { "display": "Taxi" },
"content_urls": { "desktop": { "page": "https://en.wikipedia.org/wiki/Taxi" } }
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})
	client := &http.Client{Transport: transport}
	engine := &WikipediaEngine{client: client}
	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:    "Taxi",
		Pageno:   1,
		Language: "en",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	r := resp.Results[0]
	if r.Title != "Taxi" {
		t.Fatalf("expected title Taxi, got %q", r.Title)
	}
	if r.Content != "A car" {
		t.Fatalf("expected content, got %q", r.Content)
	}
	if r.URL == nil || *r.URL == "" {
		t.Fatalf("expected url, got nil/empty")
	}
	if *r.URL != "https://en.wikipedia.org/wiki/Taxi" {
		t.Fatalf("unexpected url: %q", *r.URL)
	}
}

View file

@ -0,0 +1,41 @@
package httpapi
import (
"net/http"
"github.com/ashie/gosearch/internal/search"
)
// Handler bundles the HTTP endpoints exposed by the API server.
type Handler struct {
	searchSvc *search.Service // backing search orchestration service
}
// NewHandler returns a Handler backed by the given search service.
func NewHandler(searchSvc *search.Service) *Handler {
	return &Handler{searchSvc: searchSvc}
}
// Healthz answers liveness probes with a plain-text 200 OK body.
func (h *Handler) Healthz(w http.ResponseWriter, r *http.Request) {
	header := w.Header()
	header.Set("Content-Type", "text/plain; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	// Best-effort write; the probe may already have disconnected.
	_, _ = w.Write([]byte("OK"))
}
// Search is the /search endpoint: it parses a SearXNG-compatible request,
// runs the query through the search service, and serializes the response in
// the requested format. Parse failures answer 400; search or serialization
// failures answer 500.
func (h *Handler) Search(w http.ResponseWriter, r *http.Request) {
	req, err := search.ParseSearchRequest(r)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	resp, err := h.searchSvc.Search(r.Context(), req)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// NOTE: if WriteSearchResponse fails mid-stream, part of the body may
	// already be written; the http.Error below is then best-effort only.
	if err := search.WriteSearchResponse(w, req.Format, resp); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
}

121
internal/search/merge.go Normal file
View file

@ -0,0 +1,121 @@
package search
import (
"encoding/json"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// MergeResponses merges multiple SearXNG-compatible JSON responses.
//
// MVP merge semantics:
// - results are concatenated with a simple de-dup key (engine|title|url)
// - suggestions/corrections are de-duplicated as sets
// - answers/infoboxes/unresponsive_engines are concatenated (best-effort)
// MergeResponses merges multiple SearXNG-compatible JSON responses.
//
// MVP merge semantics:
//   - results are concatenated with a simple de-dup key (engine|title|url)
//   - suggestions/corrections are de-duplicated as sets
//   - answers are de-duplicated by their JSON encoding when marshalable
//   - infoboxes/unresponsive_engines are concatenated (best-effort)
//
// number_of_results is the maximum reported by any input, and the merged
// response always carries non-nil slices to keep the JSON shape stable.
func MergeResponses(responses []contracts.SearchResponse) contracts.SearchResponse {
	var (
		out             contracts.SearchResponse
		seenResults     = map[string]struct{}{}
		seenAnswers     = map[string]struct{}{}
		seenCorrections = map[string]struct{}{}
		seenSuggestions = map[string]struct{}{}
	)
	for _, resp := range responses {
		// The first non-empty query wins.
		if out.Query == "" {
			out.Query = resp.Query
		}
		out.NumberOfResults = maxInt(out.NumberOfResults, resp.NumberOfResults)
		for _, res := range resp.Results {
			key := resultDedupKey(res)
			if _, dup := seenResults[key]; dup {
				continue
			}
			seenResults[key] = struct{}{}
			out.Results = append(out.Results, res)
		}
		for _, ans := range resp.Answers {
			// De-dup by normalized JSON when possible; unmarshalable answers
			// are kept unconditionally.
			encoded, err := json.Marshal(ans)
			if err != nil {
				out.Answers = append(out.Answers, ans)
				continue
			}
			if _, dup := seenAnswers[string(encoded)]; dup {
				continue
			}
			seenAnswers[string(encoded)] = struct{}{}
			out.Answers = append(out.Answers, ans)
		}
		out.Corrections = unionStrings(out.Corrections, resp.Corrections, &seenCorrections)
		out.Suggestions = unionStrings(out.Suggestions, resp.Suggestions, &seenSuggestions)
		out.Infoboxes = append(out.Infoboxes, resp.Infoboxes...)
		out.UnresponsiveEngines = append(out.UnresponsiveEngines, resp.UnresponsiveEngines...)
	}
	// Normalize nil slices so JSON emits [] instead of null.
	if out.Results == nil {
		out.Results = []contracts.MainResult{}
	}
	if out.Answers == nil {
		out.Answers = []map[string]any{}
	}
	if out.Corrections == nil {
		out.Corrections = []string{}
	}
	if out.Infoboxes == nil {
		out.Infoboxes = []map[string]any{}
	}
	if out.Suggestions == nil {
		out.Suggestions = []string{}
	}
	if out.UnresponsiveEngines == nil {
		out.UnresponsiveEngines = [][2]string{}
	}
	return out
}
// resultDedupKey builds a case-insensitive "engine|title|url" key used to
// de-duplicate merged results; when the URL parses and has a host, the URL
// part is reduced to host+path to collapse trivial variations.
func resultDedupKey(r contracts.MainResult) string {
	var link string
	if r.URL != nil {
		link = *r.URL
	}
	// Normalize host to reduce duplicates.
	if parsed, err := url.Parse(link); err == nil && parsed.Host != "" {
		link = parsed.Host + parsed.Path
	}
	return strings.ToLower(r.Engine) + "|" + strings.ToLower(r.Title) + "|" + link
}
// unionStrings appends to dst each member of src not yet recorded in *seen,
// lazily allocating the seen-set on first use, and returns the grown slice.
func unionStrings(dst []string, src []string, seen *map[string]struct{}) []string {
	if *seen == nil {
		*seen = map[string]struct{}{}
	}
	set := *seen
	for _, candidate := range src {
		if _, dup := set[candidate]; dup {
			continue
		}
		set[candidate] = struct{}{}
		dst = append(dst, candidate)
	}
	return dst
}
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if a < b {
		return b
	}
	return a
}

View file

@ -0,0 +1,80 @@
package search
import (
"strings"
"testing"
"github.com/ashie/gosearch/internal/contracts"
)
// TestMergeResponses_DedupResultsAndSets verifies the MVP merge semantics:
// identical results (same engine|title|url key) collapse to one entry,
// corrections/suggestions are unioned as sets, identical answers are
// de-duplicated, and number_of_results is the max of the inputs.
func TestMergeResponses_DedupResultsAndSets(t *testing.T) {
	url1 := "https://example.com/a?x=1"
	uPtr := &url1
	// r1 and r2 share the same result key and answer but differ in
	// content/score and in parts of their corrections/suggestions sets.
	r1 := contracts.SearchResponse{
		Query:           "q",
		NumberOfResults: 1,
		Results: []contracts.MainResult{
			{
				Template: "default.html",
				Title:    "Title1",
				Content:  "C1",
				URL:      uPtr,
				Engine:   "wikipedia",
				Score:    1.0,
			},
		},
		Answers:             []map[string]any{{"title": "A1", "url": url1}},
		Corrections:         []string{"corr1", "corr2"},
		Suggestions:         []string{"s1", "s2"},
		Infoboxes:           []map[string]any{},
		UnresponsiveEngines: [][2]string{},
	}
	r2 := contracts.SearchResponse{
		Query:           "q",
		NumberOfResults: 1,
		Results: []contracts.MainResult{
			{
				Template: "default.html",
				Title:    "Title1",
				Content:  "C2",
				URL:      uPtr,
				Engine:   "wikipedia",
				Score:    2.0,
			},
		},
		Answers:             []map[string]any{{"title": "A1", "url": url1}},
		Corrections:         []string{"corr2", "corr3"},
		Suggestions:         []string{"s2", "s3"},
		Infoboxes:           []map[string]any{},
		UnresponsiveEngines: [][2]string{},
	}
	merged := MergeResponses([]contracts.SearchResponse{r1, r2})
	if merged.Query != "q" {
		t.Fatalf("expected query q, got %q", merged.Query)
	}
	if merged.NumberOfResults != 1 {
		t.Fatalf("expected number_of_results max=1, got %d", merged.NumberOfResults)
	}
	if len(merged.Results) != 1 {
		t.Fatalf("expected 1 merged result, got %d", len(merged.Results))
	}
	// Corrections/suggestions should be unioned.
	joinedCorr := strings.Join(merged.Corrections, ",")
	if !strings.Contains(joinedCorr, "corr1") || !strings.Contains(joinedCorr, "corr2") || !strings.Contains(joinedCorr, "corr3") {
		t.Fatalf("expected unioned corrections, got %v", merged.Corrections)
	}
	joinedSug := strings.Join(merged.Suggestions, ",")
	if !strings.Contains(joinedSug, "s1") || !strings.Contains(joinedSug, "s2") || !strings.Contains(joinedSug, "s3") {
		t.Fatalf("expected unioned suggestions, got %v", merged.Suggestions)
	}
	if len(merged.Answers) != 1 {
		t.Fatalf("expected 1 merged answer, got %d", len(merged.Answers))
	}
}

View file

@ -0,0 +1,206 @@
package search
import (
"errors"
"net/http"
"regexp"
"strconv"
"strings"
)
// languageCodeRe matches ISO-639-style language tags such as "en", "fil",
// or "en-US": a 2-3 lowercase-letter base with an optional two-letter region.
var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`)
// ParseSearchRequest extracts and validates a SearchRequest from an HTTP
// request, mirroring SearXNG's webadapter parameter semantics.
//
// Validated parameters return an error on bad input (q, pageno, safesearch,
// time_range, timeout_limit, language); everything else falls back to MVP
// defaults (format=json, pageno=1, safesearch=0, language=auto,
// categories=[general]).
func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
	// SearXNG supports both GET and POST and relies on form values for routing.
	if err := r.ParseForm(); err != nil {
		return SearchRequest{}, errors.New("invalid request: cannot parse form")
	}
	format := strings.ToLower(r.FormValue("format"))
	switch OutputFormat(format) {
	case FormatJSON, FormatCSV, FormatRSS:
	default:
		// MVP: treat everything else as json, except `html` which we accept for compatibility.
		if format == string(FormatHTML) {
			// accepted, but not implemented by the server yet
		} else {
			format = string(FormatJSON)
		}
	}
	q := r.FormValue("q")
	if strings.TrimSpace(q) == "" {
		return SearchRequest{}, errors.New("missing required parameter: q")
	}
	pageno := 1
	if s := strings.TrimSpace(r.FormValue("pageno")); s != "" {
		n, err := strconv.Atoi(s)
		if err != nil || n < 1 {
			return SearchRequest{}, errors.New("invalid parameter: pageno")
		}
		pageno = n
	}
	// MVP defaults.
	safesearch := 0
	if s := strings.TrimSpace(r.FormValue("safesearch")); s != "" {
		n, err := strconv.Atoi(s)
		if err != nil || n < 0 || n > 2 {
			return SearchRequest{}, errors.New("invalid parameter: safesearch")
		}
		safesearch = n
	}
	// "None" is accepted as an absent value (clients serializing Python None).
	var timeRange *string
	if tr := strings.TrimSpace(r.FormValue("time_range")); tr != "" && tr != "None" {
		switch tr {
		case "day", "week", "month", "year":
			tt := tr
			timeRange = &tt
		default:
			return SearchRequest{}, errors.New("invalid parameter: time_range")
		}
	}
	var timeoutLimit *float64
	if s := strings.TrimSpace(r.FormValue("timeout_limit")); s != "" && s != "None" {
		v, err := strconv.ParseFloat(s, 64)
		if err != nil || v <= 0 {
			return SearchRequest{}, errors.New("invalid parameter: timeout_limit")
		}
		timeoutLimit = &v
	}
	language := strings.TrimSpace(r.FormValue("language"))
	if language == "" {
		language = "auto"
	}
	switch language {
	case "auto", "all":
		// ok
	default:
		if !languageCodeRe.MatchString(language) {
			return SearchRequest{}, errors.New("invalid parameter: language")
		}
	}
	// engines is an explicit list of engine names.
	engines := splitCSV(strings.TrimSpace(r.FormValue("engines")))
	// categories and category_<name> params mirror SearXNG's webadapter parsing.
	// We don't validate against a registry here; we just preserve the requested values.
	catSet := map[string]bool{}
	if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" {
		for _, cat := range splitCSV(catsParam) {
			catSet[cat] = true
		}
	}
	for k, v := range r.Form {
		if !strings.HasPrefix(k, "category_") {
			continue
		}
		category := strings.TrimPrefix(k, "category_")
		if category == "" {
			continue
		}
		val := ""
		if len(v) > 0 {
			val = strings.TrimSpace(v[0])
		}
		// Any value other than an explicit "off" enables the category; "off"
		// removes it even if it appeared in the categories= list.
		if val == "" || val != "off" {
			catSet[category] = true
		} else {
			delete(catSet, category)
		}
	}
	// NOTE: map iteration order is random, so the order of the resulting
	// categories slice is unspecified.
	categories := make([]string, 0, len(catSet))
	for c := range catSet {
		categories = append(categories, c)
	}
	if len(categories) == 0 {
		categories = []string{"general"}
	}
	// Parse engine_data-<engine>-<key>=<value> parameters.
	// NOTE(review): engine names containing '-' would be split incorrectly by
	// SplitN below — confirm the engine naming convention excludes hyphens.
	engineData := map[string]map[string]string{}
	for k, v := range r.Form {
		if !strings.HasPrefix(k, "engine_data-") {
			continue
		}
		parts := strings.SplitN(k, "-", 3) // engine_data-<engine>-<key>
		if len(parts) != 3 {
			continue
		}
		engine := parts[1]
		key := parts[2]
		// For HTML forms, r.Form[k] can contain multiple values; keep first.
		val := ""
		if len(v) > 0 {
			val = v[0]
		}
		if _, ok := engineData[engine]; !ok {
			engineData[engine] = map[string]string{}
		}
		engineData[engine][key] = val
	}
	accessToken := parseAccessToken(r)
	return SearchRequest{
		Format:       OutputFormat(format),
		Query:        q,
		Pageno:       pageno,
		Safesearch:   safesearch,
		TimeRange:    timeRange,
		TimeoutLimit: timeoutLimit,
		Language:     language,
		Engines:      engines,
		Categories:   categories,
		EngineData:   engineData,
		AccessToken:  accessToken,
	}, nil
}
// splitCSV splits a comma-separated list into trimmed, non-empty items.
// An empty input yields nil; an input of only separators/blanks yields an
// empty (non-nil) slice.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	parts := strings.Split(s, ",")
	out := make([]string, 0, len(parts))
	for _, part := range parts {
		trimmed := strings.TrimSpace(part)
		if trimmed == "" {
			continue
		}
		out = append(out, trimmed)
	}
	return out
}
func parseAccessToken(r *http.Request) string {
// Supported sources (first non-empty wins):
// - `Authorization: Bearer <token>`
// - `X-Search-Token` / `X-Brave-Access-Token`
// - `token` form value
if auth := r.Header.Get("Authorization"); auth != "" {
const prefix = "Bearer "
if len(auth) > len(prefix) && auth[:len(prefix)] == prefix {
return strings.TrimSpace(auth[len(prefix):])
}
}
if v := strings.TrimSpace(r.Header.Get("X-Search-Token")); v != "" {
return v
}
if v := strings.TrimSpace(r.Header.Get("X-Brave-Access-Token")); v != "" {
return v
}
if v := strings.TrimSpace(r.FormValue("token")); v != "" {
return v
}
return ""
}

View file

@ -0,0 +1,74 @@
package search
import (
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
)
// TestParseSearchRequest_MissingQ: a request without the required q
// parameter must be rejected.
func TestParseSearchRequest_MissingQ(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/search?format=json", nil)
	_, err := ParseSearchRequest(r)
	if err == nil {
		t.Fatalf("expected error, got nil")
	}
}
// TestParseSearchRequest_InvalidPageno: pageno must be >= 1.
func TestParseSearchRequest_InvalidPageno(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/search?q=hi&pageno=0", nil)
	_, err := ParseSearchRequest(r)
	if err == nil {
		t.Fatalf("expected error for pageno, got nil")
	}
}
// TestParseSearchRequest_InvalidLanguage: language tags must match the
// ISO-639-style pattern; junk values are rejected.
func TestParseSearchRequest_InvalidLanguage(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/search?q=hi&language=bad!", nil)
	_, err := ParseSearchRequest(r)
	if err == nil {
		t.Fatalf("expected error for language, got nil")
	}
}
// TestParseSearchRequest_CategoriesAndEngineData checks POST form parsing:
// category_<name>=off removes a category listed in categories=, engines= is
// split into a list, and engine_data-<engine>-<key> parameters are grouped
// per engine.
func TestParseSearchRequest_CategoriesAndEngineData(t *testing.T) {
	values := url.Values{}
	values.Set("q", "hello")
	values.Set("format", "json")
	values.Set("categories", "general,science")
	values.Set("category_science", "off")
	values.Set("engines", "wikipedia,arxiv")
	values.Set("engine_data-wikipedia-timeout", "123")
	r := httptest.NewRequest(http.MethodPost, "/search", strings.NewReader(values.Encode()))
	r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req, err := ParseSearchRequest(r)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// categories should drop `science` due to category_science=off
	wantCats := map[string]bool{"general": true}
	gotCats := map[string]bool{}
	for _, c := range req.Categories {
		gotCats[c] = true
	}
	for c := range wantCats {
		if !gotCats[c] {
			t.Fatalf("expected category %q in result, got %v", c, req.Categories)
		}
	}
	if gotCats["science"] {
		t.Fatalf("expected category science to be removed, got %v", req.Categories)
	}
	if len(req.Engines) != 2 {
		t.Fatalf("expected 2 engines, got %v", req.Engines)
	}
	if req.EngineData["wikipedia"]["timeout"] != "123" {
		t.Fatalf("expected engine_data parsed, got %#v", req.EngineData)
	}
}

223
internal/search/response.go Normal file
View file

@ -0,0 +1,223 @@
package search
import (
"bytes"
"encoding/csv"
"encoding/json"
"fmt"
"net/http"
"net/url"
"encoding/xml"
"strconv"
"strings"
)
// WriteSearchResponse serializes resp to w in the requested output format
// and sets the matching Content-Type. The html format is acknowledged but
// answers 501 Not Implemented; unrecognized formats yield an error.
func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp SearchResponse) error {
	switch format {
	case FormatJSON:
		w.Header().Set("Content-Type", "application/json; charset=utf-8")
		return json.NewEncoder(w).Encode(resp)
	case FormatCSV:
		w.Header().Set("Content-Type", "text/csv; charset=utf-8")
		return writeCSV(w, resp)
	case FormatRSS:
		w.Header().Set("Content-Type", "text/xml; charset=utf-8")
		return writeRSS(w, resp)
	case FormatHTML:
		w.WriteHeader(http.StatusNotImplemented)
		_, _ = w.Write([]byte("format=html not implemented yet"))
		return nil
	}
	return fmt.Errorf("unsupported format: %s", format)
}
// csvRowHeader matches the SearXNG CSV writer key order; writeCSV emits one
// row per result/answer/suggestion/correction using exactly these columns.
var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"}
// writeCSV renders results, answers, suggestions and corrections as CSV
// rows in the SearXNG column order (see csvRowHeader); the final column
// tags each row with its record type.
func writeCSV(w http.ResponseWriter, resp SearchResponse) error {
	cw := csv.NewWriter(w)
	defer cw.Flush()
	if err := cw.Write(csvRowHeader); err != nil {
		return err
	}
	for _, res := range resp.Results {
		link := ""
		if res.URL != nil {
			link = *res.URL
		}
		row := []string{
			res.Title,
			link,
			res.Content,
			hostFromURL(link),
			res.Engine,
			strconv.FormatFloat(res.Score, 'f', -1, 64),
			"result",
		}
		if err := cw.Write(row); err != nil {
			return err
		}
	}
	for _, ans := range resp.Answers {
		link := asString(ans["url"])
		row := []string{
			asString(ans["title"]),
			link,
			asString(ans["content"]),
			hostFromURL(link),
			asString(ans["engine"]),
			scoreString(ans["score"]),
			"answer",
		}
		if err := cw.Write(row); err != nil {
			return err
		}
	}
	for _, s := range resp.Suggestions {
		if err := cw.Write([]string{s, "", "", "", "", "", "suggestion"}); err != nil {
			return err
		}
	}
	for _, c := range resp.Corrections {
		if err := cw.Write([]string{c, "", "", "", "", "", "correction"}); err != nil {
			return err
		}
	}
	return nil
}
// writeRSS renders resp as a SearXNG-compatible RSS 2.0 document with
// OpenSearch extensions (totalResults/startIndex/itemsPerPage), using the
// same per-result fields as the upstream template: title, url, content,
// pubdate.
func writeRSS(w http.ResponseWriter, resp SearchResponse) error {
	query := resp.Query
	channelTitle := xmlEscape("SearXNG search: " + query)
	channelDesc := xmlEscape("Search results for \"" + query + "\" - SearXNG")
	channelLink := "/search?q=" + url.QueryEscape(query)
	opensearchQuery := fmt.Sprintf(
		`<opensearch:Query role="request" searchTerms="%s" startPage="1" />`,
		xmlEscape(query),
	)

	// Build all <item> elements up front.
	var items bytes.Buffer
	for _, r := range resp.Results {
		itemLink := ""
		if r.URL != nil {
			itemLink = *r.URL
		}
		pub := ""
		if r.Pubdate != nil && strings.TrimSpace(*r.Pubdate) != "" {
			pub = "<pubDate>" + xmlEscape(*r.Pubdate) + "</pubDate>"
		}
		items.WriteString(fmt.Sprintf(
			`<item><title>%s</title><type>result</type><link>%s</link><description>%s</description>%s</item>`,
			xmlEscape(r.Title),
			xmlEscape(itemLink),
			xmlEscape(r.Content),
			pub,
		))
	}

	// SearXNG's template uses the number of results for both totalResults
	// and itemsPerPage.
	count := resp.NumberOfResults
	// Named `doc` rather than `xml` to avoid shadowing the encoding/xml
	// package name.
	doc := fmt.Sprintf(
		`<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="/rss.xsl" type="text/xsl"?>
<rss version="2.0"
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>%s</title>
<link>%s</link>
<description>%s</description>
<opensearch:totalResults>%d</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>%d</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml"/>
%s
%s
</channel>
</rss>
`,
		channelTitle,
		xmlEscape(channelLink),
		channelDesc,
		count,
		count,
		opensearchQuery,
		items.String(),
	)
	_, err := w.Write([]byte(doc))
	return err
}
// xmlEscape returns s with XML-special characters replaced by entity
// references, safe for both element text and attribute values.
func xmlEscape(s string) string {
	var sb strings.Builder
	// strings.Builder's Write never fails, so EscapeText cannot error here.
	_ = xml.EscapeText(&sb, []byte(s))
	return sb.String()
}
// hostFromURL extracts the host ("example.com[:port]") component of an
// absolute URL. Blank or unparsable input yields "".
func hostFromURL(urlStr string) string {
	if strings.TrimSpace(urlStr) == "" {
		return ""
	}
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return ""
	}
	return parsed.Host
}
// asString returns v when it is a string and "" for any other dynamic
// type, including nil.
func asString(v any) string {
	if s, ok := v.(string); ok {
		return s
	}
	return ""
}
// scoreString formats a JSON-decoded score value as decimal text.
// Unsupported types — and json.Number values that fail to parse — map
// to "".
func scoreString(v any) string {
	switch n := v.(type) {
	case float64:
		return strconv.FormatFloat(n, 'f', -1, 64)
	case float32:
		return strconv.FormatFloat(float64(n), 'f', -1, 64)
	case int:
		return strconv.FormatInt(int64(n), 10)
	case int64:
		return strconv.FormatInt(n, 10)
	case json.Number:
		f, err := n.Float64()
		if err != nil {
			return ""
		}
		return strconv.FormatFloat(f, 'f', -1, 64)
	}
	return ""
}

111
internal/search/service.go Normal file
View file

@ -0,0 +1,111 @@
package search
import (
"context"
"net/http"
"time"
"github.com/ashie/gosearch/internal/engines"
"github.com/ashie/gosearch/internal/contracts"
"github.com/ashie/gosearch/internal/upstream"
)
// ServiceConfig carries the construction parameters for NewService.
type ServiceConfig struct {
	// UpstreamURL is the base URL of a fallback SearXNG instance; empty
	// disables upstream fallback entirely.
	UpstreamURL string
	// HTTPTimeout bounds each outbound HTTP request. Values <= 0 are
	// replaced with a 10s default in NewService.
	HTTPTimeout time.Duration
}
// Service orchestrates a search across locally ported engines with an
// optional upstream SearXNG fallback.
type Service struct {
	// upstreamClient is nil when no valid upstream URL was configured.
	upstreamClient *upstream.Client
	// planner decides which engines run locally vs. upstream per request.
	planner *engines.Planner
	// localEngines maps engine name to its local adapter implementation.
	localEngines map[string]engines.Engine
}
// NewService wires up a Service from cfg. A non-positive HTTPTimeout falls
// back to 10 seconds. An invalid upstream URL is treated the same as no
// upstream at all — the NewClient error is deliberately dropped so local
// engines keep working.
func NewService(cfg ServiceConfig) *Service {
	timeout := cfg.HTTPTimeout
	if timeout <= 0 {
		timeout = 10 * time.Second
	}

	var up *upstream.Client
	if cfg.UpstreamURL != "" {
		if c, err := upstream.NewClient(cfg.UpstreamURL, timeout); err == nil {
			up = c
		}
	}

	client := &http.Client{Timeout: timeout}
	return &Service{
		upstreamClient: up,
		planner:        engines.NewPlannerFromEnv(),
		localEngines:   engines.NewDefaultPortedEngines(client),
	}
}
// Search runs req against the planned local engines, then (optionally)
// against the upstream SearXNG instance, and merges every response into a
// single SearXNG-shaped SearchResponse.
func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
	localEngines, upstreamEngines, _ := s.planner.Plan(req)

	upstreamSet := make(map[string]bool, len(upstreamEngines))
	for _, name := range upstreamEngines {
		upstreamSet[name] = true
	}

	responses := make([]contracts.SearchResponse, 0, 2)
	for _, name := range localEngines {
		eng, ok := s.localEngines[name]
		if !ok {
			continue
		}
		res, err := eng.Search(ctx, req)
		if err != nil {
			// MVP: fail fast so the client sees a real error.
			return SearchResponse{}, err
		}
		responses = append(responses, res)
		// Some engines (notably qwant due to anti-bot protections) can return
		// zero local results depending on client/IP. If upstream SearXNG is
		// configured, let it attempt the same engine as a fallback.
		if shouldFallbackToUpstream(name, res) && !upstreamSet[name] {
			upstreamEngines = append(upstreamEngines, name)
			upstreamSet[name] = true
		}
	}

	if s.upstreamClient != nil && len(upstreamEngines) > 0 {
		res, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngines)
		if err != nil {
			return SearchResponse{}, err
		}
		responses = append(responses, res)
	}

	if len(responses) == 0 {
		// Nothing ran: return an empty but well-formed response so the JSON
		// output keeps SearXNG's shape ([] instead of null arrays).
		return SearchResponse{
			Query:               req.Query,
			NumberOfResults:     0,
			Results:             []MainResult{},
			Answers:             []map[string]any{},
			Corrections:         []string{},
			Infoboxes:           []map[string]any{},
			Suggestions:         []string{},
			UnresponsiveEngines: [][2]string{},
		}, nil
	}

	merged := MergeResponses(responses)
	if merged.Query == "" {
		merged.Query = req.Query
	}
	return merged, nil
}
// shouldFallbackToUpstream reports whether a local engine's response came
// back so empty that the same engine should be retried via upstream.
// Currently only qwant is known to need this (its anti-bot defenses can
// blank local results depending on client/IP).
func shouldFallbackToUpstream(engineName string, r contracts.SearchResponse) bool {
	if engineName != "qwant" {
		return false
	}
	empty := len(r.Results) == 0 && len(r.Answers) == 0 && len(r.Infoboxes) == 0
	return empty
}

20
internal/search/types.go Normal file
View file

@ -0,0 +1,20 @@
package search
import "github.com/ashie/gosearch/internal/contracts"
// Re-export the JSON contract types so the rest of the code can stay in the
// `internal/search` namespace without creating an import cycle.

// OutputFormat selects the response serialization (json/csv/rss/html).
type OutputFormat = contracts.OutputFormat

// Supported output formats, re-exported for call-site convenience.
const (
	FormatHTML = contracts.FormatHTML // accepted for compatibility (not yet implemented)
	FormatJSON = contracts.FormatJSON
	FormatCSV = contracts.FormatCSV
	FormatRSS = contracts.FormatRSS
)

// SearchRequest is the parsed inbound query, SearchResponse the
// SearXNG-shaped result envelope, and MainResult one `results` entry.
type SearchRequest = contracts.SearchRequest
type SearchResponse = contracts.SearchResponse
type MainResult = contracts.MainResult

112
internal/upstream/client.go Normal file
View file

@ -0,0 +1,112 @@
package upstream
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/ashie/gosearch/internal/contracts"
)
// Client talks to a remote SearXNG instance's /search endpoint.
type Client struct {
	baseURL string
	http    *http.Client
}

// NewClient validates and normalizes baseURL and returns a ready Client.
// A non-positive timeout falls back to 10 seconds.
func NewClient(baseURL string, timeout time.Duration) (*Client, error) {
	if strings.TrimSpace(baseURL) == "" {
		return nil, errors.New("upstream base URL is empty")
	}
	parsed, err := url.Parse(baseURL)
	if err != nil {
		return nil, fmt.Errorf("invalid upstream base URL: %w", err)
	}
	if timeout <= 0 {
		timeout = 10 * time.Second
	}
	// Normalize: trim trailing slash to make URL concatenation predictable.
	return &Client{
		baseURL: strings.TrimRight(parsed.String(), "/"),
		http:    &http.Client{Timeout: timeout},
	}, nil
}
// SearchJSON POSTs req to the upstream /search endpoint (always requesting
// JSON output) restricted to the given engines, and decodes the response
// into the shared contract types. The response body is capped at 4 MiB to
// bound memory use.
func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, engines []string) (contracts.SearchResponse, error) {
	// Always request upstream JSON; the Go service will handle csv/rss later.
	form := url.Values{}
	form.Set("q", req.Query)
	form.Set("format", "json")
	form.Set("pageno", fmt.Sprintf("%d", req.Pageno))
	form.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
	form.Set("language", req.Language)
	if req.TimeRange != nil {
		form.Set("time_range", *req.TimeRange)
	}
	if req.TimeoutLimit != nil {
		form.Set("timeout_limit", formatFloat(*req.TimeoutLimit))
	}
	if len(req.Categories) > 0 {
		form.Set("categories", strings.Join(req.Categories, ","))
	}
	if len(engines) > 0 {
		form.Set("engines", strings.Join(engines, ","))
	}
	for engineName, kv := range req.EngineData {
		for key, value := range kv {
			// Mirror SearXNG's naming: `engine_data-<engine>-<key>=<value>`
			form.Set(fmt.Sprintf("engine_data-%s-%s", engineName, key), value)
		}
	}

	endpoint := c.baseURL + "/search"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")

	resp, err := c.http.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024))
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	if resp.StatusCode != http.StatusOK {
		return contracts.SearchResponse{}, fmt.Errorf("upstream search failed: status=%d body=%q", resp.StatusCode, string(body))
	}

	// Decode upstream JSON into our contract types. Previously the body was
	// round-tripped through string(...) + strings.NewReader, copying the
	// whole payload; bytes.NewReader decodes it in place.
	var out contracts.SearchResponse
	if err := json.NewDecoder(bytes.NewReader(body)).Decode(&out); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("decode upstream JSON: %w", err)
	}
	return out, nil
}
// formatFloat renders f with up to six decimal places, stripping trailing
// zeros and any dangling '.', to keep stable formatting for upstream
// parsing.
func formatFloat(f float64) string {
	s := fmt.Sprintf("%.6f", f)
	s = strings.TrimRight(s, "0")
	return strings.TrimSuffix(s, ".")
}