feat: build Go-based SearXNG-compatible search service
Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling so searches can run without matching the original UI while preserving response compatibility. Made-with: Cursor
This commit is contained in:
parent
7783367c71
commit
dc44837219
32 changed files with 3330 additions and 0 deletions
121
internal/search/merge.go
Normal file
121
internal/search/merge.go
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/ashie/gosearch/internal/contracts"
|
||||
)
|
||||
|
||||
// MergeResponses merges multiple SearXNG-compatible JSON responses.
|
||||
//
|
||||
// MVP merge semantics:
|
||||
// - results are concatenated with a simple de-dup key (engine|title|url)
|
||||
// - suggestions/corrections are de-duplicated as sets
|
||||
// - answers/infoboxes/unresponsive_engines are concatenated (best-effort)
|
||||
func MergeResponses(responses []contracts.SearchResponse) contracts.SearchResponse {
|
||||
var merged contracts.SearchResponse
|
||||
|
||||
mergedResultSeen := map[string]struct{}{}
|
||||
mergedAnswerSeen := map[string]struct{}{}
|
||||
mergedCorrectionsSeen := map[string]struct{}{}
|
||||
mergedSuggestionsSeen := map[string]struct{}{}
|
||||
|
||||
for _, r := range responses {
|
||||
if merged.Query == "" {
|
||||
merged.Query = r.Query
|
||||
}
|
||||
|
||||
merged.NumberOfResults = maxInt(merged.NumberOfResults, r.NumberOfResults)
|
||||
|
||||
for _, mr := range r.Results {
|
||||
key := resultDedupKey(mr)
|
||||
if _, ok := mergedResultSeen[key]; ok {
|
||||
continue
|
||||
}
|
||||
mergedResultSeen[key] = struct{}{}
|
||||
merged.Results = append(merged.Results, mr)
|
||||
}
|
||||
|
||||
for _, ans := range r.Answers {
|
||||
// De-dup by normalized JSON when possible.
|
||||
b, err := json.Marshal(ans)
|
||||
if err != nil {
|
||||
merged.Answers = append(merged.Answers, ans)
|
||||
continue
|
||||
}
|
||||
key := string(b)
|
||||
if _, ok := mergedAnswerSeen[key]; ok {
|
||||
continue
|
||||
}
|
||||
mergedAnswerSeen[key] = struct{}{}
|
||||
merged.Answers = append(merged.Answers, ans)
|
||||
}
|
||||
|
||||
merged.Corrections = unionStrings(merged.Corrections, r.Corrections, &mergedCorrectionsSeen)
|
||||
merged.Suggestions = unionStrings(merged.Suggestions, r.Suggestions, &mergedSuggestionsSeen)
|
||||
|
||||
merged.Infoboxes = append(merged.Infoboxes, r.Infoboxes...)
|
||||
merged.UnresponsiveEngines = append(merged.UnresponsiveEngines, r.UnresponsiveEngines...)
|
||||
}
|
||||
|
||||
// Ensure non-nil slices to keep JSON shape stable.
|
||||
if merged.Results == nil {
|
||||
merged.Results = []contracts.MainResult{}
|
||||
}
|
||||
if merged.Answers == nil {
|
||||
merged.Answers = []map[string]any{}
|
||||
}
|
||||
if merged.Corrections == nil {
|
||||
merged.Corrections = []string{}
|
||||
}
|
||||
if merged.Infoboxes == nil {
|
||||
merged.Infoboxes = []map[string]any{}
|
||||
}
|
||||
if merged.Suggestions == nil {
|
||||
merged.Suggestions = []string{}
|
||||
}
|
||||
if merged.UnresponsiveEngines == nil {
|
||||
merged.UnresponsiveEngines = [][2]string{}
|
||||
}
|
||||
|
||||
return merged
|
||||
}
|
||||
|
||||
func resultDedupKey(r contracts.MainResult) string {
|
||||
urlStr := ""
|
||||
if r.URL != nil {
|
||||
urlStr = *r.URL
|
||||
}
|
||||
// Normalize host to reduce duplicates.
|
||||
if u, err := url.Parse(urlStr); err == nil {
|
||||
if u.Host != "" {
|
||||
urlStr = u.Host + u.Path
|
||||
}
|
||||
}
|
||||
return strings.ToLower(r.Engine) + "|" + strings.ToLower(r.Title) + "|" + urlStr
|
||||
}
|
||||
|
||||
// unionStrings appends to dst every element of src not already present in
// *seen, recording appended values in *seen. The pointer lets a nil map be
// lazily allocated and shared across successive calls.
func unionStrings(dst []string, src []string, seen *map[string]struct{}) []string {
	if *seen == nil {
		*seen = map[string]struct{}{}
	}
	set := *seen
	union := dst
	for _, candidate := range src {
		if _, dup := set[candidate]; dup {
			continue
		}
		set[candidate] = struct{}{}
		union = append(union, candidate)
	}
	return union
}
|
||||
|
||||
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b > a {
		return b
	}
	return a
}
|
||||
|
||||
80
internal/search/merge_test.go
Normal file
80
internal/search/merge_test.go
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/ashie/gosearch/internal/contracts"
|
||||
)
|
||||
|
||||
// TestMergeResponses_DedupResultsAndSets verifies that two responses with
// the same (engine, title, url) result collapse to one entry, that
// corrections/suggestions are unioned, and that identical answers de-dup.
func TestMergeResponses_DedupResultsAndSets(t *testing.T) {
	url1 := "https://example.com/a?x=1"
	uPtr := &url1

	r1 := contracts.SearchResponse{
		Query:           "q",
		NumberOfResults: 1,
		Results: []contracts.MainResult{
			{
				Template: "default.html",
				Title:    "Title1",
				Content:  "C1",
				URL:      uPtr,
				Engine:   "wikipedia",
				Score:    1.0,
			},
		},
		Answers:             []map[string]any{{"title": "A1", "url": url1}},
		Corrections:         []string{"corr1", "corr2"},
		Suggestions:         []string{"s1", "s2"},
		Infoboxes:           []map[string]any{},
		UnresponsiveEngines: [][2]string{},
	}

	// r2 duplicates r1's result key (same engine/title/url) but differs in
	// content and score, so only one merged result should survive.
	r2 := contracts.SearchResponse{
		Query:           "q",
		NumberOfResults: 1,
		Results: []contracts.MainResult{
			{
				Template: "default.html",
				Title:    "Title1",
				Content:  "C2",
				URL:      uPtr,
				Engine:   "wikipedia",
				Score:    2.0,
			},
		},
		Answers:             []map[string]any{{"title": "A1", "url": url1}},
		Corrections:         []string{"corr2", "corr3"},
		Suggestions:         []string{"s2", "s3"},
		Infoboxes:           []map[string]any{},
		UnresponsiveEngines: [][2]string{},
	}

	merged := MergeResponses([]contracts.SearchResponse{r1, r2})

	if merged.Query != "q" {
		t.Fatalf("expected query q, got %q", merged.Query)
	}
	// number_of_results is merged with max(), not summed.
	if merged.NumberOfResults != 1 {
		t.Fatalf("expected number_of_results max=1, got %d", merged.NumberOfResults)
	}
	if len(merged.Results) != 1 {
		t.Fatalf("expected 1 merged result, got %d", len(merged.Results))
	}

	// Corrections/suggestions should be unioned.
	joinedCorr := strings.Join(merged.Corrections, ",")
	if !strings.Contains(joinedCorr, "corr1") || !strings.Contains(joinedCorr, "corr2") || !strings.Contains(joinedCorr, "corr3") {
		t.Fatalf("expected unioned corrections, got %v", merged.Corrections)
	}
	joinedSug := strings.Join(merged.Suggestions, ",")
	if !strings.Contains(joinedSug, "s1") || !strings.Contains(joinedSug, "s2") || !strings.Contains(joinedSug, "s3") {
		t.Fatalf("expected unioned suggestions, got %v", merged.Suggestions)
	}

	// The identical answer maps must de-duplicate via their JSON encoding.
	if len(merged.Answers) != 1 {
		t.Fatalf("expected 1 merged answer, got %d", len(merged.Answers))
	}
}
|
||||
|
||||
206
internal/search/request_params.go
Normal file
206
internal/search/request_params.go
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// languageCodeRe accepts ISO-639-style codes such as "en", "fil", or
// "en-US" (two-to-three lowercase letters plus an optional region suffix).
var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`)
|
||||
|
||||
func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
|
||||
// SearXNG supports both GET and POST and relies on form values for routing.
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return SearchRequest{}, errors.New("invalid request: cannot parse form")
|
||||
}
|
||||
|
||||
format := strings.ToLower(r.FormValue("format"))
|
||||
switch OutputFormat(format) {
|
||||
case FormatJSON, FormatCSV, FormatRSS:
|
||||
default:
|
||||
// MVP: treat everything else as json, except `html` which we accept for compatibility.
|
||||
if format == string(FormatHTML) {
|
||||
// accepted, but not implemented by the server yet
|
||||
} else {
|
||||
format = string(FormatJSON)
|
||||
}
|
||||
}
|
||||
|
||||
q := r.FormValue("q")
|
||||
if strings.TrimSpace(q) == "" {
|
||||
return SearchRequest{}, errors.New("missing required parameter: q")
|
||||
}
|
||||
|
||||
pageno := 1
|
||||
if s := strings.TrimSpace(r.FormValue("pageno")); s != "" {
|
||||
n, err := strconv.Atoi(s)
|
||||
if err != nil || n < 1 {
|
||||
return SearchRequest{}, errors.New("invalid parameter: pageno")
|
||||
}
|
||||
pageno = n
|
||||
}
|
||||
|
||||
// MVP defaults.
|
||||
safesearch := 0
|
||||
if s := strings.TrimSpace(r.FormValue("safesearch")); s != "" {
|
||||
n, err := strconv.Atoi(s)
|
||||
if err != nil || n < 0 || n > 2 {
|
||||
return SearchRequest{}, errors.New("invalid parameter: safesearch")
|
||||
}
|
||||
safesearch = n
|
||||
}
|
||||
|
||||
var timeRange *string
|
||||
if tr := strings.TrimSpace(r.FormValue("time_range")); tr != "" && tr != "None" {
|
||||
switch tr {
|
||||
case "day", "week", "month", "year":
|
||||
tt := tr
|
||||
timeRange = &tt
|
||||
default:
|
||||
return SearchRequest{}, errors.New("invalid parameter: time_range")
|
||||
}
|
||||
}
|
||||
|
||||
var timeoutLimit *float64
|
||||
if s := strings.TrimSpace(r.FormValue("timeout_limit")); s != "" && s != "None" {
|
||||
v, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil || v <= 0 {
|
||||
return SearchRequest{}, errors.New("invalid parameter: timeout_limit")
|
||||
}
|
||||
timeoutLimit = &v
|
||||
}
|
||||
|
||||
language := strings.TrimSpace(r.FormValue("language"))
|
||||
if language == "" {
|
||||
language = "auto"
|
||||
}
|
||||
switch language {
|
||||
case "auto", "all":
|
||||
// ok
|
||||
default:
|
||||
if !languageCodeRe.MatchString(language) {
|
||||
return SearchRequest{}, errors.New("invalid parameter: language")
|
||||
}
|
||||
}
|
||||
|
||||
// engines is an explicit list of engine names.
|
||||
engines := splitCSV(strings.TrimSpace(r.FormValue("engines")))
|
||||
|
||||
// categories and category_<name> params mirror SearXNG's webadapter parsing.
|
||||
// We don't validate against a registry here; we just preserve the requested values.
|
||||
catSet := map[string]bool{}
|
||||
if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" {
|
||||
for _, cat := range splitCSV(catsParam) {
|
||||
catSet[cat] = true
|
||||
}
|
||||
}
|
||||
for k, v := range r.Form {
|
||||
if !strings.HasPrefix(k, "category_") {
|
||||
continue
|
||||
}
|
||||
category := strings.TrimPrefix(k, "category_")
|
||||
if category == "" {
|
||||
continue
|
||||
}
|
||||
val := ""
|
||||
if len(v) > 0 {
|
||||
val = strings.TrimSpace(v[0])
|
||||
}
|
||||
if val == "" || val != "off" {
|
||||
catSet[category] = true
|
||||
} else {
|
||||
delete(catSet, category)
|
||||
}
|
||||
}
|
||||
categories := make([]string, 0, len(catSet))
|
||||
for c := range catSet {
|
||||
categories = append(categories, c)
|
||||
}
|
||||
if len(categories) == 0 {
|
||||
categories = []string{"general"}
|
||||
}
|
||||
|
||||
// Parse engine_data-<engine>-<key>=<value> parameters.
|
||||
engineData := map[string]map[string]string{}
|
||||
for k, v := range r.Form {
|
||||
if !strings.HasPrefix(k, "engine_data-") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(k, "-", 3) // engine_data-<engine>-<key>
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
engine := parts[1]
|
||||
key := parts[2]
|
||||
// For HTML forms, r.Form[k] can contain multiple values; keep first.
|
||||
val := ""
|
||||
if len(v) > 0 {
|
||||
val = v[0]
|
||||
}
|
||||
if _, ok := engineData[engine]; !ok {
|
||||
engineData[engine] = map[string]string{}
|
||||
}
|
||||
engineData[engine][key] = val
|
||||
}
|
||||
|
||||
accessToken := parseAccessToken(r)
|
||||
|
||||
return SearchRequest{
|
||||
Format: OutputFormat(format),
|
||||
Query: q,
|
||||
Pageno: pageno,
|
||||
Safesearch: safesearch,
|
||||
TimeRange: timeRange,
|
||||
TimeoutLimit: timeoutLimit,
|
||||
Language: language,
|
||||
Engines: engines,
|
||||
Categories: categories,
|
||||
EngineData: engineData,
|
||||
AccessToken: accessToken,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// splitCSV splits a comma-separated parameter into trimmed, non-empty
// items. An empty input yields nil; an input of only separators yields an
// empty (non-nil) slice.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	pieces := strings.Split(s, ",")
	items := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if trimmed := strings.TrimSpace(piece); trimmed != "" {
			items = append(items, trimmed)
		}
	}
	return items
}
|
||||
|
||||
func parseAccessToken(r *http.Request) string {
|
||||
// Supported sources (first non-empty wins):
|
||||
// - `Authorization: Bearer <token>`
|
||||
// - `X-Search-Token` / `X-Brave-Access-Token`
|
||||
// - `token` form value
|
||||
if auth := r.Header.Get("Authorization"); auth != "" {
|
||||
const prefix = "Bearer "
|
||||
if len(auth) > len(prefix) && auth[:len(prefix)] == prefix {
|
||||
return strings.TrimSpace(auth[len(prefix):])
|
||||
}
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(r.Header.Get("X-Search-Token")); v != "" {
|
||||
return v
|
||||
}
|
||||
if v := strings.TrimSpace(r.Header.Get("X-Brave-Access-Token")); v != "" {
|
||||
return v
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(r.FormValue("token")); v != "" {
|
||||
return v
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
74
internal/search/request_params_test.go
Normal file
74
internal/search/request_params_test.go
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// A request without the required `q` parameter must be rejected.
func TestParseSearchRequest_MissingQ(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/search?format=json", nil)
	_, err := ParseSearchRequest(r)
	if err == nil {
		t.Fatalf("expected error, got nil")
	}
}
|
||||
|
||||
// pageno is 1-based; zero (and anything non-numeric) must be rejected.
func TestParseSearchRequest_InvalidPageno(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/search?q=hi&pageno=0", nil)
	_, err := ParseSearchRequest(r)
	if err == nil {
		t.Fatalf("expected error for pageno, got nil")
	}
}
|
||||
|
||||
// A language value that is neither auto/all nor a valid code must fail.
func TestParseSearchRequest_InvalidLanguage(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/search?q=hi&language=bad!", nil)
	_, err := ParseSearchRequest(r)
	if err == nil {
		t.Fatalf("expected error for language, got nil")
	}
}
|
||||
|
||||
// End-to-end parse of a POST form: the categories list combined with a
// category_<name>=off toggle, an explicit engines list, and an
// engine_data-<engine>-<key> value.
func TestParseSearchRequest_CategoriesAndEngineData(t *testing.T) {
	values := url.Values{}
	values.Set("q", "hello")
	values.Set("format", "json")
	values.Set("categories", "general,science")
	// category_science=off must remove science even though it was listed.
	values.Set("category_science", "off")
	values.Set("engines", "wikipedia,arxiv")
	values.Set("engine_data-wikipedia-timeout", "123")

	r := httptest.NewRequest(http.MethodPost, "/search", strings.NewReader(values.Encode()))
	r.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	req, err := ParseSearchRequest(r)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// categories should drop `science` due to category_science=off
	wantCats := map[string]bool{"general": true}
	gotCats := map[string]bool{}
	for _, c := range req.Categories {
		gotCats[c] = true
	}
	for c := range wantCats {
		if !gotCats[c] {
			t.Fatalf("expected category %q in result, got %v", c, req.Categories)
		}
	}
	if gotCats["science"] {
		t.Fatalf("expected category science to be removed, got %v", req.Categories)
	}

	if len(req.Engines) != 2 {
		t.Fatalf("expected 2 engines, got %v", req.Engines)
	}
	if req.EngineData["wikipedia"]["timeout"] != "123" {
		t.Fatalf("expected engine_data parsed, got %#v", req.EngineData)
	}
}
|
||||
|
||||
223
internal/search/response.go
Normal file
223
internal/search/response.go
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"encoding/xml"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp SearchResponse) error {
|
||||
switch format {
|
||||
case FormatJSON:
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
return json.NewEncoder(w).Encode(resp)
|
||||
case FormatCSV:
|
||||
w.Header().Set("Content-Type", "text/csv; charset=utf-8")
|
||||
if err := writeCSV(w, resp); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
case FormatRSS:
|
||||
w.Header().Set("Content-Type", "text/xml; charset=utf-8")
|
||||
if err := writeRSS(w, resp); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
case FormatHTML:
|
||||
w.WriteHeader(http.StatusNotImplemented)
|
||||
_, _ = w.Write([]byte("format=html not implemented yet"))
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unsupported format: %s", format)
|
||||
}
|
||||
}
|
||||
|
||||
// csvRowHeader matches the SearXNG CSV writer key order. The `type` column
// distinguishes result/answer/suggestion/correction rows in writeCSV.
var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"}
|
||||
|
||||
func writeCSV(w http.ResponseWriter, resp SearchResponse) error {
|
||||
cw := csv.NewWriter(w)
|
||||
defer cw.Flush()
|
||||
|
||||
if err := cw.Write(csvRowHeader); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, r := range resp.Results {
|
||||
urlStr := ""
|
||||
if r.URL != nil {
|
||||
urlStr = *r.URL
|
||||
}
|
||||
host := hostFromURL(urlStr)
|
||||
scoreStr := strconv.FormatFloat(r.Score, 'f', -1, 64)
|
||||
row := []string{
|
||||
r.Title,
|
||||
urlStr,
|
||||
r.Content,
|
||||
host,
|
||||
r.Engine,
|
||||
scoreStr,
|
||||
"result",
|
||||
}
|
||||
if err := cw.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, ans := range resp.Answers {
|
||||
title := asString(ans["title"])
|
||||
urlStr := asString(ans["url"])
|
||||
content := asString(ans["content"])
|
||||
engine := asString(ans["engine"])
|
||||
scoreStr := scoreString(ans["score"])
|
||||
host := hostFromURL(urlStr)
|
||||
|
||||
row := []string{
|
||||
title,
|
||||
urlStr,
|
||||
content,
|
||||
host,
|
||||
engine,
|
||||
scoreStr,
|
||||
"answer",
|
||||
}
|
||||
if err := cw.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, s := range resp.Suggestions {
|
||||
row := []string{s, "", "", "", "", "", "suggestion"}
|
||||
if err := cw.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, c := range resp.Corrections {
|
||||
row := []string{c, "", "", "", "", "", "correction"}
|
||||
if err := cw.Write(row); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeRSS(w http.ResponseWriter, resp SearchResponse) error {
|
||||
q := resp.Query
|
||||
escapedTitle := xmlEscape("SearXNG search: " + q)
|
||||
escapedDesc := xmlEscape("Search results for \"" + q + "\" - SearXNG")
|
||||
escapedQueryTerms := xmlEscape(q)
|
||||
|
||||
link := "/search?q=" + url.QueryEscape(q)
|
||||
opensearchQuery := fmt.Sprintf(`<opensearch:Query role="request" searchTerms="%s" startPage="1" />`, escapedQueryTerms)
|
||||
|
||||
// SearXNG template uses the number of results for both totalResults and itemsPerPage.
|
||||
nr := resp.NumberOfResults
|
||||
|
||||
var items bytes.Buffer
|
||||
for _, r := range resp.Results {
|
||||
title := xmlEscape(r.Title)
|
||||
urlStr := ""
|
||||
if r.URL != nil {
|
||||
urlStr = *r.URL
|
||||
}
|
||||
linkEsc := xmlEscape(urlStr)
|
||||
desc := xmlEscape(r.Content)
|
||||
|
||||
pub := ""
|
||||
if r.Pubdate != nil && strings.TrimSpace(*r.Pubdate) != "" {
|
||||
pub = "<pubDate>" + xmlEscape(*r.Pubdate) + "</pubDate>"
|
||||
}
|
||||
|
||||
items.WriteString(
|
||||
fmt.Sprintf(
|
||||
`<item><title>%s</title><type>result</type><link>%s</link><description>%s</description>%s</item>`,
|
||||
title,
|
||||
linkEsc,
|
||||
desc,
|
||||
pub,
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
xml := fmt.Sprintf(
|
||||
`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet href="/rss.xsl" type="text/xsl"?>
|
||||
<rss version="2.0"
|
||||
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
|
||||
xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
<title>%s</title>
|
||||
<link>%s</link>
|
||||
<description>%s</description>
|
||||
<opensearch:totalResults>%d</opensearch:totalResults>
|
||||
<opensearch:startIndex>1</opensearch:startIndex>
|
||||
<opensearch:itemsPerPage>%d</opensearch:itemsPerPage>
|
||||
<atom:link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml"/>
|
||||
%s
|
||||
%s
|
||||
</channel>
|
||||
</rss>
|
||||
`,
|
||||
escapedTitle,
|
||||
xmlEscape(link),
|
||||
escapedDesc,
|
||||
nr,
|
||||
nr,
|
||||
opensearchQuery,
|
||||
items.String(),
|
||||
)
|
||||
|
||||
_, err := w.Write([]byte(xml))
|
||||
return err
|
||||
}
|
||||
|
||||
// xmlEscape returns s with XML-special characters replaced by entities,
// suitable for element content and attribute values.
func xmlEscape(s string) string {
	var escaped bytes.Buffer
	// bytes.Buffer writes never fail, so the error is safe to drop.
	_ = xml.EscapeText(&escaped, []byte(s))
	return escaped.String()
}
|
||||
|
||||
// hostFromURL extracts the host portion of a URL string, returning "" for
// blank or unparseable input.
func hostFromURL(raw string) string {
	if strings.TrimSpace(raw) == "" {
		return ""
	}
	if parsed, err := url.Parse(raw); err == nil {
		return parsed.Host
	}
	return ""
}
|
||||
|
||||
// asString returns v if it is a string, and "" for any other type
// (including nil).
func asString(v any) string {
	if s, ok := v.(string); ok {
		return s
	}
	return ""
}
|
||||
|
||||
func scoreString(v any) string {
|
||||
switch t := v.(type) {
|
||||
case float64:
|
||||
return strconv.FormatFloat(t, 'f', -1, 64)
|
||||
case float32:
|
||||
return strconv.FormatFloat(float64(t), 'f', -1, 64)
|
||||
case int:
|
||||
return strconv.Itoa(t)
|
||||
case int64:
|
||||
return strconv.FormatInt(t, 10)
|
||||
case json.Number:
|
||||
if f, err := t.Float64(); err == nil {
|
||||
return strconv.FormatFloat(f, 'f', -1, 64)
|
||||
}
|
||||
return ""
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
111
internal/search/service.go
Normal file
111
internal/search/service.go
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
package search
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ashie/gosearch/internal/engines"
|
||||
"github.com/ashie/gosearch/internal/contracts"
|
||||
"github.com/ashie/gosearch/internal/upstream"
|
||||
)
|
||||
|
||||
// ServiceConfig carries the settings needed to construct a Service.
type ServiceConfig struct {
	// UpstreamURL is the base URL of a SearXNG instance used as a
	// delegate/fallback; empty disables upstream use entirely.
	UpstreamURL string
	// HTTPTimeout bounds outbound HTTP calls; values <= 0 select a default.
	HTTPTimeout time.Duration
}
|
||||
|
||||
// Service orchestrates a search across locally ported engine adapters and
// an optional upstream SearXNG instance.
type Service struct {
	upstreamClient *upstream.Client          // nil when no upstream is configured or its client failed to build
	planner        *engines.Planner          // splits requested engines into local vs upstream
	localEngines   map[string]engines.Engine // ported adapters keyed by engine name
}
|
||||
|
||||
func NewService(cfg ServiceConfig) *Service {
|
||||
timeout := cfg.HTTPTimeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10 * time.Second
|
||||
}
|
||||
|
||||
httpClient := &http.Client{Timeout: timeout}
|
||||
|
||||
var up *upstream.Client
|
||||
if cfg.UpstreamURL != "" {
|
||||
c, err := upstream.NewClient(cfg.UpstreamURL, timeout)
|
||||
if err == nil {
|
||||
up = c
|
||||
}
|
||||
}
|
||||
|
||||
return &Service{
|
||||
upstreamClient: up,
|
||||
planner: engines.NewPlannerFromEnv(),
|
||||
localEngines: engines.NewDefaultPortedEngines(httpClient),
|
||||
}
|
||||
}
|
||||
|
||||
// Search runs req against the planned local engines and (optionally) an
// upstream SearXNG instance, then merges all collected responses into one.
// Any local-engine or upstream error aborts the whole search.
func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
	// The planner splits the requested engines into locally ported ones and
	// ones delegated upstream; the third return value is unused here.
	localEngines, upstreamEngines, _ := s.planner.Plan(req)

	responses := make([]contracts.SearchResponse, 0, 2)
	// Set view of upstreamEngines so fallback re-queues stay unique.
	upstreamSet := map[string]bool{}
	for _, e := range upstreamEngines {
		upstreamSet[e] = true
	}

	for _, engineName := range localEngines {
		eng, ok := s.localEngines[engineName]
		if !ok {
			// Planned engine has no local adapter registered; skip silently.
			continue
		}
		r, err := eng.Search(ctx, req)
		if err != nil {
			// MVP: fail fast so the client sees a real error.
			return SearchResponse{}, err
		}
		responses = append(responses, r)

		// Some engines (notably qwant due to anti-bot protections) can return
		// zero local results depending on client/IP. If upstream SearXNG is
		// configured, let it attempt the same engine as a fallback.
		// NOTE(review): this appends to the slice returned by the planner —
		// confirm the planner does not retain or reuse that slice.
		if shouldFallbackToUpstream(engineName, r) && !upstreamSet[engineName] {
			upstreamEngines = append(upstreamEngines, engineName)
			upstreamSet[engineName] = true
		}
	}

	// One upstream call covers all delegated engines (including fallbacks).
	if s.upstreamClient != nil && len(upstreamEngines) > 0 {
		r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngines)
		if err != nil {
			return SearchResponse{}, err
		}
		responses = append(responses, r)
	}

	if len(responses) == 0 {
		// Nothing ran: return an empty but well-formed response so JSON
		// clients still see arrays rather than nulls.
		return SearchResponse{
			Query:               req.Query,
			NumberOfResults:     0,
			Results:             []MainResult{},
			Answers:             []map[string]any{},
			Corrections:         []string{},
			Infoboxes:           []map[string]any{},
			Suggestions:         []string{},
			UnresponsiveEngines: [][2]string{},
		}, nil
	}

	merged := MergeResponses(responses)
	if merged.Query == "" {
		merged.Query = req.Query
	}
	return merged, nil
}
|
||||
|
||||
func shouldFallbackToUpstream(engineName string, r contracts.SearchResponse) bool {
|
||||
if engineName != "qwant" {
|
||||
return false
|
||||
}
|
||||
return len(r.Results) == 0 && len(r.Answers) == 0 && len(r.Infoboxes) == 0
|
||||
}
|
||||
|
||||
20
internal/search/types.go
Normal file
20
internal/search/types.go
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
package search
|
||||
|
||||
import "github.com/ashie/gosearch/internal/contracts"
|
||||
|
||||
// Re-export the JSON contract types so the rest of the code can stay in the
// `internal/search` namespace without creating an import cycle.
type OutputFormat = contracts.OutputFormat

// Supported values for the `format` request parameter.
const (
	FormatHTML = contracts.FormatHTML // accepted for compatibility (not yet implemented)
	FormatJSON = contracts.FormatJSON
	FormatCSV  = contracts.FormatCSV
	FormatRSS  = contracts.FormatRSS
)

// Request/response aliases for the wire contract.
type SearchRequest = contracts.SearchRequest
type SearchResponse = contracts.SearchResponse

// MainResult is a single entry in SearchResponse.Results.
type MainResult = contracts.MainResult
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue