feat: build Go-based SearXNG-compatible search service
Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling so searches can run without matching the original UI while preserving response compatibility. Made-with: Cursor
This commit is contained in:
parent
7783367c71
commit
dc44837219
32 changed files with 3330 additions and 0 deletions
467
internal/engines/qwant.go
Normal file
467
internal/engines/qwant.go
Normal file
|
|
@ -0,0 +1,467 @@
|
|||
package engines
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/ashie/gosearch/internal/contracts"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// QwantEngine is the engine adapter for Qwant web search, modelled on
// SearXNG's `qwant` engine.
//
// Depending on category it talks either to the Qwant v3 JSON endpoint
// (https://api.qwant.com/v3/search/web) or to the HTML "lite" page.
// Qwant's API is not fully documented; the parsing follows SearXNG's logic
// for the `web` category in `.agent/searxng/searx/engines/qwant.py`.
type QwantEngine struct {
	// client performs the outbound HTTP requests. Search returns an error
	// when it is nil.
	client *http.Client
	// category selects the backend: "web" (JSON API, also used when the
	// field is empty) or "web-lite" (HTML fallback).
	category string
	// resultsPerPage is the page size requested from the JSON API; values
	// <= 0 fall back to 10.
	resultsPerPage int
}
|
||||
|
||||
// Name returns the identifier this engine uses when tagging results and
// reporting itself as unresponsive; it is always "qwant".
func (e *QwantEngine) Name() string {
	return "qwant"
}
|
||||
|
||||
func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||
if e == nil || e.client == nil {
|
||||
return contracts.SearchResponse{}, errors.New("qwant engine not initialized")
|
||||
}
|
||||
|
||||
q := strings.TrimSpace(req.Query)
|
||||
if q == "" {
|
||||
return contracts.SearchResponse{Query: req.Query}, nil
|
||||
}
|
||||
|
||||
// For API parity we use SearXNG web defaults: count=10, offset=(pageno-1)*count.
|
||||
// The engine's config field exists so we can expand to news/images/videos later.
|
||||
count := e.resultsPerPage
|
||||
if count <= 0 {
|
||||
count = 10
|
||||
}
|
||||
offset := 0
|
||||
if req.Pageno > 1 {
|
||||
offset = (req.Pageno - 1) * count
|
||||
}
|
||||
mode := strings.TrimSpace(strings.ToLower(e.category))
|
||||
if mode == "" {
|
||||
mode = "web"
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case "web-lite":
|
||||
return e.searchWebLite(ctx, req)
|
||||
case "web":
|
||||
return e.searchWebAPI(ctx, req, count, offset)
|
||||
default:
|
||||
// Unknown mode: treat as unresponsive.
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
UnresponsiveEngines: [][2]string{
|
||||
{e.Name(), "unknown_qwant_mode"},
|
||||
},
|
||||
Results: []contracts.MainResult{},
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (e *QwantEngine) searchWebAPI(ctx context.Context, req contracts.SearchRequest, count, offset int) (contracts.SearchResponse, error) {
|
||||
qLocale := qwantLocale(req.Language)
|
||||
args := url.Values{}
|
||||
args.Set("q", req.Query)
|
||||
args.Set("count", fmt.Sprintf("%d", count))
|
||||
args.Set("locale", qLocale)
|
||||
args.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
|
||||
args.Set("llm", "false")
|
||||
args.Set("tgp", "3")
|
||||
args.Set("offset", fmt.Sprintf("%d", offset))
|
||||
|
||||
endpoint := "https://api.qwant.com/v3/search/web?" + args.Encode()
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
httpReq.Header.Set("User-Agent", "gosearch-go/0.1 (+https://github.com/ashie/gosearch)")
|
||||
|
||||
resp, err := e.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Qwant often returns a 403 captcha/JS block for the JSON API.
|
||||
if resp.StatusCode == http.StatusForbidden {
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
UnresponsiveEngines: [][2]string{
|
||||
{e.Name(), "captcha_or_js_block"},
|
||||
},
|
||||
Results: []contracts.MainResult{},
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||
return contracts.SearchResponse{}, fmt.Errorf("qwant upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
var top map[string]any
|
||||
if err := json.Unmarshal(body, &top); err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
status, _ := top["status"].(string)
|
||||
if status != "success" {
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
UnresponsiveEngines: [][2]string{
|
||||
{e.Name(), "api_error"},
|
||||
},
|
||||
Results: []contracts.MainResult{},
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
data, _ := top["data"].(map[string]any)
|
||||
result, _ := data["result"].(map[string]any)
|
||||
items, _ := result["items"].(map[string]any)
|
||||
mainline := items["mainline"]
|
||||
|
||||
rows := toSlice(mainline)
|
||||
if len(rows) == 0 {
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
NumberOfResults: 0,
|
||||
Results: []contracts.MainResult{},
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
results := make([]contracts.MainResult, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
rowMap, ok := row.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
rowType, _ := rowMap["type"].(string)
|
||||
if rowType == "" {
|
||||
rowType = "web"
|
||||
}
|
||||
if rowType != "web" {
|
||||
continue
|
||||
}
|
||||
if rowType == "ads" {
|
||||
continue
|
||||
}
|
||||
|
||||
rowItems := toSlice(rowMap["items"])
|
||||
for _, it := range rowItems {
|
||||
itemMap, ok := it.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
title := toString(itemMap["title"])
|
||||
resURL := toString(itemMap["url"])
|
||||
desc := toString(itemMap["desc"])
|
||||
if resURL == "" {
|
||||
continue
|
||||
}
|
||||
urlPtr := resURL
|
||||
results = append(results, contracts.MainResult{
|
||||
Template: "default.html",
|
||||
Title: title,
|
||||
Content: desc,
|
||||
URL: &urlPtr,
|
||||
Engine: e.Name(),
|
||||
Score: 0,
|
||||
Category: "general",
|
||||
Engines: []string{e.Name()},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
NumberOfResults: len(results),
|
||||
Results: results,
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||
qLocale := qwantLocale(req.Language)
|
||||
langBase := strings.SplitN(qLocale, "_", 2)[0]
|
||||
|
||||
args := url.Values{}
|
||||
args.Set("q", req.Query)
|
||||
args.Set("locale", strings.ToLower(qLocale))
|
||||
args.Set("l", langBase)
|
||||
args.Set("s", fmt.Sprintf("%d", req.Safesearch))
|
||||
args.Set("p", fmt.Sprintf("%d", req.Pageno))
|
||||
|
||||
endpoint := "https://lite.qwant.com/?" + args.Encode()
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
httpReq.Header.Set("User-Agent", "gosearch-go/0.1 (+https://github.com/ashie/gosearch)")
|
||||
|
||||
resp, err := e.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||
return contracts.SearchResponse{}, fmt.Errorf("qwant lite upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
results := make([]contracts.MainResult, 0)
|
||||
seen := map[string]bool{}
|
||||
|
||||
// Pattern 1: legacy/known qwant-lite structure.
|
||||
doc.Find("section article").Each(func(_ int, item *goquery.Selection) {
|
||||
// ignore randomly interspersed advertising adds
|
||||
if item.Find("span.tooltip").Length() > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// In SearXNG: "./span[contains(@class, 'url partner')]"
|
||||
urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text())
|
||||
if urlText == "" {
|
||||
// fallback: any span with class containing both 'url' and 'partner'
|
||||
urlText = strings.TrimSpace(item.Find("span[class*='url'][class*='partner']").First().Text())
|
||||
}
|
||||
title := strings.TrimSpace(item.Find("h2 a").First().Text())
|
||||
content := strings.TrimSpace(item.Find("p").First().Text())
|
||||
|
||||
if urlText == "" {
|
||||
return
|
||||
}
|
||||
if seen[urlText] {
|
||||
return
|
||||
}
|
||||
seen[urlText] = true
|
||||
u := urlText
|
||||
results = append(results, contracts.MainResult{
|
||||
Template: "default.html",
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: &u,
|
||||
Engine: e.Name(),
|
||||
Score: 0,
|
||||
Category: "general",
|
||||
Engines: []string{e.Name()},
|
||||
})
|
||||
})
|
||||
|
||||
// Pattern 2: broader fallback for updated lite markup:
|
||||
// any article/list item/div block containing an external anchor.
|
||||
// We keep this conservative by requiring non-empty title + URL.
|
||||
doc.Find("article, li, div").Each(func(_ int, item *goquery.Selection) {
|
||||
if len(results) >= 20 {
|
||||
return
|
||||
}
|
||||
// Skip ad-like blocks in fallback pass too.
|
||||
if item.Find("span.tooltip").Length() > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Skip obvious nav/footer blocks.
|
||||
classAttr, _ := item.Attr("class")
|
||||
classLower := strings.ToLower(classAttr)
|
||||
if strings.Contains(classLower, "nav") || strings.Contains(classLower, "footer") {
|
||||
return
|
||||
}
|
||||
|
||||
a := item.Find("a[href]").First()
|
||||
if a.Length() == 0 {
|
||||
return
|
||||
}
|
||||
href, ok := a.Attr("href")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
href = strings.TrimSpace(href)
|
||||
if href == "" {
|
||||
return
|
||||
}
|
||||
|
||||
// Ignore in-page and relative links.
|
||||
if strings.HasPrefix(href, "/") || strings.HasPrefix(href, "#") {
|
||||
return
|
||||
}
|
||||
if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
|
||||
return
|
||||
}
|
||||
// Skip known sponsored partner links surfaced in lite pages.
|
||||
if isKnownSponsoredURL(href) {
|
||||
return
|
||||
}
|
||||
if isQwantInternalURL(href) {
|
||||
// Ignore qwant nav/house links.
|
||||
return
|
||||
}
|
||||
|
||||
title := strings.TrimSpace(a.Text())
|
||||
if title == "" {
|
||||
return
|
||||
}
|
||||
if isLikelyNavTitle(title) {
|
||||
return
|
||||
}
|
||||
|
||||
if seen[href] {
|
||||
return
|
||||
}
|
||||
seen[href] = true
|
||||
|
||||
// Best-effort snippet extraction from nearby paragraph/span text.
|
||||
content := strings.TrimSpace(item.Find("p").First().Text())
|
||||
if content == "" {
|
||||
content = strings.TrimSpace(item.Find("span").First().Text())
|
||||
}
|
||||
// If there is no snippet, still keep clearly external result links.
|
||||
// Qwant-lite frequently omits rich snippets for some entries.
|
||||
|
||||
u := href
|
||||
results = append(results, contracts.MainResult{
|
||||
Template: "default.html",
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: &u,
|
||||
Engine: e.Name(),
|
||||
Score: 0,
|
||||
Category: "general",
|
||||
Engines: []string{e.Name()},
|
||||
})
|
||||
})
|
||||
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
NumberOfResults: len(results),
|
||||
Results: results,
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// qwantLocale converts a request language tag such as "fr-FR" or "de" into
// the language_COUNTRY form Qwant expects (for example "fr_FR").
//
// Empty input and the "auto" / "all" values (SearXNG's "no specific
// language" settings, matched case-insensitively) map to "en_US", as does a
// tag whose language part is empty. When the tag carries no region, "US" is
// used. The language part is lower-cased and the region upper-cased; both
// "-" and "_" are accepted as separators.
func qwantLocale(lang string) string {
	const fallback = "en_US"

	lang = strings.TrimSpace(lang)
	switch strings.ToLower(lang) {
	case "", "auto", "all":
		return fallback
	}

	// Split once on the first separator; anything after it is the region.
	base, region, _ := strings.Cut(strings.ReplaceAll(lang, "-", "_"), "_")

	base = strings.ToLower(strings.TrimSpace(base))
	if base == "" {
		// A tag like "_US" has no language to send; use the default locale
		// instead of emitting a malformed one.
		return fallback
	}

	region = strings.ToUpper(strings.TrimSpace(region))
	if region == "" {
		region = "US"
	}

	// Qwant expects locales like en_US.
	return base + "_" + region
}
|
||||
|
||||
// toSlice coerces a decoded JSON value into a slice. An array is returned
// unchanged, a single object is wrapped in a one-element slice (the mainline
// may arrive as a lone object), and any other value, including nil, yields
// nil.
func toSlice(v any) []any {
	if list, isList := v.([]any); isList {
		return list
	}
	if obj, isObj := v.(map[string]any); isObj {
		return []any{obj}
	}
	return nil
}
|
||||
|
||||
// toString extracts text from a decoded JSON value. Strings are returned
// as-is and json.Number values in their textual form; every other type,
// including nil, yields "".
func toString(v any) string {
	if s, isString := v.(string); isString {
		return s
	}
	if num, isNumber := v.(json.Number); isNumber {
		return num.String()
	}
	return ""
}
|
||||
|
||||
// isQwantInternalURL reports whether raw points at qwant.com or any of its
// subdomains (www, about, lite, ...). The host comparison is
// case-insensitive. URLs that fail to parse or carry no host are treated as
// not internal.
func isQwantInternalURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	// An empty host matches neither test below, so no separate guard is
	// needed; "www." and "about." hosts fall under the suffix check.
	hostname := strings.ToLower(parsed.Hostname())
	return hostname == "qwant.com" || strings.HasSuffix(hostname, ".qwant.com")
}
|
||||
|
||||
// isLikelyNavTitle reports whether an anchor's text looks like Qwant site
// navigation or a house promotion rather than a search result title. The
// comparison ignores case and surrounding whitespace.
func isLikelyNavTitle(title string) bool {
	normalized := strings.TrimSpace(strings.ToLower(title))

	// Promotional banner whose trailing text varies, hence the prefix match.
	if strings.HasPrefix(normalized, "get 20gb of free storage") {
		return true
	}

	navLabels := [...]string{
		"qwant search",
		"search",
		"privacy",
		"discover the service",
		"better web",
		"discover",
	}
	for _, label := range navLabels {
		if normalized == label {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isKnownSponsoredURL reports whether raw is one of the sponsored partner
// links that show up on qwant-lite pages: anything hosted on shdw.me /
// www.shdw.me, or any URL containing "qwant-tool" (case-insensitive).
// URLs that fail to parse are reported as not sponsored.
func isKnownSponsoredURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}

	hostname := strings.ToLower(parsed.Hostname())
	if hostname == "shdw.me" || hostname == "www.shdw.me" {
		return true
	}

	// The marker may appear anywhere in the URL, not only in the host.
	return strings.Contains(strings.ToLower(raw), "qwant-tool")
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue