feat: build Go-based SearXNG-compatible search service
Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling so searches can run without matching the original UI while preserving response compatibility. Made-with: Cursor
This commit is contained in:
parent
7783367c71
commit
dc44837219
32 changed files with 3330 additions and 0 deletions
76
README.md
76
README.md
|
|
@ -0,0 +1,76 @@
|
||||||
|
## gosearch (SearXNG rewrite in Go)
|
||||||
|
|
||||||
|
This repository contains a standalone Go HTTP service that implements a SearXNG-compatible
|
||||||
|
API-first `/search` endpoint and proxies unported engines to an upstream SearXNG instance.
|
||||||
|
|
||||||
|
### Endpoints
|
||||||
|
|
||||||
|
- `GET /healthz` -> `OK`
|
||||||
|
- `GET|POST /search`
|
||||||
|
- Required form/body parameter: `q`
|
||||||
|
- Optional: `format` (`json` | `csv` | `rss`; default: `json`)
|
||||||
|
|
||||||
|
### Supported `format=...`
|
||||||
|
|
||||||
|
- `json`: SearXNG-style JSON response (`query`, `number_of_results`, `results`, `answers`, `corrections`, `infoboxes`, `suggestions`, `unresponsive_engines`)
|
||||||
|
- `csv`: CSV with header `title,url,content,host,engine,score,type`
|
||||||
|
- `rss`: RSS 2.0 feed based on the `opensearch_response_rss.xml` template fields
|
||||||
|
|
||||||
|
### Request parameters
|
||||||
|
|
||||||
|
The server accepts SearXNG form parameters (both `GET` query string and `POST` form-encoded):
|
||||||
|
|
||||||
|
- `q` (required): search query
|
||||||
|
- `format` (optional): `json`/`csv`/`rss`
|
||||||
|
- `pageno` (optional, default `1`): positive integer
|
||||||
|
- `safesearch` (optional, default `0`): integer `0..2`
|
||||||
|
- `time_range` (optional): `day|week|month|year` (or omitted/`None`)
|
||||||
|
- `timeout_limit` (optional): float, seconds (or omitted/`None`)
|
||||||
|
- `language` (optional, default `auto`): `auto` or a BCP-47-ish language code
|
||||||
|
- `engines` (optional): comma-separated engine names (e.g. `wikipedia,arxiv`)
|
||||||
|
- `categories` / `category_<name>` (optional): used for selecting the initial ported subset
|
||||||
|
- `engine_data-<engine>-<key>=<value>` (optional): per-engine custom parameters
|
||||||
|
|
||||||
|
### Environment variables
|
||||||
|
|
||||||
|
- `PORT` (optional, default `8080`)
|
||||||
|
- `UPSTREAM_SEARXNG_URL` (optional for now, but required if you expect unported engines)
|
||||||
|
- When set, unported engines are proxied to `${UPSTREAM_SEARXNG_URL}/search` with `format=json`.
|
||||||
|
- `LOCAL_PORTED_ENGINES` (optional, default `wikipedia,arxiv,crossref,braveapi,qwant`)
|
||||||
|
- Controls which engine names are executed locally (Go-native adapters).
|
||||||
|
- `HTTP_TIMEOUT` (optional, default `10s`)
|
||||||
|
- Timeout for both local engine API calls and upstream proxy calls.
|
||||||
|
- Brave Search API:
|
||||||
|
- `BRAVE_API_KEY` (optional): enables the `braveapi` engine when set
|
||||||
|
- `BRAVE_ACCESS_TOKEN` (optional): if set, requests must include a token
|
||||||
|
(header `Authorization: Bearer <token>`, `X-Search-Token`, `X-Brave-Access-Token`, or form field `token`)
|
||||||
|
|
||||||
|
### Ported vs proxied strategy
|
||||||
|
|
||||||
|
1. The service plans which engines should run locally vs upstream using `LOCAL_PORTED_ENGINES`.
|
||||||
|
2. It executes local ported engines using Go-native adapters:
|
||||||
|
- `wikipedia`, `arxiv`, `crossref` (plus `braveapi` and `qwant` when enabled — see `LOCAL_PORTED_ENGINES` above for the full default set)
|
||||||
|
3. Any remaining requested engines are proxied to upstream SearXNG (`format=json`).
|
||||||
|
4. Responses are merged:
|
||||||
|
- `results` are de-duplicated by `engine|title|url`
|
||||||
|
- `suggestions`/`corrections` are treated as sets
|
||||||
|
- other arrays are concatenated
|
||||||
|
|
||||||
|
### Running with Nix
|
||||||
|
|
||||||
|
This repo uses `flake.nix` to provide the Go toolchain.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix develop
|
||||||
|
go test ./...
|
||||||
|
go run ./cmd/searxng-go
|
||||||
|
```
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export UPSTREAM_SEARXNG_URL="http://127.0.0.1:8888"
|
||||||
|
export PORT="8080"
|
||||||
|
nix develop -c go run ./cmd/searxng-go
|
||||||
|
```
|
||||||
|
|
||||||
43
cmd/searxng-go/main.go
Normal file
43
cmd/searxng-go/main.go
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/httpapi"
|
||||||
|
"github.com/ashie/gosearch/internal/search"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
port := os.Getenv("PORT")
|
||||||
|
if port == "" {
|
||||||
|
port = "8080"
|
||||||
|
}
|
||||||
|
|
||||||
|
upstreamURL := os.Getenv("UPSTREAM_SEARXNG_URL")
|
||||||
|
|
||||||
|
timeout := 10 * time.Second
|
||||||
|
if v := os.Getenv("HTTP_TIMEOUT"); v != "" {
|
||||||
|
if d, err := time.ParseDuration(v); err == nil {
|
||||||
|
timeout = d
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
svc := search.NewService(search.ServiceConfig{
|
||||||
|
UpstreamURL: upstreamURL,
|
||||||
|
HTTPTimeout: timeout,
|
||||||
|
})
|
||||||
|
|
||||||
|
h := httpapi.NewHandler(svc)
|
||||||
|
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
mux.HandleFunc("/healthz", h.Healthz)
|
||||||
|
mux.HandleFunc("/search", h.Search)
|
||||||
|
|
||||||
|
addr := ":" + port
|
||||||
|
log.Printf("searxng-go listening on %s", addr)
|
||||||
|
log.Fatal(http.ListenAndServe(addr, mux))
|
||||||
|
}
|
||||||
|
|
||||||
27
flake.lock
generated
Normal file
27
flake.lock
generated
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1773628058,
|
||||||
|
"narHash": "sha256-hpXH0z3K9xv0fHaje136KY872VT2T5uwxtezlAskQgY=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "f8573b9c935cfaa162dd62cc9e75ae2db86f85df",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixpkgs-unstable",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": "nixpkgs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
||||||
28
flake.nix
Normal file
28
flake.nix
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
{
  description = "Gosearch - SearXNG rewrite in Go";

  inputs = {
    # Track the rolling nixpkgs-unstable branch (pinned by flake.lock).
    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
  };

  outputs = { self, nixpkgs }:
    let
      # Platforms the dev shell is provided for.
      systems = [ "x86_64-linux" "aarch64-linux" ];
      # Apply f to every supported system, producing an attrset keyed by system.
      forAllSystems = f: nixpkgs.lib.genAttrs systems (system: f system);
    in {
      # `nix develop` shell with the Go toolchain plus curl for manual testing.
      devShells = forAllSystems (system:
        let
          pkgs = import nixpkgs { inherit system; };
          # NOTE(review): go.mod declares `go 1.25.0` but this pins go_1_24 —
          # confirm the toolchain version satisfies the module requirement.
          go = pkgs.go_1_24;
        in
        {
          default = pkgs.mkShell {
            buildInputs = [
              go
              pkgs.curl
            ];
          };
        });
    };
}
|
||||||
|
|
||||||
9
go.mod
Normal file
9
go.mod
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
module github.com/ashie/gosearch
|
||||||
|
|
||||||
|
go 1.25.0
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/PuerkitoBio/goquery v1.12.0 // indirect
|
||||||
|
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||||
|
golang.org/x/net v0.52.0 // indirect
|
||||||
|
)
|
||||||
71
go.sum
Normal file
71
go.sum
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
|
||||||
|
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
|
||||||
|
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||||
|
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||||
|
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
|
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
|
||||||
|
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||||
|
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||||
|
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||||
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||||
|
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||||
|
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||||
|
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
|
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
|
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||||
|
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||||
|
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
||||||
|
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||||
|
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||||
|
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||||
|
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
|
||||||
|
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
|
||||||
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||||
|
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
|
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
|
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||||
|
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||||
|
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
|
||||||
|
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||||
|
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
||||||
|
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||||
|
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||||
|
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||||
|
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||||
|
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||||
|
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||||
|
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||||
|
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||||
|
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||||
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
193
internal/contracts/main_result.go
Normal file
193
internal/contracts/main_result.go
Normal file
|
|
@ -0,0 +1,193 @@
|
||||||
|
package contracts
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MainResult represents one element of SearXNG's `results` array.
|
||||||
|
//
|
||||||
|
// SearXNG returns many additional keys beyond what templates use. To keep the
|
||||||
|
// contract stable for proxying/merging, we preserve all unknown keys in
|
||||||
|
// `raw` and re-emit them via MarshalJSON.
|
||||||
|
type MainResult struct {
	// raw holds the complete decoded upstream object (set by UnmarshalJSON);
	// when non-nil, MarshalJSON re-emits it verbatim so unknown keys survive.
	raw map[string]any

	// Common fields used by SearXNG templates (RSS uses: title, url, content, pubdate).
	Template string  `json:"template"`
	Title    string  `json:"title"`
	Content  string  `json:"content"`
	URL      *string `json:"url"`
	Pubdate  *string `json:"pubdate"`

	// Engine attribution and ranking metadata.
	Engine   string  `json:"engine"`
	Score    float64 `json:"score"`
	Category string  `json:"category"`
	Priority string  `json:"priority"`

	// Positions/Engines accumulate when the same result is reported by
	// multiple engines.
	Positions []int    `json:"positions"`
	Engines   []string `json:"engines"`

	// These fields exist in SearXNG's MainResult base; keep them so downstream
	// callers can generate richer output later.
	OpenGroup  bool `json:"open_group"`
	CloseGroup bool `json:"close_group"`

	// parsed_url in SearXNG is emitted as a tuple; we preserve it as-is.
	ParsedURL any `json:"parsed_url"`
}
|
||||||
|
|
||||||
|
func (mr *MainResult) UnmarshalJSON(data []byte) error {
|
||||||
|
// Preserve the full object.
|
||||||
|
dec := json.NewDecoder(bytes.NewReader(data))
|
||||||
|
dec.UseNumber()
|
||||||
|
|
||||||
|
var m map[string]any
|
||||||
|
if err := dec.Decode(&m); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
mr.raw = m
|
||||||
|
|
||||||
|
// Fill the typed/common fields (best-effort; don't fail if types differ).
|
||||||
|
mr.Template = stringOrEmpty(m["template"])
|
||||||
|
mr.Title = stringOrEmpty(m["title"])
|
||||||
|
mr.Content = stringOrEmpty(m["content"])
|
||||||
|
mr.Engine = stringOrEmpty(m["engine"])
|
||||||
|
mr.Category = stringOrEmpty(m["category"])
|
||||||
|
mr.Priority = stringOrEmpty(m["priority"])
|
||||||
|
|
||||||
|
if s, ok := stringOrNullable(m["url"]); ok {
|
||||||
|
mr.URL = &s
|
||||||
|
}
|
||||||
|
if s, ok := stringOrNullable(m["pubdate"]); ok {
|
||||||
|
mr.Pubdate = &s
|
||||||
|
}
|
||||||
|
|
||||||
|
mr.Score = floatOrZero(m["score"])
|
||||||
|
|
||||||
|
if v, ok := sliceOfStrings(m["engines"]); ok {
|
||||||
|
mr.Engines = v
|
||||||
|
}
|
||||||
|
if v, ok := sliceOfInts(m["positions"]); ok {
|
||||||
|
mr.Positions = v
|
||||||
|
}
|
||||||
|
|
||||||
|
if v, ok := boolOrFalse(m["open_group"]); ok {
|
||||||
|
mr.OpenGroup = v
|
||||||
|
}
|
||||||
|
if v, ok := boolOrFalse(m["close_group"]); ok {
|
||||||
|
mr.CloseGroup = v
|
||||||
|
}
|
||||||
|
|
||||||
|
mr.ParsedURL = m["parsed_url"]
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mr MainResult) MarshalJSON() ([]byte, error) {
|
||||||
|
// If we came from upstream JSON, preserve all keys exactly.
|
||||||
|
if mr.raw != nil {
|
||||||
|
return json.Marshal(mr.raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, marshal the known fields.
|
||||||
|
m := map[string]any{
|
||||||
|
"template": mr.Template,
|
||||||
|
"title": mr.Title,
|
||||||
|
"content": mr.Content,
|
||||||
|
"url": mr.URL,
|
||||||
|
"pubdate": mr.Pubdate,
|
||||||
|
"engine": mr.Engine,
|
||||||
|
"score": mr.Score,
|
||||||
|
"category": mr.Category,
|
||||||
|
"priority": mr.Priority,
|
||||||
|
"positions": mr.Positions,
|
||||||
|
"engines": mr.Engines,
|
||||||
|
"open_group": mr.OpenGroup,
|
||||||
|
"close_group": mr.CloseGroup,
|
||||||
|
"parsed_url": mr.ParsedURL,
|
||||||
|
}
|
||||||
|
return json.Marshal(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringOrEmpty returns v as a string, or "" when v is nil or not a string.
func stringOrEmpty(v any) string {
	if s, ok := v.(string); ok {
		return s
	}
	return ""
}
|
||||||
|
|
||||||
|
// stringOrNullable reports whether v holds a string value. It returns
// (s, true) for a string and ("", false) for nil or any other type.
func stringOrNullable(v any) (string, bool) {
	switch s := v.(type) {
	case string:
		return s, true
	default:
		return "", false
	}
}
|
||||||
|
|
||||||
|
// floatOrZero coerces common numeric representations to float64, returning 0
// for nil or any unsupported type. json.Number is handled first since the
// decoders in this package use UseNumber().
func floatOrZero(v any) float64 {
	switch n := v.(type) {
	case json.Number:
		f, _ := n.Float64() // malformed numbers fall back to 0
		return f
	case float64:
		return n
	case float32:
		return float64(n)
	case int:
		return float64(n)
	case int64:
		return float64(n)
	}
	return 0
}
|
||||||
|
|
||||||
|
// boolOrFalse reports whether v holds a bool. It returns (b, true) for a
// bool and (false, false) for nil or any other type.
func boolOrFalse(v any) (bool, bool) {
	if b, ok := v.(bool); ok {
		return b, true
	}
	return false, false
}
|
||||||
|
|
||||||
|
// sliceOfStrings converts a decoded JSON array ([]any) to []string. It fails
// (nil, false) if v is not []any or if any element is not a string.
func sliceOfStrings(v any) ([]string, bool) {
	items, ok := v.([]any)
	if !ok {
		return nil, false
	}
	result := make([]string, len(items))
	for i, item := range items {
		str, isString := item.(string)
		if !isString {
			return nil, false
		}
		result[i] = str
	}
	return result, true
}
|
||||||
|
|
||||||
|
// sliceOfInts converts a decoded JSON array ([]any) to []int. Elements may be
// float64 (default JSON decoding), int, or json.Number (UseNumber decoding);
// any other element type, a non-[]any input, or a non-integer json.Number
// yields (nil, false).
func sliceOfInts(v any) ([]int, bool) {
	items, ok := v.([]any)
	if !ok {
		return nil, false
	}
	// asInt normalizes one element to int, reporting convertibility.
	asInt := func(item any) (int, bool) {
		switch n := item.(type) {
		case float64:
			return int(n), true
		case int:
			return n, true
		case json.Number:
			i64, err := n.Int64()
			if err != nil {
				return 0, false
			}
			return int(i64), true
		}
		return 0, false
	}
	result := make([]int, 0, len(items))
	for _, item := range items {
		n, valid := asInt(item)
		if !valid {
			return nil, false
		}
		result = append(result, n)
	}
	return result, true
}
|
||||||
|
|
||||||
50
internal/contracts/types.go
Normal file
50
internal/contracts/types.go
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
package contracts
|
||||||
|
|
||||||
|
// OutputFormat matches SearXNG's `/search?format=...` values.
type OutputFormat string

// Supported output formats. Per the README, json/csv/rss are implemented;
// html is accepted but not yet rendered.
const (
	FormatHTML OutputFormat = "html" // accepted for compatibility (not yet implemented)
	FormatJSON OutputFormat = "json"
	FormatCSV  OutputFormat = "csv"
	FormatRSS  OutputFormat = "rss"
)
|
||||||
|
|
||||||
|
// SearchRequest is the parsed form of a SearXNG-compatible `/search` request
// (GET query string or POST form-encoded).
type SearchRequest struct {
	// Format is what the client requested via `format=...`.
	Format OutputFormat

	// Query is the raw search query (`q` form parameter).
	Query string

	// Pageno is the 1-based result page (`pageno`, default 1 per the README).
	Pageno int
	// Safesearch is the safe-search level (`safesearch`, 0..2 per the README).
	Safesearch int
	// TimeRange restricts results by recency (day|week|month|year);
	// nil when omitted.
	TimeRange *string

	// TimeoutLimit is the optional per-request timeout in seconds
	// (`timeout_limit`); nil when omitted.
	TimeoutLimit *float64
	// Language is the requested result language (`language`, "auto" or a
	// BCP-47-ish code per the README).
	Language string

	// Engines and categories are used for deciding which engines run locally vs are proxied.
	// For now, engines can be supplied directly via the `engines` form parameter.
	Engines    []string
	Categories []string

	// EngineData matches SearXNG's `engine_data-<engine>-<key>=<value>` parameters.
	EngineData map[string]map[string]string

	// AccessToken is an optional request token used to gate paid/limited engines.
	// It is not part of the upstream JSON schema; it only influences local engines.
	AccessToken string
}
|
||||||
|
|
||||||
|
// SearchResponse matches the JSON schema returned by SearXNG's `webutils.get_json_response()`.
type SearchResponse struct {
	// Query echoes the search query back to the client.
	Query string `json:"query"`
	// NumberOfResults is the result count reported for this response.
	NumberOfResults int `json:"number_of_results"`
	// Results is the main result list; MainResult preserves unknown upstream
	// keys via its raw map.
	Results []MainResult `json:"results"`
	// Answers/Infoboxes are passed through as free-form objects.
	Answers     []map[string]any `json:"answers"`
	Corrections []string         `json:"corrections"`
	Infoboxes   []map[string]any `json:"infoboxes"`
	Suggestions []string         `json:"suggestions"`
	// UnresponsiveEngines lists (engine name, reason) pairs for engines that
	// could not produce results.
	UnresponsiveEngines [][2]string `json:"unresponsive_engines"`
}
|
||||||
|
|
||||||
191
internal/engines/arxiv.go
Normal file
191
internal/engines/arxiv.go
Normal file
|
|
@ -0,0 +1,191 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/xml"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// arxivSearchPrefix is prepended to the query as "<prefix>:<q>";
	// "all" is presumably arXiv's search-all-fields selector — confirm
	// against the arXiv API user manual.
	arxivSearchPrefix = "all"
	// arxivMaxResults is both the page size requested from the arXiv API and
	// the stride used to turn a 1-based pageno into a result offset.
	arxivMaxResults = 10
)
|
||||||
|
|
||||||
|
// ArxivEngine is the Go-native adapter for the SearXNG `arxiv` engine,
// backed by the arXiv Atom API.
type ArxivEngine struct {
	// client performs the outbound HTTP calls; Search errors if it is nil.
	client *http.Client
}

// Name returns the SearXNG engine identifier for this adapter.
func (e *ArxivEngine) Name() string { return "arxiv" }
|
||||||
|
|
||||||
|
func (e *ArxivEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("arxiv engine not initialized")
|
||||||
|
}
|
||||||
|
q := strings.TrimSpace(req.Query)
|
||||||
|
if q == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
start := (req.Pageno - 1) * arxivMaxResults
|
||||||
|
if start < 0 {
|
||||||
|
start = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
args := url.Values{}
|
||||||
|
args.Set("search_query", fmt.Sprintf("%s:%s", arxivSearchPrefix, q))
|
||||||
|
args.Set("start", fmt.Sprintf("%d", start))
|
||||||
|
args.Set("max_results", fmt.Sprintf("%d", arxivMaxResults))
|
||||||
|
|
||||||
|
endpoint := "https://export.arxiv.org/api/query?" + args.Encode()
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
results, err := parseArxivAtom(raw)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type arxivEntry struct {
|
||||||
|
Title string
|
||||||
|
ID string
|
||||||
|
Summary string
|
||||||
|
Published string
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseArxivAtom extracts <entry> elements from an arXiv Atom feed and maps
// them to SearXNG main results (template "default.html", category "science").
//
// The decoder walks tokens rather than unmarshalling into a struct so that
// element-name matching is case-insensitive and unrecognized elements are
// simply skipped. Entries missing a title or id are dropped.
func parseArxivAtom(xmlBytes []byte) ([]contracts.MainResult, error) {
	dec := xml.NewDecoder(bytes.NewReader(xmlBytes))

	var entries []arxivEntry
	var cur *arxivEntry // non-nil only while inside an <entry> element

	for {
		tok, err := dec.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		switch t := tok.(type) {
		case xml.StartElement:
			switch strings.ToLower(t.Name.Local) {
			case "entry":
				cur = &arxivEntry{}
			case "title":
				// Only capture <title> inside an entry; the feed-level
				// <title> arrives while cur is nil and is ignored.
				// DecodeElement errors are deliberately swallowed: a bad
				// element leaves the field empty rather than failing the feed.
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.Title = strings.TrimSpace(v)
					}
				}
			case "id":
				// The Atom <id> doubles as the abstract URL for arXiv.
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.ID = strings.TrimSpace(v)
					}
				}
			case "summary":
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.Summary = strings.TrimSpace(v)
					}
				}
			case "published":
				if cur != nil {
					var v string
					if err := dec.DecodeElement(&v, &t); err == nil {
						cur.Published = strings.TrimSpace(v)
					}
				}
			}
		case xml.EndElement:
			// Close out the current entry; keep it only if it has the two
			// fields a usable result requires.
			if strings.ToLower(t.Name.Local) == "entry" && cur != nil {
				if cur.Title != "" && cur.ID != "" {
					entries = append(entries, *cur)
				}
				cur = nil
			}
		}
	}

	// Map collected entries onto SearXNG-shaped results.
	out := make([]contracts.MainResult, 0, len(entries))
	for _, e := range entries {
		// Local copies so each result's pointers are independent of the
		// loop variable.
		urlPtr := e.ID
		content := e.Summary
		pubdate := parseArxivPublished(e.Published) // nil when absent/unparseable

		out = append(out, contracts.MainResult{
			Template:  "default.html",
			Title:     e.Title,
			Content:   content,
			URL:       &urlPtr,
			Pubdate:   pubdate,
			Engine:    "arxiv",
			Category:  "science",
			Score:     0,
			Positions: nil,
			Engines:   []string{"arxiv"},
		})
	}
	return out, nil
}
|
||||||
|
|
||||||
|
// parseArxivPublished converts an arXiv RFC3339 timestamp (e.g.
// "2024-06-03T00:00:00Z") into the "2006-01-02 15:04:05-0700" layout used in
// result pubdates. It returns nil for empty or unparseable input.
func parseArxivPublished(s string) *string {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" {
		return nil
	}

	parsed, err := time.Parse(time.RFC3339, trimmed)
	if err != nil {
		return nil
	}

	out := parsed.Format("2006-01-02 15:04:05-0700")
	return &out
}
|
||||||
|
|
||||||
66
internal/engines/arxiv_test.go
Normal file
66
internal/engines/arxiv_test.go
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestArxivEngine_Search exercises the arxiv adapter end-to-end against a
// stubbed HTTP transport: it validates the outgoing request (method, host,
// path, search_query) and checks that a single Atom entry is mapped onto the
// expected MainResult fields.
//
// NOTE(review): roundTripperFunc and httpResponse are helpers defined
// elsewhere in this package — presumably a func-to-RoundTripper adapter and a
// canned-*http.Response constructor (status, body, content-type); confirm
// their definitions there.
func TestArxivEngine_Search(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		// Any deviation from GET export.arxiv.org/api/query?search_query=all:quantum
		// yields an error status, which fails the assertions below.
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "export.arxiv.org" || r.URL.Path != "/api/query" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}

		q := r.URL.Query().Get("search_query")
		if q != "all:quantum" {
			return httpResponse(http.StatusBadRequest, "", ""), nil
		}

		// Minimal single-entry Atom feed mirroring the arXiv API shape.
		atom := `<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>Quantum Test</title>
<id>http://arxiv.org/abs/1234.5678</id>
<summary>Abstract here</summary>
<published>2024-06-03T00:00:00Z</published>
</entry>
</feed>`
		return httpResponse(http.StatusOK, atom, "application/atom+xml"), nil
	})

	client := &http.Client{Transport: transport}
	engine := &ArxivEngine{client: client}

	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:  "quantum",
		Pageno: 1,
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}

	// Spot-check the mapped fields of the single result.
	r := resp.Results[0]
	if r.Title != "Quantum Test" {
		t.Fatalf("unexpected title: %q", r.Title)
	}
	if r.Content != "Abstract here" {
		t.Fatalf("unexpected content: %q", r.Content)
	}
	if r.URL == nil || !strings.Contains(*r.URL, "1234.5678") {
		t.Fatalf("unexpected url: %v", r.URL)
	}
	if r.Pubdate == nil || !strings.Contains(*r.Pubdate, "2024-06-03") {
		t.Fatalf("expected pubdate around 2024-06-03, got %v", r.Pubdate)
	}
}
|
||||||
|
|
||||||
195
internal/engines/braveapi.go
Normal file
195
internal/engines/braveapi.go
Normal file
|
|
@ -0,0 +1,195 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BraveEngine implements the SearXNG `braveapi` engine (Brave Web Search API).
//
// Config / gating:
//   - BRAVE_API_KEY: required to call Brave
//   - BRAVE_ACCESS_TOKEN (optional): if set, the request must include a token
//     that matches the env var (via Authorization Bearer, X-Search-Token,
//     X-Brave-Access-Token, or form field `token`).
type BraveEngine struct {
	// client performs the outbound HTTP call; must be non-nil for Search.
	client *http.Client
	// apiKey is sent as the X-Subscription-Token header on every request.
	apiKey string
	// accessGateToken, when non-empty, must exactly match the request's
	// access token before any upstream call is made.
	accessGateToken string
	// resultsPerPage sets the Brave `count` parameter and the paging step.
	resultsPerPage int
}

// Name returns the SearXNG engine identifier for this adapter.
func (e *BraveEngine) Name() string { return "braveapi" }
|
||||||
|
|
||||||
|
func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("brave engine not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gate / config checks should not be treated as fatal errors; SearXNG
|
||||||
|
// treats misconfigured engines as unresponsive.
|
||||||
|
if strings.TrimSpace(e.apiKey) == "" {
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: 0,
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{{e.Name(), "missing_api_key"}},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if gate := strings.TrimSpace(e.accessGateToken); gate != "" {
|
||||||
|
if strings.TrimSpace(req.AccessToken) == "" || req.AccessToken != gate {
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: 0,
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{{e.Name(), "unauthorized"}},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
q := strings.TrimSpace(req.Query)
|
||||||
|
if q == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
offset := 0
|
||||||
|
if req.Pageno > 1 {
|
||||||
|
offset = (req.Pageno - 1) * e.resultsPerPage
|
||||||
|
}
|
||||||
|
|
||||||
|
args := url.Values{}
|
||||||
|
args.Set("q", q)
|
||||||
|
args.Set("count", fmt.Sprintf("%d", e.resultsPerPage))
|
||||||
|
args.Set("offset", fmt.Sprintf("%d", offset))
|
||||||
|
|
||||||
|
if req.TimeRange != nil {
|
||||||
|
switch *req.TimeRange {
|
||||||
|
case "day":
|
||||||
|
args.Set("time_range", "past_day")
|
||||||
|
case "week":
|
||||||
|
args.Set("time_range", "past_week")
|
||||||
|
case "month":
|
||||||
|
args.Set("time_range", "past_month")
|
||||||
|
case "year":
|
||||||
|
args.Set("time_range", "past_year")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearXNG's python checks `if params["safesearch"]:` which treats any
|
||||||
|
// non-zero (moderate/strict) as strict.
|
||||||
|
if req.Safesearch > 0 {
|
||||||
|
args.Set("safesearch", "strict")
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := "https://api.search.brave.com/res/v1/web/search?" + args.Encode()
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("X-Subscription-Token", e.apiKey)
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var api struct {
|
||||||
|
Web struct {
|
||||||
|
Results []struct {
|
||||||
|
URL string `json:"url"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
Age string `json:"age"`
|
||||||
|
Thumbnail struct {
|
||||||
|
Src string `json:"src"`
|
||||||
|
} `json:"thumbnail"`
|
||||||
|
} `json:"results"`
|
||||||
|
} `json:"web"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]contracts.MainResult, 0, len(api.Web.Results))
|
||||||
|
for _, r := range api.Web.Results {
|
||||||
|
urlPtr := strings.TrimSpace(r.URL)
|
||||||
|
if urlPtr == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pub := parseBraveAge(r.Age)
|
||||||
|
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: r.Title,
|
||||||
|
Content: r.Description,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Pubdate: pub,
|
||||||
|
Engine: e.Name(),
|
||||||
|
Score: 0,
|
||||||
|
Category: "general",
|
||||||
|
Priority: "",
|
||||||
|
Positions: nil,
|
||||||
|
Engines: []string{e.Name()},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseBraveAge converts Brave's `age` field into the
// "YYYY-MM-DD HH:MM:SS+ZZZZ" pubdate format used by SearXNG results.
// Brave sometimes returns RFC3339-like timestamps or plain dates; anything
// unparseable (including relative text such as "2 days ago") yields nil.
func parseBraveAge(ageRaw string) *string {
	ageRaw = strings.TrimSpace(ageRaw)
	if ageRaw == "" {
		return nil
	}

	// Brave sometimes returns RFC3339-like timestamps for `age`.
	// Note: the previously listed "2006-01-02T15:04:05Z07:00" layout is
	// byte-identical to time.RFC3339, so it was a dead duplicate and has
	// been removed.
	layouts := []string{
		time.RFC3339Nano,
		time.RFC3339,
		"2006-01-02",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, ageRaw); err == nil {
			s := t.Format("2006-01-02 15:04:05-0700")
			return &s
		}
	}
	return nil
}
|
||||||
|
|
||||||
92
internal/engines/braveapi_test.go
Normal file
92
internal/engines/braveapi_test.go
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestBraveEngine_GatingAndHeader verifies two behaviors of BraveEngine:
//  1. outgoing requests carry the X-Subscription-Token header and hit the
//     expected Brave host/path with the expected query, and
//  2. when an access-gate token is configured, a mismatched request token
//     short-circuits before any upstream call and reports the engine as
//     unresponsive.
func TestBraveEngine_GatingAndHeader(t *testing.T) {
	wantToken := "letmein"
	wantAPIKey := "api-key"

	// Fake transport: asserts on the outgoing request shape and returns a
	// canned single-result Brave JSON payload. It is only reached when the
	// gate check passes.
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Header.Get("X-Subscription-Token") != wantAPIKey {
			t.Fatalf("missing/incorrect X-Subscription-Token header: got %q", r.Header.Get("X-Subscription-Token"))
		}
		if r.URL.Host != "api.search.brave.com" {
			t.Fatalf("unexpected host: %s", r.URL.Host)
		}
		if r.URL.Path != "/res/v1/web/search" {
			t.Fatalf("unexpected path: %s", r.URL.Path)
		}
		// basic query assertions
		q := r.URL.Query().Get("q")
		if q != "hugo" {
			t.Fatalf("unexpected q: %q", q)
		}

		body := `{
  "web": {
    "results": [
      {"url":"https://example.com/a","title":"A","description":"B","age":"2024-06-03T00:00:00Z","thumbnail":{"src":"x"}}
    ]
  }
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})

	client := &http.Client{Transport: transport}
	engine := &BraveEngine{
		client:          client,
		apiKey:          wantAPIKey,
		accessGateToken: wantToken,
		resultsPerPage:  20,
	}

	// Wrong token => no upstream call / unresponsive engine.
	{
		resp, err := engine.Search(context.Background(), contracts.SearchRequest{
			Query:       "hugo",
			Pageno:      1,
			Safesearch:  0,
			Language:    "en",
			AccessToken: "wrong",
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(resp.Results) != 0 {
			t.Fatalf("expected no results on unauthorized, got %d", len(resp.Results))
		}
		if len(resp.UnresponsiveEngines) != 1 {
			t.Fatalf("expected 1 unresponsive engine entry, got %v", resp.UnresponsiveEngines)
		}
	}

	// Correct token => upstream call.
	{
		resp, err := engine.Search(context.Background(), contracts.SearchRequest{
			Query:       "hugo",
			Pageno:      1,
			Safesearch:  0,
			Language:    "en",
			AccessToken: wantToken,
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if len(resp.Results) != 1 {
			t.Fatalf("expected 1 result, got %d", len(resp.Results))
		}
		if resp.Results[0].Title != "A" {
			t.Fatalf("unexpected title: %q", resp.Results[0].Title)
		}
		if resp.Results[0].URL == nil || *resp.Results[0].URL != "https://example.com/a" {
			t.Fatalf("unexpected url: %v", resp.Results[0].URL)
		}
	}
}
|
||||||
|
|
||||||
144
internal/engines/crossref.go
Normal file
144
internal/engines/crossref.go
Normal file
|
|
@ -0,0 +1,144 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CrossrefEngine implements the SearXNG `crossref` engine against the
// Crossref REST API (https://api.crossref.org/works).
type CrossrefEngine struct {
	// client performs the outbound HTTP call; must be non-nil for Search.
	client *http.Client
}

// Name returns the SearXNG engine identifier for this adapter.
func (e *CrossrefEngine) Name() string { return "crossref" }
|
||||||
|
|
||||||
|
func (e *CrossrefEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("crossref engine not initialized")
|
||||||
|
}
|
||||||
|
q := strings.TrimSpace(req.Query)
|
||||||
|
if q == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
offset := 20 * (req.Pageno - 1)
|
||||||
|
args := url.Values{}
|
||||||
|
args.Set("query", q)
|
||||||
|
args.Set("offset", fmt.Sprintf("%d", offset))
|
||||||
|
|
||||||
|
endpoint := "https://api.crossref.org/works?" + args.Encode()
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var api struct {
|
||||||
|
Message struct {
|
||||||
|
Items []crossrefItem `json:"items"`
|
||||||
|
} `json:"message"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]contracts.MainResult, 0, len(api.Message.Items))
|
||||||
|
for _, item := range api.Message.Items {
|
||||||
|
title := ""
|
||||||
|
if len(item.Title) > 0 {
|
||||||
|
title = strings.TrimSpace(item.Title[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
content := strings.TrimSpace(item.Abstract)
|
||||||
|
|
||||||
|
urlStr := strings.TrimSpace(item.URL)
|
||||||
|
if urlStr == "" {
|
||||||
|
urlStr = strings.TrimSpace(item.DOI)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub := parseCrossrefDateParts(item.Published.DateParts)
|
||||||
|
|
||||||
|
urlPtr := urlStr
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: title,
|
||||||
|
Content: content,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Pubdate: pub,
|
||||||
|
Engine: "crossref",
|
||||||
|
Score: 0,
|
||||||
|
Category: "science",
|
||||||
|
Priority: "",
|
||||||
|
Positions: nil,
|
||||||
|
Engines: []string{"crossref"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// crossrefItem mirrors the subset of a Crossref work record that this
// adapter reads from the `message.items` array.
type crossrefItem struct {
	Type      string            `json:"type"`
	Title     []string          `json:"title"` // Crossref titles come as a list; the first entry is used.
	URL       string            `json:"URL"`
	DOI       string            `json:"DOI"`
	Abstract  string            `json:"abstract"`
	Page      string            `json:"page"`
	Publisher string            `json:"publisher"`
	Subject   []string          `json:"subject"`
	Published crossrefPublished `json:"published"`
}

// crossrefPublished carries Crossref's nested date-parts representation of
// the publication date, e.g. [[2020, 5, 1]] for 2020-05-01.
type crossrefPublished struct {
	DateParts [][]int `json:"date-parts"`
}
|
||||||
|
|
||||||
|
// parseCrossrefDateParts converts Crossref's `date-parts` value (e.g.
// [[2020, 5, 1]]) into the "YYYY-MM-DD HH:MM:SS+ZZZZ" pubdate string used
// by SearXNG results. Missing month/day default to 1; nil is returned when
// no date is present.
func parseCrossrefDateParts(parts [][]int) *string {
	if len(parts) == 0 || len(parts[0]) == 0 {
		return nil
	}

	dp := parts[0]
	year := dp[0]
	month := 1
	day := 1
	if len(dp) >= 2 {
		month = dp[1]
	}
	if len(dp) >= 3 {
		day = dp[2]
	}

	t := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC)
	// Use real layout verbs here: the previous layout spelled the time and
	// zone as literal text ("00:00:00+0000"), which contained no layout
	// elements and only matched by coincidence because the constructed time
	// is always midnight UTC. Output is unchanged.
	formatted := t.Format("2006-01-02 15:04:05-0700")
	return &formatted
}
|
||||||
|
|
||||||
71
internal/engines/crossref_test.go
Normal file
71
internal/engines/crossref_test.go
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestCrossrefEngine_Search drives CrossrefEngine against a stubbed
// transport that validates the outgoing request shape (method, host, path,
// `query` parameter) and returns one canned Crossref work, then checks the
// mapped result fields.
func TestCrossrefEngine_Search(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		// Reject anything that deviates from the documented request shape
		// so the engine's error path surfaces the mismatch.
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "api.crossref.org" || r.URL.Path != "/works" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		q := r.URL.Query().Get("query")
		if q != "hugo" {
			return httpResponse(http.StatusBadRequest, "", ""), nil
		}

		body := `{
  "message": {
    "items": [
      {
        "type": "journal-article",
        "title": ["Paper B"],
        "URL": "https://example.com/paperb",
        "abstract": "Abstract B",
        "DOI": "10.1234/b",
        "published": {
          "date-parts": [[2020, 5, 1]]
        }
      }
    ]
  }
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})

	client := &http.Client{Transport: transport}
	engine := &CrossrefEngine{client: client}

	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:  "hugo",
		Pageno: 1,
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}

	r := resp.Results[0]
	if r.Title != "Paper B" {
		t.Fatalf("expected title Paper B, got %q", r.Title)
	}
	if r.Content != "Abstract B" {
		t.Fatalf("expected content, got %q", r.Content)
	}
	if r.Pubdate == nil || *r.Pubdate == "" {
		t.Fatalf("expected pubdate, got nil/empty")
	}
	if r.Engine != "crossref" {
		t.Fatalf("expected engine crossref, got %q", r.Engine)
	}
}
|
||||||
|
|
||||||
17
internal/engines/engine.go
Normal file
17
internal/engines/engine.go
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Engine is a Go-native implementation of a SearXNG engine.
//
// Implementations should return a SearchResponse containing only the results
// for that engine subset; the caller will merge multiple engine responses.
type Engine interface {
	// Name returns the SearXNG engine identifier (e.g. "wikipedia").
	Name() string
	// Search runs one query against the engine. Implementations should
	// honor ctx for cancellation/timeouts.
	Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error)
}
|
||||||
|
|
||||||
33
internal/engines/factory.go
Normal file
33
internal/engines/factory.go
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewDefaultPortedEngines returns the starter set of Go-native engines.
|
||||||
|
// The service can swap/extend this registry later as more engines are ported.
|
||||||
|
func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
|
||||||
|
if client == nil {
|
||||||
|
client = &http.Client{Timeout: 10 * time.Second}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map[string]Engine{
|
||||||
|
"wikipedia": &WikipediaEngine{client: client},
|
||||||
|
"arxiv": &ArxivEngine{client: client},
|
||||||
|
"crossref": &CrossrefEngine{client: client},
|
||||||
|
"braveapi": &BraveEngine{
|
||||||
|
client: client,
|
||||||
|
apiKey: os.Getenv("BRAVE_API_KEY"),
|
||||||
|
accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"),
|
||||||
|
resultsPerPage: 20,
|
||||||
|
},
|
||||||
|
"qwant": &QwantEngine{
|
||||||
|
client: client,
|
||||||
|
category: "web-lite",
|
||||||
|
resultsPerPage: 10,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
26
internal/engines/http_mock_test.go
Normal file
26
internal/engines/http_mock_test.go
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// roundTripperFunc adapts a plain function to http.RoundTripper so tests can
// stub out an http.Client's transport without touching the network.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// RoundTrip implements http.RoundTripper by invoking the wrapped function.
func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	return f(r)
}
|
||||||
|
|
||||||
|
// httpResponse builds a minimal *http.Response for transport stubs: the
// given status code, an optional Content-Type header, and a body backed by
// the provided string.
func httpResponse(status int, body string, contentType string) *http.Response {
	resp := &http.Response{
		StatusCode: status,
		Header:     make(http.Header),
		Body:       io.NopCloser(strings.NewReader(body)),
	}
	if contentType != "" {
		resp.Header.Set("Content-Type", contentType)
	}
	return resp
}
|
||||||
|
|
||||||
148
internal/engines/planner.go
Normal file
148
internal/engines/planner.go
Normal file
|
|
@ -0,0 +1,148 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultPortedEngines is the built-in list of engines served natively by
// this process when LOCAL_PORTED_ENGINES is not set.
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant"}

// Planner decides which requested engines run locally and which are proxied
// to the upstream SearXNG instance.
type Planner struct {
	// PortedSet is the membership set of locally ported engine names
	// (lowercase).
	PortedSet map[string]bool
	// PortedList preserves the configured order of ported engines with
	// blanks and duplicates removed.
	PortedList []string
}
|
||||||
|
|
||||||
|
func NewPlannerFromEnv() *Planner {
|
||||||
|
raw := strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))
|
||||||
|
if raw == "" {
|
||||||
|
return NewPlanner(defaultPortedEngines)
|
||||||
|
}
|
||||||
|
parts := splitCSV(raw)
|
||||||
|
if len(parts) == 0 {
|
||||||
|
return NewPlanner(defaultPortedEngines)
|
||||||
|
}
|
||||||
|
return NewPlanner(parts)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPlanner(portedEngines []string) *Planner {
|
||||||
|
set := make(map[string]bool, len(portedEngines))
|
||||||
|
out := make([]string, 0, len(portedEngines))
|
||||||
|
for _, e := range portedEngines {
|
||||||
|
e = strings.TrimSpace(strings.ToLower(e))
|
||||||
|
if e == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if set[e] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
set[e] = true
|
||||||
|
out = append(out, e)
|
||||||
|
}
|
||||||
|
return &Planner{
|
||||||
|
PortedSet: set,
|
||||||
|
PortedList: out,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Plan returns:
|
||||||
|
// - localEngines: engines that are configured as ported for this service
|
||||||
|
// - upstreamEngines: engines that should be executed by upstream SearXNG
|
||||||
|
// - requestedEngines: the (possibly inferred) requested engines list
|
||||||
|
//
|
||||||
|
// If the request provides an explicit `engines` parameter, we use it.
|
||||||
|
// Otherwise we infer a small subset from `categories` for the starter set.
|
||||||
|
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
|
||||||
|
if p == nil {
|
||||||
|
p = NewPlannerFromEnv()
|
||||||
|
}
|
||||||
|
|
||||||
|
requestedEngines = nil
|
||||||
|
if len(req.Engines) > 0 {
|
||||||
|
requestedEngines = normalizeList(req.Engines)
|
||||||
|
} else {
|
||||||
|
requestedEngines = inferFromCategories(req.Categories)
|
||||||
|
}
|
||||||
|
|
||||||
|
localEngines = make([]string, 0, len(requestedEngines))
|
||||||
|
upstreamEngines = make([]string, 0, len(requestedEngines))
|
||||||
|
for _, e := range requestedEngines {
|
||||||
|
if p.PortedSet[e] {
|
||||||
|
localEngines = append(localEngines, e)
|
||||||
|
} else {
|
||||||
|
upstreamEngines = append(upstreamEngines, e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return localEngines, upstreamEngines, requestedEngines
|
||||||
|
}
|
||||||
|
|
||||||
|
// inferFromCategories maps SearXNG categories onto the initial ported
// engine subset, returning the matching engines in a fixed, deterministic
// order (wikipedia, braveapi, qwant, arxiv, crossref). This mirrors the
// idea of selecting from SearXNG categories without embedding the whole
// engine registry.
func inferFromCategories(categories []string) []string {
	wantGeneral := false
	wantScience := false
	for _, c := range categories {
		switch strings.TrimSpace(strings.ToLower(c)) {
		case "general":
			wantGeneral = true
		case "science", "scientific publications":
			wantScience = true
		}
	}

	// Append in the canonical order directly, which makes the explicit
	// post-sort of the map-based version unnecessary.
	out := make([]string, 0, 5)
	if wantGeneral {
		out = append(out, "wikipedia", "braveapi", "qwant")
	}
	if wantScience {
		out = append(out, "arxiv", "crossref")
	}
	return out
}
|
||||||
|
|
||||||
|
// sortByOrder sorts list in place by each element's rank in order (missing
// keys rank 0). A stable insertion sort is used; the inputs are tiny.
func sortByOrder(list []string, order map[string]int) {
	for i := 1; i < len(list); i++ {
		cur := list[i]
		rank := order[cur]
		j := i - 1
		for j >= 0 && order[list[j]] > rank {
			list[j+1] = list[j]
			j--
		}
		list[j+1] = cur
	}
}
|
||||||
|
|
||||||
|
// normalizeList lowercases and trims each entry, dropping blanks and
// duplicates while preserving first-seen order.
func normalizeList(in []string) []string {
	seen := make(map[string]bool, len(in))
	out := make([]string, 0, len(in))
	for _, raw := range in {
		name := strings.ToLower(strings.TrimSpace(raw))
		if name == "" || seen[name] {
			continue
		}
		seen[name] = true
		out = append(out, name)
	}
	return out
}
|
||||||
|
|
||||||
|
// splitCSV splits a comma-separated string into trimmed, non-empty fields.
// An empty input returns nil.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	fields := strings.Split(s, ",")
	out := make([]string, 0, len(fields))
	for _, f := range fields {
		if trimmed := strings.TrimSpace(f); trimmed != "" {
			out = append(out, trimmed)
		}
	}
	return out
}
|
||||||
|
|
||||||
467
internal/engines/qwant.go
Normal file
467
internal/engines/qwant.go
Normal file
|
|
@ -0,0 +1,467 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
)
|
||||||
|
|
||||||
|
// QwantEngine implements a SearXNG-like `qwant` (web) adapter using
// Qwant v3 endpoint: https://api.qwant.com/v3/search/web.
//
// Qwant's API is not fully documented; this mirrors SearXNG's parsing logic
// for the `web` category from `.agent/searxng/searx/engines/qwant.py`.
type QwantEngine struct {
	// client performs the outbound HTTP call; must be non-nil for Search.
	client *http.Client
	// category selects the backend mode.
	category string // "web" (JSON API) or "web-lite" (HTML fallback)
	// resultsPerPage sets the JSON API `count` parameter; <=0 defaults to 10.
	resultsPerPage int
}

// Name returns the SearXNG engine identifier for this adapter.
func (e *QwantEngine) Name() string { return "qwant" }
|
||||||
|
|
||||||
|
func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("qwant engine not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
q := strings.TrimSpace(req.Query)
|
||||||
|
if q == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// For API parity we use SearXNG web defaults: count=10, offset=(pageno-1)*count.
|
||||||
|
// The engine's config field exists so we can expand to news/images/videos later.
|
||||||
|
count := e.resultsPerPage
|
||||||
|
if count <= 0 {
|
||||||
|
count = 10
|
||||||
|
}
|
||||||
|
offset := 0
|
||||||
|
if req.Pageno > 1 {
|
||||||
|
offset = (req.Pageno - 1) * count
|
||||||
|
}
|
||||||
|
mode := strings.TrimSpace(strings.ToLower(e.category))
|
||||||
|
if mode == "" {
|
||||||
|
mode = "web"
|
||||||
|
}
|
||||||
|
|
||||||
|
switch mode {
|
||||||
|
case "web-lite":
|
||||||
|
return e.searchWebLite(ctx, req)
|
||||||
|
case "web":
|
||||||
|
return e.searchWebAPI(ctx, req, count, offset)
|
||||||
|
default:
|
||||||
|
// Unknown mode: treat as unresponsive.
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
UnresponsiveEngines: [][2]string{
|
||||||
|
{e.Name(), "unknown_qwant_mode"},
|
||||||
|
},
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *QwantEngine) searchWebAPI(ctx context.Context, req contracts.SearchRequest, count, offset int) (contracts.SearchResponse, error) {
|
||||||
|
qLocale := qwantLocale(req.Language)
|
||||||
|
args := url.Values{}
|
||||||
|
args.Set("q", req.Query)
|
||||||
|
args.Set("count", fmt.Sprintf("%d", count))
|
||||||
|
args.Set("locale", qLocale)
|
||||||
|
args.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
|
||||||
|
args.Set("llm", "false")
|
||||||
|
args.Set("tgp", "3")
|
||||||
|
args.Set("offset", fmt.Sprintf("%d", offset))
|
||||||
|
|
||||||
|
endpoint := "https://api.qwant.com/v3/search/web?" + args.Encode()
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("User-Agent", "gosearch-go/0.1 (+https://github.com/ashie/gosearch)")
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// Qwant often returns a 403 captcha/JS block for the JSON API.
|
||||||
|
if resp.StatusCode == http.StatusForbidden {
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
UnresponsiveEngines: [][2]string{
|
||||||
|
{e.Name(), "captcha_or_js_block"},
|
||||||
|
},
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("qwant upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var top map[string]any
|
||||||
|
if err := json.Unmarshal(body, &top); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
status, _ := top["status"].(string)
|
||||||
|
if status != "success" {
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
UnresponsiveEngines: [][2]string{
|
||||||
|
{e.Name(), "api_error"},
|
||||||
|
},
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
data, _ := top["data"].(map[string]any)
|
||||||
|
result, _ := data["result"].(map[string]any)
|
||||||
|
items, _ := result["items"].(map[string]any)
|
||||||
|
mainline := items["mainline"]
|
||||||
|
|
||||||
|
rows := toSlice(mainline)
|
||||||
|
if len(rows) == 0 {
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: 0,
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]contracts.MainResult, 0, len(rows))
|
||||||
|
for _, row := range rows {
|
||||||
|
rowMap, ok := row.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rowType, _ := rowMap["type"].(string)
|
||||||
|
if rowType == "" {
|
||||||
|
rowType = "web"
|
||||||
|
}
|
||||||
|
if rowType != "web" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rowType == "ads" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rowItems := toSlice(rowMap["items"])
|
||||||
|
for _, it := range rowItems {
|
||||||
|
itemMap, ok := it.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
title := toString(itemMap["title"])
|
||||||
|
resURL := toString(itemMap["url"])
|
||||||
|
desc := toString(itemMap["desc"])
|
||||||
|
if resURL == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
urlPtr := resURL
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: title,
|
||||||
|
Content: desc,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Engine: e.Name(),
|
||||||
|
Score: 0,
|
||||||
|
Category: "general",
|
||||||
|
Engines: []string{e.Name()},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// searchWebLite scrapes the HTML of https://lite.qwant.com/ as a fallback to
// the JSON API. It issues a GET with SearXNG-equivalent query parameters and
// extracts organic results in two passes:
//
//  1. the known qwant-lite markup ("section article" blocks), and
//  2. a conservative generic pass over article/li/div blocks containing an
//     external anchor, capped at 20 total results.
//
// Ad-like blocks (span.tooltip), qwant-internal navigation links, and known
// sponsored hosts are skipped. URLs are de-duplicated across both passes via
// the shared `seen` set.
func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	qLocale := qwantLocale(req.Language)
	// The "l" parameter takes only the base language subtag (e.g. "en").
	langBase := strings.SplitN(qLocale, "_", 2)[0]

	args := url.Values{}
	args.Set("q", req.Query)
	args.Set("locale", strings.ToLower(qLocale))
	args.Set("l", langBase)
	args.Set("s", fmt.Sprintf("%d", req.Safesearch))
	args.Set("p", fmt.Sprintf("%d", req.Pageno))

	endpoint := "https://lite.qwant.com/?" + args.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch-go/0.1 (+https://github.com/ashie/gosearch)")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Cap the diagnostic body snippet at 16 KiB.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("qwant lite upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return contracts.SearchResponse{}, err
	}

	results := make([]contracts.MainResult, 0)
	// seen de-duplicates by the display-URL span text in pass 1 and by the
	// anchor href in pass 2. NOTE(review): when the display text differs from
	// the href for the same page, the two passes may not dedupe each other —
	// confirm against real lite.qwant.com markup.
	seen := map[string]bool{}

	// Pattern 1: legacy/known qwant-lite structure.
	doc.Find("section article").Each(func(_ int, item *goquery.Selection) {
		// ignore randomly interspersed advertising blocks
		if item.Find("span.tooltip").Length() > 0 {
			return
		}

		// In SearXNG: "./span[contains(@class, 'url partner')]"
		urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text())
		if urlText == "" {
			// fallback: any span with class containing both 'url' and 'partner'
			urlText = strings.TrimSpace(item.Find("span[class*='url'][class*='partner']").First().Text())
		}
		title := strings.TrimSpace(item.Find("h2 a").First().Text())
		content := strings.TrimSpace(item.Find("p").First().Text())

		if urlText == "" {
			return
		}
		if seen[urlText] {
			return
		}
		seen[urlText] = true
		u := urlText
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &u,
			Engine:   e.Name(),
			Score:    0,
			Category: "general",
			Engines:  []string{e.Name()},
		})
	})

	// Pattern 2: broader fallback for updated lite markup:
	// any article/list item/div block containing an external anchor.
	// We keep this conservative by requiring non-empty title + URL.
	doc.Find("article, li, div").Each(func(_ int, item *goquery.Selection) {
		// NOTE(review): this cap does not stop iteration — Each still visits
		// the remaining nodes as no-ops; EachWithBreak could end early (perf
		// only, same output).
		if len(results) >= 20 {
			return
		}
		// Skip ad-like blocks in fallback pass too.
		if item.Find("span.tooltip").Length() > 0 {
			return
		}

		// Skip obvious nav/footer blocks.
		classAttr, _ := item.Attr("class")
		classLower := strings.ToLower(classAttr)
		if strings.Contains(classLower, "nav") || strings.Contains(classLower, "footer") {
			return
		}

		a := item.Find("a[href]").First()
		if a.Length() == 0 {
			return
		}
		href, ok := a.Attr("href")
		if !ok {
			return
		}
		href = strings.TrimSpace(href)
		if href == "" {
			return
		}

		// Ignore in-page and relative links.
		if strings.HasPrefix(href, "/") || strings.HasPrefix(href, "#") {
			return
		}
		if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
			return
		}
		// Skip known sponsored partner links surfaced in lite pages.
		if isKnownSponsoredURL(href) {
			return
		}
		if isQwantInternalURL(href) {
			// Ignore qwant nav/house links.
			return
		}

		title := strings.TrimSpace(a.Text())
		if title == "" {
			return
		}
		if isLikelyNavTitle(title) {
			return
		}

		if seen[href] {
			return
		}
		seen[href] = true

		// Best-effort snippet extraction from nearby paragraph/span text.
		content := strings.TrimSpace(item.Find("p").First().Text())
		if content == "" {
			content = strings.TrimSpace(item.Find("span").First().Text())
		}
		// If there is no snippet, still keep clearly external result links.
		// Qwant-lite frequently omits rich snippets for some entries.

		u := href
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &u,
			Engine:   e.Name(),
			Score:    0,
			Category: "general",
			Engines:  []string{e.Name()},
		})
	})

	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
|
||||||
|
|
||||||
|
// qwantLocale maps a BCP-47-ish language tag ("en", "fr-FR", "de_de", "auto")
// to the "xx_YY" locale form Qwant expects. Empty or "auto" fall back to
// "en_US"; a missing or blank country defaults to "US".
func qwantLocale(lang string) string {
	trimmed := strings.TrimSpace(lang)
	if trimmed == "" || trimmed == "auto" {
		return "en_US"
	}

	normalized := strings.ReplaceAll(trimmed, "-", "_")
	base, rest, hasCountry := strings.Cut(normalized, "_")

	country := "US"
	if hasCountry {
		if c := strings.TrimSpace(rest); c != "" {
			country = strings.ToUpper(c)
		}
	}
	// Qwant expects locales like en_US.
	return strings.ToLower(base) + "_" + country
}
|
||||||
|
|
||||||
|
// toSlice coerces a decoded JSON value to a []any. A slice is returned as-is,
// a lone object is wrapped in a one-element slice (upstream sometimes sends a
// single object where a list is expected), and anything else yields nil.
func toSlice(v any) []any {
	if list, ok := v.([]any); ok {
		return list
	}
	if obj, ok := v.(map[string]any); ok {
		return []any{obj}
	}
	return nil
}
|
||||||
|
|
||||||
|
func toString(v any) string {
|
||||||
|
switch t := v.(type) {
|
||||||
|
case string:
|
||||||
|
return t
|
||||||
|
case json.Number:
|
||||||
|
return t.String()
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// isQwantInternalURL reports whether raw points at qwant.com or any of its
// subdomains (www., about., lite., ...), i.e. a house/navigation link rather
// than an external search result. Unparseable or host-less URLs return false.
func isQwantInternalURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	if host == "" {
		return false
	}
	// The apex domain plus every subdomain; the suffix check covers
	// www.qwant.com, about.qwant.com, etc. without listing them.
	return host == "qwant.com" || strings.HasSuffix(host, ".qwant.com")
}
|
||||||
|
|
||||||
|
// isLikelyNavTitle reports whether an anchor's text looks like Qwant site
// chrome (navigation labels or house promos) rather than an organic result
// title. Matching is case-insensitive and ignores surrounding whitespace.
func isLikelyNavTitle(title string) bool {
	normalized := strings.TrimSpace(strings.ToLower(title))

	// House storage promo banner.
	if strings.HasPrefix(normalized, "get 20gb of free storage") {
		return true
	}

	navLabels := []string{
		"qwant search",
		"search",
		"privacy",
		"discover the service",
		"better web",
		"discover",
	}
	for _, label := range navLabels {
		if normalized == label {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// isKnownSponsoredURL reports whether raw is a known sponsored/partner link
// observed on qwant-lite pages: either the shdw.me shortener host or any URL
// mentioning "qwant-tool". Unparseable URLs return false.
func isKnownSponsoredURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	host := strings.ToLower(parsed.Hostname())
	if host == "shdw.me" || host == "www.shdw.me" {
		return true
	}
	// Promotional "qwant-tool" links count as sponsored wherever the marker
	// appears in the URL.
	return strings.Contains(strings.ToLower(raw), "qwant-tool")
}
|
||||||
|
|
||||||
89
internal/engines/qwant_lite_test.go
Normal file
89
internal/engines/qwant_lite_test.go
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestQwantEngine_WebLite stubs the HTTP transport and verifies that the
// lite engine (a) sends the expected query parameters to lite.qwant.com and
// (b) extracts the organic result while skipping the ad block marked with
// span.tooltip.
func TestQwantEngine_WebLite(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "lite.qwant.com" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		if r.URL.Path != "/" {
			// goquery request URL parsing should normalize to "/"
			t.Fatalf("unexpected path: %s", r.URL.Path)
		}

		// Query parameters produced by searchWebLite for Language "en",
		// Safesearch 0, Pageno 1 (locale is lowercased to "en_us").
		q := r.URL.Query().Get("q")
		if q != "hugo" {
			t.Fatalf("unexpected q: %q", q)
		}
		if r.URL.Query().Get("locale") != "en_us" {
			t.Fatalf("unexpected locale: %q", r.URL.Query().Get("locale"))
		}
		if r.URL.Query().Get("l") != "en" {
			t.Fatalf("unexpected l: %q", r.URL.Query().Get("l"))
		}
		if r.URL.Query().Get("s") != "0" {
			t.Fatalf("unexpected s: %q", r.URL.Query().Get("s"))
		}
		if r.URL.Query().Get("p") != "1" {
			t.Fatalf("unexpected p: %q", r.URL.Query().Get("p"))
		}

		// Two articles: one organic, one ad (span.tooltip) that must be dropped.
		body := `
<!doctype html>
<html>
<body>
<section>
<article>
<span class="url partner">https://example.com/q</span>
<h2><a href="https://example.com/q">Qwant Title</a></h2>
<p>Qwant description</p>
</article>
<article>
<span class="tooltip">ad</span>
<span class="url partner">https://example.com/ad</span>
<h2><a href="https://example.com/ad">Ad Title</a></h2>
<p>Ad description</p>
</article>
</section>
</body>
</html>`

		return httpResponse(http.StatusOK, body, "text/html"), nil
	})

	client := &http.Client{Transport: transport}
	engine := &QwantEngine{client: client, category: "web-lite", resultsPerPage: 10}

	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:      "hugo",
		Pageno:     1,
		Safesearch: 0,
		Language:   "en",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result (non-ad), got %d", len(resp.Results))
	}
	if resp.Results[0].Title != "Qwant Title" {
		t.Fatalf("unexpected title: %q", resp.Results[0].Title)
	}
	if resp.Results[0].Content != "Qwant description" {
		t.Fatalf("unexpected content: %q", resp.Results[0].Content)
	}
	if resp.Results[0].URL == nil || *resp.Results[0].URL != "https://example.com/q" {
		t.Fatalf("unexpected url: %v", resp.Results[0].URL)
	}
}
|
||||||
|
|
||||||
94
internal/engines/qwant_test.go
Normal file
94
internal/engines/qwant_test.go
Normal file
|
|
@ -0,0 +1,94 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestQwantEngine_Web stubs the transport and checks that the "web" category
// hits the Qwant v3 JSON API with the expected query parameters and that a
// mainline item of type "web" is mapped into exactly one result.
func TestQwantEngine_Web(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		if r.URL.Host != "api.qwant.com" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}
		if r.URL.Path != "/v3/search/web" {
			t.Fatalf("unexpected path: %s", r.URL.Path)
		}

		// Parameters the engine is expected to send for page 1, safesearch 0,
		// resultsPerPage 10, Language "en".
		q := r.URL.Query().Get("q")
		if q != "hugo" {
			t.Fatalf("unexpected q: %q", q)
		}
		if r.URL.Query().Get("count") != "10" {
			t.Fatalf("unexpected count: %q", r.URL.Query().Get("count"))
		}
		if r.URL.Query().Get("locale") != "en_US" {
			t.Fatalf("unexpected locale: %q", r.URL.Query().Get("locale"))
		}
		if r.URL.Query().Get("safesearch") != "0" {
			t.Fatalf("unexpected safesearch: %q", r.URL.Query().Get("safesearch"))
		}
		if r.URL.Query().Get("llm") != "false" {
			t.Fatalf("unexpected llm: %q", r.URL.Query().Get("llm"))
		}
		if r.URL.Query().Get("tgp") != "3" {
			t.Fatalf("unexpected tgp: %q", r.URL.Query().Get("tgp"))
		}
		if r.URL.Query().Get("offset") != "0" {
			t.Fatalf("unexpected offset: %q", r.URL.Query().Get("offset"))
		}

		// Minimal successful v3 payload: one mainline row of type "web" with
		// a single item.
		body := `{
	"status": "success",
	"data": {
		"result": {
			"items": {
				"mainline": [
					{
						"type": "web",
						"items": [
							{ "title": "Qwant Title", "url": "https://example.com/q", "desc": "Qwant description" }
						]
					}
				]
			}
		}
	}
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})

	client := &http.Client{Transport: transport}
	engine := &QwantEngine{client: client, category: "web", resultsPerPage: 10}

	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:      "hugo",
		Pageno:     1,
		Safesearch: 0,
		Language:   "en",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	if resp.Results[0].Title != "Qwant Title" {
		t.Fatalf("unexpected title: %q", resp.Results[0].Title)
	}
	if resp.Results[0].Content != "Qwant description" {
		t.Fatalf("unexpected content: %q", resp.Results[0].Content)
	}
	if resp.Results[0].URL == nil || *resp.Results[0].URL != "https://example.com/q" {
		t.Fatalf("unexpected url: %v", resp.Results[0].URL)
	}
	if resp.Results[0].Engine != "qwant" {
		t.Fatalf("unexpected engine: %q", resp.Results[0].Engine)
	}
}
|
||||||
|
|
||||||
151
internal/engines/wikipedia.go
Normal file
151
internal/engines/wikipedia.go
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// WikipediaEngine resolves queries via the Wikipedia REST "page summary" API.
// The zero value is not usable: client must be set before calling Search
// (Search guards against a nil client and returns an error).
type WikipediaEngine struct {
	// client performs the outbound HTTP request; injected so tests can stub
	// the transport.
	client *http.Client
}
|
||||||
|
|
||||||
|
// Name returns the engine identifier used in results and engine selection.
func (e *WikipediaEngine) Name() string { return "wikipedia" }
|
||||||
|
|
||||||
|
func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("wikipedia engine not initialized")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
lang := strings.TrimSpace(req.Language)
|
||||||
|
if lang == "" || lang == "auto" {
|
||||||
|
lang = "en"
|
||||||
|
}
|
||||||
|
// Wikipedia subdomains are based on the language code; keep it simple for MVP.
|
||||||
|
lang = strings.SplitN(lang, "-", 2)[0]
|
||||||
|
lang = strings.ReplaceAll(lang, "_", "-")
|
||||||
|
wikiNetloc := fmt.Sprintf("%s.wikipedia.org", lang)
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(
|
||||||
|
"https://%s/api/rest_v1/page/summary/%s",
|
||||||
|
wikiNetloc,
|
||||||
|
url.PathEscape(req.Query),
|
||||||
|
)
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
// Wikimedia APIs require a descriptive User-Agent.
|
||||||
|
httpReq.Header.Set(
|
||||||
|
"User-Agent",
|
||||||
|
"gosearch-go/0.1 (compatible; +https://github.com/ashie/gosearch)",
|
||||||
|
)
|
||||||
|
// Best-effort: hint content language.
|
||||||
|
if req.Language != "" && req.Language != "auto" {
|
||||||
|
httpReq.Header.Set("Accept-Language", req.Language)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode == http.StatusNotFound {
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: 0,
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("wikipedia upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var api struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
Titles struct {
|
||||||
|
Display string `json:"display"`
|
||||||
|
} `json:"titles"`
|
||||||
|
ContentURLs struct {
|
||||||
|
Desktop struct {
|
||||||
|
Page string `json:"page"`
|
||||||
|
} `json:"desktop"`
|
||||||
|
} `json:"content_urls"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
pageURL := api.ContentURLs.Desktop.Page
|
||||||
|
if pageURL == "" {
|
||||||
|
// API returned a non-standard payload; treat as no result.
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: 0,
|
||||||
|
Results: []contracts.MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
title := api.Titles.Display
|
||||||
|
if title == "" {
|
||||||
|
title = api.Title
|
||||||
|
}
|
||||||
|
|
||||||
|
content := api.Description
|
||||||
|
|
||||||
|
urlPtr := pageURL
|
||||||
|
pub := (*string)(nil)
|
||||||
|
|
||||||
|
results := []contracts.MainResult{
|
||||||
|
{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: title,
|
||||||
|
Content: content,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Pubdate: pub,
|
||||||
|
Engine: "wikipedia",
|
||||||
|
Score: 0,
|
||||||
|
Category: "general",
|
||||||
|
Priority: "",
|
||||||
|
Positions: nil,
|
||||||
|
Engines: []string{"wikipedia"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
61
internal/engines/wikipedia_test.go
Normal file
61
internal/engines/wikipedia_test.go
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestWikipediaEngine_Search stubs the HTTP transport and verifies that the
// engine queries the language-specific REST summary endpoint and maps the
// payload into a single result (title, description, desktop page URL).
func TestWikipediaEngine_Search(t *testing.T) {
	transport := roundTripperFunc(func(r *http.Request) (*http.Response, error) {
		if r.Method != http.MethodGet {
			return httpResponse(http.StatusMethodNotAllowed, "", ""), nil
		}
		// Language "en" must resolve to the en.wikipedia.org subdomain.
		if r.URL.Host != "en.wikipedia.org" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}

		if r.URL.Path != "/api/rest_v1/page/summary/Taxi" {
			return httpResponse(http.StatusNotFound, "", ""), nil
		}

		// Minimal REST v1 page-summary payload with the fields Search reads.
		body := `{
	"title": "Taxi",
	"description": "A car",
	"titles": { "display": "Taxi" },
	"content_urls": { "desktop": { "page": "https://en.wikipedia.org/wiki/Taxi" } }
}`
		return httpResponse(http.StatusOK, body, "application/json"), nil
	})

	client := &http.Client{Transport: transport}
	engine := &WikipediaEngine{client: client}

	resp, err := engine.Search(context.Background(), contracts.SearchRequest{
		Query:    "Taxi",
		Pageno:   1,
		Language: "en",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	r := resp.Results[0]
	if r.Title != "Taxi" {
		t.Fatalf("expected title Taxi, got %q", r.Title)
	}
	if r.Content != "A car" {
		t.Fatalf("expected content, got %q", r.Content)
	}
	if r.URL == nil || *r.URL == "" {
		t.Fatalf("expected url, got nil/empty")
	}
	if *r.URL != "https://en.wikipedia.org/wiki/Taxi" {
		t.Fatalf("unexpected url: %q", *r.URL)
	}
}
|
||||||
|
|
||||||
41
internal/httpapi/handlers.go
Normal file
41
internal/httpapi/handlers.go
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
package httpapi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/search"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Handler bundles the HTTP endpoint implementations around the search
// service.
type Handler struct {
	// searchSvc executes queries; expected non-nil (Search does not nil-check).
	searchSvc *search.Service
}
|
||||||
|
|
||||||
|
// NewHandler wires a Handler to the given search service.
func NewHandler(searchSvc *search.Service) *Handler {
	return &Handler{searchSvc: searchSvc}
}
|
||||||
|
|
||||||
|
// Healthz is the liveness probe endpoint: always responds 200 with a
// plain-text "OK" body.
func (h *Handler) Healthz(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	// Write error intentionally ignored: nothing useful to do for a probe.
	_, _ = w.Write([]byte("OK"))
}
|
||||||
|
|
||||||
|
// Search is the /search endpoint: it parses SearXNG-style form parameters,
// runs the search service, and serializes the response in the requested
// output format.
func (h *Handler) Search(w http.ResponseWriter, r *http.Request) {
	req, err := search.ParseSearchRequest(r)
	if err != nil {
		// Malformed or missing parameters (e.g. no "q") are a client error.
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	resp, err := h.searchSvc.Search(r.Context(), req)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	if err := search.WriteSearchResponse(w, req.Format, resp); err != nil {
		// Best effort: headers/body may already be partially written here.
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
}
|
||||||
|
|
||||||
121
internal/search/merge.go
Normal file
121
internal/search/merge.go
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MergeResponses merges multiple SearXNG-compatible JSON responses.
|
||||||
|
//
|
||||||
|
// MVP merge semantics:
|
||||||
|
// - results are concatenated with a simple de-dup key (engine|title|url)
|
||||||
|
// - suggestions/corrections are de-duplicated as sets
|
||||||
|
// - answers/infoboxes/unresponsive_engines are concatenated (best-effort)
|
||||||
|
func MergeResponses(responses []contracts.SearchResponse) contracts.SearchResponse {
|
||||||
|
var merged contracts.SearchResponse
|
||||||
|
|
||||||
|
mergedResultSeen := map[string]struct{}{}
|
||||||
|
mergedAnswerSeen := map[string]struct{}{}
|
||||||
|
mergedCorrectionsSeen := map[string]struct{}{}
|
||||||
|
mergedSuggestionsSeen := map[string]struct{}{}
|
||||||
|
|
||||||
|
for _, r := range responses {
|
||||||
|
if merged.Query == "" {
|
||||||
|
merged.Query = r.Query
|
||||||
|
}
|
||||||
|
|
||||||
|
merged.NumberOfResults = maxInt(merged.NumberOfResults, r.NumberOfResults)
|
||||||
|
|
||||||
|
for _, mr := range r.Results {
|
||||||
|
key := resultDedupKey(mr)
|
||||||
|
if _, ok := mergedResultSeen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mergedResultSeen[key] = struct{}{}
|
||||||
|
merged.Results = append(merged.Results, mr)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ans := range r.Answers {
|
||||||
|
// De-dup by normalized JSON when possible.
|
||||||
|
b, err := json.Marshal(ans)
|
||||||
|
if err != nil {
|
||||||
|
merged.Answers = append(merged.Answers, ans)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key := string(b)
|
||||||
|
if _, ok := mergedAnswerSeen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mergedAnswerSeen[key] = struct{}{}
|
||||||
|
merged.Answers = append(merged.Answers, ans)
|
||||||
|
}
|
||||||
|
|
||||||
|
merged.Corrections = unionStrings(merged.Corrections, r.Corrections, &mergedCorrectionsSeen)
|
||||||
|
merged.Suggestions = unionStrings(merged.Suggestions, r.Suggestions, &mergedSuggestionsSeen)
|
||||||
|
|
||||||
|
merged.Infoboxes = append(merged.Infoboxes, r.Infoboxes...)
|
||||||
|
merged.UnresponsiveEngines = append(merged.UnresponsiveEngines, r.UnresponsiveEngines...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure non-nil slices to keep JSON shape stable.
|
||||||
|
if merged.Results == nil {
|
||||||
|
merged.Results = []contracts.MainResult{}
|
||||||
|
}
|
||||||
|
if merged.Answers == nil {
|
||||||
|
merged.Answers = []map[string]any{}
|
||||||
|
}
|
||||||
|
if merged.Corrections == nil {
|
||||||
|
merged.Corrections = []string{}
|
||||||
|
}
|
||||||
|
if merged.Infoboxes == nil {
|
||||||
|
merged.Infoboxes = []map[string]any{}
|
||||||
|
}
|
||||||
|
if merged.Suggestions == nil {
|
||||||
|
merged.Suggestions = []string{}
|
||||||
|
}
|
||||||
|
if merged.UnresponsiveEngines == nil {
|
||||||
|
merged.UnresponsiveEngines = [][2]string{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
func resultDedupKey(r contracts.MainResult) string {
|
||||||
|
urlStr := ""
|
||||||
|
if r.URL != nil {
|
||||||
|
urlStr = *r.URL
|
||||||
|
}
|
||||||
|
// Normalize host to reduce duplicates.
|
||||||
|
if u, err := url.Parse(urlStr); err == nil {
|
||||||
|
if u.Host != "" {
|
||||||
|
urlStr = u.Host + u.Path
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.ToLower(r.Engine) + "|" + strings.ToLower(r.Title) + "|" + urlStr
|
||||||
|
}
|
||||||
|
|
||||||
|
// unionStrings appends to dst every element of src not yet recorded in *seen,
// updating *seen as it goes. A nil *seen map is lazily initialized, so the
// same seen pointer can thread dedup state across multiple calls.
func unionStrings(dst []string, src []string, seen *map[string]struct{}) []string {
	if *seen == nil {
		*seen = map[string]struct{}{}
	}
	merged := dst
	for _, candidate := range src {
		if _, dup := (*seen)[candidate]; dup {
			continue
		}
		(*seen)[candidate] = struct{}{}
		merged = append(merged, candidate)
	}
	return merged
}
|
||||||
|
|
||||||
|
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if a < b {
		return b
	}
	return a
}
|
||||||
|
|
||||||
80
internal/search/merge_test.go
Normal file
80
internal/search/merge_test.go
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestMergeResponses_DedupResultsAndSets feeds two overlapping responses into
// MergeResponses and checks: result de-dup (same engine/title/url keeps one
// entry), set-union of corrections/suggestions, answer de-dup by JSON
// encoding, and number_of_results as the max of the inputs.
func TestMergeResponses_DedupResultsAndSets(t *testing.T) {
	url1 := "https://example.com/a?x=1"
	uPtr := &url1

	r1 := contracts.SearchResponse{
		Query:           "q",
		NumberOfResults: 1,
		Results: []contracts.MainResult{
			{
				Template: "default.html",
				Title:    "Title1",
				Content:  "C1",
				URL:      uPtr,
				Engine:   "wikipedia",
				Score:    1.0,
			},
		},
		Answers:             []map[string]any{{"title": "A1", "url": url1}},
		Corrections:         []string{"corr1", "corr2"},
		Suggestions:         []string{"s1", "s2"},
		Infoboxes:           []map[string]any{},
		UnresponsiveEngines: [][2]string{},
	}

	// Same engine/title/url as r1 (different content/score), so its result
	// and answer must be de-duplicated away; its corrections/suggestions
	// partially overlap r1's.
	r2 := contracts.SearchResponse{
		Query:           "q",
		NumberOfResults: 1,
		Results: []contracts.MainResult{
			{
				Template: "default.html",
				Title:    "Title1",
				Content:  "C2",
				URL:      uPtr,
				Engine:   "wikipedia",
				Score:    2.0,
			},
		},
		Answers:             []map[string]any{{"title": "A1", "url": url1}},
		Corrections:         []string{"corr2", "corr3"},
		Suggestions:         []string{"s2", "s3"},
		Infoboxes:           []map[string]any{},
		UnresponsiveEngines: [][2]string{},
	}

	merged := MergeResponses([]contracts.SearchResponse{r1, r2})

	if merged.Query != "q" {
		t.Fatalf("expected query q, got %q", merged.Query)
	}
	if merged.NumberOfResults != 1 {
		t.Fatalf("expected number_of_results max=1, got %d", merged.NumberOfResults)
	}
	if len(merged.Results) != 1 {
		t.Fatalf("expected 1 merged result, got %d", len(merged.Results))
	}

	// Corrections/suggestions should be unioned.
	joinedCorr := strings.Join(merged.Corrections, ",")
	if !strings.Contains(joinedCorr, "corr1") || !strings.Contains(joinedCorr, "corr2") || !strings.Contains(joinedCorr, "corr3") {
		t.Fatalf("expected unioned corrections, got %v", merged.Corrections)
	}
	joinedSug := strings.Join(merged.Suggestions, ",")
	if !strings.Contains(joinedSug, "s1") || !strings.Contains(joinedSug, "s2") || !strings.Contains(joinedSug, "s3") {
		t.Fatalf("expected unioned suggestions, got %v", merged.Suggestions)
	}

	if len(merged.Answers) != 1 {
		t.Fatalf("expected 1 merged answer, got %d", len(merged.Answers))
	}
}
|
||||||
|
|
||||||
206
internal/search/request_params.go
Normal file
206
internal/search/request_params.go
Normal file
|
|
@ -0,0 +1,206 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// languageCodeRe matches a bare two-or-three-letter language code with an
// optional two-letter region subtag (e.g. "en", "deu", "en-US"); used to
// validate the `language` form parameter.
var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`)
|
||||||
|
|
||||||
|
func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
|
||||||
|
// SearXNG supports both GET and POST and relies on form values for routing.
|
||||||
|
if err := r.ParseForm(); err != nil {
|
||||||
|
return SearchRequest{}, errors.New("invalid request: cannot parse form")
|
||||||
|
}
|
||||||
|
|
||||||
|
format := strings.ToLower(r.FormValue("format"))
|
||||||
|
switch OutputFormat(format) {
|
||||||
|
case FormatJSON, FormatCSV, FormatRSS:
|
||||||
|
default:
|
||||||
|
// MVP: treat everything else as json, except `html` which we accept for compatibility.
|
||||||
|
if format == string(FormatHTML) {
|
||||||
|
// accepted, but not implemented by the server yet
|
||||||
|
} else {
|
||||||
|
format = string(FormatJSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
q := r.FormValue("q")
|
||||||
|
if strings.TrimSpace(q) == "" {
|
||||||
|
return SearchRequest{}, errors.New("missing required parameter: q")
|
||||||
|
}
|
||||||
|
|
||||||
|
pageno := 1
|
||||||
|
if s := strings.TrimSpace(r.FormValue("pageno")); s != "" {
|
||||||
|
n, err := strconv.Atoi(s)
|
||||||
|
if err != nil || n < 1 {
|
||||||
|
return SearchRequest{}, errors.New("invalid parameter: pageno")
|
||||||
|
}
|
||||||
|
pageno = n
|
||||||
|
}
|
||||||
|
|
||||||
|
// MVP defaults.
|
||||||
|
safesearch := 0
|
||||||
|
if s := strings.TrimSpace(r.FormValue("safesearch")); s != "" {
|
||||||
|
n, err := strconv.Atoi(s)
|
||||||
|
if err != nil || n < 0 || n > 2 {
|
||||||
|
return SearchRequest{}, errors.New("invalid parameter: safesearch")
|
||||||
|
}
|
||||||
|
safesearch = n
|
||||||
|
}
|
||||||
|
|
||||||
|
var timeRange *string
|
||||||
|
if tr := strings.TrimSpace(r.FormValue("time_range")); tr != "" && tr != "None" {
|
||||||
|
switch tr {
|
||||||
|
case "day", "week", "month", "year":
|
||||||
|
tt := tr
|
||||||
|
timeRange = &tt
|
||||||
|
default:
|
||||||
|
return SearchRequest{}, errors.New("invalid parameter: time_range")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var timeoutLimit *float64
|
||||||
|
if s := strings.TrimSpace(r.FormValue("timeout_limit")); s != "" && s != "None" {
|
||||||
|
v, err := strconv.ParseFloat(s, 64)
|
||||||
|
if err != nil || v <= 0 {
|
||||||
|
return SearchRequest{}, errors.New("invalid parameter: timeout_limit")
|
||||||
|
}
|
||||||
|
timeoutLimit = &v
|
||||||
|
}
|
||||||
|
|
||||||
|
language := strings.TrimSpace(r.FormValue("language"))
|
||||||
|
if language == "" {
|
||||||
|
language = "auto"
|
||||||
|
}
|
||||||
|
switch language {
|
||||||
|
case "auto", "all":
|
||||||
|
// ok
|
||||||
|
default:
|
||||||
|
if !languageCodeRe.MatchString(language) {
|
||||||
|
return SearchRequest{}, errors.New("invalid parameter: language")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// engines is an explicit list of engine names.
|
||||||
|
engines := splitCSV(strings.TrimSpace(r.FormValue("engines")))
|
||||||
|
|
||||||
|
// categories and category_<name> params mirror SearXNG's webadapter parsing.
|
||||||
|
// We don't validate against a registry here; we just preserve the requested values.
|
||||||
|
catSet := map[string]bool{}
|
||||||
|
if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" {
|
||||||
|
for _, cat := range splitCSV(catsParam) {
|
||||||
|
catSet[cat] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for k, v := range r.Form {
|
||||||
|
if !strings.HasPrefix(k, "category_") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
category := strings.TrimPrefix(k, "category_")
|
||||||
|
if category == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
val := ""
|
||||||
|
if len(v) > 0 {
|
||||||
|
val = strings.TrimSpace(v[0])
|
||||||
|
}
|
||||||
|
if val == "" || val != "off" {
|
||||||
|
catSet[category] = true
|
||||||
|
} else {
|
||||||
|
delete(catSet, category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
categories := make([]string, 0, len(catSet))
|
||||||
|
for c := range catSet {
|
||||||
|
categories = append(categories, c)
|
||||||
|
}
|
||||||
|
if len(categories) == 0 {
|
||||||
|
categories = []string{"general"}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse engine_data-<engine>-<key>=<value> parameters.
|
||||||
|
engineData := map[string]map[string]string{}
|
||||||
|
for k, v := range r.Form {
|
||||||
|
if !strings.HasPrefix(k, "engine_data-") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(k, "-", 3) // engine_data-<engine>-<key>
|
||||||
|
if len(parts) != 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
engine := parts[1]
|
||||||
|
key := parts[2]
|
||||||
|
// For HTML forms, r.Form[k] can contain multiple values; keep first.
|
||||||
|
val := ""
|
||||||
|
if len(v) > 0 {
|
||||||
|
val = v[0]
|
||||||
|
}
|
||||||
|
if _, ok := engineData[engine]; !ok {
|
||||||
|
engineData[engine] = map[string]string{}
|
||||||
|
}
|
||||||
|
engineData[engine][key] = val
|
||||||
|
}
|
||||||
|
|
||||||
|
accessToken := parseAccessToken(r)
|
||||||
|
|
||||||
|
return SearchRequest{
|
||||||
|
Format: OutputFormat(format),
|
||||||
|
Query: q,
|
||||||
|
Pageno: pageno,
|
||||||
|
Safesearch: safesearch,
|
||||||
|
TimeRange: timeRange,
|
||||||
|
TimeoutLimit: timeoutLimit,
|
||||||
|
Language: language,
|
||||||
|
Engines: engines,
|
||||||
|
Categories: categories,
|
||||||
|
EngineData: engineData,
|
||||||
|
AccessToken: accessToken,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitCSV splits a comma-separated string into its non-empty,
// whitespace-trimmed items. An empty input yields nil.
func splitCSV(s string) []string {
	if s == "" {
		return nil
	}
	fields := strings.Split(s, ",")
	items := make([]string, 0, len(fields))
	for _, field := range fields {
		if trimmed := strings.TrimSpace(field); trimmed != "" {
			items = append(items, trimmed)
		}
	}
	return items
}
|
||||||
|
|
||||||
|
func parseAccessToken(r *http.Request) string {
|
||||||
|
// Supported sources (first non-empty wins):
|
||||||
|
// - `Authorization: Bearer <token>`
|
||||||
|
// - `X-Search-Token` / `X-Brave-Access-Token`
|
||||||
|
// - `token` form value
|
||||||
|
if auth := r.Header.Get("Authorization"); auth != "" {
|
||||||
|
const prefix = "Bearer "
|
||||||
|
if len(auth) > len(prefix) && auth[:len(prefix)] == prefix {
|
||||||
|
return strings.TrimSpace(auth[len(prefix):])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if v := strings.TrimSpace(r.Header.Get("X-Search-Token")); v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
if v := strings.TrimSpace(r.Header.Get("X-Brave-Access-Token")); v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
if v := strings.TrimSpace(r.FormValue("token")); v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
74
internal/search/request_params_test.go
Normal file
74
internal/search/request_params_test.go
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseSearchRequest_MissingQ(t *testing.T) {
|
||||||
|
r := httptest.NewRequest(http.MethodGet, "/search?format=json", nil)
|
||||||
|
_, err := ParseSearchRequest(r)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSearchRequest_InvalidPageno(t *testing.T) {
|
||||||
|
r := httptest.NewRequest(http.MethodGet, "/search?q=hi&pageno=0", nil)
|
||||||
|
_, err := ParseSearchRequest(r)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error for pageno, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSearchRequest_InvalidLanguage(t *testing.T) {
|
||||||
|
r := httptest.NewRequest(http.MethodGet, "/search?q=hi&language=bad!", nil)
|
||||||
|
_, err := ParseSearchRequest(r)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error for language, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSearchRequest_CategoriesAndEngineData(t *testing.T) {
|
||||||
|
values := url.Values{}
|
||||||
|
values.Set("q", "hello")
|
||||||
|
values.Set("format", "json")
|
||||||
|
values.Set("categories", "general,science")
|
||||||
|
values.Set("category_science", "off")
|
||||||
|
values.Set("engines", "wikipedia,arxiv")
|
||||||
|
values.Set("engine_data-wikipedia-timeout", "123")
|
||||||
|
|
||||||
|
r := httptest.NewRequest(http.MethodPost, "/search", strings.NewReader(values.Encode()))
|
||||||
|
r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
|
||||||
|
req, err := ParseSearchRequest(r)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// categories should drop `science` due to category_science=off
|
||||||
|
wantCats := map[string]bool{"general": true}
|
||||||
|
gotCats := map[string]bool{}
|
||||||
|
for _, c := range req.Categories {
|
||||||
|
gotCats[c] = true
|
||||||
|
}
|
||||||
|
for c := range wantCats {
|
||||||
|
if !gotCats[c] {
|
||||||
|
t.Fatalf("expected category %q in result, got %v", c, req.Categories)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if gotCats["science"] {
|
||||||
|
t.Fatalf("expected category science to be removed, got %v", req.Categories)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(req.Engines) != 2 {
|
||||||
|
t.Fatalf("expected 2 engines, got %v", req.Engines)
|
||||||
|
}
|
||||||
|
if req.EngineData["wikipedia"]["timeout"] != "123" {
|
||||||
|
t.Fatalf("expected engine_data parsed, got %#v", req.EngineData)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
223
internal/search/response.go
Normal file
223
internal/search/response.go
Normal file
|
|
@ -0,0 +1,223 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/csv"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"encoding/xml"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp SearchResponse) error {
|
||||||
|
switch format {
|
||||||
|
case FormatJSON:
|
||||||
|
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||||
|
return json.NewEncoder(w).Encode(resp)
|
||||||
|
case FormatCSV:
|
||||||
|
w.Header().Set("Content-Type", "text/csv; charset=utf-8")
|
||||||
|
if err := writeCSV(w, resp); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
case FormatRSS:
|
||||||
|
w.Header().Set("Content-Type", "text/xml; charset=utf-8")
|
||||||
|
if err := writeRSS(w, resp); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
case FormatHTML:
|
||||||
|
w.WriteHeader(http.StatusNotImplemented)
|
||||||
|
_, _ = w.Write([]byte("format=html not implemented yet"))
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unsupported format: %s", format)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// csvRowHeader matches the SearXNG CSV writer key order.
|
||||||
|
var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"}
|
||||||
|
|
||||||
|
func writeCSV(w http.ResponseWriter, resp SearchResponse) error {
|
||||||
|
cw := csv.NewWriter(w)
|
||||||
|
defer cw.Flush()
|
||||||
|
|
||||||
|
if err := cw.Write(csvRowHeader); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, r := range resp.Results {
|
||||||
|
urlStr := ""
|
||||||
|
if r.URL != nil {
|
||||||
|
urlStr = *r.URL
|
||||||
|
}
|
||||||
|
host := hostFromURL(urlStr)
|
||||||
|
scoreStr := strconv.FormatFloat(r.Score, 'f', -1, 64)
|
||||||
|
row := []string{
|
||||||
|
r.Title,
|
||||||
|
urlStr,
|
||||||
|
r.Content,
|
||||||
|
host,
|
||||||
|
r.Engine,
|
||||||
|
scoreStr,
|
||||||
|
"result",
|
||||||
|
}
|
||||||
|
if err := cw.Write(row); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ans := range resp.Answers {
|
||||||
|
title := asString(ans["title"])
|
||||||
|
urlStr := asString(ans["url"])
|
||||||
|
content := asString(ans["content"])
|
||||||
|
engine := asString(ans["engine"])
|
||||||
|
scoreStr := scoreString(ans["score"])
|
||||||
|
host := hostFromURL(urlStr)
|
||||||
|
|
||||||
|
row := []string{
|
||||||
|
title,
|
||||||
|
urlStr,
|
||||||
|
content,
|
||||||
|
host,
|
||||||
|
engine,
|
||||||
|
scoreStr,
|
||||||
|
"answer",
|
||||||
|
}
|
||||||
|
if err := cw.Write(row); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range resp.Suggestions {
|
||||||
|
row := []string{s, "", "", "", "", "", "suggestion"}
|
||||||
|
if err := cw.Write(row); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range resp.Corrections {
|
||||||
|
row := []string{c, "", "", "", "", "", "correction"}
|
||||||
|
if err := cw.Write(row); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeRSS(w http.ResponseWriter, resp SearchResponse) error {
|
||||||
|
q := resp.Query
|
||||||
|
escapedTitle := xmlEscape("SearXNG search: " + q)
|
||||||
|
escapedDesc := xmlEscape("Search results for \"" + q + "\" - SearXNG")
|
||||||
|
escapedQueryTerms := xmlEscape(q)
|
||||||
|
|
||||||
|
link := "/search?q=" + url.QueryEscape(q)
|
||||||
|
opensearchQuery := fmt.Sprintf(`<opensearch:Query role="request" searchTerms="%s" startPage="1" />`, escapedQueryTerms)
|
||||||
|
|
||||||
|
// SearXNG template uses the number of results for both totalResults and itemsPerPage.
|
||||||
|
nr := resp.NumberOfResults
|
||||||
|
|
||||||
|
var items bytes.Buffer
|
||||||
|
for _, r := range resp.Results {
|
||||||
|
title := xmlEscape(r.Title)
|
||||||
|
urlStr := ""
|
||||||
|
if r.URL != nil {
|
||||||
|
urlStr = *r.URL
|
||||||
|
}
|
||||||
|
linkEsc := xmlEscape(urlStr)
|
||||||
|
desc := xmlEscape(r.Content)
|
||||||
|
|
||||||
|
pub := ""
|
||||||
|
if r.Pubdate != nil && strings.TrimSpace(*r.Pubdate) != "" {
|
||||||
|
pub = "<pubDate>" + xmlEscape(*r.Pubdate) + "</pubDate>"
|
||||||
|
}
|
||||||
|
|
||||||
|
items.WriteString(
|
||||||
|
fmt.Sprintf(
|
||||||
|
`<item><title>%s</title><type>result</type><link>%s</link><description>%s</description>%s</item>`,
|
||||||
|
title,
|
||||||
|
linkEsc,
|
||||||
|
desc,
|
||||||
|
pub,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
xml := fmt.Sprintf(
|
||||||
|
`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<?xml-stylesheet href="/rss.xsl" type="text/xsl"?>
|
||||||
|
<rss version="2.0"
|
||||||
|
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
|
||||||
|
xmlns:atom="http://www.w3.org/2005/Atom">
|
||||||
|
<channel>
|
||||||
|
<title>%s</title>
|
||||||
|
<link>%s</link>
|
||||||
|
<description>%s</description>
|
||||||
|
<opensearch:totalResults>%d</opensearch:totalResults>
|
||||||
|
<opensearch:startIndex>1</opensearch:startIndex>
|
||||||
|
<opensearch:itemsPerPage>%d</opensearch:itemsPerPage>
|
||||||
|
<atom:link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml"/>
|
||||||
|
%s
|
||||||
|
%s
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
`,
|
||||||
|
escapedTitle,
|
||||||
|
xmlEscape(link),
|
||||||
|
escapedDesc,
|
||||||
|
nr,
|
||||||
|
nr,
|
||||||
|
opensearchQuery,
|
||||||
|
items.String(),
|
||||||
|
)
|
||||||
|
|
||||||
|
_, err := w.Write([]byte(xml))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// xmlEscape returns s with XML-special characters escaped via xml.EscapeText
// (e.g. `<` -> `&lt;`, `&` -> `&amp;`).
func xmlEscape(s string) string {
	var buf bytes.Buffer
	// EscapeText only fails when its writer fails; bytes.Buffer never does.
	_ = xml.EscapeText(&buf, []byte(s))
	return buf.String()
}
|
||||||
|
|
||||||
|
// hostFromURL extracts the host component of urlStr; blank or unparseable
// input yields "".
func hostFromURL(urlStr string) string {
	if strings.TrimSpace(urlStr) == "" {
		return ""
	}
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return ""
	}
	return parsed.Host
}
|
||||||
|
|
||||||
|
// asString returns v if it holds a string, otherwise "".
func asString(v any) string {
	if s, ok := v.(string); ok {
		return s
	}
	return ""
}
|
||||||
|
|
||||||
|
func scoreString(v any) string {
|
||||||
|
switch t := v.(type) {
|
||||||
|
case float64:
|
||||||
|
return strconv.FormatFloat(t, 'f', -1, 64)
|
||||||
|
case float32:
|
||||||
|
return strconv.FormatFloat(float64(t), 'f', -1, 64)
|
||||||
|
case int:
|
||||||
|
return strconv.Itoa(t)
|
||||||
|
case int64:
|
||||||
|
return strconv.FormatInt(t, 10)
|
||||||
|
case json.Number:
|
||||||
|
if f, err := t.Float64(); err == nil {
|
||||||
|
return strconv.FormatFloat(f, 'f', -1, 64)
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
111
internal/search/service.go
Normal file
111
internal/search/service.go
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/engines"
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
"github.com/ashie/gosearch/internal/upstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ServiceConfig carries the knobs for constructing a Service.
type ServiceConfig struct {
	// UpstreamURL is the base URL of a fallback SearXNG instance; when empty,
	// no upstream client is created.
	UpstreamURL string
	// HTTPTimeout bounds outbound HTTP calls; non-positive values fall back
	// to a 10-second default in NewService.
	HTTPTimeout time.Duration
}
|
||||||
|
|
||||||
|
// Service orchestrates a search across locally-ported engines and, when
// configured, an upstream SearXNG instance.
type Service struct {
	// upstreamClient proxies unported engines to upstream SearXNG; nil when
	// no upstream is configured or its URL failed to parse.
	upstreamClient *upstream.Client
	// planner decides which requested engines run locally vs. upstream.
	planner *engines.Planner
	// localEngines maps engine name to its local (Go) implementation.
	localEngines map[string]engines.Engine
}
|
||||||
|
|
||||||
|
func NewService(cfg ServiceConfig) *Service {
|
||||||
|
timeout := cfg.HTTPTimeout
|
||||||
|
if timeout <= 0 {
|
||||||
|
timeout = 10 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
httpClient := &http.Client{Timeout: timeout}
|
||||||
|
|
||||||
|
var up *upstream.Client
|
||||||
|
if cfg.UpstreamURL != "" {
|
||||||
|
c, err := upstream.NewClient(cfg.UpstreamURL, timeout)
|
||||||
|
if err == nil {
|
||||||
|
up = c
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Service{
|
||||||
|
upstreamClient: up,
|
||||||
|
planner: engines.NewPlannerFromEnv(),
|
||||||
|
localEngines: engines.NewDefaultPortedEngines(httpClient),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
|
||||||
|
localEngines, upstreamEngines, _ := s.planner.Plan(req)
|
||||||
|
|
||||||
|
responses := make([]contracts.SearchResponse, 0, 2)
|
||||||
|
upstreamSet := map[string]bool{}
|
||||||
|
for _, e := range upstreamEngines {
|
||||||
|
upstreamSet[e] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, engineName := range localEngines {
|
||||||
|
eng, ok := s.localEngines[engineName]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
r, err := eng.Search(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
// MVP: fail fast so the client sees a real error.
|
||||||
|
return SearchResponse{}, err
|
||||||
|
}
|
||||||
|
responses = append(responses, r)
|
||||||
|
|
||||||
|
// Some engines (notably qwant due to anti-bot protections) can return
|
||||||
|
// zero local results depending on client/IP. If upstream SearXNG is
|
||||||
|
// configured, let it attempt the same engine as a fallback.
|
||||||
|
if shouldFallbackToUpstream(engineName, r) && !upstreamSet[engineName] {
|
||||||
|
upstreamEngines = append(upstreamEngines, engineName)
|
||||||
|
upstreamSet[engineName] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.upstreamClient != nil && len(upstreamEngines) > 0 {
|
||||||
|
r, err := s.upstreamClient.SearchJSON(ctx, req, upstreamEngines)
|
||||||
|
if err != nil {
|
||||||
|
return SearchResponse{}, err
|
||||||
|
}
|
||||||
|
responses = append(responses, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(responses) == 0 {
|
||||||
|
return SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: 0,
|
||||||
|
Results: []MainResult{},
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
merged := MergeResponses(responses)
|
||||||
|
if merged.Query == "" {
|
||||||
|
merged.Query = req.Query
|
||||||
|
}
|
||||||
|
return merged, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func shouldFallbackToUpstream(engineName string, r contracts.SearchResponse) bool {
|
||||||
|
if engineName != "qwant" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return len(r.Results) == 0 && len(r.Answers) == 0 && len(r.Infoboxes) == 0
|
||||||
|
}
|
||||||
|
|
||||||
20
internal/search/types.go
Normal file
20
internal/search/types.go
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
package search
|
||||||
|
|
||||||
|
import "github.com/ashie/gosearch/internal/contracts"
|
||||||
|
|
||||||
|
// Re-export the JSON contract types so the rest of the code can stay in the
// `internal/search` namespace without creating an import cycle.

// OutputFormat names a supported /search response serialization.
type OutputFormat = contracts.OutputFormat

// Output formats accepted by the /search endpoint.
const (
	FormatHTML = contracts.FormatHTML // accepted for compatibility (not yet implemented)
	FormatJSON = contracts.FormatJSON
	FormatCSV = contracts.FormatCSV
	FormatRSS = contracts.FormatRSS
)

// SearchRequest is the parsed, validated form of an incoming /search request.
type SearchRequest = contracts.SearchRequest

// SearchResponse is the SearXNG-compatible response envelope.
type SearchResponse = contracts.SearchResponse

// MainResult is a single search hit within SearchResponse.Results.
type MainResult = contracts.MainResult
|
||||||
|
|
||||||
112
internal/upstream/client.go
Normal file
112
internal/upstream/client.go
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
package upstream
|
||||||
|
|
||||||
|
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"

	"github.com/ashie/gosearch/internal/contracts"
)
|
||||||
|
|
||||||
|
// Client is a thin HTTP client for an upstream SearXNG instance's /search
// endpoint.
type Client struct {
	// baseURL is the normalized upstream base URL, with no trailing slash.
	baseURL string
	// http performs the outbound requests and carries the configured timeout.
	http *http.Client
}
|
||||||
|
|
||||||
|
func NewClient(baseURL string, timeout time.Duration) (*Client, error) {
|
||||||
|
if strings.TrimSpace(baseURL) == "" {
|
||||||
|
return nil, errors.New("upstream base URL is empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
u, err := url.Parse(baseURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid upstream base URL: %w", err)
|
||||||
|
}
|
||||||
|
// Normalize: trim trailing slash to make URL concatenation predictable.
|
||||||
|
base := strings.TrimRight(u.String(), "/")
|
||||||
|
|
||||||
|
if timeout <= 0 {
|
||||||
|
timeout = 10 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Client{
|
||||||
|
baseURL: base,
|
||||||
|
http: &http.Client{
|
||||||
|
Timeout: timeout,
|
||||||
|
},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, engines []string) (contracts.SearchResponse, error) {
|
||||||
|
// Always request upstream JSON; the Go service will handle csv/rss later.
|
||||||
|
form := url.Values{}
|
||||||
|
form.Set("q", req.Query)
|
||||||
|
form.Set("format", "json")
|
||||||
|
form.Set("pageno", fmt.Sprintf("%d", req.Pageno))
|
||||||
|
form.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
|
||||||
|
form.Set("language", req.Language)
|
||||||
|
|
||||||
|
if req.TimeRange != nil {
|
||||||
|
form.Set("time_range", *req.TimeRange)
|
||||||
|
}
|
||||||
|
if req.TimeoutLimit != nil {
|
||||||
|
form.Set("timeout_limit", formatFloat(*req.TimeoutLimit))
|
||||||
|
}
|
||||||
|
if len(req.Categories) > 0 {
|
||||||
|
form.Set("categories", strings.Join(req.Categories, ","))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(engines) > 0 {
|
||||||
|
form.Set("engines", strings.Join(engines, ","))
|
||||||
|
}
|
||||||
|
|
||||||
|
for engineName, kv := range req.EngineData {
|
||||||
|
for key, value := range kv {
|
||||||
|
// Mirror SearXNG's naming: `engine_data-<engine>-<key>=<value>`
|
||||||
|
form.Set(fmt.Sprintf("engine_data-%s-%s", engineName, key), value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := c.baseURL + "/search"
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
|
||||||
|
|
||||||
|
resp, err := c.http.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024))
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("upstream search failed: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode upstream JSON into our contract types.
|
||||||
|
var out contracts.SearchResponse
|
||||||
|
dec := json.NewDecoder(strings.NewReader(string(body)))
|
||||||
|
if err := dec.Decode(&out); err != nil {
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("decode upstream JSON: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatFloat renders a float for upstream form parameters using the
// shortest decimal representation that round-trips exactly (e.g. 2 -> "2",
// 1.5 -> "1.5"). The previous "%.6f"+trim approach truncated anything below
// 1e-6 to "0" and dropped digits past six decimal places.
func formatFloat(f float64) string {
	return strconv.FormatFloat(f, 'f', -1, 64)
}
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue