feat: build Go-based SearXNG-compatible search service
Implement an API-first Go rewrite with local engine adapters, upstream fallback, and Nix-based tooling, so searches can be served without the original UI while keeping response-format compatibility. Made-with: Cursor
This commit is contained in:
parent
7783367c71
commit
dc44837219
32 changed files with 3330 additions and 0 deletions
191
internal/engines/arxiv.go
Normal file
191
internal/engines/arxiv.go
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
package engines
|
||||
|
||||
import (
	"bytes"
	"context"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"

	"github.com/ashie/gosearch/internal/contracts"
)
|
||||
|
||||
const (
|
||||
arxivSearchPrefix = "all"
|
||||
arxivMaxResults = 10
|
||||
)
|
||||
|
||||
type ArxivEngine struct {
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
func (e *ArxivEngine) Name() string { return "arxiv" }
|
||||
|
||||
func (e *ArxivEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||
if e == nil || e.client == nil {
|
||||
return contracts.SearchResponse{}, errors.New("arxiv engine not initialized")
|
||||
}
|
||||
q := strings.TrimSpace(req.Query)
|
||||
if q == "" {
|
||||
return contracts.SearchResponse{Query: req.Query}, nil
|
||||
}
|
||||
|
||||
start := (req.Pageno - 1) * arxivMaxResults
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
|
||||
args := url.Values{}
|
||||
args.Set("search_query", fmt.Sprintf("%s:%s", arxivSearchPrefix, q))
|
||||
args.Set("start", fmt.Sprintf("%d", start))
|
||||
args.Set("max_results", fmt.Sprintf("%d", arxivMaxResults))
|
||||
|
||||
endpoint := "https://export.arxiv.org/api/query?" + args.Encode()
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
resp, err := e.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
||||
return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
raw, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
results, err := parseArxivAtom(raw)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
NumberOfResults: len(results),
|
||||
Results: results,
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
type arxivEntry struct {
|
||||
Title string
|
||||
ID string
|
||||
Summary string
|
||||
Published string
|
||||
}
|
||||
|
||||
func parseArxivAtom(xmlBytes []byte) ([]contracts.MainResult, error) {
|
||||
dec := xml.NewDecoder(bytes.NewReader(xmlBytes))
|
||||
|
||||
var entries []arxivEntry
|
||||
var cur *arxivEntry
|
||||
|
||||
for {
|
||||
tok, err := dec.Token()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch t := tok.(type) {
|
||||
case xml.StartElement:
|
||||
switch strings.ToLower(t.Name.Local) {
|
||||
case "entry":
|
||||
cur = &arxivEntry{}
|
||||
case "title":
|
||||
if cur != nil {
|
||||
var v string
|
||||
if err := dec.DecodeElement(&v, &t); err == nil {
|
||||
cur.Title = strings.TrimSpace(v)
|
||||
}
|
||||
}
|
||||
case "id":
|
||||
if cur != nil {
|
||||
var v string
|
||||
if err := dec.DecodeElement(&v, &t); err == nil {
|
||||
cur.ID = strings.TrimSpace(v)
|
||||
}
|
||||
}
|
||||
case "summary":
|
||||
if cur != nil {
|
||||
var v string
|
||||
if err := dec.DecodeElement(&v, &t); err == nil {
|
||||
cur.Summary = strings.TrimSpace(v)
|
||||
}
|
||||
}
|
||||
case "published":
|
||||
if cur != nil {
|
||||
var v string
|
||||
if err := dec.DecodeElement(&v, &t); err == nil {
|
||||
cur.Published = strings.TrimSpace(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
case xml.EndElement:
|
||||
if strings.ToLower(t.Name.Local) == "entry" && cur != nil {
|
||||
if cur.Title != "" && cur.ID != "" {
|
||||
entries = append(entries, *cur)
|
||||
}
|
||||
cur = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out := make([]contracts.MainResult, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
urlPtr := e.ID
|
||||
content := e.Summary
|
||||
pubdate := parseArxivPublished(e.Published)
|
||||
|
||||
out = append(out, contracts.MainResult{
|
||||
Template: "default.html",
|
||||
Title: e.Title,
|
||||
Content: content,
|
||||
URL: &urlPtr,
|
||||
Pubdate: pubdate,
|
||||
Engine: "arxiv",
|
||||
Category: "science",
|
||||
Score: 0,
|
||||
Positions: nil,
|
||||
Engines: []string{"arxiv"},
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func parseArxivPublished(s string) *string {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ArXiv uses RFC3339 like "2024-06-03T00:00:00Z".
|
||||
t, err := time.Parse(time.RFC3339, s)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
formatted := t.Format("2006-01-02 15:04:05-0700")
|
||||
return &formatted
|
||||
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue