- Add wikidata engine (wbsearchentities), tests, factory/planner/config - Wikipedia REST summary: infobox from extract, thumbnail, article URL - InfoboxView URL; render infobox list in results_inner + base styles - Preferences Wikidata toggle; engine badge color for wikidata Made-with: Cursor
133 lines
3.7 KiB
Go
133 lines
3.7 KiB
Go
// samsa — a privacy-respecting metasearch engine
|
|
// Copyright (C) 2026-present metamorphosis-dev
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
|
|
package engines
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/metamorphosis-dev/samsa/internal/contracts"
|
|
)
|
|
|
|
// wikidataAPIBase holds the URL of the Wikidata MediaWiki Action API.
// It is a variable rather than a constant so tests can point it at another
// endpoint.
var wikidataAPIBase = "https://www.wikidata.org/w/api.php"
|
|
|
|
// WikidataEngine is a search engine backed by the Wikidata wbsearchentities
// API module, which matches a query against entity labels and descriptions.
//
// Reference: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities
type WikidataEngine struct {
	// client performs the outgoing HTTP requests. Search reports an
	// "engine not initialized" error when it is nil.
	client *http.Client
}
|
|
|
|
// Name reports the engine identifier, "wikidata". It does not read the
// receiver, so it is safe to call on a nil *WikidataEngine.
func (e *WikidataEngine) Name() string {
	return "wikidata"
}
|
|
|
|
func (e *WikidataEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
|
if e == nil || e.client == nil {
|
|
return contracts.SearchResponse{}, errors.New("wikidata engine not initialized")
|
|
}
|
|
q := strings.TrimSpace(req.Query)
|
|
if q == "" {
|
|
return contracts.SearchResponse{Query: req.Query}, nil
|
|
}
|
|
|
|
lang := strings.TrimSpace(req.Language)
|
|
if lang == "" || lang == "auto" {
|
|
lang = "en"
|
|
}
|
|
lang = strings.SplitN(lang, "-", 2)[0]
|
|
lang = strings.ReplaceAll(lang, "_", "-")
|
|
if _, ok := validWikipediaLangs[lang]; !ok {
|
|
lang = "en"
|
|
}
|
|
|
|
u, err := url.Parse(wikidataAPIBase)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
qv := u.Query()
|
|
qv.Set("action", "wbsearchentities")
|
|
qv.Set("search", q)
|
|
qv.Set("language", lang)
|
|
qv.Set("limit", "10")
|
|
qv.Set("format", "json")
|
|
u.RawQuery = qv.Encode()
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
httpReq.Header.Set("User-Agent", "samsa/1.0 (Wikidata search; +https://github.com/metamorphosis-dev/samsa)")
|
|
|
|
resp, err := e.client.Do(httpReq)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
|
return contracts.SearchResponse{}, fmt.Errorf("wikidata upstream error: status %d", resp.StatusCode)
|
|
}
|
|
|
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
var api struct {
|
|
Search []struct {
|
|
ID string `json:"id"`
|
|
Label string `json:"label"`
|
|
Description string `json:"description"`
|
|
} `json:"search"`
|
|
}
|
|
if err := json.Unmarshal(body, &api); err != nil {
|
|
return contracts.SearchResponse{}, fmt.Errorf("wikidata JSON parse error: %w", err)
|
|
}
|
|
|
|
results := make([]contracts.MainResult, 0, len(api.Search))
|
|
for _, hit := range api.Search {
|
|
id := strings.TrimSpace(hit.ID)
|
|
if id == "" || !strings.HasPrefix(id, "Q") {
|
|
continue
|
|
}
|
|
pageURL := "https://www.wikidata.org/wiki/" + url.PathEscape(id)
|
|
title := strings.TrimSpace(hit.Label)
|
|
if title == "" {
|
|
title = id
|
|
}
|
|
content := strings.TrimSpace(hit.Description)
|
|
urlPtr := pageURL
|
|
results = append(results, contracts.MainResult{
|
|
Template: "default.html",
|
|
Title: title,
|
|
Content: content,
|
|
URL: &urlPtr,
|
|
Engine: "wikidata",
|
|
Category: "general",
|
|
Engines: []string{"wikidata"},
|
|
})
|
|
}
|
|
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: len(results),
|
|
Results: results,
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: []string{},
|
|
UnresponsiveEngines: [][2]string{},
|
|
}, nil
|
|
}
|