feat: Wikidata engine and Wikipedia knowledge infobox
- Add wikidata engine (wbsearchentities), tests, factory/planner/config
- Wikipedia REST summary: infobox from extract, thumbnail, article URL
- InfoboxView URL; render infobox list in results_inner + base styles
- Preferences Wikidata toggle; engine badge color for wikidata

Made-with: Cursor
This commit is contained in:
parent
6e45abb150
commit
24577b27be
13 changed files with 344 additions and 34 deletions
|
|
@ -28,7 +28,7 @@ url = ""
|
|||
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
|
||||
# Engines not listed here will be proxied to the upstream instance.
|
||||
# Include bing_images, ddg_images, qwant_images for image search when [upstream].url is empty.
|
||||
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"]
|
||||
local_ported = ["wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"]
|
||||
|
||||
[engines.brave]
|
||||
# Brave Search API key (env: BRAVE_API_KEY)
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ func defaultConfig() *Config {
|
|||
},
|
||||
Upstream: UpstreamConfig{},
|
||||
Engines: EnginesConfig{
|
||||
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"},
|
||||
LocalPorted: []string{"wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"},
|
||||
Qwant: QwantConfig{
|
||||
Category: "web-lite",
|
||||
ResultsPerPage: 10,
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ func TestLoadDefaults(t *testing.T) {
|
|||
if cfg.Server.Port != 5355 {
|
||||
t.Errorf("expected default port 5355, got %d", cfg.Server.Port)
|
||||
}
|
||||
if len(cfg.Engines.LocalPorted) != 14 {
|
||||
t.Errorf("expected 14 default engines, got %d", len(cfg.Engines.LocalPorted))
|
||||
if len(cfg.Engines.LocalPorted) != 15 {
|
||||
t.Errorf("expected 15 default engines, got %d", len(cfg.Engines.LocalPorted))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string
|
|||
|
||||
return map[string]Engine{
|
||||
"wikipedia": &WikipediaEngine{client: client},
|
||||
"wikidata": &WikidataEngine{client: client},
|
||||
"arxiv": &ArxivEngine{client: client},
|
||||
"crossref": &CrossrefEngine{client: client},
|
||||
"braveapi": &BraveAPIEngine{
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import (
|
|||
)
|
||||
|
||||
var defaultPortedEngines = []string{
|
||||
"wikipedia", "arxiv", "crossref", "braveapi",
|
||||
"wikipedia", "wikidata", "arxiv", "crossref", "braveapi",
|
||||
"brave", "qwant", "duckduckgo", "github", "reddit",
|
||||
"bing", "google", "youtube", "stackoverflow",
|
||||
// Image engines
|
||||
|
|
@ -106,6 +106,7 @@ func inferFromCategories(categories []string) []string {
|
|||
switch strings.TrimSpace(strings.ToLower(c)) {
|
||||
case "general":
|
||||
set["wikipedia"] = true
|
||||
set["wikidata"] = true
|
||||
set["braveapi"] = true
|
||||
set["qwant"] = true
|
||||
set["duckduckgo"] = true
|
||||
|
|
@ -134,9 +135,9 @@ func inferFromCategories(categories []string) []string {
|
|||
}
|
||||
// stable order
|
||||
order := map[string]int{
|
||||
"wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6,
|
||||
"arxiv": 7, "crossref": 8, "github": 9, "stackoverflow": 10, "reddit": 11, "youtube": 12,
|
||||
"bing_images": 13, "ddg_images": 14, "qwant_images": 15,
|
||||
"wikipedia": 0, "wikidata": 1, "braveapi": 2, "brave": 3, "qwant": 4, "duckduckgo": 5, "bing": 6, "google": 7,
|
||||
"arxiv": 8, "crossref": 9, "github": 10, "stackoverflow": 11, "reddit": 12, "youtube": 13,
|
||||
"bing_images": 14, "ddg_images": 15, "qwant_images": 16,
|
||||
}
|
||||
sortByOrder(out, order)
|
||||
return out
|
||||
|
|
|
|||
133
internal/engines/wikidata.go
Normal file
133
internal/engines/wikidata.go
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
// samsa — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

package engines

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/metamorphosis-dev/samsa/internal/contracts"
)

// wikidataAPIBase is the Wikidata MediaWiki API endpoint (overridable in tests).
var wikidataAPIBase = "https://www.wikidata.org/w/api.php"

// WikidataEngine searches entity labels and descriptions via the Wikidata API.
// See: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities
type WikidataEngine struct {
	client *http.Client
}

// Name returns the engine identifier used for config, planning, and result
// attribution.
func (e *WikidataEngine) Name() string { return "wikidata" }

// Search queries the wbsearchentities endpoint with the request query and
// maps each Q-entity hit to a general-category result. An empty query short-
// circuits with an empty response and never contacts the upstream API.
func (e *WikidataEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("wikidata engine not initialized")
	}
	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}

	// Normalize the requested language to a bare language code accepted by
	// the API, falling back to English for anything unknown.
	lang := strings.TrimSpace(req.Language)
	if lang == "" || lang == "auto" {
		lang = "en"
	}
	// Fix: convert underscores to hyphens BEFORE splitting. The previous
	// order split on "-" first, so underscore locales such as "pt_BR" were
	// never reduced to their base code ("pt") and silently fell back to "en".
	lang = strings.ReplaceAll(lang, "_", "-")
	lang = strings.SplitN(lang, "-", 2)[0]
	if _, ok := validWikipediaLangs[lang]; !ok {
		lang = "en"
	}

	u, err := url.Parse(wikidataAPIBase)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	qv := u.Query()
	qv.Set("action", "wbsearchentities")
	qv.Set("search", q)
	qv.Set("language", lang)
	qv.Set("limit", "10")
	qv.Set("format", "json")
	u.RawQuery = qv.Encode()

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "samsa/1.0 (Wikidata search; +https://github.com/metamorphosis-dev/samsa)")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Drain a bounded amount of the body so the transport can reuse the
		// connection; the drain result is intentionally ignored.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("wikidata upstream error: status %d", resp.StatusCode)
	}

	// Cap the response body at 2 MiB to guard against a misbehaving upstream.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
	if err != nil {
		return contracts.SearchResponse{}, err
	}

	var api struct {
		Search []struct {
			ID          string `json:"id"`
			Label       string `json:"label"`
			Description string `json:"description"`
		} `json:"search"`
	}
	if err := json.Unmarshal(body, &api); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikidata JSON parse error: %w", err)
	}

	results := make([]contracts.MainResult, 0, len(api.Search))
	for _, hit := range api.Search {
		id := strings.TrimSpace(hit.ID)
		// Only item entities (Q-IDs) are surfaced; properties/lexemes are
		// skipped.
		if id == "" || !strings.HasPrefix(id, "Q") {
			continue
		}
		pageURL := "https://www.wikidata.org/wiki/" + url.PathEscape(id)
		title := strings.TrimSpace(hit.Label)
		if title == "" {
			title = id
		}
		content := strings.TrimSpace(hit.Description)
		urlPtr := pageURL
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &urlPtr,
			Engine:   "wikidata",
			Category: "general",
			Engines:  []string{"wikidata"},
		})
	}

	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
|
||||
51
internal/engines/wikidata_test.go
Normal file
51
internal/engines/wikidata_test.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
package engines

import (
	"context"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/metamorphosis-dev/samsa/internal/contracts"
)

// TestWikidataEngine_Search checks that the engine issues a wbsearchentities
// request with the expected parameters and maps an entity hit onto a result.
func TestWikidataEngine_Search(t *testing.T) {
	stub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		params := r.URL.Query()
		if action := params.Get("action"); action != "wbsearchentities" {
			t.Errorf("action=%q", action)
		}
		if search := params.Get("search"); search != "test" {
			t.Errorf("search=%q want test", search)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"search":[{"id":"Q937","label":"Go","description":"Programming language"}]}`))
	}))
	defer stub.Close()

	// Point the package-level endpoint at the stub and restore it afterwards.
	prevBase := wikidataAPIBase
	t.Cleanup(func() { wikidataAPIBase = prevBase })
	wikidataAPIBase = stub.URL + "/w/api.php"

	engine := &WikidataEngine{client: stub.Client()}
	request := contracts.SearchRequest{Query: "test", Language: "en"}
	resp, err := engine.Search(context.Background(), request)
	if err != nil {
		t.Fatal(err)
	}
	if count := len(resp.Results); count != 1 {
		t.Fatalf("expected 1 result, got %d", count)
	}

	first := resp.Results[0]
	if first.Engine != "wikidata" {
		t.Errorf("engine=%q", first.Engine)
	}
	if first.Title != "Go" {
		t.Errorf("title=%q", first.Title)
	}
	if first.URL == nil || !strings.Contains(*first.URL, "Q937") {
		t.Errorf("url=%v", first.URL)
	}
}
|
||||
|
|
@ -141,9 +141,13 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
|
|||
var api struct {
|
||||
Title string `json:"title"`
|
||||
Description string `json:"description"`
|
||||
Extract string `json:"extract"`
|
||||
Titles struct {
|
||||
Display string `json:"display"`
|
||||
} `json:"titles"`
|
||||
Thumbnail struct {
|
||||
Source string `json:"source"`
|
||||
} `json:"thumbnail"`
|
||||
ContentURLs struct {
|
||||
Desktop struct {
|
||||
Page string `json:"page"`
|
||||
|
|
@ -175,11 +179,37 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
|
|||
title = api.Title
|
||||
}
|
||||
|
||||
content := api.Description
|
||||
content := strings.TrimSpace(api.Extract)
|
||||
if content == "" {
|
||||
content = strings.TrimSpace(api.Description)
|
||||
}
|
||||
|
||||
urlPtr := pageURL
|
||||
pub := (*string)(nil)
|
||||
|
||||
// Knowledge infobox for HTML (Wikipedia REST summary: title, extract, thumbnail, link).
|
||||
var infoboxes []map[string]any
|
||||
ibTitle := api.Titles.Display
|
||||
if ibTitle == "" {
|
||||
ibTitle = api.Title
|
||||
}
|
||||
body := strings.TrimSpace(api.Extract)
|
||||
if body == "" {
|
||||
body = strings.TrimSpace(api.Description)
|
||||
}
|
||||
imgSrc := strings.TrimSpace(api.Thumbnail.Source)
|
||||
if ibTitle != "" || body != "" || imgSrc != "" {
|
||||
row := map[string]any{
|
||||
"title": ibTitle,
|
||||
"infobox": body,
|
||||
"url": pageURL,
|
||||
}
|
||||
if imgSrc != "" {
|
||||
row["img_src"] = imgSrc
|
||||
}
|
||||
infoboxes = append(infoboxes, row)
|
||||
}
|
||||
|
||||
results := []contracts.MainResult{
|
||||
{
|
||||
Template: "default.html",
|
||||
|
|
@ -202,9 +232,8 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
|
|||
Results: results,
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Infoboxes: infoboxes,
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ const maxQueryLength = 1024
|
|||
|
||||
// knownEngineNames is the allowlist of valid engine identifiers.
|
||||
var knownEngineNames = map[string]bool{
|
||||
"wikipedia": true, "arxiv": true, "crossref": true,
|
||||
"wikipedia": true, "wikidata": true, "arxiv": true, "crossref": true,
|
||||
"braveapi": true, "brave": true, "qwant": true,
|
||||
"duckduckgo": true, "github": true, "reddit": true,
|
||||
"bing": true, "google": true, "youtube": true,
|
||||
|
|
|
|||
|
|
@ -528,6 +528,7 @@
|
|||
.result[data-engine="braveapi"], .engine-badge[data-engine="braveapi"] { --engine-accent: #ff6600; }
|
||||
.result[data-engine="qwant"], .engine-badge[data-engine="qwant"] { --engine-accent: #5c97ff; }
|
||||
.result[data-engine="wikipedia"], .engine-badge[data-engine="wikipedia"] { --engine-accent: #a3a3a3; }
|
||||
.result[data-engine="wikidata"], .engine-badge[data-engine="wikidata"] { --engine-accent: #339966; }
|
||||
.result[data-engine="github"], .engine-badge[data-engine="github"] { --engine-accent: #8b5cf6; }
|
||||
.result[data-engine="reddit"], .engine-badge[data-engine="reddit"] { --engine-accent: #ff4500; }
|
||||
.result[data-engine="youtube"], .engine-badge[data-engine="youtube"] { --engine-accent: #ff0000; }
|
||||
|
|
@ -538,6 +539,73 @@
|
|||
.result[data-engine="ddg_images"], .engine-badge[data-engine="ddg_images"] { --engine-accent: #de5833; }
|
||||
.result[data-engine="qwant_images"], .engine-badge[data-engine="qwant_images"] { --engine-accent: #5c97ff; }
|
||||
|
||||
/* Wikipedia / knowledge infobox */
|
||||
.infobox-list {
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
|
||||
.infobox-card {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
flex-wrap: wrap;
|
||||
gap: 1rem;
|
||||
align-items: flex-start;
|
||||
padding: 1rem 1.15rem;
|
||||
background: var(--bg-secondary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-md);
|
||||
box-shadow: var(--shadow-sm);
|
||||
}
|
||||
|
||||
.infobox-image-wrap {
|
||||
flex-shrink: 0;
|
||||
width: 120px;
|
||||
height: 120px;
|
||||
border-radius: var(--radius-sm);
|
||||
overflow: hidden;
|
||||
background: var(--bg-tertiary);
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.infobox-img {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.infobox-main {
|
||||
flex: 1;
|
||||
min-width: min(100%, 220px);
|
||||
}
|
||||
|
||||
.infobox-title {
|
||||
font-size: 1.15rem;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 0.5rem;
|
||||
line-height: 1.3;
|
||||
}
|
||||
|
||||
.infobox-content {
|
||||
font-size: 0.9rem;
|
||||
color: var(--desc-color);
|
||||
line-height: 1.55;
|
||||
margin-bottom: 0.65rem;
|
||||
}
|
||||
|
||||
.infobox-link {
|
||||
display: inline-block;
|
||||
font-size: 0.875rem;
|
||||
font-weight: 500;
|
||||
color: var(--title-link);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.infobox-link:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.dialog-error {
|
||||
padding: 0.65rem 0.85rem;
|
||||
margin-bottom: 0.75rem;
|
||||
|
|
|
|||
|
|
@ -40,6 +40,10 @@
|
|||
<input type="checkbox" name="engine" value="wikipedia" checked>
|
||||
<span>Wikipedia</span>
|
||||
</label>
|
||||
<label class="engine-toggle">
|
||||
<input type="checkbox" name="engine" value="wikidata" checked>
|
||||
<span>Wikidata</span>
|
||||
</label>
|
||||
<label class="engine-toggle">
|
||||
<input type="checkbox" name="engine" value="github">
|
||||
<span>GitHub</span>
|
||||
|
|
|
|||
|
|
@ -3,7 +3,26 @@
|
|||
<div id="corrections" class="correction">{{range .Corrections}}{{.}} {{end}}</div>
|
||||
{{end}}
|
||||
|
||||
{{if or .Answers .Infoboxes}}
|
||||
{{if .Infoboxes}}
|
||||
<div class="infobox-list" role="region" aria-label="Summary">
|
||||
{{range .Infoboxes}}
|
||||
<aside class="infobox-card">
|
||||
{{if .ImgSrc}}
|
||||
<div class="infobox-image-wrap">
|
||||
<img src="{{.ImgSrc}}" alt="" class="infobox-img" loading="lazy" width="120" height="120">
|
||||
</div>
|
||||
{{end}}
|
||||
<div class="infobox-main">
|
||||
{{if .Title}}<h2 class="infobox-title">{{.Title}}</h2>{{end}}
|
||||
{{if .Content}}<p class="infobox-content">{{.Content}}</p>{{end}}
|
||||
{{if .URL}}<a href="{{.URL}}" class="infobox-link" target="_blank" rel="noopener noreferrer">Read article on Wikipedia</a>{{end}}
|
||||
</div>
|
||||
</aside>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .Answers}}
|
||||
<div id="answers">
|
||||
{{range .Answers}}
|
||||
<div class="dialog-error">{{.}}</div>
|
||||
|
|
@ -38,7 +57,7 @@
|
|||
{{end}}
|
||||
{{end}}
|
||||
{{end}}
|
||||
{{else if not .Answers}}
|
||||
{{else if and (not .Answers) (not .Infoboxes)}}
|
||||
<div class="no-results">
|
||||
<div class="no-results-icon" aria-hidden="true">🔍</div>
|
||||
<h2>No results found</h2>
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ type InfoboxView struct {
|
|||
Title string
|
||||
Content string
|
||||
ImgSrc string
|
||||
URL string
|
||||
}
|
||||
|
||||
// FilterOption represents a filter radio option for the sidebar.
|
||||
|
|
@ -273,7 +274,10 @@ func FromResponse(resp contracts.SearchResponse, query string, pageno int, activ
|
|||
if v, ok := ib["img_src"].(string); ok {
|
||||
iv.ImgSrc = util.SanitizeResultURL(v)
|
||||
}
|
||||
if iv.Title != "" || iv.Content != "" {
|
||||
if v, ok := ib["url"].(string); ok {
|
||||
iv.URL = util.SanitizeResultURL(v)
|
||||
}
|
||||
if iv.Title != "" || iv.Content != "" || iv.ImgSrc != "" {
|
||||
pd.Infoboxes = append(pd.Infoboxes, iv)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue