feat: Wikidata engine and Wikipedia knowledge infobox

- Add wikidata engine (wbsearchentities), tests, factory/planner/config
- Wikipedia REST summary: infobox from extract, thumbnail, article URL
- InfoboxView URL; render infobox list in results_inner + base styles
- Preferences Wikidata toggle; engine badge color for wikidata

Made-with: Cursor
This commit is contained in:
ashisgreat22 2026-03-24 00:07:12 +01:00
parent 6e45abb150
commit 24577b27be
13 changed files with 344 additions and 34 deletions

View file

@ -28,7 +28,7 @@ url = ""
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
# Engines not listed here will be proxied to the upstream instance. # Engines not listed here will be proxied to the upstream instance.
# Include bing_images, ddg_images, qwant_images for image search when [upstream].url is empty. # Include bing_images, ddg_images, qwant_images for image search when [upstream].url is empty.
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"] local_ported = ["wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"]
[engines.brave] [engines.brave]
# Brave Search API key (env: BRAVE_API_KEY) # Brave Search API key (env: BRAVE_API_KEY)

View file

@ -165,7 +165,7 @@ func defaultConfig() *Config {
}, },
Upstream: UpstreamConfig{}, Upstream: UpstreamConfig{},
Engines: EnginesConfig{ Engines: EnginesConfig{
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"}, LocalPorted: []string{"wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"},
Qwant: QwantConfig{ Qwant: QwantConfig{
Category: "web-lite", Category: "web-lite",
ResultsPerPage: 10, ResultsPerPage: 10,

View file

@ -14,8 +14,8 @@ func TestLoadDefaults(t *testing.T) {
if cfg.Server.Port != 5355 { if cfg.Server.Port != 5355 {
t.Errorf("expected default port 5355, got %d", cfg.Server.Port) t.Errorf("expected default port 5355, got %d", cfg.Server.Port)
} }
if len(cfg.Engines.LocalPorted) != 14 { if len(cfg.Engines.LocalPorted) != 15 {
t.Errorf("expected 14 default engines, got %d", len(cfg.Engines.LocalPorted)) t.Errorf("expected 15 default engines, got %d", len(cfg.Engines.LocalPorted))
} }
} }

View file

@ -50,6 +50,7 @@ func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string
return map[string]Engine{ return map[string]Engine{
"wikipedia": &WikipediaEngine{client: client}, "wikipedia": &WikipediaEngine{client: client},
"wikidata": &WikidataEngine{client: client},
"arxiv": &ArxivEngine{client: client}, "arxiv": &ArxivEngine{client: client},
"crossref": &CrossrefEngine{client: client}, "crossref": &CrossrefEngine{client: client},
"braveapi": &BraveAPIEngine{ "braveapi": &BraveAPIEngine{

View file

@ -24,7 +24,7 @@ import (
) )
var defaultPortedEngines = []string{ var defaultPortedEngines = []string{
"wikipedia", "arxiv", "crossref", "braveapi", "wikipedia", "wikidata", "arxiv", "crossref", "braveapi",
"brave", "qwant", "duckduckgo", "github", "reddit", "brave", "qwant", "duckduckgo", "github", "reddit",
"bing", "google", "youtube", "stackoverflow", "bing", "google", "youtube", "stackoverflow",
// Image engines // Image engines
@ -106,6 +106,7 @@ func inferFromCategories(categories []string) []string {
switch strings.TrimSpace(strings.ToLower(c)) { switch strings.TrimSpace(strings.ToLower(c)) {
case "general": case "general":
set["wikipedia"] = true set["wikipedia"] = true
set["wikidata"] = true
set["braveapi"] = true set["braveapi"] = true
set["qwant"] = true set["qwant"] = true
set["duckduckgo"] = true set["duckduckgo"] = true
@ -134,9 +135,9 @@ func inferFromCategories(categories []string) []string {
} }
// stable order // stable order
order := map[string]int{ order := map[string]int{
"wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6, "wikipedia": 0, "wikidata": 1, "braveapi": 2, "brave": 3, "qwant": 4, "duckduckgo": 5, "bing": 6, "google": 7,
"arxiv": 7, "crossref": 8, "github": 9, "stackoverflow": 10, "reddit": 11, "youtube": 12, "arxiv": 8, "crossref": 9, "github": 10, "stackoverflow": 11, "reddit": 12, "youtube": 13,
"bing_images": 13, "ddg_images": 14, "qwant_images": 15, "bing_images": 14, "ddg_images": 15, "qwant_images": 16,
} }
sortByOrder(out, order) sortByOrder(out, order)
return out return out

View file

@ -0,0 +1,133 @@
// samsa — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/metamorphosis-dev/samsa/internal/contracts"
)
// wikidataAPIBase is the Wikidata MediaWiki API endpoint (overridable in tests).
var wikidataAPIBase = "https://www.wikidata.org/w/api.php"
// WikidataEngine searches entity labels and descriptions via the Wikidata API.
// See: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities
type WikidataEngine struct {
	client *http.Client // HTTP client for upstream requests; must be non-nil (validated in Search)
}
func (e *WikidataEngine) Name() string { return "wikidata" }
// Search queries the Wikidata wbsearchentities API and maps entity hits
// (label + description) to general-category results. An empty or
// whitespace-only query short-circuits with an empty response and no
// network call. Non-item hits (IDs not starting with "Q") are skipped.
func (e *WikidataEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("wikidata engine not initialized")
	}
	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	lang := strings.TrimSpace(req.Language)
	if lang == "" || lang == "auto" {
		lang = "en"
	}
	// Normalize locale separators FIRST ("en_US" -> "en-US"), then keep only
	// the base language ("en-US" -> "en"). The previous order split before
	// replacing, so underscore locales like "de_AT" survived intact, failed
	// the validWikipediaLangs lookup, and always fell back to English.
	lang = strings.ReplaceAll(lang, "_", "-")
	lang = strings.SplitN(lang, "-", 2)[0]
	if _, ok := validWikipediaLangs[lang]; !ok {
		lang = "en"
	}
	u, err := url.Parse(wikidataAPIBase)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	qv := u.Query()
	qv.Set("action", "wbsearchentities")
	qv.Set("search", q)
	qv.Set("language", lang)
	qv.Set("limit", "10")
	qv.Set("format", "json")
	u.RawQuery = qv.Encode()
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "samsa/1.0 (Wikidata search; +https://github.com/metamorphosis-dev/samsa)")
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Drain (bounded) so the transport can reuse the connection.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("wikidata upstream error: status %d", resp.StatusCode)
	}
	// Cap the body read to guard against a misbehaving upstream.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	var api struct {
		Search []struct {
			ID          string `json:"id"`
			Label       string `json:"label"`
			Description string `json:"description"`
		} `json:"search"`
	}
	if err := json.Unmarshal(body, &api); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikidata JSON parse error: %w", err)
	}
	results := make([]contracts.MainResult, 0, len(api.Search))
	for _, hit := range api.Search {
		id := strings.TrimSpace(hit.ID)
		// Only item entities (Q-IDs) map to meaningful result pages;
		// properties (P...) and lexemes (L...) are skipped.
		if id == "" || !strings.HasPrefix(id, "Q") {
			continue
		}
		pageURL := "https://www.wikidata.org/wiki/" + url.PathEscape(id)
		title := strings.TrimSpace(hit.Label)
		if title == "" {
			title = id // label can be absent for some languages; fall back to the entity ID
		}
		content := strings.TrimSpace(hit.Description)
		urlPtr := pageURL
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &urlPtr,
			Engine:   "wikidata",
			Category: "general",
			Engines:  []string{"wikidata"},
		})
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}

View file

@ -0,0 +1,51 @@
package engines
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/metamorphosis-dev/samsa/internal/contracts"
)
// TestWikidataEngine_Search verifies that the engine sends a well-formed
// wbsearchentities request (action, search term, language) and maps an
// entity hit to a result with the expected engine name, title,
// description content, and entity URL.
func TestWikidataEngine_Search(t *testing.T) {
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on a server goroutine: Errorf is safe here,
		// Fatalf is not.
		if got := r.URL.Query().Get("action"); got != "wbsearchentities" {
			t.Errorf("action=%q want wbsearchentities", got)
		}
		if got := r.URL.Query().Get("search"); got != "test" {
			t.Errorf("search=%q want test", got)
		}
		if got := r.URL.Query().Get("language"); got != "en" {
			t.Errorf("language=%q want en", got)
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"search":[{"id":"Q937","label":"Go","description":"Programming language"}]}`))
	}))
	defer ts.Close()
	// Point the engine at the stub server; restore the real endpoint on exit.
	orig := wikidataAPIBase
	t.Cleanup(func() { wikidataAPIBase = orig })
	wikidataAPIBase = ts.URL + "/w/api.php"
	e := &WikidataEngine{client: ts.Client()}
	resp, err := e.Search(context.Background(), contracts.SearchRequest{
		Query:    "test",
		Language: "en",
	})
	if err != nil {
		t.Fatal(err)
	}
	if len(resp.Results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(resp.Results))
	}
	r0 := resp.Results[0]
	if r0.Engine != "wikidata" {
		t.Errorf("engine=%q", r0.Engine)
	}
	if r0.Title != "Go" {
		t.Errorf("title=%q", r0.Title)
	}
	// The entity description must be surfaced as the result content.
	if r0.Content != "Programming language" {
		t.Errorf("content=%q", r0.Content)
	}
	if r0.URL == nil || !strings.Contains(*r0.URL, "Q937") {
		t.Errorf("url=%v", r0.URL)
	}
}

View file

@ -123,13 +123,13 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
if resp.StatusCode == http.StatusNotFound { if resp.StatusCode == http.StatusNotFound {
return contracts.SearchResponse{ return contracts.SearchResponse{
Query: req.Query, Query: req.Query,
NumberOfResults: 0, NumberOfResults: 0,
Results: []contracts.MainResult{}, Results: []contracts.MainResult{},
Answers: []map[string]any{}, Answers: []map[string]any{},
Corrections: []string{}, Corrections: []string{},
Infoboxes: []map[string]any{}, Infoboxes: []map[string]any{},
Suggestions: []string{}, Suggestions: []string{},
UnresponsiveEngines: [][2]string{}, UnresponsiveEngines: [][2]string{},
}, nil }, nil
} }
@ -141,9 +141,13 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
var api struct { var api struct {
Title string `json:"title"` Title string `json:"title"`
Description string `json:"description"` Description string `json:"description"`
Extract string `json:"extract"`
Titles struct { Titles struct {
Display string `json:"display"` Display string `json:"display"`
} `json:"titles"` } `json:"titles"`
Thumbnail struct {
Source string `json:"source"`
} `json:"thumbnail"`
ContentURLs struct { ContentURLs struct {
Desktop struct { Desktop struct {
Page string `json:"page"` Page string `json:"page"`
@ -160,7 +164,7 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
// API returned a non-standard payload; treat as no result. // API returned a non-standard payload; treat as no result.
return contracts.SearchResponse{ return contracts.SearchResponse{
Query: req.Query, Query: req.Query,
NumberOfResults: 0, NumberOfResults: 0,
Results: []contracts.MainResult{}, Results: []contracts.MainResult{},
Answers: []map[string]any{}, Answers: []map[string]any{},
Corrections: []string{}, Corrections: []string{},
@ -175,36 +179,61 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques
title = api.Title title = api.Title
} }
content := api.Description content := strings.TrimSpace(api.Extract)
if content == "" {
content = strings.TrimSpace(api.Description)
}
urlPtr := pageURL urlPtr := pageURL
pub := (*string)(nil) pub := (*string)(nil)
// Knowledge infobox for HTML (Wikipedia REST summary: title, extract, thumbnail, link).
var infoboxes []map[string]any
ibTitle := api.Titles.Display
if ibTitle == "" {
ibTitle = api.Title
}
body := strings.TrimSpace(api.Extract)
if body == "" {
body = strings.TrimSpace(api.Description)
}
imgSrc := strings.TrimSpace(api.Thumbnail.Source)
if ibTitle != "" || body != "" || imgSrc != "" {
row := map[string]any{
"title": ibTitle,
"infobox": body,
"url": pageURL,
}
if imgSrc != "" {
row["img_src"] = imgSrc
}
infoboxes = append(infoboxes, row)
}
results := []contracts.MainResult{ results := []contracts.MainResult{
{ {
Template: "default.html", Template: "default.html",
Title: title, Title: title,
Content: content, Content: content,
URL: &urlPtr, URL: &urlPtr,
Pubdate: pub, Pubdate: pub,
Engine: "wikipedia", Engine: "wikipedia",
Score: 0, Score: 0,
Category: "general", Category: "general",
Priority: "", Priority: "",
Positions: nil, Positions: nil,
Engines: []string{"wikipedia"}, Engines: []string{"wikipedia"},
}, },
} }
return contracts.SearchResponse{ return contracts.SearchResponse{
Query: req.Query, Query: req.Query,
NumberOfResults: len(results), NumberOfResults: len(results),
Results: results, Results: results,
Answers: []map[string]any{}, Answers: []map[string]any{},
Corrections: []string{}, Corrections: []string{},
Infoboxes: []map[string]any{}, Infoboxes: infoboxes,
Suggestions: []string{}, Suggestions: []string{},
UnresponsiveEngines: [][2]string{}, UnresponsiveEngines: [][2]string{},
}, nil }, nil
} }

View file

@ -31,7 +31,7 @@ const maxQueryLength = 1024
// knownEngineNames is the allowlist of valid engine identifiers. // knownEngineNames is the allowlist of valid engine identifiers.
var knownEngineNames = map[string]bool{ var knownEngineNames = map[string]bool{
"wikipedia": true, "arxiv": true, "crossref": true, "wikipedia": true, "wikidata": true, "arxiv": true, "crossref": true,
"braveapi": true, "brave": true, "qwant": true, "braveapi": true, "brave": true, "qwant": true,
"duckduckgo": true, "github": true, "reddit": true, "duckduckgo": true, "github": true, "reddit": true,
"bing": true, "google": true, "youtube": true, "bing": true, "google": true, "youtube": true,

View file

@ -528,6 +528,7 @@
.result[data-engine="braveapi"], .engine-badge[data-engine="braveapi"] { --engine-accent: #ff6600; } .result[data-engine="braveapi"], .engine-badge[data-engine="braveapi"] { --engine-accent: #ff6600; }
.result[data-engine="qwant"], .engine-badge[data-engine="qwant"] { --engine-accent: #5c97ff; } .result[data-engine="qwant"], .engine-badge[data-engine="qwant"] { --engine-accent: #5c97ff; }
.result[data-engine="wikipedia"], .engine-badge[data-engine="wikipedia"] { --engine-accent: #a3a3a3; } .result[data-engine="wikipedia"], .engine-badge[data-engine="wikipedia"] { --engine-accent: #a3a3a3; }
.result[data-engine="wikidata"], .engine-badge[data-engine="wikidata"] { --engine-accent: #339966; }
.result[data-engine="github"], .engine-badge[data-engine="github"] { --engine-accent: #8b5cf6; } .result[data-engine="github"], .engine-badge[data-engine="github"] { --engine-accent: #8b5cf6; }
.result[data-engine="reddit"], .engine-badge[data-engine="reddit"] { --engine-accent: #ff4500; } .result[data-engine="reddit"], .engine-badge[data-engine="reddit"] { --engine-accent: #ff4500; }
.result[data-engine="youtube"], .engine-badge[data-engine="youtube"] { --engine-accent: #ff0000; } .result[data-engine="youtube"], .engine-badge[data-engine="youtube"] { --engine-accent: #ff0000; }
@ -538,6 +539,73 @@
.result[data-engine="ddg_images"], .engine-badge[data-engine="ddg_images"] { --engine-accent: #de5833; } .result[data-engine="ddg_images"], .engine-badge[data-engine="ddg_images"] { --engine-accent: #de5833; }
.result[data-engine="qwant_images"], .engine-badge[data-engine="qwant_images"] { --engine-accent: #5c97ff; } .result[data-engine="qwant_images"], .engine-badge[data-engine="qwant_images"] { --engine-accent: #5c97ff; }
/* Wikipedia / knowledge infobox */
.infobox-list {
margin-bottom: 1.25rem;
}
.infobox-card {
display: flex;
flex-direction: row;
flex-wrap: wrap;
gap: 1rem;
align-items: flex-start;
padding: 1rem 1.15rem;
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: var(--radius-md);
box-shadow: var(--shadow-sm);
}
.infobox-image-wrap {
flex-shrink: 0;
width: 120px;
height: 120px;
border-radius: var(--radius-sm);
overflow: hidden;
background: var(--bg-tertiary);
border: 1px solid var(--border);
}
.infobox-img {
width: 100%;
height: 100%;
object-fit: cover;
display: block;
}
.infobox-main {
flex: 1;
min-width: min(100%, 220px);
}
.infobox-title {
font-size: 1.15rem;
font-weight: 600;
color: var(--text-primary);
margin-bottom: 0.5rem;
line-height: 1.3;
}
.infobox-content {
font-size: 0.9rem;
color: var(--desc-color);
line-height: 1.55;
margin-bottom: 0.65rem;
}
.infobox-link {
display: inline-block;
font-size: 0.875rem;
font-weight: 500;
color: var(--title-link);
text-decoration: none;
}
.infobox-link:hover {
text-decoration: underline;
}
.dialog-error { .dialog-error {
padding: 0.65rem 0.85rem; padding: 0.65rem 0.85rem;
margin-bottom: 0.75rem; margin-bottom: 0.75rem;

View file

@ -40,6 +40,10 @@
<input type="checkbox" name="engine" value="wikipedia" checked> <input type="checkbox" name="engine" value="wikipedia" checked>
<span>Wikipedia</span> <span>Wikipedia</span>
</label> </label>
<label class="engine-toggle">
<input type="checkbox" name="engine" value="wikidata" checked>
<span>Wikidata</span>
</label>
<label class="engine-toggle"> <label class="engine-toggle">
<input type="checkbox" name="engine" value="github"> <input type="checkbox" name="engine" value="github">
<span>GitHub</span> <span>GitHub</span>

View file

@ -3,7 +3,26 @@
<div id="corrections" class="correction">{{range .Corrections}}{{.}} {{end}}</div> <div id="corrections" class="correction">{{range .Corrections}}{{.}} {{end}}</div>
{{end}} {{end}}
{{if or .Answers .Infoboxes}} {{if .Infoboxes}}
<div class="infobox-list" role="region" aria-label="Summary">
{{range .Infoboxes}}
<aside class="infobox-card">
{{if .ImgSrc}}
<div class="infobox-image-wrap">
<img src="{{.ImgSrc}}" alt="" class="infobox-img" loading="lazy" width="120" height="120">
</div>
{{end}}
<div class="infobox-main">
{{if .Title}}<h2 class="infobox-title">{{.Title}}</h2>{{end}}
{{if .Content}}<p class="infobox-content">{{.Content}}</p>{{end}}
{{if .URL}}<a href="{{.URL}}" class="infobox-link" target="_blank" rel="noopener noreferrer">Read article on Wikipedia</a>{{end}}
</div>
</aside>
{{end}}
</div>
{{end}}
{{if .Answers}}
<div id="answers"> <div id="answers">
{{range .Answers}} {{range .Answers}}
<div class="dialog-error">{{.}}</div> <div class="dialog-error">{{.}}</div>
@ -38,7 +57,7 @@
{{end}} {{end}}
{{end}} {{end}}
{{end}} {{end}}
{{else if not .Answers}} {{else if and (not .Answers) (not .Infoboxes)}}
<div class="no-results"> <div class="no-results">
<div class="no-results-icon" aria-hidden="true">🔍</div> <div class="no-results-icon" aria-hidden="true">🔍</div>
<h2>No results found</h2> <h2>No results found</h2>

View file

@ -96,6 +96,7 @@ type InfoboxView struct {
Title string Title string
Content string Content string
ImgSrc string ImgSrc string
URL string
} }
// FilterOption represents a filter radio option for the sidebar. // FilterOption represents a filter radio option for the sidebar.
@ -273,7 +274,10 @@ func FromResponse(resp contracts.SearchResponse, query string, pageno int, activ
if v, ok := ib["img_src"].(string); ok { if v, ok := ib["img_src"].(string); ok {
iv.ImgSrc = util.SanitizeResultURL(v) iv.ImgSrc = util.SanitizeResultURL(v)
} }
if iv.Title != "" || iv.Content != "" { if v, ok := ib["url"].(string); ok {
iv.URL = util.SanitizeResultURL(v)
}
if iv.Title != "" || iv.Content != "" || iv.ImgSrc != "" {
pd.Infoboxes = append(pd.Infoboxes, iv) pd.Infoboxes = append(pd.Infoboxes, iv)
} }
} }