diff --git a/config.example.toml b/config.example.toml index 4fcd931..6954440 100644 --- a/config.example.toml +++ b/config.example.toml @@ -28,7 +28,7 @@ url = "" # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) # Engines not listed here will be proxied to the upstream instance. # Include bing_images, ddg_images, qwant_images for image search when [upstream].url is empty. -local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"] +local_ported = ["wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"] [engines.brave] # Brave Search API key (env: BRAVE_API_KEY) diff --git a/internal/config/config.go b/internal/config/config.go index 2318b6c..6158af7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -165,7 +165,7 @@ func defaultConfig() *Config { }, Upstream: UpstreamConfig{}, Engines: EnginesConfig{ - LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"}, + LocalPorted: []string{"wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"}, Qwant: QwantConfig{ Category: "web-lite", ResultsPerPage: 10, diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 1bf1b47..993f466 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -14,8 +14,8 @@ func TestLoadDefaults(t *testing.T) { if cfg.Server.Port != 5355 { t.Errorf("expected default port 5355, got %d", cfg.Server.Port) } - if len(cfg.Engines.LocalPorted) != 14 { - t.Errorf("expected 14 default engines, got %d", len(cfg.Engines.LocalPorted)) + if len(cfg.Engines.LocalPorted) != 15 { + t.Errorf("expected 15 default engines, got %d", len(cfg.Engines.LocalPorted)) } } diff --git a/internal/engines/factory.go b/internal/engines/factory.go index f91cf1b..38d14d1 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -50,6 +50,7 @@ func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string return map[string]Engine{ "wikipedia": &WikipediaEngine{client: client}, + "wikidata": &WikidataEngine{client: client}, "arxiv": &ArxivEngine{client: client}, "crossref": &CrossrefEngine{client: client}, "braveapi": &BraveAPIEngine{ diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 598ee01..54a1827 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -24,7 +24,7 @@ import ( ) var defaultPortedEngines = []string{ - "wikipedia", "arxiv", "crossref", "braveapi", + "wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "brave", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "stackoverflow", // Image engines @@ -106,6 +106,7 @@ func inferFromCategories(categories []string) []string { switch strings.TrimSpace(strings.ToLower(c)) { case "general": set["wikipedia"] = true + set["wikidata"] = true set["braveapi"] = true set["qwant"] = true set["duckduckgo"] = true @@ -134,9 +135,9 @@ func inferFromCategories(categories []string) []string { } // stable order order := map[string]int{ - "wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6, - "arxiv": 7, "crossref": 8, "github": 9, "stackoverflow": 10, "reddit": 11, "youtube": 12, - "bing_images": 13, "ddg_images": 14, "qwant_images": 15, + "wikipedia": 0, "wikidata": 1, "braveapi": 2, "brave": 3, "qwant": 4, "duckduckgo": 5, "bing": 6, "google": 7, + "arxiv": 8, "crossref": 9, "github": 10, "stackoverflow": 11, "reddit": 12, "youtube": 13, + "bing_images": 14, "ddg_images": 15, "qwant_images": 16, } sortByOrder(out, order) return out diff --git a/internal/engines/wikidata.go b/internal/engines/wikidata.go new file mode 100644 index 0000000..2956341 --- /dev/null +++ b/internal/engines/wikidata.go @@ -0,0 +1,133 @@ +// samsa — a privacy-respecting metasearch engine +// Copyright (C) 2026-present metamorphosis-dev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +package engines + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + + "github.com/metamorphosis-dev/samsa/internal/contracts" +) + +// wikidataAPIBase is the Wikidata MediaWiki API endpoint (overridable in tests). +var wikidataAPIBase = "https://www.wikidata.org/w/api.php" + +// WikidataEngine searches entity labels and descriptions via the Wikidata API. +// See: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities +type WikidataEngine struct { + client *http.Client +} + +func (e *WikidataEngine) Name() string { return "wikidata" } + +func (e *WikidataEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + if e == nil || e.client == nil { + return contracts.SearchResponse{}, errors.New("wikidata engine not initialized") + } + q := strings.TrimSpace(req.Query) + if q == "" { + return contracts.SearchResponse{Query: req.Query}, nil + } + + lang := strings.TrimSpace(req.Language) + if lang == "" || lang == "auto" { + lang = "en" + } + lang = strings.SplitN(lang, "-", 2)[0] + lang = strings.ReplaceAll(lang, "_", "-") + if _, ok := validWikipediaLangs[lang]; !ok { + lang = "en" + } + + u, err := url.Parse(wikidataAPIBase) + if err != nil { + return contracts.SearchResponse{}, err + } + qv := u.Query() + qv.Set("action", "wbsearchentities") + qv.Set("search", q) + qv.Set("language", lang) + qv.Set("limit", "10") + qv.Set("format", "json") + u.RawQuery = qv.Encode() + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return contracts.SearchResponse{}, err + } + httpReq.Header.Set("User-Agent", "samsa/1.0 (Wikidata search; +https://github.com/metamorphosis-dev/samsa)") + + resp, err := e.client.Do(httpReq) + if err != nil { + return contracts.SearchResponse{}, err + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024)) + return contracts.SearchResponse{}, fmt.Errorf("wikidata upstream error: status %d", resp.StatusCode) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) + if err != nil { + return contracts.SearchResponse{}, err + } + + var api struct { + Search []struct { + ID string `json:"id"` + Label string `json:"label"` + Description string `json:"description"` + } `json:"search"` + } + if err := json.Unmarshal(body, &api); err != nil { + return contracts.SearchResponse{}, fmt.Errorf("wikidata JSON parse error: %w", err) + } + + results := make([]contracts.MainResult, 0, len(api.Search)) + for _, hit := range api.Search { + id := strings.TrimSpace(hit.ID) + if id == "" || !strings.HasPrefix(id, "Q") { + continue + } + pageURL := "https://www.wikidata.org/wiki/" + url.PathEscape(id) + title := strings.TrimSpace(hit.Label) + if title == "" { + title = id + } + content := strings.TrimSpace(hit.Description) + urlPtr := pageURL + results = append(results, contracts.MainResult{ + Template: "default.html", + Title: title, + Content: content, + URL: &urlPtr, + Engine: "wikidata", + Category: "general", + Engines: []string{"wikidata"}, + }) + } + + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, + UnresponsiveEngines: [][2]string{}, + }, nil +} diff --git a/internal/engines/wikidata_test.go b/internal/engines/wikidata_test.go new file mode 100644 index 0000000..ff2ea45 --- /dev/null +++ b/internal/engines/wikidata_test.go @@ -0,0 +1,51 @@ +package engines + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/metamorphosis-dev/samsa/internal/contracts" +) + +func TestWikidataEngine_Search(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Query().Get("action") != "wbsearchentities" { + t.Errorf("action=%q", r.URL.Query().Get("action")) + } + if got := r.URL.Query().Get("search"); got != "test" { + t.Errorf("search=%q want test", got) + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"search":[{"id":"Q937","label":"Go","description":"Programming language"}]}`)) + })) + defer ts.Close() + + orig := wikidataAPIBase + t.Cleanup(func() { wikidataAPIBase = orig }) + wikidataAPIBase = ts.URL + "/w/api.php" + + e := &WikidataEngine{client: ts.Client()} + resp, err := e.Search(context.Background(), contracts.SearchRequest{ + Query: "test", + Language: "en", + }) + if err != nil { + t.Fatal(err) + } + if len(resp.Results) != 1 { + t.Fatalf("expected 1 result, got %d", len(resp.Results)) + } + r0 := resp.Results[0] + if r0.Engine != "wikidata" { + t.Errorf("engine=%q", r0.Engine) + } + if r0.Title != "Go" { + t.Errorf("title=%q", r0.Title) + } + if r0.URL == nil || !strings.Contains(*r0.URL, "Q937") { + t.Errorf("url=%v", r0.URL) + } +} diff --git a/internal/engines/wikipedia.go b/internal/engines/wikipedia.go index 302a49d..44065f3 100644 --- a/internal/engines/wikipedia.go +++ b/internal/engines/wikipedia.go @@ -123,13 +123,13 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques if resp.StatusCode == http.StatusNotFound { return contracts.SearchResponse{ - Query: req.Query, - NumberOfResults: 0, - Results: []contracts.MainResult{}, - Answers: []map[string]any{}, - Corrections: []string{}, - Infoboxes: []map[string]any{}, - Suggestions: []string{}, + Query: req.Query, + NumberOfResults: 0, + Results: []contracts.MainResult{}, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, UnresponsiveEngines: [][2]string{}, }, nil } @@ -141,9 +141,13 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques var api struct { Title string `json:"title"` Description string `json:"description"` + Extract string `json:"extract"` Titles struct { Display string `json:"display"` } `json:"titles"` + Thumbnail struct { + Source string `json:"source"` + } `json:"thumbnail"` ContentURLs struct { Desktop struct { Page string `json:"page"` @@ -160,7 +164,7 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques // API returned a non-standard payload; treat as no result. return contracts.SearchResponse{ Query: req.Query, - NumberOfResults: 0, + NumberOfResults: 0, Results: []contracts.MainResult{}, Answers: []map[string]any{}, Corrections: []string{}, @@ -175,36 +179,61 @@ func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchReques title = api.Title } - content := api.Description + content := strings.TrimSpace(api.Extract) + if content == "" { + content = strings.TrimSpace(api.Description) + } urlPtr := pageURL pub := (*string)(nil) + // Knowledge infobox for HTML (Wikipedia REST summary: title, extract, thumbnail, link). + var infoboxes []map[string]any + ibTitle := api.Titles.Display + if ibTitle == "" { + ibTitle = api.Title + } + body := strings.TrimSpace(api.Extract) + if body == "" { + body = strings.TrimSpace(api.Description) + } + imgSrc := strings.TrimSpace(api.Thumbnail.Source) + if ibTitle != "" || body != "" || imgSrc != "" { + row := map[string]any{ + "title": ibTitle, + "infobox": body, + "url": pageURL, + } + if imgSrc != "" { + row["img_src"] = imgSrc + } + infoboxes = append(infoboxes, row) + } + results := []contracts.MainResult{ { - Template: "default.html", - Title: title, - Content: content, - URL: &urlPtr, - Pubdate: pub, - Engine: "wikipedia", - Score: 0, - Category: "general", - Priority: "", + Template: "default.html", + Title: title, + Content: content, + URL: &urlPtr, + Pubdate: pub, + Engine: "wikipedia", + Score: 0, + Category: "general", + Priority: "", Positions: nil, - Engines: []string{"wikipedia"}, + Engines: []string{"wikipedia"}, }, } return contracts.SearchResponse{ Query: req.Query, - NumberOfResults: len(results), + NumberOfResults: len(results), Results: results, Answers: []map[string]any{}, Corrections: []string{}, - Infoboxes: []map[string]any{}, + Infoboxes: infoboxes, Suggestions: []string{}, UnresponsiveEngines: [][2]string{}, }, nil } - diff --git a/internal/search/request_params.go b/internal/search/request_params.go index 1e4ac84..59dc84a 100644 --- a/internal/search/request_params.go +++ b/internal/search/request_params.go @@ -31,7 +31,7 @@ const maxQueryLength = 1024 // knownEngineNames is the allowlist of valid engine identifiers. var knownEngineNames = map[string]bool{ - "wikipedia": true, "arxiv": true, "crossref": true, + "wikipedia": true, "wikidata": true, "arxiv": true, "crossref": true, "braveapi": true, "brave": true, "qwant": true, "duckduckgo": true, "github": true, "reddit": true, "bing": true, "google": true, "youtube": true, diff --git a/internal/views/templates/base.html b/internal/views/templates/base.html index f75f358..d6f8153 100644 --- a/internal/views/templates/base.html +++ b/internal/views/templates/base.html @@ -528,6 +528,7 @@ .result[data-engine="braveapi"], .engine-badge[data-engine="braveapi"] { --engine-accent: #ff6600; } .result[data-engine="qwant"], .engine-badge[data-engine="qwant"] { --engine-accent: #5c97ff; } .result[data-engine="wikipedia"], .engine-badge[data-engine="wikipedia"] { --engine-accent: #a3a3a3; } + .result[data-engine="wikidata"], .engine-badge[data-engine="wikidata"] { --engine-accent: #339966; } .result[data-engine="github"], .engine-badge[data-engine="github"] { --engine-accent: #8b5cf6; } .result[data-engine="reddit"], .engine-badge[data-engine="reddit"] { --engine-accent: #ff4500; } .result[data-engine="youtube"], .engine-badge[data-engine="youtube"] { --engine-accent: #ff0000; } @@ -538,6 +539,73 @@ .result[data-engine="ddg_images"], .engine-badge[data-engine="ddg_images"] { --engine-accent: #de5833; } .result[data-engine="qwant_images"], .engine-badge[data-engine="qwant_images"] { --engine-accent: #5c97ff; } + /* Wikipedia / knowledge infobox */ + .infobox-list { + margin-bottom: 1.25rem; + } + + .infobox-card { + display: flex; + flex-direction: row; + flex-wrap: wrap; + gap: 1rem; + align-items: flex-start; + padding: 1rem 1.15rem; + background: var(--bg-secondary); + border: 1px solid var(--border); + border-radius: var(--radius-md); + box-shadow: var(--shadow-sm); + } + + .infobox-image-wrap { + flex-shrink: 0; + width: 120px; + height: 120px; + border-radius: var(--radius-sm); + overflow: hidden; + background: var(--bg-tertiary); + border: 1px solid var(--border); + } + + .infobox-img { + width: 100%; + height: 100%; + object-fit: cover; + display: block; + } + + .infobox-main { + flex: 1; + min-width: min(100%, 220px); + } + + .infobox-title { + font-size: 1.15rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 0.5rem; + line-height: 1.3; + } + + .infobox-content { + font-size: 0.9rem; + color: var(--desc-color); + line-height: 1.55; + margin-bottom: 0.65rem; + } + + .infobox-link { + display: inline-block; + font-size: 0.875rem; + font-weight: 500; + color: var(--title-link); + text-decoration: none; + } + + .infobox-link:hover { + text-decoration: underline; + } + .dialog-error { padding: 0.65rem 0.85rem; margin-bottom: 0.75rem; diff --git a/internal/views/templates/preferences.html b/internal/views/templates/preferences.html index cdb5de8..1fbbd48 100644 --- a/internal/views/templates/preferences.html +++ b/internal/views/templates/preferences.html @@ -40,6 +40,10 @@ Wikipedia +