// samsa — a privacy-respecting metasearch engine // Copyright (C) 2026-present metamorphosis-dev // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. package engines import ( "context" "encoding/json" "errors" "fmt" "io" "net/http" "net/url" "strings" "github.com/metamorphosis-dev/samsa/internal/contracts" ) // wikidataAPIBase is the Wikidata MediaWiki API endpoint (overridable in tests). var wikidataAPIBase = "https://www.wikidata.org/w/api.php" // WikidataEngine searches entity labels and descriptions via the Wikidata API. // See: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities type WikidataEngine struct { client *http.Client } func (e *WikidataEngine) Name() string { return "wikidata" } func (e *WikidataEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { if e == nil || e.client == nil { return contracts.SearchResponse{}, errors.New("wikidata engine not initialized") } q := strings.TrimSpace(req.Query) if q == "" { return contracts.SearchResponse{Query: req.Query}, nil } lang := strings.TrimSpace(req.Language) if lang == "" || lang == "auto" { lang = "en" } lang = strings.SplitN(lang, "-", 2)[0] lang = strings.ReplaceAll(lang, "_", "-") if _, ok := validWikipediaLangs[lang]; !ok { lang = "en" } u, err := url.Parse(wikidataAPIBase) if err != nil { return contracts.SearchResponse{}, err } qv := u.Query() qv.Set("action", "wbsearchentities") qv.Set("search", q) qv.Set("language", lang) qv.Set("limit", "10") qv.Set("format", "json") u.RawQuery = qv.Encode() httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) if err != nil { return contracts.SearchResponse{}, err } httpReq.Header.Set("User-Agent", "samsa/1.0 (Wikidata search; +https://github.com/metamorphosis-dev/samsa)") resp, err := e.client.Do(httpReq) if err != nil { return contracts.SearchResponse{}, err } defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode >= 300 { io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024)) return contracts.SearchResponse{}, fmt.Errorf("wikidata upstream error: status %d", resp.StatusCode) } body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) if err != nil { return contracts.SearchResponse{}, err } var api struct { Search []struct { ID string `json:"id"` Label string `json:"label"` Description string `json:"description"` } `json:"search"` } if err := json.Unmarshal(body, &api); err != nil { return contracts.SearchResponse{}, fmt.Errorf("wikidata JSON parse error: %w", err) } results := make([]contracts.MainResult, 0, len(api.Search)) for _, hit := range api.Search { id := strings.TrimSpace(hit.ID) if id == "" || !strings.HasPrefix(id, "Q") { continue } pageURL := "https://www.wikidata.org/wiki/" + url.PathEscape(id) title := strings.TrimSpace(hit.Label) if title == "" { title = id } content := strings.TrimSpace(hit.Description) urlPtr := pageURL results = append(results, contracts.MainResult{ Template: "default.html", Title: title, Content: content, URL: &urlPtr, Engine: "wikidata", Category: "general", Engines: []string{"wikidata"}, }) } return contracts.SearchResponse{ Query: req.Query, NumberOfResults: len(results), Results: results, Answers: []map[string]any{}, Corrections: []string{}, Infoboxes: []map[string]any{}, Suggestions: []string{}, UnresponsiveEngines: [][2]string{}, }, nil }