kafka/internal/engines/wikipedia.go
Franz Kafka 7be03b4017 license: change from MIT to AGPLv3
Update LICENSE file and add AGPL header to all source files.

AGPLv3 ensures that if someone runs Kafka as a network service and
modifies it, they must release their source code under the same license.
2026-03-22 08:27:23 +00:00

167 lines
4.6 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
// WikipediaEngine is the search engine that reports itself as "wikipedia".
// It sends its upstream HTTP requests through client.
type WikipediaEngine struct {
// client performs the upstream HTTP requests; Search returns an
// "engine not initialized" error when it is nil.
client *http.Client
}
// Name returns the fixed identifier of this engine, "wikipedia".
// It does not touch the receiver, so it is safe to call on a nil engine.
func (e *WikipediaEngine) Name() string {
	const engineName = "wikipedia"
	return engineName
}
// wikipediaMaxSummaryBytes bounds how much of a successful summary response is
// decoded, so an unexpectedly large upstream payload cannot exhaust memory.
const wikipediaMaxSummaryBytes = 1 << 20

// Search queries the Wikipedia REST page-summary endpoint, using the trimmed
// req.Query as the page title, and returns at most one result.
//
// The Wikipedia edition is derived from req.Language by wikipediaLanguage;
// empty, "auto", or malformed values select the English edition.
//
// Outcomes:
//   - an error if the engine or its HTTP client is nil, the request cannot be
//     built or sent, the upstream answers with a non-2xx status other than
//     404, or the response body cannot be decoded as JSON;
//   - a response carrying only Query when the query is blank;
//   - an empty result set on 404 or when the payload has no desktop page URL;
//   - otherwise a single "general" result pointing at the desktop page.
func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("wikipedia engine not initialized")
	}
	query := strings.TrimSpace(req.Query)
	if query == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}

	// The subdomain is validated before it is placed in the host so that a
	// caller-supplied language can never redirect the request elsewhere.
	subdomain, acceptLang := wikipediaLanguage(req.Language)
	endpoint := fmt.Sprintf(
		"https://%s.wikipedia.org/api/rest_v1/page/summary/%s",
		subdomain,
		url.PathEscape(query),
	)

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia: building request: %w", err)
	}
	// Wikimedia APIs require a descriptive User-Agent.
	httpReq.Header.Set(
		"User-Agent",
		"gosearch-go/0.1 (compatible; +https://github.com/metamorphosis-dev/kafka)",
	)
	// Best-effort: hint content language, but only with a well-formed tag.
	if acceptLang != "" {
		httpReq.Header.Set("Accept-Language", acceptLang)
	}

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia: sending request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusNotFound {
		return wikipediaResponse(req.Query, []contracts.MainResult{}), nil
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// The read error is deliberately ignored: the body only enriches the
		// error message and the status code is reported regardless.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}

	var summary struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Titles      struct {
			Display string `json:"display"`
		} `json:"titles"`
		ContentURLs struct {
			Desktop struct {
				Page string `json:"page"`
			} `json:"desktop"`
		} `json:"content_urls"`
	}
	limited := io.LimitReader(resp.Body, wikipediaMaxSummaryBytes)
	if err := json.NewDecoder(limited).Decode(&summary); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia: decoding summary: %w", err)
	}

	pageURL := summary.ContentURLs.Desktop.Page
	if pageURL == "" {
		// API returned a non-standard payload; treat as no result.
		return wikipediaResponse(req.Query, []contracts.MainResult{}), nil
	}

	// Prefer the display title and fall back to the plain title.
	title := summary.Titles.Display
	if title == "" {
		title = summary.Title
	}

	results := []contracts.MainResult{
		{
			Template:  "default.html",
			Title:     title,
			Content:   summary.Description,
			URL:       &pageURL,
			Pubdate:   nil,
			Engine:    "wikipedia",
			Score:     0,
			Category:  "general",
			Priority:  "",
			Positions: nil,
			Engines:   []string{"wikipedia"},
		},
	}
	return wikipediaResponse(req.Query, results), nil
}

// wikipediaLanguage turns a caller-supplied language setting into the
// Wikipedia subdomain to query and the tag to send as Accept-Language.
//
// Underscores are treated as hyphens, so "en_US" and "en-US" both yield the
// subdomain "en" and the tag "en-US". The subdomain is the lower-cased primary
// subtag and must consist of 1 to 8 ASCII letters; the tag as a whole may only
// contain ASCII letters, digits and hyphens. For an empty, "auto", or
// malformed value it returns the subdomain "en" and an empty tag.
func wikipediaLanguage(raw string) (subdomain, tag string) {
	const fallback = "en"

	tag = strings.ReplaceAll(strings.TrimSpace(raw), "_", "-")
	if tag == "" || strings.EqualFold(tag, "auto") {
		return fallback, ""
	}
	for i := 0; i < len(tag); i++ {
		c := tag[i]
		isLetter := (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
		isDigit := c >= '0' && c <= '9'
		if !isLetter && !isDigit && c != '-' {
			return fallback, ""
		}
	}

	primary, _, _ := strings.Cut(tag, "-")
	if primary == "" || len(primary) > 8 {
		return fallback, ""
	}
	for i := 0; i < len(primary); i++ {
		c := primary[i]
		if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') {
			return fallback, ""
		}
	}
	return strings.ToLower(primary), tag
}

// wikipediaResponse builds a SearchResponse for query around results, with
// every other collection set to an empty, non-nil value.
func wikipediaResponse(query string, results []contracts.MainResult) contracts.SearchResponse {
	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}
}