package engines

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/metamorphosis-dev/kafka/internal/contracts"
)

// WikipediaEngine looks up a query as a page title through the Wikipedia REST
// "page summary" endpoint and reports at most one result.
type WikipediaEngine struct {
	client *http.Client
}

// Name returns the engine identifier, "wikipedia".
func (e *WikipediaEngine) Name() string { return "wikipedia" }

// Search requests the page summary whose title equals req.Query from the
// Wikipedia edition selected by req.Language.
//
// It returns:
//   - an error when the engine (or its HTTP client) is nil, when the request
//     cannot be built or sent, when the upstream status is neither 2xx nor 404,
//     or when the response body is not valid JSON;
//   - a response carrying only the query when the query is blank;
//   - an empty result set when the upstream answers 404 or the payload has no
//     desktop page URL;
//   - otherwise a single result built from the summary's title, description
//     and desktop page URL.
func (e *WikipediaEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("wikipedia engine not initialized")
	}
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}

	endpoint := fmt.Sprintf(
		"https://%s.wikipedia.org/api/rest_v1/page/summary/%s",
		wikipediaSubdomain(req.Language),
		url.PathEscape(req.Query),
	)

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia: build request: %w", err)
	}
	// Wikimedia APIs require a descriptive User-Agent.
	httpReq.Header.Set(
		"User-Agent",
		"gosearch-go/0.1 (compatible; +https://github.com/metamorphosis-dev/kafka)",
	)
	// Best-effort: hint content language. The trimmed value is used so that a
	// whitespace-only language does not produce a blank header.
	if hint := strings.TrimSpace(req.Language); hint != "" && hint != "auto" {
		httpReq.Header.Set("Accept-Language", hint)
	}

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia: request failed: %w", err)
	}
	defer resp.Body.Close()

	// A missing page is a normal "no result" outcome, not an upstream failure.
	if resp.StatusCode == http.StatusNotFound {
		return emptyWikipediaResponse(req.Query), nil
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// The body is only included to aid debugging; a failed or partial read
		// must not mask the status error, so the read error is ignored.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}

	// Only the fields needed to build a result are decoded.
	var api struct {
		Title       string `json:"title"`
		Description string `json:"description"`
		Titles      struct {
			Display string `json:"display"`
		} `json:"titles"`
		ContentURLs struct {
			Desktop struct {
				Page string `json:"page"`
			} `json:"desktop"`
		} `json:"content_urls"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("wikipedia: decode response: %w", err)
	}

	pageURL := api.ContentURLs.Desktop.Page
	if pageURL == "" {
		// API returned a non-standard payload; treat as no result.
		return emptyWikipediaResponse(req.Query), nil
	}

	// Prefer the display title and fall back to the plain title.
	title := api.Titles.Display
	if title == "" {
		title = api.Title
	}

	results := []contracts.MainResult{
		{
			Template:  "default.html",
			Title:     title,
			Content:   api.Description,
			URL:       &pageURL,
			Pubdate:   nil,
			Engine:    "wikipedia",
			Score:     0,
			Category:  "general",
			Priority:  "",
			Positions: nil,
			Engines:   []string{"wikipedia"},
		},
	}

	out := emptyWikipediaResponse(req.Query)
	out.NumberOfResults = len(results)
	out.Results = results
	return out, nil
}

// wikipediaSubdomain maps a caller-supplied language tag to the subdomain used
// in "<subdomain>.wikipedia.org".
//
// The tag is trimmed and lower-cased, any "-suffix" is dropped ("en-US" becomes
// "en"), and underscores become hyphens. Because the result is placed directly
// into the request host, anything that is not a plain DNS label (ASCII letters,
// digits and inner hyphens, at most 63 bytes) is rejected and "en" is used
// instead; this stops a crafted language value from redirecting the request to
// another host. Blank and "auto" also map to "en".
func wikipediaSubdomain(language string) string {
	const fallback = "en"

	lang := strings.ToLower(strings.TrimSpace(language))
	if lang == "" || lang == "auto" {
		return fallback
	}

	// Wikipedia subdomains are based on the language code; keep it simple for MVP.
	lang, _, _ = strings.Cut(lang, "-")
	lang = strings.ReplaceAll(lang, "_", "-")

	if lang == "" || len(lang) > 63 || lang[0] == '-' || lang[len(lang)-1] == '-' {
		return fallback
	}
	for i := 0; i < len(lang); i++ {
		c := lang[i]
		switch {
		case c >= 'a' && c <= 'z', c >= '0' && c <= '9', c == '-':
			// Allowed in a DNS label.
		default:
			return fallback
		}
	}
	return lang
}

// emptyWikipediaResponse builds a zero-result response for query. Every
// collection field is a non-nil empty value rather than nil.
func emptyWikipediaResponse(query string) contracts.SearchResponse {
	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     0,
		Results:             []contracts.MainResult{},
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}
}