// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package engines

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/metamorphosis-dev/kafka/internal/contracts"
)

const stackOverflowAPIBase = "https://api.stackexchange.com/2.3"

// StackOverflowEngine searches Stack Overflow via the public API.
// No API key is required, but providing one via STACKOVERFLOW_KEY env var
// or config raises the rate limit from 300 to 10,000 requests/day.
type StackOverflowEngine struct { client *http.Client apiKey string } func (e *StackOverflowEngine) Name() string { return "stackoverflow" } func (e *StackOverflowEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { if e == nil || e.client == nil { return contracts.SearchResponse{}, errors.New("stackoverflow engine not initialized") } q := strings.TrimSpace(req.Query) if q == "" { return contracts.SearchResponse{Query: req.Query}, nil } page := req.Pageno if page < 1 { page = 1 } args := url.Values{} args.Set("order", "desc") args.Set("sort", "relevance") args.Set("site", "stackoverflow") args.Set("page", fmt.Sprintf("%d", page)) args.Set("pagesize", "20") args.Set("filter", "!9_bDDxJY5") if e.apiKey != "" { args.Set("key", e.apiKey) } endpoint := stackOverflowAPIBase + "/search/advanced?" + args.Encode() + "&q=" + url.QueryEscape(q) httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) if err != nil { return contracts.SearchResponse{}, err } httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/kafka)") httpReq.Header.Set("Accept", "application/json") resp, err := e.client.Do(httpReq) if err != nil { return contracts.SearchResponse{}, err } defer resp.Body.Close() if resp.StatusCode == http.StatusTooManyRequests { return contracts.SearchResponse{ Query: req.Query, UnresponsiveEngines: [][2]string{{"stackoverflow", "rate_limited"}}, Results: []contracts.MainResult{}, Answers: []map[string]any{}, Corrections: []string{}, Infoboxes: []map[string]any{}, Suggestions: []string{}, }, nil } if resp.StatusCode != http.StatusOK { io.Copy(io.Discard, io.LimitReader(resp.Body, 4*1024)) return contracts.SearchResponse{}, fmt.Errorf("stackoverflow upstream error: status %d", resp.StatusCode) } body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024)) if err != nil { return contracts.SearchResponse{}, err } return parseStackOverflow(body, req.Query) } // 
soQuestion represents a question item from the Stack Exchange API. type soQuestion struct { QuestionID int `json:"question_id"` Title string `json:"title"` Link string `json:"link"` Body string `json:"body"` Score int `json:"score"` AnswerCount int `json:"answer_count"` ViewCount int `json:"view_count"` Tags []string `json:"tags"` CreationDate float64 `json:"creation_date"` Owner *soOwner `json:"owner"` AcceptedAnswerID *int `json:"accepted_answer_id"` IsAnswered bool `json:"is_answered"` } type soOwner struct { Reputation int `json:"reputation"` DisplayName string `json:"display_name"` } type soResponse struct { Items []soQuestion `json:"items"` HasMore bool `json:"has_more"` QuotaRemaining int `json:"quota_remaining"` QuotaMax int `json:"quota_max"` } func parseStackOverflow(body []byte, query string) (contracts.SearchResponse, error) { var resp soResponse if err := json.Unmarshal(body, &resp); err != nil { return contracts.SearchResponse{}, fmt.Errorf("stackoverflow JSON parse error: %w", err) } results := make([]contracts.MainResult, 0, len(resp.Items)) for _, q := range resp.Items { if q.Link == "" { continue } // Strip HTML from the body excerpt. snippet := truncate(stripHTML(q.Body), 300) // Build a content string with useful metadata. content := snippet if q.Score > 0 { content = fmt.Sprintf("Score: %d", q.Score) if q.AnswerCount > 0 { content += fmt.Sprintf(" · %d answers", q.AnswerCount) } if q.ViewCount > 0 { content += fmt.Sprintf(" · %s views", formatCount(q.ViewCount)) } if snippet != "" { content += "\n" + snippet } } // Append tags as category hint. 
if len(q.Tags) > 0 { displayTags := q.Tags if len(displayTags) > 5 { displayTags = displayTags[:5] } content += "\n[" + strings.Join(displayTags, "] [") + "]" } linkPtr := q.Link results = append(results, contracts.MainResult{ Template: "default", Title: q.Title, Content: content, URL: &linkPtr, Engine: "stackoverflow", Score: float64(q.Score), Category: "it", Engines: []string{"stackoverflow"}, }) } return contracts.SearchResponse{ Query: query, NumberOfResults: len(results), Results: results, Answers: []map[string]any{}, Corrections: []string{}, Infoboxes: []map[string]any{}, Suggestions: []string{}, UnresponsiveEngines: [][2]string{}, }, nil } // formatCount formats large numbers compactly (1.2k, 3.4M). func formatCount(n int) string { if n >= 1_000_000 { return fmt.Sprintf("%.1fM", float64(n)/1_000_000) } if n >= 1_000 { return fmt.Sprintf("%.1fk", float64(n)/1_000) } return fmt.Sprintf("%d", n) } // truncate cuts a string to at most maxLen characters, appending "…" if truncated. func truncate(s string, maxLen int) string { if len(s) <= maxLen { return s } return s[:maxLen] + "…" } // stackOverflowCreatedAt returns a time.Time from a Unix timestamp. // Kept as a helper for potential future pubdate use. func stackOverflowCreatedAt(unix float64) *string { t := time.Unix(int64(unix), 0).UTC() s := t.Format("2006-01-02") return &s }