feat: add Stack Overflow search engine
Uses the Stack Exchange API v2.3 (/search/advanced) to find questions sorted by relevance. No API key required (300 req/day); optionally configure via STACKOVERFLOW_KEY env var or [engines.stackoverflow]. Results include score, answer count, view count, and tags in the snippet. Assigned to the 'it' category, triggered by the IT category tab or explicit engine selection. 6 tests covering parsing, edge cases, and helpers.
This commit is contained in:
parent
e96040ef35
commit
df67492602
5 changed files with 440 additions and 7 deletions
226
internal/engines/stackoverflow.go
Normal file
226
internal/engines/stackoverflow.go
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
// kafka — a privacy-respecting metasearch engine
|
||||
// Copyright (C) 2026-present metamorphosis-dev
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package engines
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||
)
|
||||
|
||||
// stackOverflowAPIBase is the base URL of the Stack Exchange API (version 2.3).
// Endpoint paths such as /search/advanced are appended to it.
const stackOverflowAPIBase = "https://api.stackexchange.com/2.3"
|
||||
|
||||
// StackOverflowEngine is the search engine backed by the public Stack
// Overflow API.
//
// An API key is optional. Supplying one (through the STACKOVERFLOW_KEY
// environment variable or the configuration file) raises the daily request
// quota from 300 to 10,000.
type StackOverflowEngine struct {
	// client performs the outgoing HTTP requests; Search refuses to run
	// when it is nil.
	client *http.Client
	// apiKey is sent as the "key" query parameter when non-empty.
	apiKey string
}

// Name reports the identifier under which this engine is registered.
func (e *StackOverflowEngine) Name() string {
	return "stackoverflow"
}
|
||||
|
||||
func (e *StackOverflowEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||
if e == nil || e.client == nil {
|
||||
return contracts.SearchResponse{}, errors.New("stackoverflow engine not initialized")
|
||||
}
|
||||
q := strings.TrimSpace(req.Query)
|
||||
if q == "" {
|
||||
return contracts.SearchResponse{Query: req.Query}, nil
|
||||
}
|
||||
|
||||
page := req.Pageno
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
args := url.Values{}
|
||||
args.Set("order", "desc")
|
||||
args.Set("sort", "relevance")
|
||||
args.Set("site", "stackoverflow")
|
||||
args.Set("page", fmt.Sprintf("%d", page))
|
||||
args.Set("pagesize", "20")
|
||||
args.Set("filter", "!9_bDDxJY5")
|
||||
if e.apiKey != "" {
|
||||
args.Set("key", e.apiKey)
|
||||
}
|
||||
|
||||
endpoint := stackOverflowAPIBase + "/search/advanced?" + args.Encode() + "&q=" + url.QueryEscape(q)
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/kafka)")
|
||||
httpReq.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := e.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusTooManyRequests {
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
UnresponsiveEngines: [][2]string{{"stackoverflow", "rate_limited"}},
|
||||
Results: []contracts.MainResult{},
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
io.Copy(io.Discard, io.LimitReader(resp.Body, 4*1024))
|
||||
return contracts.SearchResponse{}, fmt.Errorf("stackoverflow upstream error: status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
return parseStackOverflow(body, req.Query)
|
||||
}
|
||||
|
||||
// Wire types used to decode the JSON returned by the search endpoint.
type (
	// soQuestion is a single entry of the "items" array.
	soQuestion struct {
		QuestionID int    `json:"question_id"`
		Title      string `json:"title"`
		// Link is the question URL; entries without one are skipped by the parser.
		Link string `json:"link"`
		// Body is passed through stripHTML before being shown as a snippet.
		Body        string   `json:"body"`
		Score       int      `json:"score"`
		AnswerCount int      `json:"answer_count"`
		ViewCount   int      `json:"view_count"`
		Tags        []string `json:"tags"`
		// CreationDate is presumably a Unix timestamp in seconds — confirm
		// against the API documentation.
		CreationDate float64 `json:"creation_date"`
		// Owner stays nil when the JSON has no (or a null) "owner" member.
		Owner *soOwner `json:"owner"`
		// AcceptedAnswerID stays nil when the JSON has no (or a null)
		// "accepted_answer_id" member.
		AcceptedAnswerID *int `json:"accepted_answer_id"`
		IsAnswered       bool `json:"is_answered"`
	}

	// soOwner is the "owner" object embedded in a question.
	soOwner struct {
		Reputation  int    `json:"reputation"`
		DisplayName string `json:"display_name"`
	}

	// soResponse is the top-level response envelope.
	soResponse struct {
		Items          []soQuestion `json:"items"`
		HasMore        bool         `json:"has_more"`
		QuotaRemaining int          `json:"quota_remaining"`
		QuotaMax       int          `json:"quota_max"`
	}
)
|
||||
|
||||
func parseStackOverflow(body []byte, query string) (contracts.SearchResponse, error) {
|
||||
var resp soResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return contracts.SearchResponse{}, fmt.Errorf("stackoverflow JSON parse error: %w", err)
|
||||
}
|
||||
|
||||
results := make([]contracts.MainResult, 0, len(resp.Items))
|
||||
for _, q := range resp.Items {
|
||||
if q.Link == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Strip HTML from the body excerpt.
|
||||
snippet := truncate(stripHTML(q.Body), 300)
|
||||
|
||||
// Build a content string with useful metadata.
|
||||
content := snippet
|
||||
if q.Score > 0 {
|
||||
content = fmt.Sprintf("Score: %d", q.Score)
|
||||
if q.AnswerCount > 0 {
|
||||
content += fmt.Sprintf(" · %d answers", q.AnswerCount)
|
||||
}
|
||||
if q.ViewCount > 0 {
|
||||
content += fmt.Sprintf(" · %s views", formatCount(q.ViewCount))
|
||||
}
|
||||
if snippet != "" {
|
||||
content += "\n" + snippet
|
||||
}
|
||||
}
|
||||
|
||||
// Append tags as category hint.
|
||||
if len(q.Tags) > 0 {
|
||||
displayTags := q.Tags
|
||||
if len(displayTags) > 5 {
|
||||
displayTags = displayTags[:5]
|
||||
}
|
||||
content += "\n[" + strings.Join(displayTags, "] [") + "]"
|
||||
}
|
||||
|
||||
linkPtr := q.Link
|
||||
results = append(results, contracts.MainResult{
|
||||
Template: "default",
|
||||
Title: q.Title,
|
||||
Content: content,
|
||||
URL: &linkPtr,
|
||||
Engine: "stackoverflow",
|
||||
Score: float64(q.Score),
|
||||
Category: "it",
|
||||
Engines: []string{"stackoverflow"},
|
||||
})
|
||||
}
|
||||
|
||||
return contracts.SearchResponse{
|
||||
Query: query,
|
||||
NumberOfResults: len(results),
|
||||
Results: results,
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// formatCount renders n compactly with one decimal place: values of one
// million or more use an "M" suffix (3.4M), values of one thousand or more
// use a "k" suffix (1.2k), and smaller values are printed as plain integers.
//
// Values just below one million that would round up to "1000.0k" are
// promoted to "1.0M" so the output never shows a four-digit mantissa.
func formatCount(n int) string {
	if n >= 1_000_000 {
		return fmt.Sprintf("%.1fM", float64(n)/1_000_000)
	}
	if n >= 1_000 {
		mantissa := fmt.Sprintf("%.1f", float64(n)/1_000)
		if mantissa == "1000.0" {
			// Rounding carried into the next unit (e.g. 999_999).
			return "1.0M"
		}
		return mantissa + "k"
	}
	return fmt.Sprintf("%d", n)
}
|
||||
|
||||
// truncate shortens s to at most maxLen characters (Unicode code points),
// appending "…" when anything was cut off. Strings that already fit are
// returned unchanged. The cut always falls on a rune boundary, so the result
// never contains a partial multi-byte sequence. A negative maxLen is treated
// as zero.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	// Fast path: the byte length is an upper bound on the rune count.
	if len(s) <= maxLen {
		return s
	}
	seen := 0
	for i := range s {
		// i is the byte offset at which the (seen+1)-th rune starts.
		if seen == maxLen {
			return s[:i] + "…"
		}
		seen++
	}
	// s has more bytes than maxLen but no more than maxLen runes.
	return s
}
|
||||
|
||||
// stackOverflowCreatedAt converts a Unix timestamp (seconds) into a pointer
// to its UTC calendar date formatted as YYYY-MM-DD. Any fractional part of
// the timestamp is discarded.
// Kept as a helper for potential future pubdate use.
func stackOverflowCreatedAt(unix float64) *string {
	seconds := int64(unix)
	day := time.Unix(seconds, 0).UTC().Format("2006-01-02")
	return &day
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue