- DuckDuckGo: scrapes the Lite HTML endpoint for results
  - Language-aware region mapping (de→de-de, ja→jp-jp, etc.)
  - HTML parser extracts result links and snippets from DDG Lite markup
  - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape
- GitHub: uses the public Search API (repos, sorted by stars)
  - No auth required (10 req/min unauthenticated)
  - Shows stars, language, topics, last updated date
  - Paginated via GitHub's page parameter
- Reddit: uses the public JSON search API
  - Respects safesearch (skips over_18 posts)
  - Shows subreddit, score, comment count
  - Links self-posts to the thread URL
- Bing: scrapes web search HTML (b_algo containers)
  - Extracts titles, URLs, and snippets from Bing's result markup
  - Handles Bing's tracking URL encoding
- Updated factory, config defaults, and config.example.toml
- Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode)

9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
120 lines
3.2 KiB
Go
120 lines
3.2 KiB
Go
package engines
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/ashie/gosearch/internal/contracts"
|
|
)
|
|
|
|
// RedditEngine is the search backend that looks up Reddit posts through
// Reddit's public JSON search endpoint.
type RedditEngine struct {
	// client issues the outgoing HTTP requests; Search reports an error
	// when it is nil.
	client *http.Client
}
|
|
|
|
// Name returns the identifier of this engine, "reddit".
func (e *RedditEngine) Name() string {
	const engineName = "reddit"
	return engineName
}
|
|
|
|
func (e *RedditEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
|
if strings.TrimSpace(req.Query) == "" {
|
|
return contracts.SearchResponse{Query: req.Query}, nil
|
|
}
|
|
if e == nil || e.client == nil {
|
|
return contracts.SearchResponse{}, errors.New("reddit engine not initialized")
|
|
}
|
|
|
|
endpoint := fmt.Sprintf(
|
|
"https://www.reddit.com/search.json?q=%s&limit=25&sort=relevance&t=all",
|
|
url.QueryEscape(req.Query),
|
|
)
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
httpReq.Header.Set("User-Agent", "gosearch/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")
|
|
|
|
resp, err := e.client.Do(httpReq)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
|
return contracts.SearchResponse{}, fmt.Errorf("reddit api error: status=%d body=%q", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var data struct {
|
|
Data struct {
|
|
Children []struct {
|
|
Data struct {
|
|
Title string `json:"title"`
|
|
URL string `json:"url"`
|
|
Permalink string `json:"permalink"`
|
|
Score int `json:"score"`
|
|
NumComments int `json:"num_comments"`
|
|
Subreddit string `json:"subreddit"`
|
|
CreatedUTC float64 `json:"created_utc"`
|
|
IsSelf bool `json:"is_self"`
|
|
Over18 bool `json:"over_18"`
|
|
} `json:"data"`
|
|
} `json:"children"`
|
|
} `json:"data"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
results := make([]contracts.MainResult, 0, len(data.Data.Children))
|
|
for _, child := range data.Data.Children {
|
|
post := child.Data
|
|
|
|
// Skip NSFW results unless explicitly allowed.
|
|
if post.Over18 && req.Safesearch > 0 {
|
|
continue
|
|
}
|
|
|
|
// For self-posts, link to the Reddit thread.
|
|
linkURL := post.URL
|
|
if post.IsSelf || strings.HasPrefix(linkURL, "/r/") {
|
|
linkURL = "https://www.reddit.com" + post.Permalink
|
|
}
|
|
|
|
content := fmt.Sprintf("r/%s · ⬆ %d · 💬 %d", post.Subreddit, post.Score, post.NumComments)
|
|
if req.Safesearch == 0 {
|
|
// No additional content for safe mode
|
|
}
|
|
|
|
title := post.Title
|
|
urlPtr := linkURL
|
|
|
|
results = append(results, contracts.MainResult{
|
|
Template: "default.html",
|
|
Title: title,
|
|
Content: content,
|
|
URL: &urlPtr,
|
|
Engine: "reddit",
|
|
Score: float64(post.Score),
|
|
Category: "general",
|
|
Engines: []string{"reddit"},
|
|
})
|
|
}
|
|
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: len(results),
|
|
Results: results,
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: []string{},
|
|
UnresponsiveEngines: [][2]string{},
|
|
}, nil
|
|
}
|