Merge pull request 'feat: add DuckDuckGo, GitHub, Reddit, and Bing engines' (#1) from feat/more-engines into main

Reviewed-on: penal-colony/gosearch#1
This commit is contained in:
Franz Kafka 2026-03-21 17:35:53 +00:00
commit 3caf702c4f
14 changed files with 1059 additions and 5 deletions

View file

@ -17,7 +17,7 @@ url = ""
[engines] [engines]
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
# Engines not listed here will be proxied to upstream SearXNG. # Engines not listed here will be proxied to upstream SearXNG.
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant"] local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"]
[engines.brave] [engines.brave]
# Brave Search API key (env: BRAVE_API_KEY) # Brave Search API key (env: BRAVE_API_KEY)

View file

@ -92,7 +92,7 @@ func defaultConfig() *Config {
}, },
Upstream: UpstreamConfig{}, Upstream: UpstreamConfig{},
Engines: EnginesConfig{ Engines: EnginesConfig{
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant"}, LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"},
Qwant: QwantConfig{ Qwant: QwantConfig{
Category: "web-lite", Category: "web-lite",
ResultsPerPage: 10, ResultsPerPage: 10,

View file

@ -14,8 +14,8 @@ func TestLoadDefaults(t *testing.T) {
if cfg.Server.Port != 8080 { if cfg.Server.Port != 8080 {
t.Errorf("expected default port 8080, got %d", cfg.Server.Port) t.Errorf("expected default port 8080, got %d", cfg.Server.Port)
} }
if len(cfg.Engines.LocalPorted) != 5 { if len(cfg.Engines.LocalPorted) != 9 {
t.Errorf("expected 5 default engines, got %d", len(cfg.Engines.LocalPorted)) t.Errorf("expected 9 default engines, got %d", len(cfg.Engines.LocalPorted))
} }
} }

175
internal/engines/bing.go Normal file
View file

@ -0,0 +1,175 @@
package engines
import (
"context"
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// BingEngine searches Bing via its RSS search feed
// (bing.com/search?...&format=rss). Bing's regular HTML is heavily
// JS-dependent and blocks non-browser clients, so Search degrades
// gracefully (empty results plus an unresponsive-engine marker) when
// results cannot be retrieved.
type BingEngine struct {
	// client issues the outbound HTTP requests; Search errors if it is nil.
	client *http.Client
}
func (e *BingEngine) Name() string { return "bing" }
// Search queries Bing's public RSS search feed for req.Query.
// The response is dispatched on Content-Type: JSON and XML/RSS bodies are
// parsed into results; anything else (typically HTML from bot detection)
// yields an empty result set with "bing" recorded in UnresponsiveEngines
// rather than an error.
func (e *BingEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("bing engine not initialized")
	}
	// Clamp the page number: an unset (zero) Pageno previously produced a
	// negative offset (-10) in the request URL.
	page := req.Pageno
	if page < 1 {
		page = 1
	}
	endpoint := fmt.Sprintf(
		"https://www.bing.com/search?q=%s&count=10&offset=%d&format=rss",
		url.QueryEscape(req.Query),
		(page-1)*10,
	)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}
	contentType := resp.Header.Get("Content-Type")
	if strings.Contains(contentType, "json") {
		return parseBingJSON(resp.Body, req.Query)
	}
	if strings.Contains(contentType, "xml") || strings.Contains(contentType, "rss") {
		return parseBingRSS(resp.Body, req.Query)
	}
	// If Bing returned HTML instead of RSS, it likely blocked us.
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     0,
		Results:             []contracts.MainResult{},
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{{"bing", "blocked by bot detection"}},
	}, nil
}
// parseBingRSS decodes a Bing RSS feed body into a SearchResponse for the
// given query. Items without a link are dropped.
func parseBingRSS(r io.Reader, query string) (contracts.SearchResponse, error) {
	type rssItem struct {
		Title   string `xml:"title"`
		Link    string `xml:"link"`
		Descrip string `xml:"description"`
	}
	type rssDoc struct {
		XMLName xml.Name `xml:"rss"`
		Channel struct {
			Items []rssItem `xml:"item"`
		} `xml:"channel"`
	}
	var doc rssDoc
	if err := xml.NewDecoder(r).Decode(&doc); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("bing RSS parse error: %w", err)
	}
	items := doc.Channel.Items
	out := make([]contracts.MainResult, 0, len(items))
	for _, it := range items {
		if it.Link == "" {
			continue
		}
		link := it.Link
		out = append(out, contracts.MainResult{
			Template: "default.html",
			Title:    it.Title,
			Content:  stripHTML(it.Descrip),
			URL:      &link,
			Engine:   "bing",
			Score:    0,
			Category: "general",
			Engines:  []string{"bing"},
		})
	}
	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     len(out),
		Results:             out,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// parseBingJSON decodes a Bing Web Search API JSON body into a
// SearchResponse for the given query. NumberOfResults reflects Bing's
// total estimated match count, not the length of the returned page.
func parseBingJSON(r io.Reader, query string) (contracts.SearchResponse, error) {
	var payload struct {
		WebPages struct {
			TotalEstimatedMatches int `json:"totalEstimatedMatches"`
			Value []struct {
				Name            string `json:"name"`
				URL             string `json:"url"`
				Snippet         string `json:"snippet"`
				DateLastCrawled string `json:"dateLastCrawled"`
			} `json:"value"`
		} `json:"webPages"`
	}
	if err := json.NewDecoder(r).Decode(&payload); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("bing JSON parse error: %w", err)
	}
	pages := payload.WebPages.Value
	out := make([]contracts.MainResult, 0, len(pages))
	for _, page := range pages {
		link := page.URL
		out = append(out, contracts.MainResult{
			Template: "default.html",
			Title:    page.Name,
			Content:  page.Snippet,
			URL:      &link,
			Engine:   "bing",
			Score:    0,
			Category: "general",
			Engines:  []string{"bing"},
		})
	}
	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     payload.WebPages.TotalEstimatedMatches,
		Results:             out,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// Keep the otherwise-unused strconv import compiling until it is needed.
// (json needs no such anchor: it is already exercised by parseBingJSON.)
var _ = strconv.Itoa

View file

@ -0,0 +1,102 @@
package engines
import (
"context"
"net/http"
"strings"
"testing"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// TestBingEngine_EmptyQuery: a blank query short-circuits with no results
// and no error, even on an engine without a client.
func TestBingEngine_EmptyQuery(t *testing.T) {
	var eng BingEngine
	resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if n := len(resp.Results); n != 0 {
		t.Errorf("expected 0 results for empty query, got %d", n)
	}
}

// TestBingEngine_Name checks the configured engine identifier.
func TestBingEngine_Name(t *testing.T) {
	if got := (&BingEngine{}).Name(); got != "bing" {
		t.Errorf("expected 'bing', got %q", got)
	}
}

// TestBingEngine_Uninitialized: searching without an http.Client must error.
func TestBingEngine_Uninitialized(t *testing.T) {
	if _, err := (&BingEngine{}).Search(context.Background(), contracts.SearchRequest{Query: "test"}); err == nil {
		t.Error("expected error for uninitialized client")
	}
}
// TestBingEngine_LiveRequest issues a real Bing query; skipped with -short,
// and skipped again if Bing's bot detection blocks the request.
func TestBingEngine_LiveRequest(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping live request")
	}
	eng := &BingEngine{client: &http.Client{}}
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	resp, err := eng.Search(ctx, contracts.SearchRequest{Query: "golang programming language"})
	if err != nil {
		t.Fatalf("live search failed: %v", err)
	}
	// Bing may block non-browser requests gracefully (return 0 results).
	// The important thing is it doesn't crash.
	t.Logf("bing returned %d results (total: %d)", len(resp.Results), resp.NumberOfResults)
	t.Logf("unresponsive: %v", resp.UnresponsiveEngines)
	if len(resp.UnresponsiveEngines) > 0 {
		t.Skipf("bing blocked: %v", resp.UnresponsiveEngines[0])
	}
	for _, r := range resp.Results {
		if r.Engine != "bing" {
			t.Errorf("expected engine 'bing', got %q", r.Engine)
		}
		if r.URL == nil || *r.URL == "" {
			t.Error("expected non-empty URL")
		}
	}
}
// TestBingEngine_BlockedGracefully documents the response shape the engine
// produces when Bing serves HTML (bot detection) instead of RSS.
// NOTE(review): this test asserts on a locally constructed literal, so it
// verifies nothing about the engine's actual code path; consider letting
// BingEngine take a base URL so an httptest server can drive the real
// HTML-blocked branch of Search.
func TestBingEngine_BlockedGracefully(t *testing.T) {
	// Verify that when Bing returns HTML (bot detection), we get a valid
	// response with unresponsive_engines instead of an error.
	html := `<html><body>Bing requires JavaScript</body></html>`
	// This test verifies the structure of the blocked response.
	resp := contracts.SearchResponse{
		Query:               "test",
		NumberOfResults:     0,
		Results:             []contracts.MainResult{},
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{{"bing", "blocked by bot detection"}},
	}
	if len(resp.Results) != 0 {
		t.Error("expected 0 results when blocked")
	}
	if len(resp.UnresponsiveEngines) != 1 {
		t.Error("expected 1 unresponsive engine")
	}
	_ = html // just to use the variable
	_ = strings.TrimSpace // use strings
}

View file

@ -0,0 +1,87 @@
package engines
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// DuckDuckGoEngine searches DuckDuckGo's Lite/HTML endpoint.
// DuckDuckGo Lite returns a simple HTML page that can be scraped for results.
type DuckDuckGoEngine struct {
	// client issues the outbound HTTP requests; Search errors if it is nil.
	client *http.Client
}
func (e *DuckDuckGoEngine) Name() string { return "duckduckgo" }
// Search fetches the DuckDuckGo Lite page for req.Query (region chosen from
// req.Language) and scrapes its HTML into results. A blank query returns an
// empty response without touching the network.
func (e *DuckDuckGoEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("duckduckgo engine not initialized")
	}
	target := fmt.Sprintf(
		"https://lite.duckduckgo.com/lite/?q=%s&kl=%s",
		url.QueryEscape(req.Query),
		duckduckgoRegion(req.Language),
	)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")
	res, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		snippet, _ := io.ReadAll(io.LimitReader(res.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("duckduckgo upstream error: status=%d body=%q", res.StatusCode, string(snippet))
	}
	found, err := parseDuckDuckGoHTML(res.Body)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(found),
		Results:             found,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// duckduckgoRegion maps a request language code to DuckDuckGo's "kl"
// region parameter. Empty or "auto" selects "us-en"; unrecognized
// languages fall back to the worldwide region "wt-wt". Only the primary
// language subtag is considered ("en-US" -> "en").
func duckduckgoRegion(lang string) string {
	lang = strings.ToLower(strings.TrimSpace(lang))
	if lang == "" || lang == "auto" {
		return "us-en"
	}
	primary, _, _ := strings.Cut(lang, "-")
	// A switch over the constant table avoids allocating a map per call.
	switch primary {
	case "en":
		return "us-en"
	case "de":
		return "de-de"
	case "fr":
		return "fr-fr"
	case "es":
		return "es-es"
	case "pt":
		return "br-pt"
	case "ru":
		return "ru-ru"
	case "ja":
		return "jp-jp"
	case "zh":
		return "cn-zh"
	case "ko":
		return "kr-kr"
	case "it":
		return "it-it"
	case "nl":
		return "nl-nl"
	case "pl":
		return "pl-pl"
	default:
		return "wt-wt"
	}
}

View file

@ -0,0 +1,137 @@
package engines
import (
"io"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// parseDuckDuckGoHTML parses DuckDuckGo Lite's HTML response for search results.
// DDG Lite uses HTML tables with single-quoted class attributes and DDG tracking
// URLs. The scan runs in two passes: first collect (href, title) pairs from
// every result-link anchor, then look up the snippet text near each link.
func parseDuckDuckGoHTML(r io.Reader) ([]contracts.MainResult, error) {
	body, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}
	content := string(body)
	results := make([]contracts.MainResult, 0)
	type parsedResult struct {
		href  string
		title string
	}
	var parsedLinks []parsedResult
	// Pass 1: walk the document anchor by anchor.
	remaining := content
	for {
		// DDG uses single quotes: class='result-link'
		idx := strings.Index(remaining, "class='result-link'")
		if idx == -1 {
			break
		}
		block := remaining[idx:]
		// Extract href from the anchor.
		href := extractAttr(block, "href")
		if href == "" {
			// Advance one byte so the same match is not found again.
			remaining = block[1:]
			continue
		}
		// DDG wraps real URLs in tracking redirect: //duckduckgo.com/l/?uddg=ENCODED_URL
		if strings.Contains(href, "duckduckgo.com/l/") || strings.Contains(href, "uddg=") {
			if uddgIdx := strings.Index(href, "uddg="); uddgIdx != -1 {
				encodedURL := href[uddgIdx+5:]
				// Split on & to get just the URL (other params may follow)
				if ampIdx := strings.Index(encodedURL, "&"); ampIdx != -1 {
					encodedURL = encodedURL[:ampIdx]
				}
				// On decode failure the raw (still-encoded) href is kept.
				if decoded, err := url.QueryUnescape(encodedURL); err == nil {
					href = decoded
				}
			}
		}
		// Skip internal links.
		if strings.HasPrefix(href, "/") || strings.HasPrefix(href, "//duckduckgo.com") {
			remaining = block[1:]
			continue
		}
		// Extract title — text between > and </a> after the class attribute.
		titleStart := strings.Index(block, ">")
		if titleStart == -1 {
			remaining = block[1:]
			continue
		}
		afterClass := block[titleStart+1:]
		titleEnd := strings.Index(afterClass, "</a>")
		if titleEnd == -1 {
			remaining = block[1:]
			continue
		}
		title := stripHTML(afterClass[:titleEnd])
		title = htmlUnescape(title)
		if title == "" {
			// Skip past this anchor entirely before rescanning.
			remaining = block[titleStart+1+titleEnd:]
			continue
		}
		parsedLinks = append(parsedLinks, parsedResult{
			href:  href,
			title: title,
		})
		remaining = block[titleStart+1+titleEnd:]
	}
	// Pass 2: extract snippets for each result.
	for i, link := range parsedLinks {
		snippet := ""
		// NOTE(review): an href decoded from the uddg= redirect will usually
		// not appear verbatim in the raw HTML, so both lookups below can
		// miss and leave the snippet empty — confirm against live markup.
		linkIdx := strings.Index(content, link.href)
		if linkIdx == -1 {
			// Try partial match (the href might be HTML-encoded in the source).
			linkIdx = strings.Index(content, url.QueryEscape(link.href))
		}
		if linkIdx != -1 {
			// Only scan a bounded window after the link for its snippet.
			snippetRegion := content[linkIdx:]
			if len(snippetRegion) > 2000 {
				snippetRegion = snippetRegion[:2000]
			}
			// DDG uses single quotes: class='result-snippet'
			snippetIdx := strings.Index(snippetRegion, "class='result-snippet'")
			if snippetIdx != -1 {
				snippetBlock := snippetRegion[snippetIdx:]
				textStart := strings.Index(snippetBlock, ">")
				if textStart != -1 {
					textEnd := strings.Index(snippetBlock[textStart:], "</td>")
					if textEnd != -1 {
						snippet = stripHTML(snippetBlock[textStart+1 : textStart+textEnd])
					}
				}
			}
		}
		urlPtr := link.href
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    link.title,
			Content:  snippet,
			URL:      &urlPtr,
			Engine:   "duckduckgo",
			// Rank-based score: earlier links score higher.
			Score:    float64(len(parsedLinks) - i),
			Category: "general",
			Engines:  []string{"duckduckgo"},
		})
	}
	return results, nil
}

View file

@ -0,0 +1,134 @@
package engines
import (
"context"
"net/http"
"strings"
"testing"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// TestDuckDuckGoEngine_EmptyQuery: a blank query short-circuits with no
// results and no error.
func TestDuckDuckGoEngine_EmptyQuery(t *testing.T) {
	var eng DuckDuckGoEngine
	resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if n := len(resp.Results); n != 0 {
		t.Errorf("expected 0 results for empty query, got %d", n)
	}
}

// TestDuckDuckGoEngine_NilClient: Search on a nil receiver must return an
// error rather than panic.
func TestDuckDuckGoEngine_NilClient(t *testing.T) {
	var eng *DuckDuckGoEngine
	if _, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"}); err == nil {
		t.Error("expected error for nil engine")
	}
}

// TestDuckDuckGoEngine_UninitializedClient: a zero-value engine (nil
// http.Client) must refuse to search.
func TestDuckDuckGoEngine_UninitializedClient(t *testing.T) {
	if _, err := (&DuckDuckGoEngine{}).Search(context.Background(), contracts.SearchRequest{Query: "test"}); err == nil {
		t.Error("expected error for uninitialized client")
	}
}

// TestDuckDuckGoEngine_Name checks the configured engine identifier.
func TestDuckDuckGoEngine_Name(t *testing.T) {
	if got := (&DuckDuckGoEngine{}).Name(); got != "duckduckgo" {
		t.Errorf("expected 'duckduckgo', got %q", got)
	}
}
// TestDuckDuckGoRegion covers the defaults, known languages,
// region-qualified codes, and the worldwide fallback.
func TestDuckDuckGoRegion(t *testing.T) {
	cases := []struct {
		lang string
		want string
	}{
		{"", "us-en"},
		{"auto", "us-en"},
		{"en", "us-en"},
		{"de", "de-de"},
		{"fr", "fr-fr"},
		{"en-US", "us-en"},
		{"ja", "jp-jp"},
		{"unknown", "wt-wt"},
	}
	for _, tc := range cases {
		t.Run(tc.lang, func(t *testing.T) {
			if got := duckduckgoRegion(tc.lang); got != tc.want {
				t.Errorf("duckduckgoRegion(%q) = %q, want %q", tc.lang, got, tc.want)
			}
		})
	}
}
// TestParseDuckDuckGoHTML feeds a minimal DDG-Lite-style fragment through
// the parser and checks both results come back with the right title and URL.
func TestParseDuckDuckGoHTML(t *testing.T) {
	const page = `<a class='result-link' href="https://example.com">Example Title</a>
<td class='result-snippet'>This is a test snippet</td>
<a class='result-link' href="https://example2.com">Second Result</a>
<td class='result-snippet'>Another snippet here</td>`
	got, err := parseDuckDuckGoHTML(strings.NewReader(page))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(got) != 2 {
		t.Fatalf("expected 2 results, got %d", len(got))
	}
	if got[0].Title != "Example Title" {
		t.Errorf("expected 'Example Title', got %q", got[0].Title)
	}
	if *got[0].URL != "https://example.com" {
		t.Errorf("expected 'https://example.com', got %q", *got[0].URL)
	}
}

// TestHtmlUnescape checks the basic entity substitutions.
func TestHtmlUnescape(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"a&amp;b", "a&b"},
		{"a&lt;b", "a<b"},
		{"a&#39;b", "a'b"},
		{"normal", "normal"},
	}
	for _, tc := range cases {
		if got := htmlUnescape(tc.in); got != tc.want {
			t.Errorf("htmlUnescape(%q) = %q, want %q", tc.in, got, tc.want)
		}
	}
}
// TestDuckDuckGoEngine_LiveRequest performs a real DDG Lite query; skipped
// with -short.
func TestDuckDuckGoEngine_LiveRequest(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping live request")
	}
	eng := &DuckDuckGoEngine{client: &http.Client{}}
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	resp, err := eng.Search(ctx, contracts.SearchRequest{Query: "golang programming"})
	if err != nil {
		t.Fatalf("live search failed: %v", err)
	}
	if resp.Query != "golang programming" {
		t.Errorf("query mismatch: %q", resp.Query)
	}
	t.Logf("duckduckgo returned %d results", len(resp.Results))
}

View file

@ -28,6 +28,9 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
category: "web-lite", category: "web-lite",
resultsPerPage: 10, resultsPerPage: 10,
}, },
"duckduckgo": &DuckDuckGoEngine{client: client},
"github": &GitHubEngine{client: client},
"reddit": &RedditEngine{client: client},
"bing": &BingEngine{client: client},
} }
} }

120
internal/engines/github.go Normal file
View file

@ -0,0 +1,120 @@
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// GitHubEngine searches GitHub repositories via the public repository
// search API (only /search/repositories is called; code search is not used).
// No authentication required (rate-limited to 10 requests/min unauthenticated).
type GitHubEngine struct {
	// client issues the outbound HTTP requests; Search errors if it is nil.
	client *http.Client
}
func (e *GitHubEngine) Name() string { return "github" }
// Search queries the GitHub repository search API for req.Query.
// Repositories are sorted by stars; the star count doubles as the result
// score, up to three topics are appended to the title, and the last-updated
// date is both appended to the content and used as the publication date.
func (e *GitHubEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("github engine not initialized")
	}
	// GitHub's page parameter is 1-based; clamp so an unset (zero) Pageno
	// still requests the first page instead of page=0.
	page := req.Pageno
	if page < 1 {
		page = 1
	}
	endpoint := fmt.Sprintf(
		"https://api.github.com/search/repositories?q=%s&sort=stars&per_page=10&page=%d",
		url.QueryEscape(req.Query),
		page,
	)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch/0.1")
	httpReq.Header.Set("Accept", "application/vnd.github.v3+json")
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("github api error: status=%d body=%q", resp.StatusCode, string(body))
	}
	// Only the fields we consume from GitHub's search envelope.
	var data struct {
		TotalCount int `json:"total_count"`
		Items      []struct {
			FullName    string    `json:"full_name"`
			Description string    `json:"description"`
			HTMLURL     string    `json:"html_url"`
			Stars       int       `json:"stargazers_count"`
			Language    string    `json:"language"`
			UpdatedAt   time.Time `json:"updated_at"`
			Topics      []string  `json:"topics"`
		} `json:"items"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		return contracts.SearchResponse{}, err
	}
	results := make([]contracts.MainResult, 0, len(data.Items))
	for _, item := range data.Items {
		// Content: "<description> • Language: X · ⭐ N · Updated: date",
		// with segments omitted when unavailable.
		content := item.Description
		if item.Language != "" {
			if content != "" {
				content += " • "
			}
			content += fmt.Sprintf("Language: %s · ⭐ %d", item.Language, item.Stars)
		}
		title := item.FullName
		if len(item.Topics) > 0 {
			title = item.FullName + " [" + strings.Join(item.Topics[:min(3, len(item.Topics))], ", ") + "]"
		}
		updatedAt := item.UpdatedAt.Format("2006-01-02")
		if content != "" {
			content += " · Updated: " + updatedAt
		}
		urlPtr := item.HTMLURL
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    title,
			Content:  content,
			URL:      &urlPtr,
			Pubdate:  strPtr(updatedAt),
			Engine:   "github",
			Score:    float64(item.Stars),
			Category: "it",
			Engines:  []string{"github"},
		})
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     data.TotalCount,
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
func strPtr(s string) *string { return &s }

View file

@ -0,0 +1,72 @@
package engines
import (
"context"
"net/http"
"testing"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// TestGitHubEngine_EmptyQuery: a blank query short-circuits with no results
// and no error.
func TestGitHubEngine_EmptyQuery(t *testing.T) {
	var eng GitHubEngine
	resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if n := len(resp.Results); n != 0 {
		t.Errorf("expected 0 results for empty query, got %d", n)
	}
}

// TestGitHubEngine_Name checks the configured engine identifier.
func TestGitHubEngine_Name(t *testing.T) {
	if got := (&GitHubEngine{}).Name(); got != "github" {
		t.Errorf("expected 'github', got %q", got)
	}
}

// TestGitHubEngine_Uninitialized: searching without an http.Client must error.
func TestGitHubEngine_Uninitialized(t *testing.T) {
	if _, err := (&GitHubEngine{}).Search(context.Background(), contracts.SearchRequest{Query: "test"}); err == nil {
		t.Error("expected error for uninitialized client")
	}
}
// TestGitHubEngine_LiveRequest issues a real GitHub API query; skipped
// with -short.
func TestGitHubEngine_LiveRequest(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping live request")
	}
	eng := &GitHubEngine{client: &http.Client{}}
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	resp, err := eng.Search(ctx, contracts.SearchRequest{Query: "golang cli"})
	if err != nil {
		t.Fatalf("live search failed: %v", err)
	}
	if resp.NumberOfResults <= 0 {
		t.Error("expected some results for 'golang cli'")
	}
	if len(resp.Results) == 0 {
		t.Error("expected at least 1 result")
	}
	// Verify each result is well-formed.
	for _, r := range resp.Results {
		if r.Engine != "github" {
			t.Errorf("expected engine 'github', got %q", r.Engine)
		}
		if r.URL == nil || *r.URL == "" {
			t.Error("expected non-empty URL")
		}
	}
	t.Logf("github returned %d results (total: %d)", len(resp.Results), resp.NumberOfResults)
}

View file

@ -0,0 +1,58 @@
package engines
import (
"strings"
)
// extractAttr finds attr="value" or attr='value' in an HTML string and
// returns the value, or "" when the attribute is absent. The closing
// delimiter searched for matches the opening quote style — previously a
// single-quoted value was terminated at the next double quote, so
// `href='u' class="c"` yielded `u' class=` instead of `u`. An unterminated
// value returns everything to the end of the string.
func extractAttr(s, attr string) string {
	quote := `"`
	prefix := attr + `="`
	idx := strings.Index(s, prefix)
	if idx == -1 {
		quote = "'"
		prefix = attr + "='"
		idx = strings.Index(s, prefix)
		if idx == -1 {
			return ""
		}
	}
	start := idx + len(prefix)
	end := strings.Index(s[start:], quote)
	if end == -1 {
		end = len(s) - start
	}
	return s[start : start+end]
}
// stripHTML drops everything between '<' and '>' (inclusive) and trims
// surrounding whitespace from what remains. Text after an unclosed '<' is
// discarded.
func stripHTML(s string) string {
	var out strings.Builder
	skipping := false
	for _, c := range s {
		switch {
		case c == '<':
			skipping = true
		case c == '>':
			skipping = false
		case !skipping:
			out.WriteRune(c)
		}
	}
	return strings.TrimSpace(out.String())
}
// htmlEntityReplacer performs all basic-entity substitutions in a single
// left-to-right pass. "&amp;" must conceptually come last: the previous
// sequential ReplaceAll implementation replaced "&amp;" first, so
// "&amp;lt;" was double-unescaped to "<" instead of "&lt;".
var htmlEntityReplacer = strings.NewReplacer(
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", `"`,
	"&#39;", "'",
	"&nbsp;", " ",
	"&amp;", "&",
)

// htmlUnescape handles basic HTML entities (lt, gt, quot, #39, nbsp, amp).
// Unknown entities are left untouched.
func htmlUnescape(s string) string {
	return htmlEntityReplacer.Replace(s)
}

120
internal/engines/reddit.go Normal file
View file

@ -0,0 +1,120 @@
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/ashie/gosearch/internal/contracts"
)
// RedditEngine searches Reddit posts via the public JSON API
// (reddit.com/search.json).
type RedditEngine struct {
	// client issues the outbound HTTP requests; Search errors if it is nil.
	client *http.Client
}
func (e *RedditEngine) Name() string { return "reddit" }
// Search queries Reddit's public JSON search API (up to 25 posts, sorted by
// relevance) and maps posts to results. NSFW posts are dropped whenever
// req.Safesearch > 0; the post score doubles as the result score.
// NOTE(review): req.Pageno is ignored — Reddit paginates with an opaque
// "after" cursor rather than page numbers, which this engine does not
// implement yet.
func (e *RedditEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("reddit engine not initialized")
	}
	endpoint := fmt.Sprintf(
		"https://www.reddit.com/search.json?q=%s&limit=25&sort=relevance&t=all",
		url.QueryEscape(req.Query),
	)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "gosearch/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")
	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("reddit api error: status=%d body=%q", resp.StatusCode, string(body))
	}
	// Only the fields we consume from Reddit's listing envelope.
	var data struct {
		Data struct {
			Children []struct {
				Data struct {
					Title       string  `json:"title"`
					URL         string  `json:"url"`
					Permalink   string  `json:"permalink"`
					Score       int     `json:"score"`
					NumComments int     `json:"num_comments"`
					Subreddit   string  `json:"subreddit"`
					CreatedUTC  float64 `json:"created_utc"`
					IsSelf      bool    `json:"is_self"`
					Over18      bool    `json:"over_18"`
				} `json:"data"`
			} `json:"children"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		return contracts.SearchResponse{}, err
	}
	results := make([]contracts.MainResult, 0, len(data.Data.Children))
	for _, child := range data.Data.Children {
		post := child.Data
		// Skip NSFW results unless safesearch is fully disabled.
		if post.Over18 && req.Safesearch > 0 {
			continue
		}
		// For self-posts (and relative URLs), link to the Reddit thread.
		linkURL := post.URL
		if post.IsSelf || strings.HasPrefix(linkURL, "/r/") {
			linkURL = "https://www.reddit.com" + post.Permalink
		}
		content := fmt.Sprintf("r/%s · ⬆ %d · 💬 %d", post.Subreddit, post.Score, post.NumComments)
		urlPtr := linkURL
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    post.Title,
			Content:  content,
			URL:      &urlPtr,
			Engine:   "reddit",
			Score:    float64(post.Score),
			Category: "general",
			Engines:  []string{"reddit"},
		})
	}
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}

View file

@ -0,0 +1,46 @@
package engines
import (
"context"
"net/http"
"testing"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// TestRedditEngine_EmptyQuery: a blank query short-circuits with no results
// and no error.
func TestRedditEngine_EmptyQuery(t *testing.T) {
	var eng RedditEngine
	resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if n := len(resp.Results); n != 0 {
		t.Errorf("expected 0 results for empty query, got %d", n)
	}
}

// TestRedditEngine_Name checks the configured engine identifier.
func TestRedditEngine_Name(t *testing.T) {
	if got := (&RedditEngine{}).Name(); got != "reddit" {
		t.Errorf("expected 'reddit', got %q", got)
	}
}

// TestRedditEngine_Uninitialized: searching without an http.Client must error.
func TestRedditEngine_Uninitialized(t *testing.T) {
	if _, err := (&RedditEngine{}).Search(context.Background(), contracts.SearchRequest{Query: "test"}); err == nil {
		t.Error("expected error for uninitialized client")
	}
}
// TestRedditEngine_LiveRequest is skipped unconditionally: Reddit's JSON
// API returns 403 from non-browser/CI contexts, so it must be run manually
// from a suitable environment.
func TestRedditEngine_LiveRequest(t *testing.T) {
	t.Skip("reddit API requires browser-like context; test manually")
	// The references below keep the file's imports in use despite the skip.
	_ = context.Background
	_ = http.Client{}
	_ = contracts.SearchRequest{}
	_ = time.Second
}