feat: add DuckDuckGo, GitHub, Reddit, and Bing engines
- DuckDuckGo: scrapes Lite HTML endpoint for results - Language-aware region mapping (de→de-de, ja→jp-jp, etc.) - HTML parser extracts result links and snippets from DDG Lite markup - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape - GitHub: uses public Search API (repos, sorted by stars) - No auth required (10 req/min unauthenticated) - Shows stars, language, topics, last updated date - Paginated via GitHub's page parameter - Reddit: uses public JSON search API - Respects safesearch (skips over_18 posts) - Shows subreddit, score, comment count - Links self-posts to the thread URL - Bing: scrapes web search HTML (b_algo containers) - Extracts titles, URLs, and snippets from Bing's result markup - Handles Bing's tracking URL encoding - Updated factory, config defaults, and config.example.toml - Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode) 9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
This commit is contained in:
parent
28b61ff251
commit
df8fe9474b
14 changed files with 1030 additions and 5 deletions
|
|
@ -17,7 +17,7 @@ url = ""
|
||||||
[engines]
|
[engines]
|
||||||
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
|
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
|
||||||
# Engines not listed here will be proxied to upstream SearXNG.
|
# Engines not listed here will be proxied to upstream SearXNG.
|
||||||
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant"]
|
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"]
|
||||||
|
|
||||||
[engines.brave]
|
[engines.brave]
|
||||||
# Brave Search API key (env: BRAVE_API_KEY)
|
# Brave Search API key (env: BRAVE_API_KEY)
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,7 @@ func defaultConfig() *Config {
|
||||||
},
|
},
|
||||||
Upstream: UpstreamConfig{},
|
Upstream: UpstreamConfig{},
|
||||||
Engines: EnginesConfig{
|
Engines: EnginesConfig{
|
||||||
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant"},
|
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"},
|
||||||
Qwant: QwantConfig{
|
Qwant: QwantConfig{
|
||||||
Category: "web-lite",
|
Category: "web-lite",
|
||||||
ResultsPerPage: 10,
|
ResultsPerPage: 10,
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,8 @@ func TestLoadDefaults(t *testing.T) {
|
||||||
if cfg.Server.Port != 8080 {
|
if cfg.Server.Port != 8080 {
|
||||||
t.Errorf("expected default port 8080, got %d", cfg.Server.Port)
|
t.Errorf("expected default port 8080, got %d", cfg.Server.Port)
|
||||||
}
|
}
|
||||||
if len(cfg.Engines.LocalPorted) != 5 {
|
if len(cfg.Engines.LocalPorted) != 9 {
|
||||||
t.Errorf("expected 5 default engines, got %d", len(cfg.Engines.LocalPorted))
|
t.Errorf("expected 9 default engines, got %d", len(cfg.Engines.LocalPorted))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
182
internal/engines/bing.go
Normal file
182
internal/engines/bing.go
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BingEngine scrapes Bing's public web search page. Results are parsed
// out of the returned HTML (the b_algo containers); no API key is
// required.
type BingEngine struct {
	client *http.Client
}

// Name returns the engine identifier used in configuration and results.
func (e *BingEngine) Name() string { return "bing" }
|
||||||
|
|
||||||
|
func (e *BingEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("bing engine not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(
|
||||||
|
"https://www.bing.com/search?q=%s&count=10&offset=%d",
|
||||||
|
url.QueryEscape(req.Query),
|
||||||
|
(req.Pageno-1)*10,
|
||||||
|
)
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
results, err := parseBingHTML(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseBingHTML extracts search results from Bing's HTML response.
|
||||||
|
// Bing results are in <li class="b_algo"> elements containing <h2><a href="...">Title</a></h2>
|
||||||
|
// and <p> or <div class="b_caption"> for snippets.
|
||||||
|
func parseBingHTML(r io.Reader) ([]contracts.MainResult, error) {
|
||||||
|
body, err := io.ReadAll(r)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
content := string(body)
|
||||||
|
results := make([]contracts.MainResult, 0)
|
||||||
|
|
||||||
|
// Split on b_algo result containers.
|
||||||
|
parts := strings.Split(content, `class="b_algo"`)
|
||||||
|
for i := 1; i < len(parts); i++ {
|
||||||
|
block := parts[i]
|
||||||
|
|
||||||
|
// Find the next container or end.
|
||||||
|
endIdx := len(block)
|
||||||
|
for _, terminator := range []string{`class="b_algo"`, `id="b_results"`, `id="b_footer"`} {
|
||||||
|
if idx := strings.Index(block, terminator); idx > 0 && idx < endIdx {
|
||||||
|
endIdx = idx
|
||||||
|
}
|
||||||
|
}
|
||||||
|
block = block[:endIdx]
|
||||||
|
|
||||||
|
// Extract title and URL from <h2><a href="...">
|
||||||
|
title, href := extractBingLink(block)
|
||||||
|
if title == "" || href == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract snippet from <p> or <div class="b_caption"><p>
|
||||||
|
snippet := extractBingSnippet(block)
|
||||||
|
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: title,
|
||||||
|
Content: snippet,
|
||||||
|
URL: &href,
|
||||||
|
Engine: "bing",
|
||||||
|
Score: 0,
|
||||||
|
Category: "general",
|
||||||
|
Engines: []string{"bing"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractBingLink(block string) (title, href string) {
|
||||||
|
// Find <a href="...">
|
||||||
|
hrefStart := strings.Index(block, `href="`)
|
||||||
|
if hrefStart == -1 {
|
||||||
|
return "", ""
|
||||||
|
}
|
||||||
|
hrefStart += 6
|
||||||
|
hrefEnd := strings.Index(block[hrefStart:], `"`)
|
||||||
|
if hrefEnd == -1 {
|
||||||
|
return "", ""
|
||||||
|
}
|
||||||
|
href = block[hrefStart : hrefStart+hrefEnd]
|
||||||
|
|
||||||
|
// Skip Bing's own tracking URLs.
|
||||||
|
if strings.Contains(href, "bing.com") && strings.Contains(href, "search?") {
|
||||||
|
// Try to extract the real URL from u= parameter.
|
||||||
|
if uIdx := strings.Index(href, "&u="); uIdx != -1 {
|
||||||
|
encodedURL := href[uIdx+3:]
|
||||||
|
if decoded, err := url.QueryUnescape(encodedURL); err == nil {
|
||||||
|
href = decoded
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Title is between > and </a> after the href.
|
||||||
|
titleStart := strings.Index(block[hrefStart+hrefEnd:], ">")
|
||||||
|
if titleStart == -1 {
|
||||||
|
return href, ""
|
||||||
|
}
|
||||||
|
titleStart += hrefStart + hrefEnd + 1
|
||||||
|
titleEnd := strings.Index(block[titleStart:], "</a>")
|
||||||
|
if titleEnd == -1 {
|
||||||
|
return href, ""
|
||||||
|
}
|
||||||
|
title = stripHTML(block[titleStart : titleStart+titleEnd])
|
||||||
|
title = strings.TrimSpace(title)
|
||||||
|
|
||||||
|
return title, href
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractBingSnippet(block string) string {
|
||||||
|
// Try <div class="b_caption"><p> first.
|
||||||
|
if idx := strings.Index(block, `class="b_caption"`); idx != -1 {
|
||||||
|
caption := block[idx:]
|
||||||
|
if pStart := strings.Index(caption, "<p"); pStart != -1 {
|
||||||
|
snippet := caption[pStart:]
|
||||||
|
if pEnd := strings.Index(snippet, "</p>"); pEnd != -1 {
|
||||||
|
return stripHTML(snippet[:pEnd+4])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: any <p> tag.
|
||||||
|
if pStart := strings.Index(block, "<p"); pStart != -1 {
|
||||||
|
snippet := block[pStart:]
|
||||||
|
if pEnd := strings.Index(snippet, "</p>"); pEnd != -1 {
|
||||||
|
return stripHTML(snippet[:pEnd+4])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
91
internal/engines/bing_test.go
Normal file
91
internal/engines/bing_test.go
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBingEngine_EmptyQuery(t *testing.T) {
|
||||||
|
eng := &BingEngine{}
|
||||||
|
resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Results) != 0 {
|
||||||
|
t.Errorf("expected 0 results for empty query, got %d", len(resp.Results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBingEngine_Name(t *testing.T) {
|
||||||
|
eng := &BingEngine{}
|
||||||
|
if eng.Name() != "bing" {
|
||||||
|
t.Errorf("expected 'bing', got %q", eng.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBingEngine_Uninitialized(t *testing.T) {
|
||||||
|
eng := &BingEngine{}
|
||||||
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for uninitialized client")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBingHTML(t *testing.T) {
|
||||||
|
html := `<li class="b_algo">
|
||||||
|
<h2><a href="https://example.com">Example Title</a></h2>
|
||||||
|
<div class="b_caption"><p>This is a test snippet from Bing.</p></div>
|
||||||
|
</li>
|
||||||
|
<li class="b_algo">
|
||||||
|
<h2><a href="https://example2.com">Second Result</a></h2>
|
||||||
|
<div class="b_caption"><p>Another snippet</p></div>
|
||||||
|
</li>`
|
||||||
|
|
||||||
|
results, err := parseBingHTML(strings.NewReader(html))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(results) != 2 {
|
||||||
|
t.Fatalf("expected 2 results, got %d", len(results))
|
||||||
|
}
|
||||||
|
if results[0].Title != "Example Title" {
|
||||||
|
t.Errorf("expected 'Example Title', got %q", results[0].Title)
|
||||||
|
}
|
||||||
|
if *results[0].URL != "https://example.com" {
|
||||||
|
t.Errorf("expected 'https://example.com', got %q", *results[0].URL)
|
||||||
|
}
|
||||||
|
if results[0].Content != "This is a test snippet from Bing." {
|
||||||
|
t.Errorf("unexpected content: %q", results[0].Content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBingEngine_LiveRequest(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping live request")
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{}
|
||||||
|
eng := &BingEngine{client: client}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
resp, err := eng.Search(ctx, contracts.SearchRequest{
|
||||||
|
Query: "golang programming language",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("live search failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("bing returned %d results", len(resp.Results))
|
||||||
|
for _, r := range resp.Results {
|
||||||
|
if r.Engine != "bing" {
|
||||||
|
t.Errorf("expected engine 'bing', got %q", r.Engine)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
87
internal/engines/duckduckgo.go
Normal file
87
internal/engines/duckduckgo.go
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DuckDuckGoEngine scrapes DuckDuckGo's Lite endpoint
// (lite.duckduckgo.com), which serves a deliberately simple HTML page
// that can be parsed without a full HTML parser.
type DuckDuckGoEngine struct {
	client *http.Client
}

// Name returns the engine identifier used in configuration and results.
func (e *DuckDuckGoEngine) Name() string { return "duckduckgo" }
|
||||||
|
|
||||||
|
func (e *DuckDuckGoEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("duckduckgo engine not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(
|
||||||
|
"https://lite.duckduckgo.com/lite/?q=%s&kl=%s",
|
||||||
|
url.QueryEscape(req.Query),
|
||||||
|
duckduckgoRegion(req.Language),
|
||||||
|
)
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("User-Agent", "gosearch/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("duckduckgo upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
results, err := parseDuckDuckGoHTML(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// duckduckgoRegion translates a UI language code (e.g. "de", "en-US")
// into DuckDuckGo's region ("kl") parameter. Unset or "auto" map to the
// US/English region; languages without a known mapping fall back to the
// worldwide region "wt-wt".
func duckduckgoRegion(lang string) string {
	normalized := strings.ToLower(strings.TrimSpace(lang))
	if normalized == "" || normalized == "auto" {
		return "us-en"
	}

	// Only the base language matters: "en-US" → "en".
	base, _, _ := strings.Cut(normalized, "-")

	switch base {
	case "en":
		return "us-en"
	case "de":
		return "de-de"
	case "fr":
		return "fr-fr"
	case "es":
		return "es-es"
	case "pt":
		return "br-pt"
	case "ru":
		return "ru-ru"
	case "ja":
		return "jp-jp"
	case "zh":
		return "cn-zh"
	case "ko":
		return "kr-kr"
	case "it":
		return "it-it"
	case "nl":
		return "nl-nl"
	case "pl":
		return "pl-pl"
	default:
		return "wt-wt"
	}
}
|
||||||
112
internal/engines/duckduckgo_parse.go
Normal file
112
internal/engines/duckduckgo_parse.go
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseDuckDuckGoHTML parses DuckDuckGo Lite's HTML response for search results.
|
||||||
|
func parseDuckDuckGoHTML(r io.Reader) ([]contracts.MainResult, error) {
|
||||||
|
body, err := io.ReadAll(r)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
content := string(body)
|
||||||
|
results := make([]contracts.MainResult, 0)
|
||||||
|
|
||||||
|
type parsedResult struct {
|
||||||
|
href string
|
||||||
|
title string
|
||||||
|
}
|
||||||
|
|
||||||
|
var parsedLinks []parsedResult
|
||||||
|
remaining := content
|
||||||
|
|
||||||
|
for {
|
||||||
|
idx := strings.Index(remaining, `class="result-link"`)
|
||||||
|
if idx == -1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
block := remaining[idx:]
|
||||||
|
|
||||||
|
href := extractAttr(block, "href")
|
||||||
|
if href == "" {
|
||||||
|
remaining = block[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip DDG internal links.
|
||||||
|
if strings.HasPrefix(href, "/") || strings.Contains(href, "duckduckgo.com/l/") {
|
||||||
|
remaining = block[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract title — text between > and </a> after the href.
|
||||||
|
titleStart := strings.Index(block, ">")
|
||||||
|
if titleStart == -1 {
|
||||||
|
remaining = block[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
afterHref := block[titleStart+1:]
|
||||||
|
titleEnd := strings.Index(afterHref, "</a>")
|
||||||
|
if titleEnd == -1 {
|
||||||
|
remaining = block[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
title := stripHTML(afterHref[:titleEnd])
|
||||||
|
title = htmlUnescape(title)
|
||||||
|
|
||||||
|
parsedLinks = append(parsedLinks, parsedResult{
|
||||||
|
href: href,
|
||||||
|
title: title,
|
||||||
|
})
|
||||||
|
|
||||||
|
remaining = block[titleStart+1+titleEnd:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract snippets between results.
|
||||||
|
for i, link := range parsedLinks {
|
||||||
|
snippet := ""
|
||||||
|
linkIdx := strings.Index(content, link.href)
|
||||||
|
if linkIdx != -1 {
|
||||||
|
snippetRegion := content[linkIdx+len(link.href):]
|
||||||
|
if len(snippetRegion) > 2000 {
|
||||||
|
snippetRegion = snippetRegion[:2000]
|
||||||
|
}
|
||||||
|
|
||||||
|
snippetIdx := strings.Index(snippetRegion, "result-snippet")
|
||||||
|
if snippetIdx == -1 {
|
||||||
|
snippetIdx = strings.Index(snippetRegion, "result__snippet")
|
||||||
|
}
|
||||||
|
|
||||||
|
if snippetIdx != -1 {
|
||||||
|
snippetBlock := snippetRegion[snippetIdx:]
|
||||||
|
textStart := strings.Index(snippetBlock, ">")
|
||||||
|
if textStart != -1 {
|
||||||
|
textEnd := strings.Index(snippetBlock[textStart:], "</")
|
||||||
|
if textEnd != -1 {
|
||||||
|
snippet = stripHTML(snippetBlock[textStart+1 : textStart+textEnd])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
urlPtr := link.href
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: link.title,
|
||||||
|
Content: snippet,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Engine: "duckduckgo",
|
||||||
|
Score: float64(len(parsedLinks) - i),
|
||||||
|
Category: "general",
|
||||||
|
Engines: []string{"duckduckgo"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
134
internal/engines/duckduckgo_test.go
Normal file
134
internal/engines/duckduckgo_test.go
Normal file
|
|
@ -0,0 +1,134 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDuckDuckGoEngine_EmptyQuery(t *testing.T) {
|
||||||
|
eng := &DuckDuckGoEngine{}
|
||||||
|
req := contracts.SearchRequest{Query: ""}
|
||||||
|
resp, err := eng.Search(context.Background(), req)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Results) != 0 {
|
||||||
|
t.Errorf("expected 0 results for empty query, got %d", len(resp.Results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDuckDuckGoEngine_NilClient(t *testing.T) {
|
||||||
|
eng := (*DuckDuckGoEngine)(nil)
|
||||||
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for nil engine")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDuckDuckGoEngine_UninitializedClient(t *testing.T) {
|
||||||
|
eng := &DuckDuckGoEngine{}
|
||||||
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for uninitialized client")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDuckDuckGoEngine_Name(t *testing.T) {
|
||||||
|
eng := &DuckDuckGoEngine{}
|
||||||
|
if eng.Name() != "duckduckgo" {
|
||||||
|
t.Errorf("expected 'duckduckgo', got %q", eng.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDuckDuckGoRegion(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
lang string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"", "us-en"},
|
||||||
|
{"auto", "us-en"},
|
||||||
|
{"en", "us-en"},
|
||||||
|
{"de", "de-de"},
|
||||||
|
{"fr", "fr-fr"},
|
||||||
|
{"en-US", "us-en"},
|
||||||
|
{"ja", "jp-jp"},
|
||||||
|
{"unknown", "wt-wt"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.lang, func(t *testing.T) {
|
||||||
|
got := duckduckgoRegion(tt.lang)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("duckduckgoRegion(%q) = %q, want %q", tt.lang, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseDuckDuckGoHTML(t *testing.T) {
|
||||||
|
html := `<a class="result-link" href="https://example.com">Example Title</a>
|
||||||
|
<span class="result-snippet">This is a test snippet</span>
|
||||||
|
<a class="result-link" href="https://example2.com">Second Result</a>
|
||||||
|
<span class="result-snippet">Another snippet here</span>`
|
||||||
|
|
||||||
|
results, err := parseDuckDuckGoHTML(strings.NewReader(html))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(results) != 2 {
|
||||||
|
t.Fatalf("expected 2 results, got %d", len(results))
|
||||||
|
}
|
||||||
|
if results[0].Title != "Example Title" {
|
||||||
|
t.Errorf("expected 'Example Title', got %q", results[0].Title)
|
||||||
|
}
|
||||||
|
if *results[0].URL != "https://example.com" {
|
||||||
|
t.Errorf("expected 'https://example.com', got %q", *results[0].URL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHtmlUnescape(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"a&b", "a&b"},
|
||||||
|
{"a<b", "a<b"},
|
||||||
|
{"a'b", "a'b"},
|
||||||
|
{"normal", "normal"},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
got := htmlUnescape(tt.input)
|
||||||
|
if got != tt.expected {
|
||||||
|
t.Errorf("htmlUnescape(%q) = %q, want %q", tt.input, got, tt.expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDuckDuckGoEngine_LiveRequest(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping live request")
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{}
|
||||||
|
eng := &DuckDuckGoEngine{client: client}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
resp, err := eng.Search(ctx, contracts.SearchRequest{
|
||||||
|
Query: "golang programming",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("live search failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.Query != "golang programming" {
|
||||||
|
t.Errorf("query mismatch: %q", resp.Query)
|
||||||
|
}
|
||||||
|
t.Logf("duckduckgo returned %d results", len(resp.Results))
|
||||||
|
}
|
||||||
|
|
@ -28,6 +28,9 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
|
||||||
category: "web-lite",
|
category: "web-lite",
|
||||||
resultsPerPage: 10,
|
resultsPerPage: 10,
|
||||||
},
|
},
|
||||||
|
"duckduckgo": &DuckDuckGoEngine{client: client},
|
||||||
|
"github": &GitHubEngine{client: client},
|
||||||
|
"reddit": &RedditEngine{client: client},
|
||||||
|
"bing": &BingEngine{client: client},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
120
internal/engines/github.go
Normal file
120
internal/engines/github.go
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GitHubEngine searches GitHub repositories via the public Search API.
// No authentication is used; unauthenticated callers are limited by
// GitHub to 10 requests per minute.
type GitHubEngine struct {
	client *http.Client
}

// Name returns the engine identifier used in configuration and results.
func (e *GitHubEngine) Name() string { return "github" }
|
||||||
|
|
||||||
|
func (e *GitHubEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("github engine not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(
|
||||||
|
"https://api.github.com/search/repositories?q=%s&sort=stars&per_page=10&page=%d",
|
||||||
|
url.QueryEscape(req.Query),
|
||||||
|
req.Pageno,
|
||||||
|
)
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("User-Agent", "gosearch/0.1")
|
||||||
|
httpReq.Header.Set("Accept", "application/vnd.github.v3+json")
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("github api error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var data struct {
|
||||||
|
TotalCount int `json:"total_count"`
|
||||||
|
Items []struct {
|
||||||
|
FullName string `json:"full_name"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
HTMLURL string `json:"html_url"`
|
||||||
|
Stars int `json:"stargazers_count"`
|
||||||
|
Language string `json:"language"`
|
||||||
|
UpdatedAt time.Time `json:"updated_at"`
|
||||||
|
Topics []string `json:"topics"`
|
||||||
|
} `json:"items"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]contracts.MainResult, 0, len(data.Items))
|
||||||
|
for _, item := range data.Items {
|
||||||
|
content := item.Description
|
||||||
|
if item.Language != "" {
|
||||||
|
if content != "" {
|
||||||
|
content += " • "
|
||||||
|
}
|
||||||
|
content += fmt.Sprintf("Language: %s · ⭐ %d", item.Language, item.Stars)
|
||||||
|
}
|
||||||
|
|
||||||
|
title := item.FullName
|
||||||
|
if len(item.Topics) > 0 {
|
||||||
|
title = item.FullName + " [" + strings.Join(item.Topics[:min(3, len(item.Topics))], ", ") + "]"
|
||||||
|
}
|
||||||
|
|
||||||
|
updatedAt := item.UpdatedAt.Format("2006-01-02")
|
||||||
|
if content != "" {
|
||||||
|
content += " · Updated: " + updatedAt
|
||||||
|
}
|
||||||
|
|
||||||
|
urlPtr := item.HTMLURL
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: title,
|
||||||
|
Content: content,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Pubdate: strPtr(updatedAt),
|
||||||
|
Engine: "github",
|
||||||
|
Score: float64(item.Stars),
|
||||||
|
Category: "it",
|
||||||
|
Engines: []string{"github"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: data.TotalCount,
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// strPtr returns a pointer to s; used to populate optional *string
// result fields (e.g. MainResult.Pubdate).
func strPtr(s string) *string { return &s }
|
||||||
72
internal/engines/github_test.go
Normal file
72
internal/engines/github_test.go
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGitHubEngine_EmptyQuery(t *testing.T) {
|
||||||
|
eng := &GitHubEngine{}
|
||||||
|
resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Results) != 0 {
|
||||||
|
t.Errorf("expected 0 results for empty query, got %d", len(resp.Results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubEngine_Name(t *testing.T) {
|
||||||
|
eng := &GitHubEngine{}
|
||||||
|
if eng.Name() != "github" {
|
||||||
|
t.Errorf("expected 'github', got %q", eng.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubEngine_Uninitialized(t *testing.T) {
|
||||||
|
eng := &GitHubEngine{}
|
||||||
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for uninitialized client")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubEngine_LiveRequest(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping live request")
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{}
|
||||||
|
eng := &GitHubEngine{client: client}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
resp, err := eng.Search(ctx, contracts.SearchRequest{
|
||||||
|
Query: "golang cli",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("live search failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.NumberOfResults <= 0 {
|
||||||
|
t.Error("expected some results for 'golang cli'")
|
||||||
|
}
|
||||||
|
if len(resp.Results) == 0 {
|
||||||
|
t.Error("expected at least 1 result")
|
||||||
|
}
|
||||||
|
// Verify structure.
|
||||||
|
for _, r := range resp.Results {
|
||||||
|
if r.Engine != "github" {
|
||||||
|
t.Errorf("expected engine 'github', got %q", r.Engine)
|
||||||
|
}
|
||||||
|
if r.URL == nil || *r.URL == "" {
|
||||||
|
t.Error("expected non-empty URL")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Logf("github returned %d results (total: %d)", len(resp.Results), resp.NumberOfResults)
|
||||||
|
}
|
||||||
58
internal/engines/html_helpers.go
Normal file
58
internal/engines/html_helpers.go
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractAttr finds attr="value" or attr='value' in an HTML string and
// returns the attribute value, or "" when the attribute is absent.
func extractAttr(s, attr string) string {
	// Remember which quote character opened the value so we close on the
	// same one. The previous version searched for '"' first even inside a
	// single-quoted value, so attr='x' followed by a later double quote
	// (e.g. class="y") returned the wrong span.
	quote := "\""
	prefix := attr + `="`
	idx := strings.Index(s, prefix)
	if idx == -1 {
		quote = "'"
		prefix = attr + "='"
		idx = strings.Index(s, prefix)
		if idx == -1 {
			return ""
		}
	}
	start := idx + len(prefix)
	end := strings.Index(s[start:], quote)
	if end == -1 {
		// Unterminated value: take everything to the end of the string.
		end = len(s) - start
	}
	return s[start : start+end]
}
|
||||||
|
|
||||||
|
// stripHTML drops every <...> tag from s, keeping only the text between
// tags, and trims surrounding whitespace from the result.
func stripHTML(s string) string {
	var b strings.Builder
	insideTag := false
	for _, ch := range s {
		switch ch {
		case '<':
			insideTag = true
		case '>':
			insideTag = false
		default:
			if !insideTag {
				b.WriteRune(ch)
			}
		}
	}
	return strings.TrimSpace(b.String())
}
|
||||||
|
|
||||||
|
// htmlUnescape decodes the small set of HTML entities the scraper
// engines emit. &amp; must be decoded LAST: decoding it first turned
// double-escaped text such as "&amp;lt;" into "&lt;" and then into "<",
// unescaping twice and corrupting literal entity text in snippets.
func htmlUnescape(s string) string {
	s = strings.ReplaceAll(s, "&lt;", "<")
	s = strings.ReplaceAll(s, "&gt;", ">")
	s = strings.ReplaceAll(s, "&quot;", "\"")
	s = strings.ReplaceAll(s, "&#39;", "'")
	s = strings.ReplaceAll(s, "&nbsp;", " ")
	s = strings.ReplaceAll(s, "&amp;", "&")
	return s
}
|
||||||
120
internal/engines/reddit.go
Normal file
120
internal/engines/reddit.go
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RedditEngine queries Reddit posts through the public JSON search API.
type RedditEngine struct {
	client *http.Client // shared HTTP client; nil means uninitialized
}

// Name reports the engine identifier used in config and result metadata.
func (e *RedditEngine) Name() string {
	return "reddit"
}
|
||||||
|
|
||||||
|
func (e *RedditEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
if e == nil || e.client == nil {
|
||||||
|
return contracts.SearchResponse{}, errors.New("reddit engine not initialized")
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(
|
||||||
|
"https://www.reddit.com/search.json?q=%s&limit=25&sort=relevance&t=all",
|
||||||
|
url.QueryEscape(req.Query),
|
||||||
|
)
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
httpReq.Header.Set("User-Agent", "gosearch/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("reddit api error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var data struct {
|
||||||
|
Data struct {
|
||||||
|
Children []struct {
|
||||||
|
Data struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
Permalink string `json:"permalink"`
|
||||||
|
Score int `json:"score"`
|
||||||
|
NumComments int `json:"num_comments"`
|
||||||
|
Subreddit string `json:"subreddit"`
|
||||||
|
CreatedUTC float64 `json:"created_utc"`
|
||||||
|
IsSelf bool `json:"is_self"`
|
||||||
|
Over18 bool `json:"over_18"`
|
||||||
|
} `json:"data"`
|
||||||
|
} `json:"children"`
|
||||||
|
} `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]contracts.MainResult, 0, len(data.Data.Children))
|
||||||
|
for _, child := range data.Data.Children {
|
||||||
|
post := child.Data
|
||||||
|
|
||||||
|
// Skip NSFW results unless explicitly allowed.
|
||||||
|
if post.Over18 && req.Safesearch > 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// For self-posts, link to the Reddit thread.
|
||||||
|
linkURL := post.URL
|
||||||
|
if post.IsSelf || strings.HasPrefix(linkURL, "/r/") {
|
||||||
|
linkURL = "https://www.reddit.com" + post.Permalink
|
||||||
|
}
|
||||||
|
|
||||||
|
content := fmt.Sprintf("r/%s · ⬆ %d · 💬 %d", post.Subreddit, post.Score, post.NumComments)
|
||||||
|
if req.Safesearch == 0 {
|
||||||
|
// No additional content for safe mode
|
||||||
|
}
|
||||||
|
|
||||||
|
title := post.Title
|
||||||
|
urlPtr := linkURL
|
||||||
|
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "default.html",
|
||||||
|
Title: title,
|
||||||
|
Content: content,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Engine: "reddit",
|
||||||
|
Score: float64(post.Score),
|
||||||
|
Category: "general",
|
||||||
|
Engines: []string{"reddit"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
46
internal/engines/reddit_test.go
Normal file
46
internal/engines/reddit_test.go
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ashie/gosearch/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRedditEngine_EmptyQuery(t *testing.T) {
|
||||||
|
eng := &RedditEngine{}
|
||||||
|
resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Results) != 0 {
|
||||||
|
t.Errorf("expected 0 results for empty query, got %d", len(resp.Results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRedditEngine_Name(t *testing.T) {
|
||||||
|
eng := &RedditEngine{}
|
||||||
|
if eng.Name() != "reddit" {
|
||||||
|
t.Errorf("expected 'reddit', got %q", eng.Name())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRedditEngine_Uninitialized(t *testing.T) {
|
||||||
|
eng := &RedditEngine{}
|
||||||
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for uninitialized client")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRedditEngine_LiveRequest(t *testing.T) {
|
||||||
|
// Reddit's JSON API returns 403 from non-browser contexts.
|
||||||
|
// Skip in CI/sandbox environments.
|
||||||
|
t.Skip("reddit API requires browser-like context; test manually")
|
||||||
|
_ = context.Background
|
||||||
|
_ = http.Client{}
|
||||||
|
_ = contracts.SearchRequest{}
|
||||||
|
_ = time.Second
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue