- DuckDuckGo: scrapes Lite HTML endpoint for results - Language-aware region mapping (de→de-de, ja→jp-jp, etc.) - HTML parser extracts result links and snippets from DDG Lite markup - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape - GitHub: uses public Search API (repos, sorted by stars) - No auth required (10 req/min unauthenticated) - Shows stars, language, topics, last updated date - Paginated via GitHub's page parameter - Reddit: uses public JSON search API - Respects safesearch (skips over_18 posts) - Shows subreddit, score, comment count - Links self-posts to the thread URL - Bing: scrapes web search HTML (b_algo containers) - Extracts titles, URLs, and snippets from Bing's result markup - Handles Bing's tracking URL encoding - Updated factory, config defaults, and config.example.toml - Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode) 9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
134 lines
3.2 KiB
Go
134 lines
3.2 KiB
Go
package engines
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/ashie/gosearch/internal/contracts"
|
|
)
|
|
|
|
func TestDuckDuckGoEngine_EmptyQuery(t *testing.T) {
|
|
eng := &DuckDuckGoEngine{}
|
|
req := contracts.SearchRequest{Query: ""}
|
|
resp, err := eng.Search(context.Background(), req)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if len(resp.Results) != 0 {
|
|
t.Errorf("expected 0 results for empty query, got %d", len(resp.Results))
|
|
}
|
|
}
|
|
|
|
func TestDuckDuckGoEngine_NilClient(t *testing.T) {
|
|
eng := (*DuckDuckGoEngine)(nil)
|
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
|
if err == nil {
|
|
t.Error("expected error for nil engine")
|
|
}
|
|
}
|
|
|
|
func TestDuckDuckGoEngine_UninitializedClient(t *testing.T) {
|
|
eng := &DuckDuckGoEngine{}
|
|
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
|
if err == nil {
|
|
t.Error("expected error for uninitialized client")
|
|
}
|
|
}
|
|
|
|
func TestDuckDuckGoEngine_Name(t *testing.T) {
|
|
eng := &DuckDuckGoEngine{}
|
|
if eng.Name() != "duckduckgo" {
|
|
t.Errorf("expected 'duckduckgo', got %q", eng.Name())
|
|
}
|
|
}
|
|
|
|
func TestDuckDuckGoRegion(t *testing.T) {
|
|
tests := []struct {
|
|
lang string
|
|
want string
|
|
}{
|
|
{"", "us-en"},
|
|
{"auto", "us-en"},
|
|
{"en", "us-en"},
|
|
{"de", "de-de"},
|
|
{"fr", "fr-fr"},
|
|
{"en-US", "us-en"},
|
|
{"ja", "jp-jp"},
|
|
{"unknown", "wt-wt"},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.lang, func(t *testing.T) {
|
|
got := duckduckgoRegion(tt.lang)
|
|
if got != tt.want {
|
|
t.Errorf("duckduckgoRegion(%q) = %q, want %q", tt.lang, got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestParseDuckDuckGoHTML(t *testing.T) {
|
|
html := `<a class="result-link" href="https://example.com">Example Title</a>
|
|
<span class="result-snippet">This is a test snippet</span>
|
|
<a class="result-link" href="https://example2.com">Second Result</a>
|
|
<span class="result-snippet">Another snippet here</span>`
|
|
|
|
results, err := parseDuckDuckGoHTML(strings.NewReader(html))
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if len(results) != 2 {
|
|
t.Fatalf("expected 2 results, got %d", len(results))
|
|
}
|
|
if results[0].Title != "Example Title" {
|
|
t.Errorf("expected 'Example Title', got %q", results[0].Title)
|
|
}
|
|
if *results[0].URL != "https://example.com" {
|
|
t.Errorf("expected 'https://example.com', got %q", *results[0].URL)
|
|
}
|
|
}
|
|
|
|
func TestHtmlUnescape(t *testing.T) {
|
|
tests := []struct {
|
|
input string
|
|
expected string
|
|
}{
|
|
{"a&b", "a&b"},
|
|
{"a<b", "a<b"},
|
|
{"a'b", "a'b"},
|
|
{"normal", "normal"},
|
|
}
|
|
for _, tt := range tests {
|
|
got := htmlUnescape(tt.input)
|
|
if got != tt.expected {
|
|
t.Errorf("htmlUnescape(%q) = %q, want %q", tt.input, got, tt.expected)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestDuckDuckGoEngine_LiveRequest(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("skipping live request")
|
|
}
|
|
|
|
client := &http.Client{}
|
|
eng := &DuckDuckGoEngine{client: client}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
|
defer cancel()
|
|
|
|
resp, err := eng.Search(ctx, contracts.SearchRequest{
|
|
Query: "golang programming",
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("live search failed: %v", err)
|
|
}
|
|
|
|
if resp.Query != "golang programming" {
|
|
t.Errorf("query mismatch: %q", resp.Query)
|
|
}
|
|
t.Logf("duckduckgo returned %d results", len(resp.Results))
|
|
}
|