kafka/internal/engines/duckduckgo_test.go
Franz Kafka a8ab29b23a fix: fix DDG and Bing parsers — verified with live tests
DuckDuckGo:
- Fixed parser to handle single-quoted class attributes (class='result-link')
- Decode DDG tracking URLs (uddg= parameter) to extract real URLs
- Match snippet extraction to actual DDG Lite HTML structure (</td> terminator)

Bing:
- Switched from HTML scraping (blocked by JS detection) to RSS endpoint
  (?format=rss) which returns parseable XML
- Added JSON API response parsing as fallback
- Returns graceful unresponsive_engines entry when blocked

Live test results:
- DuckDuckGo: 9 results 
- GitHub: 10 results (14,768 total) 
- Bing: 10 results via RSS 
- Reddit: skipped (403 from sandbox, needs browser-like context)
2026-03-21 16:57:02 +00:00

134 lines
3.2 KiB
Go

package engines
import (
"context"
"net/http"
"strings"
"testing"
"time"
"github.com/ashie/gosearch/internal/contracts"
)
// TestDuckDuckGoEngine_EmptyQuery checks that Search on a zero-value engine
// with an empty query string returns no error and an empty result list.
func TestDuckDuckGoEngine_EmptyQuery(t *testing.T) {
	engine := new(DuckDuckGoEngine)
	emptyReq := contracts.SearchRequest{Query: ""}

	out, err := engine.Search(context.Background(), emptyReq)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if n := len(out.Results); n != 0 {
		t.Errorf("expected 0 results for empty query, got %d", n)
	}
}
// TestDuckDuckGoEngine_NilClient checks that calling Search through a nil
// *DuckDuckGoEngine reports an error for a non-empty query.
func TestDuckDuckGoEngine_NilClient(t *testing.T) {
	var engine *DuckDuckGoEngine // deliberately nil receiver
	req := contracts.SearchRequest{Query: "test"}

	if _, err := engine.Search(context.Background(), req); err == nil {
		t.Error("expected error for nil engine")
	}
}
// TestDuckDuckGoEngine_UninitializedClient checks that a zero-value engine
// (no fields populated) reports an error when asked to run a non-empty query.
func TestDuckDuckGoEngine_UninitializedClient(t *testing.T) {
	engine := new(DuckDuckGoEngine)
	req := contracts.SearchRequest{Query: "test"}

	if _, err := engine.Search(context.Background(), req); err == nil {
		t.Error("expected error for uninitialized client")
	}
}
// TestDuckDuckGoEngine_Name checks that the engine identifies itself as
// "duckduckgo".
func TestDuckDuckGoEngine_Name(t *testing.T) {
	const want = "duckduckgo"

	engine := new(DuckDuckGoEngine)
	if got := engine.Name(); got != want {
		t.Errorf("expected 'duckduckgo', got %q", got)
	}
}
// TestDuckDuckGoRegion runs duckduckgoRegion over a table of language codes
// and compares each returned region string with the expected one. Each entry
// runs as its own subtest named after the input language code.
func TestDuckDuckGoRegion(t *testing.T) {
	type regionCase struct {
		lang string
		want string
	}

	cases := []regionCase{
		{lang: "", want: "us-en"},
		{lang: "auto", want: "us-en"},
		{lang: "en", want: "us-en"},
		{lang: "de", want: "de-de"},
		{lang: "fr", want: "fr-fr"},
		{lang: "en-US", want: "us-en"},
		{lang: "ja", want: "jp-jp"},
		{lang: "unknown", want: "wt-wt"},
	}

	for _, tc := range cases {
		t.Run(tc.lang, func(t *testing.T) {
			if got := duckduckgoRegion(tc.lang); got != tc.want {
				t.Errorf("duckduckgoRegion(%q) = %q, want %q", tc.lang, got, tc.want)
			}
		})
	}
}
// TestParseDuckDuckGoHTML feeds parseDuckDuckGoHTML a two-result fixture that
// uses single-quoted class attributes and checks the title and URL of every
// parsed result.
//
// The URL field is a pointer, so it is nil-checked before being dereferenced:
// a parser regression that leaves it unset then shows up as an ordinary test
// failure rather than a panic that aborts the whole test binary.
func TestParseDuckDuckGoHTML(t *testing.T) {
	html := `<a class='result-link' href="https://example.com">Example Title</a>
<td class='result-snippet'>This is a test snippet</td>
<a class='result-link' href="https://example2.com">Second Result</a>
<td class='result-snippet'>Another snippet here</td>`

	results, err := parseDuckDuckGoHTML(strings.NewReader(html))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	want := []struct {
		title string
		url   string
	}{
		{"Example Title", "https://example.com"},
		{"Second Result", "https://example2.com"},
	}

	// Fatal on a length mismatch so the indexing below cannot go out of range.
	if len(results) != len(want) {
		t.Fatalf("expected %d results, got %d", len(want), len(results))
	}

	for i, w := range want {
		if results[i].Title != w.title {
			t.Errorf("result %d: expected title %q, got %q", i, w.title, results[i].Title)
		}
		if results[i].URL == nil {
			t.Errorf("result %d: expected URL %q, got nil", i, w.url)
			continue
		}
		if *results[i].URL != w.url {
			t.Errorf("result %d: expected URL %q, got %q", i, w.url, *results[i].URL)
		}
	}
}
// TestHtmlUnescape checks htmlUnescape against a small table covering the
// &amp;, &lt; and &#39; entities plus a string with nothing to decode.
func TestHtmlUnescape(t *testing.T) {
	cases := []struct {
		input    string
		expected string
	}{
		{input: "a&amp;b", expected: "a&b"},
		{input: "a&lt;b", expected: "a<b"},
		{input: "a&#39;b", expected: "a'b"},
		{input: "normal", expected: "normal"},
	}

	for i := range cases {
		in, want := cases[i].input, cases[i].expected
		if got := htmlUnescape(in); got != want {
			t.Errorf("htmlUnescape(%q) = %q, want %q", in, got, want)
		}
	}
}
// TestDuckDuckGoEngine_LiveRequest runs a search through an engine backed by
// a plain http.Client and checks that the response echoes the query. It is
// skipped under `go test -short`. The call is bounded by a 15-second context
// deadline. The result count is only logged, not asserted.
func TestDuckDuckGoEngine_LiveRequest(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping live request")
	}

	const query = "golang programming"

	engine := &DuckDuckGoEngine{client: &http.Client{}}

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	result, err := engine.Search(ctx, contracts.SearchRequest{Query: query})
	if err != nil {
		t.Fatalf("live search failed: %v", err)
	}

	if result.Query != query {
		t.Errorf("query mismatch: %q", result.Query)
	}
	t.Logf("duckduckgo returned %d results", len(result.Results))
}