feat: add DuckDuckGo, GitHub, Reddit, and Bing engines
- DuckDuckGo: scrapes Lite HTML endpoint for results
  - Language-aware region mapping (de→de-de, ja→jp-jp, etc.)
  - HTML parser extracts result links and snippets from DDG Lite markup
  - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape
- GitHub: uses public Search API (repos, sorted by stars)
  - No auth required (10 req/min unauthenticated)
  - Shows stars, language, topics, last updated date
  - Paginated via GitHub's page parameter
- Reddit: uses public JSON search API
  - Respects safesearch (skips over_18 posts)
  - Shows subreddit, score, comment count
  - Links self-posts to the thread URL
- Bing: scrapes web search HTML (b_algo containers)
  - Extracts titles, URLs, and snippets from Bing's result markup
  - Handles Bing's tracking URL encoding
- Updated factory, config defaults, and config.example.toml
- Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode)

9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
This commit is contained in:
parent
28b61ff251
commit
df8fe9474b
14 changed files with 1030 additions and 5 deletions
91
internal/engines/bing_test.go
Normal file
91
internal/engines/bing_test.go
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
package engines
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ashie/gosearch/internal/contracts"
|
||||
)
|
||||
|
||||
func TestBingEngine_EmptyQuery(t *testing.T) {
|
||||
eng := &BingEngine{}
|
||||
resp, err := eng.Search(context.Background(), contracts.SearchRequest{Query: ""})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(resp.Results) != 0 {
|
||||
t.Errorf("expected 0 results for empty query, got %d", len(resp.Results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBingEngine_Name(t *testing.T) {
|
||||
eng := &BingEngine{}
|
||||
if eng.Name() != "bing" {
|
||||
t.Errorf("expected 'bing', got %q", eng.Name())
|
||||
}
|
||||
}
|
||||
|
||||
func TestBingEngine_Uninitialized(t *testing.T) {
|
||||
eng := &BingEngine{}
|
||||
_, err := eng.Search(context.Background(), contracts.SearchRequest{Query: "test"})
|
||||
if err == nil {
|
||||
t.Error("expected error for uninitialized client")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBingHTML(t *testing.T) {
|
||||
html := `<li class="b_algo">
|
||||
<h2><a href="https://example.com">Example Title</a></h2>
|
||||
<div class="b_caption"><p>This is a test snippet from Bing.</p></div>
|
||||
</li>
|
||||
<li class="b_algo">
|
||||
<h2><a href="https://example2.com">Second Result</a></h2>
|
||||
<div class="b_caption"><p>Another snippet</p></div>
|
||||
</li>`
|
||||
|
||||
results, err := parseBingHTML(strings.NewReader(html))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(results) != 2 {
|
||||
t.Fatalf("expected 2 results, got %d", len(results))
|
||||
}
|
||||
if results[0].Title != "Example Title" {
|
||||
t.Errorf("expected 'Example Title', got %q", results[0].Title)
|
||||
}
|
||||
if *results[0].URL != "https://example.com" {
|
||||
t.Errorf("expected 'https://example.com', got %q", *results[0].URL)
|
||||
}
|
||||
if results[0].Content != "This is a test snippet from Bing." {
|
||||
t.Errorf("unexpected content: %q", results[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBingEngine_LiveRequest(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping live request")
|
||||
}
|
||||
|
||||
client := &http.Client{}
|
||||
eng := &BingEngine{client: client}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel()
|
||||
|
||||
resp, err := eng.Search(ctx, contracts.SearchRequest{
|
||||
Query: "golang programming language",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("live search failed: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("bing returned %d results", len(resp.Results))
|
||||
for _, r := range resp.Results {
|
||||
if r.Engine != "bing" {
|
||||
t.Errorf("expected engine 'bing', got %q", r.Engine)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue