- DuckDuckGo: scrapes Lite HTML endpoint for results
  - Language-aware region mapping (de→de-de, ja→jp-jp, etc.)
  - HTML parser extracts result links and snippets from DDG Lite markup
  - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape
- GitHub: uses public Search API (repos, sorted by stars)
  - No auth required (10 req/min unauthenticated)
  - Shows stars, language, topics, last updated date
  - Paginated via GitHub's page parameter
- Reddit: uses public JSON search API
  - Respects safesearch (skips over_18 posts)
  - Shows subreddit, score, comment count
  - Links self-posts to the thread URL
- Bing: scrapes web search HTML (b_algo containers)
  - Extracts titles, URLs, and snippets from Bing's result markup
  - Handles Bing's tracking URL encoding
- Updated factory, config defaults, and config.example.toml
- Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode)

9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
36 lines
1,012 B
Go
36 lines
1,012 B
Go
package engines
|
|
|
|
import (
|
|
"net/http"
|
|
"os"
|
|
"time"
|
|
)
|
|
|
|
// NewDefaultPortedEngines returns the starter set of Go-native engines.
|
|
// The service can swap/extend this registry later as more engines are ported.
|
|
func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
|
|
if client == nil {
|
|
client = &http.Client{Timeout: 10 * time.Second}
|
|
}
|
|
|
|
return map[string]Engine{
|
|
"wikipedia": &WikipediaEngine{client: client},
|
|
"arxiv": &ArxivEngine{client: client},
|
|
"crossref": &CrossrefEngine{client: client},
|
|
"braveapi": &BraveEngine{
|
|
client: client,
|
|
apiKey: os.Getenv("BRAVE_API_KEY"),
|
|
accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"),
|
|
resultsPerPage: 20,
|
|
},
|
|
"qwant": &QwantEngine{
|
|
client: client,
|
|
category: "web-lite",
|
|
resultsPerPage: 10,
|
|
},
|
|
"duckduckgo": &DuckDuckGoEngine{client: client},
|
|
"github": &GitHubEngine{client: client},
|
|
"reddit": &RedditEngine{client: client},
|
|
"bing": &BingEngine{client: client},
|
|
}
|
|
}
|