SearXNG approach: use the Google Search Appliance (GSA) User-Agent pool — these are whitelisted enterprise identifiers that Google trusts.

Key techniques:
- Use a GSA User-Agent (iPhone OS + GSA/ version) instead of a Chrome desktop one
- Send the CONSENT=YES+ cookie to bypass the EU consent wall
- Parse /url?q= redirector URLs (unquote and strip &sa= params)
- Use the div.MjjYud class for result containers (SearXNG selector)
- Read snippets from data-sncf divs
- Detect sorry.google.com blocks
- Extract suggestions from ouy7Mc class cards
37 lines
1 KiB
Go
37 lines
1 KiB
Go
package engines
|
|
|
|
import (
|
|
"net/http"
|
|
"os"
|
|
"time"
|
|
)
|
|
|
|
// NewDefaultPortedEngines returns the starter set of Go-native engines.
|
|
// The service can swap/extend this registry later as more engines are ported.
|
|
func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
|
|
if client == nil {
|
|
client = &http.Client{Timeout: 10 * time.Second}
|
|
}
|
|
|
|
return map[string]Engine{
|
|
"wikipedia": &WikipediaEngine{client: client},
|
|
"arxiv": &ArxivEngine{client: client},
|
|
"crossref": &CrossrefEngine{client: client},
|
|
"braveapi": &BraveEngine{
|
|
client: client,
|
|
apiKey: os.Getenv("BRAVE_API_KEY"),
|
|
accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"),
|
|
resultsPerPage: 20,
|
|
},
|
|
"qwant": &QwantEngine{
|
|
client: client,
|
|
category: "web-lite",
|
|
resultsPerPage: 10,
|
|
},
|
|
"duckduckgo": &DuckDuckGoEngine{client: client},
|
|
"github": &GitHubEngine{client: client},
|
|
"reddit": &RedditEngine{client: client},
|
|
"bing": &BingEngine{client: client},
|
|
"google": &GoogleEngine{client: client},
|
|
}
|
|
}
|