feat: add Brave web search scraper engine
New brave.go: scrapes https://search.brave.com directly. Extracts title, URL, snippet, and favicon from Brave's HTML. No API key required. Rename existing BraveAPIEngine (was BraveEngine) to avoid collision with the new scraper. API engine stays as 'braveapi', scraper as 'brave'.
This commit is contained in:
parent
994d27ff7f
commit
2d22a8cdbb
4 changed files with 179 additions and 6 deletions
|
|
@ -23,7 +23,7 @@ import (
|
|||
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||
)
|
||||
|
||||
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
|
||||
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "brave", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
|
||||
|
||||
type Planner struct {
|
||||
PortedSet map[string]bool
|
||||
|
|
@ -122,7 +122,7 @@ func inferFromCategories(categories []string) []string {
|
|||
out = append(out, e)
|
||||
}
|
||||
// stable order
|
||||
order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9, "youtube": 10}
|
||||
order := map[string]int{"wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6, "arxiv": 7, "crossref": 8, "github": 9, "reddit": 10, "youtube": 11}
|
||||
sortByOrder(out, order)
|
||||
return out
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue