feat: add Brave web search scraper engine

New brave.go: scrapes https://search.brave.com directly.
Extracts title, URL, snippet, and favicon from Brave's HTML.
No API key required.

Rename the existing BraveEngine to BraveAPIEngine to avoid a name collision
with the new scraper. The API engine keeps the name 'braveapi'; the scraper uses 'brave'.
This commit is contained in:
Franz Kafka 2026-03-22 15:12:21 +00:00
parent 994d27ff7f
commit 2d22a8cdbb
4 changed files with 179 additions and 6 deletions

View file

@@ -23,7 +23,7 @@ import (
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
-var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
+var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "brave", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
type Planner struct {
PortedSet map[string]bool
@@ -122,7 +122,7 @@ func inferFromCategories(categories []string) []string {
out = append(out, e)
}
// stable order
-order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9, "youtube": 10}
+order := map[string]int{"wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6, "arxiv": 7, "crossref": 8, "github": 9, "reddit": 10, "youtube": 11}
sortByOrder(out, order)
return out
}