diff --git a/internal/engines/brave.go b/internal/engines/brave.go
new file mode 100644
index 0000000..cb9313d
--- /dev/null
+++ b/internal/engines/brave.go
@@ -0,0 +1,172 @@
+package engines
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "regexp"
+ "strings"
+
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
+)
+
+// BraveEngine is the engine reported as "brave". Its Search method requests
+// the search.brave.com results page and extracts results from the returned
+// markup with regular expressions.
+type BraveEngine struct {
+	// client issues the outbound HTTP request made by Search.
+	client *http.Client
+}
+
+// Name reports the identifier of this engine, "brave".
+func (e *BraveEngine) Name() string {
+	const engineName = "brave"
+	return engineName
+}
+
+// Search requests one page of Brave web results for req.Query and scrapes the
+// returned markup into a contracts.SearchResponse.
+//
+// A blank query returns an empty response without any network traffic. A nil
+// receiver or nil client, a request-construction or transport failure, a
+// non-200 status, or a body read failure is reported as an error. At most
+// 128 KiB of the page is read, so markup beyond that point is never parsed.
+func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
+	if strings.TrimSpace(req.Query) == "" {
+		return contracts.SearchResponse{Query: req.Query}, nil
+	}
+
+	// Same guard as the braveapi engine: fail with an error instead of a
+	// nil-pointer panic when the engine was never wired to a client.
+	if e == nil || e.client == nil {
+		return contracts.SearchResponse{}, fmt.Errorf("brave engine not initialized")
+	}
+
+	// Pageno is treated as 1-based here; clamp so an unset or non-positive
+	// page number cannot produce a negative offset.
+	// NOTE(review): confirm that Brave's offset parameter counts results
+	// rather than pages — if it is a page index the *20 is wrong.
+	start := (req.Pageno - 1) * 20
+	if start < 0 {
+		start = 0
+	}
+
+	u := fmt.Sprintf(
+		"https://search.brave.com/search?q=%s&offset=%d&source=web",
+		url.QueryEscape(req.Query),
+		start,
+	)
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
+	if err != nil {
+		return contracts.SearchResponse{}, fmt.Errorf("brave: build request: %w", err)
+	}
+	// Browser-like headers are sent with every request.
+	httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36")
+	httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+	httpReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
+
+	resp, err := e.client.Do(httpReq)
+	if err != nil {
+		return contracts.SearchResponse{}, fmt.Errorf("brave: request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the excerpt only decorates the error message, so a
+		// failed read here is deliberately ignored.
+		excerpt, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+		return contracts.SearchResponse{}, fmt.Errorf("brave error: status=%d body=%q", resp.StatusCode, string(excerpt))
+	}
+
+	raw, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024))
+	if err != nil {
+		return contracts.SearchResponse{}, fmt.Errorf("brave: read body: %w", err)
+	}
+
+	// Convert once; both extractors work on the same string.
+	page := string(raw)
+	results := parseBraveResults(page)
+
+	return contracts.SearchResponse{
+		Query:               req.Query,
+		NumberOfResults:     len(results),
+		Results:             results,
+		Answers:             []map[string]any{},
+		Corrections:         []string{},
+		Infoboxes:           []map[string]any{},
+		Suggestions:         extractBraveSuggestions(page),
+		UnresponsiveEngines: [][2]string{},
+	}, nil
+}
+
+// braveWebPattern captures the content that follows a data-type="web" marker,
+// up to and including the start of the next data-type="web" or "news" marker.
+//
+// NOTE(review): the trailing data-type="(web|news)" part is consumed by each
+// match and FindAll matches never overlap, so the marker that terminates one
+// match cannot also begin the next, and the final block on a page has no
+// terminator at all — verify that results are not being skipped.
+var braveWebPattern = regexp.MustCompile(`(?s)
]+data-type="web"[^>]*>(.*?)
\s*
]+data-type="(web|news)"`)
+
+// braveTitlePattern captures, from one result block, the href and the text of
+// the element carrying class="result-title".
+var braveTitlePattern = regexp.MustCompile(`
]+class="result-title"[^>]+href="([^"]+)"[^>]*>([^<]+)`)
+
+// parseBraveResults extracts the web results found in body.
+//
+// For every block matched by braveWebPattern it reads the title and link via
+// braveTitlePattern, skips blocks without a title, links that do not start
+// with "http", and links already seen, then fills in the snippet and favicon.
+// The returned slice is never nil; it is empty when nothing matched.
+func parseBraveResults(body string) []contracts.MainResult {
+	blocks := braveWebPattern.FindAllStringSubmatch(body, -1)
+
+	// Search fills its other list fields with empty, non-nil slices; do the
+	// same here so that "no results" has the same shape.
+	results := make([]contracts.MainResult, 0, len(blocks))
+	seen := make(map[string]bool, len(blocks))
+
+	for _, match := range blocks {
+		block := match[1]
+
+		titleMatch := braveTitlePattern.FindStringSubmatch(block)
+		if titleMatch == nil {
+			continue
+		}
+		href := titleMatch[1]
+		title := stripTags(titleMatch[2])
+
+		// The prefix test also rejects an empty href.
+		if !strings.HasPrefix(href, "http") || seen[href] {
+			continue
+		}
+		seen[href] = true
+
+		// href is declared inside the loop body, so every result gets a
+		// pointer to its own variable.
+		results = append(results, contracts.MainResult{
+			Title:     title,
+			URL:       &href,
+			Content:   extractBraveSnippet(block),
+			Thumbnail: extractBraveFavicon(block),
+			Engine:    "brave",
+			Score:     1.0,
+			Category:  "general",
+			Engines:   []string{"brave"},
+		})
+	}
+
+	return results
+}
+
+func extractBraveSnippet(block string) string {
+ // Try various snippet selectors Brave uses.
+ patterns := []string{
+ `
]+class="snippet"[^>]*>(.*?)
`,
+ `
]+class="[^"]*description[^"]*"[^>]*>(.*?)
`,
+ `
]+class="[^"]*snippet[^"]*"[^>]*>(.*?)`,
+ }
+
+ for _, pat := range patterns {
+ re := regexp.MustCompile(`(?s)` + pat)
+ m := re.FindStringSubmatch(block)
+ if len(m) >= 2 {
+ text := stripTags(m[1])
+ if text != "" {
+ return strings.TrimSpace(text)
+ }
+ }
+ }
+ return ""
+}
+
+func extractBraveFavicon(block string) string {
+ imgPattern := regexp.MustCompile(`
![]()
]+class="[^"]*favicon[^"]*"[^>]+src="([^"]+)"`)
+ m := imgPattern.FindStringSubmatch(block)
+ if len(m) >= 2 {
+ return m[1]
+ }
+ return ""
+}
+
+// braveSuggestPattern captures the text that follows an element whose class
+// contains "suggestion". It is compiled once at package initialisation instead
+// of on every call.
+var braveSuggestPattern = regexp.MustCompile(`(?s)
]+class="[^"]*suggestion[^"]*"[^>]*>.*?]*>([^<]+)`)
+
+// extractBraveSuggestions returns the distinct, non-blank suggestion strings
+// found in body, in order of first appearance. Each capture is passed through
+// stripTags and trimmed. The returned slice is never nil; it is empty when
+// nothing matched.
+func extractBraveSuggestions(body string) []string {
+	matches := braveSuggestPattern.FindAllStringSubmatch(body, -1)
+
+	// Search fills its other list fields with empty, non-nil slices; do the
+	// same here so that "no suggestions" has the same shape.
+	suggestions := make([]string, 0, len(matches))
+	seen := make(map[string]bool, len(matches))
+
+	for _, m := range matches {
+		s := strings.TrimSpace(stripTags(m[1]))
+		if s == "" || seen[s] {
+			continue
+		}
+		seen[s] = true
+		suggestions = append(suggestions, s)
+	}
+	return suggestions
+}
diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go
index 8977cb2..1ae6220 100644
--- a/internal/engines/braveapi.go
+++ b/internal/engines/braveapi.go
@@ -33,16 +33,16 @@ import (
-// BraveEngine implements the Brave Web Search API.
+// BraveAPIEngine implements the Brave Web Search API.
// Required: BRAVE_API_KEY env var or config.
// Optional: BRAVE_ACCESS_TOKEN to gate requests.
-type BraveEngine struct {
+type BraveAPIEngine struct {
client *http.Client
apiKey string
accessGateToken string
resultsPerPage int
}
-func (e *BraveEngine) Name() string { return "braveapi" }
+func (e *BraveAPIEngine) Name() string { return "braveapi" }
-func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
+func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
if e == nil || e.client == nil {
return contracts.SearchResponse{}, errors.New("brave engine not initialized")
}
diff --git a/internal/engines/factory.go b/internal/engines/factory.go
index 528dcb7..68f66eb 100644
--- a/internal/engines/factory.go
+++ b/internal/engines/factory.go
@@ -51,12 +51,13 @@ func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string
"wikipedia": &WikipediaEngine{client: client},
"arxiv": &ArxivEngine{client: client},
"crossref": &CrossrefEngine{client: client},
- "braveapi": &BraveEngine{
+ "braveapi": &BraveAPIEngine{
client: client,
apiKey: braveAPIKey,
accessGateToken: braveAccessToken,
resultsPerPage: 20,
},
+ "brave": &BraveEngine{client: client},
"qwant": &QwantEngine{
client: client,
category: "web-lite",
diff --git a/internal/engines/planner.go b/internal/engines/planner.go
index 9616a4b..270885b 100644
--- a/internal/engines/planner.go
+++ b/internal/engines/planner.go
@@ -23,7 +23,7 @@ import (
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
-var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
+var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "brave", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
type Planner struct {
PortedSet map[string]bool
@@ -122,7 +122,7 @@ func inferFromCategories(categories []string) []string {
out = append(out, e)
}
// stable order
- order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9, "youtube": 10}
+ order := map[string]int{"wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6, "arxiv": 7, "crossref": 8, "github": 9, "reddit": 10, "youtube": 11}
sortByOrder(out, order)
return out
}