diff --git a/internal/engines/brave.go b/internal/engines/brave.go new file mode 100644 index 0000000..cb9313d --- /dev/null +++ b/internal/engines/brave.go @@ -0,0 +1,172 @@ +package engines + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strings" + + "github.com/metamorphosis-dev/kafka/internal/contracts" +) + +type BraveEngine struct { + client *http.Client +} + +func (e *BraveEngine) Name() string { return "brave" } + +func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + if strings.TrimSpace(req.Query) == "" { + return contracts.SearchResponse{Query: req.Query}, nil + } + + start := (req.Pageno - 1) * 20 + u := fmt.Sprintf( + "https://search.brave.com/search?q=%s&offset=%d&source=web", + url.QueryEscape(req.Query), + start, + ) + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) + if err != nil { + return contracts.SearchResponse{}, err + } + httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36") + httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + httpReq.Header.Set("Accept-Language", "en-US,en;q=0.9") + + resp, err := e.client.Do(httpReq) + if err != nil { + return contracts.SearchResponse{}, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return contracts.SearchResponse{}, fmt.Errorf("brave error: status=%d body=%q", resp.StatusCode, string(body)) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024)) + if err != nil { + return contracts.SearchResponse{}, err + } + + results := parseBraveResults(string(body)) + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: extractBraveSuggestions(string(body)), + UnresponsiveEngines: [][2]string{}, + }, nil +} + +func parseBraveResults(body string) []contracts.MainResult { + var results []contracts.MainResult + + // Brave wraps each result in divs with data-type="web" or data-type="news". + // Pattern:
... TITLE ...
SNIPPET
+ webPattern := regexp.MustCompile(`(?s)]+data-type="web"[^>]*>(.*?)
\s*]+data-type="(web|news)"`) + matches := webPattern.FindAllStringSubmatch(body, -1) + + seen := map[string]bool{} + + for _, match := range matches { + if len(match) < 2 { + continue + } + block := match[1] + + // Extract title and URL from the result-title link. + titlePattern := regexp.MustCompile(`]+class="result-title"[^>]+href="([^"]+)"[^>]*>([^<]+)`) + titleMatch := titlePattern.FindStringSubmatch(block) + if titleMatch == nil { + continue + } + href := titleMatch[1] + title := stripTags(titleMatch[2]) + + if href == "" || !strings.HasPrefix(href, "http") { + continue + } + if seen[href] { + continue + } + seen[href] = true + + // Extract snippet. + snippet := extractBraveSnippet(block) + + // Extract favicon URL. + favicon := extractBraveFavicon(block) + + urlPtr := href + results = append(results, contracts.MainResult{ + Title: title, + URL: &urlPtr, + Content: snippet, + Thumbnail: favicon, + Engine: "brave", + Score: 1.0, + Category: "general", + Engines: []string{"brave"}, + }) + } + + return results +} + +func extractBraveSnippet(block string) string { + // Try various snippet selectors Brave uses. + patterns := []string{ + `]+class="snippet"[^>]*>(.*?)`, + `]+class="[^"]*description[^"]*"[^>]*>(.*?)

`, + `]+class="[^"]*snippet[^"]*"[^>]*>(.*?)`, + } + + for _, pat := range patterns { + re := regexp.MustCompile(`(?s)` + pat) + m := re.FindStringSubmatch(block) + if len(m) >= 2 { + text := stripTags(m[1]) + if text != "" { + return strings.TrimSpace(text) + } + } + } + return "" +} + +func extractBraveFavicon(block string) string { + imgPattern := regexp.MustCompile(`]+class="[^"]*favicon[^"]*"[^>]+src="([^"]+)"`) + m := imgPattern.FindStringSubmatch(block) + if len(m) >= 2 { + return m[1] + } + return "" +} + +func extractBraveSuggestions(body string) []string { + var suggestions []string + // Brave suggestions appear in a dropdown or related searches section. + suggestPattern := regexp.MustCompile(`(?s)]+class="[^"]*suggestion[^"]*"[^>]*>.*?]*>([^<]+)`) + matches := suggestPattern.FindAllStringSubmatch(body, -1) + seen := map[string]bool{} + for _, m := range matches { + if len(m) < 2 { + continue + } + s := strings.TrimSpace(stripTags(m[1])) + if s != "" && !seen[s] { + seen[s] = true + suggestions = append(suggestions, s) + } + } + return suggestions +} diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go index 8977cb2..1ae6220 100644 --- a/internal/engines/braveapi.go +++ b/internal/engines/braveapi.go @@ -33,16 +33,16 @@ import ( // BraveEngine implements the Brave Web Search API. // Required: BRAVE_API_KEY env var or config. // Optional: BRAVE_ACCESS_TOKEN to gate requests. -type BraveEngine struct { +type BraveAPIEngine struct { client *http.Client apiKey string accessGateToken string resultsPerPage int } -func (e *BraveEngine) Name() string { return "braveapi" } +func (e *BraveAPIEngine) Name() string { return "braveapi" } -func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { +func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { if e == nil || e.client == nil { return contracts.SearchResponse{}, errors.New("brave engine not initialized") } diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 528dcb7..68f66eb 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -51,12 +51,13 @@ func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string "wikipedia": &WikipediaEngine{client: client}, "arxiv": &ArxivEngine{client: client}, "crossref": &CrossrefEngine{client: client}, - "braveapi": &BraveEngine{ + "braveapi": &BraveAPIEngine{ client: client, apiKey: braveAPIKey, accessGateToken: braveAccessToken, resultsPerPage: 20, }, + "brave": &BraveEngine{client: client}, "qwant": &QwantEngine{ client: client, category: "web-lite", diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 9616a4b..270885b 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -23,7 +23,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"} +var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "brave", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"} type Planner struct { PortedSet map[string]bool @@ -122,7 +122,7 @@ func inferFromCategories(categories []string) []string { out = append(out, e) } // stable order - order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9, "youtube": 10} + order := map[string]int{"wikipedia": 0, "braveapi": 1, "brave": 2, "qwant": 3, "duckduckgo": 4, "bing": 5, "google": 6, "arxiv": 7, "crossref": 8, "github": 9, "reddit": 10, "youtube": 11} sortByOrder(out, order) return out }