Full project rename from kafka to samsa (after Gregor Samsa, who woke one morning from uneasy dreams to find himself transformed). - Module: github.com/metamorphosis-dev/kafka → samsa - Binary: cmd/kafka/ → cmd/samsa/ - CSS: kafka.css → samsa.css - UI: all 'kafka' product names, titles, localStorage keys → samsa - localStorage keys: kafka-theme → samsa-theme, kafka-engines → samsa-engines - OpenSearch: ShortName, LongName, description, URLs updated - AGPL headers: 'kafka' → 'samsa' - Docs, configs, examples updated - Cache key prefix: kafka: → samsa:
172 lines
4.6 KiB
Go
172 lines
4.6 KiB
Go
package engines
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/metamorphosis-dev/samsa/internal/contracts"
|
|
)
|
|
|
|
// BraveEngine is the search engine implementation that scrapes the Brave
// Search HTML results page.
type BraveEngine struct {
	// client issues the outbound HTTP request made by Search.
	client *http.Client
}
|
|
|
|
// Name returns the engine's identifier, which is always the string "brave".
func (e *BraveEngine) Name() string {
	return "brave"
}
|
|
|
|
func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
|
if strings.TrimSpace(req.Query) == "" {
|
|
return contracts.SearchResponse{Query: req.Query}, nil
|
|
}
|
|
|
|
start := (req.Pageno - 1) * 20
|
|
u := fmt.Sprintf(
|
|
"https://search.brave.com/search?q=%s&offset=%d&source=web",
|
|
url.QueryEscape(req.Query),
|
|
start,
|
|
)
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36")
|
|
httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
|
httpReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
|
|
|
resp, err := e.client.Do(httpReq)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
|
return contracts.SearchResponse{}, fmt.Errorf("brave error: status %d", resp.StatusCode)
|
|
}
|
|
|
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024))
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
results := parseBraveResults(string(body))
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: len(results),
|
|
Results: results,
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: extractBraveSuggestions(string(body)),
|
|
UnresponsiveEngines: [][2]string{},
|
|
}, nil
|
|
}
|
|
|
|
func parseBraveResults(body string) []contracts.MainResult {
|
|
var results []contracts.MainResult
|
|
|
|
// Brave wraps each result in divs with data-type="web" or data-type="news".
|
|
// Pattern: <div ... data-type="web"> ... <a class="result-title" href="URL">TITLE</a> ... <div class="snippet">SNIPPET</div>
|
|
webPattern := regexp.MustCompile(`(?s)<div[^>]+data-type="web"[^>]*>(.*?)</div>\s*<div[^>]+data-type="(web|news)"`)
|
|
matches := webPattern.FindAllStringSubmatch(body, -1)
|
|
|
|
seen := map[string]bool{}
|
|
|
|
for _, match := range matches {
|
|
if len(match) < 2 {
|
|
continue
|
|
}
|
|
block := match[1]
|
|
|
|
// Extract title and URL from the result-title link.
|
|
titlePattern := regexp.MustCompile(`<a[^>]+class="result-title"[^>]+href="([^"]+)"[^>]*>([^<]+)</a>`)
|
|
titleMatch := titlePattern.FindStringSubmatch(block)
|
|
if titleMatch == nil {
|
|
continue
|
|
}
|
|
href := titleMatch[1]
|
|
title := stripTags(titleMatch[2])
|
|
|
|
if href == "" || !strings.HasPrefix(href, "http") {
|
|
continue
|
|
}
|
|
if seen[href] {
|
|
continue
|
|
}
|
|
seen[href] = true
|
|
|
|
// Extract snippet.
|
|
snippet := extractBraveSnippet(block)
|
|
|
|
// Extract favicon URL.
|
|
favicon := extractBraveFavicon(block)
|
|
|
|
urlPtr := href
|
|
results = append(results, contracts.MainResult{
|
|
Title: title,
|
|
URL: &urlPtr,
|
|
Content: snippet,
|
|
Thumbnail: favicon,
|
|
Engine: "brave",
|
|
Score: 1.0,
|
|
Category: "general",
|
|
Engines: []string{"brave"},
|
|
})
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
func extractBraveSnippet(block string) string {
|
|
// Try various snippet selectors Brave uses.
|
|
patterns := []string{
|
|
`<div[^>]+class="snippet"[^>]*>(.*?)</div>`,
|
|
`<p[^>]+class="[^"]*description[^"]*"[^>]*>(.*?)</p>`,
|
|
`<span[^>]+class="[^"]*snippet[^"]*"[^>]*>(.*?)</span>`,
|
|
}
|
|
|
|
for _, pat := range patterns {
|
|
re := regexp.MustCompile(`(?s)` + pat)
|
|
m := re.FindStringSubmatch(block)
|
|
if len(m) >= 2 {
|
|
text := stripTags(m[1])
|
|
if text != "" {
|
|
return strings.TrimSpace(text)
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Patterns used by extractBraveFavicon, compiled once at package
// initialisation rather than on every call.
var (
	// braveFaviconRe matches an <img> whose class attribute mentions
	// "favicon" and precedes its src attribute.
	braveFaviconRe = regexp.MustCompile(`<img[^>]+class="[^"]*favicon[^"]*"[^>]+src="([^"]+)"`)

	// braveFaviconSrcFirstRe matches the same element when src is written
	// before class.
	braveFaviconSrcFirstRe = regexp.MustCompile(`<img[^>]+src="([^"]+)"[^>]+class="[^"]*favicon[^"]*"`)
)

// extractBraveFavicon returns the src of the first <img> in block whose class
// attribute contains "favicon", or an empty string when there is none.
//
// The class-before-src form is tried first; the src-before-class form is only
// a fallback, so attribute order in the markup does not matter.
func extractBraveFavicon(block string) string {
	if m := braveFaviconRe.FindStringSubmatch(block); m != nil {
		return m[1]
	}
	if m := braveFaviconSrcFirstRe.FindStringSubmatch(block); m != nil {
		return m[1]
	}
	return ""
}
|
|
|
|
func extractBraveSuggestions(body string) []string {
|
|
var suggestions []string
|
|
// Brave suggestions appear in a dropdown or related searches section.
|
|
suggestPattern := regexp.MustCompile(`(?s)<li[^>]+class="[^"]*suggestion[^"]*"[^>]*>.*?<a[^>]*>([^<]+)</a>`)
|
|
matches := suggestPattern.FindAllStringSubmatch(body, -1)
|
|
seen := map[string]bool{}
|
|
for _, m := range matches {
|
|
if len(m) < 2 {
|
|
continue
|
|
}
|
|
s := strings.TrimSpace(stripTags(m[1]))
|
|
if s != "" && !seen[s] {
|
|
seen[s] = true
|
|
suggestions = append(suggestions, s)
|
|
}
|
|
}
|
|
return suggestions
|
|
}
|