- Fix config validation: upstream URLs allow private IPs (self-hosted) - Fix util.SafeURLScheme to return parsed URL - Replace spoofed GSA User-Agent with honest Kafka UA - Sanitize all engine error messages (strip response bodies) - Replace unused body reads with io.Copy(io.Discard, ...) for reuse - Fix pre-existing braveapi_test using wrong struct type - Fix ratelimit test reference to limiter variable - Update ratelimit tests for new trusted proxy behavior
208 lines
5.6 KiB
Go
208 lines
5.6 KiB
Go
// kafka — a privacy-respecting metasearch engine
|
|
// Copyright (C) 2026-present metamorphosis-dev
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package engines
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
|
)
|
|
|
|
// BraveAPIEngine implements the Brave Web Search API.
// Required: BRAVE_API_KEY env var or config.
// Optional: BRAVE_ACCESS_TOKEN to gate requests.
type BraveAPIEngine struct {
	// client performs the upstream HTTP requests; Search refuses to run when
	// it is nil.
	client *http.Client
	// apiKey is sent to Brave in the X-Subscription-Token header. When it is
	// blank, Search reports the engine as unresponsive ("missing_api_key").
	apiKey string
	// accessGateToken, when non-blank, must be matched by the request's
	// AccessToken or Search reports "unauthorized".
	accessGateToken string
	// resultsPerPage is sent to Brave as the `count` query parameter.
	resultsPerPage int
}
|
|
|
|
// Name returns the identifier under which this engine reports its results
// and its unresponsive status.
func (e *BraveAPIEngine) Name() string {
	return "braveapi"
}
|
|
|
|
func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
|
if e == nil || e.client == nil {
|
|
return contracts.SearchResponse{}, errors.New("brave engine not initialized")
|
|
}
|
|
|
|
if strings.TrimSpace(e.apiKey) == "" {
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: 0,
|
|
Results: []contracts.MainResult{},
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: []string{},
|
|
UnresponsiveEngines: [][2]string{{e.Name(), "missing_api_key"}},
|
|
}, nil
|
|
}
|
|
|
|
if gate := strings.TrimSpace(e.accessGateToken); gate != "" {
|
|
if strings.TrimSpace(req.AccessToken) == "" || req.AccessToken != gate {
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: 0,
|
|
Results: []contracts.MainResult{},
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: []string{},
|
|
UnresponsiveEngines: [][2]string{{e.Name(), "unauthorized"}},
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
q := strings.TrimSpace(req.Query)
|
|
if q == "" {
|
|
return contracts.SearchResponse{Query: req.Query}, nil
|
|
}
|
|
|
|
// Brave API only supports offset values 0-9 (first page of results).
|
|
// Paginating beyond the first page is not supported by Brave.
|
|
offset := 0
|
|
if req.Pageno > 1 {
|
|
offset = (req.Pageno - 1) * e.resultsPerPage
|
|
}
|
|
if offset > 9 {
|
|
offset = 9
|
|
}
|
|
|
|
args := url.Values{}
|
|
args.Set("q", q)
|
|
args.Set("count", fmt.Sprintf("%d", e.resultsPerPage))
|
|
args.Set("offset", fmt.Sprintf("%d", offset))
|
|
|
|
if req.TimeRange != nil {
|
|
switch *req.TimeRange {
|
|
case "day":
|
|
args.Set("time_range", "past_day")
|
|
case "week":
|
|
args.Set("time_range", "past_week")
|
|
case "month":
|
|
args.Set("time_range", "past_month")
|
|
case "year":
|
|
args.Set("time_range", "past_year")
|
|
}
|
|
}
|
|
|
|
if req.Safesearch > 0 {
|
|
args.Set("safesearch", "strict")
|
|
}
|
|
|
|
endpoint := "https://api.search.brave.com/res/v1/web/search?" + args.Encode()
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
httpReq.Header.Set("X-Subscription-Token", e.apiKey)
|
|
|
|
resp, err := e.client.Do(httpReq)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
|
|
return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status %d", resp.StatusCode)
|
|
}
|
|
|
|
var api struct {
|
|
Web struct {
|
|
Results []struct {
|
|
URL string `json:"url"`
|
|
Title string `json:"title"`
|
|
Description string `json:"description"`
|
|
Age string `json:"age"`
|
|
Thumbnail struct {
|
|
Src string `json:"src"`
|
|
} `json:"thumbnail"`
|
|
} `json:"results"`
|
|
} `json:"web"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
results := make([]contracts.MainResult, 0, len(api.Web.Results))
|
|
for _, r := range api.Web.Results {
|
|
urlPtr := strings.TrimSpace(r.URL)
|
|
if urlPtr == "" {
|
|
continue
|
|
}
|
|
pub := parseBraveAge(r.Age)
|
|
|
|
results = append(results, contracts.MainResult{
|
|
Template: "default.html",
|
|
Title: r.Title,
|
|
Content: r.Description,
|
|
URL: &urlPtr,
|
|
Pubdate: pub,
|
|
Engine: e.Name(),
|
|
Score: 0,
|
|
Category: "general",
|
|
Priority: "",
|
|
Positions: nil,
|
|
Engines: []string{e.Name()},
|
|
})
|
|
}
|
|
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: len(results),
|
|
Results: results,
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: []string{},
|
|
UnresponsiveEngines: [][2]string{},
|
|
}, nil
|
|
}
|
|
|
|
// parseBraveAge converts the `age` value of a Brave result into the
// "2006-01-02 15:04:05-0700" layout. It returns nil when the value is blank
// or matches none of the accepted layouts.
func parseBraveAge(ageRaw string) *string {
	trimmed := strings.TrimSpace(ageRaw)
	if trimmed == "" {
		return nil
	}

	// Brave sometimes returns RFC3339-like timestamps for `age`. The layouts
	// are tried in order and the first successful parse wins.
	for _, layout := range [...]string{
		time.RFC3339Nano,
		time.RFC3339,
		"2006-01-02T15:04:05Z07:00",
		"2006-01-02",
	} {
		parsed, err := time.Parse(layout, trimmed)
		if err != nil {
			continue
		}
		formatted := parsed.Format("2006-01-02 15:04:05-0700")
		return &formatted
	}

	return nil
}
|
|
|