package engines

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/metamorphosis-dev/kafka/internal/contracts"
)

// GoogleEngine searches Google via direct HTTP.
// This may be blocked by Google (CAPTCHA/challenge). When blocked,
// the search service falls back to the configured upstream SearXNG
// if "google" is NOT in LOCAL_PORTED_ENGINES (i.e., it's treated as upstream).
//
// To use: add "google" to LOCAL_PORTED_ENGINES env var.
// Without that, Google is proxied to upstream SearXNG as normal.
type GoogleEngine struct {
	client *http.Client
}

// Name returns the engine identifier, "google".
func (e *GoogleEngine) Name() string { return "google" }

// Search sends req.Query to https://www.google.com/search and converts the
// response into a contracts.SearchResponse.
//
// Behavior by case:
//   - A blank (whitespace-only) query returns a response carrying only the
//     query, without issuing any HTTP request.
//   - A 301/302 response is reported as a single result whose URL is the
//     redirect target.
//   - Any other non-200 status is returned as an error that includes the
//     status code and up to 4 KiB of the response body.
//   - A 200 response has at most its first 64 KiB handed to parseGoogleHTML.
//
// Errors from building the request, performing it, or reading the body are
// wrapped with context; the underlying error stays reachable via errors.Is/As.
func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}

	u := fmt.Sprintf(
		"https://www.google.com/search?q=%s&num=%d&hl=%s&safe=%s",
		url.QueryEscape(req.Query),
		10,
		googleHL(req.Language),
		googleSafeSearchLevel(req.Safesearch),
	)

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("google: building request: %w", err)
	}
	// Browser-like headers; the request is sent as if from desktop Chrome.
	httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36")
	httpReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
	httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("google: sending request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusMovedPermanently || resp.StatusCode == http.StatusFound {
		// "I'm Feeling Lucky" redirect — treat the redirect target as the top result.
		//
		// A 3xx status is only visible here when the client did not follow the
		// redirect, and then resp.Request.URL is still the search URL we just
		// requested. The destination lives in the Location header, which
		// resp.Location resolves against the request URL. Fall back to the
		// request URL only when no usable Location header is present.
		target := resp.Request.URL.String()
		if loc, locErr := resp.Location(); locErr == nil {
			target = loc.String()
		}
		return contracts.SearchResponse{
			Query:               req.Query,
			NumberOfResults:     1,
			UnresponsiveEngines: [][2]string{},
			Results: []contracts.MainResult{
				{
					Title:    req.Query,
					URL:      &target,
					Content:  "Google result (direct redirect)",
					Engine:   "google",
					Score:    1.0,
					Category: "general",
					Engines:  []string{"google"},
				},
			},
			Answers:     []map[string]any{},
			Corrections: []string{},
			Infoboxes:   []map[string]any{},
			Suggestions: []string{},
		}, nil
	}

	if resp.StatusCode != http.StatusOK {
		// The body is only diagnostic detail for the error message, so a
		// failed or partial read is deliberately ignored here.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("google error: status=%d body=%q", resp.StatusCode, string(body))
	}

	// Read at most 64 KiB of the page; anything beyond that is not parsed.
	body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("google: reading response body: %w", err)
	}

	results := parseGoogleHTML(string(body), req.Query)
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}

// parseGoogleHTML extracts results from Google's HTML.
// This is best-effort — Google's results are JS-rendered and this parser
// will often return empty results when Google doesn't serve a static page.
func parseGoogleHTML(body, query string) []contracts.MainResult {
	var results []contracts.MainResult
	// Google occasionally serves result HTML in a