package engines

import (
	"context"
	"encoding/json"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"
	"strings"

	"github.com/metamorphosis-dev/kafka/internal/contracts"
)

// BingEngine searches Bing by requesting its web search endpoint with
// format=rss and parsing whatever structured payload comes back (JSON or
// RSS/XML, selected by the response Content-Type).
// Note: Bing's HTML is heavily JS-dependent and blocks non-browser clients,
// so this engine falls back gracefully when results cannot be retrieved.
type BingEngine struct {
	// client performs the upstream HTTP request; Search refuses to run when it is nil.
	client *http.Client
}

// Name returns the engine identifier, "bing".
func (e *BingEngine) Name() string { return "bing" }

// Search runs req.Query against Bing and converts the response into a
// contracts.SearchResponse.
//
// Behavior:
//   - A blank (whitespace-only) query returns a response carrying only the
//     query, without contacting Bing.
//   - A nil receiver or nil HTTP client returns an error.
//   - A non-200 upstream status returns an error that includes the status and
//     up to 4 KiB of the response body.
//   - A JSON Content-Type is handed to parseBingJSON, an XML/RSS Content-Type
//     to parseBingRSS.
//   - Anything else (including HTML/XHTML) is reported as a successful, empty
//     response with bing listed in UnresponsiveEngines.
func (e *BingEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	const resultsPerPage = 10

	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("bing engine not initialized")
	}

	// Page numbers are 1-based. A zero or negative Pageno would otherwise
	// produce a negative offset in the request URL, so clamp to the first page.
	offset := (req.Pageno - 1) * resultsPerPage
	if offset < 0 {
		offset = 0
	}

	endpoint := fmt.Sprintf(
		"https://www.bing.com/search?q=%s&count=10&offset=%d&format=rss",
		url.QueryEscape(req.Query),
		offset,
	)

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message, so a
		// read failure here is deliberately ignored.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}

	// Media types are case-insensitive, so normalize before matching.
	contentType := strings.ToLower(resp.Header.Get("Content-Type"))

	// An HTML page (including application/xhtml+xml, which also contains
	// "xml") is never a usable feed, so it must not reach the RSS parser.
	if !strings.Contains(contentType, "html") {
		switch {
		case strings.Contains(contentType, "json"):
			return parseBingJSON(resp.Body, req.Query)
		case strings.Contains(contentType, "xml"), strings.Contains(contentType, "rss"):
			return parseBingRSS(resp.Body, req.Query)
		}
	}

	// If Bing returned HTML instead of RSS, it likely blocked us.
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     0,
		Results:             []contracts.MainResult{},
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{{"bing", "blocked by bot detection"}},
	}, nil
}

// parseBingRSS parses Bing's RSS search results.
//
// It decodes an <rss><channel><item>… document from r and turns every item
// that has a non-blank <link> into a MainResult whose content is the item
// description passed through stripHTML. NumberOfResults is the number of
// results kept. A decode failure is returned wrapped with a
// "bing RSS parse error" prefix.
func parseBingRSS(r io.Reader, query string) (contracts.SearchResponse, error) {
	type rssFeed struct {
		XMLName xml.Name `xml:"rss"`
		Channel struct {
			Items []struct {
				Title   string `xml:"title"`
				Link    string `xml:"link"`
				Descrip string `xml:"description"`
			} `xml:"item"`
		} `xml:"channel"`
	}

	var feed rssFeed
	if err := xml.NewDecoder(r).Decode(&feed); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("bing RSS parse error: %w", err)
	}

	results := make([]contracts.MainResult, 0, len(feed.Channel.Items))
	for _, item := range feed.Channel.Items {
		// Element text may carry surrounding whitespace/newlines; a link that
		// is blank after trimming is not a usable result.
		link := strings.TrimSpace(item.Link)
		if link == "" {
			continue
		}
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    item.Title,
			Content:  stripHTML(item.Descrip),
			URL:      &link,
			Engine:   "bing",
			Score:    0,
			Category: "general",
			Engines:  []string{"bing"},
		})
	}

	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}

// parseBingJSON parses Bing's JSON API response.
func parseBingJSON(r io.Reader, query string) (contracts.SearchResponse, error) { var data struct { WebPages struct { TotalEstimatedMatches int `json:"totalEstimatedMatches"` Value []struct { Name string `json:"name"` URL string `json:"url"` Snippet string `json:"snippet"` DateLastCrawled string `json:"dateLastCrawled"` } `json:"value"` } `json:"webPages"` } if err := json.NewDecoder(r).Decode(&data); err != nil { return contracts.SearchResponse{}, fmt.Errorf("bing JSON parse error: %w", err) } results := make([]contracts.MainResult, 0, len(data.WebPages.Value)) for _, item := range data.WebPages.Value { linkPtr := item.URL results = append(results, contracts.MainResult{ Template: "default.html", Title: item.Name, Content: item.Snippet, URL: &linkPtr, Engine: "bing", Score: 0, Category: "general", Engines: []string{"bing"}, }) } return contracts.SearchResponse{ Query: query, NumberOfResults: data.WebPages.TotalEstimatedMatches, Results: results, Answers: []map[string]any{}, Corrections: []string{}, Infoboxes: []map[string]any{}, Suggestions: []string{}, UnresponsiveEngines: [][2]string{}, }, nil } var _ = strconv.Itoa var _ = json.Unmarshal