kafka/internal/engines/bing.go
Franz Kafka 7be03b4017 license: change from MIT to AGPLv3
Update LICENSE file and add AGPL header to all source files.

AGPLv3 ensures that if someone runs Kafka as a network service and
modifies it, they must release their source code under the same license.
2026-03-22 08:27:23 +00:00

191 lines
5.7 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"context"
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
// BingEngine is a search engine backed by Bing's web search endpoint.
// Its Search method requests results in RSS form (format=rss) and parses the
// reply as JSON or XML/RSS depending on the response Content-Type.
// Note: Bing's HTML is heavily JS-dependent and blocks non-browser clients,
// so when neither format comes back Search returns an empty response that
// lists bing as unresponsive instead of failing.
type BingEngine struct {
// client performs the outbound HTTP requests; Search reports an error when it is nil.
client *http.Client
}
// Name returns the fixed identifier of this engine, "bing".
// It does not dereference the receiver, so it is safe to call on a nil *BingEngine.
func (e *BingEngine) Name() string {
	const engineName = "bing"
	return engineName
}
// Search queries Bing's web search endpoint for req.Query and converts the
// reply into a contracts.SearchResponse.
//
// The request asks for RSS output (format=rss) with 10 results per page. The
// offset is derived from req.Pageno; page numbers below 1 are treated as
// page 1 so the offset is never negative.
//
// Outcomes:
//   - blank query: a response carrying only the query, with no HTTP call made;
//   - nil receiver or nil client: an error;
//   - request construction or transport failure: the underlying error, wrapped;
//   - non-200 status: an error that includes up to 4 KiB of the upstream body;
//   - JSON or XML/RSS Content-Type: the parsed results;
//   - any other Content-Type: an empty response listing bing under
//     UnresponsiveEngines, with a nil error.
func (e *BingEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("bing engine not initialized")
	}

	// Clamp the page so a zero or negative Pageno cannot produce a negative offset.
	page := req.Pageno
	if page < 1 {
		page = 1
	}

	// NOTE(review): count/offset follow the naming of Bing's Web Search API; the
	// www.bing.com frontend may expect `first` for pagination instead — confirm
	// against live responses before relying on pages beyond the first.
	endpoint := fmt.Sprintf(
		"https://www.bing.com/search?q=%s&count=10&offset=%d&format=rss",
		url.QueryEscape(req.Query),
		(page-1)*10,
	)

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("bing request build error: %w", err)
	}
	httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		// %w keeps context cancellation / deadline errors detectable via errors.Is.
		return contracts.SearchResponse{}, fmt.Errorf("bing request error: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body snippet is diagnostic only, so a read error is ignored.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}

	// Media types are case-insensitive, so normalise before matching.
	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
	switch {
	case strings.Contains(contentType, "json"):
		return parseBingJSON(resp.Body, req.Query)
	case strings.Contains(contentType, "xml"), strings.Contains(contentType, "rss"):
		return parseBingRSS(resp.Body, req.Query)
	}

	// If Bing returned HTML instead of RSS, it likely blocked us.
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     0,
		Results:             []contracts.MainResult{},
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{{"bing", "blocked by bot detection"}},
	}, nil
}
// parseBingRSS decodes an RSS document from r and turns every <item> that has
// a non-empty <link> into a result attributed to the "bing" engine.
//
// Item descriptions are passed through stripHTML before being stored as the
// result content. NumberOfResults is the number of items kept. A decode
// failure is returned wrapped as "bing RSS parse error".
func parseBingRSS(r io.Reader, query string) (contracts.SearchResponse, error) {
	type rssItem struct {
		Title       string `xml:"title"`
		Link        string `xml:"link"`
		Description string `xml:"description"`
	}
	type rssFeed struct {
		XMLName xml.Name `xml:"rss"`
		Channel struct {
			Items []rssItem `xml:"item"`
		} `xml:"channel"`
	}

	var feed rssFeed
	err := xml.NewDecoder(r).Decode(&feed)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("bing RSS parse error: %w", err)
	}

	entries := feed.Channel.Items
	hits := make([]contracts.MainResult, 0, len(entries))
	for i := range entries {
		// A fresh variable per iteration, so each result points at its own string.
		link := entries[i].Link
		if link == "" {
			continue
		}
		hit := contracts.MainResult{
			Template: "default.html",
			Title:    entries[i].Title,
			Content:  stripHTML(entries[i].Description),
			URL:      &link,
			Engine:   "bing",
			Score:    0,
			Category: "general",
			Engines:  []string{"bing"},
		}
		hits = append(hits, hit)
	}

	// Collections are non-nil empty values rather than nil.
	out := contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     len(hits),
		Results:             hits,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}
	return out, nil
}
// parseBingJSON decodes a JSON document shaped like Bing's web search API
// reply (webPages.value[] with name, url and snippet) from r and maps each
// entry to a result attributed to the "bing" engine.
//
// Entries with an empty url are skipped, matching parseBingRSS, which drops
// items without a link. NumberOfResults is webPages.totalEstimatedMatches,
// raised to the number of results kept when the reported total is smaller
// (for example when the field is absent and decodes as 0). A decode failure is
// returned wrapped as "bing JSON parse error".
func parseBingJSON(r io.Reader, query string) (contracts.SearchResponse, error) {
	var data struct {
		WebPages struct {
			TotalEstimatedMatches int `json:"totalEstimatedMatches"`
			Value                 []struct {
				Name    string `json:"name"`
				URL     string `json:"url"`
				Snippet string `json:"snippet"`
				// DateLastCrawled is decoded but not used by this function.
				DateLastCrawled string `json:"dateLastCrawled"`
			} `json:"value"`
		} `json:"webPages"`
	}
	if err := json.NewDecoder(r).Decode(&data); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("bing JSON parse error: %w", err)
	}

	results := make([]contracts.MainResult, 0, len(data.WebPages.Value))
	for _, item := range data.WebPages.Value {
		// A result without a target URL is unusable; skip it like the RSS parser does.
		if item.URL == "" {
			continue
		}
		linkPtr := item.URL
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    item.Name,
			Content:  item.Snippet,
			URL:      &linkPtr,
			Engine:   "bing",
			Score:    0,
			Category: "general",
			Engines:  []string{"bing"},
		})
	}

	// Never report fewer matches than the results actually returned.
	total := data.WebPages.TotalEstimatedMatches
	if total < len(results) {
		total = len(results)
	}

	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     total,
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// Blank-identifier references that keep the strconv and encoding/json imports
// in use, so the file compiles even if no other code refers to them.
var (
	_ = strconv.Itoa
	_ = json.Unmarshal
)