kafka/internal/engines/reddit.go
Franz Kafka b3e3123612 security: fix build errors, add honest Google UA, sanitize error msgs
- Fix config validation: upstream URLs allow private IPs (self-hosted)
- Fix util.SafeURLScheme to return parsed URL
- Replace spoofed GSA User-Agent with honest Kafka UA
- Sanitize all engine error messages (strip response bodies)
- Replace unused body reads with io.Copy(io.Discard, ...) for reuse
- Fix pre-existing braveapi_test using wrong struct type
- Fix ratelimit test reference to limiter variable
- Update ratelimit tests for new trusted proxy behavior
2026-03-22 16:27:49 +00:00

136 lines
3.9 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
// RedditEngine is the engine implementation that looks up Reddit posts
// through Reddit's public JSON search endpoint.
type RedditEngine struct {
	// client carries out the outgoing HTTP requests. Search reports an
	// error for non-blank queries when it is nil.
	client *http.Client
}
// Name returns the fixed identifier of this engine, "reddit".
func (e *RedditEngine) Name() string {
	const engineName = "reddit"
	return engineName
}
// Search queries Reddit's public search.json endpoint for req.Query and
// converts every returned post into a contracts.MainResult.
//
// A blank (whitespace-only) query short-circuits: a response carrying only
// the query is returned and no HTTP request is made. An error is returned
// when the engine or its HTTP client is nil, when the request cannot be
// built or sent, when Reddit answers with a status other than 200, or when
// the response body cannot be decoded as JSON. Errors from the HTTP and JSON
// layers are wrapped with %w, so errors.Is / errors.As still see the cause
// (for example context cancellation).
//
// Posts flagged over_18 are dropped whenever req.Safesearch is greater than
// zero. Each result's Score is the post's Reddit score.
func (e *RedditEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	// Upper bound on the bytes of a 200 response handed to the JSON decoder,
	// so an oversized upstream reply cannot be buffered without limit.
	const maxBodyBytes = 10 << 20 // 10 MiB

	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("reddit engine not initialized")
	}

	endpoint := fmt.Sprintf(
		"https://www.reddit.com/search.json?q=%s&limit=25&sort=relevance&t=all",
		url.QueryEscape(req.Query),
	)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("reddit: building request: %w", err)
	}
	httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/gosearch)")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("reddit: sending request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort bounded drain so the connection can be reused. The body
		// is deliberately kept out of the error message, and a failed drain
		// changes nothing for the caller, so its result is ignored.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
		return contracts.SearchResponse{}, fmt.Errorf("reddit api error: status %d", resp.StatusCode)
	}

	// Only the fields read below (plus created_utc) are declared; any other
	// keys in the listing are ignored by the decoder.
	var payload struct {
		Data struct {
			Children []struct {
				Data struct {
					Title       string  `json:"title"`
					URL         string  `json:"url"`
					Permalink   string  `json:"permalink"`
					Score       int     `json:"score"`
					NumComments int     `json:"num_comments"`
					Subreddit   string  `json:"subreddit"`
					CreatedUTC  float64 `json:"created_utc"`
					IsSelf      bool    `json:"is_self"`
					Over18      bool    `json:"over_18"`
				} `json:"data"`
			} `json:"children"`
		} `json:"data"`
	}
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxBodyBytes)).Decode(&payload); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("reddit: decoding response: %w", err)
	}

	results := make([]contracts.MainResult, 0, len(payload.Data.Children))
	for _, child := range payload.Data.Children {
		post := child.Data

		// Any non-zero safesearch level filters out posts marked over_18.
		if post.Over18 && req.Safesearch > 0 {
			continue
		}

		// Link to the Reddit thread itself for self-posts, for site-relative
		// "/r/..." URLs, and when the post has no URL but does have a
		// permalink. Otherwise keep the external URL the post points at.
		// linkURL is declared per iteration, so taking its address is safe.
		linkURL := post.URL
		if post.IsSelf || strings.HasPrefix(linkURL, "/r/") || (linkURL == "" && post.Permalink != "") {
			linkURL = "https://www.reddit.com" + post.Permalink
		}

		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    post.Title,
			Content:  fmt.Sprintf("r/%s · ⬆ %d · 💬 %d", post.Subreddit, post.Score, post.NumComments),
			URL:      &linkURL,
			Engine:   "reddit",
			Score:    float64(post.Score),
			Category: "general",
			Engines:  []string{"reddit"},
		})
	}

	// The remaining collections are set to empty, non-nil values.
	// NOTE(review): presumably so they serialize as [] rather than null;
	// confirm against the response consumers.
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}