kafka/internal/search/request_params.go
Franz Kafka 2b072e4de3
Some checks failed
Build and Push Docker Image / build-and-push (push) Failing after 6s
Mirror to GitHub / mirror (push) Failing after 3s
Tests / test (push) Successful in 25s
feat: add image search with Bing, DuckDuckGo, and Qwant engines
Three new image search engines:
- bing_images: Bing Images via RSS endpoint
- ddg_images: DuckDuckGo Images via VQD API
- qwant_images: Qwant Images via v3 search API

Frontend:
- Image grid layout with responsive columns
- image_item template with thumbnail, title, and source metadata
- Hover animations and lazy loading
- Grid activates automatically when category=images

Backend:
- category=images routes to image engines via planner
- Image engines registered in factory and engine allowlist
- extractImgSrc helper for parsing thumbnail URLs from HTML
- IsImageSearch flag on PageData for template layout switching
2026-03-22 16:49:24 +00:00

253 lines
6.7 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package search
import (
	"errors"
	"math"
	"net/http"
	"regexp"
	"sort"
	"strconv"
	"strings"
)
// maxQueryLength is the maximum allowed length for the search query.
const maxQueryLength = 1024

// languageCodeRe matches a language code made of two or three lowercase ASCII
// letters, optionally followed by a hyphen and exactly two ASCII letters of
// either case (for example "en", "deu", "en-US", "pt-br").
var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`)
// knownEngineNames is the allowlist of valid engine identifiers.
// Keys are lowercase; lookups must lowercase the candidate first.
var knownEngineNames = map[string]bool{
	"wikipedia": true, "arxiv": true, "crossref": true,
	"braveapi": true, "brave": true, "qwant": true,
	"duckduckgo": true, "github": true, "reddit": true,
	"bing": true, "google": true, "youtube": true,
	// Image engines
	"bing_images": true, "ddg_images": true, "qwant_images": true,
}

// validateEngines filters engine names against the known registry.
//
// Each name is lowercased before the lookup. Names that are not in
// knownEngineNames are dropped, and so are repeats of a name that was already
// accepted (including case variants such as "Bing" and "bing"), so the result
// never lists the same engine twice. The order of first occurrence is kept.
// The returned slice is always non-nil.
func validateEngines(engines []string) []string {
	out := make([]string, 0, len(engines))
	seen := make(map[string]bool, len(engines))
	for _, e := range engines {
		name := strings.ToLower(e)
		if !knownEngineNames[name] || seen[name] {
			continue
		}
		seen[name] = true
		out = append(out, name)
	}
	return out
}
// ParseSearchRequest extracts and validates search parameters from r.
//
// Values are read from the parsed form (r.ParseForm is called first):
//   - q: required, must not be blank, at most maxQueryLength bytes.
//   - format: used as-is when it is one of the known output formats;
//     otherwise HTML when the Accept header contains "text/html", else JSON.
//   - pageno: integer >= 1 (default 1).
//   - safesearch: integer in [0, 2] (default 0).
//   - time_range: "day", "week", "month" or "year"; empty or "None" leaves
//     it unset.
//   - timeout_limit: finite number > 0; empty or "None" leaves it unset.
//   - language: "auto" (default), "all", or a code matching languageCodeRe.
//   - engines: comma-separated names, filtered through validateEngines.
//   - categories and category_<name>: requested categories, returned in
//     sorted order; ["general"] when none are selected.
//   - engine_data-<engine>-<key>: per-engine key/value pairs.
//
// On the first parameter that fails validation it returns a zero
// SearchRequest and a non-nil error naming that parameter.
func ParseSearchRequest(r *http.Request) (SearchRequest, error) {
	// Supports both GET and POST and relies on form values for routing.
	if err := r.ParseForm(); err != nil {
		return SearchRequest{}, errors.New("invalid request: cannot parse form")
	}

	format := strings.ToLower(r.FormValue("format"))
	switch OutputFormat(format) {
	case FormatJSON, FormatCSV, FormatRSS, FormatHTML:
		// Explicit, recognised format: use as-is.
	default:
		// Missing or unrecognised format: default to HTML for browser
		// requests, JSON for API clients.
		if strings.Contains(r.Header.Get("Accept"), "text/html") {
			format = string(FormatHTML)
		} else {
			format = string(FormatJSON)
		}
	}

	q := r.FormValue("q")
	if strings.TrimSpace(q) == "" {
		return SearchRequest{}, errors.New("missing required parameter: q")
	}
	if len(q) > maxQueryLength {
		return SearchRequest{}, errors.New("query exceeds maximum length")
	}

	pageno := 1
	if s := strings.TrimSpace(r.FormValue("pageno")); s != "" {
		n, err := strconv.Atoi(s)
		if err != nil || n < 1 {
			return SearchRequest{}, errors.New("invalid parameter: pageno")
		}
		pageno = n
	}

	// MVP defaults.
	safesearch := 0
	if s := strings.TrimSpace(r.FormValue("safesearch")); s != "" {
		n, err := strconv.Atoi(s)
		if err != nil || n < 0 || n > 2 {
			return SearchRequest{}, errors.New("invalid parameter: safesearch")
		}
		safesearch = n
	}

	var timeRange *string
	if tr := strings.TrimSpace(r.FormValue("time_range")); tr != "" && tr != "None" {
		switch tr {
		case "day", "week", "month", "year":
			timeRange = &tr
		default:
			return SearchRequest{}, errors.New("invalid parameter: time_range")
		}
	}

	var timeoutLimit *float64
	if s := strings.TrimSpace(r.FormValue("timeout_limit")); s != "" && s != "None" {
		v, err := strconv.ParseFloat(s, 64)
		// ParseFloat accepts spellings such as "NaN" and "Inf" without an
		// error, and NaN compares false against everything, so non-finite
		// values have to be rejected explicitly.
		if err != nil || math.IsNaN(v) || math.IsInf(v, 0) || v <= 0 {
			return SearchRequest{}, errors.New("invalid parameter: timeout_limit")
		}
		timeoutLimit = &v
	}

	language := strings.TrimSpace(r.FormValue("language"))
	if language == "" {
		language = "auto"
	}
	switch language {
	case "auto", "all":
		// ok
	default:
		if !languageCodeRe.MatchString(language) {
			return SearchRequest{}, errors.New("invalid parameter: language")
		}
	}

	// engines is an explicit list of engine names, validated against the
	// known registry to prevent injection.
	engines := validateEngines(splitCSV(strings.TrimSpace(r.FormValue("engines"))))

	// categories and category_<name> params mirror the webadapter parsing.
	// We don't validate against a registry here; we just preserve the requested values.
	catSet := map[string]bool{}
	if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" {
		for _, cat := range splitCSV(catsParam) {
			catSet[cat] = true
		}
	}
	for k, v := range r.Form {
		if !strings.HasPrefix(k, "category_") {
			continue
		}
		category := strings.TrimPrefix(k, "category_")
		if category == "" {
			continue
		}
		val := ""
		if len(v) > 0 {
			val = strings.TrimSpace(v[0])
		}
		// Only the literal value "off" deselects a category; any other value
		// (including an empty one) selects it.
		if val != "off" {
			catSet[category] = true
		} else {
			delete(catSet, category)
		}
	}
	categories := make([]string, 0, len(catSet))
	for c := range catSet {
		categories = append(categories, c)
	}
	// Map iteration order is random; sort so the same request always yields
	// the same Categories slice.
	sort.Strings(categories)
	if len(categories) == 0 {
		categories = []string{"general"}
	}

	// Parse engine_data-<engine>-<key>=<value> parameters.
	engineData := map[string]map[string]string{}
	for k, v := range r.Form {
		if !strings.HasPrefix(k, "engine_data-") {
			continue
		}
		parts := strings.SplitN(k, "-", 3) // engine_data-<engine>-<key>
		if len(parts) != 3 {
			continue
		}
		engine, key := parts[1], parts[2]
		// For HTML forms, r.Form[k] can contain multiple values; keep first.
		val := ""
		if len(v) > 0 {
			val = v[0]
		}
		if _, ok := engineData[engine]; !ok {
			engineData[engine] = map[string]string{}
		}
		engineData[engine][key] = val
	}

	return SearchRequest{
		Format:       OutputFormat(format),
		Query:        q,
		Pageno:       pageno,
		Safesearch:   safesearch,
		TimeRange:    timeRange,
		TimeoutLimit: timeoutLimit,
		Language:     language,
		Engines:      engines,
		Categories:   categories,
		EngineData:   engineData,
		AccessToken:  parseAccessToken(r),
	}, nil
}
// splitCSV breaks a comma-separated string into its items. Whitespace around
// each item is trimmed and items that end up empty are discarded. An empty
// input yields nil; any other input yields a non-nil (possibly empty) slice.
func splitCSV(s string) []string {
	if len(s) == 0 {
		return nil
	}
	fields := strings.Split(s, ",")
	items := make([]string, 0, len(fields))
	for i := range fields {
		if trimmed := strings.TrimSpace(fields[i]); trimmed != "" {
			items = append(items, trimmed)
		}
	}
	return items
}
// parseAccessToken returns the access token supplied with r, or "" when none
// is present.
//
// Supported sources (first non-empty wins):
//   - `Authorization: Bearer <token>` (the scheme name is matched
//     case-insensitively, as HTTP auth schemes are case-insensitive)
//   - `X-Search-Token` / `X-Brave-Access-Token`
//   - `token` form value
//
// Every candidate is whitespace-trimmed before it is checked, so a blank
// value in one source never hides a real token in a later one.
func parseAccessToken(r *http.Request) string {
	const scheme = "Bearer "
	if auth := r.Header.Get("Authorization"); len(auth) > len(scheme) && strings.EqualFold(auth[:len(scheme)], scheme) {
		// Fall through to the other sources when the bearer value is blank
		// instead of returning an empty token.
		if tok := strings.TrimSpace(auth[len(scheme):]); tok != "" {
			return tok
		}
	}
	for _, name := range []string{"X-Search-Token", "X-Brave-Access-Token"} {
		if v := strings.TrimSpace(r.Header.Get(name)); v != "" {
			return v
		}
	}
	return strings.TrimSpace(r.FormValue("token"))
}