From e90f6c08760cd30874c8614bab6f5bba39278a62 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 00:20:43 +0000 Subject: [PATCH 01/12] feat: add autocomplete dropdown UI with keyboard nav - Inline JS in base.html: debounced fetch from /autocompleter on keyup - Keyboard nav: arrows to navigate, Enter to select, Esc to close - Highlight matching prefix in suggestions - Click to select and submit - Dropdown positioned absolutely below search input - Dark mode compatible via existing CSS variables --- internal/views/static/css/kafka.css | 57 ++++++++++++++ internal/views/templates/base.html | 117 ++++++++++++++++++++++++++++ internal/views/templates/index.html | 3 +- 3 files changed, 176 insertions(+), 1 deletion(-) diff --git a/internal/views/static/css/kafka.css b/internal/views/static/css/kafka.css index ad794c4..376b2d8 100644 --- a/internal/views/static/css/kafka.css +++ b/internal/views/static/css/kafka.css @@ -421,6 +421,63 @@ footer a:hover { display: block; } +/* Autocomplete dropdown */ +#search { + position: relative; +} + +#autocomplete-dropdown { + position: absolute; + top: 100%; + left: 0; + right: 0; + background: var(--color-base-background); + border: 1px solid var(--color-search-border); + border-top: none; + border-radius: 0 0 var(--radius) var(--radius); + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); + z-index: 100; + max-height: 320px; + overflow-y: auto; + display: none; +} + +#autocomplete-dropdown.open { + display: block; +} + +.autocomplete-suggestion { + padding: 0.6rem 1rem; + cursor: pointer; + font-size: 0.95rem; + color: var(--color-base-font); + border-bottom: 1px solid var(--color-result-border); + transition: background 0.15s; +} + +.autocomplete-suggestion:last-child { + border-bottom: none; +} + +.autocomplete-suggestion:hover, +.autocomplete-suggestion.active { + background: var(--color-header-background); +} + +.autocomplete-suggestion mark { + background: none; + color: var(--color-link); + font-weight: 600; +} + 
+.autocomplete-footer { + padding: 0.4rem 1rem; + font-size: 0.75rem; + color: var(--color-suggestion); + border-top: 1px solid var(--color-result-border); + background: var(--color-header-background); +} + /* Responsive */ @media (max-width: 768px) { #results { diff --git a/internal/views/templates/base.html b/internal/views/templates/base.html index 10de540..6572b19 100644 --- a/internal/views/templates/base.html +++ b/internal/views/templates/base.html @@ -20,6 +20,123 @@ + {{end}} diff --git a/internal/views/templates/index.html b/internal/views/templates/index.html index e2ca279..c9df700 100644 --- a/internal/views/templates/index.html +++ b/internal/views/templates/index.html @@ -3,11 +3,12 @@

kafka

From a2f8077669aaee9294c06a9f233000ed6def7736 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 00:20:43 +0000 Subject: [PATCH 02/12] feat: add autocomplete dropdown UI with keyboard nav - Inline JS in base.html: debounced fetch from /autocompleter on keyup - Keyboard nav: arrows to navigate, Enter to select, Esc to close - Highlight matching prefix in suggestions - Click to select and submit - Dropdown positioned absolutely below search input - Dark mode compatible via existing CSS variables --- internal/views/static/css/kafka.css | 57 ++++++++++++++ internal/views/templates/base.html | 117 ++++++++++++++++++++++++++++ internal/views/templates/index.html | 3 +- 3 files changed, 176 insertions(+), 1 deletion(-) diff --git a/internal/views/static/css/kafka.css b/internal/views/static/css/kafka.css index ad794c4..376b2d8 100644 --- a/internal/views/static/css/kafka.css +++ b/internal/views/static/css/kafka.css @@ -421,6 +421,63 @@ footer a:hover { display: block; } +/* Autocomplete dropdown */ +#search { + position: relative; +} + +#autocomplete-dropdown { + position: absolute; + top: 100%; + left: 0; + right: 0; + background: var(--color-base-background); + border: 1px solid var(--color-search-border); + border-top: none; + border-radius: 0 0 var(--radius) var(--radius); + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); + z-index: 100; + max-height: 320px; + overflow-y: auto; + display: none; +} + +#autocomplete-dropdown.open { + display: block; +} + +.autocomplete-suggestion { + padding: 0.6rem 1rem; + cursor: pointer; + font-size: 0.95rem; + color: var(--color-base-font); + border-bottom: 1px solid var(--color-result-border); + transition: background 0.15s; +} + +.autocomplete-suggestion:last-child { + border-bottom: none; +} + +.autocomplete-suggestion:hover, +.autocomplete-suggestion.active { + background: var(--color-header-background); +} + +.autocomplete-suggestion mark { + background: none; + color: var(--color-link); + font-weight: 600; +} + 
+.autocomplete-footer { + padding: 0.4rem 1rem; + font-size: 0.75rem; + color: var(--color-suggestion); + border-top: 1px solid var(--color-result-border); + background: var(--color-header-background); +} + /* Responsive */ @media (max-width: 768px) { #results { diff --git a/internal/views/templates/base.html b/internal/views/templates/base.html index 10de540..6572b19 100644 --- a/internal/views/templates/base.html +++ b/internal/views/templates/base.html @@ -20,6 +20,123 @@

Powered by kafka — a privacy-respecting, open metasearch engine

+ {{end}} diff --git a/internal/views/templates/index.html b/internal/views/templates/index.html index e2ca279..c9df700 100644 --- a/internal/views/templates/index.html +++ b/internal/views/templates/index.html @@ -3,11 +3,12 @@

kafka

From 8ea318ad4a31f31f4b3b25356d935f208b2b854e Mon Sep 17 00:00:00 2001 From: ashisgreat22 Date: Sun, 22 Mar 2026 01:26:46 +0100 Subject: [PATCH 03/12] docs: update CLAUDE.md with autocomplete package and endpoint Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index bba67e1..1ba6bdc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,7 +37,8 @@ There is no Makefile. There is no linter configured. - `internal/config` — TOML-based configuration with env var fallbacks. `Load(path)` reads `config.toml`; env vars override zero-value fields. See `config.example.toml` for all settings. - `internal/engines` — `Engine` interface and all 9 Go-native implementations. `factory.go` registers engines via `NewDefaultPortedEngines()`. `planner.go` routes engines to local or upstream based on `LOCAL_PORTED_ENGINES` env var. - `internal/search` — `Service` orchestrates the pipeline: cache check, planning, parallel engine execution via goroutines/WaitGroup, upstream proxying, response merging. Individual engine failures are reported as `unresponsive_engines` rather than aborting the search. Qwant has fallback logic to upstream on empty results. -- `internal/httpapi` — HTTP handlers for `/`, `/search`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. +- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream SearXNG `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. +- `internal/httpapi` — HTTP handlers for `/`, `/search`, `/autocompleter`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. - `internal/upstream` — Client that proxies requests to an upstream SearXNG instance via POST. - `internal/cache` — Valkey/Redis-backed cache with SHA-256 cache keys. No-op if unconfigured. 
- `internal/middleware` — Three rate limiters (per-IP sliding window, burst+sustained, global) and CORS. All disabled by default. From 4482cb4dde5e39a8cfb517182fcfd2a79d7a6c33 Mon Sep 17 00:00:00 2001 From: ashisgreat22 Date: Sun, 22 Mar 2026 01:26:46 +0100 Subject: [PATCH 04/12] docs: update CLAUDE.md with autocomplete package and endpoint Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index bba67e1..1ba6bdc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,7 +37,8 @@ There is no Makefile. There is no linter configured. - `internal/config` — TOML-based configuration with env var fallbacks. `Load(path)` reads `config.toml`; env vars override zero-value fields. See `config.example.toml` for all settings. - `internal/engines` — `Engine` interface and all 9 Go-native implementations. `factory.go` registers engines via `NewDefaultPortedEngines()`. `planner.go` routes engines to local or upstream based on `LOCAL_PORTED_ENGINES` env var. - `internal/search` — `Service` orchestrates the pipeline: cache check, planning, parallel engine execution via goroutines/WaitGroup, upstream proxying, response merging. Individual engine failures are reported as `unresponsive_engines` rather than aborting the search. Qwant has fallback logic to upstream on empty results. -- `internal/httpapi` — HTTP handlers for `/`, `/search`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. +- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream SearXNG `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. +- `internal/httpapi` — HTTP handlers for `/`, `/search`, `/autocompleter`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. - `internal/upstream` — Client that proxies requests to an upstream SearXNG instance via POST. 
- `internal/cache` — Valkey/Redis-backed cache with SHA-256 cache keys. No-op if unconfigured. - `internal/middleware` — Three rate limiters (per-IP sliding window, burst+sustained, global) and CORS. All disabled by default. From 21b77f25bf647f2d11edda9ace9b625db72166d9 Mon Sep 17 00:00:00 2001 From: ashisgreat22 Date: Sun, 22 Mar 2026 01:47:03 +0100 Subject: [PATCH 05/12] refactor: remove SearXNG references and rename binary to kafka - Rename cmd/searxng-go to cmd/kafka - Remove all SearXNG references from source comments while keeping "SearXNG-compatible API" in user-facing docs - Update binary paths in README, CLAUDE.md, and Dockerfile - Update log message to "kafka starting" Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 10 +++++----- Dockerfile | 2 +- README.md | 12 ++++++------ cmd/{searxng-go => kafka}/main.go | 2 +- config.example.toml | 4 ++-- internal/autocomplete/service.go | 6 +++--- internal/contracts/main_result.go | 10 +++++----- internal/contracts/types.go | 6 +++--- internal/engines/braveapi.go | 8 ++++---- internal/engines/engine.go | 2 +- internal/engines/planner.go | 4 ++-- internal/engines/qwant.go | 10 +++++----- internal/search/merge.go | 2 +- internal/search/request_params.go | 4 ++-- internal/search/response.go | 8 ++++---- internal/search/service.go | 2 +- internal/upstream/client.go | 2 +- internal/views/static/css/kafka.css | 1 - 18 files changed, 47 insertions(+), 48 deletions(-) rename cmd/{searxng-go => kafka}/main.go (98%) diff --git a/CLAUDE.md b/CLAUDE.md index 1ba6bdc..b7f254e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream SearXNG instance. 
Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response. +kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream metasearch instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response. ## Build & Run Commands @@ -22,7 +22,7 @@ go test -run TestWikipedia ./internal/engines/ go test -v ./internal/engines/ # Run the server (requires config.toml) -go run ./cmd/searxng-go -config config.toml +go run ./cmd/kafka -config config.toml ``` There is no Makefile. There is no linter configured. @@ -37,13 +37,13 @@ There is no Makefile. There is no linter configured. - `internal/config` — TOML-based configuration with env var fallbacks. `Load(path)` reads `config.toml`; env vars override zero-value fields. See `config.example.toml` for all settings. - `internal/engines` — `Engine` interface and all 9 Go-native implementations. `factory.go` registers engines via `NewDefaultPortedEngines()`. `planner.go` routes engines to local or upstream based on `LOCAL_PORTED_ENGINES` env var. - `internal/search` — `Service` orchestrates the pipeline: cache check, planning, parallel engine execution via goroutines/WaitGroup, upstream proxying, response merging. Individual engine failures are reported as `unresponsive_engines` rather than aborting the search. Qwant has fallback logic to upstream on empty results. -- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream SearXNG `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. +- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. - `internal/httpapi` — HTTP handlers for `/`, `/search`, `/autocompleter`, `/healthz`, `/opensearch.xml`. 
Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. -- `internal/upstream` — Client that proxies requests to an upstream SearXNG instance via POST. +- `internal/upstream` — Client that proxies requests to an upstream metasearch instance via POST. - `internal/cache` — Valkey/Redis-backed cache with SHA-256 cache keys. No-op if unconfigured. - `internal/middleware` — Three rate limiters (per-IP sliding window, burst+sustained, global) and CORS. All disabled by default. - `internal/views` — HTML templates and static files embedded via `//go:embed`. Renders full pages or HTMX fragments. Templates: `base.html`, `index.html`, `results.html`, `results_inner.html`, `result_item.html`. -- `cmd/searxng-go` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server. +- `cmd/kafka` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server. **Engine interface** (`internal/engines/engine.go`): ```go diff --git a/Dockerfile b/Dockerfile index c41b5a1..e21960f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ RUN go mod download # Copy source and build COPY . . -RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/searxng-go +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/kafka # Runtime stage FROM alpine:3.21 diff --git a/README.md b/README.md index 2f0868f..c03019e 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ A privacy-respecting, open metasearch engine written in Go. SearXNG-compatible A ```bash git clone https://git.ashisgreat.xyz/penal-colony/gosearch.git cd kafka -go build ./cmd/searxng-go -./searxng-go -config config.toml +go build ./cmd/kafka +./kafka -config config.toml ``` ### Docker Compose @@ -76,7 +76,7 @@ sudo nixos-rebuild switch --flake .# ```bash nix develop go test ./... 
-go run ./cmd/searxng-go -config config.toml +go run ./cmd/kafka -config config.toml ``` ## Endpoints @@ -138,7 +138,7 @@ Copy `config.example.toml` to `config.toml` and edit. All settings can also be o ### Key Sections - **`[server]`** — port, timeout, public base URL for OpenSearch -- **`[upstream]`** — optional upstream SearXNG proxy for unported engines +- **`[upstream]`** — optional upstream metasearch proxy for unported engines - **`[engines]`** — which engines run locally, engine-specific settings - **`[cache]`** — Valkey/Redis address, password, TTL - **`[cors]`** — allowed origins and methods @@ -152,7 +152,7 @@ Copy `config.example.toml` to `config.toml` and edit. All settings can also be o |---|---| | `PORT` | Listen port (default: 8080) | | `BASE_URL` | Public URL for OpenSearch XML | -| `UPSTREAM_SEARXNG_URL` | Upstream SearXNG instance URL | +| `UPSTREAM_SEARXNG_URL` | Upstream instance URL | | `LOCAL_PORTED_ENGINES` | Comma-separated local engine list | | `HTTP_TIMEOUT` | Upstream request timeout | | `BRAVE_API_KEY` | Brave Search API key | @@ -177,7 +177,7 @@ See `config.example.toml` for the full list including rate limiting and CORS var | Reddit | Reddit JSON API | Discussions | | Bing | Bing RSS | General web | -Engines not listed in `engines.local_ported` are proxied to an upstream SearXNG instance if `upstream.url` is configured. +Engines not listed in `engines.local_ported` are proxied to an upstream metasearch instance if `upstream.url` is configured. 
## Architecture diff --git a/cmd/searxng-go/main.go b/cmd/kafka/main.go similarity index 98% rename from cmd/searxng-go/main.go rename to cmd/kafka/main.go index dac6258..ab29852 100644 --- a/cmd/searxng-go/main.go +++ b/cmd/kafka/main.go @@ -103,7 +103,7 @@ func main() { }, logger)(handler) addr := fmt.Sprintf(":%d", cfg.Server.Port) - logger.Info("searxng-go starting", + logger.Info("kafka starting", "addr", addr, "cache", searchCache.Enabled(), "rate_limit", cfg.RateLimit.Requests > 0, diff --git a/config.example.toml b/config.example.toml index df77184..1e3b75c 100644 --- a/config.example.toml +++ b/config.example.toml @@ -15,13 +15,13 @@ http_timeout = "10s" base_url = "" [upstream] -# URL of an upstream SearXNG instance for unported engines (env: UPSTREAM_SEARXNG_URL) +# URL of an upstream metasearch instance for unported engines (env: UPSTREAM_SEARXNG_URL) # Leave empty to run without an upstream proxy. url = "" [engines] # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) -# Engines not listed here will be proxied to upstream SearXNG. +# Engines not listed here will be proxied to the upstream instance. local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"] [engines.brave] diff --git a/internal/autocomplete/service.go b/internal/autocomplete/service.go index 3892d63..99d963a 100644 --- a/internal/autocomplete/service.go +++ b/internal/autocomplete/service.go @@ -11,7 +11,7 @@ import ( "time" ) -// Service fetches search suggestions from an upstream SearXNG instance +// Service fetches search suggestions from an upstream metasearch instance // or falls back to Wikipedia's OpenSearch API. type Service struct { upstreamURL string @@ -40,7 +40,7 @@ func (s *Service) Suggestions(ctx context.Context, query string) ([]string, erro return s.wikipediaSuggestions(ctx, query) } -// upstreamSuggestions proxies to an upstream SearXNG /autocompleter endpoint. 
+// upstreamSuggestions proxies to an upstream /autocompleter endpoint. func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]string, error) { u := s.upstreamURL + "/autocompleter?" + url.Values{"q": {query}}.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) @@ -64,7 +64,7 @@ func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]stri return nil, err } - // SearXNG /autocompleter returns a plain JSON array of strings. + // The /autocompleter endpoint returns a plain JSON array of strings. var out []string if err := json.Unmarshal(body, &out); err != nil { return nil, err diff --git a/internal/contracts/main_result.go b/internal/contracts/main_result.go index 48005f8..20c9231 100644 --- a/internal/contracts/main_result.go +++ b/internal/contracts/main_result.go @@ -5,15 +5,15 @@ import ( "encoding/json" ) -// MainResult represents one element of SearXNG's `results` array. +// MainResult represents one element of the `results` array. // -// SearXNG returns many additional keys beyond what templates use. To keep the +// The API returns many additional keys beyond what templates use. To keep the // contract stable for proxying/merging, we preserve all unknown keys in // `raw` and re-emit them via MarshalJSON. type MainResult struct { raw map[string]any - // Common fields used by SearXNG templates (RSS uses: title, url, content, pubdate). + // Common fields used by templates (RSS uses: title, url, content, pubdate). Template string `json:"template"` Title string `json:"title"` Content string `json:"content"` @@ -28,12 +28,12 @@ type MainResult struct { Positions []int `json:"positions"` Engines []string `json:"engines"` - // These fields exist in SearXNG's MainResult base; keep them so downstream + // These fields exist in the MainResult base; keep them so downstream // callers can generate richer output later. 
 OpenGroup bool `json:"open_group"` CloseGroup bool `json:"close_group"` - // parsed_url in SearXNG is emitted as a tuple; we preserve it as-is. + // parsed_url is emitted as a tuple; we preserve it as-is. ParsedURL any `json:"parsed_url"` } diff --git a/internal/contracts/types.go b/internal/contracts/types.go index a68f77a..81103ce 100644 --- a/internal/contracts/types.go +++ b/internal/contracts/types.go @@ -1,6 +1,6 @@ package contracts -// OutputFormat matches SearXNG's `/search?format=...` values. +// OutputFormat matches the `/search?format=...` values. type OutputFormat string const ( @@ -28,7 +28,7 @@ type SearchRequest struct { Engines []string Categories []string - // EngineData matches SearXNG's `engine_data-<engine>-<key>=<value>` parameters. + // EngineData matches the `engine_data-<engine>-<key>=<value>` parameters. EngineData map[string]map[string]string // AccessToken is an optional request token used to gate paid/limited engines. @@ -36,7 +36,7 @@ type SearchRequest struct { AccessToken string } -// SearchResponse matches the JSON schema returned by SearXNG's `webutils.get_json_response()`. +// SearchResponse matches the JSON schema used by `webutils.get_json_response()`. type SearchResponse struct { Query string `json:"query"` NumberOfResults int `json:"number_of_results"` diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go index 2cb20ff..77c7abe 100644 --- a/internal/engines/braveapi.go +++ b/internal/engines/braveapi.go @@ -14,7 +14,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// BraveEngine implements the SearXNG `braveapi` engine (Brave Web Search API). +// BraveEngine implements the `braveapi` engine (Brave Web Search API). 
// // Config / gating: // - BRAVE_API_KEY: required to call Brave @@ -35,8 +35,8 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{}, errors.New("brave engine not initialized") } - // Gate / config checks should not be treated as fatal errors; SearXNG - // treats misconfigured engines as unresponsive. + // Gate / config checks should not be treated as fatal errors; the reference + // implementation treats misconfigured engines as unresponsive. if strings.TrimSpace(e.apiKey) == "" { return contracts.SearchResponse{ Query: req.Query, @@ -93,7 +93,7 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( } } - // SearXNG's python checks `if params["safesearch"]:` which treats any + // The reference implementation checks `if params["safesearch"]:` which treats any // non-zero (moderate/strict) as strict. if req.Safesearch > 0 { args.Set("safesearch", "strict") diff --git a/internal/engines/engine.go b/internal/engines/engine.go index d07aec9..ee87cfd 100644 --- a/internal/engines/engine.go +++ b/internal/engines/engine.go @@ -6,7 +6,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// Engine is a Go-native implementation of a SearXNG engine. +// Engine is a Go-native implementation of a search engine. // // Implementations should return a SearchResponse containing only the results // for that engine subset; the caller will merge multiple engine responses. 
diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 543f253..56df656 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -48,7 +48,7 @@ func NewPlanner(portedEngines []string) *Planner { // Plan returns: // - localEngines: engines that are configured as ported for this service -// - upstreamEngines: engines that should be executed by upstream SearXNG +// - upstreamEngines: engines that should be executed by the upstream instance // - requestedEngines: the (possibly inferred) requested engines list // // If the request provides an explicit `engines` parameter, we use it. @@ -80,7 +80,7 @@ func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngin func inferFromCategories(categories []string) []string { // Minimal mapping for the initial porting subset. - // This mirrors the idea of selecting from SearXNG categories without + // This mirrors the idea of selecting from engine categories without // embedding the whole engine registry. set := map[string]bool{} for _, c := range categories { diff --git a/internal/engines/qwant.go b/internal/engines/qwant.go index bb2a03c..8221781 100644 --- a/internal/engines/qwant.go +++ b/internal/engines/qwant.go @@ -14,11 +14,11 @@ import ( "github.com/PuerkitoBio/goquery" ) -// QwantEngine implements a SearXNG-like `qwant` (web) adapter using +// QwantEngine implements a `qwant` (web) adapter using // Qwant v3 endpoint: https://api.qwant.com/v3/search/web. // -// Qwant's API is not fully documented; this mirrors SearXNG's parsing logic -// for the `web` category from `.agent/searxng/searx/engines/qwant.py`. +// Qwant's API is not fully documented; this implements parsing logic +// for the `web` category. 
type QwantEngine struct { client *http.Client category string // "web" (JSON API) or "web-lite" (HTML fallback) @@ -37,7 +37,7 @@ func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{Query: req.Query}, nil } - // For API parity we use SearXNG web defaults: count=10, offset=(pageno-1)*count. + // For API parity we use web defaults: count=10, offset=(pageno-1)*count. // The engine's config field exists so we can expand to news/images/videos later. count := e.resultsPerPage if count <= 0 { @@ -262,7 +262,7 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq return } - // In SearXNG: "./span[contains(@class, 'url partner')]" + // Selector: "./span[contains(@class, 'url partner')]" urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text()) if urlText == "" { // fallback: any span with class containing both 'url' and 'partner' diff --git a/internal/search/merge.go b/internal/search/merge.go index 54ff9bb..64ebd6e 100644 --- a/internal/search/merge.go +++ b/internal/search/merge.go @@ -8,7 +8,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// MergeResponses merges multiple SearXNG-compatible JSON responses. +// MergeResponses merges multiple compatible JSON responses. // // MVP merge semantics: // - results are concatenated with a simple de-dup key (engine|title|url) diff --git a/internal/search/request_params.go b/internal/search/request_params.go index 1d48a04..9fdd799 100644 --- a/internal/search/request_params.go +++ b/internal/search/request_params.go @@ -11,7 +11,7 @@ import ( var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`) func ParseSearchRequest(r *http.Request) (SearchRequest, error) { - // SearXNG supports both GET and POST and relies on form values for routing. + // Supports both GET and POST and relies on form values for routing. 
if err := r.ParseForm(); err != nil { return SearchRequest{}, errors.New("invalid request: cannot parse form") } @@ -90,7 +90,7 @@ func ParseSearchRequest(r *http.Request) (SearchRequest, error) { // engines is an explicit list of engine names. engines := splitCSV(strings.TrimSpace(r.FormValue("engines"))) - // categories and category_ params mirror SearXNG's webadapter parsing. + // categories and category_ params mirror the webadapter parsing. // We don't validate against a registry here; we just preserve the requested values. catSet := map[string]bool{} if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" { diff --git a/internal/search/response.go b/internal/search/response.go index 3b07096..1a9ce26 100644 --- a/internal/search/response.go +++ b/internal/search/response.go @@ -38,7 +38,7 @@ func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp Search } } -// csvRowHeader matches the SearXNG CSV writer key order. +// csvRowHeader matches the CSV writer key order. var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"} func writeCSV(w http.ResponseWriter, resp SearchResponse) error { @@ -111,14 +111,14 @@ func writeCSV(w http.ResponseWriter, resp SearchResponse) error { func writeRSS(w http.ResponseWriter, resp SearchResponse) error { q := resp.Query - escapedTitle := xmlEscape("SearXNG search: " + q) - escapedDesc := xmlEscape("Search results for \"" + q + "\" - SearXNG") + escapedTitle := xmlEscape("kafka search: " + q) + escapedDesc := xmlEscape("Search results for \"" + q + "\" - kafka") escapedQueryTerms := xmlEscape(q) link := "/search?q=" + url.QueryEscape(q) opensearchQuery := fmt.Sprintf(``, escapedQueryTerms) - // SearXNG template uses the number of results for both totalResults and itemsPerPage. + // The template uses the number of results for both totalResults and itemsPerPage. 
nr := resp.NumberOfResults var items bytes.Buffer diff --git a/internal/search/service.go b/internal/search/service.go index 91fef2b..62a9308 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -50,7 +50,7 @@ func NewService(cfg ServiceConfig) *Service { } // Search executes the request against local engines (in parallel) and -// optionally upstream SearXNG for unported engines. +// optionally the upstream instance for unported engines. // // Individual engine failures are reported as unresponsive_engines rather // than aborting the entire search. diff --git a/internal/upstream/client.go b/internal/upstream/client.go index 3a11843..64ddec4 100644 --- a/internal/upstream/client.go +++ b/internal/upstream/client.go @@ -68,7 +68,7 @@ func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, en for engineName, kv := range req.EngineData { for key, value := range kv { - // Mirror SearXNG's naming: `engine_data--=` + // Mirror the naming convention: `engine_data--=` form.Set(fmt.Sprintf("engine_data-%s-%s", engineName, key), value) } } diff --git a/internal/views/static/css/kafka.css b/internal/views/static/css/kafka.css index 376b2d8..824f489 100644 --- a/internal/views/static/css/kafka.css +++ b/internal/views/static/css/kafka.css @@ -1,5 +1,4 @@ /* kafka — clean, minimal search engine CSS */ -/* Inspired by SearXNG's simple theme class conventions */ :root { --color-base: #f5f5f5; From fcd9be16df3c3916207ca12dc2bb591e6d0750cc Mon Sep 17 00:00:00 2001 From: ashisgreat22 Date: Sun, 22 Mar 2026 01:47:03 +0100 Subject: [PATCH 06/12] refactor: remove SearXNG references and rename binary to kafka - Rename cmd/searxng-go to cmd/kafka - Remove all SearXNG references from source comments while keeping "SearXNG-compatible API" in user-facing docs - Update binary paths in README, CLAUDE.md, and Dockerfile - Update log message to "kafka starting" Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 10 +++++----- Dockerfile | 2 +- README.md | 
12 ++++++------ cmd/{searxng-go => kafka}/main.go | 2 +- config.example.toml | 4 ++-- internal/autocomplete/service.go | 6 +++--- internal/contracts/main_result.go | 10 +++++----- internal/contracts/types.go | 6 +++--- internal/engines/braveapi.go | 8 ++++---- internal/engines/engine.go | 2 +- internal/engines/planner.go | 4 ++-- internal/engines/qwant.go | 10 +++++----- internal/search/merge.go | 2 +- internal/search/request_params.go | 4 ++-- internal/search/response.go | 8 ++++---- internal/search/service.go | 2 +- internal/upstream/client.go | 2 +- internal/views/static/css/kafka.css | 1 - 18 files changed, 47 insertions(+), 48 deletions(-) rename cmd/{searxng-go => kafka}/main.go (98%) diff --git a/CLAUDE.md b/CLAUDE.md index 1ba6bdc..b7f254e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream SearXNG instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response. +kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream metasearch instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response. ## Build & Run Commands @@ -22,7 +22,7 @@ go test -run TestWikipedia ./internal/engines/ go test -v ./internal/engines/ # Run the server (requires config.toml) -go run ./cmd/searxng-go -config config.toml +go run ./cmd/kafka -config config.toml ``` There is no Makefile. There is no linter configured. @@ -37,13 +37,13 @@ There is no Makefile. There is no linter configured. 
- `internal/config` — TOML-based configuration with env var fallbacks. `Load(path)` reads `config.toml`; env vars override zero-value fields. See `config.example.toml` for all settings. - `internal/engines` — `Engine` interface and all 9 Go-native implementations. `factory.go` registers engines via `NewDefaultPortedEngines()`. `planner.go` routes engines to local or upstream based on `LOCAL_PORTED_ENGINES` env var. - `internal/search` — `Service` orchestrates the pipeline: cache check, planning, parallel engine execution via goroutines/WaitGroup, upstream proxying, response merging. Individual engine failures are reported as `unresponsive_engines` rather than aborting the search. Qwant has fallback logic to upstream on empty results. -- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream SearXNG `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. +- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. - `internal/httpapi` — HTTP handlers for `/`, `/search`, `/autocompleter`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. -- `internal/upstream` — Client that proxies requests to an upstream SearXNG instance via POST. +- `internal/upstream` — Client that proxies requests to an upstream metasearch instance via POST. - `internal/cache` — Valkey/Redis-backed cache with SHA-256 cache keys. No-op if unconfigured. - `internal/middleware` — Three rate limiters (per-IP sliding window, burst+sustained, global) and CORS. All disabled by default. - `internal/views` — HTML templates and static files embedded via `//go:embed`. Renders full pages or HTMX fragments. Templates: `base.html`, `index.html`, `results.html`, `results_inner.html`, `result_item.html`. -- `cmd/searxng-go` — Entry point. 
Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server. +- `cmd/kafka` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server. **Engine interface** (`internal/engines/engine.go`): ```go diff --git a/Dockerfile b/Dockerfile index c41b5a1..e21960f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ RUN go mod download # Copy source and build COPY . . -RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/searxng-go +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/kafka # Runtime stage FROM alpine:3.21 diff --git a/README.md b/README.md index 2f0868f..c03019e 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ A privacy-respecting, open metasearch engine written in Go. SearXNG-compatible A ```bash git clone https://git.ashisgreat.xyz/penal-colony/gosearch.git cd kafka -go build ./cmd/searxng-go -./searxng-go -config config.toml +go build ./cmd/kafka +./kafka -config config.toml ``` ### Docker Compose @@ -76,7 +76,7 @@ sudo nixos-rebuild switch --flake .# ```bash nix develop go test ./... -go run ./cmd/searxng-go -config config.toml +go run ./cmd/kafka -config config.toml ``` ## Endpoints @@ -138,7 +138,7 @@ Copy `config.example.toml` to `config.toml` and edit. All settings can also be o ### Key Sections - **`[server]`** — port, timeout, public base URL for OpenSearch -- **`[upstream]`** — optional upstream SearXNG proxy for unported engines +- **`[upstream]`** — optional upstream metasearch proxy for unported engines - **`[engines]`** — which engines run locally, engine-specific settings - **`[cache]`** — Valkey/Redis address, password, TTL - **`[cors]`** — allowed origins and methods @@ -152,7 +152,7 @@ Copy `config.example.toml` to `config.toml` and edit. 
All settings can also be o |---|---| | `PORT` | Listen port (default: 8080) | | `BASE_URL` | Public URL for OpenSearch XML | -| `UPSTREAM_SEARXNG_URL` | Upstream SearXNG instance URL | +| `UPSTREAM_SEARXNG_URL` | Upstream instance URL | | `LOCAL_PORTED_ENGINES` | Comma-separated local engine list | | `HTTP_TIMEOUT` | Upstream request timeout | | `BRAVE_API_KEY` | Brave Search API key | @@ -177,7 +177,7 @@ See `config.example.toml` for the full list including rate limiting and CORS var | Reddit | Reddit JSON API | Discussions | | Bing | Bing RSS | General web | -Engines not listed in `engines.local_ported` are proxied to an upstream SearXNG instance if `upstream.url` is configured. +Engines not listed in `engines.local_ported` are proxied to an upstream metasearch instance if `upstream.url` is configured. ## Architecture diff --git a/cmd/searxng-go/main.go b/cmd/kafka/main.go similarity index 98% rename from cmd/searxng-go/main.go rename to cmd/kafka/main.go index dac6258..ab29852 100644 --- a/cmd/searxng-go/main.go +++ b/cmd/kafka/main.go @@ -103,7 +103,7 @@ func main() { }, logger)(handler) addr := fmt.Sprintf(":%d", cfg.Server.Port) - logger.Info("searxng-go starting", + logger.Info("kafka starting", "addr", addr, "cache", searchCache.Enabled(), "rate_limit", cfg.RateLimit.Requests > 0, diff --git a/config.example.toml b/config.example.toml index df77184..1e3b75c 100644 --- a/config.example.toml +++ b/config.example.toml @@ -15,13 +15,13 @@ http_timeout = "10s" base_url = "" [upstream] -# URL of an upstream SearXNG instance for unported engines (env: UPSTREAM_SEARXNG_URL) +# URL of an upstream metasearch instance for unported engines (env: UPSTREAM_SEARXNG_URL) # Leave empty to run without an upstream proxy. url = "" [engines] # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) -# Engines not listed here will be proxied to upstream SearXNG. +# Engines not listed here will be proxied to the upstream instance. 
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"] [engines.brave] diff --git a/internal/autocomplete/service.go b/internal/autocomplete/service.go index 3892d63..99d963a 100644 --- a/internal/autocomplete/service.go +++ b/internal/autocomplete/service.go @@ -11,7 +11,7 @@ import ( "time" ) -// Service fetches search suggestions from an upstream SearXNG instance +// Service fetches search suggestions from an upstream metasearch instance // or falls back to Wikipedia's OpenSearch API. type Service struct { upstreamURL string @@ -40,7 +40,7 @@ func (s *Service) Suggestions(ctx context.Context, query string) ([]string, erro return s.wikipediaSuggestions(ctx, query) } -// upstreamSuggestions proxies to an upstream SearXNG /autocompleter endpoint. +// upstreamSuggestions proxies to an upstream /autocompleter endpoint. func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]string, error) { u := s.upstreamURL + "/autocompleter?" + url.Values{"q": {query}}.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) @@ -64,7 +64,7 @@ func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]stri return nil, err } - // SearXNG /autocompleter returns a plain JSON array of strings. + // The /autocompleter endpoint returns a plain JSON array of strings. var out []string if err := json.Unmarshal(body, &out); err != nil { return nil, err diff --git a/internal/contracts/main_result.go b/internal/contracts/main_result.go index 48005f8..20c9231 100644 --- a/internal/contracts/main_result.go +++ b/internal/contracts/main_result.go @@ -5,15 +5,15 @@ import ( "encoding/json" ) -// MainResult represents one element of SearXNG's `results` array. +// MainResult represents one element of the `results` array. // -// SearXNG returns many additional keys beyond what templates use. To keep the +// The API returns many additional keys beyond what templates use. 
To keep the // contract stable for proxying/merging, we preserve all unknown keys in // `raw` and re-emit them via MarshalJSON. type MainResult struct { raw map[string]any - // Common fields used by SearXNG templates (RSS uses: title, url, content, pubdate). + // Common fields used by templates (RSS uses: title, url, content, pubdate). Template string `json:"template"` Title string `json:"title"` Content string `json:"content"` @@ -28,12 +28,12 @@ type MainResult struct { Positions []int `json:"positions"` Engines []string `json:"engines"` - // These fields exist in SearXNG's MainResult base; keep them so downstream + // These fields exist in the MainResult base; keep them so downstream // callers can generate richer output later. OpenGroup bool `json:"open_group"` CloseGroup bool `json:"close_group"` - // parsed_url in SearXNG is emitted as a tuple; we preserve it as-is. + // parsed_url is emitted as a tuple; we preserve it as-is. ParsedURL any `json:"parsed_url"` } diff --git a/internal/contracts/types.go b/internal/contracts/types.go index a68f77a..81103ce 100644 --- a/internal/contracts/types.go +++ b/internal/contracts/types.go @@ -1,6 +1,6 @@ package contracts -// OutputFormat matches SearXNG's `/search?format=...` values. +// OutputFormat matches the `/search?format=...` values. type OutputFormat string const ( @@ -28,7 +28,7 @@ type SearchRequest struct { Engines []string Categories []string - // EngineData matches SearXNG's `engine_data--=` parameters. + // EngineData matches the `engine_data--=` parameters. EngineData map[string]map[string]string // AccessToken is an optional request token used to gate paid/limited engines. @@ -36,7 +36,7 @@ type SearchRequest struct { AccessToken string } -// SearchResponse matches the JSON schema returned by SearXNG's `webutils.get_json_response()`. +// SearchResponse matches the JSON schema used by `webutils.get_json_response()`. 
type SearchResponse struct { Query string `json:"query"` NumberOfResults int `json:"number_of_results"` diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go index 2cb20ff..77c7abe 100644 --- a/internal/engines/braveapi.go +++ b/internal/engines/braveapi.go @@ -14,7 +14,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// BraveEngine implements the SearXNG `braveapi` engine (Brave Web Search API). +// BraveEngine implements the `braveapi` engine (Brave Web Search API). // // Config / gating: // - BRAVE_API_KEY: required to call Brave @@ -35,8 +35,8 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{}, errors.New("brave engine not initialized") } - // Gate / config checks should not be treated as fatal errors; SearXNG - // treats misconfigured engines as unresponsive. + // Gate / config checks should not be treated as fatal errors; the reference + // implementation treats misconfigured engines as unresponsive. if strings.TrimSpace(e.apiKey) == "" { return contracts.SearchResponse{ Query: req.Query, @@ -93,7 +93,7 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( } } - // SearXNG's python checks `if params["safesearch"]:` which treats any + // The reference implementation checks `if params["safesearch"]:` which treats any // non-zero (moderate/strict) as strict. if req.Safesearch > 0 { args.Set("safesearch", "strict") diff --git a/internal/engines/engine.go b/internal/engines/engine.go index d07aec9..ee87cfd 100644 --- a/internal/engines/engine.go +++ b/internal/engines/engine.go @@ -6,7 +6,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// Engine is a Go-native implementation of a SearXNG engine. +// Engine is a Go-native implementation of a search engine. 
// // Implementations should return a SearchResponse containing only the results // for that engine subset; the caller will merge multiple engine responses. diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 543f253..56df656 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -48,7 +48,7 @@ func NewPlanner(portedEngines []string) *Planner { // Plan returns: // - localEngines: engines that are configured as ported for this service -// - upstreamEngines: engines that should be executed by upstream SearXNG +// - upstreamEngines: engines that should be executed by the upstream instance // - requestedEngines: the (possibly inferred) requested engines list // // If the request provides an explicit `engines` parameter, we use it. @@ -80,7 +80,7 @@ func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngin func inferFromCategories(categories []string) []string { // Minimal mapping for the initial porting subset. - // This mirrors the idea of selecting from SearXNG categories without + // This mirrors the idea of selecting from engine categories without // embedding the whole engine registry. set := map[string]bool{} for _, c := range categories { diff --git a/internal/engines/qwant.go b/internal/engines/qwant.go index bb2a03c..8221781 100644 --- a/internal/engines/qwant.go +++ b/internal/engines/qwant.go @@ -14,11 +14,11 @@ import ( "github.com/PuerkitoBio/goquery" ) -// QwantEngine implements a SearXNG-like `qwant` (web) adapter using +// QwantEngine implements a `qwant` (web) adapter using // Qwant v3 endpoint: https://api.qwant.com/v3/search/web. // -// Qwant's API is not fully documented; this mirrors SearXNG's parsing logic -// for the `web` category from `.agent/searxng/searx/engines/qwant.py`. +// Qwant's API is not fully documented; this implements parsing logic +// for the `web` category. 
type QwantEngine struct { client *http.Client category string // "web" (JSON API) or "web-lite" (HTML fallback) @@ -37,7 +37,7 @@ func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{Query: req.Query}, nil } - // For API parity we use SearXNG web defaults: count=10, offset=(pageno-1)*count. + // For API parity we use web defaults: count=10, offset=(pageno-1)*count. // The engine's config field exists so we can expand to news/images/videos later. count := e.resultsPerPage if count <= 0 { @@ -262,7 +262,7 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq return } - // In SearXNG: "./span[contains(@class, 'url partner')]" + // Selector: "./span[contains(@class, 'url partner')]" urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text()) if urlText == "" { // fallback: any span with class containing both 'url' and 'partner' diff --git a/internal/search/merge.go b/internal/search/merge.go index 54ff9bb..64ebd6e 100644 --- a/internal/search/merge.go +++ b/internal/search/merge.go @@ -8,7 +8,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// MergeResponses merges multiple SearXNG-compatible JSON responses. +// MergeResponses merges multiple compatible JSON responses. // // MVP merge semantics: // - results are concatenated with a simple de-dup key (engine|title|url) diff --git a/internal/search/request_params.go b/internal/search/request_params.go index 1d48a04..9fdd799 100644 --- a/internal/search/request_params.go +++ b/internal/search/request_params.go @@ -11,7 +11,7 @@ import ( var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`) func ParseSearchRequest(r *http.Request) (SearchRequest, error) { - // SearXNG supports both GET and POST and relies on form values for routing. + // Supports both GET and POST and relies on form values for routing. 
if err := r.ParseForm(); err != nil { return SearchRequest{}, errors.New("invalid request: cannot parse form") } @@ -90,7 +90,7 @@ func ParseSearchRequest(r *http.Request) (SearchRequest, error) { // engines is an explicit list of engine names. engines := splitCSV(strings.TrimSpace(r.FormValue("engines"))) - // categories and category_ params mirror SearXNG's webadapter parsing. + // categories and category_ params mirror the webadapter parsing. // We don't validate against a registry here; we just preserve the requested values. catSet := map[string]bool{} if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" { diff --git a/internal/search/response.go b/internal/search/response.go index 3b07096..1a9ce26 100644 --- a/internal/search/response.go +++ b/internal/search/response.go @@ -38,7 +38,7 @@ func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp Search } } -// csvRowHeader matches the SearXNG CSV writer key order. +// csvRowHeader matches the CSV writer key order. var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"} func writeCSV(w http.ResponseWriter, resp SearchResponse) error { @@ -111,14 +111,14 @@ func writeCSV(w http.ResponseWriter, resp SearchResponse) error { func writeRSS(w http.ResponseWriter, resp SearchResponse) error { q := resp.Query - escapedTitle := xmlEscape("SearXNG search: " + q) - escapedDesc := xmlEscape("Search results for \"" + q + "\" - SearXNG") + escapedTitle := xmlEscape("kafka search: " + q) + escapedDesc := xmlEscape("Search results for \"" + q + "\" - kafka") escapedQueryTerms := xmlEscape(q) link := "/search?q=" + url.QueryEscape(q) opensearchQuery := fmt.Sprintf(``, escapedQueryTerms) - // SearXNG template uses the number of results for both totalResults and itemsPerPage. + // The template uses the number of results for both totalResults and itemsPerPage. 
nr := resp.NumberOfResults var items bytes.Buffer diff --git a/internal/search/service.go b/internal/search/service.go index 91fef2b..62a9308 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -50,7 +50,7 @@ func NewService(cfg ServiceConfig) *Service { } // Search executes the request against local engines (in parallel) and -// optionally upstream SearXNG for unported engines. +// optionally the upstream instance for unported engines. // // Individual engine failures are reported as unresponsive_engines rather // than aborting the entire search. diff --git a/internal/upstream/client.go b/internal/upstream/client.go index 3a11843..64ddec4 100644 --- a/internal/upstream/client.go +++ b/internal/upstream/client.go @@ -68,7 +68,7 @@ func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, en for engineName, kv := range req.EngineData { for key, value := range kv { - // Mirror SearXNG's naming: `engine_data--=` + // Mirror the naming convention: `engine_data--=` form.Set(fmt.Sprintf("engine_data-%s-%s", engineName, key), value) } } diff --git a/internal/views/static/css/kafka.css b/internal/views/static/css/kafka.css index 376b2d8..824f489 100644 --- a/internal/views/static/css/kafka.css +++ b/internal/views/static/css/kafka.css @@ -1,5 +1,4 @@ /* kafka — clean, minimal search engine CSS */ -/* Inspired by SearXNG's simple theme class conventions */ :root { --color-base: #f5f5f5; From 7d23f13dfaf43b086c78aa3028653c4a05e9b14b Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 01:25:04 +0000 Subject: [PATCH 07/12] feat: add Google engine using GSA User-Agent scraping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SearXNG approach: use Google Search Appliance (GSA) User-Agent pool — these are whitelisted enterprise identifiers Google trusts. 
Key techniques: - GSA User-Agent (iPhone OS + GSA/ version) instead of Chrome desktop - CONSENT=YES+ cookie to bypass EU consent wall - Parse /url?q= redirector URLs (unquote + strip &sa= params) - div.MjjYud class for result containers (SearXNG selector) - data-sncf divs for snippets - detect sorry.google.com blocks - Suggestions from ouy7Mc class cards --- internal/engines/factory.go | 3 +- internal/engines/google.go | 271 ++++++++++++++++++++++++++++++++++++ internal/engines/planner.go | 3 +- 3 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 internal/engines/google.go diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 310a20e..937225f 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -31,6 +31,7 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine { "duckduckgo": &DuckDuckGoEngine{client: client}, "github": &GitHubEngine{client: client}, "reddit": &RedditEngine{client: client}, - "bing": &BingEngine{client: client}, + "bing": &BingEngine{client: client}, + "google": &GoogleEngine{client: client}, } } diff --git a/internal/engines/google.go b/internal/engines/google.go new file mode 100644 index 0000000..0371283 --- /dev/null +++ b/internal/engines/google.go @@ -0,0 +1,271 @@ +package engines + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strings" + + "github.com/metamorphosis-dev/kafka/internal/contracts" +) + +// GSA User-Agent pool — iPhone User-Agent strings carrying a GSA/ version token, +// i.e. the Google app for iOS (not the Google Search Appliance product).
+var gsaUserAgents = []string{ + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", +} + +func gsaUA() string { + return gsaUserAgents[0] // always the first pool entry: deterministic for now; could rotate +} + +type GoogleEngine struct { + client *http.Client // HTTP client used to fetch the result page +} + +func (e *GoogleEngine) Name() string { return "google" } // engine identifier + +func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + if strings.TrimSpace(req.Query) == "" { // blank query: empty response, no request is sent + return contracts.SearchResponse{Query: req.Query}, nil + } + + start := (req.Pageno - 1) * 10 // result offset, 10 per page; NOTE(review): negative when Pageno < 1 — confirm callers normalise it + query := url.QueryEscape(req.Query) + + // Build URL like SearXNG does: filter=0, start offset, hl/lr from req.Language, safe from req.Safesearch.
+ u := fmt.Sprintf( + "https://www.google.com/search?q=%s&filter=0&start=%d&hl=%s&lr=%s&safe=%s", + query, + start, + googleHL(req.Language), + googleUILanguage(req.Language), + googleSafeSearchLevel(req.Safesearch), + ) + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) + if err != nil { + return contracts.SearchResponse{}, err + } + httpReq.Header.Set("User-Agent", gsaUA()) + httpReq.Header.Set("Accept", "*/*") + httpReq.AddCookie(&http.Cookie{Name: "CONSENT", Value: "YES+"}) // meant to skip the EU consent wall + + resp, err := e.client.Do(httpReq) + if err != nil { + return contracts.SearchResponse{}, err + } + defer resp.Body.Close() + + // Check for Google block / CAPTCHA page; it is reported as an unresponsive engine, not as an error. + if detectGoogleSorry(resp) { + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: 0, + Results: nil, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, + UnresponsiveEngines: [][2]string{{"google", "blocked by Google (CAPTCHA/sorry page)"}}, + }, nil + } + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) // read error ignored: body only feeds the error message, capped at 4 KiB + return contracts.SearchResponse{}, fmt.Errorf("google error: status=%d body=%q", resp.StatusCode, string(body)) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024)) // only the first 128 KiB of HTML are parsed + if err != nil { + return contracts.SearchResponse{}, err + } + + results := parseGoogleResults(string(body), req.Query) + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: extractGoogleSuggestions(string(body)), + UnresponsiveEngines: [][2]string{}, + }, nil +} + +// detectGoogleSorry reports whether the request that produced resp targeted sorry.google.com or a /sorry path (Google's block/CAPTCHA page).
+func detectGoogleSorry(resp *http.Response) bool { + if resp.Request != nil { + if resp.Request.URL.Host == "sorry.google.com" || strings.HasPrefix(resp.Request.URL.Path, "/sorry") { + return true + } + } + return false +} + +// parseGoogleResults extracts search results from Google's HTML; query is the fallback title. +// Uses the same selectors as SearXNG: div.MjjYud for result containers. +func parseGoogleResults(body, query string) []contracts.MainResult { + var results []contracts.MainResult + + // SearXNG selector: .//div[contains(@class, "MjjYud")] + // Each result block contains a title link and snippet. + // We simulate the XPath matching with regex-based extraction. + + // Find all MjjYud div blocks. NOTE(review): Go's regexp (RE2) rejects lookahead, so the (?=...) group below makes MustCompile panic — verify. + mjjPattern := regexp.MustCompile(`]*class="[^"]*MjjYud[^"]*"[^>]*>(.*?)\s*(?=]*class="[^"]*MjjYud|$)`) + matches := mjjPattern.FindAllStringSubmatch(body, -1) + + for i, match := range matches { + if len(match) < 2 { + continue + } + block := match[1] + + // Extract title and URL from the result link. NOTE(review): this pattern and the ones below are recompiled on every iteration; they could live at package scope. + // Pattern: an anchor whose href is /url?q=REAL_URL plus tracking params, wrapping TITLE + urlPattern := regexp.MustCompile(`]+href="(/url\?q=[^"&]+)`) + urlMatch := urlPattern.FindStringSubmatch(block) + if len(urlMatch) < 2 { + continue + } + rawURL := urlMatch[1] + // Remove /url?q= prefix, cut at the first & and percent-decode; the raw value is kept if decoding fails. + actualURL := strings.TrimPrefix(rawURL, "/url?q=") + if amp := strings.Index(actualURL, "&"); amp != -1 { + actualURL = actualURL[:amp] + } + if decoded, err := url.QueryUnescape(actualURL); err == nil { + actualURL = decoded + } + + if actualURL == "" || !strings.HasPrefix(actualURL, "http") { // skip targets that do not start with http + continue + } + + // Extract title from the title tag.
+ titlePattern := regexp.MustCompile(`]*class="[^"]*qrStP[^"]*"[^>]*>([^<]+)`) + titleMatch := titlePattern.FindStringSubmatch(block) + title := query // the query text is the title of last resort + if len(titleMatch) >= 2 { + title = stripTags(titleMatch[1]) + } else { + // Fallback: take the visible text of an element with role="link" (the pattern does not check data-title) + linkTitlePattern := regexp.MustCompile(`]+role="link"[^>]*>([^<]+)<`) + ltMatch := linkTitlePattern.FindStringSubmatch(block) + if len(ltMatch) >= 2 { + title = stripTags(ltMatch[1]) + } + } + + // Extract snippet from data-sncf divs (SearXNG's approach). + snippet := extractGoogleSnippet(block) + + urlPtr := actualURL // declared per iteration, so each result points at its own URL + results = append(results, contracts.MainResult{ + Title: title, + URL: &urlPtr, + Content: snippet, + Engine: "google", + Score: float64(len(matches) - i), // earlier blocks get higher scores + Category: "general", + Engines: []string{"google"}, + Template: "default.html", + }) + } + + return results +} + +// extractGoogleSnippet extracts the snippet text from a Google result block, joining every non-empty match with single spaces. +func extractGoogleSnippet(block string) string { + // Google's snippets live in divs with data-sncf attribute. + // SearXNG looks for: .//div[contains(@data-sncf, "1")]; the pattern below requires exactly data-sncf="1". + snippetPattern := regexp.MustCompile(`]+data-sncf="1"[^>]*>(.*?)`) + matches := snippetPattern.FindAllStringSubmatch(block, -1) + var parts []string + for _, m := range matches { + if len(m) < 2 { + continue + } + text := stripTags(m[1]) + if text != "" { + parts = append(parts, text) + } + } + return strings.Join(parts, " ") +} + +// extractGoogleSuggestions extracts search suggestions from Google result cards, de-duplicated in first-seen order.
+func extractGoogleSuggestions(body string) []string { + var suggestions []string + // SearXNG xpath: //div[contains(@class, "ouy7Mc")]//a + suggestionPattern := regexp.MustCompile(`]*class="[^"]*ouy7Mc[^"]*"[^>]*>.*?]*>([^<]+)`, regexp.DotAll) + matches := suggestionPattern.FindAllStringSubmatch(body, -1) + seen := map[string]bool{} + for _, m := range matches { + if len(m) < 2 { + continue + } + s := strings.TrimSpace(stripTags(m[1])) + if s != "" && !seen[s] { + seen[s] = true + suggestions = append(suggestions, s) + } + } + return suggestions +} + +// googleHL maps SearXNG locale to Google hl (host language) parameter. +// e.g. "en-US" -> "en-US" +func googleHL(lang string) string { + lang = strings.ToLower(strings.TrimSpace(lang)) + if lang == "" || lang == "auto" { + return "en" + } + return lang +} + +// googleUILanguage maps SearXNG language to Google lr (language restrict) parameter. +// e.g. "en" -> "lang_en", "de" -> "lang_de" +func googleUILanguage(lang string) string { + lang = strings.ToLower(strings.Split(lang, "-")[0]) + if lang == "" || lang == "auto" { + return "" + } + return "lang_" + lang +} + +// googleSafeSearchLevel maps safesearch (0-2) to Google's safe parameter. +func googleSafeSearchLevel(safesearch int) string { + switch safesearch { + case 0: + return "off" + case 1: + return "medium" + case 2: + return "high" + default: + return "medium" + } +} + +// stripTags removes HTML tags from a string. 
+func stripTags(s string) string { + stripper := regexp.MustCompile(`<[^>]*>`) + s = stripper.ReplaceAllString(s, "") + s = strings.ReplaceAll(s, "&", "&") + s = strings.ReplaceAll(s, """, `"`) + s = strings.ReplaceAll(s, "'", "'") + s = strings.ReplaceAll(s, " ", " ") + return strings.TrimSpace(s) +} diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 543f253..08b0a27 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -91,6 +91,7 @@ func inferFromCategories(categories []string) []string { set["qwant"] = true set["duckduckgo"] = true set["bing"] = true + set["google"] = true case "science", "scientific publications": set["arxiv"] = true set["crossref"] = true @@ -106,7 +107,7 @@ func inferFromCategories(categories []string) []string { out = append(out, e) } // stable order - order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "arxiv": 5, "crossref": 6, "github": 7, "reddit": 8} + order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9} sortByOrder(out, order) return out } From 4be9cf2725ce5245076a128bdb4a873f263c82d9 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 01:25:04 +0000 Subject: [PATCH 08/12] feat: add Google engine using GSA User-Agent scraping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SearXNG approach: use Google Search Appliance (GSA) User-Agent pool — these are whitelisted enterprise identifiers Google trusts. 
Key techniques: - GSA User-Agent (iPhone OS + GSA/ version) instead of Chrome desktop - CONSENT=YES+ cookie to bypass EU consent wall - Parse /url?q= redirector URLs (unquote + strip &sa= params) - div.MjjYud class for result containers (SearXNG selector) - data-sncf divs for snippets - detect sorry.google.com blocks - Suggestions from ouy7Mc class cards --- internal/engines/factory.go | 3 +- internal/engines/google.go | 271 ++++++++++++++++++++++++++++++++++++ internal/engines/planner.go | 3 +- 3 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 internal/engines/google.go diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 310a20e..937225f 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -31,6 +31,7 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine { "duckduckgo": &DuckDuckGoEngine{client: client}, "github": &GitHubEngine{client: client}, "reddit": &RedditEngine{client: client}, - "bing": &BingEngine{client: client}, + "bing": &BingEngine{client: client}, + "google": &GoogleEngine{client: client}, } } diff --git a/internal/engines/google.go b/internal/engines/google.go new file mode 100644 index 0000000..0371283 --- /dev/null +++ b/internal/engines/google.go @@ -0,0 +1,271 @@ +package engines + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "strings" + + "github.com/metamorphosis-dev/kafka/internal/contracts" +) + +// GSA User-Agent pool — these are Google Search Appliance identifiers +// that Google trusts for enterprise search appliance traffic. 
+var gsaUserAgents = []string{ + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1", +} + +func gsaUA() string { + return gsaUserAgents[0] // deterministic for now; could rotate +} + +type GoogleEngine struct { + client *http.Client +} + +func (e *GoogleEngine) Name() string { return "google" } + +func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + if strings.TrimSpace(req.Query) == "" { + return contracts.SearchResponse{Query: req.Query}, nil + } + + start := (req.Pageno - 1) * 10 + query := url.QueryEscape(req.Query) + + // Build URL like SearXNG does. 
+ u := fmt.Sprintf( + "https://www.google.com/search?q=%s&filter=0&start=%d&hl=%s&lr=%s&safe=%s", + query, + start, + googleHL(req.Language), + googleUILanguage(req.Language), + googleSafeSearchLevel(req.Safesearch), + ) + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) + if err != nil { + return contracts.SearchResponse{}, err + } + httpReq.Header.Set("User-Agent", gsaUA()) + httpReq.Header.Set("Accept", "*/*") + httpReq.AddCookie(&http.Cookie{Name: "CONSENT", Value: "YES+"}) + + resp, err := e.client.Do(httpReq) + if err != nil { + return contracts.SearchResponse{}, err + } + defer resp.Body.Close() + + // Check for Google block / CAPTCHA page. + if detectGoogleSorry(resp) { + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: 0, + Results: nil, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, + UnresponsiveEngines: [][2]string{{"google", "blocked by Google (CAPTCHA/sorry page)"}}, + }, nil + } + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return contracts.SearchResponse{}, fmt.Errorf("google error: status=%d body=%q", resp.StatusCode, string(body)) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024)) + if err != nil { + return contracts.SearchResponse{}, err + } + + results := parseGoogleResults(string(body), req.Query) + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: extractGoogleSuggestions(string(body)), + UnresponsiveEngines: [][2]string{}, + }, nil +} + +// detectGoogleSorry returns true if the response is a Google block/CAPTCHA page. 
+func detectGoogleSorry(resp *http.Response) bool { + if resp.Request != nil { + if resp.Request.URL.Host == "sorry.google.com" || strings.HasPrefix(resp.Request.URL.Path, "/sorry") { + return true + } + } + return false +} + +// parseGoogleResults extracts search results from Google's HTML. +// Uses the same selectors as SearXNG: div.MjjYud for result containers. +func parseGoogleResults(body, query string) []contracts.MainResult { + var results []contracts.MainResult + + // SearXNG selector: .//div[contains(@class, "MjjYud")] + // Each result block contains a title link and snippet. + // We simulate the XPath matching with regex-based extraction. + + // Find all MjjYud div blocks. + mjjPattern := regexp.MustCompile(`]*class="[^"]*MjjYud[^"]*"[^>]*>(.*?)\s*(?=]*class="[^"]*MjjYud|$)`) + matches := mjjPattern.FindAllStringSubmatch(body, -1) + + for i, match := range matches { + if len(match) < 2 { + continue + } + block := match[1] + + // Extract title and URL from the result link. + // Pattern: TITLE + urlPattern := regexp.MustCompile(`]+href="(/url\?q=[^"&]+)`) + urlMatch := urlPattern.FindStringSubmatch(block) + if len(urlMatch) < 2 { + continue + } + rawURL := urlMatch[1] + // Remove /url?q= prefix and decode. + actualURL := strings.TrimPrefix(rawURL, "/url?q=") + if amp := strings.Index(actualURL, "&"); amp != -1 { + actualURL = actualURL[:amp] + } + if decoded, err := url.QueryUnescape(actualURL); err == nil { + actualURL = decoded + } + + if actualURL == "" || !strings.HasPrefix(actualURL, "http") { + continue + } + + // Extract title from the title tag. 
+ titlePattern := regexp.MustCompile(`]*class="[^"]*qrStP[^"]*"[^>]*>([^<]+)`) + titleMatch := titlePattern.FindStringSubmatch(block) + title := query + if len(titleMatch) >= 2 { + title = stripTags(titleMatch[1]) + } else { + // Fallback: extract visible text from an with data-title or role="link" + linkTitlePattern := regexp.MustCompile(`]+role="link"[^>]*>([^<]+)<`) + ltMatch := linkTitlePattern.FindStringSubmatch(block) + if len(ltMatch) >= 2 { + title = stripTags(ltMatch[1]) + } + } + + // Extract snippet from data-sncf divs (SearXNG's approach). + snippet := extractGoogleSnippet(block) + + urlPtr := actualURL + results = append(results, contracts.MainResult{ + Title: title, + URL: &urlPtr, + Content: snippet, + Engine: "google", + Score: float64(len(matches) - i), + Category: "general", + Engines: []string{"google"}, + Template: "default.html", + }) + } + + return results +} + +// extractGoogleSnippet extracts the snippet text from a Google result block. +func extractGoogleSnippet(block string) string { + // Google's snippets live in divs with data-sncf attribute. + // SearXNG looks for: .//div[contains(@data-sncf, "1")] + snippetPattern := regexp.MustCompile(`]+data-sncf="1"[^>]*>(.*?)`) + matches := snippetPattern.FindAllStringSubmatch(block, -1) + var parts []string + for _, m := range matches { + if len(m) < 2 { + continue + } + text := stripTags(m[1]) + if text != "" { + parts = append(parts, text) + } + } + return strings.Join(parts, " ") +} + +// extractGoogleSuggestions extracts search suggestions from Google result cards. 
+func extractGoogleSuggestions(body string) []string { + var suggestions []string + // SearXNG xpath: //div[contains(@class, "ouy7Mc")]//a + suggestionPattern := regexp.MustCompile(`]*class="[^"]*ouy7Mc[^"]*"[^>]*>.*?]*>([^<]+)`, regexp.DotAll) + matches := suggestionPattern.FindAllStringSubmatch(body, -1) + seen := map[string]bool{} + for _, m := range matches { + if len(m) < 2 { + continue + } + s := strings.TrimSpace(stripTags(m[1])) + if s != "" && !seen[s] { + seen[s] = true + suggestions = append(suggestions, s) + } + } + return suggestions +} + +// googleHL maps SearXNG locale to Google hl (host language) parameter. +// e.g. "en-US" -> "en-US" +func googleHL(lang string) string { + lang = strings.ToLower(strings.TrimSpace(lang)) + if lang == "" || lang == "auto" { + return "en" + } + return lang +} + +// googleUILanguage maps SearXNG language to Google lr (language restrict) parameter. +// e.g. "en" -> "lang_en", "de" -> "lang_de" +func googleUILanguage(lang string) string { + lang = strings.ToLower(strings.Split(lang, "-")[0]) + if lang == "" || lang == "auto" { + return "" + } + return "lang_" + lang +} + +// googleSafeSearchLevel maps safesearch (0-2) to Google's safe parameter. +func googleSafeSearchLevel(safesearch int) string { + switch safesearch { + case 0: + return "off" + case 1: + return "medium" + case 2: + return "high" + default: + return "medium" + } +} + +// stripTags removes HTML tags from a string. 
+func stripTags(s string) string { + stripper := regexp.MustCompile(`<[^>]*>`) + s = stripper.ReplaceAllString(s, "") + s = strings.ReplaceAll(s, "&", "&") + s = strings.ReplaceAll(s, """, `"`) + s = strings.ReplaceAll(s, "'", "'") + s = strings.ReplaceAll(s, " ", " ") + return strings.TrimSpace(s) +} diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 543f253..08b0a27 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -91,6 +91,7 @@ func inferFromCategories(categories []string) []string { set["qwant"] = true set["duckduckgo"] = true set["bing"] = true + set["google"] = true case "science", "scientific publications": set["arxiv"] = true set["crossref"] = true @@ -106,7 +107,7 @@ func inferFromCategories(categories []string) []string { out = append(out, e) } // stable order - order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "arxiv": 5, "crossref": 6, "github": 7, "reddit": 8} + order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9} sortByOrder(out, order) return out } From 38122385bd2df0a7558919f1b3b694fcfb2731cc Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 01:53:19 +0000 Subject: [PATCH 09/12] feat: add YouTube engine via Data API v3 Uses the official YouTube Data API v3. Requires YOUTUBE_API_KEY environment variable (free from Google Cloud Console). Returns video results with title, description, channel, publish date, and thumbnail URL. Falls back gracefully if no API key. 
--- internal/engines/factory.go | 6 +- internal/engines/planner.go | 6 +- internal/engines/youtube.go | 182 ++++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 3 deletions(-) create mode 100644 internal/engines/youtube.go diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 937225f..53ba87f 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -32,6 +32,10 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine { "github": &GitHubEngine{client: client}, "reddit": &RedditEngine{client: client}, "bing": &BingEngine{client: client}, - "google": &GoogleEngine{client: client}, + "google": &GoogleEngine{client: client}, + "youtube": &YouTubeEngine{ + client: client, + baseURL: "https://www.googleapis.com", + }, } } diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 24af031..b180f7e 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -7,7 +7,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"} +var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"} type Planner struct { PortedSet map[string]bool @@ -99,6 +99,8 @@ func inferFromCategories(categories []string) []string { set["github"] = true case "social media": set["reddit"] = true + case "videos": + set["youtube"] = true } } @@ -107,7 +109,7 @@ func inferFromCategories(categories []string) []string { out = append(out, e) } // stable order - order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9} + order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, 
"github": 8, "reddit": 9, "youtube": 10} sortByOrder(out, order) return out } diff --git a/internal/engines/youtube.go b/internal/engines/youtube.go new file mode 100644 index 0000000..7580a09 --- /dev/null +++ b/internal/engines/youtube.go @@ -0,0 +1,182 @@ +package engines + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/metamorphosis-dev/kafka/internal/contracts" +) + +type YouTubeEngine struct { + client *http.Client + apiKey string + baseURL string +} + +func (e *YouTubeEngine) Name() string { return "youtube" } + +func (e *YouTubeEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + if strings.TrimSpace(req.Query) == "" { + return contracts.SearchResponse{Query: req.Query}, nil + } + + if e.apiKey == "" { + e.apiKey = os.Getenv("YOUTUBE_API_KEY") + } + + maxResults := 10 + if req.Pageno > 1 { + maxResults = 20 + } + + u := e.baseURL + "/youtube/v3/search?" + url.Values{ + "part": {"snippet"}, + "q": {req.Query}, + "type": {"video"}, + "maxResults": {fmt.Sprintf("%d", maxResults)}, + "key": {e.apiKey}, + }.Encode() + + if req.Language != "" && req.Language != "auto" { + lang := strings.Split(strings.ToLower(req.Language), "-")[0] + u += "&relevanceLanguage=" + lang + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) + if err != nil { + return contracts.SearchResponse{}, err + } + + resp, err := e.client.Do(httpReq) + if err != nil { + return contracts.SearchResponse{}, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return contracts.SearchResponse{}, fmt.Errorf("youtube api error: status=%d body=%q", resp.StatusCode, string(body)) + } + + var apiResp youtubeSearchResponse + if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { + return contracts.SearchResponse{}, err + } + + if apiResp.Error != nil { + return 
contracts.SearchResponse{}, fmt.Errorf("youtube api error: %s", apiResp.Error.Message) + } + + results := make([]contracts.MainResult, 0, len(apiResp.Items)) + for _, item := range apiResp.Items { + if item.ID.VideoID == "" { + continue + } + + videoURL := "https://www.youtube.com/watch?v=" + item.ID.VideoID + urlPtr := videoURL + + published := "" + if item.Snippet.PublishedAt != "" { + if t, err := time.Parse(time.RFC3339, item.Snippet.PublishedAt); err == nil { + published = t.Format("Jan 2, 2006") + } + } + + content := item.Snippet.Description + if len(content) > 300 { + content = content[:300] + "..." + } + if published != "" { + content = "Published " + published + " · " + content + } + + thumbnail := "" + if item.Snippet.Thumbnails.High.URL != "" { + thumbnail = item.Snippet.Thumbnails.High.URL + } else if item.Snippet.Thumbnails.Medium.URL != "" { + thumbnail = item.Snippet.Thumbnails.Medium.URL + } + + results = append(results, contracts.MainResult{ + Template: "videos.html", + Title: item.Snippet.Title, + URL: &urlPtr, + Content: content, + Thumbnail: thumbnail, + Engine: "youtube", + Score: 1.0, + Category: "videos", + Engines: []string{"youtube"}, + Metadata: map[string]any{ + "channel": item.Snippet.ChannelTitle, + "video_id": item.Snippet.ResourceID.VideoID, + }, + }) + } + + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, + UnresponsiveEngines: [][2]string{}, + }, nil +} + +// YouTube API response types. 
+ +type youtubeSearchResponse struct { + Items []youtubeSearchItem `json:"items"` + PageInfo struct { + TotalResults int `json:"totalResults"` + ResultsPerPage int `json:"resultsPerPage"` + } `json:"pageInfo"` + NextPageToken string `json:"nextPageToken"` + Error *struct { + Code int `json:"code"` + Message string `json:"message"` + Errors []struct { + Domain string `json:"domain"` + Reason string `json:"reason"` + Message string `json:"message"` + } `json:"errors"` + } `json:"error"` +} + +type youtubeSearchItem struct { + ID struct { + VideoID string `json:"videoId"` + } `json:"id"` + Snippet struct { + PublishedAt string `json:"publishedAt"` + ChannelID string `json:"channelId"` + ChannelTitle string `json:"channelTitle"` + Title string `json:"title"` + Description string `json:"description"` + Thumbnails struct { + Default struct { + URL string `json:"url"` + } `json:"default"` + Medium struct { + URL string `json:"url"` + } `json:"medium"` + High struct { + URL string `json:"url"` + } `json:"high"` + } `json:"thumbnails"` + ResourceID struct { + VideoID string `json:"videoId"` + } `json:"resourceId"` + } `json:"snippet"` +} From 1689cab9bdc0f331a71ba947871db4272b016e01 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 01:53:19 +0000 Subject: [PATCH 10/12] feat: add YouTube engine via Data API v3 Uses the official YouTube Data API v3. Requires YOUTUBE_API_KEY environment variable (free from Google Cloud Console). Returns video results with title, description, channel, publish date, and thumbnail URL. Falls back gracefully if no API key. 
--- internal/engines/factory.go | 6 +- internal/engines/planner.go | 6 +- internal/engines/youtube.go | 182 ++++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 3 deletions(-) create mode 100644 internal/engines/youtube.go diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 937225f..53ba87f 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -32,6 +32,10 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine { "github": &GitHubEngine{client: client}, "reddit": &RedditEngine{client: client}, "bing": &BingEngine{client: client}, - "google": &GoogleEngine{client: client}, + "google": &GoogleEngine{client: client}, + "youtube": &YouTubeEngine{ + client: client, + baseURL: "https://www.googleapis.com", + }, } } diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 24af031..b180f7e 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -7,7 +7,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"} +var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"} type Planner struct { PortedSet map[string]bool @@ -99,6 +99,8 @@ func inferFromCategories(categories []string) []string { set["github"] = true case "social media": set["reddit"] = true + case "videos": + set["youtube"] = true } } @@ -107,7 +109,7 @@ func inferFromCategories(categories []string) []string { out = append(out, e) } // stable order - order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9} + order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, 
"github": 8, "reddit": 9, "youtube": 10} sortByOrder(out, order) return out } diff --git a/internal/engines/youtube.go b/internal/engines/youtube.go new file mode 100644 index 0000000..7580a09 --- /dev/null +++ b/internal/engines/youtube.go @@ -0,0 +1,182 @@ +package engines + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/metamorphosis-dev/kafka/internal/contracts" +) + +type YouTubeEngine struct { + client *http.Client + apiKey string + baseURL string +} + +func (e *YouTubeEngine) Name() string { return "youtube" } + +func (e *YouTubeEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + if strings.TrimSpace(req.Query) == "" { + return contracts.SearchResponse{Query: req.Query}, nil + } + + if e.apiKey == "" { + e.apiKey = os.Getenv("YOUTUBE_API_KEY") + } + + maxResults := 10 + if req.Pageno > 1 { + maxResults = 20 + } + + u := e.baseURL + "/youtube/v3/search?" + url.Values{ + "part": {"snippet"}, + "q": {req.Query}, + "type": {"video"}, + "maxResults": {fmt.Sprintf("%d", maxResults)}, + "key": {e.apiKey}, + }.Encode() + + if req.Language != "" && req.Language != "auto" { + lang := strings.Split(strings.ToLower(req.Language), "-")[0] + u += "&relevanceLanguage=" + lang + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) + if err != nil { + return contracts.SearchResponse{}, err + } + + resp, err := e.client.Do(httpReq) + if err != nil { + return contracts.SearchResponse{}, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + return contracts.SearchResponse{}, fmt.Errorf("youtube api error: status=%d body=%q", resp.StatusCode, string(body)) + } + + var apiResp youtubeSearchResponse + if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { + return contracts.SearchResponse{}, err + } + + if apiResp.Error != nil { + return 
contracts.SearchResponse{}, fmt.Errorf("youtube api error: %s", apiResp.Error.Message) + } + + results := make([]contracts.MainResult, 0, len(apiResp.Items)) + for _, item := range apiResp.Items { + if item.ID.VideoID == "" { + continue + } + + videoURL := "https://www.youtube.com/watch?v=" + item.ID.VideoID + urlPtr := videoURL + + published := "" + if item.Snippet.PublishedAt != "" { + if t, err := time.Parse(time.RFC3339, item.Snippet.PublishedAt); err == nil { + published = t.Format("Jan 2, 2006") + } + } + + content := item.Snippet.Description + if len(content) > 300 { + content = content[:300] + "..." + } + if published != "" { + content = "Published " + published + " · " + content + } + + thumbnail := "" + if item.Snippet.Thumbnails.High.URL != "" { + thumbnail = item.Snippet.Thumbnails.High.URL + } else if item.Snippet.Thumbnails.Medium.URL != "" { + thumbnail = item.Snippet.Thumbnails.Medium.URL + } + + results = append(results, contracts.MainResult{ + Template: "videos.html", + Title: item.Snippet.Title, + URL: &urlPtr, + Content: content, + Thumbnail: thumbnail, + Engine: "youtube", + Score: 1.0, + Category: "videos", + Engines: []string{"youtube"}, + Metadata: map[string]any{ + "channel": item.Snippet.ChannelTitle, + "video_id": item.Snippet.ResourceID.VideoID, + }, + }) + } + + return contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, + UnresponsiveEngines: [][2]string{}, + }, nil +} + +// YouTube API response types. 
+ +type youtubeSearchResponse struct { + Items []youtubeSearchItem `json:"items"` + PageInfo struct { + TotalResults int `json:"totalResults"` + ResultsPerPage int `json:"resultsPerPage"` + } `json:"pageInfo"` + NextPageToken string `json:"nextPageToken"` + Error *struct { + Code int `json:"code"` + Message string `json:"message"` + Errors []struct { + Domain string `json:"domain"` + Reason string `json:"reason"` + Message string `json:"message"` + } `json:"errors"` + } `json:"error"` +} + +type youtubeSearchItem struct { + ID struct { + VideoID string `json:"videoId"` + } `json:"id"` + Snippet struct { + PublishedAt string `json:"publishedAt"` + ChannelID string `json:"channelId"` + ChannelTitle string `json:"channelTitle"` + Title string `json:"title"` + Description string `json:"description"` + Thumbnails struct { + Default struct { + URL string `json:"url"` + } `json:"default"` + Medium struct { + URL string `json:"url"` + } `json:"medium"` + High struct { + URL string `json:"url"` + } `json:"high"` + } `json:"thumbnails"` + ResourceID struct { + VideoID string `json:"videoId"` + } `json:"resourceId"` + } `json:"snippet"` +} From 41b80a939a66e9ad1c9ac5594ddd3dce5093cbb3 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 01:57:13 +0000 Subject: [PATCH 11/12] feat: add YouTube engine with config file and env support YouTube Data API v3 engine: - Add YouTubeConfig to EnginesConfig with api_key field - Add YOUTUBE_API_KEY env override - Thread *config.Config through search service to factory - Factory falls back to env vars if config fields are empty - Update config.example.toml with youtube section Also update default local_ported to include google and youtube. 
--- cmd/kafka/main.go | 7 ++++--- config.example.toml | 6 +++++- internal/config/config.go | 10 +++++++++- internal/engines/factory.go | 26 +++++++++++++++++++++++--- internal/search/service.go | 10 ++++++---- 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/cmd/kafka/main.go b/cmd/kafka/main.go index ab29852..90c750d 100644 --- a/cmd/kafka/main.go +++ b/cmd/kafka/main.go @@ -53,9 +53,10 @@ func main() { } svc := search.NewService(search.ServiceConfig{ - UpstreamURL: cfg.Upstream.URL, - HTTPTimeout: cfg.HTTPTimeout(), - Cache: searchCache, + UpstreamURL: cfg.Upstream.URL, + HTTPTimeout: cfg.HTTPTimeout(), + Cache: searchCache, + EnginesConfig: cfg, }) acSvc := autocomplete.NewService(cfg.Upstream.URL, cfg.HTTPTimeout()) diff --git a/config.example.toml b/config.example.toml index 1e3b75c..34f60a6 100644 --- a/config.example.toml +++ b/config.example.toml @@ -22,7 +22,7 @@ url = "" [engines] # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) # Engines not listed here will be proxied to the upstream instance. -local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"] +local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"] [engines.brave] # Brave Search API key (env: BRAVE_API_KEY) @@ -35,6 +35,10 @@ access_token = "" category = "web-lite" results_per_page = 10 +[engines.youtube] +# YouTube Data API v3 key (env: YOUTUBE_API_KEY) +api_key = "" + [cache] # Valkey/Redis cache for search results. # Leave address empty to disable caching entirely. 
diff --git a/internal/config/config.go b/internal/config/config.go index 93b8d86..7f8b06a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -35,6 +35,7 @@ type EnginesConfig struct { LocalPorted []string `toml:"local_ported"` Brave BraveConfig `toml:"brave"` Qwant QwantConfig `toml:"qwant"` + YouTube YouTubeConfig `toml:"youtube"` } // CacheConfig holds Valkey/Redis cache settings. @@ -85,6 +86,10 @@ type QwantConfig struct { ResultsPerPage int `toml:"results_per_page"` } +type YouTubeConfig struct { + APIKey string `toml:"api_key"` +} + // Load reads configuration from the given TOML file path. // If the file does not exist, it returns defaults (empty values where applicable). // Environment variables are used as fallbacks for any zero-value fields. @@ -109,7 +114,7 @@ func defaultConfig() *Config { }, Upstream: UpstreamConfig{}, Engines: EnginesConfig{ - LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"}, + LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}, Qwant: QwantConfig{ Category: "web-lite", ResultsPerPage: 10, @@ -151,6 +156,9 @@ func applyEnvOverrides(cfg *Config) { if v := os.Getenv("BRAVE_ACCESS_TOKEN"); v != "" { cfg.Engines.Brave.AccessToken = v } + if v := os.Getenv("YOUTUBE_API_KEY"); v != "" { + cfg.Engines.YouTube.APIKey = v + } if v := os.Getenv("VALKEY_ADDRESS"); v != "" { cfg.Cache.Address = v } diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 53ba87f..b7f3c00 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -4,23 +4,42 @@ import ( "net/http" "os" "time" + + "github.com/metamorphosis-dev/kafka/internal/config" ) // NewDefaultPortedEngines returns the starter set of Go-native engines. // The service can swap/extend this registry later as more engines are ported. 
-func NewDefaultPortedEngines(client *http.Client) map[string]Engine { +// If cfg is nil, falls back to reading API keys from environment variables. +func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string]Engine { if client == nil { client = &http.Client{Timeout: 10 * time.Second} } + var braveAPIKey, braveAccessToken, youtubeAPIKey string + if cfg != nil { + braveAPIKey = cfg.Engines.Brave.APIKey + braveAccessToken = cfg.Engines.Brave.AccessToken + youtubeAPIKey = cfg.Engines.YouTube.APIKey + } + if braveAPIKey == "" { + braveAPIKey = os.Getenv("BRAVE_API_KEY") + } + if braveAccessToken == "" { + braveAccessToken = os.Getenv("BRAVE_ACCESS_TOKEN") + } + if youtubeAPIKey == "" { + youtubeAPIKey = os.Getenv("YOUTUBE_API_KEY") + } + return map[string]Engine{ "wikipedia": &WikipediaEngine{client: client}, "arxiv": &ArxivEngine{client: client}, "crossref": &CrossrefEngine{client: client}, "braveapi": &BraveEngine{ client: client, - apiKey: os.Getenv("BRAVE_API_KEY"), - accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"), + apiKey: braveAPIKey, + accessGateToken: braveAccessToken, resultsPerPage: 20, }, "qwant": &QwantEngine{ @@ -35,6 +54,7 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine { "google": &GoogleEngine{client: client}, "youtube": &YouTubeEngine{ client: client, + apiKey: youtubeAPIKey, baseURL: "https://www.googleapis.com", }, } diff --git a/internal/search/service.go b/internal/search/service.go index 62a9308..47d2895 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -7,15 +7,17 @@ import ( "time" "github.com/metamorphosis-dev/kafka/internal/cache" + "github.com/metamorphosis-dev/kafka/internal/config" "github.com/metamorphosis-dev/kafka/internal/contracts" "github.com/metamorphosis-dev/kafka/internal/engines" "github.com/metamorphosis-dev/kafka/internal/upstream" ) type ServiceConfig struct { - UpstreamURL string - HTTPTimeout time.Duration - Cache *cache.Cache + UpstreamURL string + 
HTTPTimeout time.Duration + Cache *cache.Cache + EnginesConfig *config.Config } type Service struct { @@ -44,7 +46,7 @@ func NewService(cfg ServiceConfig) *Service { return &Service{ upstreamClient: up, planner: engines.NewPlannerFromEnv(), - localEngines: engines.NewDefaultPortedEngines(httpClient), + localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig), cache: cfg.Cache, } } From a7f594b7fa68d94d4debdace3fbae867b25e4f60 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 01:57:13 +0000 Subject: [PATCH 12/12] feat: add YouTube engine with config file and env support YouTube Data API v3 engine: - Add YouTubeConfig to EnginesConfig with api_key field - Add YOUTUBE_API_KEY env override - Thread *config.Config through search service to factory - Factory falls back to env vars if config fields are empty - Update config.example.toml with youtube section Also update default local_ported to include google and youtube. --- cmd/kafka/main.go | 7 ++++--- config.example.toml | 6 +++++- internal/config/config.go | 10 +++++++++- internal/engines/factory.go | 26 +++++++++++++++++++++++--- internal/search/service.go | 10 ++++++---- 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/cmd/kafka/main.go b/cmd/kafka/main.go index ab29852..90c750d 100644 --- a/cmd/kafka/main.go +++ b/cmd/kafka/main.go @@ -53,9 +53,10 @@ func main() { } svc := search.NewService(search.ServiceConfig{ - UpstreamURL: cfg.Upstream.URL, - HTTPTimeout: cfg.HTTPTimeout(), - Cache: searchCache, + UpstreamURL: cfg.Upstream.URL, + HTTPTimeout: cfg.HTTPTimeout(), + Cache: searchCache, + EnginesConfig: cfg, }) acSvc := autocomplete.NewService(cfg.Upstream.URL, cfg.HTTPTimeout()) diff --git a/config.example.toml b/config.example.toml index 1e3b75c..34f60a6 100644 --- a/config.example.toml +++ b/config.example.toml @@ -22,7 +22,7 @@ url = "" [engines] # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) # Engines not listed here will 
be proxied to the upstream instance. -local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"] +local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"] [engines.brave] # Brave Search API key (env: BRAVE_API_KEY) @@ -35,6 +35,10 @@ access_token = "" category = "web-lite" results_per_page = 10 +[engines.youtube] +# YouTube Data API v3 key (env: YOUTUBE_API_KEY) +api_key = "" + [cache] # Valkey/Redis cache for search results. # Leave address empty to disable caching entirely. diff --git a/internal/config/config.go b/internal/config/config.go index 93b8d86..7f8b06a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -35,6 +35,7 @@ type EnginesConfig struct { LocalPorted []string `toml:"local_ported"` Brave BraveConfig `toml:"brave"` Qwant QwantConfig `toml:"qwant"` + YouTube YouTubeConfig `toml:"youtube"` } // CacheConfig holds Valkey/Redis cache settings. @@ -85,6 +86,10 @@ type QwantConfig struct { ResultsPerPage int `toml:"results_per_page"` } +type YouTubeConfig struct { + APIKey string `toml:"api_key"` +} + // Load reads configuration from the given TOML file path. // If the file does not exist, it returns defaults (empty values where applicable). // Environment variables are used as fallbacks for any zero-value fields. 
@@ -109,7 +114,7 @@ func defaultConfig() *Config { }, Upstream: UpstreamConfig{}, Engines: EnginesConfig{ - LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"}, + LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}, Qwant: QwantConfig{ Category: "web-lite", ResultsPerPage: 10, @@ -151,6 +156,9 @@ func applyEnvOverrides(cfg *Config) { if v := os.Getenv("BRAVE_ACCESS_TOKEN"); v != "" { cfg.Engines.Brave.AccessToken = v } + if v := os.Getenv("YOUTUBE_API_KEY"); v != "" { + cfg.Engines.YouTube.APIKey = v + } if v := os.Getenv("VALKEY_ADDRESS"); v != "" { cfg.Cache.Address = v } diff --git a/internal/engines/factory.go b/internal/engines/factory.go index 53ba87f..b7f3c00 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -4,23 +4,42 @@ import ( "net/http" "os" "time" + + "github.com/metamorphosis-dev/kafka/internal/config" ) // NewDefaultPortedEngines returns the starter set of Go-native engines. // The service can swap/extend this registry later as more engines are ported. -func NewDefaultPortedEngines(client *http.Client) map[string]Engine { +// If cfg is nil, falls back to reading API keys from environment variables. 
+func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string]Engine { if client == nil { client = &http.Client{Timeout: 10 * time.Second} } + var braveAPIKey, braveAccessToken, youtubeAPIKey string + if cfg != nil { + braveAPIKey = cfg.Engines.Brave.APIKey + braveAccessToken = cfg.Engines.Brave.AccessToken + youtubeAPIKey = cfg.Engines.YouTube.APIKey + } + if braveAPIKey == "" { + braveAPIKey = os.Getenv("BRAVE_API_KEY") + } + if braveAccessToken == "" { + braveAccessToken = os.Getenv("BRAVE_ACCESS_TOKEN") + } + if youtubeAPIKey == "" { + youtubeAPIKey = os.Getenv("YOUTUBE_API_KEY") + } + return map[string]Engine{ "wikipedia": &WikipediaEngine{client: client}, "arxiv": &ArxivEngine{client: client}, "crossref": &CrossrefEngine{client: client}, "braveapi": &BraveEngine{ client: client, - apiKey: os.Getenv("BRAVE_API_KEY"), - accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"), + apiKey: braveAPIKey, + accessGateToken: braveAccessToken, resultsPerPage: 20, }, "qwant": &QwantEngine{ @@ -35,6 +54,7 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine { "google": &GoogleEngine{client: client}, "youtube": &YouTubeEngine{ client: client, + apiKey: youtubeAPIKey, baseURL: "https://www.googleapis.com", }, } diff --git a/internal/search/service.go b/internal/search/service.go index 62a9308..47d2895 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -7,15 +7,17 @@ import ( "time" "github.com/metamorphosis-dev/kafka/internal/cache" + "github.com/metamorphosis-dev/kafka/internal/config" "github.com/metamorphosis-dev/kafka/internal/contracts" "github.com/metamorphosis-dev/kafka/internal/engines" "github.com/metamorphosis-dev/kafka/internal/upstream" ) type ServiceConfig struct { - UpstreamURL string - HTTPTimeout time.Duration - Cache *cache.Cache + UpstreamURL string + HTTPTimeout time.Duration + Cache *cache.Cache + EnginesConfig *config.Config } type Service struct { @@ -44,7 +46,7 @@ func NewService(cfg 
ServiceConfig) *Service { return &Service{ upstreamClient: up, planner: engines.NewPlannerFromEnv(), - localEngines: engines.NewDefaultPortedEngines(httpClient), + localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig), cache: cfg.Cache, } }