From 21b77f25bf647f2d11edda9ace9b625db72166d9 Mon Sep 17 00:00:00 2001 From: ashisgreat22 Date: Sun, 22 Mar 2026 01:47:03 +0100 Subject: [PATCH] refactor: remove SearXNG references and rename binary to kafka - Rename cmd/searxng-go to cmd/kafka - Remove all SearXNG references from source comments while keeping "SearXNG-compatible API" in user-facing docs - Update binary paths in README, CLAUDE.md, and Dockerfile - Update log message to "kafka starting" Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 10 +++++----- Dockerfile | 2 +- README.md | 12 ++++++------ cmd/{searxng-go => kafka}/main.go | 2 +- config.example.toml | 4 ++-- internal/autocomplete/service.go | 6 +++--- internal/contracts/main_result.go | 10 +++++----- internal/contracts/types.go | 6 +++--- internal/engines/braveapi.go | 8 ++++---- internal/engines/engine.go | 2 +- internal/engines/planner.go | 4 ++-- internal/engines/qwant.go | 10 +++++----- internal/search/merge.go | 2 +- internal/search/request_params.go | 4 ++-- internal/search/response.go | 8 ++++---- internal/search/service.go | 2 +- internal/upstream/client.go | 2 +- internal/views/static/css/kafka.css | 1 - 18 files changed, 47 insertions(+), 48 deletions(-) rename cmd/{searxng-go => kafka}/main.go (98%) diff --git a/CLAUDE.md b/CLAUDE.md index 1ba6bdc..b7f254e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream SearXNG instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response. +kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 
9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream metasearch instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response. ## Build & Run Commands @@ -22,7 +22,7 @@ go test -run TestWikipedia ./internal/engines/ go test -v ./internal/engines/ # Run the server (requires config.toml) -go run ./cmd/searxng-go -config config.toml +go run ./cmd/kafka -config config.toml ``` There is no Makefile. There is no linter configured. @@ -37,13 +37,13 @@ There is no Makefile. There is no linter configured. - `internal/config` — TOML-based configuration with env var fallbacks. `Load(path)` reads `config.toml`; env vars override zero-value fields. See `config.example.toml` for all settings. - `internal/engines` — `Engine` interface and all 9 Go-native implementations. `factory.go` registers engines via `NewDefaultPortedEngines()`. `planner.go` routes engines to local or upstream based on `LOCAL_PORTED_ENGINES` env var. - `internal/search` — `Service` orchestrates the pipeline: cache check, planning, parallel engine execution via goroutines/WaitGroup, upstream proxying, response merging. Individual engine failures are reported as `unresponsive_engines` rather than aborting the search. Qwant has fallback logic to upstream on empty results. -- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream SearXNG `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. +- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise. - `internal/httpapi` — HTTP handlers for `/`, `/search`, `/autocompleter`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages. -- `internal/upstream` — Client that proxies requests to an upstream SearXNG instance via POST. 
+- `internal/upstream` — Client that proxies requests to an upstream metasearch instance via POST. - `internal/cache` — Valkey/Redis-backed cache with SHA-256 cache keys. No-op if unconfigured. - `internal/middleware` — Three rate limiters (per-IP sliding window, burst+sustained, global) and CORS. All disabled by default. - `internal/views` — HTML templates and static files embedded via `//go:embed`. Renders full pages or HTMX fragments. Templates: `base.html`, `index.html`, `results.html`, `results_inner.html`, `result_item.html`. -- `cmd/searxng-go` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server. +- `cmd/kafka` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server. **Engine interface** (`internal/engines/engine.go`): ```go diff --git a/Dockerfile b/Dockerfile index c41b5a1..e21960f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ RUN go mod download # Copy source and build COPY . . -RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/searxng-go +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/kafka # Runtime stage FROM alpine:3.21 diff --git a/README.md b/README.md index 2f0868f..c03019e 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ A privacy-respecting, open metasearch engine written in Go. SearXNG-compatible A ```bash git clone https://git.ashisgreat.xyz/penal-colony/gosearch.git cd kafka -go build ./cmd/searxng-go -./searxng-go -config config.toml +go build ./cmd/kafka +./kafka -config config.toml ``` ### Docker Compose @@ -76,7 +76,7 @@ sudo nixos-rebuild switch --flake .# ```bash nix develop go test ./... -go run ./cmd/searxng-go -config config.toml +go run ./cmd/kafka -config config.toml ``` ## Endpoints @@ -138,7 +138,7 @@ Copy `config.example.toml` to `config.toml` and edit. 
All settings can also be o ### Key Sections - **`[server]`** — port, timeout, public base URL for OpenSearch -- **`[upstream]`** — optional upstream SearXNG proxy for unported engines +- **`[upstream]`** — optional upstream metasearch proxy for unported engines - **`[engines]`** — which engines run locally, engine-specific settings - **`[cache]`** — Valkey/Redis address, password, TTL - **`[cors]`** — allowed origins and methods @@ -152,7 +152,7 @@ Copy `config.example.toml` to `config.toml` and edit. All settings can also be o |---|---| | `PORT` | Listen port (default: 8080) | | `BASE_URL` | Public URL for OpenSearch XML | -| `UPSTREAM_SEARXNG_URL` | Upstream SearXNG instance URL | +| `UPSTREAM_SEARXNG_URL` | Upstream instance URL | | `LOCAL_PORTED_ENGINES` | Comma-separated local engine list | | `HTTP_TIMEOUT` | Upstream request timeout | | `BRAVE_API_KEY` | Brave Search API key | @@ -177,7 +177,7 @@ See `config.example.toml` for the full list including rate limiting and CORS var | Reddit | Reddit JSON API | Discussions | | Bing | Bing RSS | General web | -Engines not listed in `engines.local_ported` are proxied to an upstream SearXNG instance if `upstream.url` is configured. +Engines not listed in `engines.local_ported` are proxied to an upstream metasearch instance if `upstream.url` is configured. 
## Architecture diff --git a/cmd/searxng-go/main.go b/cmd/kafka/main.go similarity index 98% rename from cmd/searxng-go/main.go rename to cmd/kafka/main.go index dac6258..ab29852 100644 --- a/cmd/searxng-go/main.go +++ b/cmd/kafka/main.go @@ -103,7 +103,7 @@ func main() { }, logger)(handler) addr := fmt.Sprintf(":%d", cfg.Server.Port) - logger.Info("searxng-go starting", + logger.Info("kafka starting", "addr", addr, "cache", searchCache.Enabled(), "rate_limit", cfg.RateLimit.Requests > 0, diff --git a/config.example.toml b/config.example.toml index df77184..1e3b75c 100644 --- a/config.example.toml +++ b/config.example.toml @@ -15,13 +15,13 @@ http_timeout = "10s" base_url = "" [upstream] -# URL of an upstream SearXNG instance for unported engines (env: UPSTREAM_SEARXNG_URL) +# URL of an upstream metasearch instance for unported engines (env: UPSTREAM_SEARXNG_URL) # Leave empty to run without an upstream proxy. url = "" [engines] # Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES) -# Engines not listed here will be proxied to upstream SearXNG. +# Engines not listed here will be proxied to the upstream instance. local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"] [engines.brave] diff --git a/internal/autocomplete/service.go b/internal/autocomplete/service.go index 3892d63..99d963a 100644 --- a/internal/autocomplete/service.go +++ b/internal/autocomplete/service.go @@ -11,7 +11,7 @@ import ( "time" ) -// Service fetches search suggestions from an upstream SearXNG instance +// Service fetches search suggestions from an upstream metasearch instance // or falls back to Wikipedia's OpenSearch API. type Service struct { upstreamURL string @@ -40,7 +40,7 @@ func (s *Service) Suggestions(ctx context.Context, query string) ([]string, erro return s.wikipediaSuggestions(ctx, query) } -// upstreamSuggestions proxies to an upstream SearXNG /autocompleter endpoint. 
+// upstreamSuggestions proxies to an upstream /autocompleter endpoint. func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]string, error) { u := s.upstreamURL + "/autocompleter?" + url.Values{"q": {query}}.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) @@ -64,7 +64,7 @@ func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]stri return nil, err } - // SearXNG /autocompleter returns a plain JSON array of strings. + // The /autocompleter endpoint returns a plain JSON array of strings. var out []string if err := json.Unmarshal(body, &out); err != nil { return nil, err diff --git a/internal/contracts/main_result.go b/internal/contracts/main_result.go index 48005f8..20c9231 100644 --- a/internal/contracts/main_result.go +++ b/internal/contracts/main_result.go @@ -5,15 +5,15 @@ import ( "encoding/json" ) -// MainResult represents one element of SearXNG's `results` array. +// MainResult represents one element of the `results` array. // -// SearXNG returns many additional keys beyond what templates use. To keep the +// The API returns many additional keys beyond what templates use. To keep the // contract stable for proxying/merging, we preserve all unknown keys in // `raw` and re-emit them via MarshalJSON. type MainResult struct { raw map[string]any - // Common fields used by SearXNG templates (RSS uses: title, url, content, pubdate). + // Common fields used by templates (RSS uses: title, url, content, pubdate). Template string `json:"template"` Title string `json:"title"` Content string `json:"content"` @@ -28,12 +28,12 @@ type MainResult struct { Positions []int `json:"positions"` Engines []string `json:"engines"` - // These fields exist in SearXNG's MainResult base; keep them so downstream + // These fields exist in the MainResult base; keep them so downstream // callers can generate richer output later. 
OpenGroup bool `json:"open_group"` CloseGroup bool `json:"close_group"` - // parsed_url in SearXNG is emitted as a tuple; we preserve it as-is. + // parsed_url is emitted as a tuple; we preserve it as-is. ParsedURL any `json:"parsed_url"` } diff --git a/internal/contracts/types.go b/internal/contracts/types.go index a68f77a..81103ce 100644 --- a/internal/contracts/types.go +++ b/internal/contracts/types.go @@ -1,6 +1,6 @@ package contracts -// OutputFormat matches SearXNG's `/search?format=...` values. +// OutputFormat matches the `/search?format=...` values. type OutputFormat string const ( @@ -28,7 +28,7 @@ type SearchRequest struct { Engines []string Categories []string - // EngineData matches SearXNG's `engine_data-<engine>-<key>=<value>` parameters. + // EngineData matches the `engine_data-<engine>-<key>=<value>` parameters. EngineData map[string]map[string]string // AccessToken is an optional request token used to gate paid/limited engines. @@ -36,7 +36,7 @@ type SearchRequest struct { AccessToken string } -// SearchResponse matches the JSON schema returned by SearXNG's `webutils.get_json_response()`. +// SearchResponse matches the JSON schema used by `webutils.get_json_response()`. type SearchResponse struct { Query string `json:"query"` NumberOfResults int `json:"number_of_results"` diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go index 2cb20ff..77c7abe 100644 --- a/internal/engines/braveapi.go +++ b/internal/engines/braveapi.go @@ -14,7 +14,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// BraveEngine implements the SearXNG `braveapi` engine (Brave Web Search API). +// BraveEngine implements the `braveapi` engine (Brave Web Search API). 
// // Config / gating: // - BRAVE_API_KEY: required to call Brave @@ -35,8 +35,8 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{}, errors.New("brave engine not initialized") } - // Gate / config checks should not be treated as fatal errors; SearXNG - // treats misconfigured engines as unresponsive. + // Gate / config checks should not be treated as fatal errors; the reference + // implementation treats misconfigured engines as unresponsive. if strings.TrimSpace(e.apiKey) == "" { return contracts.SearchResponse{ Query: req.Query, @@ -93,7 +93,7 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( } } - // SearXNG's python checks `if params["safesearch"]:` which treats any + // The reference implementation checks `if params["safesearch"]:` which treats any // non-zero (moderate/strict) as strict. if req.Safesearch > 0 { args.Set("safesearch", "strict") diff --git a/internal/engines/engine.go b/internal/engines/engine.go index d07aec9..ee87cfd 100644 --- a/internal/engines/engine.go +++ b/internal/engines/engine.go @@ -6,7 +6,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// Engine is a Go-native implementation of a SearXNG engine. +// Engine is a Go-native implementation of a search engine. // // Implementations should return a SearchResponse containing only the results // for that engine subset; the caller will merge multiple engine responses. 
diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 543f253..56df656 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -48,7 +48,7 @@ func NewPlanner(portedEngines []string) *Planner { // Plan returns: // - localEngines: engines that are configured as ported for this service -// - upstreamEngines: engines that should be executed by upstream SearXNG +// - upstreamEngines: engines that should be executed by the upstream instance // - requestedEngines: the (possibly inferred) requested engines list // // If the request provides an explicit `engines` parameter, we use it. @@ -80,7 +80,7 @@ func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngin func inferFromCategories(categories []string) []string { // Minimal mapping for the initial porting subset. - // This mirrors the idea of selecting from SearXNG categories without + // This mirrors the idea of selecting from engine categories without // embedding the whole engine registry. set := map[string]bool{} for _, c := range categories { diff --git a/internal/engines/qwant.go b/internal/engines/qwant.go index bb2a03c..8221781 100644 --- a/internal/engines/qwant.go +++ b/internal/engines/qwant.go @@ -14,11 +14,11 @@ import ( "github.com/PuerkitoBio/goquery" ) -// QwantEngine implements a SearXNG-like `qwant` (web) adapter using +// QwantEngine implements a `qwant` (web) adapter using // Qwant v3 endpoint: https://api.qwant.com/v3/search/web. // -// Qwant's API is not fully documented; this mirrors SearXNG's parsing logic -// for the `web` category from `.agent/searxng/searx/engines/qwant.py`. +// Qwant's API is not fully documented; this implements parsing logic +// for the `web` category. 
type QwantEngine struct { client *http.Client category string // "web" (JSON API) or "web-lite" (HTML fallback) @@ -37,7 +37,7 @@ func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{Query: req.Query}, nil } - // For API parity we use SearXNG web defaults: count=10, offset=(pageno-1)*count. + // For API parity we use web defaults: count=10, offset=(pageno-1)*count. // The engine's config field exists so we can expand to news/images/videos later. count := e.resultsPerPage if count <= 0 { @@ -262,7 +262,7 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq return } - // In SearXNG: "./span[contains(@class, 'url partner')]" + // Selector: "./span[contains(@class, 'url partner')]" urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text()) if urlText == "" { // fallback: any span with class containing both 'url' and 'partner' diff --git a/internal/search/merge.go b/internal/search/merge.go index 54ff9bb..64ebd6e 100644 --- a/internal/search/merge.go +++ b/internal/search/merge.go @@ -8,7 +8,7 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// MergeResponses merges multiple SearXNG-compatible JSON responses. +// MergeResponses merges multiple compatible JSON responses. // // MVP merge semantics: // - results are concatenated with a simple de-dup key (engine|title|url) diff --git a/internal/search/request_params.go b/internal/search/request_params.go index 1d48a04..9fdd799 100644 --- a/internal/search/request_params.go +++ b/internal/search/request_params.go @@ -11,7 +11,7 @@ import ( var languageCodeRe = regexp.MustCompile(`^[a-z]{2,3}(-[a-zA-Z]{2})?$`) func ParseSearchRequest(r *http.Request) (SearchRequest, error) { - // SearXNG supports both GET and POST and relies on form values for routing. + // Supports both GET and POST and relies on form values for routing. 
if err := r.ParseForm(); err != nil { return SearchRequest{}, errors.New("invalid request: cannot parse form") } @@ -90,7 +90,7 @@ func ParseSearchRequest(r *http.Request) (SearchRequest, error) { // engines is an explicit list of engine names. engines := splitCSV(strings.TrimSpace(r.FormValue("engines"))) - // categories and category_ params mirror SearXNG's webadapter parsing. + // categories and category_ params mirror the webadapter parsing. // We don't validate against a registry here; we just preserve the requested values. catSet := map[string]bool{} if catsParam := strings.TrimSpace(r.FormValue("categories")); catsParam != "" { diff --git a/internal/search/response.go b/internal/search/response.go index 3b07096..1a9ce26 100644 --- a/internal/search/response.go +++ b/internal/search/response.go @@ -38,7 +38,7 @@ func WriteSearchResponse(w http.ResponseWriter, format OutputFormat, resp Search } } -// csvRowHeader matches the SearXNG CSV writer key order. +// csvRowHeader matches the CSV writer key order. var csvRowHeader = []string{"title", "url", "content", "host", "engine", "score", "type"} func writeCSV(w http.ResponseWriter, resp SearchResponse) error { @@ -111,14 +111,14 @@ func writeCSV(w http.ResponseWriter, resp SearchResponse) error { func writeRSS(w http.ResponseWriter, resp SearchResponse) error { q := resp.Query - escapedTitle := xmlEscape("SearXNG search: " + q) - escapedDesc := xmlEscape("Search results for \"" + q + "\" - SearXNG") + escapedTitle := xmlEscape("kafka search: " + q) + escapedDesc := xmlEscape("Search results for \"" + q + "\" - kafka") escapedQueryTerms := xmlEscape(q) link := "/search?q=" + url.QueryEscape(q) opensearchQuery := fmt.Sprintf(``, escapedQueryTerms) - // SearXNG template uses the number of results for both totalResults and itemsPerPage. + // The template uses the number of results for both totalResults and itemsPerPage. 
nr := resp.NumberOfResults var items bytes.Buffer diff --git a/internal/search/service.go b/internal/search/service.go index 91fef2b..62a9308 100644 --- a/internal/search/service.go +++ b/internal/search/service.go @@ -50,7 +50,7 @@ func NewService(cfg ServiceConfig) *Service { } // Search executes the request against local engines (in parallel) and -// optionally upstream SearXNG for unported engines. +// optionally the upstream instance for unported engines. // // Individual engine failures are reported as unresponsive_engines rather // than aborting the entire search. diff --git a/internal/upstream/client.go b/internal/upstream/client.go index 3a11843..64ddec4 100644 --- a/internal/upstream/client.go +++ b/internal/upstream/client.go @@ -68,7 +68,7 @@ func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, en for engineName, kv := range req.EngineData { for key, value := range kv { - // Mirror SearXNG's naming: `engine_data-<engine>-<key>=<value>` + // Mirror the naming convention: `engine_data-<engine>-<key>=<value>` form.Set(fmt.Sprintf("engine_data-%s-%s", engineName, key), value) } } diff --git a/internal/views/static/css/kafka.css b/internal/views/static/css/kafka.css index 376b2d8..824f489 100644 --- a/internal/views/static/css/kafka.css +++ b/internal/views/static/css/kafka.css @@ -1,5 +1,4 @@ /* kafka — clean, minimal search engine CSS */ -/* Inspired by SearXNG's simple theme class conventions */ :root { --color-base: #f5f5f5;