From 5b942a5fd6a0869130b63968719bcb6c8be6abd7 Mon Sep 17 00:00:00 2001 From: Franz Kafka Date: Sun, 22 Mar 2026 11:10:50 +0000 Subject: [PATCH] refactor: clean up verbose and redundant comments Trim or remove comments that: - State the obvious (function names already convey purpose) - Repeat what the code clearly shows - Are excessively long without adding value Keep comments that explain *why*, not *what*. --- internal/autocomplete/service.go | 5 +---- internal/contracts/main_result.go | 13 +------------ internal/contracts/types.go | 15 +++++---------- internal/engines/braveapi.go | 14 +++----------- internal/engines/factory.go | 5 ++--- internal/engines/google.go | 26 -------------------------- internal/engines/planner.go | 4 ---- internal/engines/qwant.go | 18 +----------------- internal/middleware/ratelimit.go | 11 ++--------- internal/search/merge.go | 5 ----- internal/search/types.go | 2 +- 11 files changed, 16 insertions(+), 102 deletions(-) diff --git a/internal/autocomplete/service.go b/internal/autocomplete/service.go index d6460f1..23473d5 100644 --- a/internal/autocomplete/service.go +++ b/internal/autocomplete/service.go @@ -27,8 +27,7 @@ import ( "time" ) -// Service fetches search suggestions from an upstream metasearch instance -// or falls back to Wikipedia's OpenSearch API. +// Service fetches search suggestions from upstream or Wikipedia OpenSearch. type Service struct { upstreamURL string http *http.Client @@ -44,7 +43,6 @@ func NewService(upstreamURL string, timeout time.Duration) *Service { } } -// Suggestions returns search suggestions for the given query. func (s *Service) Suggestions(ctx context.Context, query string) ([]string, error) { if strings.TrimSpace(query) == "" { return nil, nil @@ -56,7 +54,6 @@ func (s *Service) Suggestions(ctx context.Context, query string) ([]string, erro return s.wikipediaSuggestions(ctx, query) } -// upstreamSuggestions proxies to an upstream /autocompleter endpoint. func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]string, error) { u := s.upstreamURL + "/autocompleter?" + url.Values{"q": {query}}.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) diff --git a/internal/contracts/main_result.go b/internal/contracts/main_result.go index c804f6b..02fb29c 100644 --- a/internal/contracts/main_result.go +++ b/internal/contracts/main_result.go @@ -22,14 +22,10 @@ import ( ) // MainResult represents one element of the `results` array. -// -// The API returns many additional keys beyond what templates use. To keep the -// contract stable for proxying/merging, we preserve all unknown keys in -// `raw` and re-emit them via MarshalJSON. +// Unknown keys are preserved in `raw` and re-emitted via MarshalJSON. type MainResult struct { raw map[string]any - // Common fields used by templates (RSS uses: title, url, content, pubdate). Template string `json:"template"` Title string `json:"title"` Content string `json:"content"` @@ -45,17 +41,13 @@ type MainResult struct { Positions []int `json:"positions"` Engines []string `json:"engines"` - // These fields exist in the MainResult base; keep them so downstream - // callers can generate richer output later. OpenGroup bool `json:"open_group"` CloseGroup bool `json:"close_group"` - // parsed_url is emitted as a tuple; we preserve it as-is. ParsedURL any `json:"parsed_url"` } func (mr *MainResult) UnmarshalJSON(data []byte) error { - // Preserve the full object. dec := json.NewDecoder(bytes.NewReader(data)) dec.UseNumber() @@ -66,7 +58,6 @@ func (mr *MainResult) UnmarshalJSON(data []byte) error { mr.raw = m - // Fill the typed/common fields (best-effort; don't fail if types differ). mr.Template = stringOrEmpty(m["template"]) mr.Title = stringOrEmpty(m["title"]) mr.Content = stringOrEmpty(m["content"]) @@ -104,12 +95,10 @@ func (mr *MainResult) UnmarshalJSON(data []byte) error { } func (mr MainResult) MarshalJSON() ([]byte, error) { - // If we came from upstream JSON, preserve all keys exactly. if mr.raw != nil { return json.Marshal(mr.raw) } - // Otherwise, marshal the known fields. m := map[string]any{ "template": mr.Template, "title": mr.Title, diff --git a/internal/contracts/types.go b/internal/contracts/types.go index 279ce57..40ed6bc 100644 --- a/internal/contracts/types.go +++ b/internal/contracts/types.go @@ -20,18 +20,15 @@ package contracts type OutputFormat string const ( - FormatHTML OutputFormat = "html" // accepted for compatibility (not yet implemented) + FormatHTML OutputFormat = "html" // accepted for compatibility FormatJSON OutputFormat = "json" FormatCSV OutputFormat = "csv" FormatRSS OutputFormat = "rss" ) type SearchRequest struct { - // Format is what the client requested via `format=...`. - Format OutputFormat - - Query string - + Format OutputFormat + Query string Pageno int Safesearch int TimeRange *string @@ -39,16 +36,14 @@ type SearchRequest struct { TimeoutLimit *float64 Language string - // Engines and categories are used for deciding which engines run locally vs are proxied. - // For now, engines can be supplied directly via the `engines` form parameter. + // Engines and categories decide which engines run locally vs proxy to upstream. Engines []string Categories []string // EngineData matches the `engine_data--=` parameters. EngineData map[string]map[string]string - // AccessToken is an optional request token used to gate paid/limited engines. - // It is not part of the upstream JSON schema; it only influences local engines. + // AccessToken gates paid/limited engines. Not part of upstream JSON schema. AccessToken string } diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go index 641a1d4..81d1f3b 100644 --- a/internal/engines/braveapi.go +++ b/internal/engines/braveapi.go @@ -30,13 +30,9 @@ import ( "github.com/metamorphosis-dev/kafka/internal/contracts" ) -// BraveEngine implements the `braveapi` engine (Brave Web Search API). -// -// Config / gating: -// - BRAVE_API_KEY: required to call Brave -// - BRAVE_ACCESS_TOKEN (optional): if set, the request must include a token -// that matches the env var (via Authorization Bearer, X-Search-Token, -// X-Brave-Access-Token, or form field `token`). +// BraveEngine implements the Brave Web Search API. +// Required: BRAVE_API_KEY env var or config. +// Optional: BRAVE_ACCESS_TOKEN to gate requests. type BraveEngine struct { client *http.Client apiKey string @@ -51,8 +47,6 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{}, errors.New("brave engine not initialized") } - // Gate / config checks should not be treated as fatal errors; the reference - // implementation treats misconfigured engines as unresponsive. if strings.TrimSpace(e.apiKey) == "" { return contracts.SearchResponse{ Query: req.Query, @@ -109,8 +103,6 @@ func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) ( } } - // The reference implementation checks `if params["safesearch"]:` which treats any - // non-zero (moderate/strict) as strict. if req.Safesearch > 0 { args.Set("safesearch", "strict") } diff --git a/internal/engines/factory.go b/internal/engines/factory.go index ddaeb06..528dcb7 100644 --- a/internal/engines/factory.go +++ b/internal/engines/factory.go @@ -24,9 +24,8 @@ import ( "github.com/metamorphosis-dev/kafka/internal/config" ) -// NewDefaultPortedEngines returns the starter set of Go-native engines. -// The service can swap/extend this registry later as more engines are ported. -// If cfg is nil, falls back to reading API keys from environment variables. +// NewDefaultPortedEngines returns the Go-native engine registry. +// If cfg is nil, API keys fall back to environment variables. func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string]Engine { if client == nil { client = &http.Client{Timeout: 10 * time.Second} diff --git a/internal/engines/google.go b/internal/engines/google.go index 0119a98..8563829 100644 --- a/internal/engines/google.go +++ b/internal/engines/google.go @@ -57,7 +57,6 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest) start := (req.Pageno - 1) * 10 query := url.QueryEscape(req.Query) - // Build URL like SearXNG does. u := fmt.Sprintf( "https://www.google.com/search?q=%s&filter=0&start=%d&hl=%s&lr=%s&safe=%s", query, @@ -118,7 +117,6 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest) }, nil } -// detectGoogleSorry returns true if the response is a Google block/CAPTCHA page. func detectGoogleSorry(resp *http.Response) bool { if resp.Request != nil { if resp.Request.URL.Host == "sorry.google.com" || strings.HasPrefix(resp.Request.URL.Path, "/sorry") { @@ -128,16 +126,9 @@ func detectGoogleSorry(resp *http.Response) bool { return false } -// parseGoogleResults extracts search results from Google's HTML. -// Uses the same selectors as SearXNG: div.MjjYud for result containers. func parseGoogleResults(body, query string) []contracts.MainResult { var results []contracts.MainResult - // SearXNG selector: .//div[contains(@class, "MjjYud")] - // Each result block contains a title link and snippet. - // We simulate the XPath matching with regex-based extraction. - - // Find all MjjYud div blocks. mjjPattern := regexp.MustCompile(`]*class="[^"]*MjjYud[^"]*"[^>]*>(.*?)\s*(?=]*class="[^"]*MjjYud|$)`) matches := mjjPattern.FindAllStringSubmatch(body, -1) @@ -147,15 +138,12 @@ func parseGoogleResults(body, query string) []contracts.MainResult { } block := match[1] - // Extract title and URL from the result link. - // Pattern: TITLE urlPattern := regexp.MustCompile(`]+href="(/url\?q=[^"&]+)`) urlMatch := urlPattern.FindStringSubmatch(block) if len(urlMatch) < 2 { continue } rawURL := urlMatch[1] - // Remove /url?q= prefix and decode. actualURL := strings.TrimPrefix(rawURL, "/url?q=") if amp := strings.Index(actualURL, "&"); amp != -1 { actualURL = actualURL[:amp] @@ -168,14 +156,12 @@ func parseGoogleResults(body, query string) []contracts.MainResult { continue } - // Extract title from the title tag. titlePattern := regexp.MustCompile(`]*class="[^"]*qrStP[^"]*"[^>]*>([^<]+)`) titleMatch := titlePattern.FindStringSubmatch(block) title := query if len(titleMatch) >= 2 { title = stripTags(titleMatch[1]) } else { - // Fallback: extract visible text from an with data-title or role="link" linkTitlePattern := regexp.MustCompile(`]+role="link"[^>]*>([^<]+)<`) ltMatch := linkTitlePattern.FindStringSubmatch(block) if len(ltMatch) >= 2 { @@ -183,7 +169,6 @@ func parseGoogleResults(body, query string) []contracts.MainResult { } } - // Extract snippet from data-sncf divs (SearXNG's approach). snippet := extractGoogleSnippet(block) urlPtr := actualURL @@ -202,10 +187,7 @@ func parseGoogleResults(body, query string) []contracts.MainResult { return results } -// extractGoogleSnippet extracts the snippet text from a Google result block. func extractGoogleSnippet(block string) string { - // Google's snippets live in divs with data-sncf attribute. - // SearXNG looks for: .//div[contains(@data-sncf, "1")] snippetPattern := regexp.MustCompile(`]+data-sncf="1"[^>]*>(.*?)`) matches := snippetPattern.FindAllStringSubmatch(block, -1) var parts []string @@ -221,10 +203,8 @@ func extractGoogleSnippet(block string) string { return strings.Join(parts, " ") } -// extractGoogleSuggestions extracts search suggestions from Google result cards. func extractGoogleSuggestions(body string) []string { var suggestions []string - // SearXNG xpath: //div[contains(@class, "ouy7Mc")]//a suggestionPattern := regexp.MustCompile(`(?s)]*class="[^"]*ouy7Mc[^"]*"[^>]*>.*?]*>([^<]+)`) matches := suggestionPattern.FindAllStringSubmatch(body, -1) seen := map[string]bool{} @@ -241,8 +221,6 @@ func extractGoogleSuggestions(body string) []string { return suggestions } -// googleHL maps SearXNG locale to Google hl (host language) parameter. -// e.g. "en-US" -> "en-US" func googleHL(lang string) string { lang = strings.ToLower(strings.TrimSpace(lang)) if lang == "" || lang == "auto" { @@ -251,8 +229,6 @@ func googleHL(lang string) string { return lang } -// googleUILanguage maps SearXNG language to Google lr (language restrict) parameter. -// e.g. "en" -> "lang_en", "de" -> "lang_de" func googleUILanguage(lang string) string { lang = strings.ToLower(strings.Split(lang, "-")[0]) if lang == "" || lang == "auto" { @@ -261,7 +237,6 @@ func googleUILanguage(lang string) string { return "lang_" + lang } -// googleSafeSearchLevel maps safesearch (0-2) to Google's safe parameter. func googleSafeSearchLevel(safesearch int) string { switch safesearch { case 0: @@ -275,7 +250,6 @@ func googleSafeSearchLevel(safesearch int) string { } } -// stripTags removes HTML tags from a string. func stripTags(s string) string { stripper := regexp.MustCompile(`<[^>]*>`) s = stripper.ReplaceAllString(s, "") diff --git a/internal/engines/planner.go b/internal/engines/planner.go index 295f458..9616a4b 100644 --- a/internal/engines/planner.go +++ b/internal/engines/planner.go @@ -95,9 +95,6 @@ func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngin } func inferFromCategories(categories []string) []string { - // Minimal mapping for the initial porting subset. - // This mirrors the idea of selecting from engine categories without - // embedding the whole engine registry. set := map[string]bool{} for _, c := range categories { switch strings.TrimSpace(strings.ToLower(c)) { @@ -131,7 +128,6 @@ func inferFromCategories(categories []string) []string { } func sortByOrder(list []string, order map[string]int) { - // simple insertion sort (list is tiny) for i := 1; i < len(list); i++ { j := i for j > 0 && order[list[j-1]] > order[list[j]] { diff --git a/internal/engines/qwant.go b/internal/engines/qwant.go index 77f7b70..e15d4f2 100644 --- a/internal/engines/qwant.go +++ b/internal/engines/qwant.go @@ -30,11 +30,7 @@ import ( "github.com/PuerkitoBio/goquery" ) -// QwantEngine implements a `qwant` (web) adapter using -// Qwant v3 endpoint: https://api.qwant.com/v3/search/web. -// -// Qwant's API is not fully documented; this implements parsing logic -// for the `web` category. +// QwantEngine implements the Qwant v3 API (web and web-lite modes). type QwantEngine struct { client *http.Client category string // "web" (JSON API) or "web-lite" (HTML fallback) @@ -53,8 +49,6 @@ func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) ( return contracts.SearchResponse{Query: req.Query}, nil } - // For API parity we use web defaults: count=10, offset=(pageno-1)*count. - // The engine's config field exists so we can expand to news/images/videos later. count := e.resultsPerPage if count <= 0 { count = 10 @@ -271,9 +265,7 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq results := make([]contracts.MainResult, 0) seen := map[string]bool{} - // Pattern 1: legacy/known qwant-lite structure. doc.Find("section article").Each(func(_ int, item *goquery.Selection) { - // ignore randomly interspersed advertising adds if item.Find("span.tooltip").Length() > 0 { return } @@ -307,19 +299,14 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq }) }) - // Pattern 2: broader fallback for updated lite markup: - // any article/list item/div block containing an external anchor. - // We keep this conservative by requiring non-empty title + URL. doc.Find("article, li, div").Each(func(_ int, item *goquery.Selection) { if len(results) >= 20 { return } - // Skip ad-like blocks in fallback pass too. if item.Find("span.tooltip").Length() > 0 { return } - // Skip obvious nav/footer blocks. classAttr, _ := item.Attr("class") classLower := strings.ToLower(classAttr) if strings.Contains(classLower, "nav") || strings.Contains(classLower, "footer") { @@ -368,13 +355,10 @@ func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchReq } seen[href] = true - // Best-effort snippet extraction from nearby paragraph/span text. content := strings.TrimSpace(item.Find("p").First().Text()) if content == "" { content = strings.TrimSpace(item.Find("span").First().Text()) } - // If there is no snippet, still keep clearly external result links. - // Qwant-lite frequently omits rich snippets for some entries. u := href results = append(results, contracts.MainResult{ diff --git a/internal/middleware/ratelimit.go b/internal/middleware/ratelimit.go index 899029f..78774f2 100644 --- a/internal/middleware/ratelimit.go +++ b/internal/middleware/ratelimit.go @@ -27,19 +27,12 @@ import ( "log/slog" ) -// RateLimitConfig controls per-IP rate limiting using a sliding window counter. type RateLimitConfig struct { - // Requests is the max number of requests allowed per window. - Requests int - // Window is the time window duration (e.g. "1m"). - Window time.Duration - // CleanupInterval is how often stale entries are purged (default: 5m). + Requests int + Window time.Duration CleanupInterval time.Duration } -// RateLimit returns a middleware that limits requests per IP address. -// Uses an in-memory sliding window counter. When the limit is exceeded, -// responds with HTTP 429 and a Retry-After header. func RateLimit(cfg RateLimitConfig, logger *slog.Logger) func(http.Handler) http.Handler { requests := cfg.Requests if requests <= 0 { diff --git a/internal/search/merge.go b/internal/search/merge.go index 7be7353..0e8a15a 100644 --- a/internal/search/merge.go +++ b/internal/search/merge.go @@ -25,11 +25,6 @@ import ( ) // MergeResponses merges multiple compatible JSON responses. -// -// MVP merge semantics: -// - results are concatenated with a simple de-dup key (engine|title|url) -// - suggestions/corrections are de-duplicated as sets -// - answers/infoboxes/unresponsive_engines are concatenated (best-effort) func MergeResponses(responses []contracts.SearchResponse) contracts.SearchResponse { var merged contracts.SearchResponse diff --git a/internal/search/types.go b/internal/search/types.go index 9665dde..89b323d 100644 --- a/internal/search/types.go +++ b/internal/search/types.go @@ -23,7 +23,7 @@ import "github.com/metamorphosis-dev/kafka/internal/contracts" type OutputFormat = contracts.OutputFormat const ( - FormatHTML = contracts.FormatHTML // accepted for compatibility (not yet implemented) + FormatHTML = contracts.FormatHTML // accepted for compatibility FormatJSON = contracts.FormatJSON FormatCSV = contracts.FormatCSV FormatRSS = contracts.FormatRSS