kafka/config.example.toml
Franz Kafka df8fe9474b feat: add DuckDuckGo, GitHub, Reddit, and Bing engines
- DuckDuckGo: scrapes Lite HTML endpoint for results
  - Language-aware region mapping (de→de-de, ja→jp-jp, etc.)
  - HTML parser extracts result links and snippets from DDG Lite markup
  - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape

- GitHub: uses public Search API (repos, sorted by stars)
  - No auth required (10 req/min unauthenticated)
  - Shows stars, language, topics, last updated date
  - Paginated via GitHub's page parameter

- Reddit: uses public JSON search API
  - Respects safesearch (skips over_18 posts)
  - Shows subreddit, score, comment count
  - Links self-posts to the thread URL

- Bing: scrapes web search HTML (b_algo containers)
  - Extracts titles, URLs, and snippets from Bing's result markup
  - Handles Bing's tracking URL encoding

- Updated factory, config defaults, and config.example.toml
- Full test suite: unit tests for all engines, HTML parsing tests,
  region mapping tests, live request tests (skipped in short mode)

9 engines total: wikipedia, arxiv, crossref, braveapi, qwant,
duckduckgo, github, reddit, bing
2026-03-21 16:52:11 +00:00

63 lines
2 KiB
TOML

# gosearch configuration
# Copy to config.toml and adjust as needed.
# Environment variables are used as fallbacks when a config field is empty/unset.
[server]
# Server-level settings: the listen port and the timeout for outgoing calls.
# Listen port (env: PORT)
port = 8080
# HTTP timeout for engine and upstream calls (env: HTTP_TIMEOUT)
# Written as a duration string with a unit suffix, e.g. "10s".
# NOTE(review): presumably parsed with Go duration syntax — confirm against the config loader.
http_timeout = "10s"
[upstream]
# Optional upstream SearXNG instance used for engines that are not ported to Go.
# URL of an upstream SearXNG instance for unported engines (env: UPSTREAM_SEARXNG_URL)
# Leave empty to run without an upstream proxy.
url = ""
[engines]
# Engines to execute locally in Go, given here as a TOML array of engine names
# (one quoted string per engine, not a single comma-separated string).
# Env: LOCAL_PORTED_ENGINES — NOTE(review): presumably the environment variable
# carries the same list as a comma-separated string; confirm against the loader.
# Engines not listed here will be proxied to upstream SearXNG.
local_ported = [
    "wikipedia",
    "arxiv",
    "crossref",
    "braveapi",
    "qwant",
    "duckduckgo",
    "github",
    "reddit",
    "bing",
]
[engines.brave]
# Settings for the Brave Search API engine. This table is named `brave`,
# while the engine itself is listed as "braveapi" in engines.local_ported.
# Brave Search API key (env: BRAVE_API_KEY)
api_key = ""
# Optional access token to gate requests (env: BRAVE_ACCESS_TOKEN)
# NOTE(review): presumably compared against the X-Brave-Access-Token request
# header mentioned in the [cors] defaults — confirm.
access_token = ""
[engines.qwant]
# Settings for the Qwant engine.
# Qwant category: "web" or "web-lite" (default: "web-lite")
category = "web-lite"
# Results per page.
# NOTE(review): presumably the number of results requested from Qwant for each page — confirm.
results_per_page = 10
[cache]
# Valkey/Redis cache for search results.
# Cache server address (env: VALKEY_ADDRESS)
# Leave address empty to disable caching entirely.
address = ""
# Cache server password (env: VALKEY_PASSWORD)
password = ""
# Database index (env: VALKEY_DB)
db = 0
# Cache TTL for search results, written as a duration string such as "5m"
# (env: VALKEY_CACHE_TTL)
default_ttl = "5m"
[cors]
# CORS configuration for browser-based clients.
# Allowed origins: use "*" for all, or specific domains (env: CORS_ALLOWED_ORIGINS)
# The "*" value below allows every origin; replace it with explicit origins to
# restrict which sites may call this service from a browser.
allowed_origins = ["*"]
# Allowed methods (default: GET, POST, OPTIONS)
# allowed_methods = ["GET", "POST", "OPTIONS"]
# Allowed headers (default: Content-Type, Authorization, X-Search-Token, X-Brave-Access-Token)
# NOTE: the sample below lists only two of the four default headers. If you
# uncomment it, add "X-Search-Token" and "X-Brave-Access-Token" as well when
# browser clients need to send them.
# allowed_headers = ["Content-Type", "Authorization"]
# Preflight cache duration in seconds (default: 3600)
# max_age = 3600
[rate_limit]
# Per-IP rate limiting. Set requests to 0 to disable.
# Number of requests allowed per IP within one window (env: RATE_LIMIT_REQUESTS)
# With the values below: 30 requests per 1 minute.
requests = 30
# Time window for rate limit, written as a duration string such as "1m"
# (env: RATE_LIMIT_WINDOW)
window = "1m"
# How often to clean up stale IP entries (env: RATE_LIMIT_CLEANUP_INTERVAL)
cleanup_interval = "5m"