- DuckDuckGo: scrapes Lite HTML endpoint for results - Language-aware region mapping (de→de-de, ja→jp-jp, etc.) - HTML parser extracts result links and snippets from DDG Lite markup - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape - GitHub: uses public Search API (repos, sorted by stars) - No auth required (10 req/min unauthenticated) - Shows stars, language, topics, last updated date - Paginated via GitHub's page parameter - Reddit: uses public JSON search API - Respects safesearch (skips over_18 posts) - Shows subreddit, score, comment count - Links self-posts to the thread URL - Bing: scrapes web search HTML (b_algo containers) - Extracts titles, URLs, and snippets from Bing's result markup - Handles Bing's tracking URL encoding - Updated factory, config defaults, and config.example.toml - Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode) 9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
63 lines
2 KiB
TOML
63 lines
2 KiB
TOML
# gosearch configuration
|
|
# Copy to config.toml and adjust as needed.
|
|
# Environment variables are used as fallbacks when a config field is empty/unset.
|
|
|
|
[server]
|
|
# Listen port (env: PORT)
|
|
port = 8080
|
|
|
|
# HTTP timeout for engine and upstream calls (env: HTTP_TIMEOUT)
|
|
http_timeout = "10s"
|
|
|
|
[upstream]
|
|
# URL of an upstream SearXNG instance for unported engines (env: UPSTREAM_SEARXNG_URL)
|
|
# Leave empty to run without an upstream proxy.
|
|
url = ""
|
|
|
|
[engines]
|
|
# Engines to execute locally in Go, given as an array of names (env: LOCAL_PORTED_ENGINES, as a comma-separated list)
|
|
# Engines not listed here will be proxied to upstream SearXNG.
|
|
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"]
|
|
|
|
[engines.brave]
|
|
# Brave Search API key (env: BRAVE_API_KEY)
|
|
api_key = ""
|
|
# Optional access token to gate requests (env: BRAVE_ACCESS_TOKEN)
|
|
access_token = ""
|
|
|
|
[engines.qwant]
|
|
# Qwant category: "web" or "web-lite" (default: "web-lite")
|
|
category = "web-lite"
|
|
results_per_page = 10
|
|
|
|
[cache]
|
|
# Valkey/Redis cache for search results.
|
|
# Leave address empty to disable caching entirely.
|
|
# Env: VALKEY_ADDRESS
|
|
address = ""
|
|
# Env: VALKEY_PASSWORD
|
|
password = ""
|
|
# Database index (env: VALKEY_DB)
|
|
db = 0
|
|
# Cache TTL for search results (env: VALKEY_CACHE_TTL)
|
|
default_ttl = "5m"
|
|
|
|
[cors]
|
|
# CORS configuration for browser-based clients.
|
|
# Allowed origins: use "*" for all, or specific domains (env: CORS_ALLOWED_ORIGINS)
|
|
allowed_origins = ["*"]
|
|
# Allowed methods (default: GET, POST, OPTIONS)
|
|
# allowed_methods = ["GET", "POST", "OPTIONS"]
|
|
# Allowed headers (default: Content-Type, Authorization, X-Search-Token, X-Brave-Access-Token)
|
|
# allowed_headers = ["Content-Type", "Authorization", "X-Search-Token", "X-Brave-Access-Token"]
|
|
# Preflight cache duration in seconds (default: 3600)
|
|
# max_age = 3600
|
|
|
|
[rate_limit]
|
|
# Per-IP rate limiting. Set requests to 0 to disable.
|
|
# Env: RATE_LIMIT_REQUESTS
|
|
requests = 30
|
|
# Time window for rate limit (env: RATE_LIMIT_WINDOW)
|
|
window = "1m"
|
|
# How often to clean up stale IP entries (env: RATE_LIMIT_CLEANUP_INTERVAL)
|
|
cleanup_interval = "5m"
|