Adds a Kafka-hosted favicon proxy at /favicon/<domain>:
- Fetches favicon.ico from the target domain.
- Uses an in-memory cache with a 1-hour TTL and ETag support (304 Not Modified).
- Allows at most 64 KiB per favicon to prevent memory abuse.
- Privacy: the user's browser talks to Kafka, not Google/DuckDuckGo.

Adds a new "Self (Kafka)" option to the favicon service selector. The selector defaults to None. No third-party requests are made when "Self" is chosen.
278 lines
8.1 KiB
Go
278 lines
8.1 KiB
Go
// samsa — a privacy-respecting metasearch engine
|
|
// Copyright (C) 2026-present metamorphosis-dev
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package httpapi
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/metamorphosis-dev/samsa/internal/contracts"
|
|
"github.com/metamorphosis-dev/samsa/internal/httpclient"
|
|
"github.com/metamorphosis-dev/samsa/internal/search"
|
|
"github.com/metamorphosis-dev/samsa/internal/views"
|
|
)
|
|
|
|
type Handler struct {
|
|
searchSvc *search.Service
|
|
autocompleteSvc func(ctx context.Context, query string) ([]string, error)
|
|
sourceURL string
|
|
}
|
|
|
|
func NewHandler(searchSvc *search.Service, autocompleteSuggestions func(ctx context.Context, query string) ([]string, error), sourceURL string) *Handler {
|
|
return &Handler{
|
|
searchSvc: searchSvc,
|
|
autocompleteSvc: autocompleteSuggestions,
|
|
sourceURL: sourceURL,
|
|
}
|
|
}
|
|
|
|
func (h *Handler) Healthz(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte("OK"))
|
|
}
|
|
|
|
// Index renders the homepage with the search box.
|
|
func (h *Handler) Index(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/" {
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
if err := views.RenderIndex(w, h.sourceURL); err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
// OpenSearch serves the OpenSearch description XML.
|
|
func (h *Handler) OpenSearch(baseURL string) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
xml, err := views.OpenSearchXML(baseURL)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
w.Header().Set("Content-Type", "application/opensearchdescription+xml; charset=utf-8")
|
|
w.Write(xml)
|
|
}
|
|
}
|
|
|
|
func (h *Handler) Search(w http.ResponseWriter, r *http.Request) {
|
|
q := r.FormValue("q")
|
|
format := r.FormValue("format")
|
|
|
|
// For HTML format with no query, redirect to homepage.
|
|
if q == "" && (format == "" || format == "html") {
|
|
http.Redirect(w, r, "/", http.StatusFound)
|
|
return
|
|
}
|
|
|
|
req, err := search.ParseSearchRequest(r)
|
|
if err != nil {
|
|
if format == "html" || format == "" {
|
|
pd := views.PageData{SourceURL: h.sourceURL, Query: q}
|
|
if views.IsHTMXRequest(r) {
|
|
views.RenderSearchFragment(w, pd)
|
|
} else {
|
|
views.RenderSearch(w, pd)
|
|
}
|
|
return
|
|
}
|
|
http.Error(w, err.Error(), http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
resp, err := h.searchSvc.Search(r.Context(), req)
|
|
if err != nil {
|
|
if req.Format == contracts.FormatHTML {
|
|
pd := views.PageData{SourceURL: h.sourceURL, Query: req.Query}
|
|
if views.IsHTMXRequest(r) {
|
|
views.RenderSearchFragment(w, pd)
|
|
} else {
|
|
views.RenderSearch(w, pd)
|
|
}
|
|
return
|
|
}
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
if req.Format == contracts.FormatHTML {
|
|
pd := views.FromResponse(resp, req.Query, req.Pageno,
|
|
r.FormValue("category"), r.FormValue("time"), r.FormValue("type"))
|
|
if err := views.RenderSearchAuto(w, r, pd); err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
}
|
|
return
|
|
}
|
|
|
|
if err := search.WriteSearchResponse(w, req.Format, resp); err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
// Autocompleter returns search suggestions for the given query.
|
|
func (h *Handler) Autocompleter(w http.ResponseWriter, r *http.Request) {
|
|
query := strings.TrimSpace(r.FormValue("q"))
|
|
if query == "" {
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
suggestions, err := h.autocompleteSvc(r.Context(), query)
|
|
if err != nil {
|
|
// Return empty list on error rather than an error status.
|
|
suggestions = []string{}
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
|
_ = json.NewEncoder(w).Encode(suggestions)
|
|
}
|
|
|
|
// Preferences handles GET and POST for the preferences page.
|
|
func (h *Handler) Preferences(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/preferences" {
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
if r.Method == "POST" {
|
|
// Preferences are stored in localStorage on the client via JavaScript.
|
|
// This handler exists only for form submission completeness.
|
|
http.Redirect(w, r, "/preferences", http.StatusFound)
|
|
return
|
|
}
|
|
if err := views.RenderPreferences(w, h.sourceURL); err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
// faviconCacheEntry holds a cached favicon body and its ETag.
|
|
type faviconCacheEntry struct {
|
|
body []byte
|
|
etag string
|
|
cachedAt time.Time
|
|
}
|
|
|
|
// faviconCache is a simple in-memory cache for fetched favicons.
|
|
// Entries expire after 1 hour.
|
|
var faviconCache = struct {
|
|
m map[string]faviconCacheEntry
|
|
sync.RWMutex
|
|
}{m: make(map[string]faviconCacheEntry)}
|
|
|
|
const faviconCacheTTL = 1 * time.Hour
|
|
|
|
// Favicon serves a fetched favicon for the given domain, with ETag support
|
|
// and a 1-hour in-memory cache. This lets Kafka act as a privacy-preserving
|
|
// favicon proxy: the user's browser talks to Kafka, not Google or DuckDuckGo.
|
|
func (h *Handler) Favicon(w http.ResponseWriter, r *http.Request) {
|
|
domain := strings.TrimPrefix(r.URL.Path, "/favicon/")
|
|
domain = strings.TrimSuffix(domain, "/")
|
|
domain = strings.TrimSpace(domain)
|
|
|
|
if domain == "" || strings.Contains(domain, "/") {
|
|
http.Error(w, "invalid domain", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
// Check cache.
|
|
faviconCache.RLock()
|
|
entry, ok := faviconCache.m[domain]
|
|
faviconCache.RUnlock()
|
|
|
|
now := time.Now()
|
|
if ok && now.Sub(entry.cachedAt) < faviconCacheTTL {
|
|
// ETag-based cache validation.
|
|
if etag := r.Header.Get("If-None-Match"); etag != "" && etag == entry.etag {
|
|
w.WriteHeader(http.StatusNotModified)
|
|
return
|
|
}
|
|
w.Header().Set("Content-Type", "image/x-icon")
|
|
w.Header().Set("ETag", entry.etag)
|
|
w.Header().Set("Cache-Control", "private, max-age=3600")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write(entry.body)
|
|
return
|
|
}
|
|
|
|
// Fetch from the domain's favicon.ico.
|
|
fetchURL := "https://" + domain + "/favicon.ico"
|
|
req, err := http.NewRequestWithContext(r.Context(), http.MethodGet, fetchURL, nil)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
req.Header.Set("User-Agent", "Kafka/0.1 (+https://git.ashisgreat.xyz/penal-colony/samsa)")
|
|
req.Header.Set("Accept", "image/x-icon,image/png,image/webp,*/*")
|
|
|
|
client := httpclient.NewClient(5 * time.Second)
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
http.Error(w, "favicon fetch failed", http.StatusBadGateway)
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Upstream favicon server issues a redirect or error.
|
|
if resp.StatusCode != http.StatusOK {
|
|
http.Error(w, "favicon not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
body, err := io.ReadAll(http.MaxBytesReader(w, resp.Body, 64*1024))
|
|
if err != nil {
|
|
http.Error(w, "favicon too large", http.StatusBadGateway)
|
|
return
|
|
}
|
|
|
|
etag := resp.Header.Get("ETag")
|
|
if etag == "" {
|
|
// Fallback ETag: hash of body.
|
|
h := sha256.Sum256(body)
|
|
etag = `"` + hex.EncodeToString(h[:8]) + `"`
|
|
}
|
|
|
|
// Store in cache.
|
|
faviconCache.Lock()
|
|
faviconCache.m[domain] = faviconCacheEntry{
|
|
body: body,
|
|
etag: etag,
|
|
cachedAt: now,
|
|
}
|
|
faviconCache.Unlock()
|
|
|
|
if etagMatch := r.Header.Get("If-None-Match"); etagMatch != "" && etagMatch == etag {
|
|
w.WriteHeader(http.StatusNotModified)
|
|
return
|
|
}
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
if contentType == "" {
|
|
contentType = "image/x-icon"
|
|
}
|
|
w.Header().Set("Content-Type", contentType)
|
|
w.Header().Set("ETag", etag)
|
|
w.Header().Set("Cache-Control", "private, max-age=3600")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write(body)
|
|
}
|