Add internal/httpclient package as a singleton RoundTripper used by all outbound engine requests (search, engines, autocomplete, upstream).

Key Transport settings:
- MaxIdleConnsPerHost = 20 (up from the Go default of 2)
- MaxIdleConns = 100
- IdleConnTimeout = 90s
- DialContext timeout = 5s

Previously, the default transport limited each host to 2 idle connections, forcing a new TCP+TLS handshake on every search for each engine. With 12 engines hitting the same upstream hosts in parallel, connections were constantly recycled. Now warm connections are reused across all goroutines and requests.
130 lines
3.8 KiB
Go
130 lines
3.8 KiB
Go
// samsa — a privacy-respecting metasearch engine
|
|
// Copyright (C) 2026-present metamorphosis-dev
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package upstream
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/metamorphosis-dev/samsa/internal/contracts"
|
|
"github.com/metamorphosis-dev/samsa/internal/httpclient"
|
|
)
|
|
|
|
// Client sends search requests to a single upstream instance over HTTP.
// Construct it with NewClient; the zero value has no HTTP client and must not
// be used.
type Client struct {
	// baseURL is the upstream root URL without a trailing slash, so endpoint
	// paths such as "/search" can be appended directly.
	baseURL string
	// http performs the outbound requests.
	http *http.Client
}
|
|
|
|
func NewClient(baseURL string, timeout time.Duration) (*Client, error) {
|
|
if strings.TrimSpace(baseURL) == "" {
|
|
return nil, errors.New("upstream base URL is empty")
|
|
}
|
|
|
|
u, err := url.Parse(baseURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid upstream base URL: %w", err)
|
|
}
|
|
if u.Scheme != "http" && u.Scheme != "https" {
|
|
return nil, fmt.Errorf("upstream URL must use http or https, got %q", u.Scheme)
|
|
}
|
|
// Normalize: trim trailing slash to make URL concatenation predictable.
|
|
base := strings.TrimRight(u.String(), "/")
|
|
|
|
if timeout <= 0 {
|
|
timeout = 10 * time.Second
|
|
}
|
|
|
|
return &Client{
|
|
baseURL: base,
|
|
http: httpclient.NewClient(timeout),
|
|
}, nil
|
|
}
|
|
|
|
func (c *Client) SearchJSON(ctx context.Context, req contracts.SearchRequest, engines []string) (contracts.SearchResponse, error) {
|
|
// Always request upstream JSON; the Go service will handle csv/rss later.
|
|
form := url.Values{}
|
|
form.Set("q", req.Query)
|
|
form.Set("format", "json")
|
|
form.Set("pageno", fmt.Sprintf("%d", req.Pageno))
|
|
form.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
|
|
form.Set("language", req.Language)
|
|
|
|
if req.TimeRange != nil {
|
|
form.Set("time_range", *req.TimeRange)
|
|
}
|
|
if req.TimeoutLimit != nil {
|
|
form.Set("timeout_limit", formatFloat(*req.TimeoutLimit))
|
|
}
|
|
if len(req.Categories) > 0 {
|
|
form.Set("categories", strings.Join(req.Categories, ","))
|
|
}
|
|
|
|
if len(engines) > 0 {
|
|
form.Set("engines", strings.Join(engines, ","))
|
|
}
|
|
|
|
for engineName, kv := range req.EngineData {
|
|
for key, value := range kv {
|
|
// Mirror the naming convention: `engine_data-<engine>-<key>=<value>`
|
|
form.Set(fmt.Sprintf("engine_data-%s-%s", engineName, key), value)
|
|
}
|
|
}
|
|
|
|
endpoint := c.baseURL + "/search"
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
|
|
|
|
resp, err := c.http.Do(httpReq)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 4*1024*1024))
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return contracts.SearchResponse{}, fmt.Errorf("upstream search failed with status %d", resp.StatusCode)
|
|
}
|
|
|
|
// Decode upstream JSON into our contract types.
|
|
var out contracts.SearchResponse
|
|
dec := json.NewDecoder(strings.NewReader(string(body)))
|
|
if err := dec.Decode(&out); err != nil {
|
|
return contracts.SearchResponse{}, fmt.Errorf("decode upstream JSON: %w", err)
|
|
}
|
|
|
|
return out, nil
|
|
}
|
|
|
|
// formatFloat renders f in fixed-point notation rounded to six decimal places,
// then drops trailing zeros and a dangling decimal point. This keeps the value
// stable for upstream parsing: 2.5 becomes "2.5" and 3 becomes "3".
func formatFloat(f float64) string {
	s := fmt.Sprintf("%.6f", f)
	// Trim zeros first; a whole number is then left ending in ".", which the
	// second trim removes.
	s = strings.TrimRight(s, "0")
	return strings.TrimRight(s, ".")
}
|
|
|