diff --git a/README.md b/README.md index db1222a..8ff6153 100644 --- a/README.md +++ b/README.md @@ -24,11 +24,17 @@ Create a `config.yaml` file in the working directory: ```yaml port: 8080 upstream_url: "https://api.z.ai/api/anthropic" + +# Retry configuration +max_retries: 3 +retry_base_delay_ms: 1000 ``` - `port`: Port to listen on (default: 8080) - `upstream_url`: Base URL for the Anthropic-compatible upstream API - `temperature` (optional): Override temperature for all requests. If set, this value is used instead of client-specified temperatures. Remove this line to respect client temperatures. +- `max_retries`: Maximum retry attempts for transient errors (429, 503). Default: 3. Set to 0 to disable retries. +- `retry_base_delay_ms`: Base delay in milliseconds for exponential backoff. Default: 1000. Delay formula: `base_delay_ms * 2^(attempt-1)` with ±50% jitter. ## Building @@ -184,6 +190,17 @@ The proxy sets these headers on all upstream requests to mimic the claude-code C | `X-Claude-Code-Session-Id` | Random UUID generated at startup | | `content-type` | `application/json` | +## Retry Behavior + +When the upstream returns a retryable error (HTTP 429 or 503), proxx automatically retries with exponential backoff: + +- **Exponential backoff**: Delay doubles on each retry (1s, 2s, 4s, ...) +- **Jitter**: ±50% random variation added to each delay to avoid thundering herd +- **Retryable statuses**: 429 (rate limit), 503 (service unavailable) +- **Logged**: All retry attempts are logged with attempt number, delay, and jitter + +This improves resilience against temporary upstream issues without client intervention. 
+
 ## Security
 
 ### Blocked Headers
diff --git a/config.yaml b/config.yaml
index da433a6..91151c8 100644
--- a/config.yaml
+++ b/config.yaml
@@ -6,6 +6,10 @@ upstream_url: "https://api.z.ai/api/anthropic"
 # Remove this line or set to null to use client-specified temperatures
 # temperature: 0.7
 
+# Retry configuration for transient errors (429, 503)
+max_retries: 3
+retry_base_delay_ms: 1000
+
 models:
   - id: "glm-4.7"
     owned_by: "zhipu"
diff --git a/handler.go b/handler.go
index b5768ce..d7256eb 100644
--- a/handler.go
+++ b/handler.go
@@ -22,6 +22,11 @@ type Config struct {
 	UpstreamURL string `yaml:"upstream_url"`
 	Models []ModelConfig `yaml:"models"`
 	Temperature *float64 `yaml:"temperature,omitempty"`
+	// Retry configuration for transient upstream errors (429/503).
+	// MaxRetries is a pointer so an explicit `max_retries: 0` (disable
+	// retries, per the README) is distinguishable from the key being
+	// absent, in which case the default of 3 applies.
+	MaxRetries       *int `yaml:"max_retries,omitempty"`
+	RetryBaseDelayMs int  `yaml:"retry_base_delay_ms,omitempty"` // base backoff delay in ms; <=0 falls back to 1000
 }
 
 var config *Config
@@ -229,6 +234,12 @@ func handleChatCompletions(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
+// retryableStatuses are HTTP status codes that should trigger a retry.
+var retryableStatuses = map[int]bool{
+	429: true, // Rate limit
+	503: true, // Service unavailable
+}
+
 func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Response, error) {
 	bodyBytes, err := json.Marshal(req)
 	if err != nil {
@@ -236,17 +247,76 @@ func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Respon
 	}
 
 	upstreamURL := config.UpstreamURL + "/v1/messages"
-	httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
-	if err != nil {
-		return nil, fmt.Errorf("failed to create request: %w", err)
+
+	// Resolve retry config. A nil MaxRetries means the key was absent,
+	// so the documented default of 3 applies; an explicit 0 disables
+	// retries entirely (matches the README contract).
+	maxRetries := 3
+	if config.MaxRetries != nil {
+		maxRetries = *config.MaxRetries
+	}
+	baseDelayMs := config.RetryBaseDelayMs
+	if baseDelayMs <= 0 {
+		baseDelayMs = 1000
 	}
 
-	headers := ClaudeCodeHeaders(apiKey, sessionID)
-	for k, v := range headers {
-		httpReq.Header.Set(k, v)
+	var lastErr error
+
+	// Attempt 0 is the initial request; attempts 1..maxRetries are retries.
+	for attempt := 0; attempt <= maxRetries; attempt++ {
+		if attempt > 0 {
+			// Exponential backoff: baseDelay * 2^(attempt-1).
+			delay := time.Duration(baseDelayMs) * time.Millisecond << (attempt - 1)
+			// True ±50% jitter: uniform in [-delay/2, +delay/2) to avoid
+			// thundering herd. NOTE(review): this uses math/rand, which
+			// this diff does not add to handler.go's import block — verify
+			// it is already imported.
+			jitter := time.Duration(rand.Int63n(int64(delay))) - delay/2
+			sleep := delay + jitter
+			log.Printf("[retry] Attempt %d/%d after %v (base: %v, jitter: %v)", attempt, maxRetries, sleep, delay, jitter)
+			time.Sleep(sleep)
+		}
+
+		// A fresh body reader is required on every attempt; the previous
+		// one has already been consumed by the transport.
+		httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
+		if err != nil {
+			// Request construction is deterministic; retrying cannot help.
+			return nil, fmt.Errorf("failed to create request: %w", err)
+		}
+
+		for k, v := range ClaudeCodeHeaders(apiKey, sessionID) {
+			httpReq.Header.Set(k, v)
+		}
+
+		resp, err := httpClient.Do(httpReq)
+		if err != nil {
+			lastErr = err
+			log.Printf("[retry] Request failed: %v", err)
+			continue
+		}
+
+		// Success or a non-retryable error: hand straight to the caller.
+		if !retryableStatuses[resp.StatusCode] {
+			return resp, nil
+		}
+
+		if attempt == maxRetries {
+			// Out of retries. Return the upstream error response with its
+			// body unread so the client receives the real 429/503 payload
+			// instead of a generic proxy error.
+			log.Printf("[retry] Giving up after %d attempt(s) with status %d", attempt+1, resp.StatusCode)
+			return resp, nil
+		}
+
+		// Drain and close the body so the transport can reuse the
+		// connection, and log it for debugging.
+		respBody, _ := io.ReadAll(resp.Body)
+		resp.Body.Close()
+		log.Printf("[retry] Retryable status %d, body: %s", resp.StatusCode, string(respBody))
+		lastErr = fmt.Errorf("upstream returned status %d", resp.StatusCode)
 	}
 
-	return httpClient.Do(httpReq)
+	// Reachable only when every attempt failed at the transport level
+	// before an HTTP response was received.
+	return nil, lastErr
 }
 
 func writeError(w http.ResponseWriter, code int, message, errType, errCode string) {