Add automatic retry for 429/503 with exponential backoff

2026-04-16 19:00:37 +00:00 · 2026-04-16 19:00:37 +00:00 · 78b3239bbd
commit 78b3239bbd
parent 29292addac
3 changed files with 89 additions and 7 deletions
--- a/README.md
+++ b/README.md
@ -24,11 +24,17 @@ Create a `config.yaml` file in the working directory:
 ```yaml
 port: 8080
 upstream_url: "https://api.z.ai/api/anthropic"
 # Retry configuration
 max_retries: 3
 retry_base_delay_ms: 1000
 ```
 - `port`: Port to listen on (default: 8080)
 - `upstream_url`: Base URL for the Anthropic-compatible upstream API
 - `temperature` (optional): Override temperature for all requests. If set, this value is used instead of client-specified temperatures. Remove this line to respect client temperatures.
 - `max_retries`: Maximum retry attempts for transient errors (429, 503). Default: 3. A value of 0 is treated the same as omitting the setting and falls back to the default of 3.
 - `retry_base_delay_ms`: Base delay in milliseconds for exponential backoff. Default: 1000. Delay formula: `base_delay_ms * 2^(attempt-1)` with ±50% jitter.
 ## Building
@ -184,6 +190,17 @@ The proxy sets these headers on all upstream requests to mimic the claude-code C
 | `X-Claude-Code-Session-Id` | Random UUID generated at startup |
 | `content-type` | `application/json` |
 ## Retry Behavior
 When the upstream returns a retryable error (HTTP 429 or 503), proxx automatically retries with exponential backoff:
 - **Exponential backoff**: Delay doubles on each retry (1s, 2s, 4s, ...)
 - **Jitter**: ±50% random variation added to each delay to avoid thundering herd
 - **Retryable statuses**: 429 (rate limit), 503 (service unavailable). Requests that fail before any response is received (transport errors) are retried the same way.
 - **Logged**: All retry attempts are logged with attempt number, delay, and jitter
 This improves resilience against temporary upstream issues without client intervention.
 ## Security
 ### Blocked Headers
--- a/config.yaml
+++ b/config.yaml
@ -6,6 +6,10 @@ upstream_url: "https://api.z.ai/api/anthropic"
 # Remove this line or set to null to use client-specified temperatures
 # temperature: 0.7
 # Retry configuration for transient errors (429, 503)
 max_retries: 3
 retry_base_delay_ms: 1000
 models:
  - id: "glm-4.7"
    owned_by: "zhipu"
--- a/handler.go
+++ b/handler.go
@ -22,6 +22,9 @@ type Config struct {
 	UpstreamURL string       `yaml:"upstream_url"`
 	Models      []ModelConfig `yaml:"models"`
 	Temperature *float64     `yaml:"temperature,omitempty"`
 	// Retry configuration
 	MaxRetries      int `yaml:"max_retries,omitempty"` // Maximum retry attempts for 429/503
 	RetryBaseDelayMs int `yaml:"retry_base_delay_ms,omitempty"` // Base delay in ms (exponential backoff)
 }
 var config *Config
@ -229,6 +232,12 @@ func handleChatCompletions(w http.ResponseWriter, r *http.Request) {
 	}
 }
 // retryableStatuses is the set of upstream HTTP status codes that should
 // trigger a retry. Any status not listed here is passed through as-is.
 var retryableStatuses = map[int]bool{
 	http.StatusTooManyRequests:    true, // 429: rate limit
 	http.StatusServiceUnavailable: true, // 503: service unavailable
 }
 func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Response, error) {
 	bodyBytes, err := json.Marshal(req)
 	if err != nil {
@ -236,17 +245,69 @@ func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Respon
 	}
 	upstreamURL := config.UpstreamURL + "/v1/messages"
-	httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
+
-	if err != nil {
+	// Get retry config with defaults
-		return nil, fmt.Errorf("failed to create request: %w", err)
+	maxRetries := config.MaxRetries
 	if maxRetries == 0 {
 		maxRetries = 3
 	}
 	baseDelayMs := config.RetryBaseDelayMs
 	if baseDelayMs == 0 {
 		baseDelayMs = 1000
 	}
-	headers := ClaudeCodeHeaders(apiKey, sessionID)
+	var lastResp *http.Response
-	for k, v := range headers {
+	var lastErr error
-		httpReq.Header.Set(k, v)
+
 	for attempt := 0; attempt <= maxRetries; attempt++ {
 		if attempt > 0 {
 			delay := time.Duration(baseDelayMs*(1<<(attempt-1))) * time.Millisecond
 			// Add jitter (±50%) to avoid thundering herd
 			jitter := time.Duration(rand.Intn(int(delay))) / 2
 			log.Printf("[retry] Attempt %d/%d after %v (jitter: %v)", attempt, maxRetries, delay, jitter)
 			time.Sleep(delay + jitter)
 		}
 		httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
 		if err != nil {
 			lastErr = fmt.Errorf("failed to create request: %w", err)
 			continue
 		}
 		headers := ClaudeCodeHeaders(apiKey, sessionID)
 		for k, v := range headers {
 			httpReq.Header.Set(k, v)
 		}
 		resp, err := httpClient.Do(httpReq)
 		if err != nil {
 			lastErr = err
 			log.Printf("[retry] Request failed: %v", err)
 			continue
 		}
 		// Check if status is retryable
 		if !retryableStatuses[resp.StatusCode] {
 			log.Printf("[retry] Success or non-retryable status %d", resp.StatusCode)
 			return resp, nil
 		}
 		// Close previous response body if exists
 		if lastResp != nil {
 			lastResp.Body.Close()
 		}
 		// Read and log the error body for debugging
 		respBody, _ := io.ReadAll(resp.Body)
 		resp.Body.Close()
 		log.Printf("[retry] Retryable status %d, body: %s", resp.StatusCode, string(respBody))
 		lastResp = resp
 		lastErr = fmt.Errorf("upstream returned status %d", resp.StatusCode)
 	}
-	return httpClient.Do(httpReq)
+	// All retries exhausted
 	return nil, lastErr
 }
 func writeError(w http.ResponseWriter, code int, message, errType, errCode string) {