Add automatic retry for 429/503 with exponential backoff
This commit is contained in:
parent
29292addac
commit
78b3239bbd
3 changed files with 89 additions and 7 deletions
17
README.md
17
README.md
|
|
@ -24,11 +24,17 @@ Create a `config.yaml` file in the working directory:
|
||||||
```yaml
|
```yaml
|
||||||
port: 8080
|
port: 8080
|
||||||
upstream_url: "https://api.z.ai/api/anthropic"
|
upstream_url: "https://api.z.ai/api/anthropic"
|
||||||
|
|
||||||
|
# Retry configuration
|
||||||
|
max_retries: 3
|
||||||
|
retry_base_delay_ms: 1000
|
||||||
```
|
```
|
||||||
|
|
||||||
- `port`: Port to listen on (default: 8080)
|
- `port`: Port to listen on (default: 8080)
|
||||||
- `upstream_url`: Base URL for the Anthropic-compatible upstream API
|
- `upstream_url`: Base URL for the Anthropic-compatible upstream API
|
||||||
- `temperature` (optional): Override temperature for all requests. If set, this value is used instead of client-specified temperatures. Remove this line to respect client temperatures.
|
- `temperature` (optional): Override temperature for all requests. If set, this value is used instead of client-specified temperatures. Remove this line to respect client temperatures.
|
||||||
|
- `max_retries`: Maximum retry attempts for transient errors (429, 503). Default: 3. Note: a value of 0 is treated as unset and falls back to the default of 3, so setting 0 does not disable retries.
|
||||||
|
- `retry_base_delay_ms`: Base delay in milliseconds for exponential backoff. Default: 1000. Delay formula: `base_delay_ms * 2^(attempt-1)`, plus random jitter of up to +50% of that delay.
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
|
||||||
|
|
@ -184,6 +190,17 @@ The proxy sets these headers on all upstream requests to mimic the claude-code C
|
||||||
| `X-Claude-Code-Session-Id` | Random UUID generated at startup |
|
| `X-Claude-Code-Session-Id` | Random UUID generated at startup |
|
||||||
| `content-type` | `application/json` |
|
| `content-type` | `application/json` |
|
||||||
|
|
||||||
|
## Retry Behavior
|
||||||
|
|
||||||
|
When the upstream returns a retryable error (HTTP 429 or 503), proxx automatically retries with exponential backoff:
|
||||||
|
|
||||||
|
- **Exponential backoff**: Delay doubles on each retry (1s, 2s, 4s, ...)
|
||||||
|
- **Jitter**: A random extra wait of up to 50% of the delay is added to each retry to avoid a thundering herd
|
||||||
|
- **Retryable statuses**: 429 (rate limit), 503 (service unavailable)
|
||||||
|
- **Logged**: All retry attempts are logged with attempt number, delay, and jitter
|
||||||
|
|
||||||
|
This improves resilience against temporary upstream issues without client intervention.
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
### Blocked Headers
|
### Blocked Headers
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,10 @@ upstream_url: "https://api.z.ai/api/anthropic"
|
||||||
# Remove this line or set to null to use client-specified temperatures
|
# Remove this line or set to null to use client-specified temperatures
|
||||||
# temperature: 0.7
|
# temperature: 0.7
|
||||||
|
|
||||||
|
# Retry configuration for transient errors (429, 503)
|
||||||
|
max_retries: 3
|
||||||
|
retry_base_delay_ms: 1000
|
||||||
|
|
||||||
models:
|
models:
|
||||||
- id: "glm-4.7"
|
- id: "glm-4.7"
|
||||||
owned_by: "zhipu"
|
owned_by: "zhipu"
|
||||||
|
|
|
||||||
75
handler.go
75
handler.go
|
|
@ -22,6 +22,9 @@ type Config struct {
|
||||||
UpstreamURL string `yaml:"upstream_url"`
|
UpstreamURL string `yaml:"upstream_url"`
|
||||||
Models []ModelConfig `yaml:"models"`
|
Models []ModelConfig `yaml:"models"`
|
||||||
Temperature *float64 `yaml:"temperature,omitempty"`
|
Temperature *float64 `yaml:"temperature,omitempty"`
|
||||||
|
// Retry configuration
|
||||||
|
MaxRetries int `yaml:"max_retries,omitempty"` // Maximum retry attempts for 429/503
|
||||||
|
RetryBaseDelayMs int `yaml:"retry_base_delay_ms,omitempty"` // Base delay in ms (exponential backoff)
|
||||||
}
|
}
|
||||||
|
|
||||||
var config *Config
|
var config *Config
|
||||||
|
|
@ -229,6 +232,12 @@ func handleChatCompletions(w http.ResponseWriter, r *http.Request) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// retryableStatuses are HTTP status codes that should trigger a retry
|
||||||
|
var retryableStatuses = map[int]bool{
|
||||||
|
429: true, // Rate limit
|
||||||
|
503: true, // Service unavailable
|
||||||
|
}
|
||||||
|
|
||||||
func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Response, error) {
|
func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Response, error) {
|
||||||
bodyBytes, err := json.Marshal(req)
|
bodyBytes, err := json.Marshal(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -236,17 +245,69 @@ func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Respon
|
||||||
}
|
}
|
||||||
|
|
||||||
upstreamURL := config.UpstreamURL + "/v1/messages"
|
upstreamURL := config.UpstreamURL + "/v1/messages"
|
||||||
httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
|
|
||||||
if err != nil {
|
// Get retry config with defaults
|
||||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
maxRetries := config.MaxRetries
|
||||||
|
if maxRetries == 0 {
|
||||||
|
maxRetries = 3
|
||||||
|
}
|
||||||
|
baseDelayMs := config.RetryBaseDelayMs
|
||||||
|
if baseDelayMs == 0 {
|
||||||
|
baseDelayMs = 1000
|
||||||
}
|
}
|
||||||
|
|
||||||
headers := ClaudeCodeHeaders(apiKey, sessionID)
|
var lastResp *http.Response
|
||||||
for k, v := range headers {
|
var lastErr error
|
||||||
httpReq.Header.Set(k, v)
|
|
||||||
|
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||||
|
if attempt > 0 {
|
||||||
|
delay := time.Duration(baseDelayMs*(1<<(attempt-1))) * time.Millisecond
|
||||||
|
// Add jitter (0 to +50% of delay) to avoid thundering herd
|
||||||
|
jitter := time.Duration(rand.Intn(int(delay))) / 2
|
||||||
|
log.Printf("[retry] Attempt %d/%d after %v (jitter: %v)", attempt, maxRetries, delay, jitter)
|
||||||
|
time.Sleep(delay + jitter)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
|
||||||
|
if err != nil {
|
||||||
|
lastErr = fmt.Errorf("failed to create request: %w", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
headers := ClaudeCodeHeaders(apiKey, sessionID)
|
||||||
|
for k, v := range headers {
|
||||||
|
httpReq.Header.Set(k, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := httpClient.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
log.Printf("[retry] Request failed: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if status is retryable
|
||||||
|
if !retryableStatuses[resp.StatusCode] {
|
||||||
|
log.Printf("[retry] Success or non-retryable status %d", resp.StatusCode)
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close previous response body if exists
|
||||||
|
if lastResp != nil {
|
||||||
|
lastResp.Body.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read and log the error body for debugging
|
||||||
|
respBody, _ := io.ReadAll(resp.Body)
|
||||||
|
resp.Body.Close()
|
||||||
|
log.Printf("[retry] Retryable status %d, body: %s", resp.StatusCode, string(respBody))
|
||||||
|
|
||||||
|
lastResp = resp
|
||||||
|
lastErr = fmt.Errorf("upstream returned status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
return httpClient.Do(httpReq)
|
// All retries exhausted
|
||||||
|
return nil, lastErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeError(w http.ResponseWriter, code int, message, errType, errCode string) {
|
func writeError(w http.ResponseWriter, code int, message, errType, errCode string) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue