Add automatic retry for 429/503 with exponential backoff
This commit is contained in:
parent
29292addac
commit
78b3239bbd
3 changed files with 89 additions and 7 deletions
75
handler.go
75
handler.go
|
|
@ -22,6 +22,9 @@ type Config struct {
|
|||
UpstreamURL string `yaml:"upstream_url"`
|
||||
Models []ModelConfig `yaml:"models"`
|
||||
Temperature *float64 `yaml:"temperature,omitempty"`
|
||||
// Retry configuration
|
||||
MaxRetries int `yaml:"max_retries,omitempty"` // Maximum retry attempts for 429/503 responses and failed requests (0 = default of 3)
|
||||
RetryBaseDelayMs int `yaml:"retry_base_delay_ms,omitempty"` // Base delay in ms, doubled on each retry (0 = default of 1000)
|
||||
}
|
||||
|
||||
var config *Config
|
||||
|
|
@ -229,6 +232,12 @@ func handleChatCompletions(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
}
|
||||
|
||||
// retryableStatuses are HTTP status codes that should trigger a retry
|
||||
var retryableStatuses = map[int]bool{
|
||||
429: true, // Rate limit
|
||||
503: true, // Service unavailable
|
||||
}
|
||||
|
||||
func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Response, error) {
|
||||
bodyBytes, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
|
|
@ -236,17 +245,69 @@ func callUpstream(req *AnthropicRequest, apiKey, sessionID string) (*http.Respon
|
|||
}
|
||||
|
||||
upstreamURL := config.UpstreamURL + "/v1/messages"
|
||||
httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
|
||||
// Get retry config with defaults
|
||||
maxRetries := config.MaxRetries
|
||||
if maxRetries == 0 {
|
||||
maxRetries = 3
|
||||
}
|
||||
baseDelayMs := config.RetryBaseDelayMs
|
||||
if baseDelayMs == 0 {
|
||||
baseDelayMs = 1000
|
||||
}
|
||||
|
||||
headers := ClaudeCodeHeaders(apiKey, sessionID)
|
||||
for k, v := range headers {
|
||||
httpReq.Header.Set(k, v)
|
||||
var lastResp *http.Response
|
||||
var lastErr error
|
||||
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
if attempt > 0 {
|
||||
delay := time.Duration(baseDelayMs*(1<<(attempt-1))) * time.Millisecond
|
||||
// Add jitter (a random extra 0-50% of the delay, never negative) to avoid thundering herd
|
||||
jitter := time.Duration(rand.Intn(int(delay))) / 2
|
||||
log.Printf("[retry] Attempt %d/%d after %v (jitter: %v)", attempt, maxRetries, delay, jitter)
|
||||
time.Sleep(delay + jitter)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequest(http.MethodPost, upstreamURL, strings.NewReader(string(bodyBytes)))
|
||||
if err != nil {
|
||||
lastErr = fmt.Errorf("failed to create request: %w", err)
|
||||
continue
|
||||
}
|
||||
|
||||
headers := ClaudeCodeHeaders(apiKey, sessionID)
|
||||
for k, v := range headers {
|
||||
httpReq.Header.Set(k, v)
|
||||
}
|
||||
|
||||
resp, err := httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
log.Printf("[retry] Request failed: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if status is retryable
|
||||
if !retryableStatuses[resp.StatusCode] {
|
||||
log.Printf("[retry] Success or non-retryable status %d", resp.StatusCode)
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Close previous response body if exists
|
||||
if lastResp != nil {
|
||||
lastResp.Body.Close()
|
||||
}
|
||||
|
||||
// Read and log the error body for debugging
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
log.Printf("[retry] Retryable status %d, body: %s", resp.StatusCode, string(respBody))
|
||||
|
||||
lastResp = resp
|
||||
lastErr = fmt.Errorf("upstream returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
return httpClient.Do(httpReq)
|
||||
// All retries exhausted
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
func writeError(w http.ResponseWriter, code int, message, errType, errCode string) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue