Exempt progress polls from rate limit; retry 429s on clip render

- ratelimit.py: exempt /v1/video, /v1/audio and /v1/loras progress polls from
  BOTH auth and rate limiting (shared _PROGRESS_PATHS), matching /v1/images.
  The township script polls /v1/video/progress ~1/s during a clip; because those
  polls counted against the rate limit, they ate the budget, so the generation
  POST got 429'd (clip failed) and the polls themselves 429'd (stuck step bar).
- township _render_once: a 429 now backs off and retries the same render (up to
  40 retries, each backoff capped at 60s) instead of abandoning the clip; covers
  clips, chained parts and outcomes. Genuine errors still fail fast.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 2ec9c384
......@@ -31,10 +31,23 @@ from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware
# Lightweight, read-only generation-progress polls. Clients (e.g. the township
# script) poll these ~once/second WHILE a generation runs, so they must be exempt
# from BOTH auth and rate limiting — otherwise the polls consume the rate budget
# and the actual generation request gets 429'd (and the polls themselves 429,
# leaving the step bar stuck).
_PROGRESS_PATHS = {
"/v1/images/progress",
"/v1/video/progress",
"/v1/audio/progress",
"/v1/loras/progress",
}
class BearerAuthMiddleware(BaseHTTPMiddleware):
"""Reject /v1/ API requests that lack a valid Bearer token or active web session."""
_EXEMPT_PATHS = {"/v1/images/progress"}
_EXEMPT_PATHS = _PROGRESS_PATHS
async def dispatch(self, request: Request, call_next):
path = request.url.path
......@@ -121,7 +134,7 @@ class RateLimitMiddleware(BaseHTTPMiddleware):
return ""
# Lightweight polling endpoints that must never be rate-limited
_EXEMPT_PATHS = {"/v1/images/progress"}
_EXEMPT_PATHS = _PROGRESS_PATHS
async def dispatch(self, request: Request, call_next):
if not RATE_LIMITING_ENABLED:
......
......@@ -2416,6 +2416,12 @@ def _stage_videos_render(client, video_model, video_dir, fight_plan, outcome_pla
video_lora_map, video_slug, _cw)
+ _env_video_lora_specs_for(env, env_video_lora_map,
video_slug, _ew)) or None
# Rate-limit (429) is transient — the server is just busy — so back off and
# RETRY the same render instead of abandoning the clip. Only a genuine
# error (or too many 429s) marks the clip failed.
_rl_attempts = 0
_RL_MAX = 40
while True:
try:
mp4 = _run_with_spinner(
label, client.generate_video_clip,
......@@ -2434,7 +2440,17 @@ def _stage_videos_render(client, video_model, video_dir, fight_plan, outcome_pla
return False, None, True
err_str = str(e)
is_rate_limit = "429" in err_str or "rate limit" in err_str.lower()
backoff = clip_delay * (4 if is_rate_limit else 2)
if is_rate_limit:
_rl_attempts += 1
if _rl_attempts > _RL_MAX:
_log(f" ✗ still rate-limited after {_RL_MAX} retries — giving up on this clip")
return False, None, False
backoff = min(clip_delay * 4, 60)
_log(f" ⏳ rate limited (429) — backing off {backoff:.0f}s and "
f"retrying (attempt {_rl_attempts}/{_RL_MAX})")
time.sleep(backoff)
continue # retry — do NOT fail the clip on a 429
backoff = clip_delay * 2
_log(f" ✗ failed: {e} (waiting {backoff:.0f}s)")
time.sleep(backoff)
return False, None, False
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment