Exempt progress polls from rate limit; retry 429s on clip render

- ratelimit.py: exempt /v1/video, /v1/audio and /v1/loras progress polls from
  BOTH auth and rate limiting (shared _PROGRESS_PATHS), matching /v1/images.
  The township script polls /v1/video/progress ~1/s during a clip; because those
  polls were rate-limited, they ate the budget, so the generation POST got 429'd
  (clip failed) and the polls themselves 429'd (stuck step bar).
- township _render_once: a 429 now backs off and retries the same render (up to
  40 retries, each backoff capped at 60s) instead of abandoning the clip; covers
  clips, chained parts and outcomes. Genuine errors still fail fast.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
parent 2ec9c384
...@@ -31,10 +31,23 @@ from fastapi.responses import JSONResponse ...@@ -31,10 +31,23 @@ from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.base import BaseHTTPMiddleware
# Lightweight, read-only generation-progress polls. Clients (e.g. the township
# script) poll these ~once/second WHILE a generation runs, so they must be exempt
# from BOTH auth and rate limiting — otherwise the polls consume the rate budget
# and the actual generation request gets 429'd (and the polls themselves 429,
# leaving the step bar stuck).
_PROGRESS_PATHS = {
"/v1/images/progress",
"/v1/video/progress",
"/v1/audio/progress",
"/v1/loras/progress",
}
class BearerAuthMiddleware(BaseHTTPMiddleware): class BearerAuthMiddleware(BaseHTTPMiddleware):
"""Reject /v1/ API requests that lack a valid Bearer token or active web session.""" """Reject /v1/ API requests that lack a valid Bearer token or active web session."""
_EXEMPT_PATHS = {"/v1/images/progress"} _EXEMPT_PATHS = _PROGRESS_PATHS
async def dispatch(self, request: Request, call_next): async def dispatch(self, request: Request, call_next):
path = request.url.path path = request.url.path
...@@ -121,7 +134,7 @@ class RateLimitMiddleware(BaseHTTPMiddleware): ...@@ -121,7 +134,7 @@ class RateLimitMiddleware(BaseHTTPMiddleware):
return "" return ""
# Lightweight polling endpoints that must never be rate-limited # Lightweight polling endpoints that must never be rate-limited
_EXEMPT_PATHS = {"/v1/images/progress"} _EXEMPT_PATHS = _PROGRESS_PATHS
async def dispatch(self, request: Request, call_next): async def dispatch(self, request: Request, call_next):
if not RATE_LIMITING_ENABLED: if not RATE_LIMITING_ENABLED:
......
...@@ -2416,28 +2416,44 @@ def _stage_videos_render(client, video_model, video_dir, fight_plan, outcome_pla ...@@ -2416,28 +2416,44 @@ def _stage_videos_render(client, video_model, video_dir, fight_plan, outcome_pla
video_lora_map, video_slug, _cw) video_lora_map, video_slug, _cw)
+ _env_video_lora_specs_for(env, env_video_lora_map, + _env_video_lora_specs_for(env, env_video_lora_map,
video_slug, _ew)) or None video_slug, _ew)) or None
try: # Rate-limit (429) is transient — the server is just busy — so back off and
mp4 = _run_with_spinner( # RETRY the same render instead of abandoning the clip. Only a genuine
label, client.generate_video_clip, # error (or too many 429s) marks the clip failed.
prompt=prompt, model=video_model, _rl_attempts = 0
character_profiles=profiles, environment_name=env, _RL_MAX = 40
num_frames=nf, fps=fps, seed=random.randint(0, 2**31), while True:
width=_vw, height=_vh, try:
init_image=init_image, loras=loras, mp4 = _run_with_spinner(
poll_fn=client.video_progress, step_cb=step_cb, label, client.generate_video_clip,
) prompt=prompt, model=video_model,
Path(out_path).write_bytes(mp4) character_profiles=profiles, environment_name=env,
return True, (get_video_duration(out_path) or None), False num_frames=nf, fps=fps, seed=random.randint(0, 2**31),
except Exception as e: width=_vw, height=_vh,
if _is_fatal(e): init_image=init_image, loras=loras,
_log(f" ✗ Fatal: {e}") poll_fn=client.video_progress, step_cb=step_cb,
return False, None, True )
err_str = str(e) Path(out_path).write_bytes(mp4)
is_rate_limit = "429" in err_str or "rate limit" in err_str.lower() return True, (get_video_duration(out_path) or None), False
backoff = clip_delay * (4 if is_rate_limit else 2) except Exception as e:
_log(f" ✗ failed: {e} (waiting {backoff:.0f}s)") if _is_fatal(e):
time.sleep(backoff) _log(f" ✗ Fatal: {e}")
return False, None, False return False, None, True
err_str = str(e)
is_rate_limit = "429" in err_str or "rate limit" in err_str.lower()
if is_rate_limit:
_rl_attempts += 1
if _rl_attempts > _RL_MAX:
_log(f" ✗ still rate-limited after {_RL_MAX} retries — giving up on this clip")
return False, None, False
backoff = min(clip_delay * 4, 60)
_log(f" ⏳ rate limited (429) — backing off {backoff:.0f}s and "
f"retrying (attempt {_rl_attempts}/{_RL_MAX})")
time.sleep(backoff)
continue # retry — do NOT fail the clip on a 429
backoff = clip_delay * 2
_log(f" ✗ failed: {e} (waiting {backoff:.0f}s)")
time.sleep(backoff)
return False, None, False
def _render(label, prompt, profiles, env, nf, out_path, stem=None, fighters=None, def _render(label, prompt, profiles, env, nf, out_path, stem=None, fighters=None,
step_cb=None): step_cb=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment