Add --image-cpu-offload option and fix sequential offload logic

- Add --image-cpu-offload CLI flag for explicit sequential CPU offload
- Enable sequential CPU offload only on 3rd OOM retry or when --image-cpu-offload is set

Add --image-cpu-offload option and fix sequential offload logic
- Add --image-cpu-offload CLI flag for explicit sequential CPU offload
- Enable sequential CPU offload only on 3rd OOM retry or when --image-cpu-offload is set
c8f7c8d9 · Your Name · ac005426 · c8f7c8d9
Commit c8f7c8d9 authored Mar 15, 2026 by Your Name
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

coderai coderai +7 -4

No files found.
--- a/coderai
+++ b/coderai
@@ -3754,10 +3754,8 @@ async def create_image_generation(request: ImageGenerationRequest, http_request:
            torch_dtype = precision_map.get(precision, torch.float32)
            print(f"Using precision: {precision} ({torch_dtype})")
            
-            # Check if offload strategy is specified (for auto-OOM handling)
-            offload_strategy = getattr(global_args, 'offload_strategy', None)
-            offload_dir = getattr(global_args, 'offload_dir', None)
-            use_sequential_offload = offload_strategy is not None or offload_dir is not None
+            # Check if CPU offload is requested via CLI
+            use_sequential_offload = getattr(global_args, 'image_cpu_offload', False)
            
            # Track loading attempts for OOM handling
            load_attempt = 0
@@ -5151,6 +5149,11 @@ def parse_args():
        choices=["bf16", "f32", "f16", "f8"],
        help="Model precision for image generation (default: f32). bf16 recommended for modern GPUs.",
    )
+    parser.add_argument(
+        "--image-cpu-offload",
+        action="store_true",
+        help="Enable sequential CPU offload for image models (lower VRAM usage).",
+    )
    parser.add_argument(
        "--image-seed",
        type=int,