Commit c8f7c8d9 authored by Your Name

Add --image-cpu-offload option and fix sequential offload logic

- Add --image-cpu-offload CLI flag for explicit sequential CPU offload
- Enable sequential CPU offload only on 3rd OOM retry or when --image-cpu-offload is set
parent ac005426
......@@ -3754,10 +3754,8 @@ async def create_image_generation(request: ImageGenerationRequest, http_request:
torch_dtype = precision_map.get(precision, torch.float32)
print(f"Using precision: {precision} ({torch_dtype})")
-# Check if offload strategy is specified (for auto-OOM handling)
-offload_strategy = getattr(global_args, 'offload_strategy', None)
-offload_dir = getattr(global_args, 'offload_dir', None)
-use_sequential_offload = offload_strategy is not None or offload_dir is not None
+# Check if CPU offload is requested via CLI
+use_sequential_offload = getattr(global_args, 'image_cpu_offload', False)
# Track loading attempts for OOM handling
load_attempt = 0
......@@ -5151,6 +5149,11 @@ def parse_args():
choices=["bf16", "f32", "f16", "f8"],
help="Model precision for image generation (default: f32). bf16 recommended for modern GPUs.",
)
+parser.add_argument(
+"--image-cpu-offload",
+action="store_true",
+help="Enable sequential CPU offload for image models (lower VRAM usage).",
+)
parser.add_argument(
"--image-seed",
type=int,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment