Add sequential offload strategy with fine-grained 2% VRAM incremental steps

e23c3f7f · Stefy Lanza (nextime / spora ) · d9a5d274 · e23c3f7f
Commit e23c3f7f authored Mar 01, 2026 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 1 deletion

coderai coderai +7 -1

No files found.
--- a/coderai
+++ b/coderai
@@ -610,6 +610,12 @@ class NvidiaBackend(ModelBackend):
            if is_moe:
                return [0.85, 0.80, 0.75, 0.70, 0.65, 0.60, 0.50, 0.40, 0.30, 0.20, 0.0]
            return [0.95, 0.90, 0.85, 0.80, 0.75, 0.70, 0.65, 0.50, 0.40, 0.30, 0.20, 0.0]
+        elif strategy == "sequential":
+            print(f"  Using sequential offload strategy - fine-grained incremental VRAM reduction")
+            # Fine-grained 2% steps at the high end, then coarser steps below ~0.60-0.65, ending at 0.0 (no VRAM)
+            if is_moe:
+                return [0.80, 0.78, 0.76, 0.74, 0.72, 0.70, 0.68, 0.66, 0.64, 0.62, 0.60, 0.55, 0.50, 0.45, 0.40, 0.35, 0.30, 0.25, 0.20, 0.0]
+            return [0.93, 0.91, 0.89, 0.87, 0.85, 0.83, 0.81, 0.79, 0.77, 0.75, 0.73, 0.71, 0.69, 0.67, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40, 0.35, 0.30, 0.20, 0.0]
        else:  # auto
            if total_vram_gb < 3:
                print(f"  Detected small GPU ({total_vram_gb:.1f}GB), using aggressive VRAM usage (99% start)")
@@ -2036,7 +2042,7 @@ def parse_args():
    parser.add_argument(
        "--offload-strategy",
        type=str,
-        choices=["auto", "conservative", "balanced", "aggressive"],
+        choices=["auto", "conservative", "balanced", "aggressive", "sequential"],
        default="auto",
        help="Offload strategy for NVIDIA backend (default: auto)",
    )