Add sequential offload strategy with fine-grained 2% VRAM incremental steps

parent d9a5d274
......@@ -610,6 +610,12 @@ class NvidiaBackend(ModelBackend):
if is_moe:
return [0.85, 0.80, 0.75, 0.70, 0.65, 0.60, 0.50, 0.40, 0.30, 0.20, 0.0]
return [0.95, 0.90, 0.85, 0.80, 0.75, 0.70, 0.65, 0.50, 0.40, 0.30, 0.20, 0.0]
elif strategy == "sequential":
print(f" Using sequential offload strategy - fine-grained incremental VRAM reduction")
# Fine-grained steps: 2% increments across the upper range, then coarser (5% or larger) steps down to 0.0
if is_moe:
return [0.80, 0.78, 0.76, 0.74, 0.72, 0.70, 0.68, 0.66, 0.64, 0.62, 0.60, 0.55, 0.50, 0.45, 0.40, 0.35, 0.30, 0.25, 0.20, 0.0]
return [0.93, 0.91, 0.89, 0.87, 0.85, 0.83, 0.81, 0.79, 0.77, 0.75, 0.73, 0.71, 0.69, 0.67, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40, 0.35, 0.30, 0.20, 0.0]
else: # auto
if total_vram_gb < 3:
print(f" Detected small GPU ({total_vram_gb:.1f}GB), using aggressive VRAM usage (99% start)")
......@@ -2036,7 +2042,7 @@ def parse_args():
parser.add_argument(
"--offload-strategy",
type=str,
choices=["auto", "conservative", "balanced", "aggressive"],
choices=["auto", "conservative", "balanced", "aggressive", "sequential"],
default="auto",
help="Offload strategy for NVIDIA backend (default: auto)",
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment