Change NVIDIA backend VRAM limit from 99.9% to 93% to leave more headroom for CUDA overhead

320ca0e7 · Stefy Lanza (nextime / spora ) · 2ca7368f · 320ca0e7
Commit 320ca0e7 authored Mar 01, 2026 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 6 deletions

coderai coderai +6 -6

No files found.
--- a/coderai
+++ b/coderai
@@ -541,17 +541,17 @@ class NvidiaBackend(ModelBackend):
            return None
    def _get_gpu_memory_map(self) -> Dict:
-        """Get max_memory dict for Accelerate with 99.9% GPU limit, then CPU, then disk."""
+        """Get max_memory dict for Accelerate with 93% GPU limit, then CPU, then disk."""
        import torch
        max_memory = {}
-        # GPU memory: 99.9% of available VRAM per GPU
+        # GPU memory: 93% of total VRAM per GPU
        if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                props = torch.cuda.get_device_properties(i)
                total_vram = props.total_memory
-                # Leave 0.1% headroom for CUDA overhead
+                # Leave 7% headroom for CUDA overhead (changed from 0.1% to 7%)
-                usable_vram = int(total_vram * 0.999)
+                usable_vram = int(total_vram * 0.93)
                max_memory[i] = usable_vram
                print(f"  GPU {i}: {total_vram / 1e9:.1f}GB total, {usable_vram / 1e9:.1f}GB usable")
@@ -606,12 +606,12 @@ class NvidiaBackend(ModelBackend):
        # Prepare model loading arguments
        load_kwargs = {'trust_remote_code': True}
-        # Setup memory management: GPU (95%) → CPU (limit) → Disk
+        # Setup memory management: GPU (93%) → CPU (limit) → Disk
        if self.device == "cuda":
            max_memory = self._get_gpu_memory_map()
            load_kwargs['max_memory'] = max_memory
            load_kwargs['device_map'] = 'auto'
-            print(f"  Memory strategy: GPU (99.9% VRAM) → CPU → Disk")
+            print(f"  Memory strategy: GPU (93% VRAM) → CPU → Disk")
        else:
            # CPU-only mode
            load_kwargs['device_map'] = None