Disable bitsandbytes quantization for Qwen3.5-A3B/MoE models, which don't support it

10d10573 · Stefy Lanza (nextime / spora ) · 8665016a · 10d10573
Commit 10d10573 authored Mar 01, 2026 by Stefy Lanza (nextime / spora )
Show whitespace changes
Inline Side-by-side

Showing with 13 additions and 7 deletions

coderai coderai +13 -7

No files found.
--- a/coderai
+++ b/coderai
@@ -665,7 +665,13 @@ class NvidiaBackend(ModelBackend):
        # Prepare model loading arguments
        load_kwargs = {'trust_remote_code': True}
+        # Apply quantization only when it was requested and the model supports it
        if load_in_4bit or load_in_8bit:
+            # Qwen3.5-A3B/MoE models don't support bitsandbytes quantization
+            if 'qwen3.5' in model_name.lower() and ('a3b' in model_name.lower() or 'moe' in model_name.lower()):
+                print(f"Warning: {model_name} does not support bitsandbytes quantization (load_in_4bit/load_in_8bit)")
+                print("Quantization disabled for this model")
+            else:
                try:
                    import bitsandbytes as bnb
                    print(f"Using {4 if load_in_4bit else 8}-bit quantization")