Disable bitsandbytes quantization for Qwen3.5-A3B/MoE models, which don't support it

parent 8665016a
...@@ -665,7 +665,13 @@ class NvidiaBackend(ModelBackend): ...@@ -665,7 +665,13 @@ class NvidiaBackend(ModelBackend):
# Prepare model loading arguments # Prepare model loading arguments
load_kwargs = {'trust_remote_code': True} load_kwargs = {'trust_remote_code': True}
# Check if model supports quantization
if load_in_4bit or load_in_8bit: if load_in_4bit or load_in_8bit:
# Qwen3.5-A3B/MoE models don't support bitsandbytes quantization
if 'qwen3.5' in model_name.lower() and ('a3b' in model_name.lower() or 'moe' in model_name.lower()):
print(f"Warning: {model_name} does not support bitsandbytes quantization (load_in_4bit/load_in_8bit)")
print("Quantization disabled for this model")
else:
try: try:
import bitsandbytes as bnb import bitsandbytes as bnb
print(f"Using {4 if load_in_4bit else 8}-bit quantization") print(f"Using {4 if load_in_4bit else 8}-bit quantization")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment