Add debug logging to GPU detection

- Added debug output to see what CUDA device names are detected
- Will help identify why an AMD GPU is still being counted as a CUDA device
- Debug output shows device names and functional test results
- User can now see what devices PyTorch is detecting
parent 056cbbf3
......@@ -267,8 +267,11 @@ def detect_gpu_backends() -> dict:
try:
# Test if device is actually functional by trying a simple operation
device_name = torch.cuda.get_device_name(i).lower()
print(f"CUDA Device {i}: {device_name}") # Debug output
# Only consider NVIDIA GPUs
if not any(keyword in device_name for keyword in ['nvidia', 'geforce', 'quadro', 'tesla', 'rtx', 'gtx']):
print(f"Skipping non-NVIDIA device {i}: {device_name}")
continue
# Test device functionality
......@@ -277,12 +280,15 @@ def detect_gpu_backends() -> dict:
test_tensor = torch.tensor([1.0], device=f'cuda:{i}')
test_result = test_tensor + 1 # Simple operation
del test_tensor, test_result
print(f"CUDA Device {i} is functional: {device_name}")
working_cuda_devices += 1
except Exception:
except Exception as e:
print(f"CUDA Device {i} failed functional test: {device_name} - {e}")
# Device not functional, skip it
continue
except Exception:
except Exception as e:
print(f"Error checking CUDA device {i}: {e}")
continue
if working_cuda_devices > 0:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment