Fix GPU detection in cluster master to use correct keys from gpu_info

parent f065a2ae
......@@ -373,7 +373,7 @@ class ClusterMaster:
# Check if client has GPU
gpu_info = client_info.get('gpu_info', {})
has_gpu = gpu_info.get('cuda_available', False) or gpu_info.get('rocm_available', False)
has_gpu = gpu_info.get('cuda', False) or gpu_info.get('rocm', False)
if not has_gpu:
continue # Skip CPU-only clients for now (could be extended later)
......@@ -765,7 +765,7 @@ class ClusterMaster:
for proc_key, proc_weight in queue:
client_id = self.processes[proc_key]['client_id']
client_gpu_info = self.clients[client_id]['gpu_info']
has_gpu = client_gpu_info.get('cuda_available', False) or client_gpu_info.get('rocm_available', False)
has_gpu = client_gpu_info.get('cuda', False) or client_gpu_info.get('rocm', False)
client_weight = self.clients[client_id]['weight']
combined_weight = client_weight * proc_weight
......@@ -816,7 +816,7 @@ class ClusterMaster:
has_gpu = False
# Check for CUDA
if gpu_info.get('cuda_available') or gpu_info.get('cuda_devices', 0) > 0:
if gpu_info.get('cuda', False) or gpu_info.get('cuda_devices', 0) > 0:
has_gpu = True
cuda_devices = gpu_info.get('cuda_device_info', [])
if cuda_devices:
......@@ -828,7 +828,7 @@ class ClusterMaster:
print(f"DEBUG: CUDA detected, total VRAM: {total_vram}GB")
# Check for ROCm
if gpu_info.get('rocm_available') or gpu_info.get('rocm_devices', 0) > 0:
if gpu_info.get('rocm', False) or gpu_info.get('rocm_devices', 0) > 0:
has_gpu = True
rocm_devices = gpu_info.get('rocm_device_info', [])
if rocm_devices:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment