Fix cluster client GPU detection - extract backends from capabilities

- Fix cluster master to properly detect GPU backends from client capabilities
- Extract available_backends from capabilities list instead of gpu_info
- Ensure clients with GPU workers are correctly identified as GPU-enabled
parent 55422911
...@@ -204,12 +204,15 @@ class ClusterMaster: ...@@ -204,12 +204,15 @@ class ClusterMaster:
# Generate client ID from token # Generate client ID from token
client_id = hashlib.sha256(token.encode()).hexdigest()[:16] client_id = hashlib.sha256(token.encode()).hexdigest()[:16]
# Check GPU capabilities for logging # Extract available backends from capabilities
gpu_info = client_info.get('gpu_info', {}) capabilities = client_info.get('capabilities', [])
available_backends = gpu_info.get('available_backends', []) available_backends = list(set(cap.split('_')[1] for cap in capabilities if '_' in cap and len(cap.split('_')) > 1))
gpu_backends = [b for b in available_backends if b in ['cuda', 'rocm']] gpu_backends = [b for b in available_backends if b in ['cuda', 'rocm']]
has_gpu = len(gpu_backends) > 0 has_gpu = len(gpu_backends) > 0
# Get GPU info for storage
gpu_info = client_info.get('gpu_info', {})
# Get hostname and IP # Get hostname and IP
hostname = client_info.get('hostname', 'unknown') hostname = client_info.get('hostname', 'unknown')
# Try to get real IP address from websocket # Try to get real IP address from websocket
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment