Fix cluster status GPU detection to use available_backends instead of gpu_info...

Fix cluster status GPU detection to use available_backends instead of gpu_info for more reliable GPU client identification
parent 6d5c742f
...@@ -85,25 +85,45 @@ def api_stats(): ...@@ -85,25 +85,45 @@ def api_stats():
# For admin users, show more detailed stats including cluster information # For admin users, show more detailed stats including cluster information
if user.get('role') == 'admin': if user.get('role') == 'admin':
# Check if we have cluster master information # Get cluster stats from database
try: try:
from .cluster_master import cluster_master from .database import get_connected_cluster_clients
# Add cluster stats if available connected_clients = get_connected_cluster_clients()
data['cluster_clients'] = len(cluster_master.clients) data['cluster_clients'] = len(connected_clients)
data['active_processes'] = len(cluster_master.processes) data['active_processes'] = 0 # TODO: Implement process tracking in database
data['gpu_clients'] = sum(1 for c in cluster_master.clients.values() # Count GPU clients based on available backends (more reliable than gpu_info)
if c['gpu_info'].get('cuda_available') or c['gpu_info'].get('rocm_available')) data['gpu_clients'] = sum(1 for c in connected_clients
if any(b in ['cuda', 'rocm'] for b in c.get('available_backends', [])))
# Add connected nodes details # Add connected nodes details
data['connected_nodes'] = [] data['connected_nodes'] = []
for client_id, client_info in cluster_master.clients.items(): import time
for client in connected_clients:
# Convert last_seen timestamp to Unix timestamp
last_seen_ts = 0
if client.get('last_seen'):
try:
# Parse the timestamp string and convert to Unix timestamp
from datetime import datetime
if isinstance(client['last_seen'], str):
# Assume ISO format like '2023-10-08T12:34:56.789Z'
dt = datetime.fromisoformat(client['last_seen'].replace('Z', '+00:00'))
last_seen_ts = dt.timestamp()
else:
last_seen_ts = time.mktime(client['last_seen'].timetuple())
except:
last_seen_ts = 0
# Determine GPU availability from available_backends (more reliable)
gpu_available = any(b in ['cuda', 'rocm'] for b in client.get('available_backends', []))
data['connected_nodes'].append({ data['connected_nodes'].append({
'client_id': client_id, 'client_id': client['client_id'],
'hostname': client_info.get('hostname', 'unknown'), 'hostname': client.get('hostname', 'unknown'),
'ip_address': client_info.get('ip_address', 'unknown'), 'ip_address': client.get('ip_address', 'unknown'),
'weight': client_info.get('weight', 0), 'weight': client.get('weight', 0),
'gpu_available': client_info['gpu_info'].get('cuda_available') or client_info['gpu_info'].get('rocm_available'), 'gpu_available': gpu_available,
'backends': client_info.get('available_backends', []), 'backends': client.get('available_backends', []),
'last_seen': client_info.get('last_seen', 0) 'last_seen': last_seen_ts
}) })
except: except:
pass pass
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment