Fix cluster client uptime calculation

- Add connected_at timestamp to track when client first connected
- Calculate uptime from connection time instead of last seen time
- Update database schema and API to use proper uptime tracking
parent 1da6025d
...@@ -373,6 +373,7 @@ def init_db(conn) -> None: ...@@ -373,6 +373,7 @@ def init_db(conn) -> None:
gpu_info TEXT, gpu_info TEXT,
available_backends TEXT, available_backends TEXT,
connected BOOLEAN DEFAULT 1, connected BOOLEAN DEFAULT 1,
connected_at TIMESTAMP NULL,
last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
...@@ -389,6 +390,7 @@ def init_db(conn) -> None: ...@@ -389,6 +390,7 @@ def init_db(conn) -> None:
gpu_info TEXT, gpu_info TEXT,
available_backends TEXT, available_backends TEXT,
connected BOOLEAN DEFAULT 1, connected BOOLEAN DEFAULT 1,
connected_at TIMESTAMP,
last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP, last_seen TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) )
...@@ -1641,8 +1643,8 @@ def save_cluster_client(client_id: str, token: str, hostname: str, ip_address: s ...@@ -1641,8 +1643,8 @@ def save_cluster_client(client_id: str, token: str, hostname: str, ip_address: s
config = get_db_config() config = get_db_config()
if config['type'] == 'mysql': if config['type'] == 'mysql':
cursor.execute(''' cursor.execute('''
INSERT INTO cluster_clients (client_id, token, hostname, ip_address, weight, gpu_info, available_backends, connected, last_seen) INSERT INTO cluster_clients (client_id, token, hostname, ip_address, weight, gpu_info, available_backends, connected, connected_at, last_seen)
VALUES (?, ?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP) VALUES (?, ?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
ON DUPLICATE KEY UPDATE ON DUPLICATE KEY UPDATE
hostname = VALUES(hostname), hostname = VALUES(hostname),
ip_address = VALUES(ip_address), ip_address = VALUES(ip_address),
...@@ -1655,8 +1657,8 @@ def save_cluster_client(client_id: str, token: str, hostname: str, ip_address: s ...@@ -1655,8 +1657,8 @@ def save_cluster_client(client_id: str, token: str, hostname: str, ip_address: s
else: else:
cursor.execute(''' cursor.execute('''
INSERT OR REPLACE INTO cluster_clients INSERT OR REPLACE INTO cluster_clients
(client_id, token, hostname, ip_address, weight, gpu_info, available_backends, connected, last_seen) (client_id, token, hostname, ip_address, weight, gpu_info, available_backends, connected, connected_at, last_seen)
VALUES (?, ?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP) VALUES (?, ?, ?, ?, ?, ?, ?, 1, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
''', (client_id, token, hostname, ip_address, weight, gpu_info_json, available_backends_json)) ''', (client_id, token, hostname, ip_address, weight, gpu_info_json, available_backends_json))
conn.commit() conn.commit()
......
...@@ -440,14 +440,14 @@ def api_cluster_nodes(): ...@@ -440,14 +440,14 @@ def api_cluster_nodes():
gpu_memory.extend([f"ROCm Device {i}: 16GB VRAM" for i in range(rocm_devices)]) gpu_memory.extend([f"ROCm Device {i}: 16GB VRAM" for i in range(rocm_devices)])
total_memory += rocm_devices * 16 total_memory += rocm_devices * 16
# Calculate uptime from last_seen # Calculate uptime from connected_at (when client first connected)
last_seen = client.get('last_seen') connected_at = client.get('connected_at')
if last_seen: if connected_at:
if isinstance(last_seen, str): if isinstance(connected_at, str):
# Parse timestamp string # Parse timestamp string
import datetime import datetime
last_seen = datetime.datetime.fromisoformat(last_seen.replace('Z', '+00:00')).timestamp() connected_at = datetime.datetime.fromisoformat(connected_at.replace('Z', '+00:00')).timestamp()
uptime_seconds = current_time - last_seen uptime_seconds = current_time - connected_at
else: else:
uptime_seconds = 0 uptime_seconds = 0
...@@ -470,7 +470,7 @@ def api_cluster_nodes(): ...@@ -470,7 +470,7 @@ def api_cluster_nodes():
'total_memory': total_memory, 'total_memory': total_memory,
'ip_address': client.get('ip_address', '127.0.0.1'), 'ip_address': client.get('ip_address', '127.0.0.1'),
'connected': client.get('connected', True), 'connected': client.get('connected', True),
'last_seen': last_seen or 0, 'last_seen': client.get('last_seen'),
'uptime_seconds': uptime_seconds, 'uptime_seconds': uptime_seconds,
'active_jobs': 0, # Placeholder 'active_jobs': 0, # Placeholder
'completed_jobs': 0, # Placeholder 'completed_jobs': 0, # Placeholder
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment