Enhance cluster nodes page with uptime, job stats, and master statistics

- Add uptime calculation for cluster nodes and master
- Include active/completed job counts per node and totals for master
- Display cluster master statistics before the nodes list
- Update API response format with master_stats and node-level metrics
- Add uptime formatting and job statistics to frontend
- Update API documentation with new response structure
parent 3f496bf6
...@@ -52,6 +52,39 @@ ...@@ -52,6 +52,39 @@
{% endif %} {% endif %}
{% endwith %} {% endwith %}
<!-- Cluster Master Stats -->
<div class="admin-card" id="masterStats" style="margin-bottom: 2rem;">
<div class="card-header">
<h3><i class="fas fa-server"></i> Cluster Master Statistics</h3>
</div>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; padding: 1rem;">
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #667eea;" id="totalNodes">0</div>
<div style="color: #6b7280;">Total Nodes</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #10b981;" id="connectedNodes">0</div>
<div style="color: #6b7280;">Connected</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #f59e0b;" id="totalWorkers">0</div>
<div style="color: #6b7280;">Total Workers</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #ef4444;" id="activeJobs">0</div>
<div style="color: #6b7280;">Active Jobs</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #8b5cf6;" id="completedJobs">0</div>
<div style="color: #6b7280;">Completed Jobs</div>
</div>
<div style="text-align: center;">
<div style="font-size: 1.5rem; font-weight: bold; color: #6b7280;" id="masterUptime">00:00:00</div>
<div style="color: #6b7280;">Master Uptime</div>
</div>
</div>
</div>
<table class="table" id="nodesTable"> <table class="table" id="nodesTable">
<thead> <thead>
<tr> <tr>
...@@ -62,6 +95,9 @@ ...@@ -62,6 +95,9 @@
<th>GPU Memory</th> <th>GPU Memory</th>
<th>Workers</th> <th>Workers</th>
<th>IP Address</th> <th>IP Address</th>
<th>Uptime</th>
<th>Active Jobs</th>
<th>Completed Jobs</th>
<th>Actions</th> <th>Actions</th>
</tr> </tr>
</thead> </thead>
...@@ -102,17 +138,34 @@ ...@@ -102,17 +138,34 @@
<script> <script>
let nodesData = []; let nodesData = [];
/**
 * Format a duration in seconds as a zero-padded `HH:MM:SS` string.
 *
 * Fractional seconds are truncated. The hours field is not capped, so
 * durations of 100 hours or more simply produce a wider hours segment
 * (e.g. `100:00:00`).
 *
 * Missing, non-numeric, non-finite or negative input is rendered as
 * `00:00:00` instead of leaking `NaN` or negative segments into the page.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} The duration formatted as `HH:MM:SS`.
 */
function formatUptime(seconds) {
    const numeric = Number(seconds);
    // Clamp anything unusable (undefined, NaN, Infinity, negative) to zero.
    const total = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;
    const pad = (value) => String(value).padStart(2, '0');
    const hours = Math.floor(total / 3600);
    const minutes = Math.floor((total % 3600) / 60);
    const secs = total % 60;
    return `${pad(hours)}:${pad(minutes)}:${pad(secs)}`;
}
/**
 * Fetch the cluster node list from `/api/admin/cluster_nodes` and refresh
 * the page: the master statistics card (when the response carries
 * `master_stats`) and the nodes table (via `renderNodesTable`).
 *
 * On any failure (network error, non-2xx HTTP status, or a body that is
 * not valid JSON) the error is logged and the table body is replaced with
 * a single error row spanning all 11 columns.
 */
function updateNodesTable() {
    fetch('/api/admin/cluster_nodes')
        .then(response => {
            // fetch() only rejects on network failure; surface HTTP error
            // statuses explicitly so they reach the error row below instead
            // of being rendered as an empty node list.
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            // Update master stats
            if (data.master_stats) {
                document.getElementById('totalNodes').textContent = data.master_stats.total_nodes;
                document.getElementById('connectedNodes').textContent = data.master_stats.connected_nodes;
                document.getElementById('totalWorkers').textContent = data.master_stats.total_workers;
                document.getElementById('activeJobs').textContent = data.master_stats.total_active_jobs;
                document.getElementById('completedJobs').textContent = data.master_stats.total_completed_jobs;
                // Fall back to 0 so a missing uptime is not formatted as NaN,
                // matching how per-node uptime is handled in the table rows.
                document.getElementById('masterUptime').textContent = formatUptime(data.master_stats.uptime_seconds || 0);
            }

            nodesData = data.nodes || [];
            renderNodesTable();
        })
        .catch(error => {
            console.error('Error fetching nodes:', error);
            document.getElementById('nodesTableBody').innerHTML =
                '<tr><td colspan="11" style="text-align: center; color: #dc2626;">Error loading cluster nodes</td></tr>';
        });
}
...@@ -120,7 +173,7 @@ function renderNodesTable() { ...@@ -120,7 +173,7 @@ function renderNodesTable() {
const tbody = document.getElementById('nodesTableBody'); const tbody = document.getElementById('nodesTableBody');
if (nodesData.length === 0) { if (nodesData.length === 0) {
tbody.innerHTML = '<tr><td colspan="8" style="text-align: center; color: #6b7280;">No cluster nodes found</td></tr>'; tbody.innerHTML = '<tr><td colspan="11" style="text-align: center; color: #6b7280;">No cluster nodes found</td></tr>';
return; return;
} }
...@@ -140,6 +193,9 @@ function renderNodesTable() { ...@@ -140,6 +193,9 @@ function renderNodesTable() {
</td> </td>
<td>${node.workers_available}</td> <td>${node.workers_available}</td>
<td>${node.ip_address}</td> <td>${node.ip_address}</td>
<td>${formatUptime(node.uptime_seconds || 0)}</td>
<td>${node.active_jobs || 0}</td>
<td>${node.completed_jobs || 0}</td>
<td> <td>
<button class="btn btn-sm" onclick="openDriverModal('${node.hostname}', '${node.token}', '${node.hostname}')"> <button class="btn btn-sm" onclick="openDriverModal('${node.hostname}', '${node.token}', '${node.hostname}')">
Set Driver Set Driver
......
...@@ -432,6 +432,14 @@ ...@@ -432,6 +432,14 @@
<div class="response-section"> <div class="response-section">
<h4><i class="fas fa-reply"></i> Response</h4> <h4><i class="fas fa-reply"></i> Response</h4>
<div class="code-block">{ <div class="code-block">{
"master_stats": {
"total_nodes": 3,
"connected_nodes": 2,
"total_workers": 6,
"total_active_jobs": 1,
"total_completed_jobs": 15,
"uptime_seconds": 3600.5
},
"nodes": [ "nodes": [
{ {
"token": "abc123...", "token": "abc123...",
...@@ -443,7 +451,10 @@ ...@@ -443,7 +451,10 @@
"workers_available": 2, "workers_available": 2,
"ip_address": "192.168.1.100", "ip_address": "192.168.1.100",
"connected": true, "connected": true,
"last_seen": 1640995300.0 "last_seen": 1640995300.0,
"uptime_seconds": 1800.5,
"active_jobs": 1,
"completed_jobs": 5
} }
] ]
}</div> }</div>
......
...@@ -44,6 +44,7 @@ class ClusterMaster: ...@@ -44,6 +44,7 @@ class ClusterMaster:
self.processes = {} # type: Dict[str, Dict[str, Any]] self.processes = {} # type: Dict[str, Dict[str, Any]]
self.tokens = {} # type: Dict[str, str] # token -> client_id self.tokens = {} # type: Dict[str, str] # token -> client_id
self.running = False self.running = False
self.start_time = time.time()
# Load balancing # Load balancing
self.process_queue = defaultdict(list) # process_type -> [(client_id, weight), ...] self.process_queue = defaultdict(list) # process_type -> [(client_id, weight), ...]
......
...@@ -389,6 +389,8 @@ def api_cluster_nodes(): ...@@ -389,6 +389,8 @@ def api_cluster_nodes():
current_time = time.time() current_time = time.time()
nodes = [] nodes = []
total_active_jobs = 0
total_completed_jobs = 0
# Get active clients # Get active clients
for client_id, client_info in cluster_master.clients.items(): for client_id, client_info in cluster_master.clients.items():
...@@ -416,6 +418,18 @@ def api_cluster_nodes(): ...@@ -416,6 +418,18 @@ def api_cluster_nodes():
# Get workers available (processes) # Get workers available (processes)
workers_available = len([p for p in cluster_master.processes.values() if p['client_id'] == client_id]) workers_available = len([p for p in cluster_master.processes.values() if p['client_id'] == client_id])
# Calculate uptime
connected_at = client_info.get('connected_at', current_time)
uptime_seconds = current_time - connected_at
# Job statistics (placeholder - would need integration with job queue)
# In a real implementation, track jobs per client
active_jobs = 0 # Placeholder
completed_jobs = 0 # Placeholder
total_active_jobs += active_jobs
total_completed_jobs += completed_jobs
nodes.append({ nodes.append({
'token': token, 'token': token,
'token_name': token_name, 'token_name': token_name,
...@@ -426,7 +440,10 @@ def api_cluster_nodes(): ...@@ -426,7 +440,10 @@ def api_cluster_nodes():
'workers_available': workers_available, 'workers_available': workers_available,
'ip_address': ip_address, 'ip_address': ip_address,
'connected': True, 'connected': True,
'last_seen': client_info.get('last_seen', 0) 'last_seen': client_info.get('last_seen', 0),
'uptime_seconds': uptime_seconds,
'active_jobs': active_jobs,
'completed_jobs': completed_jobs
}) })
# Get recently disconnected clients (last 10 that were connected in last 10 minutes) # Get recently disconnected clients (last 10 that were connected in last 10 minutes)
...@@ -436,7 +453,18 @@ def api_cluster_nodes(): ...@@ -436,7 +453,18 @@ def api_cluster_nodes():
# Sort: active first, then by last_seen desc # Sort: active first, then by last_seen desc
nodes.sort(key=lambda x: (not x['connected'], -x['last_seen'])) nodes.sort(key=lambda x: (not x['connected'], -x['last_seen']))
return {'nodes': nodes} # Cluster master stats
master_uptime = current_time - getattr(cluster_master, 'start_time', current_time)
master_stats = {
'total_nodes': len(nodes),
'connected_nodes': len([n for n in nodes if n['connected']]),
'total_workers': sum(n['workers_available'] for n in nodes),
'total_active_jobs': total_active_jobs,
'total_completed_jobs': total_completed_jobs,
'uptime_seconds': master_uptime
}
return {'master_stats': master_stats, 'nodes': nodes}
@app.route('/api/admin/cluster_nodes/set_driver', methods=['POST']) @app.route('/api/admin/cluster_nodes/set_driver', methods=['POST'])
@admin_required @admin_required
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment