Restrict stats display to admin users and enhance stats information

- Modified analysis page to only show stats sidebar for admin users
- Enhanced /api/stats endpoint to include cluster information for admins
- Added GPU backend detection summary to stats
- Updated JavaScript to display comprehensive system and cluster stats
- Stats now show local resource usage and cluster status for administrators

Note: Full job-specific worker stats (showing resources from the machine executing each specific job) would require additional development to track job-to-worker mappings and implement worker resource reporting.
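As a rough illustration only, such tracking could pair a job-to-worker map with periodic resource reports pushed by each worker. The sketch below is hypothetical — none of these names (`WorkerReport`, `JobTracker`) exist in this codebase — and only outlines the shape the mapping might take:

```python
# Hypothetical sketch only -- these classes are not part of this commit or codebase.
import time
from dataclasses import dataclass, field


@dataclass
class WorkerReport:
    """Resource snapshot a worker could periodically push to the master."""
    worker_id: str
    cpu_percent: float
    ram_used_gb: float
    gpu_utilization: dict = field(default_factory=dict)  # e.g. {0: 43.0}
    timestamp: float = field(default_factory=time.time)


class JobTracker:
    """Maps each job to the worker executing it, so a stats endpoint could
    report per-job resource usage instead of only local/cluster totals."""

    def __init__(self):
        self.job_to_worker = {}   # job_id -> worker_id
        self.latest_reports = {}  # worker_id -> WorkerReport

    def assign(self, job_id, worker_id):
        self.job_to_worker[job_id] = worker_id

    def record(self, report):
        self.latest_reports[report.worker_id] = report

    def stats_for_job(self, job_id):
        worker_id = self.job_to_worker.get(job_id)
        return self.latest_reports.get(worker_id)
```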
parent 4f6f914d
@@ -58,29 +58,42 @@
     </style>
     <script>
         function updateStats() {
-            fetch('/stats')
+            fetch('/api/stats')
                 .then(response => response.json())
                 .then(data => {
-                    let html = '<h3>GPU Stats</h3>';
-                    html += `<p style="color: ${data.status === 'Idle' ? 'green' : 'orange'};">Status: ${data.status}</p>`;
-                    if (data.elapsed > 0) {
-                        html += `<p>Elapsed: ${data.elapsed.toFixed(1)}s</p>`;
+                    let html = '<h3>System Stats</h3>';
+
+                    // GPU Information
+                    if (data.gpu_info) {
+                        html += '<h4>GPU Information</h4>';
+                        html += `<p>CUDA: ${data.gpu_info.cuda_available ? 'Available' : 'Not available'} (${data.gpu_info.cuda_devices} devices)</p>`;
+                        html += `<p>ROCm: ${data.gpu_info.rocm_available ? 'Available' : 'Not available'} (${data.gpu_info.rocm_devices} devices)</p>`;
+                        html += `<p>Available backends: ${data.gpu_info.available_backends.join(', ') || 'None'}</p>`;
                     }
+
+                    // Local GPU stats
                     if (data.gpu_count > 0) {
+                        html += '<h4>Local GPU Usage</h4>';
                         data.gpus.forEach((gpu, i) => {
                             let memPercent = (gpu.memory_used / gpu.memory_total * 100).toFixed(1);
                             html += `<p>GPU ${i}: ${gpu.name}<br>Memory: <progress value="${gpu.memory_used}" max="${gpu.memory_total}"></progress> ${gpu.memory_used.toFixed(2)} / ${gpu.memory_total.toFixed(2)} GB (${memPercent}%)<br>Utilization: ${gpu.utilization}%</p>`;
                         });
+                    } else {
+                        html += '<p>No GPUs detected</p>';
                     }
+
+                    // CPU and RAM
+                    html += '<h4>Local Resources</h4>';
                     html += `<p>CPU: ${data.cpu_percent.toFixed(1)}%</p>`;
                     html += `<p>RAM: ${data.ram_used.toFixed(2)} / ${data.ram_total.toFixed(2)} GB</p>`;
-                    document.getElementById('stats').innerHTML = html;
-                    if (data.result) {
-                        document.getElementById('result_div').innerHTML = '<h3>Result:</h3><p>' + data.result + '</p>';
-                        document.getElementById('result_div').style.display = 'block';
+
+                    // Cluster stats (admin only)
+                    if (data.cluster_clients !== undefined) {
+                        html += '<h4>Cluster Status</h4>';
+                        html += `<p>Connected clients: ${data.cluster_clients}</p>`;
+                        html += `<p>Active processes: ${data.active_processes || 0}</p>`;
+                        html += `<p>GPU-enabled clients: ${data.gpu_clients || 0}</p>`;
                     }
+
+                    document.getElementById('stats').innerHTML = html;
                 })
                 .catch(e => {
                     document.getElementById('stats').innerHTML = '<p>Error loading stats</p>';
@@ -246,9 +259,11 @@
             {% endif %}
         </div>
+        {% if user.get('role') == 'admin' %}
         <div class="sidebar">
             <div id="stats" class="stats">Loading stats...</div>
         </div>
+        {% endif %}
     </div>

     <!-- File Browser Modal -->
...
@@ -80,9 +80,23 @@ def api_stats():
     import torch
     import time

-    data = {'status': 'Idle'}  # Simplified - in real implementation, get from backend
+    user = request.api_user
+    data = {'status': 'Idle'}
+
+    # For admin users, show more detailed stats including cluster information
+    if user.get('role') == 'admin':
+        # Check if we have cluster master information
+        try:
+            from .cluster_master import cluster_master
+            # Add cluster stats if available
+            data['cluster_clients'] = len(cluster_master.clients)
+            data['active_processes'] = len(cluster_master.processes)
+            data['gpu_clients'] = sum(1 for c in cluster_master.clients.values()
+                                      if c['gpu_info'].get('cuda_available') or c['gpu_info'].get('rocm_available'))
+        except:
+            pass

-    # GPU stats
+    # GPU stats (local machine)
     if torch.cuda.is_available():
         data['gpu_count'] = torch.cuda.device_count()
         data['gpus'] = []
@@ -91,18 +105,31 @@ def api_stats():
                 'name': torch.cuda.get_device_name(i),
                 'memory_used': torch.cuda.memory_allocated(i) / 1024**3,  # GB
                 'memory_total': torch.cuda.get_device_properties(i).total_memory / 1024**3,
-                'utilization': 0  # Would need pynvml for actual utilization
+                'utilization': 0,  # Would need pynvml for actual utilization
+                'backend': 'cuda'
             }
             data['gpus'].append(gpu)
     else:
         data['gpu_count'] = 0
+        data['gpus'] = []

-    # CPU and RAM
+    # CPU and RAM (local machine)
     data['cpu_percent'] = psutil.cpu_percent()
     ram = psutil.virtual_memory()
     data['ram_used'] = ram.used / 1024**3
     data['ram_total'] = ram.total / 1024**3
+
+    # Add GPU info summary
+    from .compat import detect_gpu_backends
+    gpu_info = detect_gpu_backends()
+    data['gpu_info'] = {
+        'cuda_available': gpu_info['cuda'],
+        'rocm_available': gpu_info['rocm'],
+        'cuda_devices': gpu_info['cuda_devices'],
+        'rocm_devices': gpu_info['rocm_devices'],
+        'available_backends': [k for k, v in gpu_info.items() if k.endswith('_available') and v]
+    }

     return json.dumps(data)

 @api_bp.route('/api/analyze', methods=['POST'])
...