Add client weight display to cluster nodes page

- Add weight column to cluster nodes table showing load balancing weight
- Set default weights: master=0, clients=100
- Update API response to include client weight
- Update frontend to display weight information
- Update API documentation with weight field
parent b48679df
......@@ -91,6 +91,7 @@
<th>Status</th>
<th>Token Name</th>
<th>Hostname</th>
<th>Weight</th>
<th>GPUs</th>
<th>GPU Memory</th>
<th>Workers</th>
......@@ -173,7 +174,7 @@ function renderNodesTable() {
const tbody = document.getElementById('nodesTableBody');
if (nodesData.length === 0) {
tbody.innerHTML = '<tr><td colspan="11" style="text-align: center; color: #6b7280;">No cluster nodes found</td></tr>';
tbody.innerHTML = '<tr><td colspan="12" style="text-align: center; color: #6b7280;">No cluster nodes found</td></tr>';
return;
}
......@@ -186,6 +187,7 @@ function renderNodesTable() {
</td>
<td>${node.token_name}</td>
<td>${node.hostname}</td>
<td>${node.weight || 100}</td>
<td>${node.gpus}</td>
<td>
${node.gpu_memory.length > 0 ? node.gpu_memory.join('<br>') : 'No GPU info'}
......
......@@ -445,6 +445,7 @@
"token": "abc123...",
"token_name": "Worker Node 1",
"hostname": "worker1.example.com",
"weight": 100,
"gpus": 1,
"gpu_memory": ["CUDA Device 0: 8GB VRAM"],
"total_memory": 8,
......
......@@ -36,9 +36,10 @@ from collections import defaultdict
class ClusterMaster:
"""Master server for cluster coordination."""
def __init__(self, port: int = 5003, shared_dir: str = None):
def __init__(self, port: int = 5003, shared_dir: str = None, weight: int = 0):
self.port = port
self.shared_dir = shared_dir
self.weight = weight # Master weight (default 0)
self.server_sock: Optional[socket.socket] = None
self.clients = {} # type: Dict[str, Dict[str, Any]]
self.client_websockets = {} # type: Dict[str, websockets.WebSocketServerProtocol]
......@@ -608,10 +609,11 @@ class ClusterMaster:
cluster_master = ClusterMaster()
def start_cluster_master(port: int = 5003, shared_dir: str = None) -> None:
def start_cluster_master(port: int = 5003, shared_dir: str = None, weight: int = 0) -> None:
"""Start the cluster master server."""
cluster_master.port = port
cluster_master.shared_dir = shared_dir
cluster_master.weight = weight
asyncio.run(cluster_master.start())
......@@ -621,10 +623,11 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description='VidAI Cluster Master')
parser.add_argument('--port', type=int, default=5003, help='Port to listen on (default: 5003)')
parser.add_argument('--shared-dir', help='Shared directory for file transfers')
parser.add_argument('--weight', type=int, default=0, help='Master weight for load balancing (default: 0)')
args = parser.parse_args()
print(f"Starting VidAI Cluster Master on port {args.port}")
print(f"Starting VidAI Cluster Master on port {args.port} with weight {args.weight}")
if args.shared_dir:
print(f"Using shared directory: {args.shared_dir}")
# Validate shared directory exists
......@@ -632,4 +635,4 @@ if __name__ == "__main__":
print(f"Warning: Shared directory {args.shared_dir} does not exist. Creating it.")
os.makedirs(args.shared_dir, exist_ok=True)
start_cluster_master(args.port, args.shared_dir)
\ No newline at end of file
start_cluster_master(args.port, args.shared_dir, args.weight)
\ No newline at end of file
......@@ -443,7 +443,8 @@ def api_cluster_nodes():
'last_seen': client_info.get('last_seen', 0),
'uptime_seconds': uptime_seconds,
'active_jobs': active_jobs,
'completed_jobs': completed_jobs
'completed_jobs': completed_jobs,
'weight': client_info.get('weight', 100)
})
# Get recently disconnected clients (last 10 that were connected in last 10 minutes)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment