Re-queue jobs when no workers are available instead of failing

parent 5f82047c
...@@ -1022,7 +1022,9 @@ class ClusterMaster: ...@@ -1022,7 +1022,9 @@ class ClusterMaster:
else: else:
print(f"Failed to assign job {job['id']} to worker {worker_key}") print(f"Failed to assign job {job['id']} to worker {worker_key}")
else: else:
print(f"No suitable worker found for job {job['id']}") print(f"No suitable worker found for job {job['id']}, re-queuing")
from .database import update_queue_status
update_queue_status(job['id'], 'queued', error='No suitable worker found, re-queued')
await asyncio.sleep(5) # Poll every 5 seconds await asyncio.sleep(5) # Poll every 5 seconds
......
...@@ -262,8 +262,8 @@ class QueueManager: ...@@ -262,8 +262,8 @@ class QueueManager:
response = handle_web_message(message) response = handle_web_message(message)
if response and response.msg_type == 'error': if response and response.msg_type == 'error':
# Immediate error # Immediate error - re-queue the job instead of failing
update_queue_status(job['id'], 'failed', error=response.data.get('error', 'Backend error')) update_queue_status(job['id'], 'queued', error=response.data.get('error', 'No workers available, re-queued'))
return return
# Poll for result (backend handles async processing) # Poll for result (backend handles async processing)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment