SexHackMe / vidai / Commits

Commit ee3095c3, authored Oct 08, 2025 by Stefy Lanza (nextime / spora)

Store cluster processes in database for queue to check available workers

Parent: fd1c1545

Showing 4 changed files with 55 additions and 15 deletions:
    vidai/backend.py          +0   -9
    vidai/cluster_master.py   +17  -1
    vidai/database.py         +26  -0
    vidai/queue.py            +12  -5

vidai/backend.py

@@ -23,8 +23,6 @@ import time
 import threading
 from .comm import SocketServer, Message
 from .config import get_analysis_backend, get_training_backend, set_analysis_backend, set_training_backend
-from .cluster_master import start_cluster_master
-import threading
 
 worker_sockets = {}  # type: dict

@@ -131,13 +129,6 @@ def backend_process() -> None:
     worker_server = SocketServer(socket_path=worker_socket_path, comm_type='unix')
     worker_server.start(worker_message_handler)
 
-    # Start cluster master in background thread
-    from .config import get_cluster_host, get_cluster_port
-    cluster_host = get_cluster_host() or '0.0.0.0'
-    cluster_port = get_cluster_port()
-    cluster_thread = threading.Thread(target=start_cluster_master, args=(cluster_host, cluster_port, None, None, False), daemon=True)
-    cluster_thread.start()
 
     try:
         while True:
             time.sleep(1)
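
The two hunks above drop the in-process cluster-master startup (and its duplicate threading import) from backend_process(). For reference, the deleted block followed the standard pattern of running a blocking service on a daemon thread; the standalone sketch below mirrors only that wiring, with the parameter names and the port value being illustrative guesses rather than values taken from the repository.

import threading
import time

def start_cluster_master(host, port, certfile, password, use_ssl):
    # Stand-in body; the real function lives in vidai/cluster_master.py and the
    # parameter names here are guesses matching the five positional arguments.
    while True:
        time.sleep(1)

cluster_thread = threading.Thread(
    target=start_cluster_master,
    args=('0.0.0.0', 5003, None, None, False),  # port number is hypothetical
    daemon=True,  # thread exits together with the main process
)
cluster_thread.start()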

vidai/cluster_master.py

@@ -290,6 +290,15 @@ class ClusterMaster:
             # Sort by weight (highest first)
             self.process_queue[proc_type].sort(key=lambda x: x[1], reverse=True)
 
+            # Save to database
+            from .database import get_db_connection
+            conn = get_db_connection()
+            cursor = conn.cursor()
+            cursor.execute('INSERT INTO cluster_processes (client_id, process_name, weight, status) VALUES (?, ?, ?, ?)', (client_id, proc_name, proc_info.get('weight', 10), proc_info.get('status', 'active')))
+            conn.commit()
+            conn.close()
+
         print(f"Client {client_id} registered {len(processes)} processes")
         return {'type': 'registration_success'}

@@ -302,11 +311,18 @@ class ClusterMaster:
     def _remove_client(self, client_id: str) -> None:
         """Remove a client and its processes."""
-        from .database import disconnect_cluster_client
+        from .database import disconnect_cluster_client, get_db_connection
 
         # Mark as disconnected in database
         disconnect_cluster_client(client_id)
 
+        # Delete processes from database
+        conn = get_db_connection()
+        cursor = conn.cursor()
+        cursor.execute('DELETE FROM cluster_processes WHERE client_id = ?', (client_id,))
+        conn.commit()
+        conn.close()
+
         if client_id in self.client_websockets:
             del self.client_websockets[client_id]
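
The registration path now mirrors each announced process into the cluster_processes table, and _remove_client() clears those rows again on disconnect. Below is a minimal standalone sketch of that row lifecycle, using an in-memory SQLite database in place of get_db_connection(); the client id, process names, and weights are hypothetical, and the table layout follows the SQLite branch added in vidai/database.py.

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('''CREATE TABLE cluster_processes (
    id INTEGER PRIMARY KEY,
    client_id TEXT NOT NULL,
    process_name TEXT NOT NULL,
    weight INTEGER DEFAULT 10,
    status TEXT DEFAULT 'active',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')

# Registration: one row per process the client announces (names/weights made up).
processes = {'analysis_cuda': {'weight': 50}, 'training_cuda': {'weight': 20}}
for proc_name, proc_info in processes.items():
    conn.execute(
        'INSERT INTO cluster_processes (client_id, process_name, weight, status) VALUES (?, ?, ?, ?)',
        ('client-1', proc_name, proc_info.get('weight', 10), proc_info.get('status', 'active')))
conn.commit()

# Disconnect: _remove_client() deletes every row belonging to that client.
conn.execute('DELETE FROM cluster_processes WHERE client_id = ?', ('client-1',))
conn.commit()
conn.close()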

vidai/database.py

@@ -628,6 +628,32 @@ def init_db(conn) -> None:
         # Column might already exist
         pass
 
+    # Cluster processes table
+    if config['type'] == 'mysql':
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS cluster_processes (
+                id INT AUTO_INCREMENT PRIMARY KEY,
+                client_id VARCHAR(32) NOT NULL,
+                process_name VARCHAR(255) NOT NULL,
+                weight INT DEFAULT 10,
+                status VARCHAR(20) DEFAULT 'active',
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                FOREIGN KEY (client_id) REFERENCES cluster_clients (client_id) ON DELETE CASCADE
+            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
+        ''')
+    else:
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS cluster_processes (
+                id INTEGER PRIMARY KEY,
+                client_id TEXT NOT NULL,
+                process_name TEXT NOT NULL,
+                weight INTEGER DEFAULT 10,
+                status TEXT DEFAULT 'active',
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                FOREIGN KEY (client_id) REFERENCES cluster_clients (client_id)
+            )
+        ''')
+
     # Insert default admin user if not exist
     import hashlib
     default_password = hashlib.sha256('admin'.encode()).hexdigest()
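
One difference between the two branches worth noting: the MySQL table declares ON DELETE CASCADE while the SQLite one does not, and SQLite only enforces foreign keys when PRAGMA foreign_keys is enabled per connection. Since the disconnect path only marks a client as disconnected rather than deleting it, the explicit DELETE in _remove_client() is what clears cluster_processes rows on both backends. The small sketch below only illustrates that enforcement difference, using an in-memory SQLite database and hypothetical rows.

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('PRAGMA foreign_keys = ON')  # enforcement is off by default in SQLite
conn.execute('CREATE TABLE cluster_clients (client_id TEXT PRIMARY KEY)')
conn.execute('''CREATE TABLE cluster_processes (
    id INTEGER PRIMARY KEY,
    client_id TEXT NOT NULL,
    process_name TEXT NOT NULL,
    FOREIGN KEY (client_id) REFERENCES cluster_clients (client_id))''')
conn.execute("INSERT INTO cluster_clients VALUES ('client-1')")  # hypothetical client
conn.execute("INSERT INTO cluster_processes (client_id, process_name) VALUES ('client-1', 'analysis')")

try:
    # Without ON DELETE CASCADE, deleting the parent fails while child rows still reference it.
    conn.execute("DELETE FROM cluster_clients WHERE client_id = 'client-1'")
except sqlite3.IntegrityError:
    # The child rows have to be removed explicitly first, as _remove_client() does.
    conn.execute("DELETE FROM cluster_processes WHERE client_id = 'client-1'")
    conn.execute("DELETE FROM cluster_clients WHERE client_id = 'client-1'")
conn.close()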

vidai/queue.py

@@ -90,7 +90,6 @@ class QueueManager:
     def _can_start_job(self, job: Dict[str, Any]) -> bool:
         """Check if a job can be started (worker available)."""
-        from .cluster_master import cluster_master
         from .config import get_analysis_backend, get_training_backend
         from .backend import worker_sockets

@@ -101,11 +100,9 @@ class QueueManager:
             process_type = 'training'
         else:
             process_type = request_type
 
-        model_path = job['data'].get('model_path', 'Qwen/Qwen2.5-VL-7B-Instruct')
-        # Check for distributed worker
-        worker_key = cluster_master.select_worker_for_job(process_type, model_path, job['data'])
-        if worker_key:
+        # Check for distributed worker via TCP query
+        if self._has_distributed_worker(process_type):
             return True
 
         # Check for local worker

@@ -123,6 +120,16 @@ class QueueManager:
         return False
 
+    def _has_distributed_worker(self, process_type: str) -> bool:
+        """Check if there is a distributed worker available for the process type."""
+        from .database import get_db_connection
+        conn = get_db_connection()
+        cursor = conn.cursor()
+        cursor.execute('SELECT id FROM cluster_processes WHERE process_name LIKE ? AND status = ?', (f'{process_type}%', 'active'))
+        result = cursor.fetchone()
+        conn.close()
+        return result is not None
+
     def _process_queue(self) -> None:
         """Background thread to process queued jobs."""
         while self.running:
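
The new _has_distributed_worker() replaces the direct cluster_master.select_worker_for_job() lookup with a database check: any active cluster_processes row whose process_name starts with the requested process type counts as an available worker. Below is a quick standalone check of that LIKE pattern against an in-memory SQLite table; the process names are hypothetical examples of what cluster clients might register, and get_db_connection() is replaced by a local sqlite3 connection.

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('''CREATE TABLE cluster_processes (
    id INTEGER PRIMARY KEY,
    client_id TEXT NOT NULL,
    process_name TEXT NOT NULL,
    weight INTEGER DEFAULT 10,
    status TEXT DEFAULT 'active')''')
conn.executemany(
    'INSERT INTO cluster_processes (client_id, process_name, status) VALUES (?, ?, ?)',
    [('client-1', 'analysis_cuda', 'active'),    # matches 'analysis%'
     ('client-1', 'training_cuda', 'inactive'),  # wrong status, never matches
     ('client-2', 'analysis_cpu', 'active')])

def has_distributed_worker(process_type: str) -> bool:
    # Same query shape as the new QueueManager._has_distributed_worker().
    cursor = conn.cursor()
    cursor.execute(
        'SELECT id FROM cluster_processes WHERE process_name LIKE ? AND status = ?',
        (f'{process_type}%', 'active'))
    return cursor.fetchone() is not None

print(has_distributed_worker('analysis'))  # True
print(has_distributed_worker('training'))  # False: only an inactive row exists
conn.close()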