Commit 81c39eb8 authored by Your Name

Refactor: Move backend and manager classes to codai modules

- Move NvidiaBackend to codai/backends/cuda.py
- Move VulkanBackend to codai/backends/vulkan.py
- Move ModelManager, WhisperServerManager, MultiModelManager to codai/models/manager.py
- Move QueueManager to codai/queue/manager.py
- Add proper exports in codai/backends/__init__.py
- Update imports in coderai to use new modules
- Fix import paths for base class and cache functions
parent 7c6b60f0
"""Backend detection module."""
"""Backend detection and management module."""
from codai.backends.base import ModelBackend
from codai.backends.cuda import NvidiaBackend
from codai.backends.vulkan import VulkanBackend
def detect_available_backends():
......
This diff is collapsed.
This diff is collapsed.
......@@ -13,7 +13,7 @@ from codai.models.parser import ModelParserAdapter
from codai.backends import detect_available_backends
from codai.backends.cuda import NvidiaBackend
from codai.backends.vulkan import VulkanBackend
from codai.models.cache import get_cached_model_path, download_model
from codai.models.cache import get_cached_model_path, download_model, get_model_cache_dir
from codai.pydantic.textrequest import ModelInfo
......
"""Queue manager module."""
"""Queue manager module - manages request queues for model loading notifications."""
from typing import Dict, Any, Optional
from typing import Dict, Optional
import asyncio
import time
class QueueManager:
    """Manage the request queue for model loading notifications.

    Tracks which requests are waiting for a model to load (and since when)
    and which request is currently being processed. This class only records
    that state; any progress updates sent to waiting clients are produced by
    callers using the query methods below.

    All mutating and querying coroutines serialize access through ``self.lock``.
    """

    def __init__(self):
        # Storage used only by the placeholder queue API
        # (add_request / get_result / process_queue), which is not implemented.
        self.queues = {}
        self.results = {}
        # request_id -> time.time() timestamp at which the request began
        # waiting. Dict insertion order doubles as the queue order.
        self.waiting_requests: Dict[str, float] = {}
        self.current_request_id: Optional[str] = None
        # Not modified by any method of this class.
        self.model_loading: bool = False
        self.model_name: Optional[str] = None
        self.lock = asyncio.Lock()

    async def add_request(self, request_id: str, request_data: Any):
        """Add a request to the queue.

        Placeholder: currently a no-op that returns ``None``.
        """

    async def add_waiting(self, request_id: str) -> None:
        """Add a request to the waiting queue.

        Records the current time as the request's wait start. Re-adding an
        id that is already waiting resets its start time but keeps its
        position in the queue.
        """
        async with self.lock:
            self.waiting_requests[request_id] = time.time()

    async def get_result(self, request_id: str) -> Optional[Any]:
        """Get the result of a request.

        Placeholder: currently always returns ``None``.
        """
        return None

    async def remove_waiting(self, request_id: str) -> None:
        """Remove a request from the waiting queue (no-op if absent)."""
        async with self.lock:
            self.waiting_requests.pop(request_id, None)

    async def process_queue(self):
        """Process the queue.

        Placeholder: currently a no-op that returns ``None``.
        """

    async def start_processing(
        self, request_id: str, model_name: Optional[str] = None
    ) -> None:
        """Mark a request as now processing (model loaded).

        The request is dropped from the waiting queue and recorded as the
        current request, together with ``model_name``.
        """
        async with self.lock:
            self.waiting_requests.pop(request_id, None)
            self.current_request_id = request_id
            self.model_name = model_name

    async def finish_processing(self) -> None:
        """Mark the current request as finished.

        Only ``current_request_id`` is cleared; ``model_name`` is left as is.
        """
        async with self.lock:
            self.current_request_id = None

    async def is_waiting(self, request_id: str) -> bool:
        """Check if a request is in the waiting queue."""
        async with self.lock:
            return request_id in self.waiting_requests

    async def get_wait_time(self, request_id: str) -> float:
        """Get how long a request has been waiting, in seconds.

        Returns ``0.0`` when the request is not in the waiting queue.
        """
        async with self.lock:
            started_at = self.waiting_requests.get(request_id)
            if started_at is None:
                return 0.0
            return time.time() - started_at

    async def get_queue_position(self, request_id: str) -> int:
        """Get the position of a request in the queue (1-based).

        Returns ``0`` when the request is not in the waiting queue.
        """
        async with self.lock:
            # Dicts preserve insertion order, so enumeration order is queue order.
            for position, queued_id in enumerate(self.waiting_requests, start=1):
                if queued_id == request_id:
                    return position
            return 0


# Global queue manager instance
# NOTE(review): on Python < 3.10, asyncio.Lock binds to the event loop that is
# current when it is constructed; confirm this import-time instance is created
# under the same loop that later serves requests.
queue_manager = QueueManager()
......@@ -30,6 +30,11 @@ from threading import Thread
# Import codai module for enhanced tool call parsing
from codai.models import ModelParserDispatcher, OpenAIFormatter
# Import from codai modules for use in this file
from codai.models.manager import ModelManager, WhisperServerManager, MultiModelManager
from codai.queue.manager import QueueManager, queue_manager
from codai.backends import NvidiaBackend, VulkanBackend, detect_available_backends
# Per-model semaphores for request concurrency control.
# Starts empty; presumably populated elsewhere in this file, keyed by a model
# identifier — TODO confirm against the code that fills it.
model_semaphores: dict = {}
# Module-wide load mode, initially "ondemand". NOTE(review): presumably kept in
# a dict so it can be updated in place without a `global` statement — confirm.
load_mode = {"mode": "ondemand"} # Track load mode globally
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment