Refactor: Move backend and manager classes to codai modules

- Move NvidiaBackend to codai/backends/cuda.py
- Move VulkanBackend to codai/backends/vulkan.py
- Move ModelManager, WhisperServerManager, MultiModelManager to codai/models/manager.py
- Move QueueManager to codai/queue/manager.py
- Add proper exports in codai/backends/__init__.py
- Update imports in coderai to use new modules
- Fix import paths for base class and cache functions

Refactor: Move backend and manager classes to codai modules

- Move NvidiaBackend to codai/backends/cuda.py
- Move VulkanBackend to codai/backends/vulkan.py
- Move ModelManager, WhisperServerManager, MultiModelManager to codai/models/manager.py
- Move QueueManager to codai/queue/manager.py
- Add proper exports in codai/backends/__init__.py
- Update imports in coderai to use new modules
- Fix import paths for base class and cache functions
81c39eb8 · Your Name · 7c6b60f0 · 81c39eb8 · 81c39eb8 · 81c39eb8
Commit 81c39eb8 authored Mar 17, 2026 by Your Name
6 changed files
--- a/codai/backends/__init__.py
+++ b/codai/backends/__init__.py
-"""Backend detection module."""
+"""Backend detection and management module."""
+from codai.backends.base import ModelBackend
+from codai.backends.cuda import NvidiaBackend
+from codai.backends.vulkan import VulkanBackend
 def detect_available_backends():

--- a/codai/backends/cuda.py
+++ b/codai/backends/cuda.py
--- a/codai/backends/vulkan.py
+++ b/codai/backends/vulkan.py
--- a/codai/models/manager.py
+++ b/codai/models/manager.py
@@ -13,7 +13,7 @@ from codai.models.parser import ModelParserAdapter
 from codai.backends import detect_available_backends
 from codai.backends.cuda import NvidiaBackend
 from codai.backends.vulkan import VulkanBackend
-from codai.models.cache import get_cached_model_path, download_model
+from codai.models.cache import get_cached_model_path, download_model, get_model_cache_dir
 from codai.pydantic.textrequest import ModelInfo

--- a/codai/queue/manager.py
+++ b/codai/queue/manager.py
-"""Queue manager module."""
+"""Queue manager module - manages request queues for model loading notifications."""
-from typing import Dict, Any, Optional
+from typing import Dict, Optional
 import asyncio
+import time
 class QueueManager:
-    """Manager for handling request queues."""
+    """
+    Manages request queue for model loading notifications.
+    When clients are waiting for a model to load, sends them progress updates.
+    """
    def __init__(self):
-        self.queues = {}
+        self.waiting_requests: Dict[str, float] = {}  # request_id -> start_time
-        self.results = {}
+        self.current_request_id: Optional[str] = None
+        self.model_loading: bool = False
+        self.model_name: Optional[str] = None
+        self.lock = asyncio.Lock()
-    async def add_request(self, request_id: str, request_data: Any):
+    async def add_waiting(self, request_id: str) -> None:
-        """Add a request to the queue."""
+        """Add a request to the waiting queue."""
-        pass
+        async with self.lock:
+            self.waiting_requests[request_id] = time.time()
-    async def get_result(self, request_id: str) -> Optional[Any]:
+    async def remove_waiting(self, request_id: str) -> None:
-        """Get the result of a request."""
+        """Remove a request from the waiting queue."""
-        pass
+        async with self.lock:
+            self.waiting_requests.pop(request_id, None)
-    async def process_queue(self):
+    async def start_processing(self, request_id: str, model_name: str = None) -> None:
-        """Process the queue."""
+        """Mark a request as now processing (model loaded)."""
-        pass
+        async with self.lock:
+            self.waiting_requests.pop(request_id, None)
+            self.current_request_id = request_id
+            self.model_name = model_name
+    async def finish_processing(self) -> None:
+        """Mark current request as finished."""
+        async with self.lock:
+            self.current_request_id = None
+    async def is_waiting(self, request_id: str) -> bool:
+        """Check if a request is in the waiting queue."""
+        async with self.lock:
+            return request_id in self.waiting_requests
+    async def get_wait_time(self, request_id: str) -> float:
+        """Get how long a request has been waiting in seconds."""
+        async with self.lock:
+            if request_id in self.waiting_requests:
+                return time.time() - self.waiting_requests[request_id]
+            return 0.0
+    async def get_queue_position(self, request_id: str) -> int:
+        """Get the position of a request in the queue (1-based)."""
+        async with self.lock:
+            keys = list(self.waiting_requests.keys())
+            try:
+                return keys.index(request_id) + 1
+            except ValueError:
+                return 0
+# Global queue manager instance
+queue_manager = QueueManager()
--- a/coderai
+++ b/coderai
@@ -30,6 +30,11 @@ from threading import Thread
 # Import codai module for enhanced tool call parsing
 from codai.models import ModelParserDispatcher, OpenAIFormatter
+# Import from codai modules for use in this file
+from codai.models.manager import ModelManager, WhisperServerManager, MultiModelManager
+from codai.queue.manager import QueueManager, queue_manager
+from codai.backends import NvidiaBackend, VulkanBackend, detect_available_backends
 # Per-model semaphores for request concurrency control
 model_semaphores: dict = {}
 load_mode = {"mode": "ondemand"}  # Track load mode globally