Fix VulkanBackend async/sync method signatures

Changed load_model and generate to non-async methods (matching the base class); chat is now synchronous as well:
- load_model: changed from async def returning bool to def returning None
- generate: changed from async def to def (removed streaming support in the sync version)
- generate: removed the 'stream' parameter, since the method is now sync
- chat: changed from async def to def
- generate_stream: remains async def (correct for streaming)

Fix VulkanBackend async/sync method signatures
Changed load_model and generate to non-async methods (matching the base class); chat is now synchronous as well:
- load_model: changed from async def returning bool to def returning None
- generate: changed from async def to def (removed streaming support in the sync version)
- generate: removed the 'stream' parameter, since the method is now sync
- chat: changed from async def to def
- generate_stream: remains async def (correct for streaming)
289a58f7 · Your Name · 660bce2d · 289a58f7
Commit 289a58f7 authored Mar 17, 2026 by Your Name
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 25 deletions

vulkan.py codai/backends/vulkan.py +13 -25

No files found.
--- a/codai/backends/vulkan.py
+++ b/codai/backends/vulkan.py
@@ -378,16 +378,13 @@ class VulkanBackend(ModelBackend):
        
        return formatted
    
-    async def load_model(self, model_path: str, model_type: str = "text", **kwargs) -> bool:
+    def load_model(self, model_path: str, model_type: str = "text", **kwargs) -> None:
        """Load a GGUF model.
        
        Args:
            model_path: Path to the GGUF model file or HuggingFace model ID
            model_type: Type of model (text, image, audio)
            **kwargs: Additional parameters
-            
-        Returns:
-            True if model loaded successfully
        """
        if not LLAMA_CPP_AVAILABLE:
            raise ImportError("llama-cpp-python is required for GGUF models. Install with: pip install llama-cpp-python")
@@ -468,13 +465,11 @@ class VulkanBackend(ModelBackend):
            print(f"DEBUG: VulkanBackend loaded model: {model_path}")
            print(f"DEBUG: n_gpu_layers={self.n_gpu_layers}, n_ctx={self.n_ctx}")
            print(f"DEBUG: chat_template={self.chat_template}")
-            
-            return True
        except Exception as e:
            print(f"Error loading GGUF model: {e}")
            raise
    
-    async def generate(
+    def generate(
        self,
        prompt: str,
        **kwargs
@@ -510,23 +505,16 @@ class VulkanBackend(ModelBackend):
            stop = get_reasoning_stop_tokens(self.chat_template)
        
        try:
-            if stream:
-                # Collect all chunks
-                chunks = []
-                async for chunk in self.generate_stream(prompt, **kwargs):
-                    chunks.append(chunk)
-                return "".join(chunks)
-            else:
-                result = self.model.create_completion(
-                    prompt=prompt,
-                    max_tokens=max_tokens,
-                    temperature=temperature,
-                    top_p=top_p,
-                    top_k=top_k,
-                    repeat_penalty=repeat_penalty,
-                    stop=stop,
-                )
-                return result['choices'][0]['text']
+            result = self.model.create_completion(
+                prompt=prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                top_k=top_k,
+                repeat_penalty=repeat_penalty,
+                stop=stop,
+            )
+            return result['choices'][0]['text']
        except Exception as e:
            print(f"Error during generation: {e}")
            raise
@@ -607,7 +595,7 @@ class VulkanBackend(ModelBackend):
            print(f"Error during streaming generation: {e}")
            raise
    
-    async def chat(
+    def chat(
        self,
        messages: List[Dict[str, str]],
        **kwargs