Commit a4a8c340 authored by Your Name

Fix: Force VRAM cleanup when switching from image to text model

- Add garbage collection and torch.cuda.empty_cache() after unloading image models
- Add a small delay to allow VRAM to be freed before loading the new model
- This should help prevent OOM errors when switching between image and text models
parent 13b56ea0
@@ -2459,6 +2459,21 @@ class MultiModelManager:
except Exception as e: except Exception as e:
print(f"Warning during cleanup of '{key}': {e}") print(f"Warning during cleanup of '{key}': {e}")
del self.models[key] del self.models[key]
# Force garbage collection and clear GPU cache
import gc
gc.collect()
try:
import torch
if torch.cuda.is_available():
torch.cuda.empty_cache()
except:
pass
# Add a small delay to allow VRAM to be freed
import time
time.sleep(1)
# Now try to reload the default model # Now try to reload the default model
try: try:
from llama_cpp import Llama from llama_cpp import Llama
@@ -2589,6 +2604,20 @@ class MultiModelManager:
print(f"Warning during cleanup of '{key}': {e}") print(f"Warning during cleanup of '{key}': {e}")
del self.models[key] del self.models[key]
# Force garbage collection and clear GPU cache
import gc
gc.collect()
try:
import torch
if torch.cuda.is_available():
torch.cuda.empty_cache()
except:
pass
# Add a small delay to allow VRAM to be freed
import time
time.sleep(1)
# Check if requested model is already loaded - if so, reuse it # Check if requested model is already loaded - if so, reuse it
if requested_model in self.models: if requested_model in self.models:
self.current_model_key = requested_model self.current_model_key = requested_model
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment