Commit b1b0818f authored by Your Name

Default to preloading the model at startup

- Default load mode is now 'loadall' (preload) instead of 'ondemand'
- Only use ondemand when --nopreload is explicitly specified
- Model will now be loaded at startup by default
parent b4fdeea6
...@@ -293,16 +293,18 @@ def main(): ...@@ -293,16 +293,18 @@ def main():
sys.exit(1) sys.exit(1)
# Determine load mode # Determine load mode
load_mode = None # Default is to preload (loadall) unless --nopreload is specified
load_mode = "loadall" # Default: preload models
if args.loadall: if args.loadall:
load_mode = "loadall" load_mode = "loadall"
elif args.loadswap: elif args.loadswap:
load_mode = "loadswap" load_mode = "loadswap"
elif args.nopreload: elif args.nopreload:
load_mode = "nopreload" load_mode = "ondemand"
if load_mode: set_load_mode(load_mode)
set_load_mode(load_mode) if load_mode == "ondemand":
print("Load mode: ondemand (load model on first request)")
# Initialize model manager # Initialize model manager
print("\n=== Initializing Model Manager ===") print("\n=== Initializing Model Manager ===")
...@@ -344,16 +346,20 @@ def main(): ...@@ -344,16 +346,20 @@ def main():
'ctx': get_ctx_by_index(args.n_ctx, idx, 0), 'ctx': get_ctx_by_index(args.n_ctx, idx, 0),
}) })
# Load first model # Load first model (unless nopreload mode)
try: if load_mode == "loadall":
mm = multi_model_manager.get_model_for_request(model_names[0]) try:
if mm is not None: print(f"Loading model: {model_names[0]}...")
print(f"Model loaded successfully: {model_names[0]}") mm = multi_model_manager._load_default_model()
else: if mm is not None and mm.backend is not None:
print(f"Warning: Model {model_names[0]} not loaded (will load on first request)") print(f"Model loaded successfully: {model_names[0]}")
except Exception as e: else:
print(f"Warning: Failed to load model: {e}") print(f"Warning: Model {model_names[0]} failed to load")
print(f"Model will load on first request") except Exception as e:
print(f"Warning: Failed to load model: {e}")
print(f"Model will load on first request")
else:
print(f"Load mode: ondemand (model will load on first request)")
# Set up audio model if specified # Set up audio model if specified
if audio_models: if audio_models:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment