Rewrite done

parent 80b1c060
...@@ -209,17 +209,20 @@ elif [ "$BACKEND" = "vulkan" ]; then ...@@ -209,17 +209,20 @@ elif [ "$BACKEND" = "vulkan" ]; then
echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}" echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
fi fi
# Build with Vulkan support # Build with Vulkan support (add CUDA too if available)
echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}" echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || { _LLAMA_CMAKE="-DGGML_VULKAN=ON"
if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
_LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_CUDA=ON"
echo -e "${GREEN} ✓ Also enabling CUDA support (NVIDIA detected)${NC}"
fi
CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
echo -e "${RED}Build failed!${NC}" echo -e "${RED}Build failed!${NC}"
exit 1 exit 1
} }
echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}" echo -e "${YELLOW}Installing Vulkan-specific requirements...${NC}"
pip install -r requirements-vulkan.txt pip install -r requirements-vulkan.txt
# Build whispercpp Python package with Vulkan support for GPU-accelerated audio transcription
echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}" echo -e "${YELLOW}Building whispercpp with Vulkan support for GPU-accelerated transcription...${NC}"
# First, uninstall any existing whispercpp (pip version doesn't have Vulkan) # First, uninstall any existing whispercpp (pip version doesn't have Vulkan)
...@@ -318,11 +321,16 @@ elif [ "$BACKEND" = "vulkan-nvidia" ]; then ...@@ -318,11 +321,16 @@ elif [ "$BACKEND" = "vulkan-nvidia" ]; then
echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}" echo -e "${GREEN}✓ Found Vulkan shader compiler: $GLSLC_CMD${NC}"
fi fi
# Build with Vulkan support # Build with Vulkan support (add CUDA too if available)
# Note: llama.cpp doesn't have a compile-time option to disable specific GPUs # Note: llama.cpp doesn't have a compile-time option to disable specific GPUs
# The device selection happens at runtime via environment variables # The device selection happens at runtime via environment variables
echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}" echo -e "${YELLOW}Building llama-cpp-python with Vulkan support...${NC}"
CMAKE_ARGS="-DGGML_VULKAN=ON" pip install --upgrade llama-cpp-python --no-cache-dir || { _LLAMA_CMAKE="-DGGML_VULKAN=ON"
if command -v nvcc &> /dev/null || [ -d "/usr/local/cuda" ]; then
_LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_CUDA=ON"
echo -e "${GREEN} ✓ Also enabling CUDA support (NVIDIA detected)${NC}"
fi
CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
echo -e "${RED}Build failed!${NC}" echo -e "${RED}Build failed!${NC}"
exit 1 exit 1
} }
...@@ -378,10 +386,15 @@ elif [ "$BACKEND" = "cuda" ]; then ...@@ -378,10 +386,15 @@ elif [ "$BACKEND" = "cuda" ]; then
echo -e "${GREEN}✓ Found CUDA at /usr/local/cuda${NC}" echo -e "${GREEN}✓ Found CUDA at /usr/local/cuda${NC}"
fi fi
# Build llama-cpp-python with CUDA support # Build llama-cpp-python with CUDA support (add Vulkan too if available)
echo -e "${YELLOW}Building llama-cpp-python with CUDA support...${NC}" echo -e "${YELLOW}Building llama-cpp-python with CUDA support...${NC}"
echo -e "${YELLOW}This may take several minutes...${NC}" echo -e "${YELLOW}This may take several minutes...${NC}"
CMAKE_ARGS="-DGGML_CUDA=ON" pip install --upgrade llama-cpp-python --no-cache-dir || { _LLAMA_CMAKE="-DGGML_CUDA=ON"
if pkg-config --exists vulkan 2>/dev/null; then
_LLAMA_CMAKE="$_LLAMA_CMAKE -DGGML_VULKAN=ON"
echo -e "${GREEN} ✓ Also enabling Vulkan support (Vulkan detected)${NC}"
fi
CMAKE_ARGS="$_LLAMA_CMAKE" pip install --upgrade llama-cpp-python --no-cache-dir || {
echo "" echo ""
echo -e "${RED}Build failed!${NC}" echo -e "${RED}Build failed!${NC}"
echo -e "${YELLOW}Make sure CUDA toolkit is installed:${NC}" echo -e "${YELLOW}Make sure CUDA toolkit is installed:${NC}"
......
This diff is collapsed.
...@@ -65,23 +65,37 @@ class SessionManager: ...@@ -65,23 +65,37 @@ class SessionManager:
self.config_dir = config_dir self.config_dir = config_dir
self.secret = get_or_create_secret(config_dir) self.secret = get_or_create_secret(config_dir)
self.session_timeout = timedelta(minutes=session_timeout_minutes) self.session_timeout = timedelta(minutes=session_timeout_minutes)
self._lock = __import__('threading').Lock()
def _load_auth_data(self) -> Dict[str, Any]: def _load_auth_data(self) -> Dict[str, Any]:
"""Load auth.json data.""" """Load auth.json data."""
auth_path = self.config_dir / "auth.json" auth_path = self.config_dir / "auth.json"
if auth_path.exists(): if auth_path.exists():
try:
with open(auth_path, 'r') as f: with open(auth_path, 'r') as f:
return json.load(f) content = f.read()
if content.strip():
return json.loads(content)
except (json.JSONDecodeError, OSError):
pass
return {"users": [], "tokens": [], "sessions": {}} return {"users": [], "tokens": [], "sessions": {}}
def _save_auth_data(self, auth_data: Dict[str, Any]): def _save_auth_data(self, auth_data: Dict[str, Any]):
"""Save auth.json data.""" """Save auth.json data atomically."""
auth_path = self.config_dir / "auth.json" auth_path = self.config_dir / "auth.json"
# Atomic write with self._lock:
temp_path = auth_path.with_suffix('.tmp') import os, tempfile
with open(temp_path, 'w') as f: fd, tmp = tempfile.mkstemp(dir=str(self.config_dir), suffix='.tmp')
try:
with os.fdopen(fd, 'w') as f:
json.dump(auth_data, f, indent=2) json.dump(auth_data, f, indent=2)
temp_path.replace(auth_path) os.replace(tmp, str(auth_path))
except Exception:
try:
os.unlink(tmp)
except OSError:
pass
raise
def create_session(self, username: str) -> str: def create_session(self, username: str) -> str:
"""Create a new session for a user. """Create a new session for a user.
......
This diff is collapsed.
This diff is collapsed.
...@@ -3,60 +3,52 @@ ...@@ -3,60 +3,52 @@
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}CoderAI Admin{% endblock %}</title> <title>{% block title %}CoderAI{% endblock %}</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<link rel="stylesheet" href="/static/admin/style.css"> <link rel="stylesheet" href="/static/admin/style.css">
{% block head %}{% endblock %}
</head> </head>
<body> <body>
{% if username %}
<div class="layout"> {% if username %}
<aside class="sidebar"> <nav class="topnav">
<div class="logo"> <div class="topnav-inner">
<h1>CoderAI</h1> <div class="topnav-left">
</div> <a href="/admin" class="nav-logo">
<nav class="nav"> <div class="nav-logo-mark">AI</div>
<a href="/admin" class="nav-item {% if request.url.path == '/admin' %}active{% endif %}"> <span class="nav-logo-name">CoderAI</span>
<span class="icon">📊</span>
<span>Overview</span>
</a>
{% if is_admin %}
<a href="/admin/models" class="nav-item {% if '/models' in request.url.path %}active{% endif %}">
<span class="icon">🤖</span>
<span>Models</span>
</a>
<a href="/admin/tokens" class="nav-item {% if '/tokens' in request.url.path %}active{% endif %}">
<span class="icon">🔑</span>
<span>API Tokens</span>
</a>
<a href="/admin/users" class="nav-item {% if '/users' in request.url.path %}active{% endif %}">
<span class="icon">👥</span>
<span>Users</span>
</a> </a>
<div class="nav-links">
<a href="/admin" class="nav-link {% if request.url.path == '/admin' %}active{% endif %}">Overview</a>
<a href="/chat" class="nav-link {% if request.url.path == '/chat' %}active{% endif %}">Chat</a>
{% if is_admin|default(false) %}
<a href="/admin/models" class="nav-link {% if '/models' in request.url.path %}active{% endif %}">Models</a>
<a href="/admin/tokens" class="nav-link {% if '/tokens' in request.url.path %}active{% endif %}">Tokens</a>
<a href="/admin/users" class="nav-link {% if '/users' in request.url.path %}active{% endif %}">Users</a>
<a href="/admin/settings" class="nav-link {% if '/settings' in request.url.path %}active{% endif %}">Settings</a>
{% endif %} {% endif %}
<a href="/chat" class="nav-item {% if '/chat' in request.url.path %}active{% endif %}">
<span class="icon">💬</span>
<span>Chat</span>
</a>
</nav>
<div class="sidebar-footer">
<div class="user-info">
<span class="icon">👤</span>
<span>{{ username }}</span>
</div> </div>
<a href="/logout" class="logout-btn">Logout</a>
</div> </div>
</aside> <div class="topnav-right">
<main class="main-content"> <span class="nav-username">{{ username }}</span>
<div class="content-wrapper"> <div class="nav-sep"></div>
{% block content %}{% endblock %} <a href="/logout" class="nav-logout">Sign out</a>
</div> </div>
</main>
</div> </div>
{% else %} </nav>
<div class="content-wrapper"> <main class="main">
{% endif %}
<div class="{% block wrapper_class %}container{% endblock %}">
{% block content %}{% endblock %} {% block content %}{% endblock %}
</div> </div>
{% endif %}
{% if username %}
</main>
{% endif %}
{% block scripts %}{% endblock %} {% block scripts %}{% endblock %}
</body> </body>
</html> </html>
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}Change Password — CoderAI{% endblock %}
{% block title %}Change Password - CoderAI{% endblock %}
{% block content %} {% block content %}
<div class="page-header"> <div class="centered-wrap">
<div class="centered-card">
<h1>Change Password</h1> <h1>Change Password</h1>
{% if must_change %} <p class="sub">
<p class="text-warning">You must change your password before continuing.</p> {% if must_change %}You must set a new password before continuing.
{% endif %} {% else %}Update your account password.{% endif %}
</div> </p>
{% if error %} {% if error %}
<div class="alert alert-error"> <div class="alert alert-error">{{ error }}</div>
{{ error }} {% endif %}
</div>
{% endif %}
<div class="card"> <form method="post" action="/admin/change-password">
<form method="post" action="/admin/change-password" class="form">
{% if not must_change %} {% if not must_change %}
<div class="form-group"> <div class="form-row">
<label for="old_password">Current Password</label> <label class="form-label" for="old_password">Current Password</label>
<input type="password" id="old_password" name="old_password" required> <input class="form-input" type="password" id="old_password" name="old_password"
placeholder="••••••••" required autocomplete="current-password">
</div> </div>
{% endif %} {% endif %}
<div class="form-row">
<div class="form-group"> <label class="form-label" for="new_password">New Password</label>
<label for="new_password">New Password</label> <input class="form-input" type="password" id="new_password" name="new_password"
<input type="password" id="new_password" name="new_password" required minlength="8"> placeholder="••••••••" required minlength="8" autocomplete="new-password">
<small class="form-text">Minimum 8 characters</small> <span class="form-hint">At least 8 characters</span>
</div> </div>
<div class="form-row">
<div class="form-group"> <label class="form-label" for="confirm_password">Confirm Password</label>
<label for="confirm_password">Confirm New Password</label> <input class="form-input" type="password" id="confirm_password" name="confirm_password"
<input type="password" id="confirm_password" name="confirm_password" required minlength="8"> placeholder="••••••••" required minlength="8" autocomplete="new-password">
</div> </div>
<div class="form-actions"> <div class="form-actions">
<button type="submit" class="btn btn-primary">Change Password</button> <button type="submit" class="btn btn-primary">Update password</button>
{% if not must_change %} {% if not must_change %}
<a href="/admin" class="btn btn-secondary">Cancel</a> <a href="/admin" class="btn btn-ghost">Cancel</a>
{% endif %} {% endif %}
</div> </div>
</form> </form>
</div>
</div> </div>
{% endblock %} {% endblock %}
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}Chat — CoderAI{% endblock %}
{% block title %}Chat - CoderAI{% endblock %} {% block wrapper_class %}{% endblock %}
{% block content %} {% block content %}
<div class="chat-container"> <div class="chat-wrap" style="margin:0 1.5rem 1rem;border-radius:8px">
<div class="chat-header"> <div class="chat-bar">
<div class="chat-title">
<h2>Chat</h2> <h2>Chat</h2>
</div>
<div class="chat-controls"> <div class="chat-controls">
<select id="model-selector" class="form-control"> <select id="model-sel" class="form-input" style="font-size:13px;padding:.3rem .625rem;min-width:200px">
<option value="">Select a model...</option> <option value="">Select model…</option>
</select> </select>
<button class="btn btn-secondary" onclick="newChat()">New Chat</button> <button class="btn btn-ghost btn-sm" onclick="newChat()">Clear</button>
</div> </div>
</div> </div>
<div class="chat-messages" id="chat-messages"> <div class="chat-messages" id="chat-msgs">
<div class="welcome-message"> <div class="chat-empty">
<h3>Welcome to CoderAI Chat</h3> <h3>CoderAI Chat</h3>
<p>Select a model and start chatting</p> <p>Select a model and start typing</p>
</div> </div>
</div> </div>
<div class="chat-input-container"> <div class="chat-foot">
<form id="chat-form" class="chat-input-form"> <div id="typing" style="font-size:11px;color:var(--text-3);height:14px;margin-bottom:.3rem;font-family:var(--mono)"></div>
<textarea id="chat-input" class="chat-input" <div class="chat-input-row">
placeholder="Type your message..." <textarea id="chat-in" class="chat-textarea" placeholder="Send a message…" rows="1"></textarea>
rows="3"></textarea> <button class="btn btn-primary" id="send-btn" onclick="send()" style="padding:.5rem .75rem;align-self:flex-end">
<button type="submit" class="btn btn-primary" id="send-btn">Send</button> <svg viewBox="0 0 16 16" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="width:13px;height:13px"><line x1="14" y1="2" x2="7" y2="9"/><polygon points="14 2 10 14 7 9 2 6 14 2"/></svg>
</form> </button>
</div>
<div class="chat-hint">Enter to send · Shift+Enter for newline</div>
</div> </div>
</div> </div>
{% endblock %} {% endblock %}
{% block scripts %} {% block scripts %}
<script> <script>
let currentModel = null; let history = [];
let messages = []; let busy = false;
async function loadModels() { async function loadModels() {
try { try {
const response = await fetch('/v1/models'); const d = await fetch('/v1/models').then(r => r.json());
const data = await response.json(); const sel = document.getElementById('model-sel');
sel.innerHTML = '<option value="">Select model…</option>';
const selector = document.getElementById('model-selector'); (d.data || []).forEach(m => {
selector.innerHTML = '<option value="">Select a model...</option>'; const o = document.createElement('option');
o.value = o.textContent = m.id;
data.data.forEach(model => { sel.appendChild(o);
const option = document.createElement('option');
option.value = model.id;
option.textContent = model.id;
selector.appendChild(option);
}); });
} catch (error) { } catch {}
console.error('Failed to load models:', error);
}
} }
document.getElementById('model-selector').addEventListener('change', (e) => {
currentModel = e.target.value;
});
function newChat() { function newChat() {
messages = []; history = [];
document.getElementById('chat-messages').innerHTML = ` document.getElementById('chat-msgs').innerHTML = '<div class="chat-empty"><h3>New conversation</h3><p>Start typing below</p></div>';
<div class="welcome-message">
<h3>New Chat Started</h3>
<p>Select a model and start chatting</p>
</div>
`;
} }
function addMessage(role, content) { function addMsg(role, text) {
const messagesDiv = document.getElementById('chat-messages'); const wrap = document.getElementById('chat-msgs');
wrap.querySelector('.chat-empty')?.remove();
// Remove welcome message if present const t = new Date().toLocaleTimeString([],{hour:'2-digit',minute:'2-digit'});
const welcome = messagesDiv.querySelector('.welcome-message'); const d = document.createElement('div');
if (welcome) { d.className = 'msg ' + role;
welcome.remove(); d.innerHTML = `
} <div class="msg-av ${role === 'user' ? 'user' : 'ai'}">${role === 'user' ? 'YOU' : 'AI'}</div>
<div class="msg-body">
const messageDiv = document.createElement('div'); <div class="msg-meta">${role === 'user' ? 'You' : 'Assistant'} · ${t}</div>
messageDiv.className = `message message-${role}`; <div class="msg-text">${String(text).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g,'<br>')}</div>
</div>`;
const avatar = document.createElement('div'); wrap.appendChild(d);
avatar.className = 'message-avatar'; wrap.scrollTop = wrap.scrollHeight;
avatar.textContent = role === 'user' ? '👤' : '🤖';
const contentDiv = document.createElement('div');
contentDiv.className = 'message-content';
contentDiv.textContent = content;
messageDiv.appendChild(avatar);
messageDiv.appendChild(contentDiv);
messagesDiv.appendChild(messageDiv);
// Scroll to bottom
messagesDiv.scrollTop = messagesDiv.scrollHeight;
} }
document.getElementById('chat-form').addEventListener('submit', async (e) => { async function send() {
e.preventDefault(); if (busy) return;
const model = document.getElementById('model-sel').value;
if (!model) { document.getElementById('model-sel').focus(); return; }
const input = document.getElementById('chat-in');
const text = input.value.trim();
if (!text) return;
if (!currentModel) { addMsg('user', text);
alert('Please select a model first'); history.push({role:'user', content:text});
return;
}
const input = document.getElementById('chat-input');
const message = input.value.trim();
if (!message) {
return;
}
// Add user message
addMessage('user', message);
messages.push({ role: 'user', content: message });
// Clear input
input.value = ''; input.value = '';
input.style.height = 'auto';
// Disable send button busy = true;
const sendBtn = document.getElementById('send-btn'); document.getElementById('send-btn').disabled = true;
sendBtn.disabled = true; document.getElementById('typing').textContent = 'Assistant is typing…';
sendBtn.textContent = 'Sending...';
try { try {
const response = await fetch('/v1/chat/completions', { const r = await fetch('/v1/chat/completions', {
method: 'POST', method:'POST', headers:{'Content-Type':'application/json'},
headers: { body: JSON.stringify({model, messages: history, stream:false})
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: currentModel,
messages: messages,
stream: false
})
}); });
if (!r.ok) throw new Error('HTTP ' + r.status);
if (!response.ok) { const d = await r.json();
throw new Error('Request failed'); const reply = d.choices[0].message.content;
} addMsg('assistant', reply);
history.push({role:'assistant', content:reply});
const data = await response.json(); } catch (e) {
const assistantMessage = data.choices[0].message.content; addMsg('assistant', 'Error: ' + e.message);
addMessage('assistant', assistantMessage);
messages.push({ role: 'assistant', content: assistantMessage });
} catch (error) {
addMessage('assistant', 'Error: ' + error.message);
} finally { } finally {
sendBtn.disabled = false; busy = false;
sendBtn.textContent = 'Send'; document.getElementById('send-btn').disabled = false;
document.getElementById('typing').textContent = '';
} }
}); }
// Handle Enter key (Shift+Enter for new line) document.getElementById('chat-in').addEventListener('keydown', e => {
document.getElementById('chat-input').addEventListener('keydown', (e) => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
if (e.key === 'Enter' && !e.shiftKey) { });
e.preventDefault(); document.getElementById('chat-in').addEventListener('input', function() {
document.getElementById('chat-form').dispatchEvent(new Event('submit')); this.style.height = 'auto';
} this.style.height = Math.min(this.scrollHeight, 140) + 'px';
}); });
// Load models on page load
loadModels(); loadModels();
</script> </script>
{% endblock %} {% endblock %}
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}Overview — CoderAI{% endblock %}
{% block title %}Dashboard - CoderAI{% endblock %}
{% block content %} {% block content %}
<div class="page-header"> <div class="page-header">
<div>
<h1>Overview</h1> <h1>Overview</h1>
<p>System status</p>
</div>
<div class="header-actions"> <div class="header-actions">
<button class="btn btn-secondary" onclick="reloadConfig()">Reload Config</button> <span class="live" id="live-label">Live</span>
</div> </div>
</div> </div>
<div class="dashboard-grid"> <div class="stat-grid">
<div class="card"> <div class="stat">
<h3>System Status</h3> <div class="stat-label">Status</div>
<div class="status-grid"> <div class="stat-value small" id="sys-status" style="font-size:1.125rem"></div>
<div class="status-item"> <div class="stat-sub" id="sys-backend">loading…</div>
<span class="label">Backend:</span>
<span class="value" id="backend">Loading...</span>
</div> </div>
<div class="status-item"> <div class="stat">
<span class="label">GPU:</span> <div class="stat-label">Models Loaded</div>
<span class="value" id="gpu">Loading...</span> <div class="stat-value" id="models-count"></div>
<div class="stat-sub" id="models-mode"></div>
</div> </div>
<div class="status-item"> <div class="stat">
<span class="label">Uptime:</span> <div class="stat-label">Requests</div>
<span class="value" id="uptime">Loading...</span> <div class="stat-value" id="req-total">0</div>
<div class="stat-sub"><span id="req-active">0</span> active</div>
</div> </div>
<div class="status-item"> <div class="stat">
<span class="label">Status:</span> <div class="stat-label">VRAM</div>
<span class="value status-ok" id="status">OK</span> <div class="stat-value" id="vram-pct"></div>
<div class="progress" style="margin-top:.625rem">
<div class="progress-fill" id="vram-bar" style="width:0%"></div>
</div> </div>
<div class="progress-labels">
<span id="vram-used"></span><span id="vram-total"></span>
</div> </div>
</div> </div>
</div>
<div class="card"> <div class="card" style="margin-bottom:1rem">
<h3>Active Models</h3> <div class="card-title">Models</div>
<div id="active-models"> <div id="active-models"><span class="muted small">No models loaded</span></div>
<p class="text-muted">No models loaded</p>
</div>
{% if is_admin %} {% if is_admin %}
<a href="/admin/models" class="btn btn-primary btn-sm">Manage Models</a> <div style="margin-top:.875rem">
{% endif %} <a href="/admin/models" class="btn btn-ghost btn-sm">Manage models</a>
</div>
<div class="card">
<h3>Request Stats</h3>
<div class="stats-grid">
<div class="stat-item">
<div class="stat-value" id="total-requests">0</div>
<div class="stat-label">Total Requests</div>
</div>
<div class="stat-item">
<div class="stat-value" id="active-requests">0</div>
<div class="stat-label">Active</div>
</div>
<div class="stat-item">
<div class="stat-value" id="queued-requests">0</div>
<div class="stat-label">Queued</div>
</div>
</div>
</div>
<div class="card">
<h3>VRAM Usage</h3>
<div class="progress-bar">
<div class="progress-fill" id="vram-progress" style="width: 0%"></div>
</div>
<p class="text-muted" id="vram-text">0 GB / 0 GB (0%)</p>
</div> </div>
{% endif %}
</div> </div>
<div class="card"> <div class="card mb-0">
<h3>Recent Activity</h3> <div class="card-title">Recent Activity</div>
<div class="table-responsive"> <div class="table-wrap" style="border:none">
<table class="table"> <table>
<thead> <thead><tr><th>Time</th><th>Model</th><th>Type</th><th>Status</th><th>Duration</th></tr></thead>
<tr> <tbody id="activity-body">
<th>Time</th> <tr class="empty-row"><td colspan="5">No recent activity</td></tr>
<th>Model</th>
<th>Type</th>
<th>Status</th>
<th>Duration</th>
</tr>
</thead>
<tbody id="activity-table">
<tr>
<td colspan="5" class="text-center text-muted">No recent activity</td>
</tr>
</tbody> </tbody>
</table> </table>
</div> </div>
...@@ -95,45 +65,45 @@ ...@@ -95,45 +65,45 @@
{% block scripts %} {% block scripts %}
<script> <script>
async function loadStatus() { async function poll() {
try { try {
const response = await fetch('/admin/api/status'); const d = await fetch('/admin/api/status').then(r => r.json());
const data = await response.json(); const ok = d.status === 'ok';
document.getElementById('sys-status').textContent = ok ? 'Online' : 'Error';
document.getElementById('sys-status').className = 'stat-value small ' + (ok ? 'text-green' : 'text-red');
document.getElementById('sys-backend').textContent = d.backend || d.load_mode || '—';
document.getElementById('models-count').textContent = d.models_loaded ?? '—';
document.getElementById('models-mode').textContent = d.load_mode ? d.load_mode + ' mode' : '';
document.getElementById('backend').textContent = data.backend || 'auto'; const loaded = d.loaded_models || [];
document.getElementById('uptime').textContent = data.uptime || '0h 0m'; const enabled = d.enabled_models || [];
document.getElementById('status').textContent = data.status === 'ok' ? 'OK' : 'Error'; const loadedSet = new Set(loaded);
const notLoaded = enabled.filter(m => !loadedSet.has(m));
let html = '';
if(loaded.length) html += loaded.map(m => `<span class="badge badge-admin" style="margin:.125rem" title="Loaded">● ${m}</span>`).join('');
if(notLoaded.length) html += notLoaded.map(m => `<span class="badge" style="margin:.125rem;opacity:.55" title="Enabled, not loaded">○ ${m}</span>`).join('');
document.getElementById('active-models').innerHTML = html || '<span class="muted small">No models loaded</span>';
// Update models loaded count if (d.vram) {
if (data.models_loaded > 0) { const pct = Math.round(d.vram.used / d.vram.total * 100);
document.getElementById('active-models').innerHTML = document.getElementById('vram-pct').textContent = pct + '%';
`<p>${data.models_loaded} model(s) loaded</p>`; document.getElementById('vram-bar').style.width = pct + '%';
} document.getElementById('vram-used').textContent = d.vram.used.toFixed(1) + ' GB';
} catch (error) { document.getElementById('vram-total').textContent = d.vram.total.toFixed(1) + ' GB';
console.error('Failed to load status:', error);
}
}
async function reloadConfig() {
if (confirm('Reload configuration from disk? This will not restart the server.')) {
try {
const response = await fetch('/admin/api/system/reload', { method: 'POST' });
if (response.ok) {
alert('Configuration reloaded successfully');
loadStatus();
} else { } else {
alert('Failed to reload configuration'); document.getElementById('vram-pct').textContent = 'N/A';
} }
} catch (error) {
alert('Error: ' + error.message); if (d.requests) {
document.getElementById('req-total').textContent = d.requests.total ?? 0;
document.getElementById('req-active').textContent = d.requests.active ?? 0;
} }
} catch {
document.getElementById('sys-status').textContent = 'Offline';
document.getElementById('sys-status').className = 'stat-value small text-red';
} }
} }
poll();
// Load status on page load setInterval(poll, 7000);
loadStatus();
// Refresh status every 5 seconds
setInterval(loadStatus, 5000);
</script> </script>
{% endblock %} {% endblock %}
{% extends "base.html" %} <!DOCTYPE html>
<html lang="en">
{% block title %}Login - CoderAI{% endblock %} <head>
<meta charset="UTF-8">
{% block content %} <meta name="viewport" content="width=device-width, initial-scale=1.0">
<div class="login-container"> <title>Sign in — CoderAI</title>
<div class="login-box"> <link rel="preconnect" href="https://fonts.googleapis.com">
<div class="login-header"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<link rel="stylesheet" href="/static/admin/style.css">
</head>
<body>
<div class="login-wrap">
<div class="login-card">
<div class="login-logo">
<div class="login-mark">AI</div>
<div class="login-logo-text">
<h1>CoderAI</h1> <h1>CoderAI</h1>
<p>Admin Dashboard</p> <p>Local AI Server</p>
</div>
</div> </div>
{% if error %} {% if error %}
<div class="alert alert-error"> <div class="alert alert-error" style="margin-bottom:1.25rem">{{ error }}</div>
{{ error }}
</div>
{% endif %} {% endif %}
<form method="post" action="/login" class="login-form"> <form method="post" action="/login">
<div class="form-group"> <div class="form-row">
<label for="username">Username</label> <label class="form-label" for="username">Username</label>
<input type="text" id="username" name="username" required autofocus> <input class="form-input" type="text" id="username" name="username"
placeholder="admin" required autofocus autocomplete="username">
</div> </div>
<div class="form-row">
<div class="form-group"> <label class="form-label" for="password">Password</label>
<label for="password">Password</label> <input class="form-input" type="password" id="password" name="password"
<input type="password" id="password" name="password" required> placeholder="••••••••" required autocomplete="current-password">
</div> </div>
<button type="submit" class="btn btn-primary" style="width:100%;justify-content:center;margin-top:.875rem;padding:.5625rem">
<button type="submit" class="btn btn-primary btn-block">Login</button> Sign in
</button>
</form> </form>
<div class="login-footer"> <div class="login-footer">default: admin / admin</div>
<p class="text-muted">Default credentials: admin / admin</p>
</div>
</div> </div>
</div> </div>
{% endblock %} </body>
</html>
This diff is collapsed.
{% extends "base.html" %}
{% block title %}Settings — CoderAI{% endblock %}
{% block content %}
<div class="page-header">
<div>
<h1>Settings</h1>
<p>Server configuration — restart CoderAI to apply changes</p>
</div>
<div class="header-actions">
<span id="save-status" class="muted small" style="margin-right:.5rem"></span>
<button class="btn btn-primary" onclick="saveSettings()">Save changes</button>
</div>
</div>
<div id="settings-alert" style="display:none"></div>
<!-- Server -->
<div class="card mb-0">
<div class="card-title">Server</div>
<div style="display:grid;grid-template-columns:1fr 160px;gap:1rem;align-items:start">
<div class="form-row" style="margin:0">
<label class="form-label">Listen host</label>
<input type="text" id="s-host" class="form-input" placeholder="0.0.0.0">
<span class="form-hint">IP address or hostname to bind to (0.0.0.0 = all interfaces)</span>
</div>
<div class="form-row" style="margin:0">
<label class="form-label">Port</label>
<input type="number" id="s-port" class="form-input" placeholder="8000" min="1" max="65535">
</div>
</div>
<div class="form-row" style="margin-top:1rem;margin-bottom:.25rem">
<label style="display:flex;align-items:center;gap:.5rem;cursor:pointer">
<input type="checkbox" id="s-https" onchange="toggleHttps()">
<span style="font-size:13px;font-weight:500">Enable HTTPS</span>
</label>
</div>
<div id="https-fields" style="display:none;margin-top:.75rem">
<div class="form-row">
<label class="form-label">SSL key path <span class="muted">(leave blank to auto-generate)</span></label>
<input type="text" id="s-key" class="form-input" placeholder="/path/to/key.pem">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">SSL certificate path</label>
<input type="text" id="s-cert" class="form-input" placeholder="/path/to/cert.pem">
</div>
</div>
</div>
<!-- Storage -->
<div class="card mb-0" style="margin-top:1rem">
<div class="card-title">Storage</div>
<div class="form-row">
<label class="form-label">HuggingFace cache directory <span class="muted">(leave blank for default ~/.cache/huggingface)</span></label>
<input type="text" id="s-hf-cache" class="form-input" placeholder="e.g. /data/models/huggingface">
</div>
<div class="form-row" style="margin:0">
<label class="form-label">GGUF cache directory <span class="muted">(leave blank for default ~/.cache/coderai/models)</span></label>
<input type="text" id="s-gguf-cache" class="form-input" placeholder="e.g. /data/models/gguf">
</div>
</div>
{% endblock %}
{% block scripts %}
<script>
/**
 * Show the SSL key/certificate inputs (#https-fields) only while the
 * "Enable HTTPS" checkbox (#s-https) is ticked.
 */
function toggleHttps(){
  const fields = document.getElementById('https-fields');
  const httpsEnabled = document.getElementById('s-https').checked;
  if(httpsEnabled){
    fields.style.display = 'block';
  }else{
    fields.style.display = 'none';
  }
}
/**
 * Display a message in the #settings-alert banner.
 *
 * @param {string} type - 'error' shows a sticky error banner; any other value
 *   uses the info style and auto-hides after 4 seconds.
 * @param {string} msg - Plain text to show (assigned through textContent, so
 *   it is never interpreted as HTML).
 */
function showAlert(type, msg){
  const el = document.getElementById('settings-alert');
  const isError = type === 'error';
  // Cancel the auto-hide scheduled by a previous info message; left running,
  // that stale timer would hide this newer (possibly error) message early.
  clearTimeout(showAlert.hideTimer);
  showAlert.hideTimer = undefined;
  el.className = 'alert alert-' + (isError ? 'error' : 'info');
  el.textContent = msg;
  el.style.display = 'flex';
  if(!isError){
    showAlert.hideTimer = setTimeout(()=>{ el.style.display = 'none'; }, 4000);
  }
}
/**
 * Fetch the current settings from /admin/api/settings and populate the form.
 *
 * Missing values fall back to the same defaults the form advertises
 * (host 0.0.0.0, port 8000, empty paths). Any failure -- network error,
 * non-2xx status, or an unparsable body -- is reported via showAlert('error', ...).
 */
async function loadSettings(){
  const setValue = (id, value) => { document.getElementById(id).value = value; };
  try{
    const r = await fetch('/admin/api/settings');
    // fetch() only rejects on network failure. Without this check an HTTP error
    // body would be parsed as settings and the form silently filled with
    // defaults, which a later "save" would then write back.
    if(!r.ok) throw new Error('HTTP ' + r.status);
    const d = await r.json();
    setValue('s-host', d.server?.host ?? '0.0.0.0');
    setValue('s-port', d.server?.port ?? 8000);
    document.getElementById('s-https').checked = !!d.server?.https;
    setValue('s-key', d.server?.https_key_path ?? '');
    setValue('s-cert', d.server?.https_cert_path ?? '');
    setValue('s-hf-cache', d.models?.hf_cache_dir ?? '');
    setValue('s-gguf-cache', d.models?.gguf_cache_dir ?? '');
    // Sync the visibility of the SSL path inputs with the loaded checkbox state.
    toggleHttps();
  }catch(e){ showAlert('error','Failed to load settings: '+e.message); }
}
/**
 * Collect the form values and POST them as JSON to /admin/api/settings.
 *
 * Blank host/port fall back to 0.0.0.0 / 8000; blank path inputs are sent as
 * null. Success and failure are both reported through showAlert().
 */
async function saveSettings(){
  // Trimmed input value, or null when the field was left blank.
  const strOrNull = id => document.getElementById(id).value.trim() || null;
  const data = {
    server:{
      host: document.getElementById('s-host').value.trim() || '0.0.0.0',
      // Explicit radix so the port is always parsed as decimal.
      port: parseInt(document.getElementById('s-port').value, 10) || 8000,
      https: document.getElementById('s-https').checked,
      https_key_path: strOrNull('s-key'),
      https_cert_path: strOrNull('s-cert'),
    },
    models:{
      hf_cache_dir: strOrNull('s-hf-cache'),
      gguf_cache_dir: strOrNull('s-gguf-cache'),
    }
  };
  try{
    const r = await fetch('/admin/api/settings',{
      method:'POST', headers:{'Content-Type':'application/json'},
      body: JSON.stringify(data)
    });
    if(r.ok){
      showAlert('info','Settings saved. Restart CoderAI to apply.');
      return;
    }
    // An error response is not guaranteed to carry JSON (e.g. an HTML error
    // page), so a parse failure must not mask the real problem.
    const e = await r.json().catch(() => null);
    const detail = e && e.detail;
    let message = 'Save failed';
    if(typeof detail === 'string' && detail){
      message = detail;
    }else if(detail){
      // A structured detail (e.g. a list of validation errors) would otherwise
      // render as "[object Object]".
      message = JSON.stringify(detail);
    }
    showAlert('error', message);
  }catch(e){ showAlert('error','Error: '+e.message); }
}
loadSettings();
</script>
{% endblock %}
This diff is collapsed.
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}Users — CoderAI{% endblock %}
{% block title %}Users - CoderAI{% endblock %}
{% block content %} {% block content %}
<div class="page-header"> <div class="page-header">
<div>
<h1>Users</h1> <h1>Users</h1>
<p>{{ users|length }} account{{ 's' if users|length != 1 else '' }}</p>
</div>
<div class="header-actions"> <div class="header-actions">
<button class="btn btn-primary" onclick="showCreateUserModal()">Create User</button> <button class="btn btn-primary" onclick="openModal('add-modal')">Add user</button>
</div> </div>
</div> </div>
<div class="card"> <div class="table-wrap">
<div class="table-responsive"> <table>
<table class="table">
<thead> <thead>
<tr> <tr><th>User</th><th>Role</th><th>Created</th><th></th></tr>
<th>Username</th>
<th>Role</th>
<th>Created</th>
<th>Actions</th>
</tr>
</thead> </thead>
<tbody> <tbody>
{% for user in users %} {% for user in users %}
<tr> <tr>
<td>{{ user.username }}</td> <td class="td-name">
<td> {{ user.username }}
<span class="badge {% if user.role == 'admin' %}badge-primary{% else %}badge-secondary{% endif %}"> {% if user.username == username %}<span class="badge badge-user" style="margin-left:.375rem">you</span>{% endif %}
{{ user.role }}
</span>
</td> </td>
<td>{{ user.created_at[:10] }}</td> <td><span class="badge {% if user.role == 'admin' %}badge-admin{% else %}badge-user{% endif %}">{{ user.role }}</span></td>
<td> <td class="mono small dim">{{ user.created_at[:10] }}</td>
{% if user.username != username %} <td style="text-align:right">
<button class="btn btn-danger btn-sm" onclick="deleteUser({{ user.id }}, '{{ user.username }}')">Delete</button> {% if user.username == username %}
<a href="/admin/change-password" class="btn btn-ghost btn-sm">Change password</a>
{% else %} {% else %}
<a href="/admin/change-password" class="btn btn-secondary btn-sm">Change Password</a> <button class="btn btn-danger btn-sm" onclick="delUser({{ user.id }}, '{{ user.username }}')">Delete</button>
{% endif %} {% endif %}
</td> </td>
</tr> </tr>
{% else %}
<tr class="empty-row"><td colspan="4">No users found</td></tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</div>
</div> </div>
<!-- Create User Modal --> <div id="add-modal" class="modal">
<div id="create-user-modal" class="modal"> <div class="modal-box">
<div class="modal-content"> <div class="modal-head">
<div class="modal-header"> <span class="modal-title">Add user</span>
<h2>Create User</h2> <button class="modal-close" onclick="closeModal('add-modal')">×</button>
<button class="modal-close" onclick="hideCreateUserModal()">&times;</button>
</div> </div>
<div class="modal-body"> <div class="modal-body">
<form id="create-user-form"> <div id="add-err" class="alert alert-error" style="display:none"></div>
<div class="form-group"> <div class="form-row">
<label for="new-username">Username</label> <label class="form-label">Username</label>
<input type="text" id="new-username" class="form-control" required> <input type="text" id="new-uname" class="form-input" placeholder="username" autocomplete="off">
</div> </div>
<div class="form-row">
<div class="form-group"> <label class="form-label">Password</label>
<label for="new-password">Password</label> <input type="password" id="new-pwd" class="form-input" placeholder="••••••••" autocomplete="new-password">
<input type="password" id="new-password" class="form-control" required minlength="8"> <span class="form-hint">Minimum 8 characters</span>
<small class="form-text">Minimum 8 characters</small>
</div> </div>
<div class="form-row">
<div class="form-group"> <label class="form-label">Role</label>
<label for="new-role">Role</label> <select id="new-role" class="form-input">
<select id="new-role" class="form-control">
<option value="user">User</option> <option value="user">User</option>
<option value="admin">Admin</option> <option value="admin">Admin</option>
</select> </select>
</div> </div>
<div class="form-actions"> <div class="form-actions">
<button type="submit" class="btn btn-primary">Create User</button> <button class="btn btn-primary" onclick="addUser()">Create</button>
<button type="button" class="btn btn-secondary" onclick="hideCreateUserModal()">Cancel</button> <button class="btn btn-ghost" onclick="closeModal('add-modal')">Cancel</button>
</div> </div>
</form>
</div> </div>
</div> </div>
</div> </div>
...@@ -85,60 +76,36 @@ ...@@ -85,60 +76,36 @@
{% block scripts %} {% block scripts %}
<script> <script>
function showCreateUserModal() { function openModal(id) { document.getElementById(id).classList.add('show'); }
document.getElementById('create-user-modal').style.display = 'flex'; function closeModal(id) {
document.getElementById(id).classList.remove('show');
document.getElementById('add-err').style.display = 'none';
document.getElementById('new-uname').value = '';
document.getElementById('new-pwd').value = '';
} }
function hideCreateUserModal() { async function addUser() {
document.getElementById('create-user-modal').style.display = 'none'; const uname = document.getElementById('new-uname').value.trim();
document.getElementById('create-user-form').reset(); const pwd = document.getElementById('new-pwd').value;
} const errEl = document.getElementById('add-err');
errEl.style.display = 'none';
document.getElementById('create-user-form').addEventListener('submit', async (e) => { if (!uname) { errEl.textContent = 'Username required'; errEl.style.display = 'flex'; return; }
e.preventDefault(); if (pwd.length < 8) { errEl.textContent = 'Password must be at least 8 characters'; errEl.style.display = 'flex'; return; }
const username = document.getElementById('new-username').value;
const password = document.getElementById('new-password').value;
const role = document.getElementById('new-role').value;
try { try {
const response = await fetch('/admin/api/users', { const r = await fetch('/admin/api/users', {
method: 'POST', method: 'POST', headers: {'Content-Type':'application/json'},
headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({username: uname, password: pwd, role: document.getElementById('new-role').value})
body: JSON.stringify({ username, password, role })
});
if (response.ok) {
hideCreateUserModal();
location.reload();
} else {
const error = await response.json();
alert('Failed to create user: ' + (error.detail || 'Unknown error'));
}
} catch (error) {
alert('Error: ' + error.message);
}
});
async function deleteUser(userId, username) {
if (!confirm(`Are you sure you want to delete user "${username}"?`)) {
return;
}
try {
const response = await fetch(`/admin/api/users/${userId}`, {
method: 'DELETE'
}); });
if (r.ok) { location.reload(); }
else { const e = await r.json(); errEl.textContent = e.detail || 'Failed'; errEl.style.display = 'flex'; }
} catch (e) { errEl.textContent = e.message; errEl.style.display = 'flex'; }
}
if (response.ok) { async function delUser(id, name) {
location.reload(); if (!confirm('Delete user "' + name + '"?')) return;
} else { const r = await fetch('/admin/api/users/'+id, {method:'DELETE'});
const error = await response.json(); if (r.ok) location.reload();
alert('Failed to delete user: ' + (error.detail || 'Unknown error')); else { const e = await r.json(); alert(e.detail || 'Failed'); }
}
} catch (error) {
alert('Error: ' + error.message);
}
} }
</script> </script>
{% endblock %} {% endblock %}
...@@ -91,6 +91,16 @@ app.include_router(text_router) ...@@ -91,6 +91,16 @@ app.include_router(text_router)
app.include_router(admin_router) app.include_router(admin_router)
@app.exception_handler(401)
async def unauthorized_redirect(request: Request, exc: HTTPException):
    """Redirect browser clients to the login page on 401; return JSON for API clients.

    A request whose ``Accept`` header contains ``text/html`` gets a 302 redirect
    to ``/login``. Every other client gets a JSON body ``{"detail": ...}`` with
    status 401.

    Args:
        request: The incoming request; only its ``Accept`` header is inspected.
        exc: The 401 exception being handled.

    Returns:
        A ``RedirectResponse`` for HTML clients, otherwise a ``JSONResponse``.
    """
    accept = request.headers.get("accept", "")
    if "text/html" in accept:
        from fastapi.responses import RedirectResponse
        return RedirectResponse(url="/login", status_code=302)
    return JSONResponse(
        status_code=401,
        content={"detail": exc.detail},
        # Keep headers attached to the exception (e.g. WWW-Authenticate) so API
        # clients still receive them, as they would without a custom handler.
        headers=getattr(exc, "headers", None),
    )
@app.get("/v1/models", response_model=ModelList) @app.get("/v1/models", response_model=ModelList)
async def list_models(): async def list_models():
"""List available models.""" """List available models."""
......
...@@ -476,7 +476,7 @@ async def _generate_with_sdcpp(sd_model, request, global_args, http_request=None ...@@ -476,7 +476,7 @@ async def _generate_with_sdcpp(sd_model, request, global_args, http_request=None
} }
def _load_sdcpp_model(model_path: str, global_args): def _load_sdcpp_model(model_path: str, global_args, model_config: dict = None):
""" """
Try to load a model using stable-diffusion-cpp-python. Try to load a model using stable-diffusion-cpp-python.
...@@ -492,6 +492,10 @@ def _load_sdcpp_model(model_path: str, global_args): ...@@ -492,6 +492,10 @@ def _load_sdcpp_model(model_path: str, global_args):
# Build sd.cpp constructor args from config # Build sd.cpp constructor args from config
kwargs = { kwargs = {
'model_path': model_path, 'model_path': model_path,
'offload_params_to_cpu': False, # Use GPU by default
'keep_clip_on_cpu': False,
'keep_control_net_on_cpu': False,
'keep_vae_on_cpu': False,
} }
# Add optional paths from CLI args # Add optional paths from CLI args
...@@ -501,16 +505,27 @@ def _load_sdcpp_model(model_path: str, global_args): ...@@ -501,16 +505,27 @@ def _load_sdcpp_model(model_path: str, global_args):
if hasattr(global_args, 'llm_path') and global_args.llm_path: if hasattr(global_args, 'llm_path') and global_args.llm_path:
kwargs['lora_model_dir'] = global_args.llm_path kwargs['lora_model_dir'] = global_args.llm_path
# --no-ram mode: maximize GPU offloading for sd.cpp # If backend is explicitly cpu, offload to CPU
backend = (model_config or {}).get('backend', 'auto') if model_config else 'auto'
if backend == 'cpu':
kwargs['offload_params_to_cpu'] = True
kwargs['keep_clip_on_cpu'] = True
kwargs['keep_vae_on_cpu'] = True
if no_ram: if no_ram:
# stable-diffusion-cpp-python supports n_threads and gpu-related params
# Force full GPU offload by keeping all operations on GPU
kwargs['keep_clip_on_cpu'] = False # Don't offload CLIP to CPU
kwargs['keep_control_net_cpu'] = False # Don't offload ControlNet to CPU
kwargs['keep_vae_on_cpu'] = False # Don't offload VAE to CPU
print("--no-ram mode: sd.cpp maximizing GPU usage (no CPU offload for CLIP/VAE/ControlNet)") print("--no-ram mode: sd.cpp maximizing GPU usage (no CPU offload for CLIP/VAE/ControlNet)")
try:
sd_model = StableDiffusion(**kwargs)
except Exception as e:
if 'cpu' not in str(backend) and ('memory' in str(e).lower() or 'cuda' in str(e).lower() or 'out of' in str(e).lower()):
print(f"GPU load failed ({e}), retrying with CPU offload...")
kwargs['offload_params_to_cpu'] = True
kwargs['keep_clip_on_cpu'] = True
kwargs['keep_vae_on_cpu'] = True
sd_model = StableDiffusion(**kwargs) sd_model = StableDiffusion(**kwargs)
else:
raise
return sd_model return sd_model
...@@ -665,7 +680,8 @@ async def create_image_generation(request: ImageGenerationRequest, http_request: ...@@ -665,7 +680,8 @@ async def create_image_generation(request: ImageGenerationRequest, http_request:
# Only use sd.cpp if we have a local file path # Only use sd.cpp if we have a local file path
if resolved_path and os.path.isfile(resolved_path): if resolved_path and os.path.isfile(resolved_path):
sd_model = _load_sdcpp_model(resolved_path, global_args) cfg = multi_model_manager.config.get(model_key) or multi_model_manager.config.get(model_name) or {}
sd_model = _load_sdcpp_model(resolved_path, global_args, model_config=cfg)
if sd_model is not None: if sd_model is not None:
# Cache the loaded model in the manager # Cache the loaded model in the manager
......
...@@ -123,12 +123,13 @@ def setup_default_config(config_dir: Path): ...@@ -123,12 +123,13 @@ def setup_default_config(config_dir: Path):
# Default auth.json with admin / admin # Default auth.json with admin / admin
from pathlib import Path from pathlib import Path
import secrets import secrets
try:
from argon2 import PasswordHasher from argon2 import PasswordHasher
if hasattr(argon2, 'PasswordHasher'): ph = PasswordHasher()
ph = argon2.PasswordHasher()
default_admin_hash = ph.hash("admin") default_admin_hash = ph.hash("admin")
else: except ImportError:
default_admin_hash = "argon2id$v=19$m=65536,t=3,p=4$...admin_hash_placeholder" from codai.admin.auth import hash_password
default_admin_hash = hash_password("admin")
default_auth = { default_auth = {
"users": [{ "users": [{
......
...@@ -29,12 +29,54 @@ class BackendConfig: ...@@ -29,12 +29,54 @@ class BackendConfig:
class ModelsConfig: class ModelsConfig:
"""Models configuration.""" """Models configuration."""
default_load_mode: str = "ondemand" default_load_mode: str = "ondemand"
hf_cache_dir: Optional[str] = None
gguf_cache_dir: Optional[str] = None
@dataclass @dataclass
class OffloadConfig: class OffloadConfig:
"""Offload configuration.""" """Offload configuration."""
directory: str = "./offload" directory: str = "./offload"
strategy: str = "auto"
max_gpu_percent: Optional[float] = None
no_ram: bool = False
load_in_4bit: bool = False
load_in_8bit: bool = False
manual_ram_gb: Optional[float] = None
flash_attention: bool = False
@dataclass
class VulkanConfig:
    """Vulkan backend configuration.

    Built from the ``"vulkan"`` section of the config data with
    ``VulkanConfig(**section)``, so section keys must match these field names
    exactly; an unknown key raises ``TypeError``.
    """

    # NOTE(review): -1 presumably means "offload all layers" -- confirm against the loader.
    n_gpu_layers: int = -1
    # NOTE(review): presumably the context window size in tokens -- confirm.
    n_ctx: int = 2048
    device_id: int = 0
    single_gpu: bool = False
@dataclass
class ImageConfig:
    """Image generation configuration.

    Built from the ``"image"`` section of the config data with
    ``ImageConfig(**section)``, so section keys must match these field names
    exactly; an unknown key raises ``TypeError``.
    """

    # Optional model component paths; None when not configured.
    llm_path: Optional[str] = None
    vae_path: Optional[str] = None
    # Sampling defaults.
    sample_method: str = "res_multistep"
    steps: int = 4
    width: int = 512
    height: int = 512
    cfg_scale: float = 1.0
    precision: str = "f32"
    cpu_offload: bool = False
    # NOTE(review): None presumably lets the generator choose a seed -- confirm.
    seed: Optional[int] = None
    vae_tiling: bool = False
    clip_on_cpu: bool = False
@dataclass
class WhisperConfig:
    """Whisper ASR configuration.

    Built from the ``"whisper"`` section of the config data with
    ``WhisperConfig(**section)``, so section keys must match these field names
    exactly; an unknown key raises ``TypeError``.
    """

    # None when no server path is configured.
    server_path: Optional[str] = None
    server_port: int = 8744
@dataclass @dataclass
...@@ -45,6 +87,9 @@ class Config: ...@@ -45,6 +87,9 @@ class Config:
backend: BackendConfig = field(default_factory=BackendConfig) backend: BackendConfig = field(default_factory=BackendConfig)
models: ModelsConfig = field(default_factory=ModelsConfig) models: ModelsConfig = field(default_factory=ModelsConfig)
offload: OffloadConfig = field(default_factory=OffloadConfig) offload: OffloadConfig = field(default_factory=OffloadConfig)
vulkan: VulkanConfig = field(default_factory=VulkanConfig)
image: ImageConfig = field(default_factory=ImageConfig)
whisper: WhisperConfig = field(default_factory=WhisperConfig)
system_prompt: Optional[str] = None system_prompt: Optional[str] = None
tools_closer_prompt: bool = False tools_closer_prompt: bool = False
grammar_guided: bool = False grammar_guided: bool = False
...@@ -140,7 +185,8 @@ class ConfigManager: ...@@ -140,7 +185,8 @@ class ConfigManager:
ph = PasswordHasher() ph = PasswordHasher()
default_admin_hash = ph.hash("admin") default_admin_hash = ph.hash("admin")
except ImportError: except ImportError:
default_admin_hash = "argon2id$v=19$m=65536,t=3,p=4$...admin_hash_placeholder" from codai.admin.auth import hash_password
default_admin_hash = hash_password("admin")
default_auth = { default_auth = {
"users": [{ "users": [{
...@@ -182,6 +228,9 @@ class ConfigManager: ...@@ -182,6 +228,9 @@ class ConfigManager:
backend=BackendConfig(**config_data.get("backend", {})), backend=BackendConfig(**config_data.get("backend", {})),
models=ModelsConfig(**config_data.get("models", {})), models=ModelsConfig(**config_data.get("models", {})),
offload=OffloadConfig(**config_data.get("offload", {})), offload=OffloadConfig(**config_data.get("offload", {})),
vulkan=VulkanConfig(**config_data.get("vulkan", {})),
image=ImageConfig(**config_data.get("image", {})),
whisper=WhisperConfig(**config_data.get("whisper", {})),
system_prompt=config_data.get("system_prompt"), system_prompt=config_data.get("system_prompt"),
tools_closer_prompt=config_data.get("tools_closer_prompt", False), tools_closer_prompt=config_data.get("tools_closer_prompt", False),
grammar_guided=config_data.get("grammar_guided", False), grammar_guided=config_data.get("grammar_guided", False),
...@@ -242,10 +291,43 @@ class ConfigManager: ...@@ -242,10 +291,43 @@ class ConfigManager:
"tts_backend": self.config.backend.tts_backend "tts_backend": self.config.backend.tts_backend
}, },
"models": { "models": {
"default_load_mode": self.config.models.default_load_mode "default_load_mode": self.config.models.default_load_mode,
"hf_cache_dir": self.config.models.hf_cache_dir,
"gguf_cache_dir": self.config.models.gguf_cache_dir,
}, },
"offload": { "offload": {
"directory": self.config.offload.directory "directory": self.config.offload.directory,
"strategy": self.config.offload.strategy,
"max_gpu_percent": self.config.offload.max_gpu_percent,
"no_ram": self.config.offload.no_ram,
"load_in_4bit": self.config.offload.load_in_4bit,
"load_in_8bit": self.config.offload.load_in_8bit,
"manual_ram_gb": self.config.offload.manual_ram_gb,
"flash_attention": self.config.offload.flash_attention
},
"vulkan": {
"n_gpu_layers": self.config.vulkan.n_gpu_layers,
"n_ctx": self.config.vulkan.n_ctx,
"device_id": self.config.vulkan.device_id,
"single_gpu": self.config.vulkan.single_gpu
},
"image": {
"llm_path": self.config.image.llm_path,
"vae_path": self.config.image.vae_path,
"sample_method": self.config.image.sample_method,
"steps": self.config.image.steps,
"width": self.config.image.width,
"height": self.config.image.height,
"cfg_scale": self.config.image.cfg_scale,
"precision": self.config.image.precision,
"cpu_offload": self.config.image.cpu_offload,
"seed": self.config.image.seed,
"vae_tiling": self.config.image.vae_tiling,
"clip_on_cpu": self.config.image.clip_on_cpu
},
"whisper": {
"server_path": self.config.whisper.server_path,
"server_port": self.config.whisper.server_port
}, },
"system_prompt": self.config.system_prompt, "system_prompt": self.config.system_prompt,
"tools_closer_prompt": self.config.tools_closer_prompt, "tools_closer_prompt": self.config.tools_closer_prompt,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -31,3 +31,8 @@ llama-cpp-python>=0.2.0 ...@@ -31,3 +31,8 @@ llama-cpp-python>=0.2.0
# Requires specific CUDA versions and may need manual installation # Requires specific CUDA versions and may need manual installation
# Install with: pip install flash-attn --no-build-isolation # Install with: pip install flash-attn --no-build-isolation
# flash-attn>=2.5.0 # flash-attn>=2.5.0
# Optional: fast path for linear attention models (RWKV, Mamba, etc.)
causal-conv1d
# flash-linear-attention requires CUDA and must be installed from source:
# pip install git+https://github.com/fla-org/flash-linear-attention --no-build-isolation
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment