Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use...

Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use events, non-streaming via --output-format json
parent 23f7362e
......@@ -3651,20 +3651,6 @@ def DatabaseManager__create_cache_tables(self, cursor, auto_increment, timestamp
)
''')
cursor.execute(f'''
CREATE TABLE IF NOT EXISTS context_dimensions (
id INTEGER PRIMARY KEY {auto_increment},
provider_id VARCHAR(255) NOT NULL,
model_name VARCHAR(255) NOT NULL,
context_size INTEGER,
condense_context INTEGER,
condense_method TEXT,
effective_context INTEGER DEFAULT 0,
last_updated TIMESTAMP DEFAULT {timestamp_default},
UNIQUE(provider_id, model_name)
)
''')
logger.info("⚠️ CACHE DATABASE: Only minimal cache tables created - NO USER TABLES")
......@@ -4216,6 +4202,48 @@ def DatabaseManager__run_config_migrations(self, cursor, auto_increment, timesta
except Exception as e:
logger.warning(f"Migration check for user_notifications table: {e}")
# Migration: Create context_dimensions table if missing
try:
if self.db_type == 'sqlite':
cursor.execute("PRAGMA table_info(context_dimensions)")
if not cursor.fetchall():
cursor.execute(f'''
CREATE TABLE context_dimensions (
id INTEGER PRIMARY KEY {auto_increment},
provider_id VARCHAR(255) NOT NULL,
model_name VARCHAR(255) NOT NULL,
context_size INTEGER,
condense_context INTEGER,
condense_method TEXT,
effective_context INTEGER DEFAULT 0,
last_updated TIMESTAMP DEFAULT {timestamp_default},
UNIQUE(provider_id, model_name)
)
''')
logger.info("✅ Migration: Created context_dimensions table")
else:
cursor.execute("""
SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'context_dimensions'
""")
if not cursor.fetchone():
cursor.execute(f'''
CREATE TABLE context_dimensions (
id INTEGER PRIMARY KEY {auto_increment},
provider_id VARCHAR(255) NOT NULL,
model_name VARCHAR(255) NOT NULL,
context_size INTEGER,
condense_context INTEGER,
condense_method TEXT,
effective_context INTEGER DEFAULT 0,
last_updated TIMESTAMP DEFAULT {timestamp_default},
UNIQUE(provider_id, model_name)
)
''')
logger.info("✅ Migration: Created context_dimensions table")
except Exception as e:
logger.warning(f"Migration check for context_dimensions table: {e}")
logger.info("✅ All database migrations completed")
# Patch the methods
......
......@@ -595,7 +595,7 @@ class RequestHandler:
}], model_name)
else:
# Fallback to estimation if no content
max_tokens = request_data.get('max_tokens', 0)
max_tokens = request_data.get('max_tokens') or 0
if max_tokens > 0:
completion_tokens = min(max_tokens, estimated_prompt_tokens * 2)
else:
......@@ -1182,10 +1182,13 @@ class RequestHandler:
logger.debug(f"Async chunk type: {type(chunk)}")
logger.debug(f"Async chunk: {chunk}")
# For async generators, chunks might be bytes (SSE format)
# For async generators, chunks might be bytes or pre-formatted SSE strings
if isinstance(chunk, bytes):
logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes")
yield chunk
elif isinstance(chunk, str):
# Already SSE-formatted (e.g. "data: {...}\n\n") — pass through directly
yield chunk.encode('utf-8')
else:
# Fallback: treat as dict and serialize
chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
......@@ -2967,12 +2970,12 @@ class RotationHandler:
estimated_prompt_tokens = count_messages_tokens(messages, model_name)
# More realistic completion estimate
max_tokens = request_data.get('max_tokens', 0)
max_tokens = request_data.get('max_tokens') or 0
if max_tokens > 0:
estimated_completion = min(max_tokens, estimated_prompt_tokens * 2)
else:
estimated_completion = max(estimated_prompt_tokens, 50)
total_tokens = estimated_prompt_tokens + estimated_completion
prompt_tokens = estimated_prompt_tokens
completion_tokens = estimated_completion
......@@ -3636,11 +3639,12 @@ class RotationHandler:
logger.debug(f"Async chunk type: {type(chunk)}")
logger.debug(f"Async chunk: {chunk}")
# For Kiro, chunks are already properly formatted SSE bytes
# Just pass them through directly
# For Kiro/Claude CLI, chunks may be pre-formatted SSE bytes or strings
if isinstance(chunk, bytes):
logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes")
yield chunk
elif isinstance(chunk, str):
yield chunk.encode('utf-8')
else:
# Fallback: treat as dict and serialize
chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
......@@ -4464,12 +4468,12 @@ class AutoselectHandler:
estimated_prompt_tokens = count_messages_tokens(messages, model_name)
# More realistic completion estimate
max_tokens = request_data.get('max_tokens', 0)
max_tokens = request_data.get('max_tokens') or 0
if max_tokens > 0:
estimated_completion = min(max_tokens, estimated_prompt_tokens * 2)
else:
estimated_completion = max(estimated_prompt_tokens, 50)
total_tokens = estimated_prompt_tokens + estimated_completion
prompt_tokens = estimated_prompt_tokens
completion_tokens = estimated_completion
......
......@@ -360,11 +360,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
logger.warning(f"ClaudeCliMode: failed to load credentials: {exc}")
return None
def _messages_to_cli_prompt(self, messages: List[Dict]) -> str:
def _messages_to_cli_prompt(self, messages: List[Dict],
tools: Optional[List[Dict]] = None) -> str:
"""
Convert an OpenAI-style messages list to a flat text prompt for the
claude CLI. System messages are included as a prefix; no Anthropic
system-prompt injection is needed in CLI mode.
Convert an OpenAI-style messages list (plus optional tool definitions)
to a flat text prompt for the claude CLI sent via stdin.
System messages and tool definitions are included as a prefix.
"""
system_parts: List[str] = []
turn_parts: List[str] = []
......@@ -373,15 +374,14 @@ class ClaudeProviderHandler(BaseProviderHandler):
role = msg.get('role', '')
content = msg.get('content', '')
# Normalise content to str
if isinstance(content, list):
text_fragments = []
fragments = []
for block in content:
if isinstance(block, dict) and block.get('type') == 'text':
text_fragments.append(block.get('text', ''))
fragments.append(block.get('text', ''))
elif isinstance(block, str):
text_fragments.append(block)
content = '\n'.join(text_fragments)
fragments.append(block)
content = '\n'.join(fragments)
elif not isinstance(content, str):
content = str(content)
......@@ -392,12 +392,155 @@ class ClaudeProviderHandler(BaseProviderHandler):
elif role == 'assistant':
turn_parts.append(f'Assistant: {content}')
if tools:
tools_json = json.dumps(tools, ensure_ascii=False)
system_parts.append(
f'Available tools (respond with tool_use blocks as needed):\n{tools_json}'
)
parts: List[str] = []
if system_parts:
parts.append('[System Instructions: ' + '\n'.join(system_parts) + ']')
parts.extend(turn_parts)
return '\n\n'.join(parts)
async def _cli_discover_models(self, config_dir: str) -> List['Model']:
    """
    Ask the claude CLI which models it supports using --output-format json.

    The single-object JSON output format (not stream-json) is used because
    its `modelUsage` map carries contextWindow / maxOutputTokens metadata and
    its `result` text holds the model list we asked for.

    Args:
        config_dir: Directory exported to the subprocess as CLAUDE_CONFIG_DIR.

    Returns:
        A list of Model objects, one per discovered model ID (sorted by ID).
        An empty list is returned on timeout, empty output, unparsable or
        unexpectedly shaped JSON, a CLI-reported error, or when no model IDs
        could be extracted.
    """
    import re

    logger = _logging.getLogger(__name__)

    env = os.environ.copy()
    env['CLAUDE_CONFIG_DIR'] = config_dir
    env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'

    prompt = (
        "Which models are you compatible with? "
        "Give me only a JSON list without any other comment or word "
        "except for the list of the model IDs."
    )
    cmd = [
        'claude', '-p', prompt,
        '--output-format', 'json',
        '--dangerously-skip-permissions',
        '--no-session-persistence',
    ]
    logger.info(
        "ClaudeCliMode: model discovery subprocess\n"
        f" Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
        + ' '.join(cmd)
    )

    process = await asyncio.create_subprocess_exec(
        *cmd,
        env=env,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout_bytes, stderr_bytes = await asyncio.wait_for(
            process.communicate(), timeout=60.0
        )
    except asyncio.TimeoutError:
        logger.error("ClaudeCliMode: model discovery subprocess timed out")
        try:
            process.kill()
        except ProcessLookupError:
            # The child exited between the timeout firing and the kill.
            pass
        await process.wait()
        return []

    if stderr_bytes:
        logger.debug(
            f"ClaudeCliMode: discovery stderr:\n"
            f"{stderr_bytes.decode('utf-8', errors='replace')[:2000]}"
        )

    stdout_str = stdout_bytes.decode('utf-8', errors='replace').strip()
    logger.debug(f"ClaudeCliMode: discovery raw output: {stdout_str[:1000]}")
    if not stdout_str:
        logger.warning("ClaudeCliMode: model discovery returned empty output")
        return []

    try:
        data = json.loads(stdout_str)
    except json.JSONDecodeError as e:
        logger.warning(f"ClaudeCliMode: model discovery JSON parse error: {e}")
        return []

    # json.loads may legally return a list/str/number; only an object has the
    # fields read below, so anything else is treated as a failed discovery.
    if not isinstance(data, dict):
        logger.warning(
            f"ClaudeCliMode: model discovery returned unexpected JSON type: {type(data).__name__}"
        )
        return []

    # `result` can be absent or null; normalise once so slicing and regex
    # scanning below never see a non-string.
    raw_result = data.get('result')
    if isinstance(raw_result, str):
        result_text = raw_result
    elif raw_result is None:
        result_text = ''
    else:
        result_text = str(raw_result)

    if data.get('is_error') or data.get('subtype') != 'success':
        logger.warning(
            f"ClaudeCliMode: model discovery error: {result_text[:200]}"
        )
        return []

    # modelUsage keys -> real metadata (contextWindow, maxOutputTokens).
    # Only models actually invoked in this call appear here.
    model_usage = data.get('modelUsage')
    if not isinstance(model_usage, dict):
        model_usage = {}

    # result text contains the JSON list we asked for, possibly wrapped in
    # a markdown code fence like ```json\n[...]\n```
    logger.info(f"ClaudeCliMode: discovery result: {result_text!r}")

    # Parse the first bracketed JSON array found in the result text.
    result_ids: set = set()
    json_match = re.search(r'\[[\s\S]*?\]', result_text)
    if json_match:
        try:
            parsed = json.loads(json_match.group())
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, list):
            result_ids = {
                m for m in parsed
                if isinstance(m, str) and m.startswith('claude-')
            }

    # Fall back to a regex scan of the result text if the array parse
    # produced nothing usable.
    if not result_ids:
        result_ids = set(re.findall(r'claude-[a-z0-9][a-z0-9.\-]*[a-z0-9]', result_text))

    logger.info(f"ClaudeCliMode: model IDs from result: {sorted(result_ids)}")
    logger.info(f"ClaudeCliMode: model IDs from modelUsage: {sorted(model_usage.keys())}")

    # Known context window overrides - avoids a costly second prompt.
    # modelUsage carries real values for models used in this call; for the
    # rest these constants are applied before the 200000 default.
    _known_context: dict = {
        'claude-opus-4-7': 1000000,
    }

    # Union: IDs listed in the result text plus any IDs that only appear as
    # modelUsage keys.
    all_ids = result_ids | set(model_usage.keys())
    if not all_ids:
        return []

    models = []
    for mid in sorted(all_ids):
        usage_meta = model_usage.get(mid)
        if not isinstance(usage_meta, dict):
            usage_meta = {}
        context_size = (
            usage_meta.get('contextWindow')
            or _known_context.get(mid)
            or 200000
        )
        max_output = usage_meta.get('maxOutputTokens')
        m = Model(
            id=mid,
            name=mid,
            provider_id=self.provider_id,
            context_size=context_size,
            context_length=context_size,
        )
        if max_output:
            m.max_output_tokens = max_output
        models.append(m)

    return models
async def _handle_cli_streaming_request(self, prompt: str, model: str, config_dir: str):
"""
Spawn a claude CLI subprocess, stream its JSON output, and yield
......@@ -412,9 +555,11 @@ class ClaudeProviderHandler(BaseProviderHandler):
env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'
cmd = [
'stdbuf', '-oL',
'claude', '-p',
'--input-format', 'stream-json',
'--output-format', 'stream-json',
'--include-partial-messages',
'--tools', '',
'--dangerously-skip-permissions',
'--no-session-persistence',
......@@ -423,7 +568,20 @@ class ClaudeProviderHandler(BaseProviderHandler):
if clean_model:
cmd += ['--model', clean_model]
logger.info(f"ClaudeCliMode: launching subprocess model={clean_model} dir={config_dir}")
stdin_payload: Dict = {
'type': 'user_message',
'content': [{'type': 'text', 'text': prompt}],
}
input_msg = json.dumps(stdin_payload) + '\n'
# Log a shell-replicable command for debugging
cmd_str = ' '.join(cmd)
logger.info(
f"ClaudeCliMode: launching subprocess model={clean_model} dir={config_dir}\n"
f" Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
f"{cmd_str} <<'EOF'\n{input_msg.strip()}\nEOF"
)
process = await asyncio.create_subprocess_exec(
*cmd,
......@@ -433,11 +591,6 @@ class ClaudeProviderHandler(BaseProviderHandler):
stderr=asyncio.subprocess.PIPE,
)
# Send the prompt as a stream-json user_message then close stdin
input_msg = json.dumps({
'type': 'user_message',
'content': [{'type': 'text', 'text': prompt}],
}) + '\n'
process.stdin.write(input_msg.encode())
await process.stdin.drain()
process.stdin.close()
......@@ -446,6 +599,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
created_time = int(time.time())
first_chunk = True
# State for accumulating tool_use blocks
# { block_index: {"id": ..., "name": ..., "arguments": ""} }
tool_blocks: dict = {}
tool_header_sent: set = set()
cli_prev_text_len: int = 0
try:
while True:
try:
......@@ -461,15 +620,31 @@ class ClaudeProviderHandler(BaseProviderHandler):
if not line_str:
continue
logger.debug(f"ClaudeCliMode: raw event: {line_str}")
try:
data = json.loads(line_str)
except json.JSONDecodeError:
logger.debug(f"ClaudeCliMode: non-JSON line: {line_str}")
continue
event_type = data.get('type')
if event_type == 'content_block_delta':
if event_type == 'content_block_start':
cb = data.get('content_block', {})
if cb.get('type') == 'tool_use':
idx = data.get('index', 0)
tool_blocks[idx] = {
'id': cb.get('id', f'call_{idx}'),
'name': cb.get('name', ''),
'arguments': '',
}
logger.debug(f"ClaudeCliMode: tool_use block started idx={idx} name={cb.get('name')}")
elif event_type == 'content_block_delta':
delta = data.get('delta', {})
idx = data.get('index', 0)
if delta.get('type') == 'text_delta':
text = delta.get('text', '')
if not text:
......@@ -481,19 +656,83 @@ class ClaudeProviderHandler(BaseProviderHandler):
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}]})}\n\n'
elif event_type in ('message_stop', 'result'):
logger.debug(f"ClaudeCliMode: received {event_type}, closing stream")
elif delta.get('type') == 'input_json_delta' and idx in tool_blocks:
partial = delta.get('partial_json', '')
tool_blocks[idx]['arguments'] += partial
# Emit streaming tool_calls delta
if idx not in tool_header_sent:
tool_header_sent.add(idx)
if first_chunk:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": None, "tool_calls": [{"index": idx, "id": tool_blocks[idx]["id"], "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": ""}}]}, "finish_reason": None}]})}\n\n'
first_chunk = False
else:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"tool_calls": [{"index": idx, "id": tool_blocks[idx]["id"], "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": ""}}]}, "finish_reason": None}]})}\n\n'
if partial:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"tool_calls": [{"index": idx, "function": {"arguments": partial}}]}, "finish_reason": None}]})}\n\n'
elif event_type == 'assistant':
# Claude CLI stream-json format: partial or final assistant message
msg = data.get('message', {})
last_text = ''
for block in msg.get('content', []):
if not isinstance(block, dict):
continue
btype = block.get('type')
if btype == 'text':
last_text += block.get('text', '')
elif btype == 'tool_use':
# Tool call in assistant event — register and emit if not yet seen
tc_id = block.get('id', f'call_{len(tool_blocks)}')
if tc_id not in tool_header_sent:
tool_header_sent.add(tc_id)
idx = len(tool_blocks)
tool_blocks[idx] = {
'id': tc_id,
'name': block.get('name', ''),
'arguments': json.dumps(block.get('input', {}), ensure_ascii=False),
}
role_delta = {'role': 'assistant', 'content': None} if first_chunk else {}
first_chunk = False
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {**role_delta, "tool_calls": [{"index": idx, "id": tc_id, "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": tool_blocks[idx]["arguments"]}}]}, "finish_reason": None}]})}\n\n'
if last_text:
# Content is cumulative; emit only new characters
new_text = last_text[cli_prev_text_len:]
cli_prev_text_len = len(last_text)
if new_text:
if first_chunk:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]})}\n\n'
first_chunk = False
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": new_text}, "finish_reason": None}]})}\n\n'
elif event_type == 'result':
result_text = data.get('result', '')
logger.debug(f"ClaudeCliMode: result event, is_error={data.get('is_error')}, text_len={len(result_text)}")
# Only emit via result if we haven't already streamed content via other events
if result_text and first_chunk:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]})}\n\n'
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": result_text}, "finish_reason": None}]})}\n\n'
first_chunk = False
break
elif event_type == 'message_stop':
logger.debug("ClaudeCliMode: received message_stop")
break
else:
logger.debug(f"ClaudeCliMode: unhandled event type={event_type}")
except Exception as exc:
logger.error(f"ClaudeCliMode: streaming error: {exc}", exc_info=True)
finally:
try:
stderr_bytes = await asyncio.wait_for(process.stderr.read(), timeout=2.0)
if stderr_bytes:
logger.error(f"ClaudeCliMode: stderr: {stderr_bytes.decode('utf-8', errors='replace')[:500]}")
decoded = stderr_bytes.decode('utf-8', errors='replace')
logger.debug(f"ClaudeCliMode: stderr:\n{decoded[:2000]}")
except Exception:
pass
finally:
try:
process.terminate()
await asyncio.wait_for(process.wait(), timeout=5.0)
......@@ -503,41 +742,87 @@ class ClaudeProviderHandler(BaseProviderHandler):
except Exception:
pass
# Final stop + DONE
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]})}\n\n'
finish_reason = 'tool_calls' if tool_blocks else 'stop'
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}]})}\n\n'
yield 'data: [DONE]\n\n'
async def _handle_cli_request(self, prompt: str, model: str, config_dir: str,
                              tools: Optional[List[Dict]] = None) -> dict:
    """
    Non-streaming CLI request using --output-format json with prompt via stdin.

    Args:
        prompt: Flat text prompt written to the subprocess's stdin.
        model: Model identifier; anything before the last '/' is stripped
            before it is passed to --model.
        config_dir: Directory exported to the subprocess as CLAUDE_CONFIG_DIR.
        tools: Optional tool definitions; when given they are JSON-serialised
            and passed via --tools.

    Returns:
        An OpenAI-style `chat.completion` dict whose single choice carries the
        CLI's `result` text (or the raw stdout when it is not a JSON object).
        On a 120 s timeout the content is 'Request timed out.'. Usage counters
        are always zero.
    """
    logger = _logging.getLogger(__name__)
    clean_model = model.split('/')[-1] if '/' in model else model

    def _completion(text: str) -> dict:
        # Single place that shapes the response, shared by the timeout and
        # success paths.
        return {
            'id': f'chatcmpl-cli-{int(time.time())}',
            'object': 'chat.completion',
            'created': int(time.time()),
            'model': f'{self.provider_id}/{clean_model}',
            'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': text}, 'finish_reason': 'stop'}],
            'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
        }

    env = os.environ.copy()
    env['CLAUDE_CONFIG_DIR'] = config_dir
    env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'

    cmd = [
        'claude', '-p',
        '--output-format', 'json',
        '--dangerously-skip-permissions',
        '--no-session-persistence',
    ]
    if tools:
        # NOTE(review): confirm the CLI's --tools flag accepts JSON tool
        # definitions; this is unverified from here.
        cmd += ['--tools', json.dumps(tools, ensure_ascii=False)]
    if clean_model:
        cmd += ['--model', clean_model]

    logger.info(
        f"ClaudeCliMode: non-streaming subprocess model={clean_model} dir={config_dir}\n"
        f" Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
        + ' '.join(cmd) + f" <<'EOF'\n{prompt[:200]}...\nEOF"
    )

    process = await asyncio.create_subprocess_exec(
        *cmd,
        env=env,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout_bytes, stderr_bytes = await asyncio.wait_for(
            process.communicate(input=prompt.encode()), timeout=120.0
        )
    except asyncio.TimeoutError:
        logger.error("ClaudeCliMode: non-streaming subprocess timed out")
        try:
            process.kill()
        except ProcessLookupError:
            # The child exited between the timeout firing and the kill.
            pass
        await process.wait()
        return _completion('Request timed out.')

    if stderr_bytes:
        logger.debug(f"ClaudeCliMode: stderr:\n{stderr_bytes.decode('utf-8', errors='replace')[:2000]}")

    stdout_str = stdout_bytes.decode('utf-8', errors='replace').strip()
    logger.debug(f"ClaudeCliMode: raw output: {stdout_str[:500]}")

    try:
        data = json.loads(stdout_str)
    except json.JSONDecodeError:
        # Not JSON at all: hand the raw text back to the caller.
        result_text = stdout_str
    else:
        if isinstance(data, dict):
            # `result` may be missing or null; never slice or return None.
            raw_result = data.get('result')
            if isinstance(raw_result, str):
                result_text = raw_result
            elif raw_result is None:
                result_text = ''
            else:
                result_text = str(raw_result)
            if data.get('is_error'):
                logger.warning(f"ClaudeCliMode: CLI returned error: {result_text[:200]}")
        else:
            # Valid JSON but not an object (list/str/number): it has no
            # `result` field, so fall back to the raw output.
            result_text = stdout_str

    return _completion(result_text)
def _init_session_identifiers(self):
"""Initialize persistent session identifiers (device_id, account_uuid, session_id)."""
import uuid
......@@ -1229,14 +1514,15 @@ class ClaudeProviderHandler(BaseProviderHandler):
cli_creds = self._get_cli_credentials()
if cli_creds is not None:
logger.info(f"ClaudeProviderHandler: using CLI subprocess mode for model {model}")
prompt = self._messages_to_cli_prompt(messages)
anthropic_tools = self._convert_tools_to_anthropic(tools) if tools else None
prompt = self._messages_to_cli_prompt(messages, tools=anthropic_tools)
config_dir = await ClaudeCliSessionManager.get_config_dir(
self.user_id, self.provider_id, cli_creds
)
if stream:
return self._handle_cli_streaming_request(prompt, model, config_dir)
else:
return await self._handle_cli_request(prompt, model, config_dir)
return await self._handle_cli_request(prompt, model, config_dir, tools=anthropic_tools)
# ── Fall through to HTTP API mode ────────────────────────────────
logger.info(f"ClaudeProviderHandler: Handling request for model {model} (Direct HTTP mode)")
......@@ -2111,6 +2397,31 @@ class ClaudeProviderHandler(BaseProviderHandler):
await self.apply_rate_limit()
# [0/3] CLI subprocess model discovery
import aisbf.cli_mode as cli_mode_mod
if cli_mode_mod.CLAUDE_CLI_MODE:
cli_creds = self._get_cli_credentials()
if cli_creds is not None:
try:
logging.info("ClaudeProviderHandler: [0/3] CLI subprocess model discovery...")
config_dir = await ClaudeCliSessionManager.get_config_dir(
self.user_id, self.provider_id, cli_creds
)
cli_models = await self._cli_discover_models(config_dir)
if cli_models:
self._save_models_cache(cli_models)
logging.info(
f"ClaudeProviderHandler: ✓ CLI discovery returned {len(cli_models)} models"
)
return cli_models
logging.warning(
"ClaudeProviderHandler: CLI discovery returned no models, falling through"
)
except Exception as cli_disc_err:
logging.warning(
f"ClaudeProviderHandler: CLI model discovery failed: {cli_disc_err}"
)
try:
logging.info("ClaudeProviderHandler: [1/3] Attempting primary API endpoint...")
......
......@@ -578,6 +578,11 @@ _MUST_CHANGE_PASSWORD_WHITELIST = (
'/dashboard/settings',
'/dashboard/logout',
'/api/admin/settings/',
'/dashboard/tor/status',
'/dashboard/response-cache/stats',
'/dashboard/response-cache/clear',
'/dashboard/test-smtp',
'/dashboard/restart',
)
# --- Login rate limiter ---
......@@ -1546,10 +1551,14 @@ async def api_token_authorization_middleware(request: Request, call_next):
if request.method == "GET" and path in ["/api/models", "/api/v1/models"]:
return await call_next(request)
# If authentication is globally disabled, skip all token scope checks
if not (server_config and server_config.get('auth_enabled', False)):
return await call_next(request)
is_global_token = getattr(request.state, 'is_global_token', False)
user_id = getattr(request.state, 'user_id', None)
is_admin = getattr(request.state, 'is_admin', False)
# Debug logging
logger.info(f"API Token Auth: path={path}, is_global_token={is_global_token}, user_id={user_id}")
......@@ -5578,6 +5587,7 @@ async def dashboard_settings(request: Request):
'fullconfig_tokens': []
}
warning = request.query_params.get('warning')
return templates.TemplateResponse(
request=request,
name="dashboard/settings.html",
......@@ -5586,7 +5596,8 @@ async def dashboard_settings(request: Request):
"session": request.session,
"__version__": __version__,
"config": aisbf_config,
"os": os
"os": os,
"warning": warning,
}
)
......@@ -5599,7 +5610,6 @@ async def dashboard_settings_save(
auth_enabled: bool = Form(False),
auth_tokens: str = Form(""),
dashboard_username: str = Form(...),
dashboard_password: str = Form(""),
condensation_model_id: str = Form(...),
autoselect_model_id: str = Form(...),
database_type: str = Form("sqlite"),
......@@ -5691,8 +5701,6 @@ async def dashboard_settings_save(
aisbf_config['auth']['enabled'] = auth_enabled
aisbf_config['auth']['tokens'] = [t.strip() for t in auth_tokens.split('\n') if t.strip()]
aisbf_config['dashboard']['username'] = dashboard_username
if dashboard_password: # Only update if provided - hash the password
aisbf_config['dashboard']['password'] = _db_hash_password(dashboard_password)
aisbf_config['internal_model']['condensation_model_id'] = condensation_model_id
aisbf_config['internal_model']['autoselect_model_id'] = autoselect_model_id
......@@ -5840,12 +5848,10 @@ async def dashboard_settings_save(
aisbf_config['dashboard']['notifications']['wallet_topup'] = admin_notify_wallet_topup
aisbf_config['dashboard']['notifications']['user_deleted_account'] = admin_notify_user_deleted_account
# Handle new_admin_password from the Admin tab (distinct from dashboard_password in Dashboard tab)
if new_admin_password:
if new_admin_password == confirm_admin_password:
aisbf_config['dashboard']['password'] = _db_hash_password(new_admin_password)
request.session.pop('must_change_password', None)
# silently ignore mismatch — UI should validate
# Save config
config_path = Path.home() / '.aisbf' / 'aisbf.json'
......@@ -5853,9 +5859,9 @@ async def dashboard_settings_save(
with open(config_path, 'w') as f:
json.dump(aisbf_config, f, indent=2)
# If a new dashboard password was submitted, clear the forced-change flag
if dashboard_password:
request.session.pop('must_change_password', None)
# Reload dashboard credentials in memory so the new username/password takes effect immediately
if server_config is not None:
server_config['dashboard_config'] = aisbf_config.get('dashboard', {})
return templates.TemplateResponse(
request=request,
......@@ -10055,10 +10061,10 @@ async def v1_chat_completions(request: Request, body: ChatCompletionRequest):
# PATH 1: Direct provider model (format: {provider}/{model})
if provider_id not in config.providers:
raise HTTPException(
status_code=404,
detail=f"User autoselect '{actual_model}' not found. Available: {list(handler.user_autoselects.keys())}"
)
raise HTTPException(
status_code=404,
detail=f"Provider '{provider_id}' not found. Available: {list(config.providers.keys())}"
)
# Validate kiro credentials before processing request
provider_config = config.get_provider(provider_id)
......
......@@ -551,10 +551,18 @@ function renderProviderDetails(key) {
${CLAUDE_CLI_MODE ? `
<div style="margin-top: 20px; padding-top: 15px; border-top: 1px solid #1e3a5f;">
<h5 style="margin: 0 0 8px 0; color: #4ade80;">Claude CLI Mode Active</h5>
<small style="color: #4ade80; display: block; margin-bottom: 14px;">
<small style="color: #4ade80; display: block; margin-bottom: 8px;">
The claude CLI was detected at startup. When enabled, requests are piped
through the local claude binary instead of the HTTP API.
</small>
<div style="background: #3a2a00; border: 1px solid #f59e0b; border-radius: 6px; padding: 10px 14px; margin-bottom: 14px;">
<span style="color: #f59e0b; font-weight: 600;">⚠ Experimental:</span>
<span style="color: #fcd34d; font-size: 0.85em;">
CLI mode is experimental. Tool calling (function calling) does not yet work reliably —
the CLI subprocess may refuse or mishandle tool definitions. Use with simple
(non-tool) requests only until this is resolved.
</span>
</div>
<div class="form-group" style="margin-bottom: 16px;">
<label style="display: flex; align-items: center; gap: 10px; cursor: pointer;">
......
......@@ -41,10 +41,20 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
<div class="alert alert-error">{{ error }}</div>
{% endif %}
{% if warning == 'default_password' %}
<div style="background:#b71c1c; border:2px solid #e53935; color:#fff; padding:16px 20px; border-radius:6px; margin-bottom:20px; display:flex; align-items:flex-start; gap:12px;">
<span style="font-size:1.4em; line-height:1;">&#9888;</span>
<div>
<strong>Security Warning: Default password in use.</strong><br>
You are logged in with the factory-default <code style="background:rgba(0,0,0,.3);padding:1px 5px;border-radius:3px;">admin / admin</code> credentials.
Please change your password immediately using the <strong>Admin</strong> tab below before using AISBF.
</div>
</div>
{% endif %}
<div class="settings-tabs">
<div class="settings-tab active" onclick="switchTab('server')"><i class="fas fa-server"></i> Server</div>
<div class="settings-tab" onclick="switchTab('auth')"><i class="fas fa-key"></i> Auth &amp; MCP</div>
<div class="settings-tab" onclick="switchTab('dashboard')"><i class="fas fa-tachometer-alt"></i> Dashboard</div>
<div class="settings-tab" onclick="switchTab('models')"><i class="fas fa-brain"></i> Models</div>
<div class="settings-tab" onclick="switchTab('database')"><i class="fas fa-database"></i> Database</div>
<div class="settings-tab" onclick="switchTab('cache')"><i class="fas fa-bolt"></i> Cache</div>
......@@ -139,20 +149,6 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
</div>
</div><!-- /tab-auth -->
<div class="settings-section" id="tab-dashboard">
<div class="section-title"><i class="fas fa-tachometer-alt"></i> Dashboard</div>
<div class="form-group">
<label for="dashboard_username">Dashboard Username</label>
<input type="text" id="dashboard_username" name="dashboard_username" value="{{ config.dashboard.username }}" required>
</div>
<div class="form-group">
<label for="dashboard_password">Dashboard Password</label>
<input type="password" id="dashboard_password" name="dashboard_password" placeholder="Leave blank to keep current">
</div>
</div><!-- /tab-dashboard -->
<div class="settings-section" id="tab-models">
<div class="section-title"><i class="fas fa-brain"></i> Internal Models</div>
......@@ -879,15 +875,18 @@ brew services restart tor # macOS</code></pre>
<div class="section-title"><i class="fas fa-shield-alt"></i> Admin Account &amp; Notifications</div>
<div class="form-group">
<label for="new_admin_password">New Admin Password</label>
<label for="dashboard_username">Admin Username</label>
<input type="text" id="dashboard_username" name="dashboard_username" value="{{ config.dashboard.username }}" required>
</div>
<div class="form-group">
<label for="new_admin_password">New Password</label>
<input type="password" id="new_admin_password" name="new_admin_password" placeholder="Leave blank to keep current password">
<small style="color: #666; display: block; margin-top: 5px;">Enter a new password to change the admin dashboard password</small>
</div>
<div class="form-group">
<label for="confirm_admin_password">Confirm New Admin Password</label>
<label for="confirm_admin_password">Confirm New Password</label>
<input type="password" id="confirm_admin_password" name="confirm_admin_password" placeholder="Confirm new password">
<small style="color: #666; display: block; margin-top: 5px;">Re-enter the new password to confirm</small>
</div>
<div class="form-group">
......@@ -1194,13 +1193,12 @@ async function checkTorStatus() {
// Check TOR status on page load
document.addEventListener('DOMContentLoaded', function() {
checkTorStatus();
// Refresh status every 30 seconds
setInterval(checkTorStatus, 30000);
// Load cache statistics
refreshCacheStats();
// Refresh cache stats every 10 seconds
setInterval(refreshCacheStats, 10000);
{% if warning == 'default_password' %}
switchTab('admin');
{% endif %}
});
async function refreshCacheStats() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment