Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use...

Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use events, non-streaming via --output-format json
parent 23f7362e
...@@ -3651,20 +3651,6 @@ def DatabaseManager__create_cache_tables(self, cursor, auto_increment, timestamp ...@@ -3651,20 +3651,6 @@ def DatabaseManager__create_cache_tables(self, cursor, auto_increment, timestamp
) )
''') ''')
cursor.execute(f'''
CREATE TABLE IF NOT EXISTS context_dimensions (
id INTEGER PRIMARY KEY {auto_increment},
provider_id VARCHAR(255) NOT NULL,
model_name VARCHAR(255) NOT NULL,
context_size INTEGER,
condense_context INTEGER,
condense_method TEXT,
effective_context INTEGER DEFAULT 0,
last_updated TIMESTAMP DEFAULT {timestamp_default},
UNIQUE(provider_id, model_name)
)
''')
logger.info("⚠️ CACHE DATABASE: Only minimal cache tables created - NO USER TABLES") logger.info("⚠️ CACHE DATABASE: Only minimal cache tables created - NO USER TABLES")
...@@ -4216,6 +4202,48 @@ def DatabaseManager__run_config_migrations(self, cursor, auto_increment, timesta ...@@ -4216,6 +4202,48 @@ def DatabaseManager__run_config_migrations(self, cursor, auto_increment, timesta
except Exception as e: except Exception as e:
logger.warning(f"Migration check for user_notifications table: {e}") logger.warning(f"Migration check for user_notifications table: {e}")
# Migration: Create context_dimensions table if missing
try:
if self.db_type == 'sqlite':
cursor.execute("PRAGMA table_info(context_dimensions)")
if not cursor.fetchall():
cursor.execute(f'''
CREATE TABLE context_dimensions (
id INTEGER PRIMARY KEY {auto_increment},
provider_id VARCHAR(255) NOT NULL,
model_name VARCHAR(255) NOT NULL,
context_size INTEGER,
condense_context INTEGER,
condense_method TEXT,
effective_context INTEGER DEFAULT 0,
last_updated TIMESTAMP DEFAULT {timestamp_default},
UNIQUE(provider_id, model_name)
)
''')
logger.info("✅ Migration: Created context_dimensions table")
else:
cursor.execute("""
SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'context_dimensions'
""")
if not cursor.fetchone():
cursor.execute(f'''
CREATE TABLE context_dimensions (
id INTEGER PRIMARY KEY {auto_increment},
provider_id VARCHAR(255) NOT NULL,
model_name VARCHAR(255) NOT NULL,
context_size INTEGER,
condense_context INTEGER,
condense_method TEXT,
effective_context INTEGER DEFAULT 0,
last_updated TIMESTAMP DEFAULT {timestamp_default},
UNIQUE(provider_id, model_name)
)
''')
logger.info("✅ Migration: Created context_dimensions table")
except Exception as e:
logger.warning(f"Migration check for context_dimensions table: {e}")
logger.info("✅ All database migrations completed") logger.info("✅ All database migrations completed")
# Patch the methods # Patch the methods
......
...@@ -595,7 +595,7 @@ class RequestHandler: ...@@ -595,7 +595,7 @@ class RequestHandler:
}], model_name) }], model_name)
else: else:
# Fallback to estimation if no content # Fallback to estimation if no content
max_tokens = request_data.get('max_tokens', 0) max_tokens = request_data.get('max_tokens') or 0
if max_tokens > 0: if max_tokens > 0:
completion_tokens = min(max_tokens, estimated_prompt_tokens * 2) completion_tokens = min(max_tokens, estimated_prompt_tokens * 2)
else: else:
...@@ -1182,10 +1182,13 @@ class RequestHandler: ...@@ -1182,10 +1182,13 @@ class RequestHandler:
logger.debug(f"Async chunk type: {type(chunk)}") logger.debug(f"Async chunk type: {type(chunk)}")
logger.debug(f"Async chunk: {chunk}") logger.debug(f"Async chunk: {chunk}")
# For async generators, chunks might be bytes (SSE format) # For async generators, chunks might be bytes or pre-formatted SSE strings
if isinstance(chunk, bytes): if isinstance(chunk, bytes):
logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes") logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes")
yield chunk yield chunk
elif isinstance(chunk, str):
# Already SSE-formatted (e.g. "data: {...}\n\n") — pass through directly
yield chunk.encode('utf-8')
else: else:
# Fallback: treat as dict and serialize # Fallback: treat as dict and serialize
chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
...@@ -2967,7 +2970,7 @@ class RotationHandler: ...@@ -2967,7 +2970,7 @@ class RotationHandler:
estimated_prompt_tokens = count_messages_tokens(messages, model_name) estimated_prompt_tokens = count_messages_tokens(messages, model_name)
# More realistic completion estimate # More realistic completion estimate
max_tokens = request_data.get('max_tokens', 0) max_tokens = request_data.get('max_tokens') or 0
if max_tokens > 0: if max_tokens > 0:
estimated_completion = min(max_tokens, estimated_prompt_tokens * 2) estimated_completion = min(max_tokens, estimated_prompt_tokens * 2)
else: else:
...@@ -3636,11 +3639,12 @@ class RotationHandler: ...@@ -3636,11 +3639,12 @@ class RotationHandler:
logger.debug(f"Async chunk type: {type(chunk)}") logger.debug(f"Async chunk type: {type(chunk)}")
logger.debug(f"Async chunk: {chunk}") logger.debug(f"Async chunk: {chunk}")
# For Kiro, chunks are already properly formatted SSE bytes # For Kiro/Claude CLI, chunks may be pre-formatted SSE bytes or strings
# Just pass them through directly
if isinstance(chunk, bytes): if isinstance(chunk, bytes):
logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes") logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes")
yield chunk yield chunk
elif isinstance(chunk, str):
yield chunk.encode('utf-8')
else: else:
# Fallback: treat as dict and serialize # Fallback: treat as dict and serialize
chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
...@@ -4464,7 +4468,7 @@ class AutoselectHandler: ...@@ -4464,7 +4468,7 @@ class AutoselectHandler:
estimated_prompt_tokens = count_messages_tokens(messages, model_name) estimated_prompt_tokens = count_messages_tokens(messages, model_name)
# More realistic completion estimate # More realistic completion estimate
max_tokens = request_data.get('max_tokens', 0) max_tokens = request_data.get('max_tokens') or 0
if max_tokens > 0: if max_tokens > 0:
estimated_completion = min(max_tokens, estimated_prompt_tokens * 2) estimated_completion = min(max_tokens, estimated_prompt_tokens * 2)
else: else:
......
...@@ -360,11 +360,12 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -360,11 +360,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
logger.warning(f"ClaudeCliMode: failed to load credentials: {exc}") logger.warning(f"ClaudeCliMode: failed to load credentials: {exc}")
return None return None
def _messages_to_cli_prompt(self, messages: List[Dict]) -> str: def _messages_to_cli_prompt(self, messages: List[Dict],
tools: Optional[List[Dict]] = None) -> str:
""" """
Convert an OpenAI-style messages list to a flat text prompt for the Convert an OpenAI-style messages list (plus optional tool definitions)
claude CLI. System messages are included as a prefix; no Anthropic to a flat text prompt for the claude CLI sent via stdin.
system-prompt injection is needed in CLI mode. System messages and tool definitions are included as a prefix.
""" """
system_parts: List[str] = [] system_parts: List[str] = []
turn_parts: List[str] = [] turn_parts: List[str] = []
...@@ -373,15 +374,14 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -373,15 +374,14 @@ class ClaudeProviderHandler(BaseProviderHandler):
role = msg.get('role', '') role = msg.get('role', '')
content = msg.get('content', '') content = msg.get('content', '')
# Normalise content to str
if isinstance(content, list): if isinstance(content, list):
text_fragments = [] fragments = []
for block in content: for block in content:
if isinstance(block, dict) and block.get('type') == 'text': if isinstance(block, dict) and block.get('type') == 'text':
text_fragments.append(block.get('text', '')) fragments.append(block.get('text', ''))
elif isinstance(block, str): elif isinstance(block, str):
text_fragments.append(block) fragments.append(block)
content = '\n'.join(text_fragments) content = '\n'.join(fragments)
elif not isinstance(content, str): elif not isinstance(content, str):
content = str(content) content = str(content)
...@@ -392,12 +392,155 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -392,12 +392,155 @@ class ClaudeProviderHandler(BaseProviderHandler):
elif role == 'assistant': elif role == 'assistant':
turn_parts.append(f'Assistant: {content}') turn_parts.append(f'Assistant: {content}')
if tools:
tools_json = json.dumps(tools, ensure_ascii=False)
system_parts.append(
f'Available tools (respond with tool_use blocks as needed):\n{tools_json}'
)
parts: List[str] = [] parts: List[str] = []
if system_parts: if system_parts:
parts.append('[System Instructions: ' + '\n'.join(system_parts) + ']') parts.append('[System Instructions: ' + '\n'.join(system_parts) + ']')
parts.extend(turn_parts) parts.extend(turn_parts)
return '\n\n'.join(parts) return '\n\n'.join(parts)
async def _cli_discover_models(self, config_dir: str) -> List['Model']:
"""
Ask the claude CLI which models it supports using --output-format json.
Returns a list of Model objects parsed from the JSON result.
The single-object JSON output format (not stream-json) is used here
because it carries a `modelUsage` map with real contextWindow metadata,
and the `result` text lists all models Claude knows about.
"""
import re
logger = _logging.getLogger(__name__)
env = os.environ.copy()
env['CLAUDE_CONFIG_DIR'] = config_dir
env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'
prompt = (
"Which models are you compatible with? "
"Give me only a JSON list without any other comment or word "
"except for the list of the model IDs."
)
cmd = [
'claude', '-p', prompt,
'--output-format', 'json',
'--dangerously-skip-permissions',
'--no-session-persistence',
]
logger.info(
"ClaudeCliMode: model discovery subprocess\n"
f" Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
+ ' '.join(cmd)
)
process = await asyncio.create_subprocess_exec(
*cmd,
env=env,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
try:
stdout_bytes, stderr_bytes = await asyncio.wait_for(
process.communicate(), timeout=60.0
)
except asyncio.TimeoutError:
logger.error("ClaudeCliMode: model discovery subprocess timed out")
process.kill()
await process.wait()
return []
if stderr_bytes:
logger.debug(
f"ClaudeCliMode: discovery stderr:\n"
f"{stderr_bytes.decode('utf-8', errors='replace')[:2000]}"
)
stdout_str = stdout_bytes.decode('utf-8', errors='replace').strip()
logger.debug(f"ClaudeCliMode: discovery raw output: {stdout_str[:1000]}")
if not stdout_str:
logger.warning("ClaudeCliMode: model discovery returned empty output")
return []
try:
data = json.loads(stdout_str)
except json.JSONDecodeError as e:
logger.warning(f"ClaudeCliMode: model discovery JSON parse error: {e}")
return []
if data.get('is_error') or data.get('subtype') != 'success':
logger.warning(
f"ClaudeCliMode: model discovery error: {data.get('result', '')[:200]}"
)
return []
# modelUsage keys → real metadata (contextWindow, maxOutputTokens)
# Note: only models actually invoked in this call appear here; haiku is
# used for internal routing so it shows up even though we didn't ask for it.
model_usage: dict = data.get('modelUsage', {})
# result text contains the JSON list we asked for, possibly wrapped in
# a markdown code fence like ```json\n[...]\n```
result_text: str = data.get('result', '')
logger.info(f"ClaudeCliMode: discovery result: {result_text!r}")
# Parse the JSON array from result (strip code fences if present)
json_match = re.search(r'\[[\s\S]*?\]', result_text)
result_ids: set = set()
if json_match:
try:
parsed = json.loads(json_match.group())
if isinstance(parsed, list):
result_ids = {m for m in parsed if isinstance(m, str) and m.startswith('claude-')}
except json.JSONDecodeError:
pass
# Fall back to regex scan of the result text if JSON parse failed
if not result_ids:
result_ids = set(re.findall(r'claude-[a-z0-9][a-z0-9.\-]*[a-z0-9]', result_text))
logger.info(f"ClaudeCliMode: model IDs from result: {sorted(result_ids)}")
logger.info(f"ClaudeCliMode: model IDs from modelUsage: {sorted(model_usage.keys())}")
# Known context window overrides — avoids a costly second prompt.
# modelUsage carries real values for models used in this call; for the
# rest we apply these known constants rather than querying Claude again.
_known_context: dict = {
'claude-opus-4-7': 1000000,
}
# Union: result_ids is the authoritative list; modelUsage adds metadata
all_ids = result_ids | set(model_usage.keys())
if not all_ids:
return []
models = []
for mid in sorted(all_ids):
usage_meta = model_usage.get(mid, {})
context_size = (
usage_meta.get('contextWindow')
or _known_context.get(mid)
or 200000
)
max_output = usage_meta.get('maxOutputTokens')
m = Model(
id=mid,
name=mid,
provider_id=self.provider_id,
context_size=context_size,
context_length=context_size,
)
if max_output:
m.max_output_tokens = max_output
models.append(m)
return models
async def _handle_cli_streaming_request(self, prompt: str, model: str, config_dir: str): async def _handle_cli_streaming_request(self, prompt: str, model: str, config_dir: str):
""" """
Spawn a claude CLI subprocess, stream its JSON output, and yield Spawn a claude CLI subprocess, stream its JSON output, and yield
...@@ -412,9 +555,11 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -412,9 +555,11 @@ class ClaudeProviderHandler(BaseProviderHandler):
env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false' env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'
cmd = [ cmd = [
'stdbuf', '-oL',
'claude', '-p', 'claude', '-p',
'--input-format', 'stream-json', '--input-format', 'stream-json',
'--output-format', 'stream-json', '--output-format', 'stream-json',
'--include-partial-messages',
'--tools', '', '--tools', '',
'--dangerously-skip-permissions', '--dangerously-skip-permissions',
'--no-session-persistence', '--no-session-persistence',
...@@ -423,7 +568,20 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -423,7 +568,20 @@ class ClaudeProviderHandler(BaseProviderHandler):
if clean_model: if clean_model:
cmd += ['--model', clean_model] cmd += ['--model', clean_model]
logger.info(f"ClaudeCliMode: launching subprocess model={clean_model} dir={config_dir}") stdin_payload: Dict = {
'type': 'user_message',
'content': [{'type': 'text', 'text': prompt}],
}
input_msg = json.dumps(stdin_payload) + '\n'
# Log a shell-replicable command for debugging
cmd_str = ' '.join(cmd)
logger.info(
f"ClaudeCliMode: launching subprocess model={clean_model} dir={config_dir}\n"
f" Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
f"{cmd_str} <<'EOF'\n{input_msg.strip()}\nEOF"
)
process = await asyncio.create_subprocess_exec( process = await asyncio.create_subprocess_exec(
*cmd, *cmd,
...@@ -433,11 +591,6 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -433,11 +591,6 @@ class ClaudeProviderHandler(BaseProviderHandler):
stderr=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
) )
# Send the prompt as a stream-json user_message then close stdin
input_msg = json.dumps({
'type': 'user_message',
'content': [{'type': 'text', 'text': prompt}],
}) + '\n'
process.stdin.write(input_msg.encode()) process.stdin.write(input_msg.encode())
await process.stdin.drain() await process.stdin.drain()
process.stdin.close() process.stdin.close()
...@@ -446,6 +599,12 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -446,6 +599,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
created_time = int(time.time()) created_time = int(time.time())
first_chunk = True first_chunk = True
# State for accumulating tool_use blocks
# { block_index: {"id": ..., "name": ..., "arguments": ""} }
tool_blocks: dict = {}
tool_header_sent: set = set()
cli_prev_text_len: int = 0
try: try:
while True: while True:
try: try:
...@@ -461,15 +620,31 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -461,15 +620,31 @@ class ClaudeProviderHandler(BaseProviderHandler):
if not line_str: if not line_str:
continue continue
logger.debug(f"ClaudeCliMode: raw event: {line_str}")
try: try:
data = json.loads(line_str) data = json.loads(line_str)
except json.JSONDecodeError: except json.JSONDecodeError:
logger.debug(f"ClaudeCliMode: non-JSON line: {line_str}")
continue continue
event_type = data.get('type') event_type = data.get('type')
if event_type == 'content_block_delta': if event_type == 'content_block_start':
cb = data.get('content_block', {})
if cb.get('type') == 'tool_use':
idx = data.get('index', 0)
tool_blocks[idx] = {
'id': cb.get('id', f'call_{idx}'),
'name': cb.get('name', ''),
'arguments': '',
}
logger.debug(f"ClaudeCliMode: tool_use block started idx={idx} name={cb.get('name')}")
elif event_type == 'content_block_delta':
delta = data.get('delta', {}) delta = data.get('delta', {})
idx = data.get('index', 0)
if delta.get('type') == 'text_delta': if delta.get('type') == 'text_delta':
text = delta.get('text', '') text = delta.get('text', '')
if not text: if not text:
...@@ -481,19 +656,83 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -481,19 +656,83 @@ class ClaudeProviderHandler(BaseProviderHandler):
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}]})}\n\n' yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}]})}\n\n'
elif event_type in ('message_stop', 'result'): elif delta.get('type') == 'input_json_delta' and idx in tool_blocks:
logger.debug(f"ClaudeCliMode: received {event_type}, closing stream") partial = delta.get('partial_json', '')
tool_blocks[idx]['arguments'] += partial
# Emit streaming tool_calls delta
if idx not in tool_header_sent:
tool_header_sent.add(idx)
if first_chunk:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": None, "tool_calls": [{"index": idx, "id": tool_blocks[idx]["id"], "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": ""}}]}, "finish_reason": None}]})}\n\n'
first_chunk = False
else:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"tool_calls": [{"index": idx, "id": tool_blocks[idx]["id"], "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": ""}}]}, "finish_reason": None}]})}\n\n'
if partial:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"tool_calls": [{"index": idx, "function": {"arguments": partial}}]}, "finish_reason": None}]})}\n\n'
elif event_type == 'assistant':
# Claude CLI stream-json format: partial or final assistant message
msg = data.get('message', {})
last_text = ''
for block in msg.get('content', []):
if not isinstance(block, dict):
continue
btype = block.get('type')
if btype == 'text':
last_text += block.get('text', '')
elif btype == 'tool_use':
# Tool call in assistant event — register and emit if not yet seen
tc_id = block.get('id', f'call_{len(tool_blocks)}')
if tc_id not in tool_header_sent:
tool_header_sent.add(tc_id)
idx = len(tool_blocks)
tool_blocks[idx] = {
'id': tc_id,
'name': block.get('name', ''),
'arguments': json.dumps(block.get('input', {}), ensure_ascii=False),
}
role_delta = {'role': 'assistant', 'content': None} if first_chunk else {}
first_chunk = False
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {**role_delta, "tool_calls": [{"index": idx, "id": tc_id, "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": tool_blocks[idx]["arguments"]}}]}, "finish_reason": None}]})}\n\n'
if last_text:
# Content is cumulative; emit only new characters
new_text = last_text[cli_prev_text_len:]
cli_prev_text_len = len(last_text)
if new_text:
if first_chunk:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]})}\n\n'
first_chunk = False
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": new_text}, "finish_reason": None}]})}\n\n'
elif event_type == 'result':
result_text = data.get('result', '')
logger.debug(f"ClaudeCliMode: result event, is_error={data.get('is_error')}, text_len={len(result_text)}")
# Only emit via result if we haven't already streamed content via other events
if result_text and first_chunk:
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]})}\n\n'
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": result_text}, "finish_reason": None}]})}\n\n'
first_chunk = False
break
elif event_type == 'message_stop':
logger.debug("ClaudeCliMode: received message_stop")
break break
else:
logger.debug(f"ClaudeCliMode: unhandled event type={event_type}")
except Exception as exc: except Exception as exc:
logger.error(f"ClaudeCliMode: streaming error: {exc}", exc_info=True) logger.error(f"ClaudeCliMode: streaming error: {exc}", exc_info=True)
finally:
try: try:
stderr_bytes = await asyncio.wait_for(process.stderr.read(), timeout=2.0) stderr_bytes = await asyncio.wait_for(process.stderr.read(), timeout=2.0)
if stderr_bytes: if stderr_bytes:
logger.error(f"ClaudeCliMode: stderr: {stderr_bytes.decode('utf-8', errors='replace')[:500]}") decoded = stderr_bytes.decode('utf-8', errors='replace')
logger.debug(f"ClaudeCliMode: stderr:\n{decoded[:2000]}")
except Exception: except Exception:
pass pass
finally:
try: try:
process.terminate() process.terminate()
await asyncio.wait_for(process.wait(), timeout=5.0) await asyncio.wait_for(process.wait(), timeout=5.0)
...@@ -503,41 +742,87 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -503,41 +742,87 @@ class ClaudeProviderHandler(BaseProviderHandler):
except Exception: except Exception:
pass pass
# Final stop + DONE finish_reason = 'tool_calls' if tool_blocks else 'stop'
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]})}\n\n' yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}]})}\n\n'
yield 'data: [DONE]\n\n' yield 'data: [DONE]\n\n'
async def _handle_cli_request(self, prompt: str, model: str, config_dir: str) -> dict: async def _handle_cli_request(self, prompt: str, model: str, config_dir: str,
"""Non-streaming CLI request – collects the full response text.""" tools: Optional[List[Dict]] = None) -> dict:
accumulated: List[str] = [] """Non-streaming CLI request using --output-format json with prompt via stdin."""
logger = _logging.getLogger(__name__)
clean_model = model.split('/')[-1] if '/' in model else model
env = os.environ.copy()
env['CLAUDE_CONFIG_DIR'] = config_dir
env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'
cmd = [
'claude', '-p',
'--output-format', 'json',
'--dangerously-skip-permissions',
'--no-session-persistence',
]
if tools:
cmd += ['--tools', json.dumps(tools, ensure_ascii=False)]
if clean_model:
cmd += ['--model', clean_model]
logger.info(
f"ClaudeCliMode: non-streaming subprocess model={clean_model} dir={config_dir}\n"
f" Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
+ ' '.join(cmd) + f" <<'EOF'\n{prompt[:200]}...\nEOF"
)
process = await asyncio.create_subprocess_exec(
*cmd,
env=env,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
try:
stdout_bytes, stderr_bytes = await asyncio.wait_for(
process.communicate(input=prompt.encode()), timeout=120.0
)
except asyncio.TimeoutError:
logger.error("ClaudeCliMode: non-streaming subprocess timed out")
process.kill()
await process.wait()
return {
'id': f'chatcmpl-cli-{int(time.time())}',
'object': 'chat.completion',
'created': int(time.time()),
'model': f'{self.provider_id}/{clean_model}',
'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'Request timed out.'}, 'finish_reason': 'stop'}],
'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
}
if stderr_bytes:
logger.debug(f"ClaudeCliMode: stderr:\n{stderr_bytes.decode('utf-8', errors='replace')[:2000]}")
stdout_str = stdout_bytes.decode('utf-8', errors='replace').strip()
logger.debug(f"ClaudeCliMode: raw output: {stdout_str[:500]}")
async for chunk_str in self._handle_cli_streaming_request(prompt, model, config_dir): result_text = ''
if chunk_str.startswith('data: ') and '[DONE]' not in chunk_str:
try: try:
data = json.loads(chunk_str[6:]) data = json.loads(stdout_str)
choices = data.get('choices', []) if data.get('is_error'):
if choices: logger.warning(f"ClaudeCliMode: CLI returned error: {data.get('result', '')[:200]}")
text = choices[0].get('delta', {}).get('content', '') result_text = data.get('result', '')
if text:
accumulated.append(text)
except json.JSONDecodeError: except json.JSONDecodeError:
pass result_text = stdout_str
clean_model = model.split('/')[-1] if '/' in model else model
full_text = ''.join(accumulated)
return { return {
'id': f'chatcmpl-cli-{int(time.time())}', 'id': f'chatcmpl-cli-{int(time.time())}',
'object': 'chat.completion', 'object': 'chat.completion',
'created': int(time.time()), 'created': int(time.time()),
'model': f'{self.provider_id}/{clean_model}', 'model': f'{self.provider_id}/{clean_model}',
'choices': [{ 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': result_text}, 'finish_reason': 'stop'}],
'index': 0,
'message': {'role': 'assistant', 'content': full_text},
'finish_reason': 'stop',
}],
'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}, 'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
} }
def _init_session_identifiers(self): def _init_session_identifiers(self):
"""Initialize persistent session identifiers (device_id, account_uuid, session_id).""" """Initialize persistent session identifiers (device_id, account_uuid, session_id)."""
import uuid import uuid
...@@ -1229,14 +1514,15 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -1229,14 +1514,15 @@ class ClaudeProviderHandler(BaseProviderHandler):
cli_creds = self._get_cli_credentials() cli_creds = self._get_cli_credentials()
if cli_creds is not None: if cli_creds is not None:
logger.info(f"ClaudeProviderHandler: using CLI subprocess mode for model {model}") logger.info(f"ClaudeProviderHandler: using CLI subprocess mode for model {model}")
prompt = self._messages_to_cli_prompt(messages) anthropic_tools = self._convert_tools_to_anthropic(tools) if tools else None
prompt = self._messages_to_cli_prompt(messages, tools=anthropic_tools)
config_dir = await ClaudeCliSessionManager.get_config_dir( config_dir = await ClaudeCliSessionManager.get_config_dir(
self.user_id, self.provider_id, cli_creds self.user_id, self.provider_id, cli_creds
) )
if stream: if stream:
return self._handle_cli_streaming_request(prompt, model, config_dir) return self._handle_cli_streaming_request(prompt, model, config_dir)
else: else:
return await self._handle_cli_request(prompt, model, config_dir) return await self._handle_cli_request(prompt, model, config_dir, tools=anthropic_tools)
# ── Fall through to HTTP API mode ──────────────────────────────── # ── Fall through to HTTP API mode ────────────────────────────────
logger.info(f"ClaudeProviderHandler: Handling request for model {model} (Direct HTTP mode)") logger.info(f"ClaudeProviderHandler: Handling request for model {model} (Direct HTTP mode)")
...@@ -2111,6 +2397,31 @@ class ClaudeProviderHandler(BaseProviderHandler): ...@@ -2111,6 +2397,31 @@ class ClaudeProviderHandler(BaseProviderHandler):
await self.apply_rate_limit() await self.apply_rate_limit()
# [0/3] CLI subprocess model discovery
import aisbf.cli_mode as cli_mode_mod
if cli_mode_mod.CLAUDE_CLI_MODE:
cli_creds = self._get_cli_credentials()
if cli_creds is not None:
try:
logging.info("ClaudeProviderHandler: [0/3] CLI subprocess model discovery...")
config_dir = await ClaudeCliSessionManager.get_config_dir(
self.user_id, self.provider_id, cli_creds
)
cli_models = await self._cli_discover_models(config_dir)
if cli_models:
self._save_models_cache(cli_models)
logging.info(
f"ClaudeProviderHandler: ✓ CLI discovery returned {len(cli_models)} models"
)
return cli_models
logging.warning(
"ClaudeProviderHandler: CLI discovery returned no models, falling through"
)
except Exception as cli_disc_err:
logging.warning(
f"ClaudeProviderHandler: CLI model discovery failed: {cli_disc_err}"
)
try: try:
logging.info("ClaudeProviderHandler: [1/3] Attempting primary API endpoint...") logging.info("ClaudeProviderHandler: [1/3] Attempting primary API endpoint...")
......
...@@ -578,6 +578,11 @@ _MUST_CHANGE_PASSWORD_WHITELIST = ( ...@@ -578,6 +578,11 @@ _MUST_CHANGE_PASSWORD_WHITELIST = (
'/dashboard/settings', '/dashboard/settings',
'/dashboard/logout', '/dashboard/logout',
'/api/admin/settings/', '/api/admin/settings/',
'/dashboard/tor/status',
'/dashboard/response-cache/stats',
'/dashboard/response-cache/clear',
'/dashboard/test-smtp',
'/dashboard/restart',
) )
# --- Login rate limiter --- # --- Login rate limiter ---
...@@ -1546,6 +1551,10 @@ async def api_token_authorization_middleware(request: Request, call_next): ...@@ -1546,6 +1551,10 @@ async def api_token_authorization_middleware(request: Request, call_next):
if request.method == "GET" and path in ["/api/models", "/api/v1/models"]: if request.method == "GET" and path in ["/api/models", "/api/v1/models"]:
return await call_next(request) return await call_next(request)
# If authentication is globally disabled, skip all token scope checks
if not (server_config and server_config.get('auth_enabled', False)):
return await call_next(request)
is_global_token = getattr(request.state, 'is_global_token', False) is_global_token = getattr(request.state, 'is_global_token', False)
user_id = getattr(request.state, 'user_id', None) user_id = getattr(request.state, 'user_id', None)
is_admin = getattr(request.state, 'is_admin', False) is_admin = getattr(request.state, 'is_admin', False)
...@@ -5578,6 +5587,7 @@ async def dashboard_settings(request: Request): ...@@ -5578,6 +5587,7 @@ async def dashboard_settings(request: Request):
'fullconfig_tokens': [] 'fullconfig_tokens': []
} }
warning = request.query_params.get('warning')
return templates.TemplateResponse( return templates.TemplateResponse(
request=request, request=request,
name="dashboard/settings.html", name="dashboard/settings.html",
...@@ -5586,7 +5596,8 @@ async def dashboard_settings(request: Request): ...@@ -5586,7 +5596,8 @@ async def dashboard_settings(request: Request):
"session": request.session, "session": request.session,
"__version__": __version__, "__version__": __version__,
"config": aisbf_config, "config": aisbf_config,
"os": os "os": os,
"warning": warning,
} }
) )
...@@ -5599,7 +5610,6 @@ async def dashboard_settings_save( ...@@ -5599,7 +5610,6 @@ async def dashboard_settings_save(
auth_enabled: bool = Form(False), auth_enabled: bool = Form(False),
auth_tokens: str = Form(""), auth_tokens: str = Form(""),
dashboard_username: str = Form(...), dashboard_username: str = Form(...),
dashboard_password: str = Form(""),
condensation_model_id: str = Form(...), condensation_model_id: str = Form(...),
autoselect_model_id: str = Form(...), autoselect_model_id: str = Form(...),
database_type: str = Form("sqlite"), database_type: str = Form("sqlite"),
...@@ -5691,8 +5701,6 @@ async def dashboard_settings_save( ...@@ -5691,8 +5701,6 @@ async def dashboard_settings_save(
aisbf_config['auth']['enabled'] = auth_enabled aisbf_config['auth']['enabled'] = auth_enabled
aisbf_config['auth']['tokens'] = [t.strip() for t in auth_tokens.split('\n') if t.strip()] aisbf_config['auth']['tokens'] = [t.strip() for t in auth_tokens.split('\n') if t.strip()]
aisbf_config['dashboard']['username'] = dashboard_username aisbf_config['dashboard']['username'] = dashboard_username
if dashboard_password: # Only update if provided - hash the password
aisbf_config['dashboard']['password'] = _db_hash_password(dashboard_password)
aisbf_config['internal_model']['condensation_model_id'] = condensation_model_id aisbf_config['internal_model']['condensation_model_id'] = condensation_model_id
aisbf_config['internal_model']['autoselect_model_id'] = autoselect_model_id aisbf_config['internal_model']['autoselect_model_id'] = autoselect_model_id
...@@ -5840,12 +5848,10 @@ async def dashboard_settings_save( ...@@ -5840,12 +5848,10 @@ async def dashboard_settings_save(
aisbf_config['dashboard']['notifications']['wallet_topup'] = admin_notify_wallet_topup aisbf_config['dashboard']['notifications']['wallet_topup'] = admin_notify_wallet_topup
aisbf_config['dashboard']['notifications']['user_deleted_account'] = admin_notify_user_deleted_account aisbf_config['dashboard']['notifications']['user_deleted_account'] = admin_notify_user_deleted_account
# Handle new_admin_password from the Admin tab (distinct from dashboard_password in Dashboard tab)
if new_admin_password: if new_admin_password:
if new_admin_password == confirm_admin_password: if new_admin_password == confirm_admin_password:
aisbf_config['dashboard']['password'] = _db_hash_password(new_admin_password) aisbf_config['dashboard']['password'] = _db_hash_password(new_admin_password)
request.session.pop('must_change_password', None) request.session.pop('must_change_password', None)
# silently ignore mismatch — UI should validate
# Save config # Save config
config_path = Path.home() / '.aisbf' / 'aisbf.json' config_path = Path.home() / '.aisbf' / 'aisbf.json'
...@@ -5853,9 +5859,9 @@ async def dashboard_settings_save( ...@@ -5853,9 +5859,9 @@ async def dashboard_settings_save(
with open(config_path, 'w') as f: with open(config_path, 'w') as f:
json.dump(aisbf_config, f, indent=2) json.dump(aisbf_config, f, indent=2)
# If a new dashboard password was submitted, clear the forced-change flag # Reload dashboard credentials in memory so the new username/password takes effect immediately
if dashboard_password: if server_config is not None:
request.session.pop('must_change_password', None) server_config['dashboard_config'] = aisbf_config.get('dashboard', {})
return templates.TemplateResponse( return templates.TemplateResponse(
request=request, request=request,
...@@ -10057,7 +10063,7 @@ async def v1_chat_completions(request: Request, body: ChatCompletionRequest): ...@@ -10057,7 +10063,7 @@ async def v1_chat_completions(request: Request, body: ChatCompletionRequest):
if provider_id not in config.providers: if provider_id not in config.providers:
raise HTTPException( raise HTTPException(
status_code=404, status_code=404,
detail=f"User autoselect '{actual_model}' not found. Available: {list(handler.user_autoselects.keys())}" detail=f"Provider '{provider_id}' not found. Available: {list(config.providers.keys())}"
) )
# Validate kiro credentials before processing request # Validate kiro credentials before processing request
......
...@@ -551,10 +551,18 @@ function renderProviderDetails(key) { ...@@ -551,10 +551,18 @@ function renderProviderDetails(key) {
${CLAUDE_CLI_MODE ? ` ${CLAUDE_CLI_MODE ? `
<div style="margin-top: 20px; padding-top: 15px; border-top: 1px solid #1e3a5f;"> <div style="margin-top: 20px; padding-top: 15px; border-top: 1px solid #1e3a5f;">
<h5 style="margin: 0 0 8px 0; color: #4ade80;">Claude CLI Mode Active</h5> <h5 style="margin: 0 0 8px 0; color: #4ade80;">Claude CLI Mode Active</h5>
<small style="color: #4ade80; display: block; margin-bottom: 14px;"> <small style="color: #4ade80; display: block; margin-bottom: 8px;">
The claude CLI was detected at startup. When enabled, requests are piped The claude CLI was detected at startup. When enabled, requests are piped
through the local claude binary instead of the HTTP API. through the local claude binary instead of the HTTP API.
</small> </small>
<div style="background: #3a2a00; border: 1px solid #f59e0b; border-radius: 6px; padding: 10px 14px; margin-bottom: 14px;">
<span style="color: #f59e0b; font-weight: 600;">⚠ Experimental:</span>
<span style="color: #fcd34d; font-size: 0.85em;">
CLI mode is experimental. Tool calling (function calling) does not yet work reliably —
the CLI subprocess may refuse or mishandle tool definitions. Use with simple
(non-tool) requests only until this is resolved.
</span>
</div>
<div class="form-group" style="margin-bottom: 16px;"> <div class="form-group" style="margin-bottom: 16px;">
<label style="display: flex; align-items: center; gap: 10px; cursor: pointer;"> <label style="display: flex; align-items: center; gap: 10px; cursor: pointer;">
......
...@@ -41,10 +41,20 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. ...@@ -41,10 +41,20 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
<div class="alert alert-error">{{ error }}</div> <div class="alert alert-error">{{ error }}</div>
{% endif %} {% endif %}
{% if warning == 'default_password' %}
<div style="background:#b71c1c; border:2px solid #e53935; color:#fff; padding:16px 20px; border-radius:6px; margin-bottom:20px; display:flex; align-items:flex-start; gap:12px;">
<span style="font-size:1.4em; line-height:1;">&#9888;</span>
<div>
<strong>Security Warning: Default password in use.</strong><br>
You are logged in with the factory-default <code style="background:rgba(0,0,0,.3);padding:1px 5px;border-radius:3px;">admin / admin</code> credentials.
Please change your password immediately using the <strong>Admin</strong> tab below before using AISBF.
</div>
</div>
{% endif %}
<div class="settings-tabs"> <div class="settings-tabs">
<div class="settings-tab active" onclick="switchTab('server')"><i class="fas fa-server"></i> Server</div> <div class="settings-tab active" onclick="switchTab('server')"><i class="fas fa-server"></i> Server</div>
<div class="settings-tab" onclick="switchTab('auth')"><i class="fas fa-key"></i> Auth &amp; MCP</div> <div class="settings-tab" onclick="switchTab('auth')"><i class="fas fa-key"></i> Auth &amp; MCP</div>
<div class="settings-tab" onclick="switchTab('dashboard')"><i class="fas fa-tachometer-alt"></i> Dashboard</div>
<div class="settings-tab" onclick="switchTab('models')"><i class="fas fa-brain"></i> Models</div> <div class="settings-tab" onclick="switchTab('models')"><i class="fas fa-brain"></i> Models</div>
<div class="settings-tab" onclick="switchTab('database')"><i class="fas fa-database"></i> Database</div> <div class="settings-tab" onclick="switchTab('database')"><i class="fas fa-database"></i> Database</div>
<div class="settings-tab" onclick="switchTab('cache')"><i class="fas fa-bolt"></i> Cache</div> <div class="settings-tab" onclick="switchTab('cache')"><i class="fas fa-bolt"></i> Cache</div>
...@@ -139,20 +149,6 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. ...@@ -139,20 +149,6 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
</div> </div>
</div><!-- /tab-auth --> </div><!-- /tab-auth -->
<div class="settings-section" id="tab-dashboard">
<div class="section-title"><i class="fas fa-tachometer-alt"></i> Dashboard</div>
<div class="form-group">
<label for="dashboard_username">Dashboard Username</label>
<input type="text" id="dashboard_username" name="dashboard_username" value="{{ config.dashboard.username }}" required>
</div>
<div class="form-group">
<label for="dashboard_password">Dashboard Password</label>
<input type="password" id="dashboard_password" name="dashboard_password" placeholder="Leave blank to keep current">
</div>
</div><!-- /tab-dashboard -->
<div class="settings-section" id="tab-models"> <div class="settings-section" id="tab-models">
<div class="section-title"><i class="fas fa-brain"></i> Internal Models</div> <div class="section-title"><i class="fas fa-brain"></i> Internal Models</div>
...@@ -879,15 +875,18 @@ brew services restart tor # macOS</code></pre> ...@@ -879,15 +875,18 @@ brew services restart tor # macOS</code></pre>
<div class="section-title"><i class="fas fa-shield-alt"></i> Admin Account &amp; Notifications</div> <div class="section-title"><i class="fas fa-shield-alt"></i> Admin Account &amp; Notifications</div>
<div class="form-group"> <div class="form-group">
<label for="new_admin_password">New Admin Password</label> <label for="dashboard_username">Admin Username</label>
<input type="text" id="dashboard_username" name="dashboard_username" value="{{ config.dashboard.username }}" required>
</div>
<div class="form-group">
<label for="new_admin_password">New Password</label>
<input type="password" id="new_admin_password" name="new_admin_password" placeholder="Leave blank to keep current password"> <input type="password" id="new_admin_password" name="new_admin_password" placeholder="Leave blank to keep current password">
<small style="color: #666; display: block; margin-top: 5px;">Enter a new password to change the admin dashboard password</small>
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="confirm_admin_password">Confirm New Admin Password</label> <label for="confirm_admin_password">Confirm New Password</label>
<input type="password" id="confirm_admin_password" name="confirm_admin_password" placeholder="Confirm new password"> <input type="password" id="confirm_admin_password" name="confirm_admin_password" placeholder="Confirm new password">
<small style="color: #666; display: block; margin-top: 5px;">Re-enter the new password to confirm</small>
</div> </div>
<div class="form-group"> <div class="form-group">
...@@ -1194,13 +1193,12 @@ async function checkTorStatus() { ...@@ -1194,13 +1193,12 @@ async function checkTorStatus() {
// Check TOR status on page load // Check TOR status on page load
document.addEventListener('DOMContentLoaded', function() { document.addEventListener('DOMContentLoaded', function() {
checkTorStatus(); checkTorStatus();
// Refresh status every 30 seconds
setInterval(checkTorStatus, 30000); setInterval(checkTorStatus, 30000);
// Load cache statistics
refreshCacheStats(); refreshCacheStats();
// Refresh cache stats every 10 seconds
setInterval(refreshCacheStats, 10000); setInterval(refreshCacheStats, 10000);
{% if warning == 'default_password' %}
switchTab('admin');
{% endif %}
}); });
async function refreshCacheStats() { async function refreshCacheStats() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment