Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use...

Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use events, non-streaming via --output-format json

Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use...
Fix Claude CLI streaming: pass-through SSE strings, handle assistant/tool_use events, non-streaming via --output-format json
65fc640d · Stefy Lanza (nextime / spora ) · 23f7362e · 65fc640d · 65fc640d · 65fc640d
Commit 65fc640d authored Apr 23, 2026 by Stefy Lanza (nextime / spora )
6 changed files
--- a/aisbf/database.py
+++ b/aisbf/database.py
@@ -3651,20 +3651,6 @@ def DatabaseManager__create_cache_tables(self, cursor, auto_increment, timestamp
        )
    ''')
    
-    cursor.execute(f'''
-        CREATE TABLE IF NOT EXISTS context_dimensions (
-            id INTEGER PRIMARY KEY {auto_increment},
-            provider_id VARCHAR(255) NOT NULL,
-            model_name VARCHAR(255) NOT NULL,
-            context_size INTEGER,
-            condense_context INTEGER,
-            condense_method TEXT,
-            effective_context INTEGER DEFAULT 0,
-            last_updated TIMESTAMP DEFAULT {timestamp_default},
-            UNIQUE(provider_id, model_name)
-        )
-    ''')
-    
    logger.info("⚠️ CACHE DATABASE: Only minimal cache tables created - NO USER TABLES")


@@ -4216,6 +4202,48 @@ def DatabaseManager__run_config_migrations(self, cursor, auto_increment, timesta
    except Exception as e:
        logger.warning(f"Migration check for user_notifications table: {e}")

+    # Migration: Create context_dimensions table if missing
+    try:
+        if self.db_type == 'sqlite':
+            cursor.execute("PRAGMA table_info(context_dimensions)")
+            if not cursor.fetchall():
+                cursor.execute(f'''
+                    CREATE TABLE context_dimensions (
+                        id INTEGER PRIMARY KEY {auto_increment},
+                        provider_id VARCHAR(255) NOT NULL,
+                        model_name VARCHAR(255) NOT NULL,
+                        context_size INTEGER,
+                        condense_context INTEGER,
+                        condense_method TEXT,
+                        effective_context INTEGER DEFAULT 0,
+                        last_updated TIMESTAMP DEFAULT {timestamp_default},
+                        UNIQUE(provider_id, model_name)
+                    )
+                ''')
+                logger.info("✅ Migration: Created context_dimensions table")
+        else:
+            cursor.execute("""
+                SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES
+                WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'context_dimensions'
+            """)
+            if not cursor.fetchone():
+                cursor.execute(f'''
+                    CREATE TABLE context_dimensions (
+                        id INTEGER PRIMARY KEY {auto_increment},
+                        provider_id VARCHAR(255) NOT NULL,
+                        model_name VARCHAR(255) NOT NULL,
+                        context_size INTEGER,
+                        condense_context INTEGER,
+                        condense_method TEXT,
+                        effective_context INTEGER DEFAULT 0,
+                        last_updated TIMESTAMP DEFAULT {timestamp_default},
+                        UNIQUE(provider_id, model_name)
+                    )
+                ''')
+                logger.info("✅ Migration: Created context_dimensions table")
+    except Exception as e:
+        logger.warning(f"Migration check for context_dimensions table: {e}")
+
    logger.info("✅ All database migrations completed")

 # Patch the methods

--- a/aisbf/handlers.py
+++ b/aisbf/handlers.py
@@ -595,7 +595,7 @@ class RequestHandler:
                                }], model_name)
                            else:
                                # Fallback to estimation if no content
-                                max_tokens = request_data.get('max_tokens', 0)
+                                max_tokens = request_data.get('max_tokens') or 0
                                if max_tokens > 0:
                                    completion_tokens = min(max_tokens, estimated_prompt_tokens * 2)
                                else:
@@ -1182,10 +1182,13 @@ class RequestHandler:
                                logger.debug(f"Async chunk type: {type(chunk)}")
                                logger.debug(f"Async chunk: {chunk}")
                                
-                                # For async generators, chunks might be bytes (SSE format)
+                                # For async generators, chunks might be bytes or pre-formatted SSE strings
                                if isinstance(chunk, bytes):
                                    logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes")
                                    yield chunk
+                                elif isinstance(chunk, str):
+                                    # Already SSE-formatted (e.g. "data: {...}\n\n") — pass through directly
+                                    yield chunk.encode('utf-8')
                                else:
                                    # Fallback: treat as dict and serialize
                                    chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
@@ -2967,12 +2970,12 @@ class RotationHandler:
                                    estimated_prompt_tokens = count_messages_tokens(messages, model_name)
                                    
                                    # More realistic completion estimate
-                                    max_tokens = request_data.get('max_tokens', 0)
+                                    max_tokens = request_data.get('max_tokens') or 0
                                    if max_tokens > 0:
                                        estimated_completion = min(max_tokens, estimated_prompt_tokens * 2)
                                    else:
                                        estimated_completion = max(estimated_prompt_tokens, 50)
-                                    
+
                                    total_tokens = estimated_prompt_tokens + estimated_completion
                                    prompt_tokens = estimated_prompt_tokens
                                    completion_tokens = estimated_completion
@@ -3636,11 +3639,12 @@ class RotationHandler:
                                    logger.debug(f"Async chunk type: {type(chunk)}")
                                    logger.debug(f"Async chunk: {chunk}")

-                                    # For Kiro, chunks are already properly formatted SSE bytes
-                                    # Just pass them through directly
+                                    # For Kiro/Claude CLI, chunks may be pre-formatted SSE bytes or strings
                                    if isinstance(chunk, bytes):
                                        logger.debug(f"Yielding raw bytes chunk: {len(chunk)} bytes")
                                        yield chunk
+                                    elif isinstance(chunk, str):
+                                        yield chunk.encode('utf-8')
                                    else:
                                        # Fallback: treat as dict and serialize
                                        chunk_dict = chunk.model_dump() if hasattr(chunk, 'model_dump') else chunk
@@ -4464,12 +4468,12 @@ class AutoselectHandler:
                        estimated_prompt_tokens = count_messages_tokens(messages, model_name)
                        
                        # More realistic completion estimate
-                        max_tokens = request_data.get('max_tokens', 0)
+                        max_tokens = request_data.get('max_tokens') or 0
                        if max_tokens > 0:
                            estimated_completion = min(max_tokens, estimated_prompt_tokens * 2)
                        else:
                            estimated_completion = max(estimated_prompt_tokens, 50)
-                        
+
                        total_tokens = estimated_prompt_tokens + estimated_completion
                        prompt_tokens = estimated_prompt_tokens
                        completion_tokens = estimated_completion

--- a/aisbf/providers/claude.py
+++ b/aisbf/providers/claude.py
@@ -360,11 +360,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
                logger.warning(f"ClaudeCliMode: failed to load credentials: {exc}")
            return None

-    def _messages_to_cli_prompt(self, messages: List[Dict]) -> str:
+    def _messages_to_cli_prompt(self, messages: List[Dict],
+                                 tools: Optional[List[Dict]] = None) -> str:
        """
-        Convert an OpenAI-style messages list to a flat text prompt for the
-        claude CLI.  System messages are included as a prefix; no Anthropic
-        system-prompt injection is needed in CLI mode.
+        Convert an OpenAI-style messages list (plus optional tool definitions)
+        to a flat text prompt for the claude CLI sent via stdin.
+        System messages and tool definitions are included as a prefix.
        """
        system_parts: List[str] = []
        turn_parts: List[str] = []
@@ -373,15 +374,14 @@ class ClaudeProviderHandler(BaseProviderHandler):
            role = msg.get('role', '')
            content = msg.get('content', '')

-            # Normalise content to str
            if isinstance(content, list):
-                text_fragments = []
+                fragments = []
                for block in content:
                    if isinstance(block, dict) and block.get('type') == 'text':
-                        text_fragments.append(block.get('text', ''))
+                        fragments.append(block.get('text', ''))
                    elif isinstance(block, str):
-                        text_fragments.append(block)
-                content = '\n'.join(text_fragments)
+                        fragments.append(block)
+                content = '\n'.join(fragments)
            elif not isinstance(content, str):
                content = str(content)

@@ -392,12 +392,155 @@ class ClaudeProviderHandler(BaseProviderHandler):
            elif role == 'assistant':
                turn_parts.append(f'Assistant: {content}')

+        if tools:
+            tools_json = json.dumps(tools, ensure_ascii=False)
+            system_parts.append(
+                f'Available tools (respond with tool_use blocks as needed):\n{tools_json}'
+            )
+
        parts: List[str] = []
        if system_parts:
            parts.append('[System Instructions: ' + '\n'.join(system_parts) + ']')
        parts.extend(turn_parts)
        return '\n\n'.join(parts)

+    async def _cli_discover_models(self, config_dir: str) -> List['Model']:
+        """
+        Ask the claude CLI which models it supports using --output-format json.
+        Returns a list of Model objects parsed from the JSON result.
+        The single-object JSON output format (not stream-json) is used here
+        because it carries a `modelUsage` map with real contextWindow metadata,
+        and the `result` text lists all models Claude knows about.
+        """
+        import re
+        logger = _logging.getLogger(__name__)
+
+        env = os.environ.copy()
+        env['CLAUDE_CONFIG_DIR'] = config_dir
+        env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'
+
+        prompt = (
+            "Which models are you compatible with? "
+            "Give me only a JSON list without any other comment or word "
+            "except for the list of the model IDs."
+        )
+        cmd = [
+            'claude', '-p', prompt,
+            '--output-format', 'json',
+            '--dangerously-skip-permissions',
+            '--no-session-persistence',
+        ]
+
+        logger.info(
+            "ClaudeCliMode: model discovery subprocess\n"
+            f"  Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
+            + ' '.join(cmd)
+        )
+
+        process = await asyncio.create_subprocess_exec(
+            *cmd,
+            env=env,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+        try:
+            stdout_bytes, stderr_bytes = await asyncio.wait_for(
+                process.communicate(), timeout=60.0
+            )
+        except asyncio.TimeoutError:
+            logger.error("ClaudeCliMode: model discovery subprocess timed out")
+            process.kill()
+            await process.wait()
+            return []
+
+        if stderr_bytes:
+            logger.debug(
+                f"ClaudeCliMode: discovery stderr:\n"
+                f"{stderr_bytes.decode('utf-8', errors='replace')[:2000]}"
+            )
+
+        stdout_str = stdout_bytes.decode('utf-8', errors='replace').strip()
+        logger.debug(f"ClaudeCliMode: discovery raw output: {stdout_str[:1000]}")
+
+        if not stdout_str:
+            logger.warning("ClaudeCliMode: model discovery returned empty output")
+            return []
+
+        try:
+            data = json.loads(stdout_str)
+        except json.JSONDecodeError as e:
+            logger.warning(f"ClaudeCliMode: model discovery JSON parse error: {e}")
+            return []
+
+        if data.get('is_error') or data.get('subtype') != 'success':
+            logger.warning(
+                f"ClaudeCliMode: model discovery error: {data.get('result', '')[:200]}"
+            )
+            return []
+
+        # modelUsage keys → real metadata (contextWindow, maxOutputTokens)
+        # Note: only models actually invoked in this call appear here; haiku is
+        # used for internal routing so it shows up even though we didn't ask for it.
+        model_usage: dict = data.get('modelUsage', {})
+
+        # result text contains the JSON list we asked for, possibly wrapped in
+        # a markdown code fence like ```json\n[...]\n```
+        result_text: str = data.get('result', '')
+        logger.info(f"ClaudeCliMode: discovery result: {result_text!r}")
+
+        # Parse the JSON array from result (strip code fences if present)
+        json_match = re.search(r'\[[\s\S]*?\]', result_text)
+        result_ids: set = set()
+        if json_match:
+            try:
+                parsed = json.loads(json_match.group())
+                if isinstance(parsed, list):
+                    result_ids = {m for m in parsed if isinstance(m, str) and m.startswith('claude-')}
+            except json.JSONDecodeError:
+                pass
+
+        # Fall back to regex scan of the result text if JSON parse failed
+        if not result_ids:
+            result_ids = set(re.findall(r'claude-[a-z0-9][a-z0-9.\-]*[a-z0-9]', result_text))
+
+        logger.info(f"ClaudeCliMode: model IDs from result: {sorted(result_ids)}")
+        logger.info(f"ClaudeCliMode: model IDs from modelUsage: {sorted(model_usage.keys())}")
+
+        # Known context window overrides — avoids a costly second prompt.
+        # modelUsage carries real values for models used in this call; for the
+        # rest we apply these known constants rather than querying Claude again.
+        _known_context: dict = {
+            'claude-opus-4-7': 1000000,
+        }
+
+        # Union: result_ids is the authoritative list; modelUsage adds metadata
+        all_ids = result_ids | set(model_usage.keys())
+        if not all_ids:
+            return []
+
+        models = []
+        for mid in sorted(all_ids):
+            usage_meta = model_usage.get(mid, {})
+            context_size = (
+                usage_meta.get('contextWindow')
+                or _known_context.get(mid)
+                or 200000
+            )
+            max_output = usage_meta.get('maxOutputTokens')
+            m = Model(
+                id=mid,
+                name=mid,
+                provider_id=self.provider_id,
+                context_size=context_size,
+                context_length=context_size,
+            )
+            if max_output:
+                m.max_output_tokens = max_output
+            models.append(m)
+
+        return models
+
    async def _handle_cli_streaming_request(self, prompt: str, model: str, config_dir: str):
        """
        Spawn a claude CLI subprocess, stream its JSON output, and yield
@@ -412,9 +555,11 @@ class ClaudeProviderHandler(BaseProviderHandler):
        env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'

        cmd = [
+            'stdbuf', '-oL',
            'claude', '-p',
            '--input-format', 'stream-json',
            '--output-format', 'stream-json',
+            '--include-partial-messages',
            '--tools', '',
            '--dangerously-skip-permissions',
            '--no-session-persistence',
@@ -423,7 +568,20 @@ class ClaudeProviderHandler(BaseProviderHandler):
        if clean_model:
            cmd += ['--model', clean_model]

-        logger.info(f"ClaudeCliMode: launching subprocess model={clean_model} dir={config_dir}")
+        stdin_payload: Dict = {
+            'type': 'user_message',
+            'content': [{'type': 'text', 'text': prompt}],
+        }
+
+        input_msg = json.dumps(stdin_payload) + '\n'
+
+        # Log a shell-replicable command for debugging
+        cmd_str = ' '.join(cmd)
+        logger.info(
+            f"ClaudeCliMode: launching subprocess model={clean_model} dir={config_dir}\n"
+            f"  Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
+            f"{cmd_str} <<'EOF'\n{input_msg.strip()}\nEOF"
+        )

        process = await asyncio.create_subprocess_exec(
            *cmd,
@@ -433,11 +591,6 @@ class ClaudeProviderHandler(BaseProviderHandler):
            stderr=asyncio.subprocess.PIPE,
        )

-        # Send the prompt as a stream-json user_message then close stdin
-        input_msg = json.dumps({
-            'type': 'user_message',
-            'content': [{'type': 'text', 'text': prompt}],
-        }) + '\n'
        process.stdin.write(input_msg.encode())
        await process.stdin.drain()
        process.stdin.close()
@@ -446,6 +599,12 @@ class ClaudeProviderHandler(BaseProviderHandler):
        created_time = int(time.time())
        first_chunk = True

+        # State for accumulating tool_use blocks
+        # { block_index: {"id": ..., "name": ..., "arguments": ""} }
+        tool_blocks: dict = {}
+        tool_header_sent: set = set()
+        cli_prev_text_len: int = 0
+
        try:
            while True:
                try:
@@ -461,15 +620,31 @@ class ClaudeProviderHandler(BaseProviderHandler):
                if not line_str:
                    continue

+                logger.debug(f"ClaudeCliMode: raw event: {line_str}")
+
                try:
                    data = json.loads(line_str)
                except json.JSONDecodeError:
+                    logger.debug(f"ClaudeCliMode: non-JSON line: {line_str}")
                    continue

                event_type = data.get('type')

-                if event_type == 'content_block_delta':
+                if event_type == 'content_block_start':
+                    cb = data.get('content_block', {})
+                    if cb.get('type') == 'tool_use':
+                        idx = data.get('index', 0)
+                        tool_blocks[idx] = {
+                            'id': cb.get('id', f'call_{idx}'),
+                            'name': cb.get('name', ''),
+                            'arguments': '',
+                        }
+                        logger.debug(f"ClaudeCliMode: tool_use block started idx={idx} name={cb.get('name')}")
+
+                elif event_type == 'content_block_delta':
                    delta = data.get('delta', {})
+                    idx = data.get('index', 0)
+
                    if delta.get('type') == 'text_delta':
                        text = delta.get('text', '')
                        if not text:
@@ -481,19 +656,83 @@ class ClaudeProviderHandler(BaseProviderHandler):

                        yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}]})}\n\n'

-                elif event_type in ('message_stop', 'result'):
-                    logger.debug(f"ClaudeCliMode: received {event_type}, closing stream")
+                    elif delta.get('type') == 'input_json_delta' and idx in tool_blocks:
+                        partial = delta.get('partial_json', '')
+                        tool_blocks[idx]['arguments'] += partial
+
+                        # Emit streaming tool_calls delta
+                        if idx not in tool_header_sent:
+                            tool_header_sent.add(idx)
+                            if first_chunk:
+                                yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": None, "tool_calls": [{"index": idx, "id": tool_blocks[idx]["id"], "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": ""}}]}, "finish_reason": None}]})}\n\n'
+                                first_chunk = False
+                            else:
+                                yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"tool_calls": [{"index": idx, "id": tool_blocks[idx]["id"], "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": ""}}]}, "finish_reason": None}]})}\n\n'
+
+                        if partial:
+                            yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"tool_calls": [{"index": idx, "function": {"arguments": partial}}]}, "finish_reason": None}]})}\n\n'
+
+                elif event_type == 'assistant':
+                    # Claude CLI stream-json format: partial or final assistant message
+                    msg = data.get('message', {})
+                    last_text = ''
+                    for block in msg.get('content', []):
+                        if not isinstance(block, dict):
+                            continue
+                        btype = block.get('type')
+                        if btype == 'text':
+                            last_text += block.get('text', '')
+                        elif btype == 'tool_use':
+                            # Tool call in assistant event — register and emit if not yet seen
+                            tc_id = block.get('id', f'call_{len(tool_blocks)}')
+                            if tc_id not in tool_header_sent:
+                                tool_header_sent.add(tc_id)
+                                idx = len(tool_blocks)
+                                tool_blocks[idx] = {
+                                    'id': tc_id,
+                                    'name': block.get('name', ''),
+                                    'arguments': json.dumps(block.get('input', {}), ensure_ascii=False),
+                                }
+                                role_delta = {'role': 'assistant', 'content': None} if first_chunk else {}
+                                first_chunk = False
+                                yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {**role_delta, "tool_calls": [{"index": idx, "id": tc_id, "type": "function", "function": {"name": tool_blocks[idx]["name"], "arguments": tool_blocks[idx]["arguments"]}}]}, "finish_reason": None}]})}\n\n'
+                    if last_text:
+                        # Content is cumulative; emit only new characters
+                        new_text = last_text[cli_prev_text_len:]
+                        cli_prev_text_len = len(last_text)
+                        if new_text:
+                            if first_chunk:
+                                yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]})}\n\n'
+                                first_chunk = False
+                            yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": new_text}, "finish_reason": None}]})}\n\n'
+
+                elif event_type == 'result':
+                    result_text = data.get('result', '')
+                    logger.debug(f"ClaudeCliMode: result event, is_error={data.get('is_error')}, text_len={len(result_text)}")
+                    # Only emit via result if we haven't already streamed content via other events
+                    if result_text and first_chunk:
+                        yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]})}\n\n'
+                        yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {"content": result_text}, "finish_reason": None}]})}\n\n'
+                        first_chunk = False
+                    break
+
+                elif event_type == 'message_stop':
+                    logger.debug("ClaudeCliMode: received message_stop")
                    break

+                else:
+                    logger.debug(f"ClaudeCliMode: unhandled event type={event_type}")
+
        except Exception as exc:
            logger.error(f"ClaudeCliMode: streaming error: {exc}", exc_info=True)
+        finally:
            try:
                stderr_bytes = await asyncio.wait_for(process.stderr.read(), timeout=2.0)
                if stderr_bytes:
-                    logger.error(f"ClaudeCliMode: stderr: {stderr_bytes.decode('utf-8', errors='replace')[:500]}")
+                    decoded = stderr_bytes.decode('utf-8', errors='replace')
+                    logger.debug(f"ClaudeCliMode: stderr:\n{decoded[:2000]}")
            except Exception:
                pass
-        finally:
            try:
                process.terminate()
                await asyncio.wait_for(process.wait(), timeout=5.0)
@@ -503,41 +742,87 @@ class ClaudeProviderHandler(BaseProviderHandler):
                except Exception:
                    pass

-        # Final stop + DONE
-        yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]})}\n\n'
+        finish_reason = 'tool_calls' if tool_blocks else 'stop'
+        yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_time, "model": f"{self.provider_id}/{clean_model}", "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}]})}\n\n'
        yield 'data: [DONE]\n\n'

-    async def _handle_cli_request(self, prompt: str, model: str, config_dir: str) -> dict:
-        """Non-streaming CLI request – collects the full response text."""
-        accumulated: List[str] = []
+    async def _handle_cli_request(self, prompt: str, model: str, config_dir: str,
+                                   tools: Optional[List[Dict]] = None) -> dict:
+        """Non-streaming CLI request using --output-format json with prompt via stdin."""
+        logger = _logging.getLogger(__name__)
+        clean_model = model.split('/')[-1] if '/' in model else model

-        async for chunk_str in self._handle_cli_streaming_request(prompt, model, config_dir):
-            if chunk_str.startswith('data: ') and '[DONE]' not in chunk_str:
-                try:
-                    data = json.loads(chunk_str[6:])
-                    choices = data.get('choices', [])
-                    if choices:
-                        text = choices[0].get('delta', {}).get('content', '')
-                        if text:
-                            accumulated.append(text)
-                except json.JSONDecodeError:
-                    pass
+        env = os.environ.copy()
+        env['CLAUDE_CONFIG_DIR'] = config_dir
+        env['CLAUDE_CODE_USE_KEYCHAIN'] = 'false'
+
+        cmd = [
+            'claude', '-p',
+            '--output-format', 'json',
+            '--dangerously-skip-permissions',
+            '--no-session-persistence',
+        ]
+        if tools:
+            cmd += ['--tools', json.dumps(tools, ensure_ascii=False)]
+        if clean_model:
+            cmd += ['--model', clean_model]
+
+        logger.info(
+            f"ClaudeCliMode: non-streaming subprocess model={clean_model} dir={config_dir}\n"
+            f"  Replicate with: CLAUDE_CONFIG_DIR={config_dir} CLAUDE_CODE_USE_KEYCHAIN=false "
+            + ' '.join(cmd) + f" <<'EOF'\n{prompt[:200]}...\nEOF"
+        )
+
+        process = await asyncio.create_subprocess_exec(
+            *cmd,
+            env=env,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+        try:
+            stdout_bytes, stderr_bytes = await asyncio.wait_for(
+                process.communicate(input=prompt.encode()), timeout=120.0
+            )
+        except asyncio.TimeoutError:
+            logger.error("ClaudeCliMode: non-streaming subprocess timed out")
+            process.kill()
+            await process.wait()
+            return {
+                'id': f'chatcmpl-cli-{int(time.time())}',
+                'object': 'chat.completion',
+                'created': int(time.time()),
+                'model': f'{self.provider_id}/{clean_model}',
+                'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'Request timed out.'}, 'finish_reason': 'stop'}],
+                'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
+            }
+
+        if stderr_bytes:
+            logger.debug(f"ClaudeCliMode: stderr:\n{stderr_bytes.decode('utf-8', errors='replace')[:2000]}")
+
+        stdout_str = stdout_bytes.decode('utf-8', errors='replace').strip()
+        logger.debug(f"ClaudeCliMode: raw output: {stdout_str[:500]}")
+
+        result_text = ''
+        try:
+            data = json.loads(stdout_str)
+            if data.get('is_error'):
+                logger.warning(f"ClaudeCliMode: CLI returned error: {data.get('result', '')[:200]}")
+            result_text = data.get('result', '')
+        except json.JSONDecodeError:
+            result_text = stdout_str

-        clean_model = model.split('/')[-1] if '/' in model else model
-        full_text = ''.join(accumulated)
        return {
            'id': f'chatcmpl-cli-{int(time.time())}',
            'object': 'chat.completion',
            'created': int(time.time()),
            'model': f'{self.provider_id}/{clean_model}',
-            'choices': [{
-                'index': 0,
-                'message': {'role': 'assistant', 'content': full_text},
-                'finish_reason': 'stop',
-            }],
+            'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': result_text}, 'finish_reason': 'stop'}],
            'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
        }

+
    def _init_session_identifiers(self):
        """Initialize persistent session identifiers (device_id, account_uuid, session_id)."""
        import uuid
@@ -1229,14 +1514,15 @@ class ClaudeProviderHandler(BaseProviderHandler):
            cli_creds = self._get_cli_credentials()
            if cli_creds is not None:
                logger.info(f"ClaudeProviderHandler: using CLI subprocess mode for model {model}")
-                prompt = self._messages_to_cli_prompt(messages)
+                anthropic_tools = self._convert_tools_to_anthropic(tools) if tools else None
+                prompt = self._messages_to_cli_prompt(messages, tools=anthropic_tools)
                config_dir = await ClaudeCliSessionManager.get_config_dir(
                    self.user_id, self.provider_id, cli_creds
                )
                if stream:
                    return self._handle_cli_streaming_request(prompt, model, config_dir)
                else:
-                    return await self._handle_cli_request(prompt, model, config_dir)
+                    return await self._handle_cli_request(prompt, model, config_dir, tools=anthropic_tools)
        # ── Fall through to HTTP API mode ────────────────────────────────

        logger.info(f"ClaudeProviderHandler: Handling request for model {model} (Direct HTTP mode)")
@@ -2111,6 +2397,31 @@ class ClaudeProviderHandler(BaseProviderHandler):

            await self.apply_rate_limit()

+            # [0/3] CLI subprocess model discovery
+            import aisbf.cli_mode as cli_mode_mod
+            if cli_mode_mod.CLAUDE_CLI_MODE:
+                cli_creds = self._get_cli_credentials()
+                if cli_creds is not None:
+                    try:
+                        logging.info("ClaudeProviderHandler: [0/3] CLI subprocess model discovery...")
+                        config_dir = await ClaudeCliSessionManager.get_config_dir(
+                            self.user_id, self.provider_id, cli_creds
+                        )
+                        cli_models = await self._cli_discover_models(config_dir)
+                        if cli_models:
+                            self._save_models_cache(cli_models)
+                            logging.info(
+                                f"ClaudeProviderHandler: ✓ CLI discovery returned {len(cli_models)} models"
+                            )
+                            return cli_models
+                        logging.warning(
+                            "ClaudeProviderHandler: CLI discovery returned no models, falling through"
+                        )
+                    except Exception as cli_disc_err:
+                        logging.warning(
+                            f"ClaudeProviderHandler: CLI model discovery failed: {cli_disc_err}"
+                        )
+
            try:
                logging.info("ClaudeProviderHandler: [1/3] Attempting primary API endpoint...")


--- a/main.py
+++ b/main.py
@@ -578,6 +578,11 @@ _MUST_CHANGE_PASSWORD_WHITELIST = (
    '/dashboard/settings',
    '/dashboard/logout',
    '/api/admin/settings/',
+    '/dashboard/tor/status',
+    '/dashboard/response-cache/stats',
+    '/dashboard/response-cache/clear',
+    '/dashboard/test-smtp',
+    '/dashboard/restart',
 )

 # --- Login rate limiter ---
@@ -1546,10 +1551,14 @@ async def api_token_authorization_middleware(request: Request, call_next):
    if request.method == "GET" and path in ["/api/models", "/api/v1/models"]:
        return await call_next(request)
    
+    # If authentication is globally disabled, skip all token scope checks
+    if not (server_config and server_config.get('auth_enabled', False)):
+        return await call_next(request)
+
    is_global_token = getattr(request.state, 'is_global_token', False)
    user_id = getattr(request.state, 'user_id', None)
    is_admin = getattr(request.state, 'is_admin', False)
-    
+
    # Debug logging
    logger.info(f"API Token Auth: path={path}, is_global_token={is_global_token}, user_id={user_id}")
    
@@ -5578,6 +5587,7 @@ async def dashboard_settings(request: Request):
            'fullconfig_tokens': []
        }
    
+    warning = request.query_params.get('warning')
    return templates.TemplateResponse(
        request=request,
        name="dashboard/settings.html",
@@ -5586,7 +5596,8 @@ async def dashboard_settings(request: Request):
        "session": request.session,
        "__version__": __version__,
        "config": aisbf_config,
-        "os": os
+        "os": os,
+        "warning": warning,
    }
    )

@@ -5599,7 +5610,6 @@ async def dashboard_settings_save(
   auth_enabled: bool = Form(False),
   auth_tokens: str = Form(""),
   dashboard_username: str = Form(...),
-   dashboard_password: str = Form(""),
   condensation_model_id: str = Form(...),
   autoselect_model_id: str = Form(...),
   database_type: str = Form("sqlite"),
@@ -5691,8 +5701,6 @@ async def dashboard_settings_save(
    aisbf_config['auth']['enabled'] = auth_enabled
    aisbf_config['auth']['tokens'] = [t.strip() for t in auth_tokens.split('\n') if t.strip()]
    aisbf_config['dashboard']['username'] = dashboard_username
-    if dashboard_password:  # Only update if provided - hash the password
-        aisbf_config['dashboard']['password'] = _db_hash_password(dashboard_password)
    aisbf_config['internal_model']['condensation_model_id'] = condensation_model_id
    aisbf_config['internal_model']['autoselect_model_id'] = autoselect_model_id

@@ -5840,12 +5848,10 @@ async def dashboard_settings_save(
    aisbf_config['dashboard']['notifications']['wallet_topup'] = admin_notify_wallet_topup
    aisbf_config['dashboard']['notifications']['user_deleted_account'] = admin_notify_user_deleted_account

-    # Handle new_admin_password from the Admin tab (distinct from dashboard_password in Dashboard tab)
    if new_admin_password:
        if new_admin_password == confirm_admin_password:
            aisbf_config['dashboard']['password'] = _db_hash_password(new_admin_password)
            request.session.pop('must_change_password', None)
-        # silently ignore mismatch — UI should validate

    # Save config
    config_path = Path.home() / '.aisbf' / 'aisbf.json'
@@ -5853,9 +5859,9 @@ async def dashboard_settings_save(
    with open(config_path, 'w') as f:
        json.dump(aisbf_config, f, indent=2)

-    # If a new dashboard password was submitted, clear the forced-change flag
-    if dashboard_password:
-        request.session.pop('must_change_password', None)
+    # Reload dashboard credentials in memory so the new username/password takes effect immediately
+    if server_config is not None:
+        server_config['dashboard_config'] = aisbf_config.get('dashboard', {})

    return templates.TemplateResponse(
        request=request,
@@ -10055,10 +10061,10 @@ async def v1_chat_completions(request: Request, body: ChatCompletionRequest):
    
    # PATH 1: Direct provider model (format: {provider}/{model})
    if provider_id not in config.providers:
-            raise HTTPException(
-                status_code=404,
-                detail=f"User autoselect '{actual_model}' not found. Available: {list(handler.user_autoselects.keys())}"
-            )
+        raise HTTPException(
+            status_code=404,
+            detail=f"Provider '{provider_id}' not found. Available: {list(config.providers.keys())}"
+        )
    
    # Validate kiro credentials before processing request
    provider_config = config.get_provider(provider_id)

--- a/templates/dashboard/providers.html
+++ b/templates/dashboard/providers.html
@@ -551,10 +551,18 @@ function renderProviderDetails(key) {
                ${CLAUDE_CLI_MODE ? `
                <div style="margin-top: 20px; padding-top: 15px; border-top: 1px solid #1e3a5f;">
                    <h5 style="margin: 0 0 8px 0; color: #4ade80;">Claude CLI Mode Active</h5>
-                    <small style="color: #4ade80; display: block; margin-bottom: 14px;">
+                    <small style="color: #4ade80; display: block; margin-bottom: 8px;">
                        The claude CLI was detected at startup. When enabled, requests are piped
                        through the local claude binary instead of the HTTP API.
                    </small>
+                    <div style="background: #3a2a00; border: 1px solid #f59e0b; border-radius: 6px; padding: 10px 14px; margin-bottom: 14px;">
+                        <span style="color: #f59e0b; font-weight: 600;">⚠ Experimental:</span>
+                        <span style="color: #fcd34d; font-size: 0.85em;">
+                            CLI mode is experimental. Tool calling (function calling) does not yet work reliably —
+                            the CLI subprocess may refuse or mishandle tool definitions. Use with simple
+                            (non-tool) requests only until this is resolved.
+                        </span>
+                    </div>

                    <div class="form-group" style="margin-bottom: 16px;">
                        <label style="display: flex; align-items: center; gap: 10px; cursor: pointer;">

--- a/templates/dashboard/settings.html
+++ b/templates/dashboard/settings.html
@@ -41,10 +41,20 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 <div class="alert alert-error">{{ error }}</div>
 {% endif %}

+{% if warning == 'default_password' %}
+<div style="background:#b71c1c; border:2px solid #e53935; color:#fff; padding:16px 20px; border-radius:6px; margin-bottom:20px; display:flex; align-items:flex-start; gap:12px;">
+    <span style="font-size:1.4em; line-height:1;">&#9888;</span>
+    <div>
+        <strong>Security Warning: Default password in use.</strong><br>
+        You are logged in with the factory-default <code style="background:rgba(0,0,0,.3);padding:1px 5px;border-radius:3px;">admin / admin</code> credentials.
+        Please change your password immediately using the <strong>Admin</strong> tab below before using AISBF.
+    </div>
+</div>
+{% endif %}
+
 <div class="settings-tabs">
    <div class="settings-tab active" onclick="switchTab('server')"><i class="fas fa-server"></i> Server</div>
    <div class="settings-tab" onclick="switchTab('auth')"><i class="fas fa-key"></i> Auth &amp; MCP</div>
-    <div class="settings-tab" onclick="switchTab('dashboard')"><i class="fas fa-tachometer-alt"></i> Dashboard</div>
    <div class="settings-tab" onclick="switchTab('models')"><i class="fas fa-brain"></i> Models</div>
    <div class="settings-tab" onclick="switchTab('database')"><i class="fas fa-database"></i> Database</div>
    <div class="settings-tab" onclick="switchTab('cache')"><i class="fas fa-bolt"></i> Cache</div>
@@ -139,20 +149,6 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
    </div>
    </div><!-- /tab-auth -->
    
-    <div class="settings-section" id="tab-dashboard">
-    <div class="section-title"><i class="fas fa-tachometer-alt"></i> Dashboard</div>
-    
-    <div class="form-group">
-        <label for="dashboard_username">Dashboard Username</label>
-        <input type="text" id="dashboard_username" name="dashboard_username" value="{{ config.dashboard.username }}" required>
-    </div>
-    
-    <div class="form-group">
-        <label for="dashboard_password">Dashboard Password</label>
-        <input type="password" id="dashboard_password" name="dashboard_password" placeholder="Leave blank to keep current">
-    </div>
-    </div><!-- /tab-dashboard -->
-    
    <div class="settings-section" id="tab-models">
    <div class="section-title"><i class="fas fa-brain"></i> Internal Models</div>
    
@@ -879,15 +875,18 @@ brew services restart tor   # macOS</code></pre>
    <div class="section-title"><i class="fas fa-shield-alt"></i> Admin Account &amp; Notifications</div>

    <div class="form-group">
-        <label for="new_admin_password">New Admin Password</label>
+        <label for="dashboard_username">Admin Username</label>
+        <input type="text" id="dashboard_username" name="dashboard_username" value="{{ config.dashboard.username }}" required>
+    </div>
+
+    <div class="form-group">
+        <label for="new_admin_password">New Password</label>
        <input type="password" id="new_admin_password" name="new_admin_password" placeholder="Leave blank to keep current password">
-        <small style="color: #666; display: block; margin-top: 5px;">Enter a new password to change the admin dashboard password</small>
    </div>

    <div class="form-group">
-        <label for="confirm_admin_password">Confirm New Admin Password</label>
+        <label for="confirm_admin_password">Confirm New Password</label>
        <input type="password" id="confirm_admin_password" name="confirm_admin_password" placeholder="Confirm new password">
-        <small style="color: #666; display: block; margin-top: 5px;">Re-enter the new password to confirm</small>
    </div>

    <div class="form-group">
@@ -1194,13 +1193,12 @@ async function checkTorStatus() {
 // Check TOR status on page load
 document.addEventListener('DOMContentLoaded', function() {
    checkTorStatus();
-    // Refresh status every 30 seconds
    setInterval(checkTorStatus, 30000);
-    
-    // Load cache statistics
    refreshCacheStats();
-    // Refresh cache stats every 10 seconds
    setInterval(refreshCacheStats, 10000);
+    {% if warning == 'default_password' %}
+    switchTab('admin');
+    {% endif %}
 });

 async function refreshCacheStats() {