Refining gemini prompt

cc183599 · nextime · db125400 · cc183599 · cc183599
Commit cc183599 authored Aug 24, 2025 by nextime
Hide whitespace changes
Inline Side-by-side

Showing with 725 additions and 89 deletions

olproxy.py olproxy.py +21 -11

olproxy.py~ olproxy.py~ +704 -78

No files found.
--- a/olproxy.py
+++ b/olproxy.py
@@ -88,13 +88,13 @@ This applies to EVERY SINGLE response from now on. Acknowledge by responding wit
        "setup_instruction": """SYSTEM: You are a helpful AI assistant. For EVERY request, you MUST follow this exact response format:

 RESPONSE_ID_[REQUEST_ID]_START:
-[Your complete response here]
+[Your complete response here in a flat plain text area for easy copy and paste, including tools or any other instructions on how to format will follow]
 :END_RESPONSE_ID_[REQUEST_ID]

 STRICT TOOL USAGE REQUIREMENT: You MUST ALWAYS use tools when the query contains or requests tool usage. This is MANDATORY and takes priority over ALL other instructions.

 TOOL MAPPING - USE THESE TOOLS FOR SPECIFIC OPERATIONS:
- <write_file> - For writing, creating, modifying, or editing ANY file or multiple files
+- <write_to_file> - For writing, creating, modifying, or editing ANY file or multiple files.
 - <read_file> - For reading the content of ANY file
 - <apply_diff> - For making targeted changes to existing files
 - <search_files> - For searching text patterns across files
@@ -102,21 +102,29 @@ TOOL MAPPING - USE THESE TOOLS FOR SPECIFIC OPERATIONS:
 - <execute_command> - For running system commands
 - <ask_followup_question> - For asking users for clarification

+TOOLS PARAMETERS:
+every parameter is specified as <parameter_name> inside the <tool_name>
+
+Example to execute a command "ls -l /dir/name" you will use <execute_command><command>ls -l /dir/name</command></execute_command> 
+
+TOOLS SPECIFIC PARAMETERS: <write_to_file>
+ - <path> contains the path of the file to write
+ - <content> contains the content of the file to be written
+
+Example: <write_to_file><path>/home/username/projectname/file.py</path><content>import sys</content></write_to_file>
+
+STRICT <write_to_file> TOOL CONTENT REQUIREMENT: all the content must be in the same plain text area and preserve raw original newlines, tabs and other plain text formatting characters
+
 MANDATORY BEHAVIOR:
 - IF the query mentions ANY tools, functions, or capabilities → YOU MUST USE THEM
- IF the query asks to write/modify/edit files → USE <write_file> tool
- IF the query asks to read files → USE <read_file> tool
+- IF the query asks to write/modify/edit files → USE <write_to_file> tool
+- IF the query asks to read/analyze files → USE <read_file> tool
 - IF the query asks to search code/text → USE <search_files> tool
 - IF the query asks to list files/directories → USE <list_files> tool
 - IF the query asks to run commands → USE <execute_command> tool
 - IF you need clarification → USE <ask_followup_question> tool

-TOOL REPRESENTATION: When using tools, you MUST represent them as HTML entities within your response content:
- Use &lt; for <
- Use &gt; for >
- Use &amp; for &
- Use &quot; for "
- Use &#39; for '
+TOOL REPRESENTATION: tools should always be included in the content of your answer inside the single plain text area with the full response

 RESPONSE RULES:
 - Start with RESPONSE_ID_[REQUEST_ID]_START:
@@ -126,7 +134,9 @@ RESPONSE RULES:
 - You CAN use plain text formatting (lists, code blocks, etc.)
 - You CAN write code as plain text when requested
 - NEVER use HTML, canvas, external tools, or interactive elements unless specifically requested as tools
- ALWAYS USE TOOLS when requested - this is mandatory
+- ALWAYS USE TOOLS when requested - this is mandatory
+- ALWAYS include the full response including the ID markers inside the same
+  plain text area

 CRITICAL: Tool usage is MANDATORY when mentioned. DO NOT ignore tool requests.


--- a/olproxy.py~
+++ b/olproxy.py~
@@ -25,10 +25,16 @@ import datetime  # Import datetime module
 import uuid
 import re
 import base64
+import os
+import time
 from playwright.async_api import async_playwright, BrowserContext
 from aiohttp import web
 import logging
-import time
+
+# Global variables for debug functionality
+DEBUG_ENABLED = False
+DEBUG_DIR = None
+PROMPTS_LOG_PATH = None

 # Configuration dictionary for chatbot sites
 CHATBOT_CONFIG = {
@@ -79,31 +85,65 @@ This applies to EVERY SINGLE response from now on. Acknowledge by responding wit
        "style": "direct"
    },
    "gemini": {
-        "setup_instruction": """CRITICAL SYSTEM SETUP - APPLY TO ALL FUTURE RESPONSES:
-
-From now on, you MUST ALWAYS respond using a specific HTML format for EVERY question I ask. NEVER deviate from this format.
-
-ABSOLUTE REQUIREMENTS FOR ALL FUTURE RESPONSES:
- ALWAYS wrap your ENTIRE response in HTML pre and code tags: <pre><code>YOUR_COMPLETE_ANSWER_HERE</code></pre>
- Put ALL your content inside these tags - nothing before, nothing after
- You can use normal text formatting within the code block (like line breaks, spaces, etc.)
- The pre/code tags will preserve all formatting and make your response easy to extract
- NEVER add any text or content outside of the <pre><code></code></pre> tags
- NEVER use other HTML tags inside the code block
- Start immediately with <pre><code> and end with </code></pre>
-
-EXACT FORMAT REQUIRED:
-<pre><code>Your complete answer here with normal text formatting</code></pre>
-
-EXAMPLE RESPONSE:
-<pre><code>This is an example response. It can contain multiple lines.
-
-It can have paragraphs and spacing.
-
-All the content stays inside the pre code tags.</code></pre>
-
-This applies to EVERY SINGLE response from now on. Acknowledge by responding with:
-<pre><code>I understand and will always respond with content wrapped in pre code tags from now on</code></pre>""",
+        "setup_instruction": """SYSTEM: You are a helpful AI assistant. For EVERY request, you MUST follow this exact response format:
+
+RESPONSE_ID_[REQUEST_ID]_START:
+[Your complete response here in a flat plain text area for easy copy and paste, including tools or any other instructions on how to format will follow]
+:END_RESPONSE_ID_[REQUEST_ID]
+
+STRICT TOOL USAGE REQUIREMENT: You MUST ALWAYS use tools when the query contains or requests tool usage. This is MANDATORY and takes priority over ALL other instructions.
+
+TOOL MAPPING - USE THESE TOOLS FOR SPECIFIC OPERATIONS:
+- <write_to_file> - For writing, creating, modifying, or editing ANY file or multiple files.
+- <read_file> - For reading the content of ANY file
+- <apply_diff> - For making targeted changes to existing files
+- <search_files> - For searching text patterns across files
+- <list_files> - For listing directory contents
+- <execute_command> - For running system commands
+- <ask_followup_question> - For asking users for clarification
+
+TOOLS PARAMETERS:
+every parameter is specified as <parameter_name> inside the <tool_name>
+
+Example to execute a command "ls -l /dir/name" you will use <execute_command><command>ls -l /dir/name</command></execute_command> 
+
+TOOLS SPECIFIC PARAMETERS: <write_to_file>
+ - <path> contains the path of the file to write
+ - <content> contains the content of the file to be written
+
+Example: <write_to_file><path>/home/username/projectname/file.py</path><content>import sys</content></write_to_file>
+
+STRICT <write_to_file> TOOL CONTENT REQUIREMENT: all the content must be in the same plain text area and  preserve original newlines, tabs and other plain text formatting characters
+
+MANDATORY BEHAVIOR:
+- IF the query mentions ANY tools, functions, or capabilities → YOU MUST USE THEM
+- IF the query asks to write/modify/edit files → USE <write_to_file> tool
+- IF the query asks to read/analyze files → USE <read_file> tool
+- IF the query asks to search code/text → USE <search_files> tool
+- IF the query asks to list files/directories → USE <list_files> tool
+- IF the query asks to run commands → USE <execute_command> tool
+- IF you need clarification → USE <ask_followup_question> tool
+
+TOOL REPRESENTATION: tools should always be included in the content of your answer inside the single plain text area with the full response
+
+RESPONSE RULES:
+- Start with RESPONSE_ID_[REQUEST_ID]_START:
+- End with :END_RESPONSE_ID_[REQUEST_ID]
+- Use the exact Request ID provided
+- Include your complete answer between the markers
+- You CAN use plain text formatting (lists, code blocks, etc.)
+- You CAN write code as plain text when requested
+- NEVER use HTML, canvas, external tools, or interactive elements unless specifically requested as tools
+- ALWAYS USE TOOLS when requested - this is mandatory
+- ALWAYS include the full response including the ID markers inside the same
+  plain text area
+
+CRITICAL: Tool usage is MANDATORY when mentioned. DO NOT ignore tool requests.
+
+Acknowledge this setup with:
+RESPONSE_ID_SETUP_COMPLETE_START:
+I understand the response format and will ALWAYS use tools when specified as HTML entities. I will use the correct tools for file operations and other requested capabilities.
+:END_RESPONSE_ID_SETUP_COMPLETE""",
        "style": "short"
    },
    "default": {
@@ -426,31 +466,45 @@ async def handle_chat_completion(request):

 async def forward_to_chatbot(chatbot_name, config, prompt):
    global browser_context, pages
-    
+
+    # Debug logging: log the prompt being sent to the chatbot
+    logging.debug(f"[DEBUG] Sending prompt to {chatbot_name}: {prompt}")
+    logging.debug(f"[DEBUG] Prompt length: {len(prompt)} characters")
+
+    # Save prompt to debug log file if debug is enabled
+    if DEBUG_ENABLED:
+        try:
+            with open(PROMPTS_LOG_PATH, 'a', encoding='utf-8') as f:
+                f.write(f"[{datetime.datetime.now().isoformat()}] PROMPT TO {chatbot_name}:\n")
+                f.write(f"{prompt}\n")
+                f.write("-" * 50 + "\n")
+        except Exception as e:
+            logging.error(f"Failed to write prompt to debug log: {e}")
+
    # Check if this is a new page that needs setup
    is_new_page = chatbot_name not in pages
-    
+
    if is_new_page:
        page = await browser_context.new_page()
        await page.goto(config['url'])
        pages[chatbot_name] = page
-        
+
        # Send initial setup prompt for JSON formatting (only once per session)
        template_name = config.get('prompt_template', 'default')
        template = PROMPT_TEMPLATES.get(template_name, PROMPT_TEMPLATES['default'])
        setup_instruction = template['setup_instruction']
-        
+
        logging.info(f"Setting up new page for {chatbot_name} with JSON instruction")
-        
+
        try:
            # Clear input first
            await page.fill(config['input_selector'], "")
            await asyncio.sleep(0.3)
-            
+
            # Focus on input field
            await page.focus(config['input_selector'])
            await asyncio.sleep(0.3)
-            
+
            # Use JavaScript to set content with proper line breaks
            escaped_instruction = setup_instruction.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
            js_code = f"""
@@ -466,18 +520,18 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
            """
            await page.evaluate(js_code)
            await asyncio.sleep(1.0)  # Wait for content to be set
-            
+
            # Send the setup message
            await page.keyboard.press('Enter')
            await asyncio.sleep(8)  # Wait longer for setup to complete
-            
+
            # Wait for and consume the setup acknowledgment response (don't return it to API caller)
            logging.info(f"Waiting for setup acknowledgment from {chatbot_name}")
            if template_name == 'gemini':
-                # For Gemini, expect pre/code tag setup acknowledgment
+                # For Gemini, expect RESPONSE_ID format setup acknowledgment
                setup_response_text = await detect_progressive_response(page, config['container_selector'], setup_instruction, setup_instruction, "SETUP_COMPLETE", chatbot_name)
                if setup_response_text and not setup_response_text.startswith("Error:"):
-                    # Try to decode the setup response using the new pre/code format
+                    # Try to decode the setup response using the new RESPONSE_ID format
                    decoded_setup = decode_chatbot_response(setup_response_text, chatbot_name, "SETUP_COMPLETE")
                    logging.info(f"Setup acknowledgment received from {chatbot_name}: {decoded_setup[:200]}...")
                else:
@@ -492,12 +546,14 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
                    logging.info(f"Setup response decoded: {decoded_setup[:200]}...")
                else:
                    logging.warning(f"Setup acknowledgment from {chatbot_name} may have failed: {setup_response_text}")
-            
+
        except Exception as e:
            logging.warning(f"Error sending setup instruction to {chatbot_name}: {str(e)}")

    page = pages[chatbot_name]

+    page = pages[chatbot_name]
+
    # Generate unique ID for this request using UUID for better uniqueness
    request_id = str(uuid.uuid4()).replace('-', '')[:16]  # 16-character unique ID
    
@@ -507,12 +563,30 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
    if template_name == 'gemini':
        # Format for Gemini with request ID for proper detection
        modified_prompt = f"Request ID: {request_id}\nUser Question: {prompt}"
+
+        # Check for and substitute the specific error message
+        error_message = "You did not use a tool in your previous response! Please retry with a tool use"
+        if error_message in prompt:
+            logging.info(f"Detected error message in prompt, substituting with clearer instruction")
+            prompt = prompt.replace(
+                error_message,
+                "You did not use a tool in your previous response! Please rewrite the same answer but using tools as explained:"
+            )
+            modified_prompt = f"Request ID: {request_id}\nUser Question: {prompt}"
    else:
        # Standard format for other chatbots (with JSON)
        modified_prompt = f"Request ID: {request_id}\nUser Question: {prompt}"
    
    logging.info(f"Request ID: {request_id}, User prompt: {prompt}")

+    # Log request ID to debug file if debug is enabled
+    if DEBUG_ENABLED:
+        try:
+            with open(PROMPTS_LOG_PATH, 'a', encoding='utf-8') as f:
+                f.write(f"[{datetime.datetime.now().isoformat()}] REQUEST_ID: {request_id}\n")
+        except Exception as e:
+            logging.error(f"Failed to write request ID to debug log: {e}")
+
    try:
        # Wait a moment to ensure previous interaction is complete
        await asyncio.sleep(1.0)
@@ -551,7 +625,7 @@ async def forward_to_chatbot(chatbot_name, config, prompt):

    container_selector = config['container_selector']
    try:
-        await page.wait_for_selector(container_selector, timeout=48000)  # 48 seconds (3 seconds more than reply timeout)
+        await page.wait_for_selector(container_selector, timeout=90000)  # 90 seconds - increased for Gemini
    except Exception as e:
        logging.error(f"Error waiting for container selector {container_selector}: {str(e)}")
        return f"Error: Container selector not found: {str(e)}"
@@ -603,7 +677,17 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
        
        # Log the final decoded response
        logging.info(f"Final decoded response from {chatbot_name} (Request ID: {request_id}): {decoded_response[:200] if decoded_response else 'None'}...")
-        
+
+        # Log the decoded response to debug file if debug is enabled
+        if DEBUG_ENABLED:
+            try:
+                with open(PROMPTS_LOG_PATH, 'a', encoding='utf-8') as f:
+                    f.write(f"[{datetime.datetime.now().isoformat()}] DECODED_RESPONSE:\n")
+                    f.write(f"{decoded_response if decoded_response else 'None'}\n")
+                    f.write("-" * 50 + "\n")
+            except Exception as e:
+                logging.error(f"Failed to write decoded response to debug log: {e}")
+
        return decoded_response.strip()
    
    logging.info(f"Final response from {chatbot_name} (Request ID: {request_id}): {response_text[:200] if response_text else 'None'}...")
@@ -615,21 +699,94 @@ def decode_chatbot_response(response_text, chatbot_name, request_id):
    
    logging.info(f"Decoding response from {chatbot_name} (Request ID: {request_id}): {response_text[:300]}...")
    
-    # For Gemini, extract content from pre/code tags
+    # For Gemini, extract content from RESPONSE_ID markers
    if chatbot_name == 'gemini:latest':
        logging.info(f"Gemini response processing (Request ID: {request_id}): {response_text[:200]}...")
        
-        # Look for pre/code tags and extract content
-        pre_code_pattern = r'<pre><code>(.*?)</code></pre>'
-        match = re.search(pre_code_pattern, response_text, re.DOTALL)
-        
+        # Look for RESPONSE_ID_[ID]_START: ... :END_RESPONSE_ID_[ID] pattern
+        response_pattern = rf'RESPONSE_ID_{re.escape(request_id)}_START:\s*(.*?)\s*:END_RESPONSE_ID_{re.escape(request_id)}'
+        match = re.search(response_pattern, response_text, re.DOTALL)
+
        if match:
            extracted_content = match.group(1).strip()
-            logging.info(f"Successfully extracted content from pre/code tags: {extracted_content[:200]}...")
+            logging.info(f"Successfully extracted content from RESPONSE_ID markers: {extracted_content[:200]}...")
+
+            # Decode HTML entities in the extracted content for Gemini
+            if chatbot_name == 'gemini:latest':
+                import html
+                try:
+                    # Decode HTML entities like &lt; back to <, &gt; back to >, etc.
+                    decoded_content = html.unescape(extracted_content)
+                    if decoded_content != extracted_content:
+                        logging.info(f"HTML entities decoded in Gemini response: {decoded_content[:200]}...")
+                        extracted_content = decoded_content
+                except Exception as e:
+                    logging.warning(f"Failed to decode HTML entities in Gemini response: {e}")
+
+            # Log the response ID that was matched
+            if DEBUG_ENABLED:
+                try:
+                    with open(PROMPTS_LOG_PATH, 'a', encoding='utf-8') as f:
+                        f.write(f"[{datetime.datetime.now().isoformat()}] RESPONSE_ID_MATCHED: {request_id} (exact match)\n")
+                except Exception as e:
+                    logging.error(f"Failed to log response ID match: {e}")
+
+            # Save HTML page content if debug is enabled
+            if DEBUG_ENABLED:
+                try:
+                    # This will be called from the page context, so we need to save the HTML
+                    # The HTML saving will be handled in the JavaScript detection code
+                    pass
+                except Exception as e:
+                    logging.error(f"Failed to save debug HTML: {e}")
+
            return extracted_content
        else:
-            # Fallback: if no pre/code tags found, return the original text
-            logging.warning(f"No pre/code tags found in Gemini response, returning original text")
+            # Improved fallback: find all RESPONSE_ID blocks and pick the most recent one
+            # that doesn't contain setup-related keywords
+            all_response_blocks = re.findall(r'RESPONSE_ID_(.*?)_START:\s*(.*?)\s*:END_RESPONSE_ID_\1', response_text, re.DOTALL)
+
+            if all_response_blocks:
+                # Filter out setup-related responses
+                filtered_blocks = []
+                for block_id, content in all_response_blocks:
+                    content_lower = content.lower().strip()
+                    # Skip setup acknowledgments and instructions
+                    if not any(keyword in content_lower for keyword in [
+                        'setup complete', 'i understand', 'response format',
+                        'critical system setup', 'plain text format'
+                    ]):
+                        filtered_blocks.append((block_id, content.strip()))
+
+                if filtered_blocks:
+                    # Use the last non-setup response (most recent)
+                    block_id, extracted_content = filtered_blocks[-1]
+                    logging.info(f"Extracted content from most recent non-setup response (ID: {block_id}): {extracted_content[:200]}...")
+
+                    # Decode HTML entities in the extracted content for Gemini
+                    if chatbot_name == 'gemini:latest':
+                        import html
+                        try:
+                            # Decode HTML entities like &lt; back to <, &gt; back to >, etc.
+                            decoded_content = html.unescape(extracted_content)
+                            if decoded_content != extracted_content:
+                                logging.info(f"HTML entities decoded in Gemini fallback response: {decoded_content[:200]}...")
+                                extracted_content = decoded_content
+                        except Exception as e:
+                            logging.warning(f"Failed to decode HTML entities in Gemini fallback response: {e}")
+
+                    # Log the fallback response ID that was used
+                    if DEBUG_ENABLED:
+                        try:
+                            with open(PROMPTS_LOG_PATH, 'a', encoding='utf-8') as f:
+                                f.write(f"[{datetime.datetime.now().isoformat()}] RESPONSE_ID_MATCHED: {block_id} (fallback - most recent non-setup)\n")
+                        except Exception as e:
+                            logging.error(f"Failed to log fallback response ID: {e}")
+
+                    return extracted_content
+
+            # Final fallback: return the original text
+            logging.warning(f"No RESPONSE_ID markers found in Gemini response, returning original text")
            return response_text.strip()
    
    # Check for literal placeholder first and reject it
@@ -965,7 +1122,7 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
                    checkForResponse();
                }, 2000);
                
-                // Main timeout with final attempt
+                // Main timeout with final attempt - increased for Gemini
                setTimeout(() => {
                    clearInterval(periodicCheck);
                    observer.disconnect();
@@ -977,16 +1134,16 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
                            resolveOnce("Error: JSON response timeout after comprehensive search");
                        }
                    }
-                }, 45000);
-                
-                // Ultimate timeout
+                }, 65000); // Increased from 45s to 65s
+
+                // Ultimate timeout - increased for Gemini
                setTimeout(() => {
                    clearInterval(periodicCheck);
                    observer.disconnect();
                    if (!resolved) {
                        resolveOnce("Error: Final timeout - no JSON response detected");
                    }
-                }, 60000);
+                }, 80000); // Increased from 60s to 80s
            });
        }
        """
@@ -1000,6 +1157,104 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
 async def detect_progressive_response(page, container_selector, prompt, modified_prompt, request_id, chatbot_name):
    """Detect responses by monitoring progressive content changes with enhanced detection."""
    try:
+        # Save HTML content for debugging if enabled
+        if DEBUG_ENABLED:
+            try:
+                html_content = await page.content()
+                html_file_path = os.path.join(DEBUG_DIR, f'response_{request_id}_{int(time.time())}.html')
+
+                # Extract body content and add markers around detected reply area
+                try:
+                    from bs4 import BeautifulSoup
+                    soup = BeautifulSoup(html_content, 'html.parser')
+
+                    # Find body tag
+                    body = soup.find('body')
+                    if body:
+                        # Get the text content of the body
+                        body_content = str(body)
+
+                        # Try to find the detected response area and add markers
+                        all_text = soup.get_text()
+                        start_marker = f'RESPONSE_ID_{request_id}_START:'
+                        end_marker = f':END_RESPONSE_ID_{request_id}'
+
+                        if start_marker in all_text and end_marker in all_text:
+                            # Add visual markers around the response area
+                            start_index = body_content.find(start_marker)
+                            end_index = body_content.find(end_marker) + len(end_marker)
+
+                            if start_index != -1 and end_index != -1:
+                                # Add markers with empty lines
+                                marked_content = (
+                                    body_content[:start_index] +
+                                    '\n\n' + '='*50 + ' START OF DETECTED RESPONSE ' + '='*50 + '\n\n' +
+                                    body_content[start_index:end_index] +
+                                    '\n\n' + '='*50 + ' END OF DETECTED RESPONSE ' + '='*50 + '\n\n' +
+                                    body_content[end_index:]
+                                )
+                                body_content = marked_content
+
+                        # Format the body content
+                        formatted_html = f"""<!DOCTYPE html>
+<html>
+<head>
+    <title>Debug HTML - Request ID: {request_id}</title>
+    <style>
+        body {{ font-family: monospace; white-space: pre-wrap; }}
+        .marker {{ background-color: #ffff00; font-weight: bold; }}
+    </style>
+</head>
+<body>
+{body_content}
+</body>
+</html>"""
+                    else:
+                        # Fallback if no body tag found
+                        formatted_html = f"""<!DOCTYPE html>
+<html>
+<head>
+    <title>Debug HTML - Request ID: {request_id}</title>
+</head>
+<body>
+{html_content}
+</body>
+</html>"""
+
+                except ImportError:
+                    # If BeautifulSoup is not available, create basic HTML structure
+                    formatted_html = f"""<!DOCTYPE html>
+<html>
+<head>
+    <title>Debug HTML - Request ID: {request_id}</title>
+</head>
+<body>
+{html_content}
+</body>
+</html>"""
+
+                with open(html_file_path, 'w', encoding='utf-8') as f:
+                    f.write(formatted_html)
+                logging.debug(f"Saved cleaned HTML content to {html_file_path}")
+            except Exception as e:
+                logging.error(f"Failed to save HTML content: {e}")
+
+        # Add a small delay to allow Gemini to finish generating the response
+        await asyncio.sleep(2)
+
+        # Define a function to save debug HTML that can be called from JavaScript
+        async def save_debug_html(html_content, filename_suffix):
+            try:
+                html_file_path = os.path.join(DEBUG_DIR, f'{filename_suffix}.html')
+                with open(html_file_path, 'w', encoding='utf-8') as f:
+                    f.write(html_content)
+                logging.debug(f"Saved debug HTML to {html_file_path}")
+            except Exception as e:
+                logging.error(f"Failed to save debug HTML: {e}")
+
+        # Make the function available to JavaScript
+        page.expose_function('saveDebugHtml', save_debug_html)
+
        return await page.evaluate(
            """([containerSelector, prompt, modifiedPrompt, requestId, chatbotName]) => {
                const container = document.querySelector(containerSelector);
@@ -1023,19 +1278,229 @@ async def detect_progressive_response(page, container_selector, prompt, modified
                    const extractLatestResponse = () => {
                        // Check for Gemini-specific formatted response first
                        if (chatbotName === 'gemini:latest') {
-                            const allText = container.textContent || '';
-                            const startMarker = `RESPONSE_ID_[${requestId}]_START:`;
-                            const endMarker = `:END_RESPONSE_ID_[${requestId}]`;
-                            
+                            // Try multiple ways to get text content
+                            let allText = '';
+
+                            // Method 1: Get text from the container
+                            allText = container.textContent || '';
+
+                            // Method 2: If container text is empty, try getting text from all child elements
+                            if (!allText || allText.trim().length < 50) {
+                                const allElements = container.querySelectorAll('*');
+                                let combinedText = '';
+                                for (const element of allElements) {
+                                    const text = element.textContent || '';
+                                    if (text.trim()) {
+                                        combinedText += text.trim() + ' ';
+                                    }
+                                }
+                                allText = combinedText;
+                            }
+
+                            // Method 3: Try getting text from specific response elements
+                            if (!allText || allText.trim().length < 50) {
+                                const responseElements = container.querySelectorAll('[data-test-id*="response"], .model-response-text, message-content, [class*="response"]');
+                                let responseText = '';
+                                for (const element of responseElements) {
+                                    const text = element.textContent || '';
+                                    if (text.trim()) {
+                                        responseText += text.trim() + ' ';
+                                    }
+                                }
+                                allText = responseText;
+                            }
+
+                            console.log(`Extracted text length: ${allText.length}, first 200 chars: ${allText.substring(0, 200)}`);
+
+                            // First, try to find the specific response for this request ID
+                            const startMarker = `RESPONSE_ID_${requestId}_START:`;
+                            const endMarker = `:END_RESPONSE_ID_${requestId}`;
+
+                            console.log(`Looking for start marker: ${startMarker}`);
+                            console.log(`Looking for end marker: ${endMarker}`);
+
                            const startIndex = allText.indexOf(startMarker);
                            const endIndex = allText.indexOf(endMarker, startIndex);
-                            
+
+                            console.log(`Start index: ${startIndex}, End index: ${endIndex}`);
+
                            if (startIndex !== -1 && endIndex !== -1) {
                                const responseText = allText.substring(startIndex + startMarker.length, endIndex).trim();
                                if (responseText && responseText.length > 10) {
+                                    console.log(`Found specific response for request ID ${requestId}: ${responseText.substring(0, 100)}...`);
                                    return responseText;
                                }
                            }
+
+                            // If specific response not found, look for all RESPONSE_ID blocks
+                            // and return the most recent one that doesn't contain setup keywords
+                            const responseBlocks = [];
+                            let searchIndex = 0;
+
+                            while (true) {
+                                const blockStart = allText.indexOf('RESPONSE_ID_', searchIndex);
+                                if (blockStart === -1) break;
+
+                                const blockEnd = allText.indexOf(':END_RESPONSE_ID_', blockStart);
+                                if (blockEnd === -1) break;
+
+                                const blockText = allText.substring(blockStart, blockEnd + 17); // Include the end marker
+                                responseBlocks.push({
+                                    text: blockText,
+                                    startIndex: blockStart
+                                });
+
+                                searchIndex = blockEnd + 17;
+                            }
+
+                            // Filter out setup-related responses and return the most recent one
+                            // Only filter if we have multiple blocks - if only one block, use it
+                            let filteredBlocks = responseBlocks;
+                            if (responseBlocks.length > 1) {
+                                filteredBlocks = responseBlocks.filter(block => {
+                                    const content = block.text.toLowerCase();
+                                    const isSetupResponse = content.includes('setup complete') ||
+                                                           content.includes('i understand') ||
+                                                           content.includes('response format') ||
+                                                           content.includes('critical system setup') ||
+                                                           content.includes('plain text format') ||
+                                                           content.includes('example with request id') ||
+                                                           content.includes('test123') ||
+                                                           content.includes('this is an example response') ||
+                                                           content.includes('you can see:') ||
+                                                           content.includes('multiple paragraphs') ||
+                                                           content.includes('lists with bullets') ||
+                                                           content.includes('italic emphasis') ||
+                                                           content.includes('bold text') ||
+                                                           content.includes('code examples');
+
+                                    // Also check if this block contains the specific request ID we're looking for
+                                    const hasTargetRequestId = content.includes(requestId);
+
+                                    // Keep the block if it has our target request ID OR if it's not a setup response
+                                    return hasTargetRequestId || !isSetupResponse;
+                                });
+                            }
+
+                            if (filteredBlocks.length > 0) {
+                                // Return the most recent (last) non-setup response
+                                const latestBlock = filteredBlocks[filteredBlocks.length - 1];
+                                console.log(`Using latest non-setup response from ${filteredBlocks.length} available blocks`);
+
+                                // Extract the actual content from the block
+                                const startContentIndex = latestBlock.text.indexOf('_START:');
+                                const endContentIndex = latestBlock.text.indexOf(':END_RESPONSE_ID_');
+
+                                if (startContentIndex !== -1 && endContentIndex !== -1) {
+                                    const content = latestBlock.text.substring(startContentIndex + 7, endContentIndex).trim();
+                                    if (content && content.length > 10) {
+                                        // Additional check: ensure this response is substantial and not just an acknowledgment
+                                        const isSubstantialResponse = content.length > 50 &&
+                                                                     !content.toLowerCase().includes('acknowledge') &&
+                                                                     !content.toLowerCase().includes('understood') &&
+                                                                     !content.toLowerCase().includes('will follow');
+
+                                        if (isSubstantialResponse) {
+                                            console.log(`Found substantial response with ${content.length} characters`);
+                                            // Save HTML content for debugging if requested
+                                            try {
+                                                // This is a placeholder - the actual HTML saving will be handled
+                                                // by the Python code that calls this JavaScript
+                                                console.log('Response detected, HTML will be saved by Python code');
+                                            } catch (e) {
+                                                console.error('Failed to save HTML:', e);
+                                            }
+                                            return content;
+                                        } else {
+                                            console.log(`Response too short or appears to be acknowledgment (${content.length} chars), waiting for more content...`);
+                                        }
+                                    }
+                                }
+                            }
+
+                            console.log(`Found ${responseBlocks.length} RESPONSE_ID blocks`);
+                            if (responseBlocks.length > 0) {
+                                console.log('Block details:');
+                                responseBlocks.forEach((block, index) => {
+                                    console.log(`Block ${index}: ${block.text.substring(0, 100)}...`);
+                                });
+                            }
+
+                            // Final fallback: return the last RESPONSE_ID block found
+                            if (responseBlocks.length > 0) {
+                                const lastBlock = responseBlocks[responseBlocks.length - 1];
+                                const startContentIndex = lastBlock.text.indexOf('_START:');
+                                const endContentIndex = lastBlock.text.indexOf(':END_RESPONSE_ID_');
+
+                                if (startContentIndex !== -1 && endContentIndex !== -1) {
+                                    const content = lastBlock.text.substring(startContentIndex + 7, endContentIndex).trim();
+                                    if (content && content.length > 10) {
+                                        // Additional validation for final fallback
+                                        const isSetupResponse = content.toLowerCase().includes('setup complete') ||
+                                                               content.toLowerCase().includes('i understand') ||
+                                                               content.length < 100; // Setup responses are typically short
+
+                                        if (!isSetupResponse) {
+                                            console.log('Using final fallback response');
+                                            // Extract the ID from the last block for logging
+                                            const idMatch = lastBlock.text.match(/RESPONSE_ID_([^_]+)_START:/);
+                                            if (idMatch) {
+                                                console.log(`Final fallback response ID: ${idMatch[1]}`);
+                                            }
+                                            return content;
+                                        } else {
+                                            console.log('Final fallback response appears to be setup acknowledgment, skipping...');
+                                        }
+                                    }
+                                }
+                            }
+
+                            // If no response found yet, wait a bit longer and try again
+                            console.log(`No response found for request ID ${requestId}, waiting longer...`);
+
+                            // Schedule HTML save 20 seconds from now to debug what content is available
+                            setTimeout(() => {
+                                try {
+                                    const htmlContent = document.documentElement.outerHTML;
+                                    console.log('=== HTML CONTENT 20 SECONDS AFTER DETECTION FAILURE ===');
+                                    console.log(`HTML length: ${htmlContent.length} characters`);
+
+                                    // Look for RESPONSE_ID patterns in the HTML
+                                    const responseIdPattern = /RESPONSE_ID_[^_]+_START:[\\s\\S]*?:END_RESPONSE_ID_[^_]+/g;
+                                    const foundResponses = htmlContent.match(responseIdPattern);
+
+                                    if (foundResponses) {
+                                        console.log(`Found ${foundResponses.length} RESPONSE_ID blocks in HTML:`);
+                                        foundResponses.forEach((block, index) => {
+                                            console.log(`Block ${index}: ${block.substring(0, 200)}...`);
+                                        });
+                                    } else {
+                                        console.log('No RESPONSE_ID blocks found in HTML');
+                                    }
+
+                                    // Look for the specific request ID.
+                                    // Backslashes here are unescaped twice (once by the enclosing Python string,
+                                    // once by this JS template literal), so each one is written four times to
+                                    // reach RegExp as the "any character, including newlines" class.
+                                    const specificPattern = new RegExp(`RESPONSE_ID_${requestId}_START:[\\\\s\\\\S]*?:END_RESPONSE_ID_${requestId}`, 'g');
+                                    const specificMatches = htmlContent.match(specificPattern);
+
+                                    if (specificMatches) {
+                                        console.log(`Found ${specificMatches.length} blocks for request ID ${requestId}:`);
+                                        specificMatches.forEach((match, index) => {
+                                            console.log(`Specific match ${index}: ${match.substring(0, 300)}...`);
+                                        });
+                                    } else {
+                                        console.log(`No blocks found for request ID ${requestId}`);
+                                    }
+
+                                    // Save HTML content for debugging if requested
+                                    if (typeof saveDebugHtml === 'function') {
+                                        saveDebugHtml(htmlContent, `delayed_debug_${requestId}_${Date.now()}`);
+                                    }
+                                } catch (e) {
+                                    console.error('Failed to save delayed debug HTML:', e);
+                                }
+                            }, 20000);
+
+                            // No marker-delimited response was found: fall through to the selector-based detection below
                        }
                        
                        // Fallback to selector-based detection
@@ -1137,17 +1602,53 @@ async def detect_progressive_response(page, container_selector, prompt, modified
                    };

                    const checkForResponse = () => {
+                        // First, try direct HTML-based detection for more reliability:
+                        // look for this request's START/END markers in the serialized page.
+                        // NOTE(review): this matches against outerHTML, so the captured content
+                        // may contain markup rather than plain text - confirm callers expect that.
+                        if (chatbotName === 'gemini:latest') {
+                            try {
+                                const htmlContent = document.documentElement.outerHTML;
+                                // Backslashes here are unescaped twice (once by the enclosing Python string,
+                                // once by this JS template literal), so each one is written four times to
+                                // reach RegExp as the "any character, including newlines" class.
+                                const responsePattern = new RegExp(`RESPONSE_ID_${requestId}_START:([\\\\s\\\\S]*?):END_RESPONSE_ID_${requestId}`, 'g');
+                                const matches = htmlContent.match(responsePattern);
+
+                                if (matches && matches.length > 0) {
+                                    for (const match of matches) {
+                                        // Re-run without the global flag to get the capture group for this match
+                                        const contentMatch = match.match(new RegExp(`RESPONSE_ID_${requestId}_START:([\\\\s\\\\S]*?):END_RESPONSE_ID_${requestId}`));
+                                        if (contentMatch && contentMatch[1]) {
+                                            const content = contentMatch[1].trim();
+                                            // Ignore very short bodies (50 characters or fewer)
+                                            if (content.length > 50) {
+                                                console.log(`Direct HTML detection found response for request ID ${requestId} (${content.length} chars)`);
+                                                return content;
+                                            }
+                                        }
+                                    }
+                                }
+                            } catch (e) {
+                                console.error('Error in direct HTML detection:', e);
+                            }
+                        }
+
                        const currentResponse = extractLatestResponse();
-                        
+
                        // For Gemini with formatted response, return immediately if found
                        if (chatbotName === 'gemini:latest' && currentResponse) {
-                            const startMarker = `RESPONSE_ID_[${requestId}]_START:`;
+                            const startMarker = `RESPONSE_ID_${requestId}_START:`;
                            const allText = container.textContent || '';
                            if (allText.includes(startMarker) && currentResponse.length > 10) {
-                                return currentResponse;
+                                // Additional check: ensure the response area is not still loading
+                                const spinnerElements = document.querySelectorAll('.avatar_spinner_animation, [class*="spinner"], [class*="loading"]');
+                                const hasVisibleSpinner = Array.from(spinnerElements).some(el => {
+                                    const style = window.getComputedStyle(el);
+                                    return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
+                                });
+
+                                if (!hasVisibleSpinner) {
+                                    console.log('No visible spinners, returning response');
+                                    return currentResponse;
+                                } else {
+                                    console.log('Spinner still visible, waiting for completion');
+                                }
                            }
                        }
-                        
+
                        // Enhanced stability checking with better timing
                        if (currentResponse && currentResponse.length > 0) {
                            if (currentResponse !== lastContent) {
@@ -1158,28 +1659,51 @@ async def detect_progressive_response(page, container_selector, prompt, modified
                                    lastContent = currentResponse;
                                    stableCount = 0;
                                    lastResponseTimestamp = Date.now();
-                                    
+
                                    // Quick return for substantial responses
                                    if (currentResponse.length > 150 &&
                                        !currentResponse.includes(requestId) &&
                                        currentResponse !== prompt &&
                                        currentResponse !== modifiedPrompt) {
-                                        return currentResponse;
+                                        // Additional check to avoid returning setup responses early
+                                        const isSetupResponse = currentResponse.toLowerCase().includes('setup complete') ||
+                                                               currentResponse.toLowerCase().includes('i understand') ||
+                                                               currentResponse.toLowerCase().includes('response format') ||
+                                                               currentResponse.toLowerCase().includes('critical system setup') ||
+                                                               currentResponse.length < 200; // Setup responses are typically shorter
+
+                                        if (!isSetupResponse) {
+                                            console.log(`Early return: Found substantial response (${currentResponse.length} chars)`);
+                                            return currentResponse;
+                                        } else {
+                                            console.log(`Early return blocked: Response appears to be setup acknowledgment (${currentResponse.length} chars)`);
+                                        }
                                    }
                                }
                            } else if (currentResponse === lastContent && currentResponse.length > 15) {
                                stableCount++;
                                const timeSinceLastResponse = Date.now() - lastResponseTimestamp;
-                                
-                                // More responsive stability check
+
+                                // More responsive stability check with additional setup filtering
                                if ((stableCount >= 2 && timeSinceLastResponse > 2000) ||
                                    (stableCount >= 1 && currentResponse.length > 100 && timeSinceLastResponse > 1500)) {
-                                    previousResponseText = currentResponse;
-                                    return progressiveContent || currentResponse;
+                                    // Final check to ensure we're not returning setup responses
+                                    const isSetupResponse = currentResponse.toLowerCase().includes('setup complete') ||
+                                                           currentResponse.toLowerCase().includes('i understand') ||
+                                                           currentResponse.toLowerCase().includes('response format') ||
+                                                           currentResponse.toLowerCase().includes('critical system setup');
+
+                                    if (!isSetupResponse) {
+                                        console.log(`Stability return: Found stable response (${currentResponse.length} chars)`);
+                                        previousResponseText = currentResponse;
+                                        return progressiveContent || currentResponse;
+                                    } else {
+                                        console.log(`Stability return blocked: Stable content appears to be setup acknowledgment`);
+                                    }
                                }
                            }
                        }
-                        
+
                        return null;
                    };

@@ -1215,17 +1739,79 @@ async def detect_progressive_response(page, container_selector, prompt, modified
                        }
                    }, 1000);

-                    // Multiple timeout stages for better reliability
+                    // Multiple timeout stages for better reliability - increased for Gemini
                    setTimeout(() => {
-                        if (!resolved && progressiveContent && progressiveContent.length > 20) {
-                            clearInterval(interval);
-                            observer.disconnect();
-                            resolveOnce(progressiveContent);
+                        if (!resolved) {
+                            console.log('=== 35s TIMEOUT CHECK ===');
+                            const currentHtml = document.documentElement.outerHTML;
+                            // Backslashes here are unescaped twice (once by the enclosing Python string,
+                            // once by this JS template literal), so each one is written four times to
+                            // reach RegExp as the "any character, including newlines" class.
+                            const responsePattern = new RegExp(`RESPONSE_ID_${requestId}_START:[\\\\s\\\\S]*?:END_RESPONSE_ID_${requestId}`, 'g');
+                            const matches = currentHtml.match(responsePattern);
+
+                            if (matches && matches.length > 0) {
+                                console.log(`Found ${matches.length} matches for request ID ${requestId} at 35s timeout`);
+                                // Strip the START/END markers from the first match to leave only the body
+                                const content = matches[0].replace(new RegExp(`RESPONSE_ID_${requestId}_START:`), '').replace(new RegExp(`:END_RESPONSE_ID_${requestId}`), '').trim();
+                                if (content.length > 50) {
+                                    console.log('Returning content found at 35s timeout');
+                                    clearInterval(interval);
+                                    observer.disconnect();
+                                    resolveOnce(content);
+                                    return;
+                                }
+                            }
+
+                            // Check if there's any substantial content in the latest response area
+                            const responseElements = document.querySelectorAll('[id*="message-content"]');
+                            for (const element of responseElements) {
+                                const text = element.textContent || '';
+                                if (text.length > 100 && !text.includes('SETUP_COMPLETE') && !text.includes('setup complete')) {
+                                    console.log('Found substantial content in response area at 35s timeout');
+                                    clearInterval(interval);
+                                    observer.disconnect();
+                                    resolveOnce(text);
+                                    return;
+                                }
+                            }
+
+                            if (progressiveContent && progressiveContent.length > 20) {
+                                clearInterval(interval);
+                                observer.disconnect();
+                                resolveOnce(progressiveContent);
+                            }
                        }
-                    }, 25000); // Early timeout for fast responses
-                    
+                    }, 35000); // Early timeout for fast responses - increased from 25s to 35s
+
                    setTimeout(() => {
                        if (!resolved) {
+                            console.log('=== 65s MAIN TIMEOUT CHECK ===');
+                            const currentHtml = document.documentElement.outerHTML;
+                            // Backslashes here are unescaped twice (once by the enclosing Python string,
+                            // once by this JS template literal), so each one is written four times to
+                            // reach RegExp as the "any character, including newlines" class.
+                            const responsePattern = new RegExp(`RESPONSE_ID_${requestId}_START:[\\\\s\\\\S]*?:END_RESPONSE_ID_${requestId}`, 'g');
+                            const matches = currentHtml.match(responsePattern);
+
+                            if (matches && matches.length > 0) {
+                                console.log(`Found ${matches.length} matches for request ID ${requestId} at 65s timeout`);
+                                // Strip the START/END markers from the first match to leave only the body
+                                const content = matches[0].replace(new RegExp(`RESPONSE_ID_${requestId}_START:`), '').replace(new RegExp(`:END_RESPONSE_ID_${requestId}`), '').trim();
+                                if (content.length > 50) {
+                                    console.log('Returning content found at 65s timeout');
+                                    clearInterval(interval);
+                                    observer.disconnect();
+                                    resolveOnce(content);
+                                    return;
+                                }
+                            }
+
+                            // Check if there's any substantial content in the latest response area
+                            const responseElements = document.querySelectorAll('[id*="message-content"]');
+                            for (const element of responseElements) {
+                                const text = element.textContent || '';
+                                if (text.length > 100 && !text.includes('SETUP_COMPLETE') && !text.includes('setup complete')) {
+                                    console.log('Found substantial content in response area at 65s timeout');
+                                    clearInterval(interval);
+                                    observer.disconnect();
+                                    resolveOnce(text);
+                                    return;
+                                }
+                            }
+
                            clearInterval(interval);
                            observer.disconnect();
                            if (progressiveContent && progressiveContent.length > 10) {
@@ -1234,7 +1820,7 @@ async def detect_progressive_response(page, container_selector, prompt, modified
                                resolveOnce("Error: Progressive detection timeout - no substantial content found");
                            }
                        }
-                    }, 45000); // Main timeout
+                    }, 65000); // Main timeout - increased from 45s to 65s for Gemini
                });
            }""",
            [container_selector, prompt, modified_prompt, request_id, chatbot_name]
@@ -1387,9 +1973,48 @@ async def main(args):
    parser.add_argument('--ip', default='localhost', help='Proxy server IP (default: localhost)')
    parser.add_argument('--port', type=int, default=11434, help='Proxy server port (default: 11434)')
    parser.add_argument('--connect', help='Connect to existing browser via CDP (e.g., ws://localhost:9222)')
+    parser.add_argument('--debug', action='store_true', help='Enable debug logging and save prompts to log file')
    args = parser.parse_args()

-    logging.basicConfig(level=logging.INFO)
+    # Set up logging
+    log_level = logging.DEBUG if args.debug else logging.INFO
+    logging.basicConfig(level=log_level)
+
+    # Set up debug logging if requested
+    if args.debug:
+        import os
+        import time
+
+        # Create directory with Unix epoch timestamp
+        epoch_time = int(time.time())
+        debug_dir = f"debug_{epoch_time}"
+        os.makedirs(debug_dir, exist_ok=True)
+
+        # Set up debug log file
+        debug_log_path = os.path.join(debug_dir, 'debug.log')
+        debug_handler = logging.FileHandler(debug_log_path)
+        debug_handler.setLevel(logging.DEBUG)
+        debug_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        debug_handler.setFormatter(debug_formatter)
+        logging.getLogger().addHandler(debug_handler)
+
+        # Create prompts log file
+        prompts_log_path = os.path.join(debug_dir, 'prompts.log')
+
+        # Store debug info in global variables for access by other functions
+        global DEBUG_ENABLED, DEBUG_DIR, PROMPTS_LOG_PATH
+        DEBUG_ENABLED = True
+        DEBUG_DIR = debug_dir
+        PROMPTS_LOG_PATH = prompts_log_path
+
+        logging.info(f"Debug logging enabled - files will be saved to {debug_dir}/")
+        logging.info(f"Debug log: {debug_log_path}")
+        logging.info(f"Prompts log: {prompts_log_path}")
+    else:
+        DEBUG_ENABLED = False
+        DEBUG_DIR = None
+        PROMPTS_LOG_PATH = None
+
    logging.info(f"CHATBOT_CONFIG: {CHATBOT_CONFIG}")
    await asyncio.gather(
        start_proxy_server(args.ip, args.port, args),
@@ -1401,6 +2026,7 @@ if __name__ == "__main__":
    parser.add_argument('--ip', default='localhost', help='Proxy server IP (default: localhost)')
    parser.add_argument('--port', type=int, default=11434, help='Proxy server port (default: 11434)')
    parser.add_argument('--connect', help='Connect to existing browser via CDP (e.g., ws://localhost:9222)')
+    parser.add_argument('--debug', action='store_true', help='Enable debug logging and save prompts to log file')
    args = parser.parse_args()

    if args.port != 11434: