Commit 41cc9c97 authored by nextime

Implement improved JSON-based response detection system

- Replace spy word detection with robust JSON-based response extraction
- Add UUID-based unique request identification for better reliability
- Implement comprehensive JSON parsing with error recovery mechanisms
- Add support for partial JSON response extraction
- Enhance error handling for malformed JSON responses
- Improve detection of responses containing code blocks and special formatting
- Better handling of dynamic content and progressive response loading
- Update CHANGELOG.md to document breaking changes and improvements

This addresses the core issue with complex HTML parsing by requesting
structured JSON responses from chatbots, making response extraction
much more reliable and accurate.
parent 862ec611
......@@ -8,9 +8,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Initial project documentation
- GPLv3 license implementation
- Comprehensive README with usage examples
- Enhanced JSON-based response detection system with unique request IDs
- UUID-based request identification for improved reliability
- Robust JSON parsing with error recovery mechanisms
- Support for partial JSON response extraction
### Changed
- **BREAKING**: Replaced spy word detection system with JSON-based response extraction
- Modified prompt injection to request structured JSON responses
- Improved response detection reliability and accuracy
- Enhanced error handling for malformed JSON responses
### Fixed
- Resolved issues with complex HTML response parsing
- Improved detection of responses containing code blocks and special formatting
- Better handling of dynamic content and progressive response loading
## [0.1.0] - 2025-08-23
......@@ -45,7 +57,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `POST /v1/chat/completions` - OpenAI-compatible chat completion
- **Response Extraction Strategies**:
- Primary: Spy word detection with flexible pattern matching
- Primary: JSON-based response detection with unique UUID identifiers
- Secondary: Progressive content monitoring with stability detection
- Fallback: Heuristic latest response detection
......@@ -65,6 +77,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Built with Python 3.7+ compatibility
- Uses `aiohttp` for async HTTP server
- Uses `playwright` for browser automation
- Uses `uuid` for unique request identification
- Implements comprehensive error handling
- Supports concurrent requests with proper browser page management
- Includes detailed logging for debugging and monitoring
......
......@@ -22,6 +22,8 @@ import argparse
import json
import asyncio
import datetime # Import datetime module
import uuid
import re
from playwright.async_api import async_playwright, BrowserContext
from aiohttp import web
import logging
......@@ -364,13 +366,13 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
page = pages[chatbot_name]
# Generate unique ID for this request
request_id = int(time.time() * 1000) # Millisecond timestamp
spy_word_start = f"{config.get('spy_word_base', 'SPYWORD_DEFAULT')}_{request_id}"
spy_word_end = f"{spy_word_start}_END"
# Generate unique ID for this request using UUID for better uniqueness
request_id = str(uuid.uuid4()).replace('-', '')[:16] # 16-character unique ID
# Modify prompt to include unique spy word instructions
modified_prompt = f"{prompt}\n\nIMPORTANT: Please start your response with exactly '{spy_word_start}: ' and end with exactly ' {spy_word_end}' (including the colon and spaces)."
# Create JSON-based prompt with unique ID
json_instruction = f'Format all responses as a raw JSON object with a "response" key containing the answer as a string, exactly as it would appear unfiltered from an API, including all content (code, explanations, instructions) with escaped newlines and special characters as well as command and special formatting from the API, use tools and agents as declared in the question when needed, and a second "id" key containing a unique id "{request_id}"'
modified_prompt = f"{json_instruction}\n\n{prompt}"
logging.info(f"Request ID: {request_id}, Modified prompt: {modified_prompt}")
try:
......@@ -389,25 +391,24 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
# Wait for user's prompt to appear in the chat area
try:
# Use a more flexible approach to detect user prompt
await asyncio.sleep(2) # Give time for the prompt to appear
logging.info(f"User prompt submitted for {chatbot_name}")
except Exception as e:
logging.warning(f"Failed to detect user prompt for {chatbot_name}: {str(e)}")
# Enhanced response detection with multiple strategies
# New JSON-based response detection
response_text = None
logging.info(f"Searching for spy pattern: {spy_word_start} ... {spy_word_end}")
logging.info(f"Searching for JSON response with ID: {request_id}")
# Strategy 1: Advanced spy word detection with flexible matching
response_text = await detect_response_with_spy_words(page, container_selector, spy_word_start, spy_word_end, prompt, modified_prompt, request_id, chatbot_name)
# Primary strategy: JSON response detection with unique ID
response_text = await detect_json_response_with_id(page, container_selector, request_id, prompt, modified_prompt, chatbot_name)
# Strategy 2: If spy words fail, try progressive content detection
# Fallback strategy: If JSON detection fails, try progressive content detection
if not response_text or "Error:" in response_text:
logging.info(f"Spy word detection failed, trying progressive content detection for {chatbot_name} (Request ID: {request_id})")
logging.info(f"JSON detection failed, trying progressive content detection for {chatbot_name} (Request ID: {request_id})")
response_text = await detect_progressive_response(page, container_selector, prompt, modified_prompt, request_id, chatbot_name)
# Strategy 3: Final fallback - latest response detection
# Final fallback: Latest response detection
if not response_text or "Error:" in response_text:
logging.info(f"Progressive detection failed, using latest response fallback for {chatbot_name} (Request ID: {request_id})")
response_text = await detect_latest_response(page, container_selector, prompt, modified_prompt, request_id, chatbot_name)
......@@ -416,20 +417,18 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
return (response_text or "Error: No response detected").strip()
async def detect_response_with_spy_words(page, container_selector, spy_word_start, spy_word_end, prompt, modified_prompt, request_id, chatbot_name):
"""Enhanced spy word detection with flexible pattern matching."""
async def detect_json_response_with_id(page, container_selector, request_id, prompt, modified_prompt, chatbot_name):
"""Detect JSON responses with unique ID for reliable extraction."""
try:
return await page.evaluate(
"""([containerSelector, spyWordStart, spyWordEnd, prompt, modifiedPrompt, requestId]) => {
"""([containerSelector, requestId, prompt, modifiedPrompt]) => {
const container = document.querySelector(containerSelector);
if (!container) return "Error: Container not found";
return new Promise((resolve) => {
let resolved = false;
let bestMatch = null;
let partialContent = '';
let hasStartWord = false;
let hasEndWord = false;
let partialJsonContent = '';
const resolveOnce = (result) => {
if (!resolved) {
......@@ -438,78 +437,66 @@ async def detect_response_with_spy_words(page, container_selector, spy_word_star
}
};
// Flexible text checking - handles content split across elements
const checkAndExtractResponse = (fullText) => {
if (!fullText || fullText === prompt || fullText === modifiedPrompt) return null;
// Look for spy word patterns with various spacing/formatting
const startPatterns = [
`${spyWordStart}: `,
`${spyWordStart}:`,
`${spyWordStart} :`,
spyWordStart
// Function to extract and parse JSON response
const extractJsonResponse = (text) => {
if (!text || text === prompt || text === modifiedPrompt) return null;
// Look for JSON patterns with our unique ID
const jsonPatterns = [
// Standard JSON object patterns
/\{[^{}]*"id"\s*:\s*"[^"]*' + requestId + '[^"]*"[^{}]*"response"\s*:\s*"([^"\\\\]|\\\\.)*"[^{}]*\}/g,
/\{[^{}]*"response"\s*:\s*"([^"\\\\]|\\\\.)*"[^{}]*"id"\s*:\s*"[^"]*' + requestId + '[^"]*"[^{}]*\}/g,
// More flexible JSON patterns
/\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?"response"[\\s\\S]*?\}/g,
/\{[\\s\\S]*?"response"[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?\}/g
];
const endPatterns = [
` ${spyWordEnd}`,
`${spyWordEnd}`,
` ${spyWordEnd} `
];
let startIndex = -1;
let endIndex = -1;
let usedStartPattern = '';
let usedEndPattern = '';
// Find start pattern
for (const pattern of startPatterns) {
const idx = fullText.indexOf(pattern);
if (idx !== -1) {
startIndex = idx + pattern.length;
usedStartPattern = pattern;
break;
for (const pattern of jsonPatterns) {
const matches = text.match(pattern);
if (matches) {
for (const match of matches) {
try {
const jsonObj = JSON.parse(match);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found JSON response with ID: ${jsonObj.id}`);
return jsonObj.response;
}
} catch (e) {
// Try to fix common JSON issues
try {
const fixedJson = match
.replace(/\\n/g, '\\\\n')
.replace(/\\t/g, '\\\\t')
.replace(/\\r/g, '\\\\r')
.replace(/"/g, '\\\\"')
.replace(/\\\\"/g, '"')
.replace(/^"/, '')
.replace(/"$/, '');
const jsonObj = JSON.parse(fixedJson);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found fixed JSON response with ID: ${jsonObj.id}`);
return jsonObj.response;
}
// Find end pattern
for (const pattern of endPatterns) {
const idx = fullText.lastIndexOf(pattern);
if (idx !== -1 && idx > startIndex) {
endIndex = idx;
usedEndPattern = pattern;
break;
}
} catch (e2) {
console.log(`Failed to parse JSON: ${match.substring(0, 100)}...`);
}
if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
const extracted = fullText.substring(startIndex, endIndex).trim();
if (extracted.length > 10) {
console.log(`Found complete spy word response: ${extracted.substring(0, 100)}...`);
return extracted;
}
}
// Check for partial matches to track progress
if (fullText.includes(spyWordStart)) {
hasStartWord = true;
// Extract everything after the start word
for (const pattern of startPatterns) {
const idx = fullText.indexOf(pattern);
if (idx !== -1) {
partialContent = fullText.substring(idx + pattern.length);
break;
}
}
}
if (fullText.includes(spyWordEnd)) {
hasEndWord = true;
// Look for partial JSON that might be building up
const partialJsonRegex = new RegExp('\\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*', 'g');
const partialMatch = text.match(partialJsonRegex);
if (partialMatch && partialMatch[0].length > partialJsonContent.length) {
partialJsonContent = partialMatch[0];
console.log(`Found partial JSON: ${partialJsonContent.substring(0, 100)}...`);
}
return null;
};
// Combine text from multiple elements intelligently
// Get combined text from all relevant elements
const getCombinedText = () => {
const chatElements = container.querySelectorAll([
'div[data-testid*="cellInnerDiv"]',
......@@ -519,33 +506,32 @@ async def detect_response_with_spy_words(page, container_selector, spy_word_star
'main div[role="article"]',
'div[class*="css-"]',
'span[class*="css-"]',
'pre', 'code', // Include code blocks for JSON
'p', 'div', 'span'
].join(', '));
let combinedTexts = [];
let currentResponseText = '';
let allTexts = [];
for (const element of chatElements) {
const text = element.textContent ? element.textContent.trim() : '';
if (text && text !== prompt && text !== modifiedPrompt && text.length > 5) {
// Check if this might be part of bot response
if (text.includes(spyWordStart) || text.includes(spyWordEnd) ||
(hasStartWord && !text.includes('Grok something'))) {
combinedTexts.push(text);
if (text && text !== prompt && text !== modifiedPrompt && text.length > 10) {
// Look for elements that might contain JSON or our request ID
if (text.includes(requestId) || text.includes('{') || text.includes('"response"')) {
allTexts.push(text);
}
}
}
// Try different combinations
// Try different text combinations
const combinations = [
combinedTexts.join(' '),
combinedTexts.join('\\n'),
combinedTexts.join(''),
...combinedTexts // Individual texts
allTexts.join(' '),
allTexts.join('\\n'),
allTexts.join(''),
...allTexts // Individual texts
];
for (const combo of combinations) {
const result = checkAndExtractResponse(combo);
const result = extractJsonResponse(combo);
if (result) return result;
}
......@@ -561,8 +547,8 @@ async def detect_response_with_spy_words(page, container_selector, spy_word_star
}
// Update best partial match
if (hasStartWord && partialContent.length > (bestMatch?.length || 0)) {
bestMatch = partialContent;
if (partialJsonContent.length > (bestMatch?.length || 0)) {
bestMatch = partialJsonContent;
}
});
......@@ -572,43 +558,64 @@ async def detect_response_with_spy_words(page, container_selector, spy_word_star
characterData: true
});
// Initial check
// Initial check after a short delay
setTimeout(() => {
const result = getCombinedText();
if (result) {
observer.disconnect();
resolveOnce(result);
}
}, 1000);
}, 2000);
// Progressive timeout with partial results
// Progressive timeout - check for partial results
setTimeout(() => {
const result = getCombinedText();
if (result) {
observer.disconnect();
resolveOnce(result);
} else if (bestMatch && bestMatch.length > 50) {
} else if (partialJsonContent.length > 50) {
// Try to extract response from partial JSON
try {
const responseMatch = partialJsonContent.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
observer.disconnect();
resolveOnce(bestMatch);
resolveOnce(responseValue);
return;
}
} catch (e) {
console.log(`Failed to extract from partial JSON: ${e}`);
}
}, 30000); // 30 seconds
}
}, 45000); // 45 seconds
// Final timeout
setTimeout(() => {
observer.disconnect();
if (bestMatch && bestMatch.length > 20) {
resolveOnce(bestMatch);
// Last attempt to extract something useful
try {
const responseMatch = bestMatch.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
resolveOnce(responseValue);
return;
}
} catch (e) {
console.log(`Final extraction failed: ${e}`);
}
resolveOnce("Error: JSON parsing timeout - partial content found but unparseable");
} else {
resolveOnce("Error: Spy word timeout");
resolveOnce("Error: JSON response timeout - no matching ID found");
}
}, 90000); // 90 seconds for complex responses
}, 120000); // 2 minutes for complex responses
});
}""",
[container_selector, spy_word_start, spy_word_end, prompt, modified_prompt, request_id]
[container_selector, request_id, prompt, modified_prompt]
)
except Exception as e:
logging.error(f"Error in spy word detection: {e}")
return "Error: Spy word detection failed"
logging.error(f"Error in JSON response detection: {e}")
return "Error: JSON response detection failed"
async def detect_progressive_response(page, container_selector, prompt, modified_prompt, request_id, chatbot_name):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment