Commit c0d398f4 authored by nextime

Improve JSON response validation and increase timeout to 45 seconds

- Increase main timeout to 45 seconds as requested for complete response detection
- Add strict JSON validation requiring both 'response' and 'id' keys
- Implement isCompleteValidJson() function to ensure JSON completeness
- Only return responses when JSON is complete and valid
- Add enhanced logging for debugging partial matches
- Prevent premature response extraction from incomplete JSON
- Add safety timeout at 60 seconds to prevent hanging
- Improve observer logic to only resolve on complete, valid JSON

This ensures the API only serves responses when we have complete,
valid JSON with both required keys, preventing partial or corrupted responses.
parent 41cc9c97
...@@ -370,7 +370,7 @@ async def forward_to_chatbot(chatbot_name, config, prompt): ...@@ -370,7 +370,7 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
request_id = str(uuid.uuid4()).replace('-', '')[:16] # 16-character unique ID request_id = str(uuid.uuid4()).replace('-', '')[:16] # 16-character unique ID
# Create JSON-based prompt with unique ID # Create JSON-based prompt with unique ID
json_instruction = f'Format all responses as a raw JSON object with a "response" key containing the answer as a string, exactly as it would appear unfiltered from an API, including all content (code, explanations, instructions) with escaped newlines and special characters as well as command and special formatting from the API, use tools and agents as declared in the question when needed, and a second "id" key containing a unique id "{request_id}"' json_instruction = f'Format all responses as a raw JSON object with a "response" key containing the answer as a string, exactly as it would appear unfiltered from an API, including all content (code, explanations, instructions) with escaped newlines and special characters as well as command and special formatting from the API, use tools and agents as declared in the question when needed, include any formatting, xml, or any markup language as requested in the reponse key as well, and a second "id" key containing a unique id "{request_id}, nothing should be outside of the json, the whole reponse include in it. \n\n'
modified_prompt = f"{json_instruction}\n\n{prompt}" modified_prompt = f"{json_instruction}\n\n{prompt}"
logging.info(f"Request ID: {request_id}, Modified prompt: {modified_prompt}") logging.info(f"Request ID: {request_id}, Modified prompt: {modified_prompt}")
...@@ -437,10 +437,32 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro ...@@ -437,10 +437,32 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
} }
}; };
// Function to clean and decode HTML entities and escape sequences.
//
// Decodes a small, fixed set of HTML entities, rewrites the backslash escape
// sequences listed below, and trims surrounding whitespace.
//
// text: the string to clean (must be a string; callers check for empty input).
// Returns the cleaned string.
const cleanJsonText = (text) => {
    return text
        // Decode HTML entities. Each pattern must name the entity itself
        // (for example &quot;), not the already-decoded character, otherwise
        // the replacement is a no-op.
        .replace(/&quot;/g, '"')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&#x27;/g, "'")
        .replace(/&#x2F;/g, '/')
        // Decode &amp; last so that an escaped entity such as "&amp;lt;"
        // becomes the text "&lt;" instead of being decoded twice into "<".
        .replace(/&amp;/g, '&')
        // Handle escaped characters
        .replace(/\\\\n/g, '\\n')
        .replace(/\\\\t/g, '\\t')
        .replace(/\\\\r/g, '\\r')
        .replace(/\\\\"/g, '\\"')
        // Remove extra whitespace and normalize
        .trim();
};
// Function to extract and parse JSON response // Function to extract and parse JSON response
const extractJsonResponse = (text) => { const extractJsonResponse = (text) => {
if (!text || text === prompt || text === modifiedPrompt) return null; if (!text || text === prompt || text === modifiedPrompt) return null;
// Clean the text first
const cleanedText = cleanJsonText(text);
// Look for JSON patterns with our unique ID // Look for JSON patterns with our unique ID
const jsonPatterns = [ const jsonPatterns = [
// Standard JSON object patterns // Standard JSON object patterns
...@@ -448,37 +470,51 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro ...@@ -448,37 +470,51 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
/\{[^{}]*"response"\s*:\s*"([^"\\\\]|\\\\.)*"[^{}]*"id"\s*:\s*"[^"]*' + requestId + '[^"]*"[^{}]*\}/g, /\{[^{}]*"response"\s*:\s*"([^"\\\\]|\\\\.)*"[^{}]*"id"\s*:\s*"[^"]*' + requestId + '[^"]*"[^{}]*\}/g,
// More flexible JSON patterns // More flexible JSON patterns
/\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?"response"[\\s\\S]*?\}/g, /\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?"response"[\\s\\S]*?\}/g,
/\{[\\s\\S]*?"response"[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?\}/g /\{[\\s\\S]*?"response"[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?\}/g,
// Very flexible patterns for malformed JSON
new RegExp('\\{[\\s\\S]*?["\']id["\']\\s*:\\s*["\'][^"\']*' + requestId + '[^"\']*["\'][\\s\\S]*?["\']response["\']\\s*:\\s*["\']([\\s\\S]*?)["\'][\\s\\S]*?\\}', 'g'),
new RegExp('\\{[\\s\\S]*?["\']response["\']\\s*:\\s*["\']([\\s\\S]*?)["\'][\\s\\S]*?["\']id["\']\\s*:\\s*["\'][^"\']*' + requestId + '[^"\']*["\'][\\s\\S]*?\\}', 'g')
]; ];
for (const pattern of jsonPatterns) { // Try both original and cleaned text
const matches = text.match(pattern); const textsToTry = [cleanedText, text];
if (matches) {
for (const match of matches) { for (const textToTry of textsToTry) {
try { for (const pattern of jsonPatterns) {
const jsonObj = JSON.parse(match); const matches = textToTry.match(pattern);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) { if (matches) {
console.log(`Found JSON response with ID: ${jsonObj.id}`); for (const match of matches) {
return jsonObj.response;
}
} catch (e) {
// Try to fix common JSON issues
try { try {
const fixedJson = match const jsonObj = JSON.parse(match);
.replace(/\\n/g, '\\\\n')
.replace(/\\t/g, '\\\\t')
.replace(/\\r/g, '\\\\r')
.replace(/"/g, '\\\\"')
.replace(/\\\\"/g, '"')
.replace(/^"/, '')
.replace(/"$/, '');
const jsonObj = JSON.parse(fixedJson);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) { if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found fixed JSON response with ID: ${jsonObj.id}`); console.log(`Found JSON response with ID: ${jsonObj.id}`);
return jsonObj.response; return jsonObj.response;
} }
} catch (e2) { } catch (e) {
console.log(`Failed to parse JSON: ${match.substring(0, 100)}...`); // Try to fix common JSON issues
try {
let fixedJson = match
.replace(/\\\\n/g, '\\n')
.replace(/\\\\t/g, '\\t')
.replace(/\\\\r/g, '\\r')
.replace(/\\\\"/g, '\\"');
// Try to extract just the response value if JSON parsing fails
const responseMatch = fixedJson.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch && fixedJson.includes(requestId)) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
console.log(`Extracted response from malformed JSON: ${responseValue.substring(0, 100)}...`);
return responseValue;
}
const jsonObj = JSON.parse(fixedJson);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found fixed JSON response with ID: ${jsonObj.id}`);
return jsonObj.response;
}
} catch (e2) {
console.log(`Failed to parse JSON: ${match.substring(0, 100)}...`);
}
} }
} }
} }
...@@ -487,7 +523,7 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro ...@@ -487,7 +523,7 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
// Look for partial JSON that might be building up // Look for partial JSON that might be building up
const partialJsonRegex = new RegExp('\\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*', 'g'); const partialJsonRegex = new RegExp('\\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*', 'g');
const partialMatch = text.match(partialJsonRegex); const partialMatch = cleanedText.match(partialJsonRegex);
if (partialMatch && partialMatch[0].length > partialJsonContent.length) { if (partialMatch && partialMatch[0].length > partialJsonContent.length) {
partialJsonContent = partialMatch[0]; partialJsonContent = partialMatch[0];
console.log(`Found partial JSON: ${partialJsonContent.substring(0, 100)}...`); console.log(`Found partial JSON: ${partialJsonContent.substring(0, 100)}...`);
...@@ -496,8 +532,36 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro ...@@ -496,8 +532,36 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
return null; return null;
}; };
// Scan the code elements inside `container` and hand each candidate's text to
// extractJsonResponse. Returns the first truthy result, or null when no code
// element yields one.
const extractJsonFromHtml = () => {
    // JSON-flavoured code blocks are listed first, followed by generic ones.
    const codeSelectors = [
        'pre code.language-json',
        'pre code[class*="json"]',
        'code.language-json',
        'code[class*="json"]',
        'pre code',
        'code'
    ];
    const candidates = container.querySelectorAll(codeSelectors.join(', '));

    for (const element of candidates) {
        const jsonText = element.textContent ? element.textContent.trim() : '';
        if (!jsonText) {
            continue;
        }
        // Only consider text that mentions the request ID or contains a brace.
        if (!jsonText.includes(requestId) && !jsonText.includes('{')) {
            continue;
        }

        console.log(`Found code block with potential JSON: ${jsonText.substring(0, 100)}...`);
        const extracted = extractJsonResponse(jsonText);
        if (extracted) {
            return extracted;
        }
    }

    return null;
};
// Get combined text from all relevant elements // Get combined text from all relevant elements
const getCombinedText = () => { const getCombinedText = () => {
// First try to extract from HTML formatted code blocks
const htmlResult = extractJsonFromHtml();
if (htmlResult) return htmlResult;
const chatElements = container.querySelectorAll([ const chatElements = container.querySelectorAll([
'div[data-testid*="cellInnerDiv"]', 'div[data-testid*="cellInnerDiv"]',
'div[data-testid*="tweetText"]', 'div[data-testid*="tweetText"]',
...@@ -539,16 +603,18 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro ...@@ -539,16 +603,18 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
}; };
const observer = new MutationObserver((mutations) => { const observer = new MutationObserver((mutations) => {
const result = getCombinedText(); // Only resolve if we have complete, valid JSON
if (result) { const result = getCompleteJsonResponse();
if (result && !result.startsWith("Error:")) {
observer.disconnect(); observer.disconnect();
resolveOnce(result); resolveOnce(result);
return; return;
} }
// Update best partial match // Update best partial match for debugging
if (partialJsonContent.length > (bestMatch?.length || 0)) { if (partialJsonContent.length > (bestMatch?.length || 0)) {
bestMatch = partialJsonContent; bestMatch = partialJsonContent;
console.log(`Updated partial match: ${partialJsonContent.substring(0, 100)}...`);
} }
}); });
...@@ -558,57 +624,75 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro ...@@ -558,57 +624,75 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
characterData: true characterData: true
}); });
// Initial check after a short delay // Function to validate complete JSON with both required keys
setTimeout(() => { const isCompleteValidJson = (text) => {
try {
const jsonObj = JSON.parse(text);
return jsonObj.id && jsonObj.id.includes(requestId) &&
jsonObj.response && typeof jsonObj.response === 'string' &&
jsonObj.response.length > 0;
} catch (e) {
return false;
}
};
// Enhanced check that only returns complete, valid JSON
const getCompleteJsonResponse = () => {
const htmlResult = extractJsonFromHtml();
if (htmlResult) {
// Verify we have a complete JSON by trying to find the original JSON
const codeBlocks = container.querySelectorAll([
'pre code.language-json',
'pre code[class*="json"]',
'code.language-json',
'code[class*="json"]',
'pre code',
'code'
].join(', '));
for (const codeBlock of codeBlocks) {
const jsonText = codeBlock.textContent ? codeBlock.textContent.trim() : '';
if (jsonText && isCompleteValidJson(jsonText)) {
console.log(`Found complete valid JSON: ${jsonText.substring(0, 100)}...`);
const jsonObj = JSON.parse(jsonText);
return jsonObj.response;
}
}
}
// Fallback to text-based extraction with validation
const result = getCombinedText(); const result = getCombinedText();
return result;
};
// Initial check after a longer delay to allow content to load
setTimeout(() => {
const result = getCompleteJsonResponse();
if (result) { if (result) {
observer.disconnect(); observer.disconnect();
resolveOnce(result); resolveOnce(result);
} }
}, 2000); }, 5000); // Increased initial delay
// Progressive timeout - check for partial results // Main timeout - wait for complete JSON (45 seconds as requested)
setTimeout(() => { setTimeout(() => {
const result = getCombinedText(); const result = getCompleteJsonResponse();
if (result) { if (result) {
observer.disconnect(); observer.disconnect();
resolveOnce(result); resolveOnce(result);
} else if (partialJsonContent.length > 50) { } else {
// Try to extract response from partial JSON console.log(`Timeout after 45 seconds - no complete JSON found with ID: ${requestId}`);
try { observer.disconnect();
const responseMatch = partialJsonContent.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/); resolveOnce("Error: JSON response timeout - no complete valid JSON found");
if (responseMatch) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
observer.disconnect();
resolveOnce(responseValue);
return;
}
} catch (e) {
console.log(`Failed to extract from partial JSON: ${e}`);
}
} }
}, 45000); // 45 seconds }, 45000); // 45 seconds as requested
// Final timeout // Final safety timeout
setTimeout(() => { setTimeout(() => {
observer.disconnect(); observer.disconnect();
if (bestMatch && bestMatch.length > 20) { console.log(`Final timeout after 60 seconds for request ID: ${requestId}`);
// Last attempt to extract something useful resolveOnce("Error: Final timeout - response detection failed");
try { }, 60000); // 60 seconds final safety
const responseMatch = bestMatch.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
resolveOnce(responseValue);
return;
}
} catch (e) {
console.log(`Final extraction failed: ${e}`);
}
resolveOnce("Error: JSON parsing timeout - partial content found but unparseable");
} else {
resolveOnce("Error: JSON response timeout - no matching ID found");
}
}, 120000); // 2 minutes for complex responses
}); });
}""", }""",
[container_selector, request_id, prompt, modified_prompt] [container_selector, request_id, prompt, modified_prompt]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment