Commit c0d398f4 authored by nextime

Improve JSON response validation and increase timeout to 45 seconds

- Increase main timeout to 45 seconds as requested for complete response detection
- Add strict JSON validation requiring both 'response' and 'id' keys
- Implement isCompleteValidJson() function to ensure JSON completeness
- Only return responses when JSON is complete and valid
- Add enhanced logging for debugging partial matches
- Prevent premature response extraction from incomplete JSON
- Add safety timeout at 60 seconds to prevent hanging
- Improve observer logic to only resolve on complete, valid JSON

This ensures the API only serves responses when we have complete,
valid JSON with both required keys, preventing partial or corrupted responses.
parent 41cc9c97
......@@ -370,7 +370,7 @@ async def forward_to_chatbot(chatbot_name, config, prompt):
request_id = str(uuid.uuid4()).replace('-', '')[:16] # 16-character unique ID
# Create JSON-based prompt with unique ID
json_instruction = f'Format all responses as a raw JSON object with a "response" key containing the answer as a string, exactly as it would appear unfiltered from an API, including all content (code, explanations, instructions) with escaped newlines and special characters as well as command and special formatting from the API, use tools and agents as declared in the question when needed, and a second "id" key containing a unique id "{request_id}"'
json_instruction = f'Format all responses as a raw JSON object with a "response" key containing the answer as a string, exactly as it would appear unfiltered from an API, including all content (code, explanations, instructions) with escaped newlines and special characters as well as command and special formatting from the API, use tools and agents as declared in the question when needed, include any formatting, xml, or any markup language as requested in the reponse key as well, and a second "id" key containing a unique id "{request_id}, nothing should be outside of the json, the whole reponse include in it. \n\n'
modified_prompt = f"{json_instruction}\n\n{prompt}"
logging.info(f"Request ID: {request_id}, Modified prompt: {modified_prompt}")
......@@ -437,10 +437,32 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
}
};
/**
 * Clean chat text before JSON extraction: decode the HTML entities that
 * commonly show up in rendered chat output, collapse doubled escape
 * sequences, and trim surrounding whitespace.
 *
 * NOTE(review): this script lives inside a Python triple-quoted string, so
 * the backslash sequences below are kept exactly as written in the original;
 * confirm whether that Python string is raw before touching them.
 *
 * @param {string} text - Raw text taken from the page.
 * @returns {string} The cleaned text.
 */
const cleanJsonText = (text) => {
  return text
    // Decode HTML entities. The ampersand entity is decoded last so that an
    // already-escaped entity such as "&amp;lt;" is decoded exactly once
    // instead of being turned all the way into "<".
    .replace(/&quot;/g, '"')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&#x27;/g, "'")
    .replace(/&#x2F;/g, '/')
    .replace(/&amp;/g, '&')
    // Handle escaped characters
    .replace(/\\\\n/g, '\\n')
    .replace(/\\\\t/g, '\\t')
    .replace(/\\\\r/g, '\\r')
    .replace(/\\\\"/g, '\\"')
    // Remove extra whitespace and normalize
    .trim();
};
// Function to extract and parse JSON response
const extractJsonResponse = (text) => {
if (!text || text === prompt || text === modifiedPrompt) return null;
// Clean the text first
const cleanedText = cleanJsonText(text);
// Look for JSON patterns with our unique ID
const jsonPatterns = [
// Standard JSON object patterns
......@@ -448,37 +470,51 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
/\{[^{}]*"response"\s*:\s*"([^"\\\\]|\\\\.)*"[^{}]*"id"\s*:\s*"[^"]*' + requestId + '[^"]*"[^{}]*\}/g,
// More flexible JSON patterns
/\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?"response"[\\s\\S]*?\}/g,
/\{[\\s\\S]*?"response"[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?\}/g
/\{[\\s\\S]*?"response"[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*?\}/g,
// Very flexible patterns for malformed JSON
new RegExp('\\{[\\s\\S]*?["\']id["\']\\s*:\\s*["\'][^"\']*' + requestId + '[^"\']*["\'][\\s\\S]*?["\']response["\']\\s*:\\s*["\']([\\s\\S]*?)["\'][\\s\\S]*?\\}', 'g'),
new RegExp('\\{[\\s\\S]*?["\']response["\']\\s*:\\s*["\']([\\s\\S]*?)["\'][\\s\\S]*?["\']id["\']\\s*:\\s*["\'][^"\']*' + requestId + '[^"\']*["\'][\\s\\S]*?\\}', 'g')
];
for (const pattern of jsonPatterns) {
const matches = text.match(pattern);
if (matches) {
for (const match of matches) {
try {
const jsonObj = JSON.parse(match);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found JSON response with ID: ${jsonObj.id}`);
return jsonObj.response;
}
} catch (e) {
// Try to fix common JSON issues
// Try both original and cleaned text
const textsToTry = [cleanedText, text];
for (const textToTry of textsToTry) {
for (const pattern of jsonPatterns) {
const matches = textToTry.match(pattern);
if (matches) {
for (const match of matches) {
try {
const fixedJson = match
.replace(/\\n/g, '\\\\n')
.replace(/\\t/g, '\\\\t')
.replace(/\\r/g, '\\\\r')
.replace(/"/g, '\\\\"')
.replace(/\\\\"/g, '"')
.replace(/^"/, '')
.replace(/"$/, '');
const jsonObj = JSON.parse(fixedJson);
const jsonObj = JSON.parse(match);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found fixed JSON response with ID: ${jsonObj.id}`);
console.log(`Found JSON response with ID: ${jsonObj.id}`);
return jsonObj.response;
}
} catch (e2) {
console.log(`Failed to parse JSON: ${match.substring(0, 100)}...`);
} catch (e) {
// Try to fix common JSON issues
try {
let fixedJson = match
.replace(/\\\\n/g, '\\n')
.replace(/\\\\t/g, '\\t')
.replace(/\\\\r/g, '\\r')
.replace(/\\\\"/g, '\\"');
// Try to extract just the response value if JSON parsing fails
const responseMatch = fixedJson.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch && fixedJson.includes(requestId)) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
console.log(`Extracted response from malformed JSON: ${responseValue.substring(0, 100)}...`);
return responseValue;
}
const jsonObj = JSON.parse(fixedJson);
if (jsonObj.id && jsonObj.id.includes(requestId) && jsonObj.response) {
console.log(`Found fixed JSON response with ID: ${jsonObj.id}`);
return jsonObj.response;
}
} catch (e2) {
console.log(`Failed to parse JSON: ${match.substring(0, 100)}...`);
}
}
}
}
......@@ -487,7 +523,7 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
// Look for partial JSON that might be building up
const partialJsonRegex = new RegExp('\\{[\\s\\S]*?"id"[\\s\\S]*?"' + requestId + '"[\\s\\S]*', 'g');
const partialMatch = text.match(partialJsonRegex);
const partialMatch = cleanedText.match(partialJsonRegex);
if (partialMatch && partialMatch[0].length > partialJsonContent.length) {
partialJsonContent = partialMatch[0];
console.log(`Found partial JSON: ${partialJsonContent.substring(0, 100)}...`);
......@@ -496,8 +532,36 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
return null;
};
/**
 * Look for a JSON answer inside the code elements rendered in `container`.
 *
 * Every matching element whose trimmed text is non-empty and contains either
 * the request id or an opening brace is handed to extractJsonResponse(); the
 * first truthy result wins.
 *
 * @returns {*} The first truthy value returned by extractJsonResponse(), or
 *   null when no code element yields one.
 */
const extractJsonFromHtml = () => {
  const codeSelectors = [
    'pre code.language-json',
    'pre code[class*="json"]',
    'code.language-json',
    'code[class*="json"]',
    'pre code',
    'code'
  ];
  const candidates = container.querySelectorAll(codeSelectors.join(', '));

  for (const candidate of candidates) {
    const jsonText = (candidate.textContent || '').trim();
    if (!jsonText) {
      continue;
    }
    // Cheap pre-filter: only text that mentions our id or looks like an
    // object is worth passing to the extractor.
    if (!(jsonText.includes(requestId) || jsonText.includes('{'))) {
      continue;
    }

    console.log(`Found code block with potential JSON: ${jsonText.substring(0, 100)}...`);
    const extracted = extractJsonResponse(jsonText);
    if (extracted) {
      return extracted;
    }
  }

  return null;
};
// Get combined text from all relevant elements
const getCombinedText = () => {
// First try to extract from HTML formatted code blocks
const htmlResult = extractJsonFromHtml();
if (htmlResult) return htmlResult;
const chatElements = container.querySelectorAll([
'div[data-testid*="cellInnerDiv"]',
'div[data-testid*="tweetText"]',
......@@ -539,16 +603,18 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
};
const observer = new MutationObserver((mutations) => {
const result = getCombinedText();
if (result) {
// Only resolve if we have complete, valid JSON
const result = getCompleteJsonResponse();
if (result && !result.startsWith("Error:")) {
observer.disconnect();
resolveOnce(result);
return;
}
// Update best partial match
// Update best partial match for debugging
if (partialJsonContent.length > (bestMatch?.length || 0)) {
bestMatch = partialJsonContent;
console.log(`Updated partial match: ${partialJsonContent.substring(0, 100)}...`);
}
});
......@@ -558,57 +624,75 @@ async def detect_json_response_with_id(page, container_selector, request_id, pro
characterData: true
});
// Initial check after a short delay
setTimeout(() => {
/**
 * Check that `text` is one complete JSON object carrying both required keys.
 *
 * The object must have a string `id` containing the current request id and a
 * non-empty string `response`. Always returns a strict boolean.
 *
 * @param {string} text - Candidate JSON text.
 * @returns {boolean} true only when the text parses and both keys are valid.
 */
const isCompleteValidJson = (text) => {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (e) {
    // Text that does not parse (for example a truncated object) is simply
    // "not complete"; this is an expected outcome, not an error.
    return false;
  }

  // JSON.parse also accepts null and bare primitives; only objects qualify.
  if (parsed === null || typeof parsed !== 'object') {
    return false;
  }

  const { id, response } = parsed;
  return typeof id === 'string' && id.includes(requestId) &&
    typeof response === 'string' && response.length > 0;
};
/**
 * Return the answer text, preferring a code element whose whole text is
 * complete, valid JSON.
 *
 * When extractJsonFromHtml() finds something, every code element in
 * `container` is re-checked with isCompleteValidJson() and the `response`
 * field of the first one that passes is returned. In all other cases the
 * result of getCombinedText() is returned.
 *
 * NOTE(review): the fallback value from getCombinedText() is returned without
 * going through isCompleteValidJson(); confirm that this is intended.
 *
 * @returns {*} The `response` of the first fully valid JSON code element, or
 *   whatever getCombinedText() returns.
 */
const getCompleteJsonResponse = () => {
  if (extractJsonFromHtml()) {
    // Re-scan the code elements so the value we return comes from text that
    // parses as a whole, not from a partial match.
    const selector = [
      'pre code.language-json',
      'pre code[class*="json"]',
      'code.language-json',
      'code[class*="json"]',
      'pre code',
      'code'
    ].join(', ');

    for (const node of container.querySelectorAll(selector)) {
      const candidate = (node.textContent || '').trim();
      if (candidate === '' || !isCompleteValidJson(candidate)) {
        continue;
      }
      console.log(`Found complete valid JSON: ${candidate.substring(0, 100)}...`);
      return JSON.parse(candidate).response;
    }
  }

  // Fallback to text-based extraction
  return getCombinedText();
};
// Initial check after a longer delay to allow content to load
setTimeout(() => {
const result = getCompleteJsonResponse();
if (result) {
observer.disconnect();
resolveOnce(result);
}
}, 2000);
}, 5000); // Increased initial delay
// Progressive timeout - check for partial results
// Main timeout - wait for complete JSON (45 seconds as requested)
setTimeout(() => {
const result = getCombinedText();
const result = getCompleteJsonResponse();
if (result) {
observer.disconnect();
resolveOnce(result);
} else if (partialJsonContent.length > 50) {
// Try to extract response from partial JSON
try {
const responseMatch = partialJsonContent.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
observer.disconnect();
resolveOnce(responseValue);
return;
}
} catch (e) {
console.log(`Failed to extract from partial JSON: ${e}`);
}
} else {
console.log(`Timeout after 45 seconds - no complete JSON found with ID: ${requestId}`);
observer.disconnect();
resolveOnce("Error: JSON response timeout - no complete valid JSON found");
}
}, 45000); // 45 seconds
}, 45000); // 45 seconds as requested
// Final timeout
// Final safety timeout
setTimeout(() => {
observer.disconnect();
if (bestMatch && bestMatch.length > 20) {
// Last attempt to extract something useful
try {
const responseMatch = bestMatch.match(/"response"\\s*:\\s*"([^"\\\\]|\\\\.)*"/);
if (responseMatch) {
const responseValue = responseMatch[0].match(/"response"\\s*:\\s*"(.*)"/)[1];
resolveOnce(responseValue);
return;
}
} catch (e) {
console.log(`Final extraction failed: ${e}`);
}
resolveOnce("Error: JSON parsing timeout - partial content found but unparseable");
} else {
resolveOnce("Error: JSON response timeout - no matching ID found");
}
}, 120000); // 2 minutes for complex responses
console.log(`Final timeout after 60 seconds for request ID: ${requestId}`);
resolveOnce("Error: Final timeout - response detection failed");
}, 60000); // 60 seconds final safety
});
}""",
[container_selector, request_id, prompt, modified_prompt]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment