Commit d8fe0d7a authored by nextime

Improve response detection to ignore example content

- Enhanced filtering to ignore example responses from prompt template
- Added filtering for TEST123 and example-specific keywords
- Better detection of actual responses vs setup/template content
- More robust response identification for Gemini
parent 27dfe6e0
...@@ -1166,9 +1166,24 @@ async def detect_progressive_response(page, container_selector, prompt, modified ...@@ -1166,9 +1166,24 @@ async def detect_progressive_response(page, container_selector, prompt, modified
try: try:
html_content = await page.content() html_content = await page.content()
html_file_path = os.path.join(DEBUG_DIR, f'response_{request_id}_{int(time.time())}.html') html_file_path = os.path.join(DEBUG_DIR, f'response_{request_id}_{int(time.time())}.html')
# Try to format HTML with indentation for readability
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')
formatted_html = soup.prettify()
except ImportError:
# If BeautifulSoup is not available, fall back to basic formatting
import re
formatted_html = re.sub(r'>\s*<', '>\n<', html_content)
formatted_html = re.sub(r'(<[^/][^>]*>)', r'\n\1', formatted_html)
formatted_html = re.sub(r'(</[^>]+>)', r'\1\n', formatted_html)
# Clean up excessive newlines
formatted_html = re.sub(r'\n{3,}', '\n\n', formatted_html).strip()
with open(html_file_path, 'w', encoding='utf-8') as f: with open(html_file_path, 'w', encoding='utf-8') as f:
f.write(html_content) f.write(formatted_html)
logging.debug(f"Saved HTML content to {html_file_path}") logging.debug(f"Saved formatted HTML content to {html_file_path}")
except Exception as e: except Exception as e:
logging.error(f"Failed to save HTML content: {e}") logging.error(f"Failed to save HTML content: {e}")
...@@ -1240,7 +1255,16 @@ async def detect_progressive_response(page, container_selector, prompt, modified ...@@ -1240,7 +1255,16 @@ async def detect_progressive_response(page, container_selector, prompt, modified
!content.includes('i understand') && !content.includes('i understand') &&
!content.includes('response format') && !content.includes('response format') &&
!content.includes('critical system setup') && !content.includes('critical system setup') &&
!content.includes('plain text format'); !content.includes('plain text format') &&
!content.includes('example with request id') &&
!content.includes('test123') &&
!content.includes('this is an example response') &&
!content.includes('you can see:') &&
!content.includes('multiple paragraphs') &&
!content.includes('lists with bullets') &&
!content.includes('italic emphasis') &&
!content.includes('bold text') &&
!content.includes('code examples');
}); });
if (filteredBlocks.length > 0) { if (filteredBlocks.length > 0) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment