Commit 562d5df5 authored by Your Name

Add debug output to QwenParser

parent 861f8741
@@ -98,6 +98,8 @@ class QwenParser(BaseParser):
# Clean text first # Clean text first
clean_text = re.sub(r'<\|.*?\|>', '', text) clean_text = re.sub(r'<\|.*?\|>', '', text)
print(f"DEBUG QwenParser: Input text length = {len(text)}")
print(f"DEBUG QwenParser: Cleaned text: {repr(clean_text[:200])}")
# Use raw string for regex with special tokens # Use raw string for regex with special tokens
think_pattern = r'<think>.*?</think>' think_pattern = r'<think>.*?</think>'
clean_text = re.sub(think_pattern, '', clean_text, flags=re.DOTALL) clean_text = re.sub(think_pattern, '', clean_text, flags=re.DOTALL)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment