Commit 433eb3ee authored by Your Name

Improve QwenParser with cleaner parsing logic and coder style fallback

- Added pre-cleaning for thinking/special tokens
- Unified tag matching for both <tool> and <tool_call>
- Added markdown code block stripping inside tags
- Added lazy JSON parsing fallback
- Added _parse_coder_style() and _relaxed_val() helper methods
parent 73d1c77c
...@@ -98,65 +98,65 @@ class BaseParser: ...@@ -98,65 +98,65 @@ class BaseParser:
# Remove any leading/trailing whitespace # Remove any leading/trailing whitespace
return text.strip() return text.strip()
# 1. QWEN PARSER (Instruct & Coder Style)
class QwenParser(BaseParser): class QwenParser(BaseParser):
@validate_tool_output @validate_tool_output
def parse(self, text: str) -> List[Dict]: def parse(self, text: str) -> List[Dict]:
results = [] results = []
# Remove thinking blocks if present # 1. Pre-cleaning (Remove thinking and special tokens)
think_pattern = r'<\/?think>(.*?)<\/?think>' # Some Qwen variants use <|thought|> or <think>
clean_text = re.sub(think_pattern, '', text, flags=re.DOTALL) clean_text = re.sub(r'<\|.*?\|>|<(?:thought|think)>.*?</(?:thought|think)>', '', text, flags=re.DOTALL | re.IGNORECASE)
# 1. Qwen format: <tool=func_name> or <tool=func_name/> # 2. MATCH BOTH <tool> AND <tool_call>
# Match <tool=func_name>...</tool> or <tool=func_name/> # This regex finds any JSON-like content between tags named 'tool' or 'tool_call'
qwen_blocks = re.findall(r'<tool=([^>]+)>\s*(.*?)\s*</tool>', clean_text, re.DOTALL) tag_pattern = r'<(?:tool|tool_call)>(.*?)(?:</(?:tool|tool_call)>|$)'
for func_name, body in qwen_blocks: matches = re.findall(tag_pattern, clean_text, re.DOTALL | re.IGNORECASE)
if not func_name.strip():
continue
params = re.findall(r'<parameter=([^>]+)>(.*?)</parameter>', body, re.DOTALL)
args = {}
for k, v in params:
val = v.strip()
try:
args[k.strip()] = json.loads(val)
except:
args[k.strip()] = val
results.append(self._to_oa(func_name.strip(), args))
# 2. Try Instruct Style (JSON) first for block in matches:
# Matches <tool_call> or ]~b] regardless of case/hyphens block = block.strip()
json_blocks = re.findall(r'<(?:tool[_-]?call|function_call|call)>(.*?)</(?:tool[_-]?call|function_call|call)>', clean_text, re.IGNORECASE | re.DOTALL) if not block:
continue
for block in json_blocks:
# Attempt to parse as JSON (handles markdown code blocks inside tags)
json_str = re.sub(r'```(?:json)?\s*(.*?)\s*```', r'\1', block, flags=re.DOTALL).strip()
try: try:
content = self._clean_json_string(block) data = json.loads(json_str)
data = json.loads(content) # Ensure it follows the expected tool-calling schema
if 'name' in data: if 'name' in data:
results.append(self._to_oa(data['name'], data.get('arguments', {}))) results.append(self._to_oa(data['name'], data.get('arguments', {})))
except json.JSONDecodeError: except json.JSONDecodeError:
continue # Fallback: Try a 'lazy' find for the first '{' and last '}'
try:
# 3. Fallback: Coder Style / Parameter Tags start = json_str.find('{')
end = json_str.rfind('}')
if start != -1 and end != -1:
data = json.loads(json_str[start:end+1])
results.append(self._to_oa(data['name'], data.get('arguments', {})))
except:
continue
# 3. CODER STYLE FALLBACK (<function=name><parameter=key>value</parameter></function>)
if not results: if not results:
tag_pattern = r'<(?:tool|function)=([^>]+)>(.*?)(?:</(?:tool|function|tool_call)>|$)' results = self._parse_coder_style(clean_text)
matches = re.finditer(tag_pattern, clean_text, re.DOTALL | re.IGNORECASE)
for match in matches:
func_name = match.group(1).strip()
body = match.group(2)
params = re.findall(r'<parameter=([^>]+)>(.*?)</parameter>', body, re.DOTALL)
args = {}
for k, v in params:
val = v.strip()
try:
args[k.strip()] = json.loads(val)
except:
args[k.strip()] = val
results.append(self._to_oa(func_name, args))
return results return results
def _parse_coder_style(self, text: str):
# Specific fix for Coder style: <function=name> or <tool=name>
found = []
pattern = r'<(?:function|tool)=([^>]+)>(.*?)(?:</(?:function|tool)>|$)'
for name, body in re.findall(pattern, text, re.DOTALL):
params = re.findall(r'<parameter=([^>]+)>(.*?)</parameter>', body, re.DOTALL)
args = {k.strip(): self._relaxed_val(v) for k, v in params}
found.append(self._to_oa(name.strip(), args))
return found
def _relaxed_val(self, val):
val = val.strip()
try: return json.loads(val)
except: return val
# 2. DEEPSEEK PARSER # 2. DEEPSEEK PARSER
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment