fix: harden multimodal client I/O handling

9876a8fe · Stefy Lanza (nextime / spora ) · 05d3ae28 · 9876a8fe · 9876a8fe
Commit 9876a8fe authored May 06, 2026 by Stefy Lanza (nextime / spora )
Show whitespace changes
Inline Side-by-side

Showing with 60 additions and 15 deletions

test_manual_multimodal_test_client.py tests/test_manual_multimodal_test_client.py +32 -11

manual_multimodal_test_client.py tools/manual_multimodal_test_client.py +28 -4

No files found.
--- a/tests/test_manual_multimodal_test_client.py
+++ b/tests/test_manual_multimodal_test_client.py
@@ -169,18 +169,18 @@ def test_build_request_spec_for_transcription_uses_multipart_file(tmp_path):

    spec = build_request_spec(config)

-    assert spec == {
-        "method": "POST",
-        "url": "http://127.0.0.1:6745/v1/audio/transcriptions",
-        "headers": {"Accept": "application/json"},
-        "data": {
+    assert spec["method"] == "POST"
+    assert spec["url"] == "http://127.0.0.1:6745/v1/audio/transcriptions"
+    assert spec["headers"] == {"Accept": "application/json"}
+    assert spec["data"] == {
        "model": "audio:test",
        "prompt": "Transcribe carefully",
-        },
-        "files": {
-            "file": ("sample.wav", b"wav-bytes"),
-        },
    }
+    uploaded_name, uploaded_file = spec["files"]["file"]
+    assert uploaded_name == "sample.wav"
+    assert uploaded_file.read() == b"wav-bytes"
+    assert uploaded_file.closed is False
+    uploaded_file.close()


 def test_build_request_spec_for_audio_generation_uses_json_payload(tmp_path):
@@ -406,6 +406,27 @@ def test_task5_handle_response_payload_returns_llm_text_without_artifact(tmp_pat
    assert result["payload"] == payload


+def test_task5_handle_response_payload_flattens_structured_chat_content(tmp_path):
+    payload = {
+        "choices": [{
+            "message": {
+                "content": [
+                    {"type": "text", "text": "hello"},
+                    {"type": "input_text", "text": "from model"},
+                    {"type": "tool_result", "value": 7},
+                ]
+            }
+        }]
+    }
+    response = DummyResponse(payload)
+
+    result = handle_response_payload("llm", response, tmp_path)
+
+    assert result["text"] == 'hello\nfrom model\n{"type": "tool_result", "value": 7}'
+    assert result["artifact_path"] is None
+    assert result["payload"] == payload
+
+
 def test_task5_handle_response_payload_downloads_url_artifact(monkeypatch, tmp_path):
    payload = {
        "data": [{"url": "http://example.invalid/audio.wav", "text": "generated audio summary"}]

--- a/tools/manual_multimodal_test_client.py
+++ b/tools/manual_multimodal_test_client.py
@@ -4,6 +4,7 @@ import argparse
 import base64
 import json
 import time
+from contextlib import ExitStack
 from pathlib import Path

 import requests
@@ -113,6 +114,7 @@ def build_request_spec(config: dict) -> dict:

    if mode == "transcription":
        audio_path = _require_file(config.get("audio_file"), "--audio-file")
+        file_stack = ExitStack()
        return {
            "method": "POST",
            "url": f"{config['url']}/v1/audio/transcriptions",
@@ -122,8 +124,9 @@ def build_request_spec(config: dict) -> dict:
                "prompt": config["prompt"],
            },
            "files": {
-                "file": (audio_path.name, audio_path.read_bytes()),
+                "file": (audio_path.name, file_stack.enter_context(audio_path.open("rb"))),
            },
+            "_close": file_stack.close,
        }

    if mode == "audio-generation":
@@ -220,12 +223,28 @@ def _write_artifact(output_dir: Path, mode: str, payload: bytes) -> Path:
    return artifact_path


+def _stringify_chat_content(content) -> str:
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts = []
+        for item in content:
+            if isinstance(item, dict) and item.get("type") in {"text", "input_text"} and isinstance(item.get("text"), str):
+                parts.append(item["text"])
+            else:
+                parts.append(json.dumps(item, sort_keys=True) if isinstance(item, (dict, list)) else str(item))
+        return "\n".join(parts)
+    if isinstance(content, (dict, list)):
+        return json.dumps(content, sort_keys=True)
+    return str(content)
+
+
 def handle_response_payload(mode: str, response, output_dir: Path) -> dict:
    response.raise_for_status()
    payload = response.json()

    if mode in {"llm", "video-doubt", "music-audio-doubt"}:
-        text = payload["choices"][0]["message"]["content"]
+        text = _stringify_chat_content(payload["choices"][0]["message"]["content"])
        return {"text": text, "artifact_path": None, "payload": payload}

    if mode == "transcription":
@@ -249,5 +268,10 @@ def handle_response_payload(mode: str, response, output_dir: Path) -> dict:

 def execute_request(spec: dict):
    method = spec["method"]
-    kwargs = {key: value for key, value in spec.items() if key not in {"method", "url"}}
+    cleanup = spec.get("_close")
+    kwargs = {key: value for key, value in spec.items() if key not in {"method", "url", "_close"}}
+    try:
        return requests.request(method=method, url=spec["url"], timeout=300, **kwargs)
+    finally:
+        if cleanup is not None:
+            cleanup()