feat: add multimodal client doubt modes

parent 49c2f682
......@@ -234,6 +234,52 @@ def test_build_request_spec_for_video_generation_uses_json_payload(tmp_path):
}
def test_build_request_spec_for_video_doubt_uses_text_endpoint_with_video_context(tmp_path):
    """Check that video-doubt mode targets the chat completions endpoint.

    The single user message must mention both the video file path and the
    question that was passed as the prompt.
    """
    clip = tmp_path / "clip.mp4"
    clip.write_bytes(b"video-bytes")
    question = "What happens in this clip?"

    request = build_request_spec(
        {
            "mode": "video-doubt",
            "url": "http://127.0.0.1:6745",
            "model": "vision:test",
            "prompt": question,
            "output_dir": tmp_path,
            "token": None,
            "audio_file": None,
            "video_file": str(clip),
            "response_format": None,
        }
    )

    payload = request["json"]
    user_text = payload["messages"][0]["content"]
    assert request["url"].endswith("/v1/chat/completions")
    assert payload["model"] == "vision:test"
    assert str(clip) in user_text
    assert question in user_text
def test_build_request_spec_for_music_audio_doubt_uses_text_endpoint_with_audio_context(tmp_path):
    """Check that music-audio-doubt mode targets the chat completions endpoint.

    The single user message must mention both the audio file path and the
    question that was passed as the prompt.
    """
    recording = tmp_path / "clip.wav"
    recording.write_bytes(b"audio-bytes")
    question = "Describe the music."

    request = build_request_spec(
        {
            "mode": "music-audio-doubt",
            "url": "http://127.0.0.1:6745",
            "model": "audio:test",
            "prompt": question,
            "output_dir": tmp_path,
            "token": None,
            "audio_file": str(recording),
            "video_file": None,
            "response_format": None,
        }
    )

    payload = request["json"]
    user_text = payload["messages"][0]["content"]
    assert request["url"].endswith("/v1/chat/completions")
    assert payload["model"] == "audio:test"
    assert str(recording) in user_text
    assert question in user_text
def test_build_request_spec_for_transcription_requires_audio_file_flag(tmp_path):
config = {
"mode": "transcription",
......
......@@ -145,6 +145,40 @@ def build_request_spec(config: dict) -> dict:
},
}
if mode == "video-doubt":
video_path = _require_file(config.get("video_file"), "--video-file")
content = (
f"Video file: {video_path}\n"
f"Question: {config['prompt']}\n"
"Answer based on the referenced video input if the model/backend supports it."
)
return {
"method": "POST",
"url": f"{config['url']}/v1/chat/completions",
"headers": headers,
"json": {
"model": config["model"],
"messages": [{"role": "user", "content": content}],
},
}
if mode == "music-audio-doubt":
audio_path = _require_file(config.get("audio_file"), "--audio-file")
content = (
f"Audio file: {audio_path}\n"
f"Question: {config['prompt']}\n"
"Answer based on the referenced audio input if the model/backend supports it."
)
return {
"method": "POST",
"url": f"{config['url']}/v1/chat/completions",
"headers": headers,
"json": {
"model": config["model"],
"messages": [{"role": "user", "content": content}],
},
}
raise ValueError(f"Unsupported mode for this task: {mode}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment