Remove qwen_omni_utils import and set audio inputs directly

252525b0 · Stefy Lanza (nextime / spora ) · 6b2dfd39 · 252525b0
Commit 252525b0 authored Dec 11, 2025 by Stefy Lanza (nextime / spora )
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 3 deletions

transcript.py transcript.py +4 -3

No files found.
--- a/transcript.py
+++ b/transcript.py
@@ -77,8 +77,7 @@ def main():
    model = Qwen2_5OmniForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-Omni-7B", torch_dtype="auto", device_map="auto", trust_remote_code=True)
    processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B", trust_remote_code=True)

-    # Import utils after loading model
-    from qwen_omni_utils import process_mm_info
+    # Note: qwen_omni_utils is not available, so the audio/image/video inputs are set directly below instead of via process_mm_info

    # Load audio
    audio, sr = librosa.load(audio_file, sr=16000)
@@ -107,7 +106,9 @@ def main():

        # Preparation for inference
        text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-        audios, images, videos = process_mm_info(conversation, use_audio_in_video=False)
+        audios = [audio_chunk]
+        images = []
+        videos = []
        inputs = processor(text=text, audio=audios, images=images, videos=videos, return_tensors="pt", padding=True, use_audio_in_video=False)
        inputs = inputs.to(model.device).to(model.dtype)