Add transformers backend for MusicGen (Python 3.13+ compatible)

The generate_music() function now supports two backends:

1. audiocraft (preferred):
   - Original MusicGen implementation
   - Works on Python 3.12 and lower
   - If audiocraft is not installed or fails at runtime, the function falls back to transformers

2. transformers (fallback):
   - Uses HuggingFace transformers library
   - Works on Python 3.13+, where audiocraft is not compatible
   - No spacy/blis dependency issues

The function automatically:
- Tries audiocraft first (if available)
- Falls back to transformers if audiocraft fails or is not installed
- Provides clear error messages if neither backend is available

This allows MusicGen to generate music on Python 3.13 without pulling in
the problematic audiocraft → spacy → thinc → blis dependency chain.
parent f28cca78
...@@ -3586,15 +3586,21 @@ def generate_tts(text, output_path, voice_name="edge_female_us", custom_voice_id ...@@ -3586,15 +3586,21 @@ def generate_tts(text, output_path, voice_name="edge_female_us", custom_voice_id
def generate_music(prompt, output_path, duration_seconds=10, model_size="medium", args=None): def generate_music(prompt, output_path, duration_seconds=10, model_size="medium", args=None):
"""Generate music using MusicGen""" """Generate music using MusicGen
if not MUSICGEN_AVAILABLE:
print("❌ MusicGen not available. Install with: pip install audiocraft") Supports two backends:
return None 1. audiocraft (preferred, but not compatible with Python 3.13)
2. transformers (works on Python 3.13+)
Automatically falls back to transformers if audiocraft is not available.
"""
print(f"🎵 Generating music with MusicGen ({model_size})...") print(f"🎵 Generating music with MusicGen ({model_size})...")
print(f" Prompt: {prompt}") print(f" Prompt: {prompt}")
print(f" Duration: {duration_seconds}s") print(f" Duration: {duration_seconds}s")
# Try audiocraft first (preferred method)
if MUSICGEN_AVAILABLE:
print(" Using audiocraft backend...")
try: try:
# Load model # Load model
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
...@@ -3622,7 +3628,60 @@ def generate_music(prompt, output_path, duration_seconds=10, model_size="medium" ...@@ -3622,7 +3628,60 @@ def generate_music(prompt, output_path, duration_seconds=10, model_size="medium"
print(f" ✅ Saved music: {output_path}") print(f" ✅ Saved music: {output_path}")
return output_path return output_path
except Exception as e: except Exception as e:
print(f"❌ MusicGen failed: {e}") print(f" ⚠️ audiocraft failed: {e}")
print(" Trying transformers backend...")
# Fallback to transformers (works on Python 3.13+)
if TRANSFORMERS_AVAILABLE:
print(" Using transformers backend (Python 3.13+ compatible)...")
try:
from transformers import AutoProcessor, MusicgenForConditionalGeneration
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
# Map model size to model name
model_name_map = {
"small": "facebook/musicgen-small",
"medium": "facebook/musicgen-medium",
"large": "facebook/musicgen-large",
}
model_name = model_name_map.get(model_size, f"facebook/musicgen-{model_size}")
print(f" Loading model: {model_name}")
processor = AutoProcessor.from_pretrained(model_name)
model = MusicgenForConditionalGeneration.from_pretrained(
model_name,
torch_dtype=dtype
).to(device)
# Calculate tokens for duration (roughly 50 tokens per second)
max_tokens = int(duration_seconds * 50)
# Generate
inputs = processor(text=[prompt], padding=True, return_tensors="pt").to(device)
audio_values = model.generate(**inputs, max_new_tokens=max_tokens)
# Save
sampling_rate = model.config.audio_encoder.sampling_rate
audio_data = audio_values[0, 0].cpu().numpy()
if SCIPY_AVAILABLE:
scipy.io.wavfile.write(output_path, rate=sampling_rate, data=audio_data)
else:
import numpy as np
sf.write(output_path, audio_data, sampling_rate)
print(f" ✅ Saved music: {output_path}")
return output_path
except Exception as e:
print(f"❌ transformers MusicGen failed: {e}")
return None
print("❌ MusicGen not available.")
print(" Install one of:")
print(" - Python 3.12 or lower: pip install audiocraft")
print(" - Python 3.13+: pip install transformers scipy")
return None return None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment