Ad123 / app.py
aach456's picture
Update app.py
f6cb684 verified
raw
history blame
4.6 kB
import gradio as gr
import torch
import numpy as np
from diffusers import I2VGenXLPipeline
from transformers import MusicgenForConditionalGeneration, AutoProcessor
from PIL import Image
from moviepy.editor import ImageSequenceClip
import io
import scipy.io.wavfile
import ffmpeg
def generate_video(image, prompt, negative_prompt, video_length, progress=None):
    """Render a silent MP4 by calling the I2VGen-XL pipeline once per frame.

    Args:
        image: Conditioning image, forwarded to the pipeline as ``image``.
        prompt: Text prompt describing the video.
        negative_prompt: Text the pipeline should steer away from.
        video_length: Clip duration in seconds; 30 frames are generated per
            second of video.
        progress: Optional callable invoked after every pipeline call with the
            completed fraction (a float in ``(0, 1]``).

    Returns:
        Path of the written video file (``"output_video.mp4"``).

    Raises:
        ValueError: If ``video_length`` does not yield at least one frame.
    """
    fps = 30
    # The UI number field may hand over a float; range() needs an int.
    total_frames = int(video_length * fps)
    if total_frames <= 0:
        raise ValueError("video_length must yield at least one frame")

    generator = torch.manual_seed(8888)

    # Set the device to CPU or a non-NVIDIA GPU
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load the pipeline and move it to the selected device
    pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float32)
    pipeline.to(device)

    frames = []
    for index in range(total_frames):
        result = pipeline(
            prompt=prompt,
            image=image,
            num_inference_steps=5,
            negative_prompt=negative_prompt,
            guidance_scale=9.0,
            generator=generator,
            num_frames=1,
        )
        # `.frames[0]` is the frame sequence of the first (only) generated
        # video, so add each image on its own instead of stacking the whole
        # sequence into a single 4-D array.
        frames.extend(np.array(frame) for frame in result.frames[0])
        # Report progress through the callback. A `yield` here would turn this
        # function into a generator and the caller would never receive the
        # output path.
        if progress is not None:
            progress((index + 1) / total_frames)

    # Create a video clip from the frames
    output_file = "output_video.mp4"
    clip = ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(output_file, codec='libx264', audio=False)
    return output_file
def generate_music(prompt, unconditional=False):
    """Generate a short MusicGen clip and save it as a 16-bit PCM WAV file.

    Args:
        prompt: Text description of the music; only used when
            ``unconditional`` is falsy.
        unconditional: When truthy, sample from the model's unconditional
            inputs instead of encoding ``prompt``.

    Returns:
        Path of the written audio file (``"musicgen_out.wav"``).
    """
    target = "cuda:0" if torch.cuda.is_available() else "cpu"
    musicgen = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    musicgen.to(target)

    if not unconditional:
        # Text-conditioned generation: encode the prompt, then sample.
        text_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        encoded = text_processor(text=prompt, padding=True, return_tensors="pt")
        generated = musicgen.generate(
            **encoded.to(target),
            do_sample=True,
            guidance_scale=3,
            max_new_tokens=256,
        )
    else:
        seed_inputs = musicgen.get_unconditional_inputs(num_samples=1)
        generated = musicgen.generate(**seed_inputs, do_sample=True, max_new_tokens=256)

    # Take the first generated sample; if it still has more than one
    # dimension, keep only its first row.
    waveform = generated[0].cpu().numpy()
    if waveform.ndim > 1:
        waveform = waveform[0]

    # Clamp to [-1, 1] and rescale to the int16 range for the WAV writer.
    pcm = (np.clip(waveform, -1.0, 1.0) * 32767).astype(np.int16)

    wav_path = "musicgen_out.wav"
    scipy.io.wavfile.write(wav_path, musicgen.config.audio_encoder.sampling_rate, pcm)
    return wav_path
def combine_audio_video(audio_file, video_file):
    """Mux an audio file onto a video file with ffmpeg.

    The video stream is copied as-is and the audio is encoded as AAC.

    Args:
        audio_file: Path of the audio input.
        video_file: Path of the video input.

    Returns:
        Path of the combined file (``"combined_output.mp4"``).
    """
    output_file = "combined_output.mp4"
    video = ffmpeg.input(video_file)
    audio = ffmpeg.input(audio_file)
    muxed = ffmpeg.output(video, audio, output_file, vcodec='copy', acodec='aac')
    # The output name is fixed, so a previous run's file is usually present;
    # without overwrite_output ffmpeg stops at its "overwrite?" prompt.
    ffmpeg.run(muxed, overwrite_output=True)
    return output_file
def interface(image_path, prompt, negative_prompt, video_length, music_prompt, unconditional):
    """Gradio click handler: generate the video, the music, and mux them.

    Args:
        image_path: File path of the uploaded conditioning image.
        prompt: Video prompt.
        negative_prompt: Negative prompt for the video.
        video_length: Requested video length in seconds.
        music_prompt: Prompt for the music generation.
        unconditional: Whether to generate music without using the prompt.

    Returns:
        Path of the combined audio/video file.

    Raises:
        gr.Error: If no image was uploaded.
    """
    if not image_path:
        # Without this guard a missing upload fails inside Image.open with an
        # opaque error.
        raise gr.Error("Please upload an image before generating.")

    # convert() returns a fully loaded copy, so the file handle can be closed
    # right away; RGB drops any alpha channel or palette from the upload.
    with Image.open(image_path) as uploaded:
        image = uploaded.convert("RGB")

    video_file = generate_video(image, prompt, negative_prompt, video_length)
    audio_file = generate_music(music_prompt, unconditional)
    return combine_audio_video(audio_file, video_file)
# Build the Gradio UI: a title, the generation inputs, a trigger button and
# the video output, then start the app.
# NOTE(review): indentation was lost in this copy of the file, so which of the
# components below are nested inside the Row cannot be determined from here -
# confirm against the original layout.
with gr.Blocks() as demo:
gr.Markdown("# AI-Powered Video and Music Generation")
with gr.Row():
# The image component is configured with type="filepath"; its value is the
# first argument passed to interface().
image_input = gr.Image(type="filepath", label="Upload Image")
prompt_input = gr.Textbox(label="Enter the Video Prompt")
negative_prompt_input = gr.Textbox(label="Enter the Negative Prompt")
# Defaults to 10; presumably precision=0 restricts the value to whole
# seconds - confirm against the Gradio Number documentation.
video_length_input = gr.Number(label="Video Length (seconds)", value=10, precision=0)
music_prompt_input = gr.Textbox(label="Enter the Music Prompt")
unconditional_checkbox = gr.Checkbox(label="Generate Unconditional Music")
generate_button = gr.Button("Generate Video and Music")
output_video = gr.Video(label="Output Video with Sound")
# Wire the button to interface(); the inputs list is in the same order as
# interface()'s parameters and the result is routed to output_video.
generate_button.click(
interface,
inputs=[image_input, prompt_input, negative_prompt_input, video_length_input, music_prompt_input, unconditional_checkbox],
outputs=output_video,
show_progress=True
)
# Start the Gradio app; this runs at module level.
demo.launch()