Spaces:

crittiksglobal
/

zerogpu-studio

Running on Zero

zerogpu-studio / app.py

Nirmal Liyon

Fix SD 3.5 params: guidance_scale=3.5, steps=28

ea6e1c5 3 days ago

28.7 kB

	# ═══════════════════════════════════════════════════════════════════════════════
	# ⚡ CRITTIKS GLOBAL - ZEROGPU CREATIVE STUDIO v3.0 ULTIMATE
	# ═══════════════════════════════════════════════════════════════════════════════
	# BLEEDING-EDGE MODELS:
	# - Z-Image-Turbo: 8-step ultra-fast image generation
	# - Wan 2.2 I2V 14B: FP8 quantized + AoTI + Lightning LoRA (4-step video!)
	# - MusicGen: AI music generation
	# ═══════════════════════════════════════════════════════════════════════════════

	import os
	import gc
	import spaces
	import torch
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
	import textwrap
	import tempfile
	import random
	import gradio as gr

	# ═══════════════════════════════════════════════════════════════════════════════
	# 📦 GLOBAL MODEL HOLDERS
	# ═══════════════════════════════════════════════════════════════════════════════
	music_model = None
	music_processor = None
	video_pipe = None
	video_loaded = False

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🎨 CONFIGURATION
	# ═══════════════════════════════════════════════════════════════════════════════
	MAX_SEED = np.iinfo(np.int32).max

	# Video settings for Wan 2.2
	VIDEO_MAX_DIM = 832
	VIDEO_MIN_DIM = 480
	VIDEO_SQUARE_DIM = 640
	VIDEO_MULTIPLE_OF = 16
	VIDEO_FIXED_FPS = 16
	VIDEO_MIN_FRAMES = 8
	VIDEO_MAX_FRAMES = 80

	# Image settings for Z-Image-Turbo
	IMAGE_DEFAULT_SIZE = 1024

	# Sample preview image
	def create_sample_image():
	"""Create a sample gradient image for preview"""
	img = Image.new('RGB', (512, 512))
	for y in range(512):
	for x in range(512):
	r = int(50 + (x / 512) * 100)
	g = int(30 + (y / 512) * 80)
	b = int(80 + ((x + y) / 1024) * 120)
	img.putpixel((x, y), (r, g, b))
	return img

	SAMPLE_IMAGE = create_sample_image()

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🎨 STYLE & OPTION CONFIGURATIONS
	# ═══════════════════════════════════════════════════════════════════════════════
	OCCASIONS = [
	"Christmas", "New Year", "Birthday", "Wedding", "Valentine's Day",
	"Anniversary", "Graduation", "Thank You", "Diwali", "Eid",
	"Mother's Day", "Father's Day", "Halloween", "Easter"
	]

	STYLES = {
	"Festive Magic": "magical christmas scene, golden lights, snow, warm cozy atmosphere, cinematic",
	"Luxury Gold": "luxurious black and gold, elegant marble, premium design, sophisticated",
	"Soft Dreams": "soft pastel colors, dreamy clouds, ethereal glow, romantic",
	"Neon Future": "cyberpunk neon lights, futuristic city, purple cyan pink, sci-fi",
	"Nature Beauty": "beautiful nature, flowers, green forest, sunlight rays, peaceful",
	"Ocean Calm": "serene ocean sunset, beach, turquoise water, golden hour",
	"Cosmic Galaxy": "cosmic nebula, stars, aurora borealis, space, mystical",
	"Dark Elegance": "dark moody aesthetic, silver accents, dramatic lighting, premium"
	}

	OVERLAY_STYLES = {
	"None": None,
	"Frosted Glass": {"color": (20, 30, 50), "opacity": 0.6},
	"Dark Luxury": {"color": (0, 0, 0), "opacity": 0.7},
	"Light Dream": {"color": (255, 255, 255), "opacity": 0.4},
	"Neon Glow": {"color": (30, 0, 60), "opacity": 0.65},
	"Forest Green": {"color": (20, 50, 30), "opacity": 0.6},
	"Sunset Warm": {"color": (70, 35, 20), "opacity": 0.6},
	"Ocean Blue": {"color": (20, 40, 70), "opacity": 0.55}
	}

	MUSIC_STYLES = {
	"Peaceful Piano": "peaceful piano melody, emotional, cinematic, gentle",
	"Acoustic Guitar": "warm acoustic guitar, soft strumming, heartfelt melody",
	"Epic Orchestra": "epic orchestral music, cinematic strings, powerful",
	"Lo-Fi Chill": "lofi hip hop beats, relaxing, chill vibes, jazzy",
	"Ambient Space": "ambient space music, ethereal pads, dreamy atmosphere",
	"Holiday Magic": "christmas holiday music, bells, magical festive cheer"
	}

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🎨 GLASS OVERLAY FUNCTION
	# ═══════════════════════════════════════════════════════════════════════════════
	def add_glass_overlay(image, text, footer, overlay_style="Frosted Glass", font_size=42, enable_overlay=True):
	"""Add glass overlay with customizable text size"""
	if image.mode != 'RGB':
	image = image.convert('RGB')

	width, height = image.size
	result = image.copy()
	draw = ImageDraw.Draw(result)

	style = OVERLAY_STYLES.get(overlay_style)

	# Calculate panel dimensions
	margin = int(width * 0.04)
	panel_height = int(height * 0.28)
	panel_top = height - panel_height - margin
	panel_bottom = panel_top + panel_height
	panel_left = margin
	panel_right = width - margin

	# Apply overlay panel if enabled and style exists
	if enable_overlay and style is not None:
	region = image.crop((panel_left, panel_top, panel_right, panel_bottom))
	blurred = region.filter(ImageFilter.GaussianBlur(radius=20))
	blurred = ImageEnhance.Brightness(blurred).enhance(0.6)

	color_overlay = Image.new('RGB', (panel_right - panel_left, panel_height), style["color"])
	blended = Image.blend(blurred, color_overlay, style["opacity"])

	result.paste(blended, (panel_left, panel_top))
	draw = ImageDraw.Draw(result)

	for i in range(4):
	draw.rectangle(
	[(panel_left + i, panel_top + i), (panel_right - i, panel_bottom - i)],
	outline=(255, 255, 255)
	)

	# Load font with custom size
	try:
	font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", int(font_size))
	small_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", int(font_size * 0.45))
	except:
	font = ImageFont.load_default()
	small_font = font

	# Calculate text wrapping
	avg_char_width = font_size * 0.55
	max_chars = max(15, int((panel_right - panel_left - 40) / avg_char_width))
	lines = textwrap.wrap(text, width=max_chars)

	line_height = int(font_size * 1.25)
	total_height = len(lines) * line_height
	text_y = panel_top + (panel_height - total_height) // 2 - 10

	# Draw text with shadow
	for line in lines:
	bbox = draw.textbbox((0, 0), line, font=font)
	text_x = (width - (bbox[2] - bbox[0])) // 2
	draw.text((text_x + 2, text_y + 2), line, font=font, fill=(0, 0, 0))
	draw.text((text_x + 1, text_y + 1), line, font=font, fill=(0, 0, 0))
	draw.text((text_x, text_y), line, font=font, fill=(255, 255, 255))
	text_y += line_height

	# Draw footer
	if footer:
	bbox = draw.textbbox((0, 0), footer, font=small_font)
	footer_x = (width - (bbox[2] - bbox[0])) // 2
	draw.text((footer_x + 1, panel_bottom - 30 + 1), footer, font=small_font, fill=(0, 0, 0))
	draw.text((footer_x, panel_bottom - 30), footer, font=small_font, fill=(200, 200, 200))

	return result

	# ═══════════════════════════════════════════════════════════════════════════════
	# 👁️ LIVE PREVIEW (No GPU)
	# ═══════════════════════════════════════════════════════════════════════════════
	def update_preview(message, overlay_style, font_size, enable_overlay, recipient):
	"""Generate live preview with current settings"""
	if not message:
	message = "Your message here..."
	if not recipient:
	recipient = "Friend"

	footer = f"For {recipient} \| Crittiks Global"

	preview = add_glass_overlay(
	SAMPLE_IMAGE.copy(),
	message,
	footer,
	overlay_style,
	font_size,
	enable_overlay
	)

	return preview

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🖼️ STABLE DIFFUSION 3.5 LARGE - High Quality Image Generation
	# ═══════════════════════════════════════════════════════════════════════════════
	image_pipe = None

	@spaces.GPU
	def generate_image_gpu(prompt, style_desc, height=1024, width=1024, steps=28, seed=None, randomize_seed=True):
	"""Generate image using Stable Diffusion 3.5 Large"""
	global image_pipe
	from diffusers import StableDiffusion3Pipeline

	if image_pipe is None:
	print("Loading Stable Diffusion 3.5 Large...")
	image_pipe = StableDiffusion3Pipeline.from_pretrained(
	"stabilityai/stable-diffusion-3.5-large",
	torch_dtype=torch.bfloat16,
	)
	image_pipe.to("cuda")
	print("SD 3.5 Large loaded!")

	# Handle seed
	if randomize_seed or seed is None:
	seed = torch.randint(0, 2**32 - 1, (1,)).item()

	generator = torch.Generator("cuda").manual_seed(int(seed))

	full_prompt = f"{prompt}, {style_desc}, masterpiece, ultra detailed, 8k quality, cinematic lighting, professional photography, NO TEXT NO WORDS NO LETTERS"

	image = image_pipe(
	prompt=full_prompt,
	num_inference_steps=28,
	guidance_scale=3.5,
	generator=generator,
	).images[0]

	return image, seed

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🎵 MUSICGEN: AI Music Generation
	# ═══════════════════════════════════════════════════════════════════════════════
	@spaces.GPU
	def generate_music_gpu(prompt):
	"""Generate music using MusicGen"""
	global music_model, music_processor
	from transformers import AutoProcessor, MusicgenForConditionalGeneration
	import scipy.io.wavfile as wavfile

	if music_model is None:
	print("Loading MusicGen...")
	music_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
	music_model = MusicgenForConditionalGeneration.from_pretrained(
	"facebook/musicgen-small",
	torch_dtype=torch.float16
	)
	music_model.to("cuda")
	print("MusicGen loaded!")

	inputs = music_processor(text=[prompt], padding=True, return_tensors="pt").to("cuda")
	audio_values = music_model.generate(**inputs, max_new_tokens=256, do_sample=True)

	sampling_rate = music_model.config.audio_encoder.sampling_rate
	audio_data = audio_values[0, 0].cpu().numpy()
	audio_data = audio_data / np.max(np.abs(audio_data))
	audio_data = (audio_data * 32767).astype(np.int16)

	temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	wavfile.write(temp_file.name, sampling_rate, audio_data)

	return temp_file.name

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🎬 WAN 2.2 I2V: FP8 + AoTI + Lightning LoRA (4-Step Video!)
	# ═══════════════════════════════════════════════════════════════════════════════
	def resize_image_for_video(image: Image.Image) -> Image.Image:
	"""Resize image for Wan 2.2 video generation"""
	width, height = image.size

	if width == height:
	return image.resize((VIDEO_SQUARE_DIM, VIDEO_SQUARE_DIM), Image.LANCZOS)

	aspect_ratio = width / height
	MAX_ASPECT_RATIO = VIDEO_MAX_DIM / VIDEO_MIN_DIM
	MIN_ASPECT_RATIO = VIDEO_MIN_DIM / VIDEO_MAX_DIM

	image_to_resize = image

	if aspect_ratio > MAX_ASPECT_RATIO:
	crop_width = int(round(height * MAX_ASPECT_RATIO))
	left = (width - crop_width) // 2
	image_to_resize = image.crop((left, 0, left + crop_width, height))
	elif aspect_ratio < MIN_ASPECT_RATIO:
	crop_height = int(round(width / MIN_ASPECT_RATIO))
	top = (height - crop_height) // 2
	image_to_resize = image.crop((0, top, width, top + crop_height))

	if width > height:
	target_w = VIDEO_MAX_DIM
	target_h = int(round(target_w / aspect_ratio))
	else:
	target_h = VIDEO_MAX_DIM
	target_w = int(round(target_h * aspect_ratio))

	final_w = round(target_w / VIDEO_MULTIPLE_OF) * VIDEO_MULTIPLE_OF
	final_h = round(target_h / VIDEO_MULTIPLE_OF) * VIDEO_MULTIPLE_OF

	final_w = max(VIDEO_MIN_DIM, min(VIDEO_MAX_DIM, final_w))
	final_h = max(VIDEO_MIN_DIM, min(VIDEO_MAX_DIM, final_h))

	return image_to_resize.resize((final_w, final_h), Image.LANCZOS)

	def get_num_frames(duration_seconds: float):
	"""Calculate number of frames from duration"""
	return 1 + int(np.clip(
	int(round(duration_seconds * VIDEO_FIXED_FPS)),
	VIDEO_MIN_FRAMES,
	VIDEO_MAX_FRAMES,
	))

	def load_video_pipeline():
	"""Load Wan 2.2 I2V pipeline with FP8 quantization and AoTI"""
	global video_pipe, video_loaded

	if video_loaded:
	return video_pipe

	print("Loading Wan 2.2 I2V 14B with FP8 + AoTI optimization...")

	from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
	from diffusers.models.transformers.transformer_wan import WanTransformer3DModel

	MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

	# Load pipeline with bf16 transformers
	video_pipe = WanImageToVideoPipeline.from_pretrained(
	MODEL_ID,
	transformer=WanTransformer3DModel.from_pretrained(
	'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
	subfolder='transformer',
	torch_dtype=torch.bfloat16,
	device_map='cuda',
	),
	transformer_2=WanTransformer3DModel.from_pretrained(
	'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
	subfolder='transformer_2',
	torch_dtype=torch.bfloat16,
	device_map='cuda',
	),
	torch_dtype=torch.bfloat16,
	).to('cuda')

	# Load Lightning LoRA for fast inference
	print("Loading Lightning LoRA for 4-step inference...")
	video_pipe.load_lora_weights(
	"Kijai/WanVideo_comfy",
	weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
	adapter_name="lightx2v"
	)
	video_pipe.load_lora_weights(
	"Kijai/WanVideo_comfy",
	weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
	adapter_name="lightx2v_2",
	load_into_transformer_2=True
	)
	video_pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
	video_pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
	video_pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
	video_pipe.unload_lora_weights()

	# Apply FP8 quantization
	print("Applying FP8 quantization...")
	from torchao.quantization import quantize_
	from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig

	quantize_(video_pipe.text_encoder, Int8WeightOnlyConfig())
	quantize_(video_pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
	quantize_(video_pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())

	# Load AoTI compiled blocks
	print("Loading AoTI compiled blocks...")
	import aoti
	aoti.aoti_blocks_load(video_pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
	aoti.aoti_blocks_load(video_pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')

	video_loaded = True
	print("Wan 2.2 I2V fully loaded with all optimizations!")

	return video_pipe

	# Dynamic duration calculation
	def get_video_duration(image, duration_seconds, steps):
	"""Calculate GPU duration based on parameters"""
	BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
	BASE_STEP_DURATION = 15

	resized = resize_image_for_video(image)
	width, height = resized.size
	frames = get_num_frames(duration_seconds)
	factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
	step_duration = BASE_STEP_DURATION * factor ** 1.5
	return 30 + int(steps) * step_duration

	@spaces.GPU
	def generate_video_gpu(image, text, footer, overlay_style, font_size, enable_overlay,
	duration_seconds=3.0, steps=6, seed=None, randomize_seed=True):
	"""Generate video using Wan 2.2 I2V with text overlay"""
	from diffusers.utils import export_to_video

	if image is None:
	raise gr.Error("Please generate an image first!")

	# Load pipeline
	pipe = load_video_pipeline()

	# Handle seed
	if randomize_seed or seed is None:
	current_seed = random.randint(0, MAX_SEED)
	else:
	current_seed = int(seed)

	# Resize image
	resized_image = resize_image_for_video(image)
	num_frames = get_num_frames(duration_seconds)

	# Video generation prompt
	video_prompt = "make this image come alive, cinematic motion, smooth animation, natural movement"
	negative_prompt = "low quality, worst quality, motion artifacts, jitter, unstable, blurry, static"

	# Generate video frames
	output_frames = pipe(
	image=resized_image,
	prompt=video_prompt,
	negative_prompt=negative_prompt,
	height=resized_image.height,
	width=resized_image.width,
	num_frames=num_frames,
	guidance_scale=1.0,
	guidance_scale_2=1.0,
	num_inference_steps=int(steps),
	generator=torch.Generator(device="cuda").manual_seed(current_seed),
	).frames[0]

	# Apply text overlay to each frame
	processed_frames = []
	for frame in output_frames:
	if isinstance(frame, np.ndarray):
	frame = Image.fromarray(frame)

	# Scale font for video dimensions
	video_font_size = font_size * (resized_image.width / 1024)

	frame_with_text = add_glass_overlay(
	frame, text, footer, overlay_style,
	video_font_size,
	enable_overlay
	)
	processed_frames.append(frame_with_text)

	# Export video
	temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
	export_to_video(processed_frames, temp_file.name, fps=VIDEO_FIXED_FPS)

	# Cleanup
	gc.collect()
	torch.cuda.empty_cache()

	return temp_file.name, current_seed

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🎬 MAIN GENERATION FUNCTION
	# ═══════════════════════════════════════════════════════════════════════════════
	def generate_all(recipient, occasion, visual_style, message, overlay_style,
	font_size, enable_overlay, enable_music, music_style,
	enable_video, video_duration, video_steps,
	progress=gr.Progress()):

	if not recipient:
	recipient = "Friend"
	if not message:
	message = f"Wishing you a wonderful {occasion}!"

	footer = f"For {recipient} \| Crittiks Global"
	status = []
	seed_used = None

	# === IMAGE (Z-Image-Turbo - 8 steps!) ===
	progress(0.1, desc="Generating image with Z-Image-Turbo (8-step fast)...")
	try:
	style_desc = STYLES.get(visual_style, "beautiful elegant cinematic")
	img_prompt = f"Beautiful {occasion} greeting card background, artistic composition"
	image, seed_used = generate_image_gpu(img_prompt, style_desc)
	status.append("Image OK")
	except Exception as e:
	return None, None, None, f"Image error: {str(e)}", None

	# === OVERLAY ===
	progress(0.3, desc="Adding text overlay...")
	final_image = add_glass_overlay(image, message, footer, overlay_style, font_size, enable_overlay)
	status.append("Overlay OK")

	# === MUSIC (MusicGen) ===
	audio_path = None
	if enable_music:
	progress(0.4, desc="Generating music with MusicGen...")
	try:
	music_prompt = MUSIC_STYLES.get(music_style, "peaceful ambient music")
	audio_path = generate_music_gpu(music_prompt)
	status.append("Music OK")
	except Exception as e:
	status.append(f"Music: {str(e)[:30]}")

	# === VIDEO (Wan 2.2 with FP8+AoTI - 4-6 steps!) ===
	video_path = None
	if enable_video:
	progress(0.6, desc="Generating video with Wan 2.2 I2V (FP8 + Lightning LoRA)...")
	try:
	video_path, _ = generate_video_gpu(
	image, message, footer, overlay_style, font_size, enable_overlay,
	video_duration, video_steps
	)
	status.append("Video OK")
	except Exception as e:
	status.append(f"Video: {str(e)[:50]}")

	progress(1.0, desc="Complete!")

	final_status = " \| ".join(status) + f" \| Seed: {seed_used}"
	return final_image, audio_path, video_path, final_status, seed_used

	# ═══════════════════════════════════════════════════════════════════════════════
	# 🖥️ GRADIO UI
	# ═══════════════════════════════════════════════════════════════════════════════
	custom_theme = gr.themes.Soft(
	primary_hue="cyan",
	secondary_hue="purple",
	neutral_hue="slate",
	).set(
	button_primary_background_fill="*primary_500",
	button_primary_background_fill_hover="*primary_600",
	)

	with gr.Blocks(
	title="Crittiks Global \| ZeroGPU Studio v3.0",
	theme=custom_theme
	) as demo:

	gr.Markdown("""
	# Crittiks Global - ZeroGPU Studio v3.0
	### Premium AI Image, Video & Music Generation

	Models: Stable Diffusion 3.5 Large \| Wan 2.2 I2V 14B \| MusicGen
	""")

	with gr.Row():
	# === LEFT PANEL ===
	with gr.Column(scale=1):

	gr.Markdown("### Card Settings")
	recipient = gr.Textbox(label="Recipient", placeholder="Who is this for?", value="")
	occasion = gr.Dropdown(choices=OCCASIONS, label="Occasion", value="Christmas")
	visual_style = gr.Dropdown(choices=list(STYLES.keys()), label="Visual Style", value="Festive Magic")
	message = gr.Textbox(label="Message", placeholder="Your greeting message...", lines=2, value="")

	gr.Markdown("### Text & Overlay")
	enable_overlay = gr.Checkbox(label="Enable Glass Overlay", value=True)
	overlay_style = gr.Dropdown(
	choices=list(OVERLAY_STYLES.keys()),
	label="Overlay Style",
	value="Frosted Glass"
	)
	font_size = gr.Slider(minimum=24, maximum=72, step=2, value=42, label="Text Size")

	gr.Markdown("### Media Options")
	enable_music = gr.Checkbox(label="Generate Music (MusicGen)", value=False)
	music_style = gr.Dropdown(
	choices=list(MUSIC_STYLES.keys()),
	label="Music Style",
	value="Peaceful Piano"
	)

	gr.Markdown("### Video Settings (Wan 2.2 I2V)")
	enable_video = gr.Checkbox(label="Generate Video (4-6 step fast!)", value=False)
	video_duration = gr.Slider(
	minimum=1.0, maximum=5.0, step=0.5, value=3.0,
	label="Video Duration (seconds)"
	)
	video_steps = gr.Slider(
	minimum=4, maximum=12, step=1, value=6,
	label="Video Steps (4-6 recommended)"
	)

	generate_btn = gr.Button("GENERATE", variant="primary", size="lg")
	seed_output = gr.Number(label="Seed Used", interactive=False)

	# === RIGHT PANEL ===
	with gr.Column(scale=2):

	gr.Markdown("### Live Preview")
	preview_image = gr.Image(label="Preview (updates live)", type="pil", height=200)

	gr.Markdown("### Generated Content")
	with gr.Tabs():
	with gr.TabItem("Card"):
	output_image = gr.Image(label="Your Card", type="pil", height=450)
	with gr.TabItem("Video"):
	output_video = gr.Video(label="AI Video", height=450, autoplay=True)
	with gr.TabItem("Music"):
	output_audio = gr.Audio(label="Generated Music", type="filepath")

	output_status = gr.Textbox(label="Status")

	# === EVENT HANDLERS ===
	preview_inputs = [message, overlay_style, font_size, enable_overlay, recipient]

	message.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
	overlay_style.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
	font_size.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
	enable_overlay.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
	recipient.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)

	generate_btn.click(
	fn=generate_all,
	inputs=[
	recipient, occasion, visual_style, message, overlay_style,
	font_size, enable_overlay, enable_music, music_style,
	enable_video, video_duration, video_steps
	],
	outputs=[output_image, output_audio, output_video, output_status, seed_output]
	)

	demo.load(fn=update_preview, inputs=preview_inputs, outputs=preview_image)

	gr.Markdown("""
	---
	Performance:
	- Image: ~10-15 seconds (SD 3.5 Large, 28 steps)
	- Video: ~30-60 seconds (Wan 2.2 I2V)
	- First generation loads models, then faster

	Model Credits:
	- [stabilityai/stable-diffusion-3.5-large](https://huggingface.co/stabilityai/stable-diffusion-3.5-large)
	- [Wan-AI/Wan2.2-I2V-A14B](https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers)
	""")

	demo.queue()

	if __name__ == "__main__":
	# Require HuggingFace login for ZeroGPU quota
	demo.launch()