# ═══════════════════════════════════════════════════════════════════════════════
# ⚡ CRITTIKS GLOBAL - ZEROGPU CREATIVE STUDIO v3.0 ULTIMATE
# ═══════════════════════════════════════════════════════════════════════════════
# BLEEDING-EDGE MODELS:
# - Stable Diffusion 3.5 Large: high-quality 28-step image generation
# - Wan 2.2 I2V 14B: FP8 quantized + AoTI + Lightning LoRA (4-step video!)
# - MusicGen: AI music generation
# ═══════════════════════════════════════════════════════════════════════════════

import os
import gc
import spaces
import torch
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
import textwrap
import tempfile
import random
import gradio as gr

# ═══════════════════════════════════════════════════════════════════════════════
# 📦 GLOBAL MODEL HOLDERS
# ═══════════════════════════════════════════════════════════════════════════════
music_model = None
music_processor = None
video_pipe = None
video_loaded = False

# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 CONFIGURATION
# ═══════════════════════════════════════════════════════════════════════════════
MAX_SEED = np.iinfo(np.int32).max

# Video settings for Wan 2.2
VIDEO_MAX_DIM = 832
VIDEO_MIN_DIM = 480
VIDEO_SQUARE_DIM = 640
VIDEO_MULTIPLE_OF = 16
VIDEO_FIXED_FPS = 16
VIDEO_MIN_FRAMES = 8
VIDEO_MAX_FRAMES = 80

# Image settings for Stable Diffusion 3.5 Large
IMAGE_DEFAULT_SIZE = 1024


# Sample preview image
def create_sample_image():
    """Create a sample gradient image for preview"""
    img = Image.new('RGB', (512, 512))
    for y in range(512):
        for x in range(512):
            r = int(50 + (x / 512) * 100)
            g = int(30 + (y / 512) * 80)
            b = int(80 + ((x + y) / 1024) * 120)
            img.putpixel((x, y), (r, g, b))
    return img


SAMPLE_IMAGE = create_sample_image()

# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 STYLE & OPTION CONFIGURATIONS
# ═══════════════════════════════════════════════════════════════════════════════
OCCASIONS = [
    "Christmas", "New Year", "Birthday", "Wedding", "Valentine's Day",
    "Anniversary", "Graduation", "Thank You", "Diwali", "Eid",
    "Mother's Day", "Father's Day", "Halloween", "Easter"
]

STYLES = {
    "Festive Magic": "magical christmas scene, golden lights, snow, warm cozy atmosphere, cinematic",
    "Luxury Gold": "luxurious black and gold, elegant marble, premium design, sophisticated",
    "Soft Dreams": "soft pastel colors, dreamy clouds, ethereal glow, romantic",
    "Neon Future": "cyberpunk neon lights, futuristic city, purple cyan pink, sci-fi",
    "Nature Beauty": "beautiful nature, flowers, green forest, sunlight rays, peaceful",
    "Ocean Calm": "serene ocean sunset, beach, turquoise water, golden hour",
    "Cosmic Galaxy": "cosmic nebula, stars, aurora borealis, space, mystical",
    "Dark Elegance": "dark moody aesthetic, silver accents, dramatic lighting, premium"
}

OVERLAY_STYLES = {
    "None": None,
    "Frosted Glass": {"color": (20, 30, 50), "opacity": 0.6},
    "Dark Luxury": {"color": (0, 0, 0), "opacity": 0.7},
    "Light Dream": {"color": (255, 255, 255), "opacity": 0.4},
    "Neon Glow": {"color": (30, 0, 60), "opacity": 0.65},
    "Forest Green": {"color": (20, 50, 30), "opacity": 0.6},
    "Sunset Warm": {"color": (70, 35, 20), "opacity": 0.6},
    "Ocean Blue": {"color": (20, 40, 70), "opacity": 0.55}
}

MUSIC_STYLES = {
    "Peaceful Piano": "peaceful piano melody, emotional, cinematic, gentle",
    "Acoustic Guitar": "warm acoustic guitar, soft strumming, heartfelt melody",
    "Epic Orchestra": "epic orchestral music, cinematic strings, powerful",
    "Lo-Fi Chill": "lofi hip hop beats, relaxing, chill vibes, jazzy",
    "Ambient Space": "ambient space music, ethereal pads, dreamy atmosphere",
    "Holiday Magic": "christmas holiday music, bells, magical festive cheer"
}
# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 GLASS OVERLAY FUNCTION
# ═══════════════════════════════════════════════════════════════════════════════
def add_glass_overlay(image, text, footer, overlay_style="Frosted Glass", font_size=42, enable_overlay=True):
    """Add glass overlay with customizable text size"""
    if image.mode != 'RGB':
        image = image.convert('RGB')

    width, height = image.size
    result = image.copy()
    draw = ImageDraw.Draw(result)

    style = OVERLAY_STYLES.get(overlay_style)

    # Calculate panel dimensions
    margin = int(width * 0.04)
    panel_height = int(height * 0.28)
    panel_top = height - panel_height - margin
    panel_bottom = panel_top + panel_height
    panel_left = margin
    panel_right = width - margin

    # Apply overlay panel if enabled and style exists
    if enable_overlay and style is not None:
        region = image.crop((panel_left, panel_top, panel_right, panel_bottom))
        blurred = region.filter(ImageFilter.GaussianBlur(radius=20))
        blurred = ImageEnhance.Brightness(blurred).enhance(0.6)
        color_overlay = Image.new('RGB', (panel_right - panel_left, panel_height), style["color"])
        blended = Image.blend(blurred, color_overlay, style["opacity"])
        result.paste(blended, (panel_left, panel_top))
        draw = ImageDraw.Draw(result)
        # 4-pixel white border around the glass panel
        for i in range(4):
            draw.rectangle(
                [(panel_left + i, panel_top + i), (panel_right - i, panel_bottom - i)],
                outline=(255, 255, 255)
            )

    # Load font with custom size
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", int(font_size))
        small_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", int(font_size * 0.45))
    except OSError:
        font = ImageFont.load_default()
        small_font = font

    # Calculate text wrapping
    avg_char_width = font_size * 0.55
    max_chars = max(15, int((panel_right - panel_left - 40) / avg_char_width))
    lines = textwrap.wrap(text, width=max_chars)
    line_height = int(font_size * 1.25)
    total_height = len(lines) * line_height
    text_y = panel_top + (panel_height - total_height) // 2 - 10

    # Draw text with shadow
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        text_x = (width - (bbox[2] - bbox[0])) // 2
        draw.text((text_x + 2, text_y + 2), line, font=font, fill=(0, 0, 0))
        draw.text((text_x + 1, text_y + 1), line, font=font, fill=(0, 0, 0))
        draw.text((text_x, text_y), line, font=font, fill=(255, 255, 255))
        text_y += line_height

    # Draw footer
    if footer:
        bbox = draw.textbbox((0, 0), footer, font=small_font)
        footer_x = (width - (bbox[2] - bbox[0])) // 2
        draw.text((footer_x + 1, panel_bottom - 30 + 1), footer, font=small_font, fill=(0, 0, 0))
        draw.text((footer_x, panel_bottom - 30), footer, font=small_font, fill=(200, 200, 200))

    return result
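# Illustrative standalone usage of add_glass_overlay (hypothetical paths and text,
# not part of the app flow; the Gradio UI below calls it the same way):
#
#   bg = Image.open("background.png")
#   card = add_glass_overlay(
#       bg, "Happy Birthday!", "For Maya | Crittiks Global",
#       overlay_style="Dark Luxury", font_size=48,
#   )
#   card.save("card.png")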
# ═══════════════════════════════════════════════════════════════════════════════
# 👁️ LIVE PREVIEW (No GPU)
# ═══════════════════════════════════════════════════════════════════════════════
def update_preview(message, overlay_style, font_size, enable_overlay, recipient):
    """Generate live preview with current settings"""
    if not message:
        message = "Your message here..."
    if not recipient:
        recipient = "Friend"

    footer = f"For {recipient} | Crittiks Global"
    preview = add_glass_overlay(
        SAMPLE_IMAGE.copy(), message, footer, overlay_style, font_size, enable_overlay
    )
    return preview


# ═══════════════════════════════════════════════════════════════════════════════
# 🖼️ STABLE DIFFUSION 3.5 LARGE - High Quality Image Generation
# ═══════════════════════════════════════════════════════════════════════════════
image_pipe = None


@spaces.GPU
def generate_image_gpu(prompt, style_desc, height=1024, width=1024, steps=28, seed=None, randomize_seed=True):
    """Generate image using Stable Diffusion 3.5 Large"""
    global image_pipe
    from diffusers import StableDiffusion3Pipeline

    if image_pipe is None:
        print("Loading Stable Diffusion 3.5 Large...")
        image_pipe = StableDiffusion3Pipeline.from_pretrained(
            "stabilityai/stable-diffusion-3.5-large",
            torch_dtype=torch.bfloat16,
        )
        image_pipe.to("cuda")
        print("SD 3.5 Large loaded!")

    # Handle seed
    if randomize_seed or seed is None:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    generator = torch.Generator("cuda").manual_seed(int(seed))

    full_prompt = f"{prompt}, {style_desc}, masterpiece, ultra detailed, 8k quality, cinematic lighting, professional photography, NO TEXT NO WORDS NO LETTERS"

    image = image_pipe(
        prompt=full_prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(steps),
        guidance_scale=3.5,
        generator=generator,
    ).images[0]

    return image, seed


# ═══════════════════════════════════════════════════════════════════════════════
# 🎵 MUSICGEN: AI Music Generation
# ═══════════════════════════════════════════════════════════════════════════════
@spaces.GPU
def generate_music_gpu(prompt):
    """Generate music using MusicGen"""
    global music_model, music_processor
    from transformers import AutoProcessor, MusicgenForConditionalGeneration
    import scipy.io.wavfile as wavfile

    if music_model is None:
        print("Loading MusicGen...")
        music_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        music_model = MusicgenForConditionalGeneration.from_pretrained(
            "facebook/musicgen-small", torch_dtype=torch.float16
        )
        music_model.to("cuda")
        print("MusicGen loaded!")

    inputs = music_processor(text=[prompt], padding=True, return_tensors="pt").to("cuda")
    audio_values = music_model.generate(**inputs, max_new_tokens=256, do_sample=True)

    sampling_rate = music_model.config.audio_encoder.sampling_rate
    audio_data = audio_values[0, 0].cpu().numpy()
    audio_data = audio_data / np.max(np.abs(audio_data))
    audio_data = (audio_data * 32767).astype(np.int16)

    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    wavfile.write(temp_file.name, sampling_rate, audio_data)
    return temp_file.name
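# Note (approximation): MusicGen emits roughly 50 audio tokens per second of audio,
# so max_new_tokens=256 above yields a clip of about 5 seconds. A hedged sketch for
# longer clips -- raise the token budget (GPU time grows roughly in proportion):
#
#   audio_values = music_model.generate(**inputs, max_new_tokens=512, do_sample=True)  # ~10 s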
# ═══════════════════════════════════════════════════════════════════════════════
# 🎬 WAN 2.2 I2V: FP8 + AoTI + Lightning LoRA (4-Step Video!)
# ═══════════════════════════════════════════════════════════════════════════════
def resize_image_for_video(image: Image.Image) -> Image.Image:
    """Resize image for Wan 2.2 video generation"""
    width, height = image.size

    if width == height:
        return image.resize((VIDEO_SQUARE_DIM, VIDEO_SQUARE_DIM), Image.LANCZOS)

    aspect_ratio = width / height
    MAX_ASPECT_RATIO = VIDEO_MAX_DIM / VIDEO_MIN_DIM
    MIN_ASPECT_RATIO = VIDEO_MIN_DIM / VIDEO_MAX_DIM

    image_to_resize = image
    if aspect_ratio > MAX_ASPECT_RATIO:
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))

    if width > height:
        target_w = VIDEO_MAX_DIM
        target_h = int(round(target_w / aspect_ratio))
    else:
        target_h = VIDEO_MAX_DIM
        target_w = int(round(target_h * aspect_ratio))

    final_w = round(target_w / VIDEO_MULTIPLE_OF) * VIDEO_MULTIPLE_OF
    final_h = round(target_h / VIDEO_MULTIPLE_OF) * VIDEO_MULTIPLE_OF
    final_w = max(VIDEO_MIN_DIM, min(VIDEO_MAX_DIM, final_w))
    final_h = max(VIDEO_MIN_DIM, min(VIDEO_MAX_DIM, final_h))

    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)


def get_num_frames(duration_seconds: float):
    """Calculate number of frames from duration"""
    return 1 + int(np.clip(
        int(round(duration_seconds * VIDEO_FIXED_FPS)),
        VIDEO_MIN_FRAMES,
        VIDEO_MAX_FRAMES,
    ))
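# Worked example of the sizing/frame math above (derived from the module constants,
# for illustration only):
# - a 1024x768 input (4:3) needs no cropping; the long side maps to 832 and the
#   short side to round(832 / 1.333) = 624, both already multiples of 16 -> 832x624
# - duration_seconds=3.0 gives 1 + round(3.0 * 16) = 49 frames at 16 fps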
def load_video_pipeline():
    """Load Wan 2.2 I2V pipeline with FP8 quantization and AoTI"""
    global video_pipe, video_loaded

    if video_loaded:
        return video_pipe

    print("Loading Wan 2.2 I2V 14B with FP8 + AoTI optimization...")
    from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
    from diffusers.models.transformers.transformer_wan import WanTransformer3DModel

    MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

    # Load pipeline with bf16 transformers
    video_pipe = WanImageToVideoPipeline.from_pretrained(
        MODEL_ID,
        transformer=WanTransformer3DModel.from_pretrained(
            'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
            subfolder='transformer',
            torch_dtype=torch.bfloat16,
            device_map='cuda',
        ),
        transformer_2=WanTransformer3DModel.from_pretrained(
            'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
            subfolder='transformer_2',
            torch_dtype=torch.bfloat16,
            device_map='cuda',
        ),
        torch_dtype=torch.bfloat16,
    ).to('cuda')

    # Load Lightning LoRA for fast inference
    print("Loading Lightning LoRA for 4-step inference...")
    video_pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v"
    )
    video_pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v_2",
        load_into_transformer_2=True
    )
    video_pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
    video_pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
    video_pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
    video_pipe.unload_lora_weights()

    # Apply FP8 quantization
    print("Applying FP8 quantization...")
    from torchao.quantization import quantize_
    from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig

    quantize_(video_pipe.text_encoder, Int8WeightOnlyConfig())
    quantize_(video_pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
    quantize_(video_pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())

    # Load AoTI compiled blocks
    print("Loading AoTI compiled blocks...")
    import aoti
    aoti.aoti_blocks_load(video_pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
    aoti.aoti_blocks_load(video_pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')

    video_loaded = True
    print("Wan 2.2 I2V fully loaded with all optimizations!")
    return video_pipe


# Dynamic duration calculation
def get_video_duration(image, duration_seconds, steps):
    """Estimate GPU duration (seconds) based on parameters.

    NOTE: currently not wired into @spaces.GPU(duration=...); to use it as a dynamic
    ZeroGPU budget, its signature would need to mirror generate_video_gpu's.
    """
    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
    BASE_STEP_DURATION = 15

    resized = resize_image_for_video(image)
    width, height = resized.size
    frames = get_num_frames(duration_seconds)

    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
    step_duration = BASE_STEP_DURATION * factor ** 1.5
    return 30 + int(steps) * step_duration
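# Worked example of the estimate above: a 3-second clip at 832x624 gives 49 frames,
# so factor = 49 / 81 ~= 0.60, step_duration ~= 15 * 0.60**1.5 ~= 7 s, and with
# steps=6 the budget comes to roughly 30 + 6 * 7 ~= 72 seconds of GPU time.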
@spaces.GPU
def generate_video_gpu(image, text, footer, overlay_style, font_size, enable_overlay,
                       duration_seconds=3.0, steps=6, seed=None, randomize_seed=True):
    """Generate video using Wan 2.2 I2V with text overlay"""
    from diffusers.utils import export_to_video

    if image is None:
        raise gr.Error("Please generate an image first!")

    # Load pipeline
    pipe = load_video_pipeline()

    # Handle seed
    if randomize_seed or seed is None:
        current_seed = random.randint(0, MAX_SEED)
    else:
        current_seed = int(seed)

    # Resize image
    resized_image = resize_image_for_video(image)
    num_frames = get_num_frames(duration_seconds)

    # Video generation prompt
    video_prompt = "make this image come alive, cinematic motion, smooth animation, natural movement"
    negative_prompt = "low quality, worst quality, motion artifacts, jitter, unstable, blurry, static"

    # Generate video frames
    output_frames = pipe(
        image=resized_image,
        prompt=video_prompt,
        negative_prompt=negative_prompt,
        height=resized_image.height,
        width=resized_image.width,
        num_frames=num_frames,
        guidance_scale=1.0,
        guidance_scale_2=1.0,
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]

    # Apply text overlay to each frame
    processed_frames = []
    for frame in output_frames:
        if isinstance(frame, np.ndarray):
            frame = Image.fromarray(frame)
        # Scale font for video dimensions
        video_font_size = font_size * (resized_image.width / 1024)
        frame_with_text = add_glass_overlay(
            frame, text, footer, overlay_style, video_font_size, enable_overlay
        )
        processed_frames.append(frame_with_text)

    # Export video
    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    export_to_video(processed_frames, temp_file.name, fps=VIDEO_FIXED_FPS)

    # Cleanup
    gc.collect()
    torch.cuda.empty_cache()

    return temp_file.name, current_seed


# ═══════════════════════════════════════════════════════════════════════════════
# 🎬 MAIN GENERATION FUNCTION
# ═══════════════════════════════════════════════════════════════════════════════
def generate_all(recipient, occasion, visual_style, message,
                 overlay_style, font_size, enable_overlay,
                 enable_music, music_style,
                 enable_video, video_duration, video_steps,
                 progress=gr.Progress()):

    if not recipient:
        recipient = "Friend"
    if not message:
        message = f"Wishing you a wonderful {occasion}!"

    footer = f"For {recipient} | Crittiks Global"
    status = []
    seed_used = None

    # === IMAGE (Stable Diffusion 3.5 Large, 28 steps) ===
    progress(0.1, desc="Generating image with Stable Diffusion 3.5 Large (28 steps)...")
    try:
        style_desc = STYLES.get(visual_style, "beautiful elegant cinematic")
        img_prompt = f"Beautiful {occasion} greeting card background, artistic composition"
        image, seed_used = generate_image_gpu(img_prompt, style_desc)
        status.append("Image OK")
    except Exception as e:
        return None, None, None, f"Image error: {str(e)}", None

    # === OVERLAY ===
    progress(0.3, desc="Adding text overlay...")
    final_image = add_glass_overlay(image, message, footer, overlay_style, font_size, enable_overlay)
    status.append("Overlay OK")

    # === MUSIC (MusicGen) ===
    audio_path = None
    if enable_music:
        progress(0.4, desc="Generating music with MusicGen...")
        try:
            music_prompt = MUSIC_STYLES.get(music_style, "peaceful ambient music")
            audio_path = generate_music_gpu(music_prompt)
            status.append("Music OK")
        except Exception as e:
            status.append(f"Music: {str(e)[:30]}")

    # === VIDEO (Wan 2.2 with FP8+AoTI - 4-6 steps!) ===
    video_path = None
    if enable_video:
        progress(0.6, desc="Generating video with Wan 2.2 I2V (FP8 + Lightning LoRA)...")
        try:
            video_path, _ = generate_video_gpu(
                image, message, footer, overlay_style, font_size, enable_overlay,
                video_duration, video_steps
            )
            status.append("Video OK")
        except Exception as e:
            status.append(f"Video: {str(e)[:50]}")

    progress(1.0, desc="Complete!")
    final_status = " | ".join(status) + f" | Seed: {seed_used}"
    return final_image, audio_path, video_path, final_status, seed_used


# ═══════════════════════════════════════════════════════════════════════════════
# 🖥️ GRADIO UI
# ═══════════════════════════════════════════════════════════════════════════════
custom_theme = gr.themes.Soft(
    primary_hue="cyan",
    secondary_hue="purple",
    neutral_hue="slate",
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
)

with gr.Blocks(
    title="Crittiks Global | ZeroGPU Studio v3.0",
    theme=custom_theme
) as demo:
    gr.Markdown("""
    # Crittiks Global - ZeroGPU Studio v3.0
    ### Premium AI Image, Video & Music Generation
    **Models:** Stable Diffusion 3.5 Large | Wan 2.2 I2V 14B | MusicGen
    """)

    with gr.Row():
        # === LEFT PANEL ===
        with gr.Column(scale=1):
            gr.Markdown("### Card Settings")
            recipient = gr.Textbox(label="Recipient", placeholder="Who is this for?", value="")
            occasion = gr.Dropdown(choices=OCCASIONS, label="Occasion", value="Christmas")
            visual_style = gr.Dropdown(choices=list(STYLES.keys()), label="Visual Style", value="Festive Magic")
            message = gr.Textbox(label="Message", placeholder="Your greeting message...", lines=2, value="")

            gr.Markdown("### Text & Overlay")
            enable_overlay = gr.Checkbox(label="Enable Glass Overlay", value=True)
            overlay_style = gr.Dropdown(
                choices=list(OVERLAY_STYLES.keys()), label="Overlay Style", value="Frosted Glass"
            )
            font_size = gr.Slider(minimum=24, maximum=72, step=2, value=42, label="Text Size")

            gr.Markdown("### Media Options")
            enable_music = gr.Checkbox(label="Generate Music (MusicGen)", value=False)
            music_style = gr.Dropdown(
                choices=list(MUSIC_STYLES.keys()), label="Music Style", value="Peaceful Piano"
            )

            gr.Markdown("### Video Settings (Wan 2.2 I2V)")
            enable_video = gr.Checkbox(label="Generate Video (4-6 step fast!)", value=False)
            video_duration = gr.Slider(
                minimum=1.0, maximum=5.0, step=0.5, value=3.0, label="Video Duration (seconds)"
            )
            video_steps = gr.Slider(
                minimum=4, maximum=12, step=1, value=6, label="Video Steps (4-6 recommended)"
            )

            generate_btn = gr.Button("GENERATE", variant="primary", size="lg")
            seed_output = gr.Number(label="Seed Used", interactive=False)
        # === RIGHT PANEL ===
        with gr.Column(scale=2):
            gr.Markdown("### Live Preview")
            preview_image = gr.Image(label="Preview (updates live)", type="pil", height=200)

            gr.Markdown("### Generated Content")
            with gr.Tabs():
                with gr.TabItem("Card"):
                    output_image = gr.Image(label="Your Card", type="pil", height=450)
                with gr.TabItem("Video"):
                    output_video = gr.Video(label="AI Video", height=450, autoplay=True)
                with gr.TabItem("Music"):
                    output_audio = gr.Audio(label="Generated Music", type="filepath")

            output_status = gr.Textbox(label="Status")

    # === EVENT HANDLERS ===
    preview_inputs = [message, overlay_style, font_size, enable_overlay, recipient]
    message.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
    overlay_style.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
    font_size.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
    enable_overlay.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
    recipient.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)

    generate_btn.click(
        fn=generate_all,
        inputs=[
            recipient, occasion, visual_style, message,
            overlay_style, font_size, enable_overlay,
            enable_music, music_style,
            enable_video, video_duration, video_steps
        ],
        outputs=[output_image, output_audio, output_video, output_status, seed_output]
    )

    demo.load(fn=update_preview, inputs=preview_inputs, outputs=preview_image)

    gr.Markdown("""
    ---
    **Performance:**
    - Image: ~10-15 seconds (SD 3.5 Large, 28 steps)
    - Video: ~30-60 seconds (Wan 2.2 I2V)
    - First generation loads models, then faster

    **Model Credits:**
    - [stabilityai/stable-diffusion-3.5-large](https://huggingface.co/stabilityai/stable-diffusion-3.5-large)
    - [Wan-AI/Wan2.2-I2V-A14B](https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers)
    """)

demo.queue()

if __name__ == "__main__":
    # Requires a Hugging Face login for ZeroGPU quota
    demo.launch()