# ═══════════════════════════════════════════════════════════════════════════════
# ⚡ CRITTIKS GLOBAL - ZEROGPU CREATIVE STUDIO v3.0 ULTIMATE
# ═══════════════════════════════════════════════════════════════════════════════
# BLEEDING-EDGE MODELS:
#   - Stable Diffusion 3.5 Large: high-quality 28-step image generation
#   - Wan 2.2 I2V 14B: FP8 quantized + AoTI + Lightning LoRA (4-6 step video!)
#   - MusicGen: AI music generation
# ═══════════════════════════════════════════════════════════════════════════════
import os
import gc
import spaces
import torch
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
import textwrap
import tempfile
import random
import gradio as gr
# ═══════════════════════════════════════════════════════════════════════════════
# 📦 GLOBAL MODEL HOLDERS
# ═══════════════════════════════════════════════════════════════════════════════
music_model = None
music_processor = None
video_pipe = None
video_loaded = False
# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 CONFIGURATION
# ═══════════════════════════════════════════════════════════════════════════════
MAX_SEED = np.iinfo(np.int32).max
# Video settings for Wan 2.2
VIDEO_MAX_DIM = 832
VIDEO_MIN_DIM = 480
VIDEO_SQUARE_DIM = 640
VIDEO_MULTIPLE_OF = 16
VIDEO_FIXED_FPS = 16
VIDEO_MIN_FRAMES = 8
VIDEO_MAX_FRAMES = 80
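# At VIDEO_FIXED_FPS = 16, the 8-80 frame window corresponds to roughly
# 0.5-5 seconds of output, which is why the UI duration slider tops out at 5.0 s.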
# Image settings for Stable Diffusion 3.5 Large
IMAGE_DEFAULT_SIZE = 1024
# Sample preview image
def create_sample_image():
    """Create a sample gradient image for preview (vectorized; same gradient as a per-pixel loop)"""
    xx, yy = np.meshgrid(np.arange(512), np.arange(512))
    r = (50 + (xx / 512) * 100).astype(np.uint8)
    g = (30 + (yy / 512) * 80).astype(np.uint8)
    b = (80 + ((xx + yy) / 1024) * 120).astype(np.uint8)
    return Image.fromarray(np.stack([r, g, b], axis=-1), mode='RGB')
SAMPLE_IMAGE = create_sample_image()
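# SAMPLE_IMAGE is built once at import time, so the live preview never needs the GPU.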
# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 STYLE & OPTION CONFIGURATIONS
# ═══════════════════════════════════════════════════════════════════════════════
OCCASIONS = [
    "Christmas", "New Year", "Birthday", "Wedding", "Valentine's Day",
    "Anniversary", "Graduation", "Thank You", "Diwali", "Eid",
    "Mother's Day", "Father's Day", "Halloween", "Easter"
]
STYLES = {
    "Festive Magic": "magical christmas scene, golden lights, snow, warm cozy atmosphere, cinematic",
    "Luxury Gold": "luxurious black and gold, elegant marble, premium design, sophisticated",
    "Soft Dreams": "soft pastel colors, dreamy clouds, ethereal glow, romantic",
    "Neon Future": "cyberpunk neon lights, futuristic city, purple cyan pink, sci-fi",
    "Nature Beauty": "beautiful nature, flowers, green forest, sunlight rays, peaceful",
    "Ocean Calm": "serene ocean sunset, beach, turquoise water, golden hour",
    "Cosmic Galaxy": "cosmic nebula, stars, aurora borealis, space, mystical",
    "Dark Elegance": "dark moody aesthetic, silver accents, dramatic lighting, premium"
}
OVERLAY_STYLES = {
    "None": None,
    "Frosted Glass": {"color": (20, 30, 50), "opacity": 0.6},
    "Dark Luxury": {"color": (0, 0, 0), "opacity": 0.7},
    "Light Dream": {"color": (255, 255, 255), "opacity": 0.4},
    "Neon Glow": {"color": (30, 0, 60), "opacity": 0.65},
    "Forest Green": {"color": (20, 50, 30), "opacity": 0.6},
    "Sunset Warm": {"color": (70, 35, 20), "opacity": 0.6},
    "Ocean Blue": {"color": (20, 40, 70), "opacity": 0.55}
}
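# "opacity" is the alpha passed to Image.blend() below: 0.0 keeps the blurred
# photo untouched, 1.0 replaces it entirely with the tint color.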
MUSIC_STYLES = {
    "Peaceful Piano": "peaceful piano melody, emotional, cinematic, gentle",
    "Acoustic Guitar": "warm acoustic guitar, soft strumming, heartfelt melody",
    "Epic Orchestra": "epic orchestral music, cinematic strings, powerful",
    "Lo-Fi Chill": "lofi hip hop beats, relaxing, chill vibes, jazzy",
    "Ambient Space": "ambient space music, ethereal pads, dreamy atmosphere",
    "Holiday Magic": "christmas holiday music, bells, magical festive cheer"
}
# ═══════════════════════════════════════════════════════════════════════════════
# 🎨 GLASS OVERLAY FUNCTION
# ═══════════════════════════════════════════════════════════════════════════════
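# How the overlay is composited: the panel region is cropped out, Gaussian-blurred,
# darkened, blended with the style's tint at its opacity, pasted back, and framed
# with a thin white border; the message is then drawn centered with a two-pass
# drop shadow so it stays legible on any background.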
def add_glass_overlay(image, text, footer, overlay_style="Frosted Glass", font_size=42, enable_overlay=True):
    """Add glass overlay with customizable text size"""
    if image.mode != 'RGB':
        image = image.convert('RGB')
    width, height = image.size
    result = image.copy()
    draw = ImageDraw.Draw(result)
    style = OVERLAY_STYLES.get(overlay_style)
    # Calculate panel dimensions
    margin = int(width * 0.04)
    panel_height = int(height * 0.28)
    panel_top = height - panel_height - margin
    panel_bottom = panel_top + panel_height
    panel_left = margin
    panel_right = width - margin
    # Apply overlay panel if enabled and style exists
    if enable_overlay and style is not None:
        region = image.crop((panel_left, panel_top, panel_right, panel_bottom))
        blurred = region.filter(ImageFilter.GaussianBlur(radius=20))
        blurred = ImageEnhance.Brightness(blurred).enhance(0.6)
        color_overlay = Image.new('RGB', (panel_right - panel_left, panel_height), style["color"])
        blended = Image.blend(blurred, color_overlay, style["opacity"])
        result.paste(blended, (panel_left, panel_top))
        draw = ImageDraw.Draw(result)
        for i in range(4):
            draw.rectangle(
                [(panel_left + i, panel_top + i), (panel_right - i, panel_bottom - i)],
                outline=(255, 255, 255)
            )
    # Load font with custom size
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", int(font_size))
        small_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", int(font_size * 0.45))
    except OSError:  # font not installed; fall back to PIL's built-in bitmap font
        font = ImageFont.load_default()
        small_font = font
    # Calculate text wrapping
    avg_char_width = font_size * 0.55
    max_chars = max(15, int((panel_right - panel_left - 40) / avg_char_width))
    lines = textwrap.wrap(text, width=max_chars)
    line_height = int(font_size * 1.25)
    total_height = len(lines) * line_height
    text_y = panel_top + (panel_height - total_height) // 2 - 10
    # Draw text with shadow
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        text_x = (width - (bbox[2] - bbox[0])) // 2
        draw.text((text_x + 2, text_y + 2), line, font=font, fill=(0, 0, 0))
        draw.text((text_x + 1, text_y + 1), line, font=font, fill=(0, 0, 0))
        draw.text((text_x, text_y), line, font=font, fill=(255, 255, 255))
        text_y += line_height
    # Draw footer
    if footer:
        bbox = draw.textbbox((0, 0), footer, font=small_font)
        footer_x = (width - (bbox[2] - bbox[0])) // 2
        draw.text((footer_x + 1, panel_bottom - 30 + 1), footer, font=small_font, fill=(0, 0, 0))
        draw.text((footer_x, panel_bottom - 30), footer, font=small_font, fill=(200, 200, 200))
    return result
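# Example (CPU-only, which is why it can also power the live preview):
#   card = add_glass_overlay(img, "Merry Christmas!", "For Mom | Crittiks Global",
#                            overlay_style="Dark Luxury", font_size=48)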
# ═══════════════════════════════════════════════════════════════════════════════
# 👁️ LIVE PREVIEW (No GPU)
# ═══════════════════════════════════════════════════════════════════════════════
def update_preview(message, overlay_style, font_size, enable_overlay, recipient):
    """Generate live preview with current settings"""
    if not message:
        message = "Your message here..."
    if not recipient:
        recipient = "Friend"
    footer = f"For {recipient} | Crittiks Global"
    preview = add_glass_overlay(
        SAMPLE_IMAGE.copy(),
        message,
        footer,
        overlay_style,
        font_size,
        enable_overlay
    )
    return preview
# ═══════════════════════════════════════════════════════════════════════════════
# 🖼️ STABLE DIFFUSION 3.5 LARGE - High Quality Image Generation
# ═══════════════════════════════════════════════════════════════════════════════
image_pipe = None
@spaces.GPU(duration=120)  # ZeroGPU: first call also loads the model, so request extra time
def generate_image_gpu(prompt, style_desc, height=IMAGE_DEFAULT_SIZE, width=IMAGE_DEFAULT_SIZE,
                       steps=28, seed=None, randomize_seed=True):
    """Generate image using Stable Diffusion 3.5 Large"""
    global image_pipe
    from diffusers import StableDiffusion3Pipeline
    if image_pipe is None:
        print("Loading Stable Diffusion 3.5 Large...")
        image_pipe = StableDiffusion3Pipeline.from_pretrained(
            "stabilityai/stable-diffusion-3.5-large",
            torch_dtype=torch.bfloat16,
        )
        image_pipe.to("cuda")
        print("SD 3.5 Large loaded!")
    # Handle seed
    if randomize_seed or seed is None:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator("cuda").manual_seed(int(seed))
    full_prompt = f"{prompt}, {style_desc}, masterpiece, ultra detailed, 8k quality, cinematic lighting, professional photography"
    image = image_pipe(
        prompt=full_prompt,
        # "no text" belongs in the negative prompt; shouting it in the positive prompt is unreliable
        negative_prompt="text, words, letters, watermark, signature, low quality",
        num_inference_steps=int(steps),  # was hardcoded to 28, ignoring the parameter
        guidance_scale=3.5,
        height=int(height),
        width=int(width),
        generator=generator,
    ).images[0]
    return image, seed
# ═══════════════════════════════════════════════════════════════════════════════
# 🎵 MUSICGEN: AI Music Generation
# ═══════════════════════════════════════════════════════════════════════════════
@spaces.GPU  # ZeroGPU: the default time window is plenty for musicgen-small
def generate_music_gpu(prompt):
    """Generate music using MusicGen"""
    global music_model, music_processor
    from transformers import AutoProcessor, MusicgenForConditionalGeneration
    import scipy.io.wavfile as wavfile
    if music_model is None:
        print("Loading MusicGen...")
        music_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        music_model = MusicgenForConditionalGeneration.from_pretrained(
            "facebook/musicgen-small",
            torch_dtype=torch.float16
        )
        music_model.to("cuda")
        print("MusicGen loaded!")
    inputs = music_processor(text=[prompt], padding=True, return_tensors="pt").to("cuda")
    audio_values = music_model.generate(**inputs, max_new_tokens=256, do_sample=True)
    sampling_rate = music_model.config.audio_encoder.sampling_rate
    audio_data = audio_values[0, 0].cpu().float().numpy()
    audio_data = audio_data / max(np.max(np.abs(audio_data)), 1e-8)  # guard against silent output
    audio_data = (audio_data * 32767).astype(np.int16)
    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    wavfile.write(temp_file.name, sampling_rate, audio_data)
    return temp_file.name
# ═══════════════════════════════════════════════════════════════════════════════
# 🎬 WAN 2.2 I2V: FP8 + AoTI + Lightning LoRA (4-Step Video!)
# ═══════════════════════════════════════════════════════════════════════════════
def resize_image_for_video(image: Image.Image) -> Image.Image:
    """Resize image for Wan 2.2 video generation"""
    width, height = image.size
    if width == height:
        return image.resize((VIDEO_SQUARE_DIM, VIDEO_SQUARE_DIM), Image.LANCZOS)
    aspect_ratio = width / height
    MAX_ASPECT_RATIO = VIDEO_MAX_DIM / VIDEO_MIN_DIM
    MIN_ASPECT_RATIO = VIDEO_MIN_DIM / VIDEO_MAX_DIM
    image_to_resize = image
    if aspect_ratio > MAX_ASPECT_RATIO:
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))
    if width > height:
        target_w = VIDEO_MAX_DIM
        target_h = int(round(target_w / aspect_ratio))
    else:
        target_h = VIDEO_MAX_DIM
        target_w = int(round(target_h * aspect_ratio))
    final_w = round(target_w / VIDEO_MULTIPLE_OF) * VIDEO_MULTIPLE_OF
    final_h = round(target_h / VIDEO_MULTIPLE_OF) * VIDEO_MULTIPLE_OF
    final_w = max(VIDEO_MIN_DIM, min(VIDEO_MAX_DIM, final_w))
    final_h = max(VIDEO_MIN_DIM, min(VIDEO_MAX_DIM, final_h))
    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
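# Example: a 1920x1080 photo (ratio ~1.78) slightly exceeds MAX_ASPECT_RATIO
# (832/480 ~= 1.73), so it is center-cropped to 1872x1080, then snapped to
# multiples of 16 and clamped, landing on 832x480.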
def get_num_frames(duration_seconds: float):
    """Calculate number of frames from duration"""
    return 1 + int(np.clip(
        int(round(duration_seconds * VIDEO_FIXED_FPS)),
        VIDEO_MIN_FRAMES,
        VIDEO_MAX_FRAMES,
    ))
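# Example: 3.0 s -> round(3.0 * 16) = 48, clipped to [8, 80], plus 1 -> 49 frames
# (Wan-style models expect frame counts of the form 4k + 1).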
def load_video_pipeline():
    """Load Wan 2.2 I2V pipeline with FP8 quantization and AoTI"""
    global video_pipe, video_loaded
    if video_loaded:
        return video_pipe
    print("Loading Wan 2.2 I2V 14B with FP8 + AoTI optimization...")
    from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
    from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
    MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
    # Load pipeline with bf16 transformers
    video_pipe = WanImageToVideoPipeline.from_pretrained(
        MODEL_ID,
        transformer=WanTransformer3DModel.from_pretrained(
            'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
            subfolder='transformer',
            torch_dtype=torch.bfloat16,
            device_map='cuda',
        ),
        transformer_2=WanTransformer3DModel.from_pretrained(
            'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
            subfolder='transformer_2',
            torch_dtype=torch.bfloat16,
            device_map='cuda',
        ),
        torch_dtype=torch.bfloat16,
    ).to('cuda')
    # Load Lightning LoRA for fast inference
    print("Loading Lightning LoRA for 4-step inference...")
    video_pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v"
    )
    video_pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v_2",
        load_into_transformer_2=True
    )
    video_pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
    video_pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
    video_pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
    video_pipe.unload_lora_weights()
    # Apply FP8 quantization
    print("Applying FP8 quantization...")
    from torchao.quantization import quantize_
    from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig
    quantize_(video_pipe.text_encoder, Int8WeightOnlyConfig())
    quantize_(video_pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
    quantize_(video_pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
    # Load AoTI compiled blocks ('aoti' is the Space-local helper module, aoti.py,
    # that pulls ahead-of-time compiled transformer blocks from the zerogpu-aoti repo)
    print("Loading AoTI compiled blocks...")
    import aoti
    aoti.aoti_blocks_load(video_pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
    aoti.aoti_blocks_load(video_pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
    video_loaded = True
    print("Wan 2.2 I2V fully loaded with all optimizations!")
    return video_pipe
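# The video_loaded flag makes the load a one-time cost: only the first video
# generation pays it, and later calls reuse the cached, already-compiled pipeline.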
# Dynamic duration estimate for the ZeroGPU scheduler. The signature must mirror
# generate_video_gpu's, since spaces.GPU passes the call's arguments through here.
def get_video_duration(image, text, footer, overlay_style, font_size, enable_overlay,
                       duration_seconds=3.0, steps=6, seed=None, randomize_seed=True):
    """Estimate GPU seconds from frame count, resolution, and step count"""
    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
    BASE_STEP_DURATION = 15
    resized = resize_image_for_video(image)
    width, height = resized.size
    frames = get_num_frames(duration_seconds)
    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
    step_duration = BASE_STEP_DURATION * factor ** 1.5
    return 30 + int(steps) * step_duration
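# Rough example: an 832x480 clip at 49 frames gives factor ~0.47, so each step
# costs ~15 * 0.47**1.5 ~ 4.8 s; at 6 steps that requests ~30 + 29 = 59 GPU seconds.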
@spaces.GPU(duration=get_video_duration)  # request exactly the estimated GPU time
def generate_video_gpu(image, text, footer, overlay_style, font_size, enable_overlay,
                       duration_seconds=3.0, steps=6, seed=None, randomize_seed=True):
| """Generate video using Wan 2.2 I2V with text overlay""" | |
| from diffusers.utils import export_to_video | |
| if image is None: | |
| raise gr.Error("Please generate an image first!") | |
| # Load pipeline | |
| pipe = load_video_pipeline() | |
| # Handle seed | |
| if randomize_seed or seed is None: | |
| current_seed = random.randint(0, MAX_SEED) | |
| else: | |
| current_seed = int(seed) | |
| # Resize image | |
| resized_image = resize_image_for_video(image) | |
| num_frames = get_num_frames(duration_seconds) | |
| # Video generation prompt | |
| video_prompt = "make this image come alive, cinematic motion, smooth animation, natural movement" | |
| negative_prompt = "low quality, worst quality, motion artifacts, jitter, unstable, blurry, static" | |
| # Generate video frames | |
| output_frames = pipe( | |
| image=resized_image, | |
| prompt=video_prompt, | |
| negative_prompt=negative_prompt, | |
| height=resized_image.height, | |
| width=resized_image.width, | |
| num_frames=num_frames, | |
| guidance_scale=1.0, | |
| guidance_scale_2=1.0, | |
| num_inference_steps=int(steps), | |
| generator=torch.Generator(device="cuda").manual_seed(current_seed), | |
| ).frames[0] | |
    # Apply text overlay to each frame; the font scale is constant, so compute it once
    video_font_size = font_size * (resized_image.width / 1024)
    processed_frames = []
    for frame in output_frames:
        if isinstance(frame, np.ndarray):
            # diffusers may return float frames in [0, 1]; convert before handing to PIL
            if frame.dtype != np.uint8:
                frame = (np.clip(frame, 0, 1) * 255).astype(np.uint8)
            frame = Image.fromarray(frame)
        frame_with_text = add_glass_overlay(
            frame, text, footer, overlay_style,
            video_font_size,
            enable_overlay
        )
        processed_frames.append(frame_with_text)
    # Export video
    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    export_to_video(processed_frames, temp_file.name, fps=VIDEO_FIXED_FPS)
    # Cleanup
    gc.collect()
    torch.cuda.empty_cache()
    return temp_file.name, current_seed
# ═══════════════════════════════════════════════════════════════════════════════
# 🎬 MAIN GENERATION FUNCTION
# ═══════════════════════════════════════════════════════════════════════════════
def generate_all(recipient, occasion, visual_style, message, overlay_style,
                 font_size, enable_overlay, enable_music, music_style,
                 enable_video, video_duration, video_steps,
                 progress=gr.Progress()):
    if not recipient:
        recipient = "Friend"
    if not message:
        message = f"Wishing you a wonderful {occasion}!"
    footer = f"For {recipient} | Crittiks Global"
    status = []
    seed_used = None
    # === IMAGE (Stable Diffusion 3.5 Large, 28 steps) ===
    progress(0.1, desc="Generating image with Stable Diffusion 3.5 Large...")
    try:
        style_desc = STYLES.get(visual_style, "beautiful elegant cinematic")
        img_prompt = f"Beautiful {occasion} greeting card background, artistic composition"
        image, seed_used = generate_image_gpu(img_prompt, style_desc)
        status.append("Image OK")
    except Exception as e:
        return None, None, None, f"Image error: {str(e)}", None
    # === OVERLAY ===
    progress(0.3, desc="Adding text overlay...")
    final_image = add_glass_overlay(image, message, footer, overlay_style, font_size, enable_overlay)
    status.append("Overlay OK")
    # === MUSIC (MusicGen) ===
    audio_path = None
    if enable_music:
        progress(0.4, desc="Generating music with MusicGen...")
        try:
            music_prompt = MUSIC_STYLES.get(music_style, "peaceful ambient music")
            audio_path = generate_music_gpu(music_prompt)
            status.append("Music OK")
        except Exception as e:
            status.append(f"Music: {str(e)[:30]}")
    # === VIDEO (Wan 2.2 with FP8+AoTI - 4-6 steps!) ===
    video_path = None
    if enable_video:
        progress(0.6, desc="Generating video with Wan 2.2 I2V (FP8 + Lightning LoRA)...")
        try:
            video_path, _ = generate_video_gpu(
                image, message, footer, overlay_style, font_size, enable_overlay,
                video_duration, video_steps
            )
            status.append("Video OK")
        except Exception as e:
            status.append(f"Video: {str(e)[:50]}")
    progress(1.0, desc="Complete!")
    final_status = " | ".join(status) + f" | Seed: {seed_used}"
    return final_image, audio_path, video_path, final_status, seed_used
# ═══════════════════════════════════════════════════════════════════════════════
# 🖥️ GRADIO UI
# ═══════════════════════════════════════════════════════════════════════════════
custom_theme = gr.themes.Soft(
    primary_hue="cyan",
    secondary_hue="purple",
    neutral_hue="slate",
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
)
with gr.Blocks(
    title="Crittiks Global | ZeroGPU Studio v3.0",
    theme=custom_theme
) as demo:
    gr.Markdown("""
    # Crittiks Global - ZeroGPU Studio v3.0
    ### Premium AI Image, Video & Music Generation
    **Models:** Stable Diffusion 3.5 Large | Wan 2.2 I2V 14B | MusicGen
    """)
    with gr.Row():
        # === LEFT PANEL ===
        with gr.Column(scale=1):
            gr.Markdown("### Card Settings")
            recipient = gr.Textbox(label="Recipient", placeholder="Who is this for?", value="")
            occasion = gr.Dropdown(choices=OCCASIONS, label="Occasion", value="Christmas")
            visual_style = gr.Dropdown(choices=list(STYLES.keys()), label="Visual Style", value="Festive Magic")
            message = gr.Textbox(label="Message", placeholder="Your greeting message...", lines=2, value="")
            gr.Markdown("### Text & Overlay")
            enable_overlay = gr.Checkbox(label="Enable Glass Overlay", value=True)
            overlay_style = gr.Dropdown(
                choices=list(OVERLAY_STYLES.keys()),
                label="Overlay Style",
                value="Frosted Glass"
            )
            font_size = gr.Slider(minimum=24, maximum=72, step=2, value=42, label="Text Size")
            gr.Markdown("### Media Options")
            enable_music = gr.Checkbox(label="Generate Music (MusicGen)", value=False)
            music_style = gr.Dropdown(
                choices=list(MUSIC_STYLES.keys()),
                label="Music Style",
                value="Peaceful Piano"
            )
            gr.Markdown("### Video Settings (Wan 2.2 I2V)")
            enable_video = gr.Checkbox(label="Generate Video (4-6 step fast!)", value=False)
            video_duration = gr.Slider(
                minimum=1.0, maximum=5.0, step=0.5, value=3.0,
                label="Video Duration (seconds)"
            )
            video_steps = gr.Slider(
                minimum=4, maximum=12, step=1, value=6,
                label="Video Steps (4-6 recommended)"
            )
            generate_btn = gr.Button("GENERATE", variant="primary", size="lg")
            seed_output = gr.Number(label="Seed Used", interactive=False)
        # === RIGHT PANEL ===
        with gr.Column(scale=2):
            gr.Markdown("### Live Preview")
            preview_image = gr.Image(label="Preview (updates live)", type="pil", height=200)
            gr.Markdown("### Generated Content")
            with gr.Tabs():
                with gr.TabItem("Card"):
                    output_image = gr.Image(label="Your Card", type="pil", height=450)
                with gr.TabItem("Video"):
                    output_video = gr.Video(label="AI Video", height=450, autoplay=True)
                with gr.TabItem("Music"):
                    output_audio = gr.Audio(label="Generated Music", type="filepath")
            output_status = gr.Textbox(label="Status")
    # === EVENT HANDLERS ===
    preview_inputs = [message, overlay_style, font_size, enable_overlay, recipient]
    # Re-render the CPU-only preview whenever any preview-relevant control changes
    for control in preview_inputs:
        control.change(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
    generate_btn.click(
        fn=generate_all,
        inputs=[
            recipient, occasion, visual_style, message, overlay_style,
            font_size, enable_overlay, enable_music, music_style,
            enable_video, video_duration, video_steps
        ],
        outputs=[output_image, output_audio, output_video, output_status, seed_output]
    )
    demo.load(fn=update_preview, inputs=preview_inputs, outputs=preview_image)
| gr.Markdown(""" | |
| --- | |
| **Performance:** | |
| - Image: ~10-15 seconds (SD 3.5 Large, 28 steps) | |
| - Video: ~30-60 seconds (Wan 2.2 I2V) | |
| - First generation loads models, then faster | |
| **Model Credits:** | |
| - [stabilityai/stable-diffusion-3.5-large](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) | |
| - [Wan-AI/Wan2.2-I2V-A14B](https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers) | |
| """) | |
demo.queue()
if __name__ == "__main__":
    # ZeroGPU quota is per-user; visitors signed in to Hugging Face get more GPU time
    demo.launch()