"""Eburon Speech Studio: a Gradio front end for edge-tts speech synthesis.

The module builds a small web UI (script box, voice picker, style, speed and
pitch controls) and turns the entered text into an MP3 via ``edge_tts``.
"""

import asyncio
import contextlib
import os
import tempfile
import xml.sax.saxutils  # kept for backward compatibility; no longer used here

import edge_tts
import gradio as gr

EBURON_VERSION = "2.0"

# -----------------------------
# Custom CSS – ElevenLabs-style
# -----------------------------
# The stylesheet text is kept exactly as it was (whitespace included) so the
# value handed to gr.Blocks(css=...) is unchanged. Doubled braces are literal
# braces because this is an f-string.
EBURON_CSS = f""" body {{ background: radial-gradient(circle at top left, #020617 0, #020617 45%, #020617 100%); color: #e5e7eb; margin: 0; padding: 0; }} * {{ font-family: system-ui, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif; -webkit-font-smoothing: antialiased; }} #eburon-root {{ max-width: 1100px; margin: 0 auto; padding: 20px 18px 32px 18px; }} #eburon-top-nav {{ display: flex; align-items: center; justify-content: space-between; margin-bottom: 18px; }} #eburon-nav-left {{ display: flex; align-items: center; gap: 14px; }} #eburon-logo-circle {{ width: 32px; height: 32px; border-radius: 999px; background: conic-gradient(from 210deg, #22c55e, #38bdf8, #6366f1, #22c55e); display: flex; align-items: center; justify-content: center; color: #020617; font-weight: 800; font-size: 17px; box-shadow: 0 0 22px rgba(59, 130, 246, 0.8); }} #eburon-product-title {{ display: flex; flex-direction: column; }} #eburon-product-title span:nth-child(1) {{ font-size: 18px; font-weight: 700; letter-spacing: 0.08em; text-transform: uppercase; color: #e5e7eb; }} #eburon-product-title span:nth-child(2) {{ font-size: 11px; color: #9ca3af; }} #eburon-nav-tabs {{ display: inline-flex; align-items: center; gap: 4px; padding: 3px; border-radius: 999px; background: rgba(15, 23, 42, 0.9); border: 1px solid rgba(55, 65, 81, 0.9); font-size: 11px; }} .eburon-tab {{ padding: 5px 10px; border-radius: 999px; cursor: default; color: #9ca3af; }} .eburon-tab-active {{ background: linear-gradient(135deg, #38bdf8, #6366f1); color: #020617; font-weight: 600; }} #eburon-nav-right {{ display: flex; align-items: center; gap: 8px; font-size: 11px; color: #9ca3af; }} #eburon-pill-version {{ padding: 4px 10px; border-radius: 999px; border: 1px solid rgba(148, 163, 184, 0.4); 
background: radial-gradient(circle at top, rgba(31, 41, 55, 1), rgba(15, 23, 42, 1)); }} #eburon-pill-usage {{ padding: 4px 10px; border-radius: 999px; border: 1px solid rgba(59, 130, 246, 0.7); background: radial-gradient(circle at top, rgba(30, 64, 175, 0.85), rgba(15, 23, 42, 1)); }} .eburon-main-card {{ border-radius: 20px; background: radial-gradient(circle at top left, #020617, #020617 60%); border: 1px solid rgba(51, 65, 85, 0.9); box-shadow: 0 24px 48px rgba(15, 23, 42, 0.95); padding: 16px 18px 18px 18px; }} .eburon-section-header {{ display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }} .eburon-section-title {{ font-size: 14px; font-weight: 600; color: #e5e7eb; }} .eburon-section-subtitle {{ font-size: 11px; color: #9ca3af; }} textarea {{ background-color: #020617 !important; border-radius: 14px !important; border: 1px solid rgba(55, 65, 81, 0.9) !important; color: #e5e7eb !important; font-size: 13px !important; }} select, input[type="range"] {{ background-color: #020617 !important; border-radius: 999px !important; border: 1px solid rgba(55, 65, 81, 0.9) !important; color: #e5e7eb !important; }} label span, .gr-textbox label, .gr-slider label, .gr-dropdown label {{ font-size: 11px !important; color: #9ca3af !important; }} #eburon-generate-btn button {{ width: 100%; border-radius: 999px; font-weight: 600; letter-spacing: 0.02em; padding: 10px 16px; background: linear-gradient(135deg, #22c55e, #38bdf8); box-shadow: 0 12px 32px rgba(56, 189, 248, 0.75); border: none; }} #eburon-generate-btn button:hover {{ transform: translateY(-1px); box-shadow: 0 18px 42px rgba(56, 189, 248, 0.95); }} #eburon-audio-card {{ border-radius: 18px; background: radial-gradient(circle at top right, #020617, #020617 65%); border: 1px solid rgba(55, 65, 81, 0.9); box-shadow: 0 18px 40px rgba(15, 23, 42, 0.95); padding: 12px 14px 14px 14px; }} #eburon-audio-header {{ display: flex; justify-content: space-between; align-items: center; 
margin-bottom: 4px; }} #eburon-audio-title {{ font-size: 12px; font-weight: 600; color: #e5e7eb; }} #eburon-audio-subtitle {{ font-size: 11px; color: #9ca3af; }} .svelte-1g805jl {{ border-radius: 999px !important; }} .eburon-mini-pill {{ padding: 2px 7px; border-radius: 999px; border: 1px solid rgba(75, 85, 99, 0.9); font-size: 10px; color: #9ca3af; }} """


# -----------------------------
# Core TTS Logic
# -----------------------------
async def get_voices():
    """Return display labels for every voice edge-tts reports, sorted A-Z.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"``;
    ``text_to_speech`` later recovers the short name from the part before
    ``" - "``.
    """
    voices = await edge_tts.list_voices()
    return sorted(
        f"{v['ShortName']} - {v['Locale']} ({v['Gender']})" for v in voices
    )


def _signed(value, unit):
    """Format *value* as a signed whole number followed by *unit* (e.g. ``+5%``)."""
    # Sliders can deliver floats; the ``+d`` format spec only accepts ints.
    return f"{int(round(value)):+d}{unit}"


async def text_to_speech(text, voice, rate, pitch, style, style_degree):
    """Synthesize *text* to a temporary MP3 file.

    Args:
        text: The script to speak.
        voice: A label produced by ``get_voices`` (short name before " - ").
        rate: Speed offset in percent (signed number).
        pitch: Pitch offset in Hz (signed number).
        style: Expressive style name chosen in the UI.
        style_degree: Intensity chosen in the UI.

    Returns:
        ``(path, None)`` on success, or ``(None, message)`` when the input is
        missing.

    Raises:
        Whatever ``edge_tts`` raises while synthesizing; the temporary file
        is removed before the exception propagates.
    """
    if not text or not text.strip():
        return None, "Please enter some text."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0].strip()

    # NOTE(review): style / style_degree are accepted for interface
    # compatibility but are not applied. The SSML wrapper strings in the
    # previous implementation were empty, and edge-tts documents that custom
    # SSML is not supported, so there is no channel to send them through —
    # confirm whether a different backend is intended for emotions.
    #
    # The raw text is passed unescaped: edge-tts escapes it internally, so
    # escaping here as well would make "&" be spoken as "&amp;".
    communicate = edge_tts.Communicate(
        text,
        voice_short_name,
        rate=_signed(rate, "%"),
        pitch=_signed(pitch, "Hz"),
    )

    # delete=False because the UI serves the file after this function returns.
    # The handle is closed before saving so the path can be reopened for
    # writing on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Do not leave an empty/partial file behind when synthesis fails.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        raise

    return tmp_path, None


async def tts_interface(text, voice, rate, pitch, style, style_degree):
    """Gradio click handler: run synthesis and surface messages as toasts.

    Returns a pair for the ``(audio_output, status_msg)`` components. The
    status slot always receives ``None``; user feedback is shown through
    ``gr.Warning``.
    """
    # Simplified check: a style was chosen but the voice label does not look
    # like a Neural voice. Guarded so a missing voice cannot raise TypeError.
    warning_msg = None
    if style != "General" and voice and "Neural" not in voice:
        warning_msg = (
            "Note: The selected voice might not support emotions. "
            "Neural voices work best."
        )

    audio, error = await text_to_speech(
        text, voice, rate, pitch, style, style_degree
    )

    if error:
        gr.Warning(error)
        return None, None

    if warning_msg:
        gr.Warning(warning_msg)

    return audio, None


# -----------------------------
# Eburon Speech Studio v2.0 UI
# -----------------------------
async def create_demo():
    """Build and return the ``gr.Blocks`` application (not yet launched)."""
    voices = await get_voices()

    # Style names offered in the "Expressive Style" dropdown.
    styles = [
        "General",
        "Cheerful",
        "Sad",
        "Angry",
        "Terrified",
        "Whispering",
        "Excited",
        "Friendly",
        "Unfriendly",
        "Shouting",
        "Hopeful",
    ]

    # Only preselect the default voice when that exact label is a valid choice.
    preferred_voice = "en-US-AriaNeural - en-US (Female)"
    default_voice = preferred_voice if preferred_voice in voices else ""

    # NOTE(review): the HTML snippets below contain plain text only, so the
    # element ids/classes targeted by EBURON_CSS (e.g. #eburon-top-nav) never
    # appear in them — confirm the markup was not lost upstream.
    with gr.Blocks(title="Eburon Speech Studio v2.0", css=EBURON_CSS) as demo:
        with gr.Column(elem_id="eburon-root"):
            # HEADER
            gr.HTML(f"""
E
EBURON EMOTION Neural Expression Engine · v{EBURON_VERSION}
Synthesis
Voice Lab
Pro
SSML Enabled
""")

            with gr.Row():
                # LEFT: Script Input
                with gr.Column(scale=2, min_width=460):
                    with gr.Group(elem_classes="eburon-main-card"):
                        gr.HTML("""
Script
Type your text. Use standard punctuation for best pause handling.
Unlimited
""")
                        text_input = gr.Textbox(
                            label="",
                            placeholder="Enter text here...",
                            lines=14,
                            value="I can't believe you did that! That is absolutely amazing.",
                        )

                # RIGHT: Voice & Emotion Controls
                with gr.Column(scale=1, min_width=340):
                    with gr.Group(elem_classes="eburon-main-card"):
                        gr.HTML("""
Voice & Emotion
Select neural voice and emotional overlay.
""")
                        voice_dropdown = gr.Dropdown(
                            choices=[""] + voices,
                            label="Voice Model",
                            value=default_voice,
                            info="Select 'Neural' voices for best results.",
                        )
                        style_dropdown = gr.Dropdown(
                            choices=styles,
                            label="Expressive Style",
                            value="General",
                            info="Applies emotional tone to the voice.",
                        )
                        style_degree = gr.Slider(
                            minimum=0.1,
                            maximum=2.0,
                            value=1.0,
                            step=0.1,
                            label="Emotion Intensity",
                            info="< 1 is subtle, > 1 is exaggerated.",
                        )
                        # Same value as before (a lone newline), written as an
                        # escape because a raw line break is not valid inside
                        # a single-quoted string literal.
                        gr.HTML("\n")
                        rate_slider = gr.Slider(
                            minimum=-50, maximum=50, value=0, label="Speed", step=1
                        )
                        pitch_slider = gr.Slider(
                            minimum=-20, maximum=20, value=0, label="Pitch", step=1
                        )

            # BOTTOM: Generate & Player
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    generate_btn = gr.Button(
                        "Generate Audio",
                        variant="primary",
                        elem_id="eburon-generate-btn",
                    )
                    status_msg = gr.Markdown(visible=False)

                with gr.Column(scale=2, min_width=460):
                    with gr.Group(elem_id="eburon-audio-card"):
                        gr.HTML("""
Output
Generated result
MP3
""")
                        audio_output = gr.Audio(
                            label="",
                            type="filepath",
                            autoplay=True,
                            interactive=False,
                        )

            generate_btn.click(
                fn=tts_interface,
                inputs=[
                    text_input,
                    voice_dropdown,
                    rate_slider,
                    pitch_slider,
                    style_dropdown,
                    style_degree,
                ],
                outputs=[audio_output, status_msg],
            )

    return demo


async def main():
    """Build the UI, enable request queuing, and start the Gradio server."""
    demo = await create_demo()
    demo.queue(default_concurrency_limit=20)
    demo.launch()


if __name__ == "__main__":
    asyncio.run(main())