uri-pareras committed on
Commit
31c417c
·
verified ·
1 Parent(s): b0fd395

[FEAT] Added detected language textbox

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -32,10 +32,10 @@ def process_audio(audio, source_lang_name, target_lang_name):
32
  Processes the audio input to perform speech-to-text translation or transcription.
33
  """
34
  if audio is None:
35
- return "Please provide an audio file or record one."
36
 
37
  if target_lang_name is None:
38
- return "Please select a target language."
39
 
40
  source_lang = LANG_TO_NAME.get(source_lang_name)
41
  target_lang = LANG_TO_NAME.get(target_lang_name)
@@ -46,7 +46,9 @@ def process_audio(audio, source_lang_name, target_lang_name):
46
 
47
  if source_lang != "autodetect":
48
  pipe_kwargs["src_lang"] = source_lang
49
-
 
 
50
  # If source and target languages are the same, perform transcription
51
  if source_lang == target_lang:
52
  pipe_kwargs["mode"] = "asr"
@@ -55,7 +57,17 @@ def process_audio(audio, source_lang_name, target_lang_name):
55
  pipe_kwargs["tgt_lang"] = target_lang_name
56
 
57
  result = pipe(audio, **pipe_kwargs)
58
- return result[0]["text"] if isinstance(result, list) else result
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  # Create the Gradio interface
@@ -82,11 +94,12 @@ with gr.Blocks() as demo:
82
 
83
  with gr.Column():
84
  output_text = gr.Textbox(label="Output", lines=10, interactive=False)
 
85
 
86
  submit_button.click(
87
  fn=process_audio,
88
  inputs=[audio_input, source_lang_dropdown, target_lang_dropdown],
89
- outputs=output_text,
90
  )
91
 
92
  gr.Markdown("## Examples")
@@ -104,7 +117,7 @@ with gr.Blocks() as demo:
104
  ],
105
  ],
106
  inputs=[audio_input, source_lang_dropdown, target_lang_dropdown],
107
- outputs=output_text,
108
  fn=process_audio,
109
  )
110
 
 
32
  Processes the audio input to perform speech-to-text translation or transcription.
33
  """
34
  if audio is None:
35
+ return "Please provide an audio file or record one.", ""
36
 
37
  if target_lang_name is None:
38
+ return "Please select a target language.", ""
39
 
40
  source_lang = LANG_TO_NAME.get(source_lang_name)
41
  target_lang = LANG_TO_NAME.get(target_lang_name)
 
46
 
47
  if source_lang != "autodetect":
48
  pipe_kwargs["src_lang"] = source_lang
49
+
50
+ pipe_kwargs["return_timestamps"] = True # To get the language
51
+
52
  # If source and target languages are the same, perform transcription
53
  if source_lang == target_lang:
54
  pipe_kwargs["mode"] = "asr"
 
57
  pipe_kwargs["tgt_lang"] = target_lang_name
58
 
59
  result = pipe(audio, **pipe_kwargs)
60
+
61
+ text = result["text"] if isinstance(result, dict) else result
62
+
63
+ detected_language = ""
64
+ if source_lang == "autodetect":
65
+ try:
66
+ detected_language = result["chunks"][0]["language"]
67
+ except (KeyError, IndexError):
68
+ detected_language = "N/A"
69
+
70
+ return text, detected_language
71
 
72
 
73
  # Create the Gradio interface
 
94
 
95
  with gr.Column():
96
  output_text = gr.Textbox(label="Output", lines=10, interactive=False)
97
+ detected_lang_output = gr.Textbox(label="Detected Source Language", interactive=False)
98
 
99
  submit_button.click(
100
  fn=process_audio,
101
  inputs=[audio_input, source_lang_dropdown, target_lang_dropdown],
102
+ outputs=[output_text, detected_lang_output],
103
  )
104
 
105
  gr.Markdown("## Examples")
 
117
  ],
118
  ],
119
  inputs=[audio_input, source_lang_dropdown, target_lang_dropdown],
120
+ outputs=[output_text, detected_lang_output],
121
  fn=process_audio,
122
  )
123