Remove color picking
Browse files
app.py
CHANGED
|
@@ -73,11 +73,6 @@ def get_S_prime_and_T(text, model_name, model, audio_duration):
|
|
| 73 |
return S_prime, T
|
| 74 |
|
| 75 |
|
| 76 |
-
def parse_rgb(rgb_string):
|
| 77 |
-
rgb_string = rgb_string[4:-1]
|
| 78 |
-
rgb_list = rgb_string.split(',')
|
| 79 |
-
return [int(elem) for elem in rgb_list]
|
| 80 |
-
|
| 81 |
def delete_mp4s_except_given_filepath(filepath):
|
| 82 |
files_in_dir = os.listdir()
|
| 83 |
mp4_files_in_dir = [x for x in files_in_dir if x.endswith(".mp4")]
|
|
@@ -86,7 +81,7 @@ def delete_mp4s_except_given_filepath(filepath):
|
|
| 86 |
os.remove(mp4_file)
|
| 87 |
|
| 88 |
|
| 89 |
-
def align(Microphone, File_Upload, text, col1, col2, col3, split_on_newline, progress=gr.Progress()):
|
| 90 |
# Create utt_id, specify output_video_filepath and delete any MP4s
|
| 91 |
# that are not that filepath. These stray MP4s can be created
|
| 92 |
# if a user refreshes or exits the page while this 'align' function is executing.
|
|
@@ -187,9 +182,6 @@ def align(Microphone, File_Upload, text, col1, col2, col3, split_on_newline, pro
|
|
| 187 |
fontsize=45,
|
| 188 |
resegment_text_to_fill_space=resegment_text_to_fill_space,
|
| 189 |
max_lines_per_segment=4,
|
| 190 |
-
text_already_spoken_rgb=parse_rgb(col1),
|
| 191 |
-
text_being_spoken_rgb=parse_rgb(col2),
|
| 192 |
-
text_not_yet_spoken_rgb=parse_rgb(col3),
|
| 193 |
),
|
| 194 |
)
|
| 195 |
|
|
@@ -255,7 +247,7 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
|
|
| 255 |
gr.Markdown("# NeMo Forced Aligner")
|
| 256 |
gr.Markdown(
|
| 257 |
"Demo for [NeMo Forced Aligner](https://github.com/NVIDIA/NeMo/tree/main/tools/nemo_forced_aligner) (NFA). "
|
| 258 |
-
"Upload audio and (optionally) the text spoken in the audio to generate a video where each part of the text will be highlighted as it is spoken. ",
|
| 259 |
)
|
| 260 |
gr.Markdown("You can also download CTM and ASS files to add subtitles to your videos. ")
|
| 261 |
|
|
@@ -274,12 +266,6 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
|
|
| 274 |
label="Separate text on new lines",
|
| 275 |
)
|
| 276 |
|
| 277 |
-
gr.Markdown("[Optional] For fun - adjust the colors of the text in the output video")
|
| 278 |
-
with gr.Row():
|
| 279 |
-
col1 = gr.ColorPicker(label="text already spoken", value="#fcba03")
|
| 280 |
-
col2 = gr.ColorPicker(label="text being spoken", value="#bf45bf")
|
| 281 |
-
col3 = gr.ColorPicker(label="text to be spoken", value="#3e1af0")
|
| 282 |
-
|
| 283 |
submit_button = gr.Button("Submit")
|
| 284 |
|
| 285 |
with gr.Column(scale=1):
|
|
@@ -302,7 +288,7 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
|
|
| 302 |
|
| 303 |
submit_button.click(
|
| 304 |
fn=align,
|
| 305 |
-
inputs=[mic_in, audio_file_in, ref_text, col1, col2, col3, split_on_newline],
|
| 306 |
outputs=[video_out, text_out, non_tmp_output_video_filepath, ass_file, word_ctm_file, segment_ctm_file],
|
| 307 |
).then(
|
| 308 |
fn=delete_non_tmp_video, inputs=[non_tmp_output_video_filepath], outputs=None,
|
|
|
|
| 73 |
return S_prime, T
|
| 74 |
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
def delete_mp4s_except_given_filepath(filepath):
|
| 77 |
files_in_dir = os.listdir()
|
| 78 |
mp4_files_in_dir = [x for x in files_in_dir if x.endswith(".mp4")]
|
|
|
|
| 81 |
os.remove(mp4_file)
|
| 82 |
|
| 83 |
|
| 84 |
+
def align(Microphone, File_Upload, text, split_on_newline, progress=gr.Progress()):
|
| 85 |
# Create utt_id, specify output_video_filepath and delete any MP4s
|
| 86 |
# that are not that filepath. These stray MP4s can be created
|
| 87 |
# if a user refreshes or exits the page while this 'align' function is executing.
|
|
|
|
| 182 |
fontsize=45,
|
| 183 |
resegment_text_to_fill_space=resegment_text_to_fill_space,
|
| 184 |
max_lines_per_segment=4,
|
|
|
|
|
|
|
|
|
|
| 185 |
),
|
| 186 |
)
|
| 187 |
|
|
|
|
| 247 |
gr.Markdown("# NeMo Forced Aligner")
|
| 248 |
gr.Markdown(
|
| 249 |
"Demo for [NeMo Forced Aligner](https://github.com/NVIDIA/NeMo/tree/main/tools/nemo_forced_aligner) (NFA). "
|
| 250 |
+
"Upload audio and (optionally) the text spoken in the audio in Tamazight to generate a video where each part of the text will be highlighted as it is spoken. ",
|
| 251 |
)
|
| 252 |
gr.Markdown("You can also download CTM and ASS files to add subtitles to your videos. ")
|
| 253 |
|
|
|
|
| 266 |
label="Separate text on new lines",
|
| 267 |
)
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
submit_button = gr.Button("Submit")
|
| 270 |
|
| 271 |
with gr.Column(scale=1):
|
|
|
|
| 288 |
|
| 289 |
submit_button.click(
|
| 290 |
fn=align,
|
| 291 |
+
inputs=[mic_in, audio_file_in, ref_text, split_on_newline],
|
| 292 |
outputs=[video_out, text_out, non_tmp_output_video_filepath, ass_file, word_ctm_file, segment_ctm_file],
|
| 293 |
).then(
|
| 294 |
fn=delete_non_tmp_video, inputs=[non_tmp_output_video_filepath], outputs=None,
|