NeMo-Forced-Aligner

Running

App Files Community

ayymen committed 10 days ago

Commit

913bde6

verified ·

1 Parent(s): 882a590

Remove color picking

Browse files

Files changed (1) hide show

app.py +3 -17

app.py CHANGED Viewed

@@ -73,11 +73,6 @@ def get_S_prime_and_T(text, model_name, model, audio_duration):
 	return S_prime, T
-def parse_rgb(rgb_string):
-	rgb_string = rgb_string[4:-1]
-	rgb_list = rgb_string.split(',')
-	return [int(elem) for elem in rgb_list]
 def delete_mp4s_except_given_filepath(filepath):
 	files_in_dir = os.listdir()
 	mp4_files_in_dir = [x for x in files_in_dir if x.endswith(".mp4")]
@@ -86,7 +81,7 @@ def delete_mp4s_except_given_filepath(filepath):
 			os.remove(mp4_file)
-def align(Microphone, File_Upload, text, col1, col2, col3, split_on_newline, progress=gr.Progress()):
 	# Create utt_id,  specify output_video_filepath and delete any MP4s
 	# that are not that filepath. These stray MP4s can be created
 	# if a user refreshes or exits the page while this 'align' function is executing.
@@ -187,9 +182,6 @@ def align(Microphone, File_Upload, text, col1, col2, col3, split_on_newline, pro
 				fontsize=45,
 				resegment_text_to_fill_space=resegment_text_to_fill_space,
 				max_lines_per_segment=4,
-				text_already_spoken_rgb=parse_rgb(col1),
-				text_being_spoken_rgb=parse_rgb(col2),
-				text_not_yet_spoken_rgb=parse_rgb(col3),
 			),
 		)
@@ -255,7 +247,7 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
 			gr.Markdown("# NeMo Forced Aligner")
 			gr.Markdown(
 				"Demo for [NeMo Forced Aligner](https://github.com/NVIDIA/NeMo/tree/main/tools/nemo_forced_aligner) (NFA). "
-				"Upload audio and (optionally) the text spoken in the audio to generate a video where each part of the text will be highlighted as it is spoken. ",
 			)
 			gr.Markdown("You can also download CTM and ASS files to add subtitles to your videos. ")
@@ -274,12 +266,6 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
                 label="Separate text on new lines",
             )
-			gr.Markdown("[Optional] For fun - adjust the colors of the text in the output video")
-			with gr.Row():
-				col1 = gr.ColorPicker(label="text already spoken", value="#fcba03")
-				col2 = gr.ColorPicker(label="text being spoken", value="#bf45bf")
-				col3 = gr.ColorPicker(label="text to be spoken", value="#3e1af0")
 			submit_button = gr.Button("Submit")
 		with gr.Column(scale=1):
@@ -302,7 +288,7 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
 	submit_button.click(
 		fn=align,
-		inputs=[mic_in, audio_file_in, ref_text, col1, col2, col3, split_on_newline],
 		outputs=[video_out, text_out, non_tmp_output_video_filepath, ass_file, word_ctm_file, segment_ctm_file],
 	).then(
 		fn=delete_non_tmp_video, inputs=[non_tmp_output_video_filepath], outputs=None,

 	return S_prime, T
 def delete_mp4s_except_given_filepath(filepath):
 	files_in_dir = os.listdir()
 	mp4_files_in_dir = [x for x in files_in_dir if x.endswith(".mp4")]
 			os.remove(mp4_file)
+def align(Microphone, File_Upload, text, split_on_newline, progress=gr.Progress()):
 	# Create utt_id,  specify output_video_filepath and delete any MP4s
 	# that are not that filepath. These stray MP4s can be created
 	# if a user refreshes or exits the page while this 'align' function is executing.
 				fontsize=45,
 				resegment_text_to_fill_space=resegment_text_to_fill_space,
 				max_lines_per_segment=4,
 			),
 		)
 			gr.Markdown("# NeMo Forced Aligner")
 			gr.Markdown(
 				"Demo for [NeMo Forced Aligner](https://github.com/NVIDIA/NeMo/tree/main/tools/nemo_forced_aligner) (NFA). "
+				"Upload audio and (optionally) the text spoken in the audio in Tamazight to generate a video where each part of the text will be highlighted as it is spoken. ",
 			)
 			gr.Markdown("You can also download CTM and ASS files to add subtitles to your videos. ")
                 label="Separate text on new lines",
             )
 			submit_button = gr.Button("Submit")
 		with gr.Column(scale=1):
 	submit_button.click(
 		fn=align,
+		inputs=[mic_in, audio_file_in, ref_text, split_on_newline],
 		outputs=[video_out, text_out, non_tmp_output_video_filepath, ass_file, word_ctm_file, segment_ctm_file],
 	).then(
 		fn=delete_non_tmp_video, inputs=[non_tmp_output_video_filepath], outputs=None,