Spaces:
Runtime error
Runtime error
fix
Browse files- app.py +2 -2
- mdx_models/{model_data_v2.json → model_data.json} +0 -0
- uvr_processing.py +23 -22
app.py
CHANGED
|
@@ -89,8 +89,8 @@ if __name__ == "__main__":
|
|
| 89 |
# predict(audio_file, api_name="/predict") -> output
|
| 90 |
app_api = gr.Interface(
|
| 91 |
fn=inference,
|
| 92 |
-
inputs=gr.Audio(type="filepath"),
|
| 93 |
-
outputs=gr.File(file_count="multiple"),
|
| 94 |
)
|
| 95 |
|
| 96 |
app = gr.TabbedInterface(
|
|
|
|
| 89 |
# predict(audio_file, api_name="/predict") -> output
|
| 90 |
app_api = gr.Interface(
|
| 91 |
fn=inference,
|
| 92 |
+
inputs=gr.Audio(type="filepath"),
|
| 93 |
+
outputs=gr.File(file_count="multiple"),
|
| 94 |
)
|
| 95 |
|
| 96 |
app = gr.TabbedInterface(
|
mdx_models/{model_data_v2.json → model_data.json}
RENAMED
|
File without changes
|
uvr_processing.py
CHANGED
|
@@ -31,7 +31,7 @@ def run_mdx(model_params: Dict,
|
|
| 31 |
device_base: str = "cuda",
|
| 32 |
) -> Tuple[str, str]:
|
| 33 |
"""
|
| 34 |
-
|
| 35 |
"""
|
| 36 |
if device_base == "cuda":
|
| 37 |
device = torch.device("cuda:0")
|
|
@@ -146,7 +146,7 @@ def extract_bgm(mdx_model_params: Dict,
|
|
| 146 |
output_dir: Path,
|
| 147 |
device_base: str = "cuda") -> Path:
|
| 148 |
"""
|
| 149 |
-
|
| 150 |
"""
|
| 151 |
background_path, _ = run_mdx(model_params=mdx_model_params,
|
| 152 |
input_filename=input_filename,
|
|
@@ -166,9 +166,9 @@ def extract_vocal(mdx_model_params: Dict,
|
|
| 166 |
dereverb_flag: bool = False,
|
| 167 |
device_base: str = "cuda") -> Path:
|
| 168 |
"""
|
| 169 |
-
|
| 170 |
"""
|
| 171 |
-
#
|
| 172 |
vocals_path, _ = run_mdx(mdx_model_params,
|
| 173 |
input_filename,
|
| 174 |
output_dir,
|
|
@@ -176,7 +176,7 @@ def extract_vocal(mdx_model_params: Dict,
|
|
| 176 |
denoise=True,
|
| 177 |
device_base=device_base,
|
| 178 |
)
|
| 179 |
-
#
|
| 180 |
if main_vocals_flag:
|
| 181 |
time.sleep(2)
|
| 182 |
backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
|
|
@@ -187,44 +187,45 @@ def extract_vocal(mdx_model_params: Dict,
|
|
| 187 |
device_base=device_base,
|
| 188 |
)
|
| 189 |
vocals_path = main_vocals_path
|
| 190 |
-
#
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
| 201 |
return vocals_path
|
| 202 |
|
| 203 |
def process_uvr_task(mdxnet_models_dir: Path,
|
| 204 |
input_file_path: Path,
|
| 205 |
output_dir: Path,
|
| 206 |
-
main_vocals_flag: bool = False, #
|
| 207 |
-
dereverb_flag: bool = False, #
|
| 208 |
) -> Tuple[Path, Path]:
|
| 209 |
|
| 210 |
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
| 211 |
|
| 212 |
# load mdx model definition
|
| 213 |
-
with open(mdxnet_models_dir/"model_data_v2.json") as infile:
|
| 214 |
mdx_model_params = json.load(infile) # type: Dict
|
| 215 |
|
| 216 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 217 |
input_file_path = convert_to_stereo_and_wav(input_file_path) # type: Path
|
| 218 |
|
| 219 |
-
# 1.
|
| 220 |
background_path = extract_bgm(mdx_model_params,
|
| 221 |
input_file_path,
|
| 222 |
mdxnet_models_dir,
|
| 223 |
output_dir,
|
| 224 |
device_base=device_base)
|
| 225 |
|
| 226 |
-
# 2.
|
| 227 |
-
#
|
| 228 |
vocals_path = extract_vocal(mdx_model_params,
|
| 229 |
input_file_path,
|
| 230 |
mdxnet_models_dir,
|
|
|
|
| 31 |
device_base: str = "cuda",
|
| 32 |
) -> Tuple[str, str]:
|
| 33 |
"""
|
| 34 |
+
Separate vocals using MDX model
|
| 35 |
"""
|
| 36 |
if device_base == "cuda":
|
| 37 |
device = torch.device("cuda:0")
|
|
|
|
| 146 |
output_dir: Path,
|
| 147 |
device_base: str = "cuda") -> Path:
|
| 148 |
"""
|
| 149 |
+
Extract pure background music, remove vocals
|
| 150 |
"""
|
| 151 |
background_path, _ = run_mdx(model_params=mdx_model_params,
|
| 152 |
input_filename=input_filename,
|
|
|
|
| 166 |
dereverb_flag: bool = False,
|
| 167 |
device_base: str = "cuda") -> Path:
|
| 168 |
"""
|
| 169 |
+
Extract vocals
|
| 170 |
"""
|
| 171 |
+
# First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
|
| 172 |
vocals_path, _ = run_mdx(mdx_model_params,
|
| 173 |
input_filename,
|
| 174 |
output_dir,
|
|
|
|
| 176 |
denoise=True,
|
| 177 |
device_base=device_base,
|
| 178 |
)
|
| 179 |
+
# If "main_vocals_flag" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main vocals (Main) from backup vocals/background vocals (Backup)
|
| 180 |
if main_vocals_flag:
|
| 181 |
time.sleep(2)
|
| 182 |
backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
|
|
|
|
| 187 |
device_base=device_base,
|
| 188 |
)
|
| 189 |
vocals_path = main_vocals_path
|
| 190 |
+
# If "dereverb_flag" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
|
| 191 |
+
# deactivated since the model license is unknown
|
| 192 |
+
# if dereverb_flag:
|
| 193 |
+
# time.sleep(2)
|
| 194 |
+
# _, vocals_dereverb_path = run_mdx(mdx_model_params,
|
| 195 |
+
# output_dir,
|
| 196 |
+
# mdxnet_models_dir/"Reverb_HQ_By_FoxJoy.onnx",
|
| 197 |
+
# vocals_path,
|
| 198 |
+
# denoise=True,
|
| 199 |
+
# device_base=device_base,
|
| 200 |
+
# )
|
| 201 |
+
# vocals_path = vocals_dereverb_path
|
| 202 |
return vocals_path
|
| 203 |
|
| 204 |
def process_uvr_task(mdxnet_models_dir: Path,
|
| 205 |
input_file_path: Path,
|
| 206 |
output_dir: Path,
|
| 207 |
+
main_vocals_flag: bool = False, # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
|
| 208 |
+
dereverb_flag: bool = False, # If "DeReverb" is enabled, use Reverb_HQ_By_FoxJoy.onnx for dereverberation
|
| 209 |
) -> Tuple[Path, Path]:
|
| 210 |
|
| 211 |
device_base = "cuda" if torch.cuda.is_available() else "cpu"
|
| 212 |
|
| 213 |
# load mdx model definition
|
| 214 |
+
with open(mdxnet_models_dir/"model_data.json") as infile:
|
| 215 |
mdx_model_params = json.load(infile) # type: Dict
|
| 216 |
|
| 217 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 218 |
input_file_path = convert_to_stereo_and_wav(input_file_path) # type: Path
|
| 219 |
|
| 220 |
+
# 1. Extract pure background music, remove vocals
|
| 221 |
background_path = extract_bgm(mdx_model_params,
|
| 222 |
input_file_path,
|
| 223 |
mdxnet_models_dir,
|
| 224 |
output_dir,
|
| 225 |
device_base=device_base)
|
| 226 |
|
| 227 |
+
# 2. Separate vocals
|
| 228 |
+
# First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
|
| 229 |
vocals_path = extract_vocal(mdx_model_params,
|
| 230 |
input_file_path,
|
| 231 |
mdxnet_models_dir,
|