Spaces:
Running
Running
Dmitry Atamanov
committed on
common : fix audio loading by miniaudio (#2862)
Browse files
- examples/common-whisper.cpp +13 -18
- examples/common.cpp +0 -15
- examples/common.h +0 -3
examples/common-whisper.cpp
CHANGED
|
@@ -76,30 +76,25 @@ bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std:
|
|
| 76 |
|
| 77 |
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, audio_data.size());
|
| 78 |
}
|
| 79 |
-
else if (
|
| 80 |
-
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
| 81 |
-
fprintf(stderr, "Error: failed to open audio data from fname buffer (%s)\n", ma_result_description(result));
|
| 82 |
-
|
| 83 |
-
return false;
|
| 84 |
-
}
|
| 85 |
-
}
|
| 86 |
-
else if ((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
| 87 |
#if defined(WHISPER_FFMPEG)
|
| 88 |
-
|
| 89 |
-
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
#else
|
| 100 |
-
|
|
|
|
| 101 |
|
| 102 |
-
|
|
|
|
| 103 |
#endif
|
| 104 |
}
|
| 105 |
|
|
|
|
| 76 |
|
| 77 |
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, audio_data.size());
|
| 78 |
}
|
| 79 |
+
else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
#if defined(WHISPER_FFMPEG)
|
| 81 |
+
if (ffmpeg_decode_audio(fname, audio_data) != 0) {
|
| 82 |
+
fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());
|
| 83 |
|
| 84 |
+
return false;
|
| 85 |
+
}
|
| 86 |
|
| 87 |
+
if ((result = ma_decoder_init_memory(audio_data.data(), audio_data.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
| 88 |
+
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
|
| 89 |
|
| 90 |
+
return false;
|
| 91 |
+
}
|
| 92 |
#else
|
| 93 |
+
if ((result = ma_decoder_init_memory(fname.c_str(), fname.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
|
| 94 |
+
fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
|
| 95 |
|
| 96 |
+
return false;
|
| 97 |
+
}
|
| 98 |
#endif
|
| 99 |
}
|
| 100 |
|
examples/common.cpp
CHANGED
|
@@ -609,21 +609,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
|
|
| 609 |
|
| 610 |
}
|
| 611 |
|
| 612 |
-
bool is_wav_buffer(const std::string buf) {
|
| 613 |
-
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
|
| 614 |
-
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
|
| 615 |
-
if (buf.size() < 12 || buf.substr(0, 4) != "RIFF" || buf.substr(8, 4) != "WAVE") {
|
| 616 |
-
return false;
|
| 617 |
-
}
|
| 618 |
-
|
| 619 |
-
uint32_t chunk_size = *reinterpret_cast<const uint32_t*>(buf.data() + 4);
|
| 620 |
-
if (chunk_size + 8 != buf.size()) {
|
| 621 |
-
return false;
|
| 622 |
-
}
|
| 623 |
-
|
| 624 |
-
return true;
|
| 625 |
-
}
|
| 626 |
-
|
| 627 |
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
|
| 628 |
const float rc = 1.0f / (2.0f * M_PI * cutoff);
|
| 629 |
const float dt = 1.0f / sample_rate;
|
|
|
|
| 609 |
|
| 610 |
}
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
|
| 613 |
const float rc = 1.0f / (2.0f * M_PI * cutoff);
|
| 614 |
const float dt = 1.0f / sample_rate;
|
examples/common.h
CHANGED
|
@@ -134,9 +134,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
|
|
| 134 |
// Audio utils
|
| 135 |
//
|
| 136 |
|
| 137 |
-
// Check if a buffer is a WAV audio file
|
| 138 |
-
bool is_wav_buffer(const std::string buf);
|
| 139 |
-
|
| 140 |
// Write PCM data into WAV audio file
|
| 141 |
class wav_writer {
|
| 142 |
private:
|
|
|
|
| 134 |
// Audio utils
|
| 135 |
//
|
| 136 |
|
|
|
|
|
|
|
|
|
|
| 137 |
// Write PCM data into WAV audio file
|
| 138 |
class wav_writer {
|
| 139 |
private:
|