Spaces:
Paused
Paused
Helw150
committed on
Commit
·
e2607b6
1
Parent(s):
67da1a1
Revert "Add Buffering to Avoid Speech Gaps due to Orca Slowdown"
Browse files
This reverts commit 67da1a10bcbad20268886cfbe19245c983e72846.
- app.py +5 -9
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -69,7 +69,7 @@ def response(state: AppState, audio: tuple):
|
|
| 69 |
if not audio:
|
| 70 |
return AppState()
|
| 71 |
|
| 72 |
-
file_name = f"
|
| 73 |
|
| 74 |
sf.write(file_name, audio[1], audio[0], format="wav")
|
| 75 |
|
|
@@ -103,8 +103,7 @@ def response(state: AppState, audio: tuple):
|
|
| 103 |
state.model_outs = None
|
| 104 |
prev_outs = causal_outs
|
| 105 |
stream = orca.stream_open()
|
| 106 |
-
|
| 107 |
-
buff = []
|
| 108 |
for resp, outs in diva_audio(
|
| 109 |
(audio[0], audio[1]),
|
| 110 |
prev_outs=(prev_outs if prev_outs is not None else None),
|
|
@@ -113,18 +112,15 @@ def response(state: AppState, audio: tuple):
|
|
| 113 |
if prev_resp == LOADER_STR:
|
| 114 |
prev_resp = ""
|
| 115 |
state.conversation[-1]["content"] = resp
|
| 116 |
-
audio_chunk = None
|
| 117 |
pcm = stream.synthesize(resp[len(prev_resp) :])
|
|
|
|
| 118 |
if pcm is not None:
|
| 119 |
-
buff.extend(pcm)
|
| 120 |
-
if len(buff) > (orca.sample_rate*2):
|
| 121 |
mp3_io = io.BytesIO()
|
| 122 |
sf.write(
|
| 123 |
-
mp3_io, np.asarray(
|
| 124 |
)
|
| 125 |
audio_chunk = mp3_io.getvalue()
|
| 126 |
mp3_io.close()
|
| 127 |
-
buff = buff[orca.sample_rate:]
|
| 128 |
yield state, state.conversation, audio_chunk
|
| 129 |
|
| 130 |
del outs.logits
|
|
@@ -260,4 +256,4 @@ with gr.Blocks(theme=theme, js=js) as demo:
|
|
| 260 |
)
|
| 261 |
|
| 262 |
if __name__ == "__main__":
|
| 263 |
-
demo.launch(
|
|
|
|
| 69 |
if not audio:
|
| 70 |
return AppState()
|
| 71 |
|
| 72 |
+
file_name = f"/tmp/{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav"
|
| 73 |
|
| 74 |
sf.write(file_name, audio[1], audio[0], format="wav")
|
| 75 |
|
|
|
|
| 103 |
state.model_outs = None
|
| 104 |
prev_outs = causal_outs
|
| 105 |
stream = orca.stream_open()
|
| 106 |
+
|
|
|
|
| 107 |
for resp, outs in diva_audio(
|
| 108 |
(audio[0], audio[1]),
|
| 109 |
prev_outs=(prev_outs if prev_outs is not None else None),
|
|
|
|
| 112 |
if prev_resp == LOADER_STR:
|
| 113 |
prev_resp = ""
|
| 114 |
state.conversation[-1]["content"] = resp
|
|
|
|
| 115 |
pcm = stream.synthesize(resp[len(prev_resp) :])
|
| 116 |
+
audio_chunk = None
|
| 117 |
if pcm is not None:
|
|
|
|
|
|
|
| 118 |
mp3_io = io.BytesIO()
|
| 119 |
sf.write(
|
| 120 |
+
mp3_io, np.asarray(pcm).astype(np.int16), orca.sample_rate, format="mp3"
|
| 121 |
)
|
| 122 |
audio_chunk = mp3_io.getvalue()
|
| 123 |
mp3_io.close()
|
|
|
|
| 124 |
yield state, state.conversation, audio_chunk
|
| 125 |
|
| 126 |
del outs.logits
|
|
|
|
| 256 |
)
|
| 257 |
|
| 258 |
if __name__ == "__main__":
|
| 259 |
+
demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
transformers==4.43.3
|
| 2 |
-
gradio==5.1
|
| 3 |
spaces
|
| 4 |
accelerate
|
| 5 |
|
|
|
|
| 1 |
transformers==4.43.3
|
| 2 |
+
gradio==5.0.1
|
| 3 |
spaces
|
| 4 |
accelerate
|
| 5 |
|