Spaces:
Sleeping
Sleeping
AshDavid12
committed on
Commit
·
a9516a4
1
Parent(s):
e9d738a
no buffer
Browse files- client.py +23 -31
- infer.py +9 -11
- poetry.lock +21 -1
- pyproject.toml +1 -0
client.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import asyncio
|
| 2 |
import io
|
|
|
|
| 3 |
|
| 4 |
import numpy as np
|
| 5 |
import websockets
|
|
@@ -8,6 +9,8 @@ import ssl
|
|
| 8 |
import wave
|
| 9 |
import logging
|
| 10 |
import sys
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Parameters for reading and sending the audio
|
| 13 |
#AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
|
|
@@ -35,41 +38,26 @@ async def send_receive():
|
|
| 35 |
await asyncio.gather(send_task, receive_task)
|
| 36 |
except Exception as e:
|
| 37 |
logger.error(f"WebSocket connection error: {e}")
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
logger.info(f"Opening WAV file: {wav_file}")
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
response = requests.get(wav_file)
|
| 46 |
-
response.raise_for_status()
|
| 47 |
-
wav_bytes = io.BytesIO(response.content)
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
# Send audio data in chunks directly from the WAV file
|
| 51 |
-
chunk_size = 1024 # Sending data in chunks of 1024 bytes, which can be adjusted
|
| 52 |
-
|
| 53 |
-
total_chunks = 0
|
| 54 |
-
total_bytes_sent = 0
|
| 55 |
-
|
| 56 |
-
# While loop to send audio data chunk by chunk
|
| 57 |
-
while True:
|
| 58 |
-
chunk = wav_bytes.read(chunk_size)
|
| 59 |
-
if not chunk:
|
| 60 |
-
break
|
| 61 |
-
await websocket.send(chunk)
|
| 62 |
-
total_chunks += 1
|
| 63 |
-
total_bytes_sent += len(chunk)
|
| 64 |
-
#logger.debug(f"Sent chunk {total_chunks}: {len(chunk)} bytes")
|
| 65 |
-
#await asyncio.sleep(0.1) # Simulate real-time streaming
|
| 66 |
-
#logger.info(f"Finished sending audio data: {total_chunks} chunks sent, total bytes sent: {total_bytes_sent}")
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
finally:
|
| 72 |
-
logger.info("WAV file closed")
|
| 73 |
|
| 74 |
async def receive_transcriptions(websocket):
|
| 75 |
try:
|
|
@@ -80,6 +68,10 @@ async def receive_transcriptions(websocket):
|
|
| 80 |
except Exception as e:
|
| 81 |
logger.error(f"Receive transcription error: {e}")
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
if __name__ == "__main__":
|
| 84 |
asyncio.run(send_receive())
|
| 85 |
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import io
|
| 3 |
+
import json
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
import websockets
|
|
|
|
| 9 |
import wave
|
| 10 |
import logging
|
| 11 |
import sys
|
| 12 |
+
import sounddevice as sd
|
| 13 |
+
|
| 14 |
|
| 15 |
# Parameters for reading and sending the audio
|
| 16 |
#AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav" # Use WAV file
|
|
|
|
| 38 |
await asyncio.gather(send_task, receive_task)
|
| 39 |
except Exception as e:
|
| 40 |
logger.error(f"WebSocket connection error: {e}")
|
| 41 |
+
max_size_bytes = 50_000_000 # 50 MB
|
| 42 |
|
| 43 |
+
SAMPLE_RATE = 16000
|
| 44 |
+
CHUNK_SIZE =1024
|
|
|
|
| 45 |
|
| 46 |
+
async def send_audio_chunks(websocket):
|
| 47 |
+
"""Capture audio and send chunks to the server via WebSocket."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
def audio_callback(indata, frames, time, status):
|
| 50 |
+
"""Callback function called when new audio is available."""
|
| 51 |
+
# Convert the audio input to a JSON-serializable format (e.g., list of samples)
|
| 52 |
+
audio_chunk = indata[:, 0].tolist() # Use only the first channel
|
| 53 |
+
asyncio.run_coroutine_threadsafe(
|
| 54 |
+
websocket.send(json.dumps(audio_chunk)), asyncio.get_event_loop()
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# Start the audio stream
|
| 58 |
+
with sd.InputStream(callback=audio_callback, channels=1, samplerate=SAMPLE_RATE, blocksize=CHUNK_SIZE):
|
| 59 |
+
await asyncio.Future() # Keep the stream open and running
|
| 60 |
|
|
|
|
|
|
|
| 61 |
|
| 62 |
async def receive_transcriptions(websocket):
|
| 63 |
try:
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
logger.error(f"Receive transcription error: {e}")
|
| 70 |
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
|
| 75 |
if __name__ == "__main__":
|
| 76 |
asyncio.run(send_receive())
|
| 77 |
|
infer.py
CHANGED
|
@@ -162,23 +162,21 @@ async def process_audio_stream(websocket: WebSocket):
|
|
| 162 |
|
| 163 |
audio_chunk = process_received_audio(data)
|
| 164 |
#logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
-
audio_buffer = np.concatenate((audio_buffer, audio_chunk))
|
| 167 |
#logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
|
| 168 |
except Exception as e:
|
| 169 |
logger.error(f"Error receiving data: {e}")
|
| 170 |
break
|
| 171 |
|
| 172 |
-
|
| 173 |
-
if len(audio_buffer) >= min_chunk_size * sampling_rate:
|
| 174 |
-
if transcription_task is None or transcription_task.done():
|
| 175 |
-
# Start a new transcription task
|
| 176 |
-
#logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
|
| 177 |
-
transcription_task = asyncio.create_task(
|
| 178 |
-
transcribe_and_send(websocket, audio_buffer.copy())
|
| 179 |
-
)
|
| 180 |
-
audio_buffer = np.array([], dtype=np.float32)
|
| 181 |
-
#logger.debug("Audio buffer reset after starting transcription task")
|
| 182 |
|
| 183 |
async def transcribe_and_send(websocket: WebSocket, audio_data):
|
| 184 |
"""Run transcription in a separate thread and send the result to the client."""
|
|
|
|
| 162 |
|
| 163 |
audio_chunk = process_received_audio(data)
|
| 164 |
#logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
|
| 165 |
+
# Check if enough audio has been buffered
|
| 166 |
+
if len(audio_buffer) >= min_chunk_size * sampling_rate:
|
| 167 |
+
if transcription_task is None or transcription_task.done():
|
| 168 |
+
# Start a new transcription task
|
| 169 |
+
# logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
|
| 170 |
+
transcription_task = asyncio.create_task(
|
| 171 |
+
transcribe_and_send(websocket, audio_chunk)
|
| 172 |
+
)
|
| 173 |
|
|
|
|
| 174 |
#logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
|
| 175 |
except Exception as e:
|
| 176 |
logger.error(f"Error receiving data: {e}")
|
| 177 |
break
|
| 178 |
|
| 179 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
async def transcribe_and_send(websocket: WebSocket, audio_data):
|
| 182 |
"""Run transcription in a separate thread and send the result to the client."""
|
poetry.lock
CHANGED
|
@@ -2935,6 +2935,26 @@ files = [
|
|
| 2935 |
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
| 2936 |
]
|
| 2937 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2938 |
[[package]]
|
| 2939 |
name = "soundfile"
|
| 2940 |
version = "0.12.1"
|
|
@@ -3842,4 +3862,4 @@ type = ["pytest-mypy"]
|
|
| 3842 |
[metadata]
|
| 3843 |
lock-version = "2.0"
|
| 3844 |
python-versions = "3.9.1"
|
| 3845 |
-
content-hash = "
|
|
|
|
| 2935 |
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
| 2936 |
]
|
| 2937 |
|
| 2938 |
+
[[package]]
|
| 2939 |
+
name = "sounddevice"
|
| 2940 |
+
version = "0.5.0"
|
| 2941 |
+
description = "Play and Record Sound with Python"
|
| 2942 |
+
optional = false
|
| 2943 |
+
python-versions = ">=3.7"
|
| 2944 |
+
files = [
|
| 2945 |
+
{file = "sounddevice-0.5.0-py3-none-any.whl", hash = "sha256:8a734043ab1f751cb20f6f25d8f07408a1aadf2eeca923061849d38bb59f9e3d"},
|
| 2946 |
+
{file = "sounddevice-0.5.0-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:73eb7cb1e8ab1e1ba09c228239e9d0b160006de380921687e44610ad9a19ac32"},
|
| 2947 |
+
{file = "sounddevice-0.5.0-py3-none-win32.whl", hash = "sha256:919de43040e8737258370ddf929a9cd1a3d6c493ca173bab70a3c7cb15c71e97"},
|
| 2948 |
+
{file = "sounddevice-0.5.0-py3-none-win_amd64.whl", hash = "sha256:f28b7ef16f293d7b048a614dd087dfe39c3e313d94a50539bb52022b7ef27ece"},
|
| 2949 |
+
{file = "sounddevice-0.5.0.tar.gz", hash = "sha256:0de95277654b3d403d9c15ded3c6cedf307e9b27cc9ce7bd995a2891d0c955af"},
|
| 2950 |
+
]
|
| 2951 |
+
|
| 2952 |
+
[package.dependencies]
|
| 2953 |
+
CFFI = ">=1.0"
|
| 2954 |
+
|
| 2955 |
+
[package.extras]
|
| 2956 |
+
numpy = ["NumPy"]
|
| 2957 |
+
|
| 2958 |
[[package]]
|
| 2959 |
name = "soundfile"
|
| 2960 |
version = "0.12.1"
|
|
|
|
| 3862 |
[metadata]
|
| 3863 |
lock-version = "2.0"
|
| 3864 |
python-versions = "3.9.1"
|
| 3865 |
+
content-hash = "8b654ee2a2cc97497e78fbe0de6258f3fb006e3f9bbe7234f800843f66adcb7b"
|
pyproject.toml
CHANGED
|
@@ -23,6 +23,7 @@ soundfile = "^0.12.1"
|
|
| 23 |
openai = "^1.42.0"
|
| 24 |
numpy = "^1.22.0"
|
| 25 |
torch = "2.1.0"
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
|
|
|
|
| 23 |
openai = "^1.42.0"
|
| 24 |
numpy = "^1.22.0"
|
| 25 |
torch = "2.1.0"
|
| 26 |
+
sounddevice = "^0.5.0"
|
| 27 |
|
| 28 |
|
| 29 |
|