Spaces:

Gigaverse
/

ivrit-ai-streaming

Sleeping

App Files Files Community

AshDavid12 committed on Sep 16, 2024

Commit

a9516a4

1 Parent(s): e9d738a

no buffer

Browse files

Files changed (4) hide show

client.py +23 -31
infer.py +9 -11
poetry.lock +21 -1
pyproject.toml +1 -0

client.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import io
 import numpy as np
 import websockets
@@ -8,6 +9,8 @@ import ssl
 import wave
 import logging
 import sys
 # Parameters for reading and sending the audio
 #AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"  # Use WAV file
@@ -35,41 +38,26 @@ async def send_receive():
             await asyncio.gather(send_task, receive_task)
     except Exception as e:
         logger.error(f"WebSocket connection error: {e}")
-async def send_audio(websocket):
-    wav_file = AUDIO_FILE_URL  # URL of the WAV file to download and stream
-    logger.info(f"Opening WAV file: {wav_file}")
-    try:
-        # Download the WAV file
-        response = requests.get(wav_file)
-        response.raise_for_status()
-        wav_bytes = io.BytesIO(response.content)
-        # Send audio data in chunks directly from the WAV file
-        chunk_size = 1024  # Bytes read and sent per WebSocket message; can be adjusted
-        total_chunks = 0
-        total_bytes_sent = 0
-        # While loop to send audio data chunk by chunk
-        while True:
-            chunk = wav_bytes.read(chunk_size)
-            if not chunk:
-                break
-            await websocket.send(chunk)
-            total_chunks += 1
-            total_bytes_sent += len(chunk)
-            #logger.debug(f"Sent chunk {total_chunks}: {len(chunk)} bytes")
-            #await asyncio.sleep(0.1)  # Simulate real-time streaming
-            #logger.info(f"Finished sending audio data: {total_chunks} chunks sent, total bytes sent: {total_bytes_sent}")
-    except Exception as e:
-        logger.error(f"Send audio error: {e}")
-    finally:
-        logger.info("WAV file closed")
 async def receive_transcriptions(websocket):
     try:
@@ -80,6 +68,10 @@ async def receive_transcriptions(websocket):
     except Exception as e:
         logger.error(f"Receive transcription error: {e}")
 if __name__ == "__main__":
     asyncio.run(send_receive())

 import asyncio
 import io
+import json
 import numpy as np
 import websockets
 import wave
 import logging
 import sys
+import sounddevice as sd
 # Parameters for reading and sending the audio
 #AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"  # Use WAV file
             await asyncio.gather(send_task, receive_task)
     except Exception as e:
         logger.error(f"WebSocket connection error: {e}")
+max_size_bytes = 50_000_000  # 50 MB; not referenced in the visible code — presumably a message-size limit, TODO confirm
+SAMPLE_RATE = 16000  # Passed as `samplerate` to sd.InputStream (samples per second)
+CHUNK_SIZE =1024  # Passed as `blocksize` to sd.InputStream (frames per callback)
+async def send_audio_chunks(websocket):
+    """Capture microphone audio and send each block to the server via WebSocket.
+
+    Opens a single-channel sounddevice input stream at SAMPLE_RATE with
+    CHUNK_SIZE-frame blocks. Every captured block is serialized as a JSON list
+    of samples and sent over `websocket`. The coroutine keeps the stream open
+    until it is cancelled.
+    """
+    # The callback below runs on sounddevice's audio thread, where no asyncio
+    # event loop is set. Capture the running loop here, on the loop's own
+    # thread, so the callback can hand work back to it.
+    loop = asyncio.get_running_loop()
+
+    def audio_callback(indata, frames, time, status):
+        """Forward one captured block to the WebSocket (called from the audio thread)."""
+        if status:
+            # Surface overflows and other stream problems instead of ignoring them
+            logger.warning(f"Audio input status: {status}")
+        # Convert the audio input to a JSON-serializable format (list of samples)
+        audio_chunk = indata[:, 0].tolist()  # Use only the first channel
+        # Schedule the send on the event loop; it cannot be awaited from this thread
+        asyncio.run_coroutine_threadsafe(
+            websocket.send(json.dumps(audio_chunk)), loop
+        )
+
+    # Start the audio stream
+    with sd.InputStream(callback=audio_callback, channels=1, samplerate=SAMPLE_RATE, blocksize=CHUNK_SIZE):
+        await asyncio.Future()  # Never resolves: keeps the stream open until cancelled
 async def receive_transcriptions(websocket):
     try:
     except Exception as e:
         logger.error(f"Receive transcription error: {e}")
 if __name__ == "__main__":
     asyncio.run(send_receive())

infer.py CHANGED Viewed

@@ -162,23 +162,21 @@ async def process_audio_stream(websocket: WebSocket):
             audio_chunk = process_received_audio(data)
             #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
-            audio_buffer = np.concatenate((audio_buffer, audio_chunk))
             #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
         except Exception as e:
             logger.error(f"Error receiving data: {e}")
             break
-        # Check if enough audio has been buffered
-        if len(audio_buffer) >= min_chunk_size * sampling_rate:
-            if transcription_task is None or transcription_task.done():
-                # Start a new transcription task
-                #logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
-                transcription_task = asyncio.create_task(
-                    transcribe_and_send(websocket, audio_buffer.copy())
-                )
-                audio_buffer = np.array([], dtype=np.float32)
-                #logger.debug("Audio buffer reset after starting transcription task")
 async def transcribe_and_send(websocket: WebSocket, audio_data):
     """Run transcription in a separate thread and send the result to the client."""

             audio_chunk = process_received_audio(data)
             #logger.debug(f"Processed audio chunk {chunk_counter}: {len(audio_chunk)} samples")
+            # Check if enough audio has been buffered
+            if len(audio_buffer) >= min_chunk_size * sampling_rate:
+                if transcription_task is None or transcription_task.done():
+                    # Start a new transcription task
+                    # logger.info(f"Starting transcription task for {len(audio_buffer)} samples")
+                    transcription_task = asyncio.create_task(
+                        transcribe_and_send(websocket, audio_chunk)
+                    )
             #logger.debug(f"Audio buffer size: {len(audio_buffer)} samples")
         except Exception as e:
             logger.error(f"Error receiving data: {e}")
             break
 async def transcribe_and_send(websocket: WebSocket, audio_data):
     """Run transcription in a separate thread and send the result to the client."""

poetry.lock CHANGED Viewed

@@ -2935,6 +2935,26 @@ files = [
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
 ]
 [[package]]
 name = "soundfile"
 version = "0.12.1"
@@ -3842,4 +3862,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "3.9.1"
-content-hash = "7e3bbbe5cc618ae8b5762bdf1991ca224636038b44b6b425c66ea3f5ec0f15af"

     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
 ]
+[[package]]
+name = "sounddevice"
+version = "0.5.0"
+description = "Play and Record Sound with Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "sounddevice-0.5.0-py3-none-any.whl", hash = "sha256:8a734043ab1f751cb20f6f25d8f07408a1aadf2eeca923061849d38bb59f9e3d"},
+    {file = "sounddevice-0.5.0-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:73eb7cb1e8ab1e1ba09c228239e9d0b160006de380921687e44610ad9a19ac32"},
+    {file = "sounddevice-0.5.0-py3-none-win32.whl", hash = "sha256:919de43040e8737258370ddf929a9cd1a3d6c493ca173bab70a3c7cb15c71e97"},
+    {file = "sounddevice-0.5.0-py3-none-win_amd64.whl", hash = "sha256:f28b7ef16f293d7b048a614dd087dfe39c3e313d94a50539bb52022b7ef27ece"},
+    {file = "sounddevice-0.5.0.tar.gz", hash = "sha256:0de95277654b3d403d9c15ded3c6cedf307e9b27cc9ce7bd995a2891d0c955af"},
+]
+[package.dependencies]
+CFFI = ">=1.0"
+[package.extras]
+numpy = ["NumPy"]
 [[package]]
 name = "soundfile"
 version = "0.12.1"
 [metadata]
 lock-version = "2.0"
 python-versions = "3.9.1"
+content-hash = "8b654ee2a2cc97497e78fbe0de6258f3fb006e3f9bbe7234f800843f66adcb7b"

pyproject.toml CHANGED Viewed

@@ -23,6 +23,7 @@ soundfile = "^0.12.1"
 openai = "^1.42.0"
 numpy = "^1.22.0"
 torch = "2.1.0"

 openai = "^1.42.0"
 numpy = "^1.22.0"
 torch = "2.1.0"
+sounddevice = "^0.5.0"