Extract duplicated code (Whisper loading, audio recording, transcription, VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback bug in assistant.py and args.system assignment bug.
29 lines
838 B
Python
29 lines
838 B
Python
import sys
|
|
import numpy as np
|
|
import sounddevice as sd
|
|
|
|
|
|
def record_until_enter(sample_rate=16000):
    """Record single-channel audio between two presses of Enter.

    Prints a prompt, blocks on ``input()`` until the user presses Enter,
    then opens a ``sounddevice`` input stream and collects every block the
    stream delivers until Enter is pressed a second time.

    Args:
        sample_rate: Sampling rate passed to the input stream as
            ``samplerate``.

    Returns:
        A numpy array holding all captured blocks concatenated along
        axis 0. The stream is opened with ``channels=1``. The dtype is
        float32 as long as sounddevice's default dtype has not been
        overridden elsewhere. If the stream was stopped before any block
        arrived, an empty float32 array of shape ``(0, 1)`` is returned
        instead of raising.
    """
    print("\n[READY] Press Enter to START recording...")
    input()
    print("[RECORDING] Press Enter to STOP...")

    chunks = []

    def callback(indata, frames, time_info, status):
        # Surface any status flags reported by the stream on stderr
        # without interrupting the capture.
        if status:
            print(status, file=sys.stderr)
        # Store our own copy of the block rather than a reference to the
        # array handed to the callback.
        chunks.append(indata.copy())

    # The stream is active only inside the ``with`` body; the second
    # Enter press ends the body and closes the stream.
    with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
        input()

    # np.concatenate raises ValueError on an empty sequence, which happens
    # when Enter is pressed before the first block is delivered.
    if not chunks:
        return np.empty((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)
|
|
|
|
|
|
def pcm_bytes_to_float32(pcm_bytes):
    """Decode raw 16-bit PCM bytes into a normalized float32 array.

    The buffer is interpreted as native-endian signed 16-bit integers and
    each sample is divided by 32768, so the output lies in [-1.0, 1.0).

    Args:
        pcm_bytes: Bytes-like object whose length is a multiple of two.

    Returns:
        A one-dimensional float32 numpy array with one element per sample.
    """
    full_scale = 32768.0  # magnitude of the most negative int16 value
    samples = np.frombuffer(pcm_bytes, dtype=np.int16)
    normalized = samples.astype(np.float32)
    normalized /= full_scale
    return normalized
|