Refactor tool-speechtotext: extract sttlib shared library and add tests

Extract duplicated code (Whisper loading, audio recording, transcription,
VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as
thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback
bug in assistant.py and args.system assignment bug.
This commit is contained in:
local
2026-02-08 00:40:31 +00:00
parent 848681087e
commit 104da381fb
15 changed files with 480 additions and 195 deletions

View File

@@ -0,0 +1,28 @@
import sys
import numpy as np
import sounddevice as sd
def record_until_enter(sample_rate=16000):
    """Record microphone audio between two presses of Enter.

    Blocks on stdin twice: the first Enter starts the capture, the second
    one stops it. Audio is read from a single-channel
    ``sounddevice.InputStream`` opened at ``sample_rate``.

    Args:
        sample_rate: Sampling rate in Hz handed to the input stream.

    Returns:
        A numpy array holding every captured block concatenated along
        axis 0 (float32 with sounddevice's default stream dtype). If no
        block was delivered before recording stopped, an empty float32
        array of shape ``(0, 1)`` is returned instead of raising.
    """
    print("\n[READY] Press Enter to START recording...")
    input()
    print("[RECORDING] Press Enter to STOP...")

    chunks = []

    def callback(indata, frames, time_info, status):
        # Invoked by sounddevice for each captured block of audio.
        if status:
            # Stream problems (e.g. overflows) are reported, not fatal.
            print(status, file=sys.stderr)
        # Keep a private copy; the buffer passed in is not ours to hold.
        chunks.append(indata.copy())

    # The stream captures only while this context is open; the second
    # Enter press leaves the block and stops the recording.
    with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
        input()

    if not chunks:
        # np.concatenate raises ValueError on an empty sequence, which
        # happens when Enter is pressed before the first block arrives.
        return np.empty((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)
def pcm_bytes_to_float32(pcm_bytes):
    """Decode raw 16-bit PCM bytes into float32 samples scaled to [-1, 1].

    The buffer is interpreted as native-endian signed 16-bit integers and
    each sample is divided by 32768, so the most negative sample maps to
    exactly -1.0.
    """
    full_scale = 32768.0  # magnitude of the most negative int16 value
    samples = np.frombuffer(pcm_bytes, dtype=np.int16).astype(np.float32)
    return samples / full_scale