Refactor tool-speechtotext: extract sttlib shared library and add tests

Extract duplicated code (Whisper loading, audio recording, transcription, VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback bug in assistant.py and args.system assignment bug.
2026-02-08 00:40:31 +00:00
parent 848681087e
commit 104da381fb
15 changed files with 480 additions and 195 deletions
--- a/python/tool-speechtotext/tests/test_audio.py
+++ b/python/tool-speechtotext/tests/test_audio.py
@@ -0,0 +1,38 @@
+import struct
+import numpy as np
+from sttlib.audio import pcm_bytes_to_float32
+
+
+def test_known_value():
+    """A half-scale int16 sample (16384) must decode to 0.5."""
+    half_scale = struct.pack("<h", 16384)
+    decoded = pcm_bytes_to_float32(half_scale)
+    assert abs(decoded[0] - 0.5) < 1e-5
+
+
+def test_silence():
+    """Ten all-zero 2-byte samples must decode to all-zero output."""
+    decoded = pcm_bytes_to_float32(b"\x00\x00" * 10)
+    assert np.all(decoded == 0.0)
+
+
+def test_full_scale():
+    """The largest int16 (32767) must decode to 32767/32768, just under 1.0."""
+    peak = struct.pack("<h", 32767)
+    decoded = pcm_bytes_to_float32(peak)
+    assert abs(decoded[0] - (32767 / 32768.0)) < 1e-5
+
+
+def test_negative():
+    """The most negative int16 (-32768) must decode to exactly -1.0."""
+    floor = struct.pack("<h", -32768)
+    decoded = pcm_bytes_to_float32(floor)
+    assert decoded[0] == -1.0
+
+
+def test_round_trip_shape():
+    """200 zero bytes must decode to a float32 array of shape (100,)."""
+    decoded = pcm_bytes_to_float32(b"\x00\x00" * 100)
+    expected_shape = (100,)
+    assert decoded.shape == expected_shape
+    assert decoded.dtype == np.float32