Refactor tool-speechtotext: extract sttlib shared library and add tests
Extract the duplicated code (Whisper loading, audio recording, transcription, VAD processing) into a reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers around it. Add 24 unit tests that run against mocked hardware. Fix the GPU fallback bug in assistant.py and the args.system assignment bug.
This commit is contained in:
78
python/tool-speechtotext/tests/test_transcription.py
Normal file
78
python/tool-speechtotext/tests/test_transcription.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from unittest.mock import MagicMock
|
||||
from sttlib.transcription import transcribe, is_hallucination
|
||||
|
||||
|
||||
# --- is_hallucination tests ---
|
||||
|
||||
def test_known_hallucinations():
    """Each of the listed stock phrases must be reported as a hallucination."""
    stock_phrases = (
        "Thank you",
        "thanks for watching",
        "Subscribe",
        "the end",
    )
    for phrase in stock_phrases:
        assert is_hallucination(phrase)
|
||||
|
||||
|
||||
def test_short_text():
    """Very short inputs (zero, one and two characters here) are flagged."""
    for tiny in ("hi", "", "a"):
        assert is_hallucination(tiny)
|
||||
|
||||
|
||||
def test_normal_text():
    """Ordinary spoken sentences must not be flagged as hallucinations."""
    ordinary_sentences = (
        "Hello how are you",
        "Please open the terminal",
    )
    for sentence in ordinary_sentences:
        assert not is_hallucination(sentence)
|
||||
|
||||
|
||||
def test_case_insensitivity():
    """Upper-case and title-case variants of the phrases are still flagged."""
    for variant in ("THANK YOU", "Thank You For Watching"):
        assert is_hallucination(variant)
|
||||
|
||||
|
||||
def test_substring_match():
    """A longer sentence that merely contains "subscribe" is flagged too."""
    sentence = "I want to subscribe to your channel"
    assert is_hallucination(sentence)
|
||||
|
||||
|
||||
def test_exactly_three_chars():
    """A three-character word ("hey") must not be flagged."""
    three_letter_word = "hey"
    assert not is_hallucination(three_letter_word)
|
||||
|
||||
|
||||
# --- transcribe tests ---
|
||||
|
||||
def _make_segment(text):
|
||||
seg = MagicMock()
|
||||
seg.text = text
|
||||
return seg
|
||||
|
||||
|
||||
def test_transcribe_joins_segments():
    """The texts of two segments come back combined as one string."""
    fake_model = MagicMock()
    pieces = [_make_segment(chunk) for chunk in ("Hello ", "world")]
    # The mocked model returns a (segments, info) pair; info is None here.
    fake_model.transcribe.return_value = (pieces, None)

    assert transcribe(fake_model, MagicMock()) == "Hello world"
|
||||
|
||||
|
||||
def test_transcribe_empty():
    """With no segments returned by the model, the result is an empty string."""
    fake_model = MagicMock()
    no_segments = []
    fake_model.transcribe.return_value = (no_segments, None)

    assert transcribe(fake_model, MagicMock()) == ""
|
||||
|
||||
|
||||
def test_transcribe_strips_whitespace():
    """Leading and trailing whitespace is removed from the transcribed text."""
    fake_model = MagicMock()
    padded_segment = _make_segment(" hello ")
    fake_model.transcribe.return_value = ([padded_segment], None)

    assert transcribe(fake_model, MagicMock()) == "hello"
|
||||
|
||||
|
||||
def test_transcribe_passes_beam_size():
    """The model is called exactly once, with the audio and ``beam_size=5``."""
    fake_model = MagicMock()
    fake_model.transcribe.return_value = ([], None)
    clip = MagicMock()

    transcribe(fake_model, clip)

    fake_model.transcribe.assert_called_once_with(clip, beam_size=5)
|
||||
Reference in New Issue
Block a user