Files
Code/python/tool-speechtotext/tests/test_transcription.py
local 104da381fb Refactor tool-speechtotext: extract sttlib shared library and add tests
Extract duplicated code (Whisper loading, audio recording, transcription,
VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as
thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback
bug in assistant.py and args.system assignment bug.
2026-02-08 00:40:31 +00:00

79 lines
1.9 KiB
Python

from unittest.mock import MagicMock
from sttlib.transcription import transcribe, is_hallucination
# --- is_hallucination tests ---
def test_known_hallucinations():
    """Each of these stock phrases must be reported as a hallucination."""
    phrases = (
        "Thank you",
        "thanks for watching",
        "Subscribe",
        "the end",
    )
    for phrase in phrases:
        # The message names the phrase that was not flagged.
        assert is_hallucination(phrase), repr(phrase)
def test_short_text():
    """Inputs of two characters or fewer, including "", must be flagged."""
    for text in ("hi", "", "a"):
        assert is_hallucination(text), repr(text)
def test_normal_text():
    """Ordinary spoken sentences must not be reported as hallucinations."""
    sentences = (
        "Hello how are you",
        "Please open the terminal",
    )
    for sentence in sentences:
        assert not is_hallucination(sentence), repr(sentence)
def test_case_insensitivity():
    """Upper-case and title-case variants of the phrases are still flagged."""
    for phrase in ("THANK YOU", "Thank You For Watching"):
        assert is_hallucination(phrase), repr(phrase)
def test_substring_match():
    """A longer sentence that contains "subscribe" is still flagged."""
    sentence = "I want to subscribe to your channel"
    assert is_hallucination(sentence), repr(sentence)
def test_exactly_three_chars():
    """A three-character word ("hey") must not be flagged."""
    text = "hey"
    assert not is_hallucination(text), repr(text)
# --- transcribe tests ---
def _make_segment(text):
seg = MagicMock()
seg.text = text
return seg
def test_transcribe_joins_segments():
    """transcribe() concatenates the text of all returned segments."""
    segments = [_make_segment("Hello "), _make_segment("world")]
    model = MagicMock()
    # The mocked model yields a two-element tuple: the segments and None.
    model.transcribe.return_value = (segments, None)

    assert transcribe(model, MagicMock()) == "Hello world"
def test_transcribe_empty():
    """transcribe() returns "" when the model produces no segments."""
    model = MagicMock()
    model.transcribe.return_value = ([], None)

    assert transcribe(model, MagicMock()) == ""
def test_transcribe_strips_whitespace():
    """transcribe() trims leading and trailing whitespace from its result."""
    padded = _make_segment(" hello ")
    model = MagicMock()
    model.transcribe.return_value = ([padded], None)

    assert transcribe(model, MagicMock()) == "hello"
def test_transcribe_passes_beam_size():
    """transcribe() calls model.transcribe once with the audio and beam_size=5."""
    audio = MagicMock()
    model = MagicMock()
    model.transcribe.return_value = ([], None)

    transcribe(model, audio)

    # Exactly one call, carrying the same audio object and the beam size.
    model.transcribe.assert_called_once_with(audio, beam_size=5)