Extract duplicated code (Whisper loading, audio recording, transcription, VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback bug in assistant.py and args.system assignment bug.
79 lines
1.9 KiB
Python
79 lines
1.9 KiB
Python
from unittest.mock import MagicMock
|
|
from sttlib.transcription import transcribe, is_hallucination
|
|
|
|
|
|
# --- is_hallucination tests ---
|
|
|
|
def test_known_hallucinations():
    """Each of the listed stock phrases must be reported as a hallucination."""
    phrases = (
        "Thank you",
        "thanks for watching",
        "Subscribe",
        "the end",
    )
    for phrase in phrases:
        # The message names the offending phrase so a failure is self-explanatory.
        assert is_hallucination(phrase), phrase
|
|
|
|
|
|
def test_short_text():
    """Empty, one-character and two-character inputs must be flagged."""
    for snippet in ("hi", "", "a"):
        # repr() keeps the empty-string case visible in the failure message.
        assert is_hallucination(snippet), repr(snippet)
|
|
|
|
|
|
def test_normal_text():
    """Ordinary sentences must not be reported as hallucinations."""
    for sentence in ("Hello how are you", "Please open the terminal"):
        assert not is_hallucination(sentence), sentence
|
|
|
|
|
|
def test_case_insensitivity():
    """Upper-case and title-case phrasings must still be flagged."""
    for phrase in ("THANK YOU", "Thank You For Watching"):
        assert is_hallucination(phrase), phrase
|
|
|
|
|
|
def test_substring_match():
    """A longer sentence containing the word "subscribe" must be flagged.

    NOTE(review): presumably detection is a substring search over known
    phrases; confirm against ``is_hallucination``.
    """
    sentence = "I want to subscribe to your channel"
    assert is_hallucination(sentence), sentence
|
|
|
|
|
|
def test_exactly_three_chars():
    """The three-character input "hey" must not be flagged.

    NOTE(review): presumably this pins the boundary of a minimum-length
    check; confirm the threshold in ``is_hallucination``.
    """
    word = "hey"
    assert not is_hallucination(word), word
|
|
|
|
|
|
# --- transcribe tests ---
|
|
|
|
def _make_segment(text):
|
|
seg = MagicMock()
|
|
seg.text = text
|
|
return seg
|
|
|
|
|
|
def test_transcribe_joins_segments():
    """Text from several segments is concatenated into one string."""
    segments = [_make_segment("Hello "), _make_segment("world")]
    fake_model = MagicMock()
    # The mocked model returns a (segments, info) pair; info is not needed here.
    fake_model.transcribe.return_value = (segments, None)

    assert transcribe(fake_model, MagicMock()) == "Hello world"
|
|
|
|
|
|
def test_transcribe_empty():
    """A model that yields no segments produces an empty string."""
    fake_model = MagicMock()
    fake_model.transcribe.return_value = ([], None)

    assert transcribe(fake_model, MagicMock()) == ""
|
|
|
|
|
|
def test_transcribe_strips_whitespace():
    """Leading and trailing whitespace is removed from the result."""
    padded = [_make_segment(" hello ")]
    fake_model = MagicMock()
    fake_model.transcribe.return_value = (padded, None)

    assert transcribe(fake_model, MagicMock()) == "hello"
|
|
|
|
|
|
def test_transcribe_passes_beam_size():
    """The model is called exactly once with the audio and ``beam_size=5``."""
    fake_model = MagicMock()
    fake_model.transcribe.return_value = ([], None)
    clip = MagicMock()

    transcribe(fake_model, clip)

    # Checks both the positional audio argument and the beam_size keyword.
    fake_model.transcribe.assert_called_once_with(clip, beam_size=5)
|