# Code/python/tool-speechtotext/tests/test_transcription.py

from unittest.mock import MagicMock
from sttlib.transcription import transcribe, is_hallucination


# --- is_hallucination tests ---

def test_known_hallucinations():
    """Each of the listed filler phrases must be reported as a hallucination."""
    known_phrases = (
        "Thank you",
        "thanks for watching",
        "Subscribe",
        "the end",
    )
    for phrase in known_phrases:
        assert is_hallucination(phrase)


def test_short_text():
    """Empty, one-character and two-character inputs must all be flagged."""
    for snippet in ("hi", "", "a"):
        assert is_hallucination(snippet)


def test_normal_text():
    """Ordinary spoken sentences must not be reported as hallucinations."""
    ordinary_sentences = (
        "Hello how are you",
        "Please open the terminal",
    )
    for sentence in ordinary_sentences:
        assert not is_hallucination(sentence)


def test_case_insensitivity():
    """Upper-case and title-case variants of filler phrases must be flagged."""
    for variant in ("THANK YOU", "Thank You For Watching"):
        assert is_hallucination(variant)


def test_substring_match():
    """A longer sentence that contains the word "subscribe" must be flagged."""
    sentence = "I want to subscribe to your channel"
    flagged = is_hallucination(sentence)
    assert flagged


def test_exactly_three_chars():
    """The three-character input "hey" must not be flagged."""
    flagged = is_hallucination("hey")
    assert not flagged


# --- transcribe tests ---

def _make_segment(text):
    """Return a MagicMock stand-in for a segment whose ``text`` attribute is *text*."""
    # MagicMock keyword arguments are applied as attributes on the new mock.
    return MagicMock(text=text)


def test_transcribe_joins_segments():
    """Segment texts "Hello " and "world" must come back as "Hello world"."""
    segments = [_make_segment(piece) for piece in ("Hello ", "world")]
    fake_model = MagicMock()
    # The mocked model returns a (segments, second-value) pair; the second slot is None here.
    fake_model.transcribe.return_value = (segments, None)

    text = transcribe(fake_model, MagicMock())

    assert text == "Hello world"


def test_transcribe_empty():
    """With no segments from the model, transcribe() must return an empty string."""
    fake_model = MagicMock()
    no_segments = []
    fake_model.transcribe.return_value = (no_segments, None)

    text = transcribe(fake_model, MagicMock())

    assert text == ""


def test_transcribe_strips_whitespace():
    """Leading and trailing spaces around the segment text must be removed."""
    padded_segment = _make_segment("  hello  ")
    fake_model = MagicMock()
    fake_model.transcribe.return_value = ([padded_segment], None)

    text = transcribe(fake_model, MagicMock())

    assert text == "hello"


def test_transcribe_passes_beam_size():
    """transcribe() must call model.transcribe exactly once with the audio and beam_size=5."""
    fake_audio = MagicMock()
    fake_model = MagicMock()
    fake_model.transcribe.return_value = ([], None)

    transcribe(fake_model, fake_audio)

    # Checks both the call count (one) and the exact positional/keyword arguments.
    fake_model.transcribe.assert_called_once_with(fake_audio, beam_size=5)