Extract duplicated code (Whisper loading, audio recording, transcription, VAD processing) into a reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers. Add 24 unit tests with mocked hardware. Fix the GPU fallback bug in assistant.py and the args.system assignment bug.
20 lines
646 B
Python
20 lines
646 B
Python
# Lowercase phrases treated as markers of a Whisper hallucination.
# is_hallucination() checks each entry as a substring of the lowercased,
# stripped transcription, so every entry must itself be lowercase.
HALLUCINATION_PATTERNS = [
    "thank you", "thanks for watching", "subscribe",
    "bye", "the end", "thank you for watching",
    "please subscribe", "like and subscribe",
]


def transcribe(model, audio_float32, *, beam_size=5):
    """Transcribe audio using Whisper. Returns stripped text.

    Args:
        model: Object exposing ``transcribe(audio, beam_size=...)`` that
            returns a ``(segments, info)`` pair whose segments each carry a
            ``text`` attribute (presumably a faster-whisper ``WhisperModel``;
            confirm against the model loader).
        audio_float32: Audio samples, passed to the model unchanged
            (float32 per the name; not validated here).
        beam_size: Beam width forwarded to ``model.transcribe``.
            Keyword-only; the default of 5 is the value that used to be
            hard-coded, so existing callers behave exactly as before.

    Returns:
        The text of all segments concatenated in order, with leading and
        trailing whitespace removed. An empty string when there are no
        segments.
    """
    # The second element of the result is not used here.
    segments, _ = model.transcribe(audio_float32, beam_size=beam_size)
    return "".join(segment.text for segment in segments).strip()


def is_hallucination(text):
    """Return True if text looks like a Whisper hallucination.

    The text is lowercased and stripped, then flagged when it is shorter
    than three characters or when any entry of ``HALLUCINATION_PATTERNS``
    occurs anywhere inside it.

    Args:
        text: Transcribed text to check.

    Returns:
        True if the text is considered a hallucination, False otherwise.
    """
    lowered = text.lower().strip()
    # Fewer than 3 characters (including the empty string) is rejected outright.
    if len(lowered) < 3:
        return True
    # Substring match: a pattern appearing anywhere in the text flags it,
    # not only text that equals a pattern exactly.
    return any(p in lowered for p in HALLUCINATION_PATTERNS)