from unittest.mock import MagicMock, patch

from sttlib.vad import VADProcessor, FRAME_DURATION_MS, MIN_UTTERANCE_FRAMES


def _make_vad_processor(aggressiveness=3, silence_threshold=0.8):
    """Build a VADProcessor whose webrtcvad.Vad backend is a MagicMock.

    Returns a ``(processor, vad_mock)`` pair; tests steer the detector by
    setting ``vad_mock.is_speech.return_value``.
    """
    vad_double = MagicMock()
    # The patch only needs to be active while the processor is constructed;
    # afterwards the processor keeps using the mock instance it was handed.
    with patch("sttlib.vad.webrtcvad.Vad") as vad_factory:
        vad_factory.return_value = vad_double
        processor = VADProcessor(aggressiveness, silence_threshold)
    return processor, vad_double


def _frame(label="x"):
    """Return a fake 30ms frame: the encoded label repeated 960 times."""
    # 480 samples * 2 bytes
    encoded = label.encode()
    return encoded * 960


def _push(proc, label, count):
    """Feed ``count`` frames built from ``label`` and discard the results."""
    for _ in range(count):
        proc.process_frame(_frame(label))


def _first_utterance(proc, label="q", limit=20):
    """Feed up to ``limit`` frames; return the first non-None result.

    Returns None when no frame within the limit produced an utterance.
    """
    for _ in range(limit):
        outcome = proc.process_frame(_frame(label))
        if outcome is not None:
            return outcome
    return None


def test_no_speech_returns_none():
    """Pure silence must never yield an utterance."""
    proc, mock_vad = _make_vad_processor()
    mock_vad.is_speech.return_value = False

    for _ in range(100):
        assert proc.process_frame(_frame()) is None


def test_speech_then_silence_triggers_utterance():
    """Enough speech followed by enough silence emits a non-empty result."""
    proc, mock_vad = _make_vad_processor(silence_threshold=0.3)

    # Speech phase: nothing may be emitted while speech is still ongoing.
    mock_vad.is_speech.return_value = True
    remaining = MIN_UTTERANCE_FRAMES + 5
    while remaining > 0:
        result = proc.process_frame(_frame("s"))
        assert result is None  # not done yet
        remaining -= 1

    # Silence phase: threshold is 0.3s, i.e. 10 frames at 30ms.
    mock_vad.is_speech.return_value = False
    result = _first_utterance(proc)

    assert result is not None
    assert len(result) > 0


def test_short_utterance_filtered():
    """An utterance shorter than MIN_UTTERANCE_FRAMES is dropped."""
    # A very short silence threshold keeps the silence frames from pushing
    # the total past MIN_UTTERANCE_FRAMES. With threshold=0.09s (3 frames of
    # silence): 0 pre-buffer + 1 speech + 3 silence = 4 total, which is
    # below MIN_UTTERANCE_FRAMES (10).
    proc, mock_vad = _make_vad_processor(silence_threshold=0.09)

    # A single speech frame triggers the VAD.
    mock_vad.is_speech.return_value = True
    proc.process_frame(_frame("s"))

    # Go silent right away; the threshold is reached within 3 frames.
    mock_vad.is_speech.return_value = False
    result = _first_utterance(proc)

    # Too short (only 4 frames in total), so nothing should come out.
    assert result is None


def test_pre_buffer_included():
    """Frames seen before speech onset are part of the emitted utterance."""
    proc, mock_vad = _make_vad_processor(silence_threshold=0.3)
    pre_frame = _frame("p")
    speech_frame = _frame("s")

    # Load the pre-buffer with non-speech frames.
    mock_vad.is_speech.return_value = False
    for _ in range(10):
        proc.process_frame(pre_frame)

    # Speech begins.
    mock_vad.is_speech.return_value = True
    for _ in range(MIN_UTTERANCE_FRAMES):
        proc.process_frame(speech_frame)

    # Trailing silence closes the utterance.
    mock_vad.is_speech.return_value = False
    result = _first_utterance(proc)

    assert result is not None
    # The pre-buffer frames must show up in the result.
    assert pre_frame in result


def test_reset_after_utterance():
    """State is cleared after an utterance so a second one can follow."""
    proc, mock_vad = _make_vad_processor(silence_threshold=0.3)
    speech_frames = MIN_UTTERANCE_FRAMES + 5

    # First utterance.
    mock_vad.is_speech.return_value = True
    _push(proc, "s", speech_frames)
    mock_vad.is_speech.return_value = False
    result = _first_utterance(proc)
    assert result is not None

    # The processor should be back in its idle state.
    assert not proc.triggered
    assert proc.utterance_frames == []

    # Second utterance, collected the same way.
    mock_vad.is_speech.return_value = True
    _push(proc, "s", speech_frames)
    mock_vad.is_speech.return_value = False
    result2 = _first_utterance(proc)
    assert result2 is not None


def test_silence_threshold_boundary():
    """The utterance fires on exactly the frame that reaches the threshold."""
    # With a 0.3s threshold, 0.3 / 0.03 = exactly 10 silence frames needed.
    threshold = 0.3
    frames_needed = 10  # 0.3s / 0.03s per frame
    proc, mock_vad = _make_vad_processor(silence_threshold=threshold)

    # Begin with speech.
    mock_vad.is_speech.return_value = True
    _push(proc, "s", MIN_UTTERANCE_FRAMES + 5)

    mock_vad.is_speech.return_value = False

    # One frame short of the threshold: nothing may be emitted yet.
    i = 0
    while i < frames_needed - 1:
        result = proc.process_frame(_frame("q"))
        assert result is None, f"Triggered too early at frame {i}"
        i += 1

    # The 10th frame reaches the threshold (silence_duration = 0.3 >= 0.3).
    result = proc.process_frame(_frame("q"))
    assert result is not None