Refactor tool-speechtotext: extract sttlib shared library and add tests
Extract duplicated code (Whisper loading, audio recording, transcription, VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback bug in assistant.py and args.system assignment bug.
This commit is contained in:
15
python/tool-speechtotext/sttlib/whisper_loader.py
Normal file
15
python/tool-speechtotext/sttlib/whisper_loader.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import os
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
# Set at import time so the variable is already in the process environment
# before load_whisper_model() asks for float16 on CUDA.
# NOTE(review): presumably this is the CTranslate2 switch that permits FP16
# on GPUs it would otherwise reject for float16 — confirm against its docs.
os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
|
||||
|
||||
|
||||
def load_whisper_model(model_size):
    """Build a WhisperModel, preferring the GPU and degrading to the CPU.

    The first attempt uses device "cuda" with compute type "float16". If that
    constructor raises any Exception, the error is printed and a second
    attempt is made with device "cpu" and compute type "int8".

    Args:
        model_size: Passed through unchanged as the first positional argument
            to WhisperModel.

    Returns:
        The WhisperModel instance from whichever attempt succeeded.

    Raises:
        Whatever the CPU attempt raises; that failure is not handled here.
    """
    gpu_settings = {"device": "cuda", "compute_type": "float16"}
    cpu_settings = {"device": "cpu", "compute_type": "int8"}

    print(f"Loading Whisper model ({model_size})...")
    try:
        model = WhisperModel(model_size, **gpu_settings)
    except Exception as gpu_error:
        print(f"GPU loading failed: {gpu_error}")
        print("Falling back to CPU (int8)")
        # Kept inside the handler so a CPU failure still chains to the GPU error.
        model = WhisperModel(model_size, **cpu_settings)
    return model
|
||||
Reference in New Issue
Block a user