# Code/python/tool-speechtotext/sttlib/whisper_loader.py

import os
from faster_whisper import WhisperModel

# Set at import time so the flag is already in the process environment before
# load_whisper_model() attempts the float16 CUDA load. Per the CTranslate2
# documentation this permits FP16 computation on GPUs without efficient FP16
# support.
# NOTE(review): this runs after `faster_whisper` has been imported; confirm
# CTranslate2 reads the variable lazily (at model load) rather than at import.
os.environ["CT2_CUDA_ALLOW_FP16"] = "1"


def load_whisper_model(model_size):
    """Return a ``WhisperModel`` for *model_size*, preferring the GPU.

    The model is first constructed on ``cuda`` with ``float16`` compute. If
    that constructor raises any ``Exception``, the error is printed and the
    model is constructed again on ``cpu`` with ``int8`` compute.

    Args:
        model_size: Passed through unchanged as the first positional
            argument of ``WhisperModel``.

    Returns:
        The ``WhisperModel`` instance from whichever attempt succeeded.

    Raises:
        Exception: Whatever the CPU constructor raises when the fallback
            attempt fails as well; that error is not caught here.
    """
    print(f"Loading Whisper model ({model_size})...")

    gpu_options = {"device": "cuda", "compute_type": "float16"}
    cpu_options = {"device": "cpu", "compute_type": "int8"}

    try:
        model = WhisperModel(model_size, **gpu_options)
    except Exception as gpu_error:
        # Deliberately broad: any GPU-side failure triggers the CPU attempt.
        # The retry stays inside this handler so a CPU failure remains
        # chained to the GPU error in the traceback.
        print(f"GPU loading failed: {gpu_error}")
        print("Falling back to CPU (int8)")
        model = WhisperModel(model_size, **cpu_options)
    return model