import os

from faster_whisper import WhisperModel

# Let CTranslate2 use fp16 CUDA kernels even on GPUs it would
# otherwise refuse them on; must be set before a model is created.
os.environ["CT2_CUDA_ALLOW_FP16"] = "1"


def load_whisper_model(model_size):
    """Load a Whisper model of the given size, preferring the GPU.

    Tries cuda/float16 first; on any failure (missing CUDA, driver or
    ctranslate2 errors) logs the problem and falls back to cpu/int8.

    Args:
        model_size: Whisper model identifier accepted by ``WhisperModel``
            (e.g. "base", "large-v3").

    Returns:
        A ready ``WhisperModel`` instance.
    """
    print(f"Loading Whisper model ({model_size})...")
    try:
        model = WhisperModel(model_size, device="cuda", compute_type="float16")
    except Exception as e:
        # Fall back rather than crash: CPU inference is slow but always works.
        print(f"GPU loading failed: {e}")
        print("Falling back to CPU (int8)")
        model = WhisperModel(model_size, device="cpu", compute_type="int8")
    return model