v2 - works

Amazing — made a screencast out of this.
This commit is contained in:
dl92
2026-01-13 18:01:38 +00:00
parent 33248895ff
commit f09b390d90

View File

import sounddevice as sd
import numpy as np
import pyperclip
import requests
from faster_whisper import WhisperModel
import os

# Allow CTranslate2 to use FP16 CUDA kernels; must be set before the model loads.
os.environ["CT2_CUDA_ALLOW_FP16"] = "1"

# --- Configuration ---
MODEL_SIZE = "medium"  # Options: "base", "small", "medium", "large-v3"
OLLAMA_URL = "http://localhost:11434/api/generate"  # Default is 11434
OLLAMA_MODEL = "qwen3:latest"
# Load Whisper on GPU; float16 is faster and uses less VRAM on NVIDIA cards.
print("Loading Whisper model...")
try:
    model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
except Exception as e:
    print(f"Error loading GPU: {e}")
    print("Falling back to CPU (Check your CUDA/cuDNN installation)")
    # Bug fix: the fallback previously requested device="cuda" again, which
    # re-raises the same CUDA error instead of falling back. Use the CPU with
    # int8 quantization, the recommended CPU compute type for faster-whisper.
    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
def record_audio(): def record_audio():
fs = 16000
fs = 16000 # Sample rate print("\n[READY] Press Enter to START recording...")
print("\n--- Press Enter to START recording ---")
input() input()
print("Recording... Press Enter to STOP.") print("[RECORDING] Press Enter to STOP...")
recording = [] recording = []
def callback(indata, frames, time, status): def callback(indata, frames, time, status):
if status:
print(status, file=sys.stderr)
recording.append(indata.copy()) recording.append(indata.copy())
with sd.InputStream(samplerate=fs, channels=1, callback=callback): with sd.InputStream(samplerate=fs, channels=1, callback=callback):
@@ -29,17 +42,24 @@ def record_audio():
return np.concatenate(recording, axis=0) return np.concatenate(recording, axis=0)
def main(): def main():
print(f"System active. Model: {OLLAMA_MODEL}")
while True: while True:
try:
audio_data = record_audio() audio_data = record_audio()
# Transcribe print("[TRANSCRIBING]...")
segments, _ = model.transcribe(audio_data.flatten(), beam_size=5) segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
text = "".join([segment.text for segment in segments]).strip() text = "".join([segment.text for segment in segments]).strip()
if not text:
print("No speech detected. Try again.")
continue
print(f"You said: {text}") print(f"You said: {text}")
pyperclip.copy(text) # Copies to clipboard automatically pyperclip.copy(text)
# Send to Ollama # Send to Ollama
print(f"[OLLAMA] Thinking...")
response = requests.post(OLLAMA_URL, json={ response = requests.post(OLLAMA_URL, json={
"model": OLLAMA_MODEL, "model": OLLAMA_MODEL,
"prompt": text, "prompt": text,
@@ -49,5 +69,11 @@ def main():
result = response.json().get("response", "") result = response.json().get("response", "")
print(f"\nLLM Response:\n{result}\n") print(f"\nLLM Response:\n{result}\n")
except KeyboardInterrupt:
print("\nExiting...")
break
except Exception as e:
print(f"An error occurred: {e}")
# Run the interactive loop only when executed as a script, not on import.
if __name__ == "__main__":
    main()