v2 — works
Amazing — made a screencast out of this.
This commit is contained in:
@@ -2,25 +2,38 @@ import sounddevice as sd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pyperclip
|
import pyperclip
|
||||||
import requests
|
import requests
|
||||||
|
import sys
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
# --- Configuration ---
# Whisper model size. Options: "base", "small", "medium", "large-v3".
# Larger models are more accurate but slower and use more VRAM.
MODEL_SIZE = "medium"

# Ollama HTTP endpoint (11434 is Ollama's default port) and the model
# that transcribed prompts are forwarded to.
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "qwen3:latest"
# --- Load Whisper model (GPU, with a real CPU fallback) ---
print("Loading Whisper model...")
try:
    # float16 is faster and uses less VRAM on NVIDIA cards.
    model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
except Exception as e:
    print(f"Error loading GPU: {e}")
    print("Falling back to CPU (Check your CUDA/cuDNN installation)")
    # BUG FIX: the fallback previously requested device="cuda" again
    # (with compute_type="int16"), so it could never recover from a CUDA
    # failure. Use the CPU with int8 quantization, the recommended
    # compute type for CPU inference in faster-whisper.
    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
def record_audio():
|
def record_audio():
|
||||||
|
fs = 16000
|
||||||
fs = 16000 # Sample rate
|
print("\n[READY] Press Enter to START recording...")
|
||||||
print("\n--- Press Enter to START recording ---")
|
|
||||||
input()
|
input()
|
||||||
print("Recording... Press Enter to STOP.")
|
print("[RECORDING] Press Enter to STOP...")
|
||||||
|
|
||||||
recording = []
|
recording = []
|
||||||
|
|
||||||
def callback(indata, frames, time, status):
|
def callback(indata, frames, time, status):
|
||||||
|
if status:
|
||||||
|
print(status, file=sys.stderr)
|
||||||
recording.append(indata.copy())
|
recording.append(indata.copy())
|
||||||
|
|
||||||
with sd.InputStream(samplerate=fs, channels=1, callback=callback):
|
with sd.InputStream(samplerate=fs, channels=1, callback=callback):
|
||||||
@@ -29,17 +42,24 @@ def record_audio():
|
|||||||
return np.concatenate(recording, axis=0)
|
return np.concatenate(recording, axis=0)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
print(f"System active. Model: {OLLAMA_MODEL}")
|
||||||
while True:
|
while True:
|
||||||
|
try:
|
||||||
audio_data = record_audio()
|
audio_data = record_audio()
|
||||||
|
|
||||||
# Transcribe
|
print("[TRANSCRIBING]...")
|
||||||
segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
|
segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
|
||||||
text = "".join([segment.text for segment in segments]).strip()
|
text = "".join([segment.text for segment in segments]).strip()
|
||||||
|
|
||||||
|
if not text:
|
||||||
|
print("No speech detected. Try again.")
|
||||||
|
continue
|
||||||
|
|
||||||
print(f"You said: {text}")
|
print(f"You said: {text}")
|
||||||
pyperclip.copy(text) # Copies to clipboard automatically
|
pyperclip.copy(text)
|
||||||
|
|
||||||
# Send to Ollama
|
# Send to Ollama
|
||||||
|
print(f"[OLLAMA] Thinking...")
|
||||||
response = requests.post(OLLAMA_URL, json={
|
response = requests.post(OLLAMA_URL, json={
|
||||||
"model": OLLAMA_MODEL,
|
"model": OLLAMA_MODEL,
|
||||||
"prompt": text,
|
"prompt": text,
|
||||||
@@ -49,5 +69,11 @@ def main():
|
|||||||
result = response.json().get("response", "")
|
result = response.json().get("response", "")
|
||||||
print(f"\nLLM Response:\n{result}\n")
|
print(f"\nLLM Response:\n{result}\n")
|
||||||
|
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print("\nExiting...")
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An error occurred: {e}")
|
||||||
|
|
||||||
# Entry point: start the interactive record/transcribe/ask loop only when
# this file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Reference in New Issue
Block a user