initial
gemini v1
This commit is contained in:
53
python/tool-speechtotext/assistant.py
Normal file
53
python/tool-speechtotext/assistant.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import sounddevice as sd
|
||||
import numpy as np
|
||||
import pyperclip
|
||||
import requests
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
# Configuration
MODEL_SIZE = "base" # "base" is fast, "small" is more accurate
# NOTE(review): 11435 is not Ollama's default port (11434) — presumably a
# deliberate custom mapping; confirm against the local Ollama setup.
OLLAMA_URL = "http://localhost:11435/api/generate"
OLLAMA_MODEL = "llama3"

# Load Whisper on GPU
# Loaded once at import time so every transcription reuses the same model.
# Requires a CUDA-capable GPU; this raises at startup if none is available.
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="default")
|
||||
|
||||
def record_audio():
    """Record microphone audio between two Enter key presses.

    Blocks on stdin: the first Enter starts a mono 16 kHz input stream,
    the second Enter stops it.

    Returns:
        numpy.ndarray: the captured samples concatenated in arrival order
        (shape (n, 1), sounddevice's default float32). Returns an empty
        (0, 1) array if no audio frames arrived — previously this case
        crashed with ValueError from np.concatenate on an empty list.
    """
    fs = 16000  # Sample rate (16 kHz is what Whisper expects)
    print("\n--- Press Enter to START recording ---")
    input()
    print("Recording... Press Enter to STOP.")

    recording = []

    def callback(indata, frames, time, status):
        # Called from the audio thread; copy because sounddevice reuses
        # the buffer after the callback returns.
        recording.append(indata.copy())

    with sd.InputStream(samplerate=fs, channels=1, callback=callback):
        input()  # block until the user presses Enter again

    if not recording:
        # Guard: np.concatenate raises on an empty sequence (e.g. the user
        # pressed Enter twice before any audio frames were delivered).
        return np.zeros((0, 1), dtype=np.float32)
    return np.concatenate(recording, axis=0)
|
||||
|
||||
def main():
    """Run the push-to-talk loop: record, transcribe, query Ollama, print.

    Each iteration records audio, transcribes it with the module-level
    Whisper model, copies the transcript to the clipboard, and sends it
    as a prompt to the Ollama server. Loops forever (Ctrl+C to exit).
    """
    while True:
        audio_data = record_audio()

        # Transcribe
        segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
        text = "".join(segment.text for segment in segments).strip()

        print(f"You said: {text}")
        pyperclip.copy(text)  # Copies to clipboard automatically

        # Send to Ollama. Fixes vs. original: a timeout so a stalled
        # server can't hang the assistant forever, an HTTP status check
        # so error pages aren't silently parsed as empty responses, and
        # per-iteration recovery so one failed request doesn't kill the loop.
        try:
            response = requests.post(
                OLLAMA_URL,
                json={
                    "model": OLLAMA_MODEL,
                    "prompt": text,
                    "stream": False,
                },
                timeout=120,
            )
            response.raise_for_status()
            result = response.json().get("response", "")
        except requests.RequestException as exc:
            print(f"\nOllama request failed: {exc}\n")
            continue

        print(f"\nLLM Response:\n{result}\n")
|
||||
|
||||
# Script entry point: only start the interactive loop when run directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user