gemini v1
This commit is contained in:
dl92
2026-01-13 16:47:04 +00:00
parent f8d7fdda5d
commit 33248895ff
2 changed files with 58 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
{
"python-envs.defaultEnvManager": "ms-python.python:conda",
"python-envs.defaultPackageManager": "ms-python.python:conda",
"python-envs.pythonProjects": []
}

View File

@@ -0,0 +1,53 @@
import sounddevice as sd
import numpy as np
import pyperclip
import requests
from faster_whisper import WhisperModel
# Configuration
MODEL_SIZE = "base" # "base" is fast, "small" is more accurate
# NOTE(review): Ollama's default port is 11434 — confirm 11435 is an intentional
# non-default mapping (e.g. a second instance or a Docker port remap).
OLLAMA_URL = "http://localhost:11435/api/generate"
OLLAMA_MODEL = "llama3"
# Load Whisper on GPU
# Loaded once at import time; requires a CUDA-capable GPU and will raise here
# if none is available. "default" lets faster-whisper pick the compute type.
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="default")
def record_audio():
    """Record mono microphone audio between two Enter presses.

    Blocks on input() to start, streams 16 kHz mono audio via a
    sounddevice callback, then blocks on input() again to stop.

    Returns:
        np.ndarray of shape (n_samples, 1) — empty (0, 1) float32 array
        if no audio chunks arrived before recording was stopped.
    """
    fs = 16000  # Sample rate expected by Whisper models
    print("\n--- Press Enter to START recording ---")
    input()
    print("Recording... Press Enter to STOP.")
    chunks = []

    def callback(indata, frames, time, status):
        # Copy: sounddevice reuses the indata buffer between callbacks.
        chunks.append(indata.copy())

    with sd.InputStream(samplerate=fs, channels=1, callback=callback):
        input()

    # Guard: np.concatenate raises ValueError on an empty list, which happens
    # if Enter is pressed again before the first audio callback fires.
    if not chunks:
        return np.zeros((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)
def main():
    """Loop forever: record speech, transcribe it, copy the text to the
    clipboard, and print the Ollama model's response.

    Raises:
        requests.HTTPError: if Ollama returns a non-2xx status.
        requests.Timeout: if Ollama does not answer within the timeout.
    """
    while True:
        audio_data = record_audio()

        # Transcribe; flatten() converts the (n, 1) stream to the 1-D
        # waveform faster-whisper expects.
        segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
        text = "".join(segment.text for segment in segments).strip()
        print(f"You said: {text}")

        # Skip the LLM round-trip when nothing was transcribed
        # (e.g. an empty or silent recording).
        if not text:
            continue

        pyperclip.copy(text)  # Copies to clipboard automatically

        # Send to Ollama. A timeout prevents hanging forever if the server
        # stalls; raise_for_status surfaces a clear error on 404/500 instead
        # of a confusing JSON-decode failure.
        response = requests.post(
            OLLAMA_URL,
            json={
                "model": OLLAMA_MODEL,
                "prompt": text,
                "stream": False,
            },
            timeout=120,
        )
        response.raise_for_status()
        result = response.json().get("response", "")
        print(f"\nLLM Response:\n{result}\n")


if __name__ == "__main__":
    main()