import numpy as np
import pyperclip
import requests
import sounddevice as sd
from faster_whisper import WhisperModel

# Configuration
MODEL_SIZE = "base"  # "base" is fast, "small" is more accurate
# NOTE(review): Ollama's default port is 11434 — confirm 11435 is intentional.
OLLAMA_URL = "http://localhost:11435/api/generate"
OLLAMA_MODEL = "llama3"

# Load Whisper on GPU once at import time so the loop doesn't re-load it.
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="default")


def record_audio():
    """Record mono 16 kHz audio between two Enter presses.

    Returns:
        np.ndarray: float32 samples of shape (frames, 1); an empty
        (0, 1) array if nothing was captured before the second Enter.
    """
    fs = 16000  # sample rate — matches what Whisper expects
    print("\n--- Press Enter to START recording ---")
    input()
    print("Recording... Press Enter to STOP.")

    chunks = []

    def callback(indata, frames, time, status):
        # Must copy: sounddevice reuses the buffer between callbacks.
        chunks.append(indata.copy())

    with sd.InputStream(samplerate=fs, channels=1, callback=callback):
        input()

    if not chunks:
        # Guard: np.concatenate raises ValueError on an empty sequence
        # (possible if Enter is pressed before the first callback fires).
        return np.empty((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)


def main():
    """Loop forever: record speech, transcribe, copy text, query Ollama."""
    while True:
        audio_data = record_audio()
        if audio_data.size == 0:
            print("No audio captured; try again.")
            continue

        # Transcribe; segments is a lazy generator consumed by the join.
        segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
        text = "".join(segment.text for segment in segments).strip()
        print(f"You said: {text}")
        pyperclip.copy(text)  # Copies to clipboard automatically

        # Send to Ollama. A timeout plus narrow exception handling keeps a
        # down or hung server from blocking/killing the loop forever.
        try:
            response = requests.post(
                OLLAMA_URL,
                json={
                    "model": OLLAMA_MODEL,
                    "prompt": text,
                    "stream": False,
                },
                timeout=120,
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Ollama request failed: {exc}")
            continue

        result = response.json().get("response", "")
        print(f"\nLLM Response:\n{result}\n")


if __name__ == "__main__":
    main()