diff --git a/python/tool-speechtotext/.vscode/settings.json b/python/tool-speechtotext/.vscode/settings.json
new file mode 100644
index 0000000..a8c2003
--- /dev/null
+++ b/python/tool-speechtotext/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "python-envs.defaultEnvManager": "ms-python.python:conda",
+    "python-envs.defaultPackageManager": "ms-python.python:conda",
+    "python-envs.pythonProjects": []
+}
\ No newline at end of file
diff --git a/python/tool-speechtotext/assistant.py b/python/tool-speechtotext/assistant.py
new file mode 100644
index 0000000..7dd1c39
--- /dev/null
+++ b/python/tool-speechtotext/assistant.py
@@ -0,0 +1,79 @@
+"""Push-to-talk voice assistant: record mic audio, transcribe with
+faster-whisper, copy the text to the clipboard, and send it to Ollama."""
+import sounddevice as sd
+import numpy as np
+import pyperclip
+import requests
+from faster_whisper import WhisperModel
+
+# Configuration
+MODEL_SIZE = "base"  # "base" is fast, "small" is more accurate
+# NOTE(review): Ollama's default port is 11434 -- 11435 looks non-standard; confirm.
+OLLAMA_URL = "http://localhost:11435/api/generate"
+OLLAMA_MODEL = "llama3"
+REQUEST_TIMEOUT = 120  # seconds to wait for the LLM; without this, a stalled server hangs forever
+
+# Load Whisper on GPU, falling back to CPU when CUDA is unavailable
+# so the script still runs on machines without a GPU.
+try:
+    model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="default")
+except Exception:
+    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="default")
+
+
+def record_audio():
+    """Record mono microphone audio between two Enter presses.
+
+    Returns a float32 NumPy array sampled at 16 kHz; empty if the user
+    stopped before any audio was captured.
+    """
+    fs = 16000  # Sample rate
+    print("\n--- Press Enter to START recording ---")
+    input()
+    print("Recording... Press Enter to STOP.")
+
+    recording = []
+
+    def callback(indata, frames, time, status):
+        # Copy: sounddevice reuses the buffer between callbacks.
+        recording.append(indata.copy())
+
+    with sd.InputStream(samplerate=fs, channels=1, callback=callback):
+        input()
+
+    if not recording:
+        # Enter pressed before the first callback fired; avoid
+        # np.concatenate([]) which raises ValueError.
+        return np.zeros(0, dtype=np.float32)
+    return np.concatenate(recording, axis=0)
+
+
+def main():
+    """Loop forever: record -> transcribe -> clipboard -> query Ollama."""
+    while True:
+        audio_data = record_audio()
+        if audio_data.size == 0:
+            print("No audio captured; try again.")
+            continue
+
+        # Transcribe
+        segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
+        text = "".join(segment.text for segment in segments).strip()
+
+        print(f"You said: {text}")
+        pyperclip.copy(text)  # Copies to clipboard automatically
+
+        # Send to Ollama; keep the loop alive on network/server errors.
+        try:
+            response = requests.post(OLLAMA_URL, json={
+                "model": OLLAMA_MODEL,
+                "prompt": text,
+                "stream": False
+            }, timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+            result = response.json().get("response", "")
+        except requests.RequestException as exc:
+            print(f"Ollama request failed: {exc}")
+            continue
+
+        print(f"\nLLM Response:\n{result}\n")
+
+
+if __name__ == "__main__":
+    main()