Command-line app: speech-to-text (STT) with output sent to a local LLM

This commit is contained in:
dl92
2026-01-14 00:20:56 +00:00
parent f09b390d90
commit 781659a552
2 changed files with 77 additions and 23 deletions

View File

@@ -3,6 +3,7 @@ import numpy as np
import pyperclip import pyperclip
import requests import requests
import sys import sys
import argparse
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
import os import os
@@ -10,12 +11,14 @@ os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
# --- Configuration --- # --- Configuration ---
MODEL_SIZE = "medium" # Options: "base", "small", "medium", "large-v3" MODEL_SIZE = "medium" # Options: "base", "small", "medium", "large-v3"
OLLAMA_URL = "http://localhost:11434/api/generate" # Default is 11434 OLLAMA_URL = "http://localhost:11434/api/generate" # Default is 11434
OLLAMA_MODEL = "qwen3:latest" DEFAULT_OLLAMA_MODEL = "qwen3:latest"
# Load Whisper on GPU # Load Whisper on GPU
# float16 is faster and uses less VRAM on NVIDIA cards # float16 is faster and uses less VRAM on NVIDIA cards
print("Loading Whisper model...") print("Loading Whisper model...")
try: try:
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16") model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
except Exception as e: except Exception as e:
@@ -23,6 +26,7 @@ except Exception as e:
print("Falling back to CPU (Check your CUDA/cuDNN installation)") print("Falling back to CPU (Check your CUDA/cuDNN installation)")
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int16") model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int16")
def record_audio(): def record_audio():
fs = 16000 fs = 16000
print("\n[READY] Press Enter to START recording...") print("\n[READY] Press Enter to START recording...")
@@ -41,8 +45,43 @@ def record_audio():
return np.concatenate(recording, axis=0) return np.concatenate(recording, axis=0)
def main(): def main():
print(f"System active. Model: {OLLAMA_MODEL}") # 1. Setup Parser
print(f"System active. Model: {DEFAULT_OLLAMA_MODEL}")
parser = argparse.ArgumentParser(description="Whisper + Ollama CLI")
# Known Arguments (Hardcoded logic)
parser.add_argument("--nollm", "-n", action='store_true',
help="turn off llm")
parser.add_argument("--system", "-s", default=None,
help="The system prompt for Ollama")
parser.add_argument("--model_size", default="base",
help="Whisper model size: base, small, medium")
parser.add_argument(
"--ollama_model", default=DEFAULT_OLLAMA_MODEL, help="Ollama model name")
parser.add_argument(
"--num_ctx", default='5000', help="context length")
parser.add_argument(
"--temp", default='0.7', help="temperature")
# 2. Capture "Unknown" arguments
# args = known values, unknown = a list like ['--num_ctx', '4096', '--temp', '0.7']
args, unknown = parser.parse_known_args()
# Convert unknown list to a dictionary for the Ollama 'options' field
# This logic pairs ['--key', 'value'] into {key: value}
extra_options = {}
for i in range(0, len(unknown), 2):
key = unknown[i].lstrip('-') # remove the '--'
val = unknown[i+1]
# Try to convert numbers to actual ints/floats
try:
val = float(val) if '.' in val else int(val)
except ValueError:
pass
extra_options[key] = val
while True: while True:
try: try:
audio_data = record_audio() audio_data = record_audio()
@@ -55,19 +94,27 @@ def main():
print("No speech detected. Try again.") print("No speech detected. Try again.")
continue continue
print(f"You said: {text}") # print(f"You said: {text}")
pyperclip.copy(text) pyperclip.copy(text)
# Send to Ollama if (args.nollm == False):
print(f"[OLLAMA] Thinking...") # Send to Ollama
response = requests.post(OLLAMA_URL, json={ print(f"[OLLAMA] Thinking...")
"model": OLLAMA_MODEL, payload = {
"prompt": text, "model": args.ollama_model,
"stream": False "prompt": text,
}) "stream": False,
"options": extra_options,
}
result = response.json().get("response", "") if args.system:
print(f"\nLLM Response:\n{result}\n") payload["system"] = args
response = requests.post(OLLAMA_URL, json=payload)
result = response.json().get("response", "")
print(f"\nLLM Response:\n{result}\n")
else:
print(f"\n{text}\n")
except KeyboardInterrupt: except KeyboardInterrupt:
print("\nExiting...") print("\nExiting...")
@@ -75,5 +122,6 @@ def main():
except Exception as e: except Exception as e:
print(f"An error occurred: {e}") print(f"An error occurred: {e}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Launcher for the speech-to-text assistant: runs assistant.py inside the
# 'whisper-ollama' mamba environment so its dependencies resolve.

# Let CTranslate2 use FP16 CUDA kernels (assistant.py also sets this in-process).
export CT2_CUDA_ALLOW_FP16=1
# 'mamba run' executes the command within the context of the environment
# without needing to source .bashrc or shell hooks manually.
# "$@" forwards every CLI argument (e.g. --nollm, --ollama_model) to the script.
mamba run -n whisper-ollama python ~/family-repo/Code/python/tool-speechtotext/assistant.py "$@"