Command-line app: speech-to-text (STT), with the transcript sent to a local LLM
This commit is contained in:
@@ -3,6 +3,7 @@ import numpy as np
|
|||||||
import pyperclip
|
import pyperclip
|
||||||
import requests
|
import requests
|
||||||
import sys
|
import sys
|
||||||
|
import argparse
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -11,11 +12,13 @@ os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
|
|||||||
# --- Configuration ---
|
# --- Configuration ---
|
||||||
MODEL_SIZE = "medium" # Options: "base", "small", "medium", "large-v3"
|
MODEL_SIZE = "medium" # Options: "base", "small", "medium", "large-v3"
|
||||||
OLLAMA_URL = "http://localhost:11434/api/generate" # Default is 11434
|
OLLAMA_URL = "http://localhost:11434/api/generate" # Default is 11434
|
||||||
OLLAMA_MODEL = "qwen3:latest"
|
DEFAULT_OLLAMA_MODEL = "qwen3:latest"
|
||||||
|
|
||||||
# Load Whisper on GPU
|
# Load Whisper on GPU
|
||||||
# float16 is faster and uses less VRAM on NVIDIA cards
|
# float16 is faster and uses less VRAM on NVIDIA cards
|
||||||
print("Loading Whisper model...")
|
print("Loading Whisper model...")
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
|
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -23,6 +26,7 @@ except Exception as e:
|
|||||||
print("Falling back to CPU (Check your CUDA/cuDNN installation)")
|
print("Falling back to CPU (Check your CUDA/cuDNN installation)")
|
||||||
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int16")
|
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="int16")
|
||||||
|
|
||||||
|
|
||||||
def record_audio():
|
def record_audio():
|
||||||
fs = 16000
|
fs = 16000
|
||||||
print("\n[READY] Press Enter to START recording...")
|
print("\n[READY] Press Enter to START recording...")
|
||||||
@@ -41,8 +45,43 @@ def record_audio():
|
|||||||
|
|
||||||
return np.concatenate(recording, axis=0)
|
return np.concatenate(recording, axis=0)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
print(f"System active. Model: {OLLAMA_MODEL}")
|
# 1. Set up the argument parser
|
||||||
|
print(f"System active. Model: {DEFAULT_OLLAMA_MODEL}")
|
||||||
|
parser = argparse.ArgumentParser(description="Whisper + Ollama CLI")
|
||||||
|
|
||||||
|
# Known Arguments (Hardcoded logic)
|
||||||
|
parser.add_argument("--nollm", "-n", action='store_true',
|
||||||
|
help="turn off llm")
|
||||||
|
parser.add_argument("--system", "-s", default=None,
|
||||||
|
help="The system prompt for Ollama")
|
||||||
|
parser.add_argument("--model_size", default="base",
|
||||||
|
help="Whisper model size: base, small, medium")
|
||||||
|
parser.add_argument(
|
||||||
|
"--ollama_model", default=DEFAULT_OLLAMA_MODEL, help="Ollama model name")
|
||||||
|
parser.add_argument(
|
||||||
|
"--num_ctx", default='5000', help="context length")
|
||||||
|
parser.add_argument(
|
||||||
|
"--temp", default='0.7', help="temperature")
|
||||||
|
|
||||||
|
# 2. Capture "Unknown" arguments
|
||||||
|
# args = known values, unknown = a list of undeclared flags like ['--top_k', '40', '--seed', '42']
|
||||||
|
args, unknown = parser.parse_known_args()
|
||||||
|
|
||||||
|
# Convert unknown list to a dictionary for the Ollama 'options' field
|
||||||
|
# This logic pairs ['--key', 'value'] into {key: value}
|
||||||
|
extra_options = {}
|
||||||
|
for i in range(0, len(unknown), 2):
|
||||||
|
key = unknown[i].lstrip('-') # remove the '--'
|
||||||
|
val = unknown[i+1]
|
||||||
|
# Try to convert numbers to actual ints/floats
|
||||||
|
try:
|
||||||
|
val = float(val) if '.' in val else int(val)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
extra_options[key] = val
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
audio_data = record_audio()
|
audio_data = record_audio()
|
||||||
@@ -55,19 +94,27 @@ def main():
|
|||||||
print("No speech detected. Try again.")
|
print("No speech detected. Try again.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"You said: {text}")
|
# print(f"You said: {text}")
|
||||||
pyperclip.copy(text)
|
pyperclip.copy(text)
|
||||||
|
|
||||||
|
if (args.nollm == False):
|
||||||
# Send to Ollama
|
# Send to Ollama
|
||||||
print(f"[OLLAMA] Thinking...")
|
print(f"[OLLAMA] Thinking...")
|
||||||
response = requests.post(OLLAMA_URL, json={
|
payload = {
|
||||||
"model": OLLAMA_MODEL,
|
"model": args.ollama_model,
|
||||||
"prompt": text,
|
"prompt": text,
|
||||||
"stream": False
|
"stream": False,
|
||||||
})
|
"options": extra_options,
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.system:
|
||||||
|
payload["system"] = args
|
||||||
|
response = requests.post(OLLAMA_URL, json=payload)
|
||||||
|
|
||||||
result = response.json().get("response", "")
|
result = response.json().get("response", "")
|
||||||
print(f"\nLLM Response:\n{result}\n")
|
print(f"\nLLM Response:\n{result}\n")
|
||||||
|
else:
|
||||||
|
print(f"\n{text}\n")
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("\nExiting...")
|
print("\nExiting...")
|
||||||
@@ -75,5 +122,6 @@ def main():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"An error occurred: {e}")
|
print(f"An error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
6
python/tool-speechtotext/talk.sh
Executable file
6
python/tool-speechtotext/talk.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
export CT2_CUDA_ALLOW_FP16=1
|
||||||
|
|
||||||
|
# 'mamba run' executes the command within the context of the environment
|
||||||
|
# without needing to source .bashrc or shell hooks manually.
|
||||||
|
mamba run -n whisper-ollama python ~/family-repo/Code/python/tool-speechtotext/assistant.py "$@"
|
||||||
Reference in New Issue
Block a user