Files
Code/python/tool-speechtotext/assistant.py
local 104da381fb Refactor tool-speechtotext: extract sttlib shared library and add tests
Extract duplicated code (Whisper loading, audio recording, transcription,
VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as
thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback
bug in assistant.py and args.system assignment bug.
2026-02-08 00:40:31 +00:00

84 lines
2.6 KiB
Python

import argparse
import pyperclip
import requests
from sttlib import load_whisper_model, record_until_enter, transcribe
# --- Configuration ---
# HTTP endpoint that main() POSTs each transcribed prompt to.
OLLAMA_URL = "http://localhost:11434/api/generate"
# Model name used when --ollama_model is not given on the command line.
DEFAULT_OLLAMA_MODEL = "qwen3:latest"
def _coerce_option_value(raw):
    """Return *raw* as an int or float when it parses as one, else unchanged.

    A value containing a '.' is tried as a float, anything else as an int;
    strings that fail to parse are passed through as-is.
    """
    try:
        return float(raw) if '.' in raw else int(raw)
    except ValueError:
        return raw


def _parse_extra_options(unknown):
    """Turn leftover CLI tokens into a dict for the Ollama 'options' field.

    Accepts ``--key value``, ``--key=value`` and bare ``--flag`` (stored as
    True). Numeric-looking values are converted to int/float.

    Args:
        unknown: The list of unrecognised arguments returned by
            ``ArgumentParser.parse_known_args()``.

    Returns:
        A dict mapping option names (leading dashes stripped) to values.
    """
    options = {}
    index = 0
    while index < len(unknown):
        token = unknown[index]
        index += 1
        key, has_inline, inline_value = token.lstrip('-').partition('=')
        if not key:
            # A bare "--" (or similar) carries no option name; ignore it.
            continue
        if has_inline:
            options[key] = _coerce_option_value(inline_value)
        elif index < len(unknown) and not unknown[index].startswith('--'):
            # The next token is this option's value. Only "--" marks a new
            # option, so negative numbers such as "-1" are kept as values.
            options[key] = _coerce_option_value(unknown[index])
            index += 1
        else:
            # No value follows: treat as a boolean flag instead of crashing
            # with an IndexError on an odd number of tokens.
            options[key] = True
    return options


def main():
    """Run the interactive speech-to-text loop, optionally querying Ollama.

    Parses command-line arguments, loads the Whisper model, then repeatedly
    records audio, transcribes it, copies the transcript to the clipboard
    and, unless ``--nollm`` is given, sends it to the Ollama endpoint and
    prints the reply. Any unrecognised ``--key value`` arguments are
    forwarded to Ollama in the request's ``options`` field. The loop ends on
    Ctrl+C; other errors are reported and the loop continues.
    """
    parser = argparse.ArgumentParser(description="Whisper + Ollama CLI")
    parser.add_argument("--nollm", "-n", action='store_true',
                        help="turn off llm")
    parser.add_argument("--system", "-s", default=None,
                        help="The system prompt for Ollama")
    parser.add_argument("--model_size", default="base",
                        help="Whisper model size: base, small, medium")
    parser.add_argument(
        "--ollama_model", default=DEFAULT_OLLAMA_MODEL,
        help="Ollama model name")
    parser.add_argument(
        "--num_ctx", type=int, default=5000, help="context length")
    parser.add_argument(
        "--temp", type=float, default=0.7, help="temperature")
    args, unknown = parser.parse_known_args()

    # Report the model that will actually be used, not just the default.
    print(f"System active. Model: {args.ollama_model}")

    # --num_ctx / --temp were previously parsed but never sent; include them,
    # letting explicitly passed extra options take precedence.
    ollama_options = {"num_ctx": args.num_ctx, "temperature": args.temp}
    ollama_options.update(_parse_extra_options(unknown))

    model = load_whisper_model(args.model_size)

    while True:
        try:
            audio_data = record_until_enter()

            print("[TRANSCRIBING]...")
            text = transcribe(model, audio_data.flatten())

            if not text:
                print("No speech detected. Try again.")
                continue

            print(f"You said: {text}")
            pyperclip.copy(text)

            if args.nollm:
                print(f"\n{text}\n")
                continue

            print("[OLLAMA] Thinking...")
            payload = {
                "model": args.ollama_model,
                "prompt": text,
                "stream": False,
                "options": ollama_options,
            }
            if args.system:
                payload["system"] = args.system

            response = requests.post(OLLAMA_URL, json=payload)
            # Surface HTTP errors (e.g. unknown model) instead of printing
            # an empty LLM response.
            response.raise_for_status()
            result = response.json().get("response", "")
            print(f"\nLLM Response:\n{result}\n")

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            # Top-level boundary: report and keep the session alive.
            print(f"An error occurred: {e}")
# Start the assistant only when this file is executed as a script.
if __name__ == "__main__":
    main()