Extract duplicated code (Whisper loading, audio recording, transcription, VAD processing) into reusable sttlib/ package. Rewrite all 3 scripts as thin wrappers. Add 24 unit tests with mocked hardware. Fix GPU fallback bug in assistant.py and args.system assignment bug.
84 lines · 2.6 KiB · Python
import argparse
|
|
import pyperclip
|
|
import requests
|
|
from sttlib import load_whisper_model, record_until_enter, transcribe
|
|
|
|
# --- Configuration ---
|
|
OLLAMA_URL = "http://localhost:11434/api/generate"
|
|
DEFAULT_OLLAMA_MODEL = "qwen3:latest"
|
|
|
|
|
|
def main():
|
|
print(f"System active. Model: {DEFAULT_OLLAMA_MODEL}")
|
|
parser = argparse.ArgumentParser(description="Whisper + Ollama CLI")
|
|
|
|
parser.add_argument("--nollm", "-n", action='store_true',
|
|
help="turn off llm")
|
|
parser.add_argument("--system", "-s", default=None,
|
|
help="The system prompt for Ollama")
|
|
parser.add_argument("--model_size", default="base",
|
|
help="Whisper model size: base, small, medium")
|
|
parser.add_argument(
|
|
"--ollama_model", default=DEFAULT_OLLAMA_MODEL, help="Ollama model name")
|
|
parser.add_argument(
|
|
"--num_ctx", default='5000', help="context length")
|
|
parser.add_argument(
|
|
"--temp", default='0.7', help="temperature")
|
|
|
|
args, unknown = parser.parse_known_args()
|
|
|
|
# Convert unknown list to a dictionary for the Ollama 'options' field
|
|
extra_options = {}
|
|
for i in range(0, len(unknown), 2):
|
|
key = unknown[i].lstrip('-')
|
|
val = unknown[i+1]
|
|
try:
|
|
val = float(val) if '.' in val else int(val)
|
|
except ValueError:
|
|
pass
|
|
extra_options[key] = val
|
|
|
|
model = load_whisper_model(args.model_size)
|
|
|
|
while True:
|
|
try:
|
|
audio_data = record_until_enter()
|
|
|
|
print("[TRANSCRIBING]...")
|
|
text = transcribe(model, audio_data.flatten())
|
|
|
|
if not text:
|
|
print("No speech detected. Try again.")
|
|
continue
|
|
|
|
print(f"You said: {text}")
|
|
pyperclip.copy(text)
|
|
|
|
if not args.nollm:
|
|
print(f"[OLLAMA] Thinking...")
|
|
payload = {
|
|
"model": args.ollama_model,
|
|
"prompt": text,
|
|
"stream": False,
|
|
"options": extra_options,
|
|
}
|
|
|
|
if args.system:
|
|
payload["system"] = args.system
|
|
|
|
response = requests.post(OLLAMA_URL, json=payload)
|
|
result = response.json().get("response", "")
|
|
print(f"\nLLM Response:\n{result}\n")
|
|
else:
|
|
print(f"\n{text}\n")
|
|
|
|
except KeyboardInterrupt:
|
|
print("\nExiting...")
|
|
break
|
|
except Exception as e:
|
|
print(f"An error occurred: {e}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|