voice_to_terminal#1
automate terminal with voice
This commit is contained in:
6
python/tool-speechtotext/terminal.sh
Executable file
6
python/tool-speechtotext/terminal.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Launcher for the voice-to-terminal assistant.

# Exported so the Python process started below inherits it.
export CT2_CUDA_ALLOW_FP16=1

env_name="whisper-ollama"
assistant=~/family-repo/Code/python/tool-speechtotext/voice_to_terminal.py

# 'mamba run' executes the command within the context of the named
# environment, so there is no need to source .bashrc or shell hooks manually.
# Every argument given to this script is forwarded unchanged.
mamba run -n "$env_name" python "$assistant" "$@"
|
||||
164
python/tool-speechtotext/voice_to_terminal.py
Normal file
164
python/tool-speechtotext/voice_to_terminal.py
Normal file
@@ -0,0 +1,164 @@
|
||||
import argparse
|
||||
import sounddevice as sd
|
||||
import numpy as np
|
||||
import pyperclip
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import ollama
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
# --- Settings ---
# Set in the process environment before the Whisper model is constructed below.
os.environ.update({"CT2_CUDA_ALLOW_FP16": "1"})
MODEL_SIZE = "medium"  # model size handed to WhisperModel
OLLAMA_MODEL = "qwen2.5-coder:7b"  # default value of the --model CLI flag
CONFIRM_COMMANDS = True  # Set to False to run commands instantly

# Load Whisper once, at import time; main() transcribes with this instance.
print("Loading Whisper model...")
model = WhisperModel(
    MODEL_SIZE,
    device="cuda",
    compute_type="float16",
)
|
||||
|
||||
|
||||
def run_terminal_command(command: str) -> str:
    """Executes a bash command in the terminal. Handles file ops, system info, etc.

    Args:
        command: The complete shell command line to execute.

    Returns:
        A text report of the outcome: the captured stdout/stderr, a success or
        failure note when the command printed nothing, or an explanation of why
        the command was not run (blocked, rejected by the user, or errored).
    """
    # NOTE: main() passes this function to ollama.chat(tools=[...]); the ollama
    # client builds the tool description from the signature and docstring
    # above, so keep the summary line and the Args section model-friendly.

    # 1. Safety blacklist. Checked BEFORE the prompt so that a blocked command
    #    can never be approved by a reflexive press of Enter.
    blacklist = ("rm -rf /", "mkfs", "dd if=", ":(){ :|:& };:")
    if any(forbidden in command for forbidden in blacklist):
        return "Error: Command blocked for security reasons."

    # 2. Visual confirmation block (default answer is "yes").
    if CONFIRM_COMMANDS:
        print(f"\n{'='*40}")
        print(f"⚠️ AI SUGGESTED COMMAND: \033[1;32m{command}\033[0m")
        choice = input(" Confirm execution? [Y/n]: ").strip().lower()
        print(f"{'='*40}\n")

        # Accept both "n" and "no" as a rejection.
        if choice in ("n", "no"):
            return "User rejected this command."

    # 3. Execution. Any failure (timeout, spawn error, undecodable output) is
    #    reported back as text because the caller feeds the result to the model.
    try:
        result = subprocess.run(command, shell=True,
                                capture_output=True, text=True, timeout=20)
    except Exception as e:
        return f"Execution Error: {str(e)}"

    stdout, stderr = result.stdout, result.stderr
    if not stdout.strip() and not stderr.strip():
        # Nothing was printed: the exit status is the only signal available.
        if result.returncode == 0:
            return "Success (No output)."
        return f"Command failed with exit code {result.returncode} (No output)."

    output = f"STDOUT: {stdout}\nSTDERR: {stderr}"
    if result.returncode != 0:
        output += f"\nEXIT CODE: {result.returncode}"
    return output
|
||||
|
||||
|
||||
# Register tool: maps each tool name to the callable that implements it.
available_tools = dict(run_terminal_command=run_terminal_command)
|
||||
|
||||
|
||||
def record_audio():
    """Record mono microphone audio between two presses of Enter.

    Blocks on stdin twice: the first Enter opens the input stream, the second
    one closes it.

    Returns:
        A NumPy array with every chunk delivered to the stream callback,
        concatenated along axis 0 in arrival order. When no chunk arrived
        before recording was stopped, an empty array of shape (0, 1) is
        returned instead of raising.
    """
    sample_rate = 16000
    chunks = []

    print("\n[READY] Press Enter to START...")
    input()
    print("[RECORDING] Press Enter to STOP...")

    def on_audio(indata, frames, time_info, status):
        # Copy the chunk: sounddevice only guarantees the buffer for the
        # duration of the callback.
        chunks.append(indata.copy())

    with sd.InputStream(samplerate=sample_rate, channels=1, callback=on_audio):
        input()

    if not chunks:
        # np.concatenate([]) raises ValueError; hand back an empty recording
        # (float32 is sounddevice's default sample dtype) so callers can
        # simply test `.size`.
        return np.zeros((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)
|
||||
|
||||
|
||||
def _extract_tool_calls(message):
    """Return the tool calls requested by an Ollama chat message.

    The structured ``tool_calls`` field is preferred. When it is empty but the
    text content mentions "run_terminal_command", the outermost ``{...}`` span
    of the content is parsed as JSON and rebuilt into an equivalent tool call
    (repair path for models that "talk" in JSON instead of using the field).

    Returns:
        A list of tool calls: either the library's own objects or plain dicts
        shaped like ``{'function': {'name': ..., 'arguments': ...}}``. Empty
        when the message requests no tool.
    """
    import json  # only needed by the repair path below

    if message.tool_calls:
        return list(message.tool_calls)

    # content may be None/empty when the model answered with nothing.
    content = message.content or ""
    if '"run_terminal_command"' not in content:
        return []

    # Extract the JSON block from the surrounding text.
    start, end = content.find('{'), content.rfind('}') + 1
    if start == -1 or end <= start:
        return []
    try:
        raw_json = json.loads(content[start:end])
    except json.JSONDecodeError:
        # Not valid JSON after all: treat the reply as ordinary chat.
        return []

    # Reconstruct as a tool call format.
    return [{'function': {
        'name': 'run_terminal_command',
        'arguments': raw_json.get('arguments', raw_json),
    }}]


def main():
    """Run the interactive voice -> LLM -> shell loop.

    Parses the CLI flags (``--model``, ``--confirm`` / ``--no-confirm``;
    unrecognised arguments are ignored), then repeatedly records audio,
    transcribes it with the module-level Whisper ``model``, sends the text to
    Ollama and executes any requested tool call. The loop ends on Ctrl+C or
    when stdin is closed.
    """
    global CONFIRM_COMMANDS

    # 1. Setup Parser for CLI flags
    parser = argparse.ArgumentParser(
        description="Whisper + Ollama Terminal Assistant")
    parser.add_argument("--model", default=OLLAMA_MODEL,
                        help="Ollama model name")
    parser.add_argument("--confirm", action='store_true',
                        default=CONFIRM_COMMANDS, help="Confirm commands")
    # Counterpart of --confirm: without it the default of True could never be
    # switched off from the command line.
    parser.add_argument("--no-confirm", dest="confirm", action='store_false',
                        help="Run commands without asking for confirmation")
    args, _unknown = parser.parse_known_args()

    # run_terminal_command() reads the module-level flag, so publish the
    # parsed value there; otherwise the CLI choice has no effect.
    CONFIRM_COMMANDS = args.confirm

    # Initialize conversation with a strict System Role
    # This "nudges" the model to use the tool feature rather than just chatting
    messages = [{
        'role': 'system',
        'content': (
            'You are a Linux terminal expert. When the user asks for a system task, '
            'you MUST use the "run_terminal_command" tool. Do not explain your actions '
            'in text; simply provide the command via the tool.'
        )
    }]

    print(f"--- Assistant Active (Model: {args.model}) ---")
    print(f"Confirmation Mode: {'ON' if args.confirm else 'OFF'}")

    while True:
        # Remember where this turn starts so a failed turn can be rolled back.
        turn_start = len(messages)
        try:
            # A. Record and Transcribe
            audio_data = record_audio()
            if audio_data.size == 0:
                continue  # nothing was captured

            print("[TRANSCRIBING]...")
            segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
            user_text = "".join(s.text for s in segments).strip()
            if not user_text:
                continue

            print(f"\nYOU: {user_text}")
            messages.append({'role': 'user', 'content': user_text})

            # B. Get AI Response (Strict Temperature 0 for reliability)
            response = ollama.chat(
                model=args.model,
                messages=messages,
                tools=[run_terminal_command],
                options={'temperature': 0}
            )

            # C. Detect Tool Calls (formal calls or raw JSON text)
            tool_calls = _extract_tool_calls(response.message)

            if not tool_calls:
                # Normal Chatting
                print(f"AI: {response.message.content}")
                messages.append(response.message)
                continue

            # D. Execute Tools. The assistant turn is recorded once, followed
            #    by one tool message per call, so the AI can see every result.
            messages.append(response.message)
            for tool_call in tool_calls:
                # Parse based on format (official object vs dictionary)
                if hasattr(tool_call, 'function'):
                    tool_name = tool_call.function.name
                    func_args = tool_call.function.arguments
                else:
                    tool_name = tool_call['function'].get('name')
                    func_args = tool_call['function']['arguments']

                tool = available_tools.get(tool_name)
                command = (func_args.get('command')
                           if hasattr(func_args, 'get') else None)

                if tool is None:
                    result = f"Error: unknown tool '{tool_name}'."
                elif not isinstance(command, str) or not command.strip():
                    result = "Error: tool call did not include a 'command' string."
                else:
                    # Run the terminal command locally
                    result = tool(command)

                messages.append({'role': 'tool', 'content': result})

            # Get the final "Human" explanation from AI
            final_response = ollama.chat(
                model=args.model, messages=messages)
            print(f"AI: {final_response.message.content}")
            messages.append(final_response.message)

        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed, so input() can never succeed again;
            # looping on it would print errors forever.
            print("\nExiting Assistant...")
            break
        except Exception as e:
            # Drop the half-finished turn so the history stays well-formed.
            del messages[turn_start:]
            print(f"System Error: {e}")
|
||||
|
||||
|
||||
# Script entry point: start the assistant only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user