"""Voice-driven terminal assistant.

Records microphone audio, transcribes it with faster-whisper, sends the text
to an Ollama chat model, and runs any shell command the model requests via the
``run_terminal_command`` tool (optionally after an interactive confirmation).

Usage flags: ``--model NAME`` selects the Ollama model; ``--confirm`` /
``--no-confirm`` turn the per-command confirmation prompt on or off.
"""
import argparse
import json
import os
import subprocess
import sys

import numpy as np
import ollama
import pyperclip
import sounddevice as sd
from faster_whisper import WhisperModel

# --- Settings ---
os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
MODEL_SIZE = "medium"
OLLAMA_MODEL = "qwen2.5-coder:7b"
CONFIRM_COMMANDS = True  # Set to False to run commands instantly

# Recording sample rate in Hz and the per-command execution timeout in seconds.
_SAMPLE_RATE = 16000
_COMMAND_TIMEOUT = 20

# Commands containing any of these substrings are refused outright. This is a
# plain substring match, so it is easy to bypass; the confirmation prompt is
# the real safeguard and this list only catches the most obvious disasters.
_BLOCKED_SUBSTRINGS = ("rm -rf /", "mkfs", "dd if=", ":(){ :|:& };:")

# Load Whisper
print("Loading Whisper model...")
model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")


# NOTE: this function is handed to ``ollama.chat(tools=[...])``. The ollama
# client derives the tool description shown to the model from the function's
# signature and docstring, so both are intentionally left unchanged; notes for
# maintainers live in comments instead.
#
# Always returns a string: the captured output, "Success (No output).", a
# rejection/blocked notice, or an "Execution Error: ..." description.
#
# SECURITY: the command text comes from the model and is executed through the
# shell (shell=True) by design. Keep CONFIRM_COMMANDS enabled unless the model
# and its inputs are fully trusted.
def run_terminal_command(command: str):
    """Executes a bash command in the terminal. Handles file ops, system info, etc."""
    # 1. Safety blacklist. Checked first so the user is never asked to confirm
    #    a command that would be refused anyway.
    if any(forbidden in command for forbidden in _BLOCKED_SUBSTRINGS):
        return "Error: Command blocked for security reasons."

    # 2. Visual confirmation block
    if CONFIRM_COMMANDS:
        print(f"\n{'='*40}")
        print(f"⚠️ AI SUGGESTED COMMAND: \033[1;32m{command}\033[0m")
        choice = input(" Confirm execution? [Y/n]: ").strip().lower()
        print(f"{'='*40}\n")
        # Empty input keeps the documented default (yes); both "n" and "no"
        # reject, so typing the full word can never run the command.
        if choice in ("n", "no"):
            return "User rejected this command."

    # 3. Execution
    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=_COMMAND_TIMEOUT,
        )
    except Exception as e:
        # Tool boundary: timeouts, OS errors, and malformed command strings
        # are reported back to the model as text instead of crashing the loop.
        return f"Execution Error: {str(e)}"

    # Decide "no output" from the captured streams themselves; the formatted
    # report below always contains the "STDOUT:" label and is never empty.
    if not (result.stdout.strip() or result.stderr.strip()):
        return "Success (No output)."
    return f"STDOUT: {result.stdout}\nSTDERR: {result.stderr}"


# Register tool
available_tools = {'run_terminal_command': run_terminal_command}


def record_audio():
    """Record mono microphone audio between two presses of Enter.

    Blocks on stdin twice: once to start and once to stop the recording.

    Returns:
        A NumPy array with the captured blocks concatenated along axis 0.
        When no audio block arrived before recording was stopped, an empty
        ``(0, 1)`` float32 array is returned instead of raising.

    Raises:
        EOFError: If stdin is closed while waiting for Enter.
    """
    chunks = []
    print("\n[READY] Press Enter to START...")
    input()
    print("[RECORDING] Press Enter to STOP...")

    def on_audio(indata, frames, time_info, status):
        # Keep a private copy of every block delivered by the stream.
        chunks.append(indata.copy())

    with sd.InputStream(samplerate=_SAMPLE_RATE, channels=1, callback=on_audio):
        input()

    if not chunks:
        # np.concatenate refuses an empty list. float32 is assumed to match
        # the stream's dtype (sounddevice's default) - TODO confirm if
        # sd.default.dtype is ever changed.
        return np.zeros((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)


def _parse_args():
    """Parse CLI flags; unknown flags are ignored rather than rejected."""
    parser = argparse.ArgumentParser(
        description="Whisper + Ollama Terminal Assistant")
    parser.add_argument("--model", default=OLLAMA_MODEL,
                        help="Ollama model name")
    parser.add_argument("--confirm", dest="confirm", action="store_true",
                        help="Confirm commands")
    # Companion switch: with a default of True, --confirm alone could never
    # turn confirmation off.
    parser.add_argument("--no-confirm", dest="confirm", action="store_false",
                        help="Run commands without asking for confirmation")
    parser.set_defaults(confirm=CONFIRM_COMMANDS)
    args, _unknown = parser.parse_known_args()
    return args


def _recover_tool_calls(content):
    """Rebuild a tool call from a reply that spelled it out as JSON text.

    Returns a one-element list shaped like a tool call dictionary, or None
    when the text does not mention the tool or holds no parseable JSON object.
    """
    if not content or '"run_terminal_command"' not in content:
        return None
    # Take the outermost {...} span of the reply as the candidate JSON block.
    start, end = content.find('{'), content.rfind('}') + 1
    if start < 0 or end <= start:
        return None
    try:
        raw_json = json.loads(content[start:end])
    except ValueError:
        return None
    if not isinstance(raw_json, dict):
        return None
    return [{'function': {
        'name': 'run_terminal_command',
        'arguments': raw_json.get('arguments', raw_json)
    }}]


def _tool_call_parts(tool_call):
    """Return ``(name, arguments)`` for a client tool-call object or a dict."""
    if hasattr(tool_call, 'function'):
        function = tool_call.function
        return function.name, function.arguments
    function = tool_call['function']
    return function['name'], function['arguments']


def _run_tool_call(tool_call):
    """Dispatch one tool call through ``available_tools``; return its text."""
    name, arguments = _tool_call_parts(tool_call)
    tool = available_tools.get(name)
    if tool is None:
        return f"Error: Unknown tool '{name}'."
    try:
        command = arguments['command']
    except (KeyError, TypeError):
        # Missing key, or arguments that are not a mapping at all.
        return "Error: Tool call did not include a 'command' argument."
    return tool(command)


def main():
    """Run the interactive record -> transcribe -> chat -> execute loop.

    Exits on Ctrl+C or when stdin is closed. Any other error in a turn is
    printed and the loop continues with the next recording.
    """
    global CONFIRM_COMMANDS

    # 1. Parse CLI flags and make the confirmation choice effective:
    #    run_terminal_command reads the module-level setting.
    args = _parse_args()
    CONFIRM_COMMANDS = args.confirm

    # Initialize conversation with a strict System Role
    # This "nudges" the model to use the tool feature rather than just chatting
    messages = [{
        'role': 'system',
        'content': (
            'You are a Linux terminal expert. When the user asks for a system task, '
            'you MUST use the "run_terminal_command" tool. Do not explain your actions '
            'in text; simply provide the command via the tool.'
        )
    }]

    print(f"--- Assistant Active (Model: {args.model}) ---")
    print(f"Confirmation Mode: {'ON' if args.confirm else 'OFF'}")

    while True:
        try:
            # A. Record and Transcribe
            audio_data = record_audio()
            if audio_data.size == 0:
                print("[NO AUDIO] Nothing was recorded.")
                continue
            print("[TRANSCRIBING]...")
            segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
            user_text = "".join(s.text for s in segments).strip()

            if not user_text:
                continue

            print(f"\nYOU: {user_text}")
            messages.append({'role': 'user', 'content': user_text})

            # B. Get AI Response (Strict Temperature 0 for reliability)
            response = ollama.chat(
                model=args.model,
                messages=messages,
                tools=[run_terminal_command],
                options={'temperature': 0}
            )

            # C. Detect Tool Calls: formal calls first, otherwise try to
            #    repair a reply where the model "talked" in raw JSON text.
            tool_calls = (response.message.tool_calls
                          or _recover_tool_calls(response.message.content))

            # D. Execute Tools if found
            if tool_calls:
                # Record the assistant turn once, then one tool message per
                # call, so the history is not duplicated for multi-call turns.
                messages.append(response.message)
                for tool_call in tool_calls:
                    result = _run_tool_call(tool_call)
                    messages.append({'role': 'tool', 'content': result})

                # Get the final "Human" explanation from AI
                final_response = ollama.chat(
                    model=args.model, messages=messages)
                print(f"AI: {final_response.message.content}")
                messages.append(final_response.message)
            else:
                # Normal Chatting
                print(f"AI: {response.message.content}")
                messages.append(response.message)

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin was closed; retrying would spin forever.
            print("\nExiting Assistant...")
            break
        except Exception as e:
            print(f"System Error: {e}")


if __name__ == "__main__":
    main()