"""Voice-driven Linux terminal assistant.

Records speech from the microphone, transcribes it with faster-whisper,
sends the text to an Ollama chat model, and lets the model call
``run_terminal_command`` (optionally after user confirmation).
"""
import argparse
import json
import os
import subprocess
import sys

import numpy as np
import ollama
import pyperclip
import sounddevice as sd
from faster_whisper import WhisperModel

# --- Configuration ---
os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
MODEL_SIZE = "medium"
OLLAMA_MODEL = "qwen2.5-coder:7b"
CONFIRM_COMMANDS = True  # Set to False to run commands instantly
SAMPLE_RATE = 16000  # Hz, sample rate used for microphone capture
COMMAND_TIMEOUT_S = 20  # Seconds before a running command is aborted
MAX_ATTEMPTS = 3  # Model turns allowed per request when commands are rejected
# Substrings that cause a command to be refused outright.
# NOTE(review): a substring blacklist is a weak guard (easily bypassed with
# different spacing or flags); the confirmation prompt is the real safety net.
BLACKLIST = ("rm -rf /", "mkfs", "dd if=")

# Load Whisper on GPU
print("Loading Whisper model...")
try:
    model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
except Exception as e:
    print(f"Error loading GPU: {e}, falling back to CPU")
    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")


# --- Terminal Tool ---
# NOTE: this function is passed to ollama.chat(tools=[...]), so its signature
# and docstring are model-facing. Keep the docstring short and descriptive.
def run_terminal_command(command: str) -> str:
    """
    Executes a bash command in the Linux terminal.
    Used for file management, system info, and terminal tasks.

    Args:
        command: The bash command line to execute.

    Returns:
        The captured STDOUT/STDERR of the command, a success notice when
        the command printed nothing, or a message describing why the
        command was blocked, rejected, or failed.
    """
    # Safety guardrail first: never ask the user to approve a command that
    # would be refused anyway.
    if any(forbidden in command for forbidden in BLACKLIST):
        return "Error: Command blocked for security reasons."

    if CONFIRM_COMMANDS:
        print(f"\n{'='*40}")
        print(f"⚠️ AI SUGGESTED: \033[1;32m{command}\033[0m")
        # Anything other than a yes/no answer is passed back as feedback.
        choice = input(" Confirm? [Y/n] or provide feedback: ").strip()
        answer = choice.lower()
        if answer in ("n", "no"):
            return ("USER_REJECTION: The user did not approve this command. "
                    "Please suggest an alternative.")
        # An empty answer means "yes" (the default shown as [Y/n]).
        if choice and answer not in ("y", "yes"):
            return ("USER_FEEDBACK: The user rejected the command with this "
                    f"reason: '{choice}'. Please adjust.")
        print(f"{'='*40}\n")

    try:
        # shell=True is intentional: the model produces full shell command
        # lines (pipes, redirects, globs) rather than argv lists.
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=COMMAND_TIMEOUT_S,
        )
    except Exception as e:
        # Tool boundary: always hand the model a string (covers timeouts,
        # OS errors, invalid arguments) instead of raising.
        return f"Execution Error: {str(e)}"

    # Inspect the streams themselves; the formatted string below is never
    # empty because of its "STDOUT:"/"STDERR:" labels.
    if not (result.stdout.strip() or result.stderr.strip()):
        if result.returncode == 0:
            return "Success (No output)."
        return f"Failed with exit code {result.returncode} (No output)."

    output = f"STDOUT: {result.stdout}\nSTDERR: {result.stderr}"
    if result.returncode != 0:
        output += f"\nEXIT CODE: {result.returncode}"
    return output


def record_audio():
    """Record mono audio from the default input device between two Enter presses.

    Returns:
        A NumPy array of the captured samples with one column (frames x 1).
        The array is empty when no audio chunk arrived before recording
        was stopped.
    """
    chunks = []
    print("\n[READY] Press Enter to START...")
    input()
    print("[RECORDING] Press Enter to STOP...")

    def on_audio(indata, frames, time_info, status):
        # Copy each chunk: the buffer handed to the callback is only read
        # here and may not outlive the call.
        chunks.append(indata.copy())

    with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=on_audio):
        input()

    if not chunks:
        # np.concatenate raises ValueError on an empty list.
        # float32 assumed to match sounddevice's default dtype — TODO confirm.
        return np.zeros((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)


def _recover_tool_calls(content):
    """Rebuild a tool call from a reply that printed the call as raw JSON text.

    Returns a one-element list shaped like a tool call, or None when the
    text does not contain a parsable ``run_terminal_command`` JSON object.
    """
    if not content or '"run_terminal_command"' not in content:
        return None
    start, end = content.find('{'), content.rfind('}') + 1
    try:
        raw_json = json.loads(content[start:end])
    except json.JSONDecodeError:
        return None
    if not isinstance(raw_json, dict):
        return None
    return [{'function': {
        'name': 'run_terminal_command',
        'arguments': raw_json.get('arguments', raw_json),
    }}]


def _call_arguments(call):
    """Return the arguments of a tool call given as an object or a plain dict."""
    if hasattr(call, 'function'):
        return call.function.arguments
    return call['function']['arguments']


def main():
    """Run the interactive loop: record, transcribe, chat, execute tool calls.

    Accepts an optional ``--model`` command-line argument naming the Ollama
    model (defaults to ``OLLAMA_MODEL``). Exits on Ctrl+C or end of input.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=OLLAMA_MODEL)
    args, _ = parser.parse_known_args()

    # Initial System Prompt
    messages = [{
        'role': 'system',
        'content': (
            'You are a Linux expert assistant. When asked for a system task, '
            'use the "run_terminal_command" tool. If the user rejects a command, '
            'analyze their feedback and suggest a corrected alternative.'
        )
    }]

    print(f"--- Assistant Active (Model: {args.model}) ---")

    while True:
        try:
            # 1. Voice Capture
            audio_data = record_audio()
            if audio_data.size == 0:
                print("[SKIPPED] No audio captured.")
                continue
            segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
            user_text = "".join(s.text for s in segments).strip()
            if not user_text:
                continue

            print(f"\nYOU: {user_text}")
            messages.append({'role': 'user', 'content': user_text})

            # 2. AI Interaction Loop (retries while the user rejects commands)
            for _attempt in range(MAX_ATTEMPTS):
                response = ollama.chat(
                    model=args.model,
                    messages=messages,
                    tools=[run_terminal_command],
                    options={'temperature': 0}
                )

                # Fallback Repair: catch tool calls emitted as raw JSON text
                tool_calls = (response.message.tool_calls
                              or _recover_tool_calls(response.message.content))

                if not tool_calls:
                    # Normal Chat
                    print(f"AI: {response.message.content}")
                    messages.append(response.message)
                    break

                # 3. Execution Logic (only the first requested call is run)
                f_args = _call_arguments(tool_calls[0])
                result = run_terminal_command(f_args['command'])

                # Update History
                messages.append(response.message)
                messages.append({'role': 'tool', 'content': result})

                # If REJECTED, loop again so the AI sees the feedback
                if "USER_REJECTION" in result or "USER_FEEDBACK" in result:
                    print("[RETHINKING] AI is adjusting the command...")
                    continue

                # Success: Let AI explain the result
                final_res = ollama.chat(model=args.model, messages=messages)
                print(f"AI: {final_res.message.content}")
                messages.append(final_res.message)
                break
            else:
                # Every attempt ended in a rejection; say so instead of
                # silently returning to the recording prompt.
                print(f"AI: No command was approved after {MAX_ATTEMPTS} attempts.")

        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed; retrying input() would loop forever.
            print("\nExiting...")
            break
        except Exception as e:
            # Top-level boundary: report and keep the assistant running.
            print(f"System Error: {e}")


if __name__ == "__main__":
    main()