added feedback loop

2026-01-14 02:04:31 +00:00
parent 7f3ea89297
commit 31684ecded
1 changed files with 79 additions and 81 deletions
--- a/python/tool-speechtotext/voice_to_terminal.py
+++ b/python/tool-speechtotext/voice_to_terminal.py
@@ -1,43 +1,53 @@
-import argparse
 import sounddevice as sd
 import numpy as np
 import pyperclip
 import sys
+import argparse
 import os
 import subprocess
 import ollama
+import json
 from faster_whisper import WhisperModel

-# --- Settings ---
+# --- Configuration ---
 os.environ["CT2_CUDA_ALLOW_FP16"] = "1"
 MODEL_SIZE = "medium"
 OLLAMA_MODEL = "qwen2.5-coder:7b"
 CONFIRM_COMMANDS = True  # Set to False to run commands instantly

-# Load Whisper
+# Load Whisper on GPU (falls back to CPU if GPU loading fails)
 print("Loading Whisper model...")
-model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
+try:
+    model = WhisperModel(MODEL_SIZE, device="cuda", compute_type="float16")
+except Exception as e:
+    print(f"Error loading GPU: {e}, falling back to CPU")
+    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
+
+# --- Terminal Tool ---


 def run_terminal_command(command: str):
-    """Executes a bash command in the terminal. Handles file ops, system info, etc."""
-
-    # 1. Visual Confirmation Block
+    """
+    Executes a bash command in the Linux terminal.
+    Used for file management, system info, and terminal tasks.
+    """
    if CONFIRM_COMMANDS:
        print(f"\n{'='*40}")
-        print(f"⚠️  AI SUGGESTED COMMAND: \033[1;32m{command}\033[0m")
-        choice = input("   Confirm execution? [Y/n]: ").strip().lower()
+        print(f"⚠️  AI SUGGESTED: \033[1;32m{command}\033[0m")
+        # Allow user to provide feedback if they say 'n'
+        choice = input("   Confirm? [Y/n] or provide feedback: ").strip()
+
+        if choice.lower() == 'n':
+            return "USER_REJECTION: The user did not approve this command. Please suggest an alternative."
+        elif choice and choice.lower() != 'y':
+            return f"USER_FEEDBACK: The user rejected the command with this reason: '{choice}'. Please adjust."
        print(f"{'='*40}\n")

-        if choice == 'n':
-            return "User rejected this command."
-
-    # 2. Safety Blacklist (Last line of defense)
-    blacklist = ["rm -rf /", "mkfs", "dd if=", ":(){ :|:& };:"]
+    # Safety Guardrail
+    blacklist = ["rm -rf /", "mkfs", "dd if="]
    if any(forbidden in command for forbidden in blacklist):
        return "Error: Command blocked for security reasons."

-    # 3. Execution
    try:
        result = subprocess.run(command, shell=True,
                                capture_output=True, text=True, timeout=20)
@@ -47,10 +57,6 @@ def run_terminal_command(command: str):
        return f"Execution Error: {str(e)}"


-# Register tool
-available_tools = {'run_terminal_command': run_terminal_command}
-
-
 def record_audio():
    fs, recording = 16000, []
    print("\n[READY] Press Enter to START...")
@@ -63,98 +69,90 @@ def record_audio():


 def main():
-    # 1. Setup Parser for CLI flags
-    parser = argparse.ArgumentParser(
-        description="Whisper + Ollama Terminal Assistant")
-    parser.add_argument("--model", default=OLLAMA_MODEL,
-                        help="Ollama model name")
-    parser.add_argument("--confirm", action='store_true',
-                        default=CONFIRM_COMMANDS, help="Confirm commands")
-    args, unknown = parser.parse_known_args()
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", default=OLLAMA_MODEL)
+    args, _ = parser.parse_known_args()

-    # Initialize conversation with a strict System Role
-    # This "nudges" the model to use the tool feature rather than just chatting
+    # Initial System Prompt
    messages = [{
        'role': 'system',
        'content': (
-            'You are a Linux terminal expert. When the user asks for a system task, '
-            'you MUST use the "run_terminal_command" tool. Do not explain your actions '
-            'in text; simply provide the command via the tool.'
+            'You are a Linux expert assistant. When asked for a system task, '
+            'use the "run_terminal_command" tool. If the user rejects a command, '
+            'analyze their feedback and suggest a corrected alternative.'
        )
    }]

    print(f"--- Assistant Active (Model: {args.model}) ---")
-    print(f"Confirmation Mode: {'ON' if args.confirm else 'OFF'}")

    while True:
        try:
-            # A. Record and Transcribe
+            # 1. Voice Capture
            audio_data = record_audio()
-            print("[TRANSCRIBING]...")
            segments, _ = model.transcribe(audio_data.flatten(), beam_size=5)
            user_text = "".join([s.text for s in segments]).strip()
-
            if not user_text:
                continue

            print(f"\nYOU: {user_text}")
            messages.append({'role': 'user', 'content': user_text})

-            # B. Get AI Response (Strict Temperature 0 for reliability)
-            response = ollama.chat(
-                model=args.model,
-                messages=messages,
-                tools=[run_terminal_command],
-                options={'temperature': 0}
-            )
+            # 2. AI Interaction Loop (up to 3 attempts; a rejection triggers a retry)
+            for attempt in range(3):
+                response = ollama.chat(
+                    model=args.model,
+                    messages=messages,
+                    tools=[run_terminal_command],
+                    options={'temperature': 0}
+                )

-            # C. Detect Tool Calls (Handle both formal calls and raw JSON text)
-            tool_calls = response.message.tool_calls
+                tool_calls = response.message.tool_calls

-            # REPAIR LOGIC: If AI "talks" in JSON instead of using the tool field
-            if not tool_calls and '"run_terminal_command"' in response.message.content:
-                import json
-                try:
-                    content = response.message.content
-                    # Extract JSON block from text
-                    start, end = content.find('{'), content.rfind('}') + 1
-                    raw_json = json.loads(content[start:end])
-                    # Reconstruct as a tool call format
-                    tool_calls = [{'function': {
-                        'name': 'run_terminal_command',
-                        'arguments': raw_json.get('arguments', raw_json)
-                    }}]
-                except:
-                    pass
+                # Fallback Repair: Catch raw JSON output
+                if not tool_calls and '"run_terminal_command"' in response.message.content:
+                    try:
+                        c = response.message.content
+                        start, end = c.find('{'), c.rfind('}') + 1
+                        raw_json = json.loads(c[start:end])
+                        tool_calls = [{'function': {
+                            'name': 'run_terminal_command',
+                            'arguments': raw_json.get('arguments', raw_json)
+                        }}]
+                    except:
+                        pass

-            # D. Execute Tools if found
-            if tool_calls:
-                for tool_call in tool_calls:
-                    # Parse arguments based on format (official object vs dictionary)
-                    if hasattr(tool_call, 'function'):
-                        func_args = tool_call.function.arguments
-                    else:
-                        func_args = tool_call['function']['arguments']
+                # 3. Execution Logic
+                if tool_calls:
+                    call = tool_calls[0]
+                    # Normalize arguments format
+                    f_args = call.function.arguments if hasattr(
+                        call, 'function') else call['function']['arguments']

-                    # Run the terminal command locally
-                    result = run_terminal_command(func_args['command'])
+                    result = run_terminal_command(f_args['command'])

-                    # Add result back to history so AI can see it
+                    # Update History
                    messages.append(response.message)
                    messages.append({'role': 'tool', 'content': result})

-                # Get the final "Human" explanation from AI
-                final_response = ollama.chat(
-                    model=args.model, messages=messages)
-                print(f"AI: {final_response.message.content}")
-                messages.append(final_response.message)
-            else:
-                # Normal Chatting
-                print(f"AI: {response.message.content}")
-                messages.append(response.message)
+                    # If REJECTED, the loop continues and the AI sees the feedback
+                    if "USER_REJECTION" in result or "USER_FEEDBACK" in result:
+                        print("[RETHINKING] AI is adjusting the command...")
+                        continue
+                    else:
+                        # Success: Let AI explain the result
+                        final_res = ollama.chat(
+                            model=args.model, messages=messages)
+                        print(f"AI: {final_res.message.content}")
+                        messages.append(final_res.message)
+                        break
+                else:
+                    # Normal Chat
+                    print(f"AI: {response.message.content}")
+                    messages.append(response.message)
+                    break

        except KeyboardInterrupt:
-            print("\nExiting Assistant...")
+            print("\nExiting...")
            break
        except Exception as e:
            print(f"System Error: {e}")