"""Dual AI backend: Ollama (fast/local) and Claude CLI (smart).""" import subprocess import ollama DEFAULT_OLLAMA_MODEL = "qwen2.5:7b" _ollama_model = DEFAULT_OLLAMA_MODEL def set_ollama_model(model): """Change the Ollama model used for fast queries.""" global _ollama_model _ollama_model = model def ask_ollama(prompt, system=None, model=None): """Query Ollama with an optional system prompt.""" model = model or _ollama_model messages = [] if system: messages.append({"role": "system", "content": system}) messages.append({"role": "user", "content": prompt}) response = ollama.chat(model=model, messages=messages) return response.message.content def ask_claude(prompt): """Query Claude via the CLI subprocess.""" result = subprocess.run( ["claude", "-p", prompt], capture_output=True, text=True, ) if result.returncode != 0: raise RuntimeError(f"Claude CLI failed (exit {result.returncode}): {result.stderr.strip()}") return result.stdout.strip() def ask(prompt, system=None, quality="fast"): """Unified interface. quality='fast' uses Ollama, 'smart' uses Claude.""" if quality == "smart": return ask_claude(prompt) return ask_ollama(prompt, system=system) def chat_ollama(messages, system=None, model=None): """Multi-turn conversation with Ollama.""" model = model or _ollama_model all_messages = [] if system: all_messages.append({"role": "system", "content": system}) all_messages.extend(messages) response = ollama.chat(model=model, messages=all_messages) return response.message.content