Vocabulary study with FSRS spaced repetition, AI tutoring (Ollama/Claude), essay marking, idioms browser, Anki export, and dashboard. 918 vocabulary entries across 39 categories. 41 tests passing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
153 lines
4.7 KiB
Python
153 lines
4.7 KiB
Python
"""Vocabulary search, flashcard logic, and FSRS-driven review."""
|
|
|
|
import json
|
|
import random
|
|
from pathlib import Path
|
|
|
|
import fsrs
|
|
|
|
import db
|
|
|
|
|
|
# Path to the bundled vocabulary JSON (<repo>/data/vocabulary.json),
# resolved relative to this module's location.
VOCAB_PATH = Path(__file__).parent.parent / "data" / "vocabulary.json"

# Module-level cache for the parsed vocabulary; populated lazily by load_vocab().
_vocab_data = None
|
|
|
|
|
|
def load_vocab():
    """Return the vocabulary entries, loading the JSON file on first call.

    The parsed data is cached in the module-level ``_vocab_data`` so the
    file is only read and decoded once per process.
    """
    global _vocab_data
    if _vocab_data is None:
        # First call reads and parses the file; later calls reuse the cache.
        _vocab_data = json.loads(VOCAB_PATH.read_text(encoding="utf-8"))
    return _vocab_data
|
|
|
|
|
|
def get_categories():
    """Return sorted list of unique categories."""
    seen = set()
    for entry in load_vocab():
        seen.add(entry["category"])
    return sorted(seen)
|
|
|
|
|
|
def get_sections():
    """Return sorted list of unique sections."""
    unique_sections = set(entry["section"] for entry in load_vocab())
    return sorted(unique_sections)
|
|
|
|
|
|
def search(query, vocab_data=None):
    """Search vocabulary by English, Persian, or finglish text.

    Args:
        query: Search text. English/finglish matching is case-insensitive;
            Persian matching is an exact substring match.
        vocab_data: Optional list of entries to search. Defaults to the full
            cached vocabulary; an explicitly passed empty list searches
            nothing (and returns []).

    Returns:
        List of matching entry dicts (possibly empty).
    """
    if not query or not query.strip():
        return []
    # Compare with `is not None`, not truthiness: an explicit empty list must
    # yield no results rather than silently falling back to the whole vocab.
    vocab = vocab_data if vocab_data is not None else load_vocab()
    query_lower = query.strip().lower()
    return [
        entry
        for entry in vocab
        if query_lower in entry["english"].lower()
        or query_lower in entry["persian"]
        or (entry.get("finglish") and query_lower in entry["finglish"].lower())
    ]
|
|
|
|
|
|
def get_random_word(vocab_data=None, category=None):
    """Pick a random vocabulary entry, optionally filtered by category.

    Args:
        vocab_data: Optional entry list. Defaults to the full cached
            vocabulary; an explicitly passed empty list yields None instead
            of silently falling back to the whole vocab.
        category: Restrict the pick to this category. None or "All" means
            no filter.

    Returns:
        A randomly chosen entry dict, or None if nothing matches.
    """
    # `is not None` rather than `or`: [] is a valid (empty) pool.
    vocab = vocab_data if vocab_data is not None else load_vocab()
    if category and category != "All":
        filtered = [e for e in vocab if e["category"] == category]
    else:
        filtered = vocab
    return random.choice(filtered) if filtered else None
|
|
|
|
|
|
def get_flashcard_batch(count=10, category=None):
    """Get a batch of words for flashcard study.

    Prioritizes due words (FSRS), then fills with unseen words, then random
    already-seen words. The resulting batch is shuffled.

    Args:
        count: Target batch size (the pool may yield fewer).
        category: Restrict to this category; None or "All" means no filter.

    Returns:
        List of vocabulary entry dicts, at most ``count`` long, shuffled.
    """
    vocab = load_vocab()

    if category and category != "All":
        pool = [e for e in vocab if e["category"] == category]
    else:
        pool = vocab

    # Due words first. Materialize as a set: db may return a list, and we
    # test membership once per pool entry.
    due_ids = set(db.get_due_words(limit=count))
    batch = [e for e in pool if e["id"] in due_ids]

    # Fill remaining slots with unseen, then random already-seen words.
    remaining = count - len(batch)
    if remaining > 0:
        chosen_ids = {e["id"] for e in batch}
        # Prefer words with no progress record yet.
        # NOTE(review): this issues one db query per word — acceptable for
        # ~1k entries, but a bulk "ids with progress" helper would avoid
        # the N+1 pattern.
        unseen = [
            e for e in pool
            if e["id"] not in chosen_ids and not db.get_word_progress(e["id"])
        ]
        if len(unseen) >= remaining:
            batch.extend(random.sample(unseen, remaining))
        else:
            # Use all unseen, then top up with random seen-but-not-due words.
            batch.extend(unseen)
            chosen_ids.update(e["id"] for e in unseen)
            still_needed = remaining - len(unseen)
            # Exclude by id (hashable, O(1)) instead of whole-dict equality.
            rest = [e for e in pool if e["id"] not in chosen_ids]
            if rest:
                batch.extend(random.sample(rest, min(still_needed, len(rest))))

    random.shuffle(batch)
    return batch
|
|
|
|
|
|
def check_answer(word_id, user_answer, direction="en_to_fa"):
    """Check if user's answer matches the target word.

    Args:
        word_id: Vocabulary entry ID.
        user_answer: What the user typed.
        direction: "en_to_fa" (user writes Persian) or "fa_to_en" (user
            writes English).

    Returns:
        Tuple ``(is_correct, correct_answer, entry)``. ``entry`` is None for
        an unknown id; when the answer is correct the user's own stripped
        answer is echoed back as ``correct_answer``.
    """
    entry = None
    for candidate in load_vocab():
        if candidate["id"] == word_id:
            entry = candidate
            break
    if entry is None:
        return False, "", None

    answer = user_answer.strip()

    if direction == "en_to_fa":
        # Persian comparison is exact — no case folding in the Arabic script.
        expected = entry["persian"].strip()
        is_correct = answer == expected
    else:
        # English comparison is case-insensitive.
        expected = entry["english"].strip().lower()
        is_correct = answer.lower() == expected

    if is_correct:
        return True, answer, entry
    return False, expected, entry
|
|
|
|
|
|
def format_word_card(entry, show_transliteration="off"):
    """Format a vocabulary entry for display as RTL-safe markdown."""
    persian_div = f'<div dir="rtl" style="font-size:2em; text-align:center">{entry["persian"]}</div>'
    english_div = f'<div style="font-size:1.3em; text-align:center">{entry["english"]}</div>'
    lines = [persian_div, english_div]

    # Transliteration is opt-in and only rendered when the entry has one.
    if show_transliteration != "off" and entry.get("finglish"):
        lines.append(
            f'<div style="text-align:center; color:#666; font-style:italic">{entry["finglish"]}</div>'
        )

    lines.append(
        f'<div style="text-align:center; color:#999; font-size:0.9em">{entry.get("category", "")}</div>'
    )
    return "\n".join(lines)
|
|
|
|
|
|
def get_word_status(word_id):
    """Classify a word's study status as "new", "learning", or "mastered".

    A word with no progress record is "new"; FSRS stability above 10 counts
    as "mastered"; anything else is still "learning".
    """
    progress = db.get_word_progress(word_id)
    if not progress:
        return "new"
    stability = progress["stability"]
    return "mastered" if stability and stability > 10 else "learning"
|