Add persian-tutor: Gradio-based GCSE Persian language learning app
Vocabulary study with FSRS spaced repetition, AI tutoring (Ollama/Claude), essay marking, idioms browser, Anki export, and dashboard. 918 vocabulary entries across 39 categories. 41 tests passing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
152
python/persian-tutor/modules/vocab.py
Normal file
152
python/persian-tutor/modules/vocab.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""Vocabulary search, flashcard logic, and FSRS-driven review."""
|
||||
|
||||
import json
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
import fsrs
|
||||
|
||||
import db
|
||||
|
||||
|
||||
# Bundled vocabulary JSON: modules/ -> package root -> data/vocabulary.json.
VOCAB_PATH = Path(__file__).parent.parent / "data" / "vocabulary.json"

# Module-level cache; populated lazily on first load_vocab() call.
_vocab_data = None
|
||||
|
||||
|
||||
def load_vocab():
    """Return the vocabulary list, reading the JSON file only once.

    The parsed data is memoized in the module-level ``_vocab_data`` so
    repeated callers share a single in-memory copy.
    """
    global _vocab_data
    if _vocab_data is None:
        _vocab_data = json.loads(VOCAB_PATH.read_text(encoding="utf-8"))
    return _vocab_data
|
||||
|
||||
|
||||
def get_categories(vocab_data=None):
    """Return a sorted list of the unique category names.

    Args:
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab(). Added (with a default, so
            existing callers are unaffected) for consistency with search()
            and get_random_word().

    Returns:
        Alphabetically sorted list of distinct ``category`` values.
    """
    vocab = vocab_data if vocab_data is not None else load_vocab()
    return sorted({entry["category"] for entry in vocab})
|
||||
|
||||
|
||||
def get_sections(vocab_data=None):
    """Return a sorted list of the unique section names.

    Args:
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab(). Added (with a default, so
            existing callers are unaffected) for consistency with search()
            and get_random_word().

    Returns:
        Alphabetically sorted list of distinct ``section`` values.
    """
    vocab = vocab_data if vocab_data is not None else load_vocab()
    return sorted({entry["section"] for entry in vocab})
|
||||
|
||||
|
||||
def search(query, vocab_data=None):
    """Search vocabulary by English, Persian, or finglish text.

    Args:
        query: Search string. Matched case-insensitively against the
            ``english`` and ``finglish`` fields, and as an exact substring
            against the Persian script (case has no meaning there).
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab().

    Returns:
        List of matching entry dicts; empty list for a blank query.
    """
    if not query or not query.strip():
        return []
    # Compare against None rather than truthiness: an explicitly passed
    # empty list must yield no results instead of silently falling back
    # to the full bundled vocabulary.
    vocab = vocab_data if vocab_data is not None else load_vocab()
    needle = query.strip().lower()
    return [
        entry
        for entry in vocab
        if needle in entry["english"].lower()
        or needle in entry["persian"]
        or (entry.get("finglish") and needle in entry["finglish"].lower())
    ]
|
||||
|
||||
|
||||
def get_random_word(vocab_data=None, category=None):
    """Pick a random vocabulary entry, optionally filtered by category.

    Args:
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab().
        category: Category name to filter on; None or "All" disables the
            filter.

    Returns:
        A single entry dict, or None when no entry matches.
    """
    # Compare against None rather than truthiness so an explicitly passed
    # empty list returns None instead of drawing from the full vocabulary.
    vocab = vocab_data if vocab_data is not None else load_vocab()
    if category and category != "All":
        candidates = [e for e in vocab if e["category"] == category]
    else:
        candidates = vocab
    return random.choice(candidates) if candidates else None
|
||||
|
||||
|
||||
def get_flashcard_batch(count=10, category=None):
    """Assemble a batch of words for flashcard study.

    FSRS-due words are selected first; any shortfall is filled with words
    that have no review record yet, then (if still short) with random
    already-seen words. The final batch is shuffled.

    Args:
        count: Target batch size (the result may be smaller if the pool
            runs out).
        category: Category name to restrict the pool; None or "All" means
            no restriction.

    Returns:
        Shuffled list of vocabulary entry dicts.
    """
    all_words = load_vocab()
    if category and category != "All":
        pool = [w for w in all_words if w["category"] == category]
    else:
        pool = all_words

    # Words the scheduler marks as due take priority.
    due_ids = db.get_due_words(limit=count)
    batch = [w for w in pool if w["id"] in due_ids]

    shortfall = count - len(batch)
    if shortfall > 0:
        batch_ids = {w["id"] for w in batch}
        # Prefer words that have never been reviewed.
        fresh = [
            w
            for w in pool
            if w["id"] not in batch_ids and not db.get_word_progress(w["id"])
        ]
        if len(fresh) >= shortfall:
            extra = random.sample(fresh, shortfall)
        else:
            # Take every unseen word, then top up from the remaining pool.
            extra = fresh
            needed = shortfall - len(extra)
            leftovers = [
                w for w in pool if w["id"] not in batch_ids and w not in extra
            ]
            if leftovers:
                extra.extend(random.sample(leftovers, min(needed, len(leftovers))))
        batch.extend(extra)

    random.shuffle(batch)
    return batch
|
||||
|
||||
|
||||
def check_answer(word_id, user_answer, direction="en_to_fa", vocab_data=None):
    """Check if user's answer matches the target word.

    Args:
        word_id: Vocabulary entry ID.
        user_answer: What the user typed.
        direction: "en_to_fa" (user writes Persian) or "fa_to_en"
            (user writes English; compared case-insensitively).
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab(). Added (with a default, so
            existing callers are unaffected) for consistency with search().

    Returns:
        (is_correct, answer_text, entry) — ``answer_text`` is the expected
        answer when wrong, or the user's own (stripped) input when right,
        so the UI can echo exactly what was accepted. ``entry`` is None
        (with an empty answer_text) for an unknown word_id.
    """
    vocab = vocab_data if vocab_data is not None else load_vocab()
    entry = next((e for e in vocab if e["id"] == word_id), None)
    if not entry:
        return False, "", None

    user_answer = user_answer.strip()

    if direction == "en_to_fa":
        # Persian comparison is exact: lowercasing has no effect on the script.
        correct = entry["persian"].strip()
        is_correct = user_answer == correct
    else:
        correct = entry["english"].strip().lower()
        is_correct = user_answer.lower() == correct

    return is_correct, correct if not is_correct else user_answer, entry
|
||||
|
||||
|
||||
def format_word_card(entry, show_transliteration="off"):
    """Render a vocabulary entry as centered, RTL-safe HTML markup.

    Emits the Persian word (dir="rtl"), the English gloss, an optional
    transliteration line (only when ``show_transliteration`` is not "off"
    and the entry has a non-empty "finglish" field), and the category.
    Lines are joined with newlines.
    """
    persian_line = f'<div dir="rtl" style="font-size:2em; text-align:center">{entry["persian"]}</div>'
    english_line = f'<div style="font-size:1.3em; text-align:center">{entry["english"]}</div>'
    lines = [persian_line, english_line]

    finglish = entry.get("finglish")
    if show_transliteration != "off" and finglish:
        lines.append(f'<div style="text-align:center; color:#666; font-style:italic">{finglish}</div>')

    lines.append(f'<div style="text-align:center; color:#999; font-size:0.9em">{entry.get("category", "")}</div>')
    return "\n".join(lines)
|
||||
|
||||
|
||||
def get_word_status(word_id):
    """Classify a word's learning state as "new", "learning", or "mastered"."""
    progress = db.get_word_progress(word_id)
    if not progress:
        # No review record at all -> never studied.
        return "new"
    stability = progress["stability"]
    # Stability above 10 is treated as the mastery threshold here.
    return "mastered" if stability and stability > 10 else "learning"
|
||||
Reference in New Issue
Block a user