Add persian-tutor: Gradio-based GCSE Persian language learning app
Vocabulary study with FSRS spaced repetition, AI tutoring (Ollama/Claude), essay marking, idioms browser, Anki export, and dashboard. 918 vocabulary entries across 39 categories. 41 tests passing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
152
python/persian-tutor/modules/vocab.py
Normal file
152
python/persian-tutor/modules/vocab.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""Vocabulary search, flashcard logic, and FSRS-driven review."""
|
||||
|
||||
import json
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
import fsrs
|
||||
|
||||
import db
|
||||
|
||||
|
||||
# Bundled vocabulary JSON: modules/ -> package root -> data/vocabulary.json.
VOCAB_PATH = Path(__file__).parent.parent / "data" / "vocabulary.json"

# Module-level cache; populated lazily on first load_vocab() call.
_vocab_data = None
|
||||
|
||||
|
||||
def load_vocab():
    """Return the vocabulary list, reading the JSON file only once.

    The parsed data is memoized in the module-level ``_vocab_data`` so
    repeated callers share a single in-memory copy.
    """
    global _vocab_data
    if _vocab_data is None:
        _vocab_data = json.loads(VOCAB_PATH.read_text(encoding="utf-8"))
    return _vocab_data
|
||||
|
||||
|
||||
def get_categories(vocab_data=None):
    """Return a sorted list of the unique category names.

    Args:
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab(). Added (with a default, so
            existing callers are unaffected) for consistency with search()
            and get_random_word().

    Returns:
        Alphabetically sorted list of distinct ``category`` values.
    """
    vocab = vocab_data if vocab_data is not None else load_vocab()
    return sorted({entry["category"] for entry in vocab})
|
||||
|
||||
|
||||
def get_sections(vocab_data=None):
    """Return a sorted list of the unique section names.

    Args:
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab(). Added (with a default, so
            existing callers are unaffected) for consistency with search()
            and get_random_word().

    Returns:
        Alphabetically sorted list of distinct ``section`` values.
    """
    vocab = vocab_data if vocab_data is not None else load_vocab()
    return sorted({entry["section"] for entry in vocab})
|
||||
|
||||
|
||||
def search(query, vocab_data=None):
    """Search vocabulary by English, Persian, or finglish text.

    Args:
        query: Search string. Matched case-insensitively against the
            ``english`` and ``finglish`` fields, and as an exact substring
            against the Persian script (case has no meaning there).
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab().

    Returns:
        List of matching entry dicts; empty list for a blank query.
    """
    if not query or not query.strip():
        return []
    # Compare against None rather than truthiness: an explicitly passed
    # empty list must yield no results instead of silently falling back
    # to the full bundled vocabulary.
    vocab = vocab_data if vocab_data is not None else load_vocab()
    needle = query.strip().lower()
    return [
        entry
        for entry in vocab
        if needle in entry["english"].lower()
        or needle in entry["persian"]
        or (entry.get("finglish") and needle in entry["finglish"].lower())
    ]
|
||||
|
||||
|
||||
def get_random_word(vocab_data=None, category=None):
    """Pick a random vocabulary entry, optionally filtered by category.

    Args:
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab().
        category: Category name to filter on; None or "All" disables the
            filter.

    Returns:
        A single entry dict, or None when no entry matches.
    """
    # Compare against None rather than truthiness so an explicitly passed
    # empty list returns None instead of drawing from the full vocabulary.
    vocab = vocab_data if vocab_data is not None else load_vocab()
    if category and category != "All":
        candidates = [e for e in vocab if e["category"] == category]
    else:
        candidates = vocab
    return random.choice(candidates) if candidates else None
|
||||
|
||||
|
||||
def get_flashcard_batch(count=10, category=None):
    """Assemble a batch of words for flashcard study.

    FSRS-due words are selected first; any shortfall is filled with words
    that have no review record yet, then (if still short) with random
    already-seen words. The final batch is shuffled.

    Args:
        count: Target batch size (the result may be smaller if the pool
            runs out).
        category: Category name to restrict the pool; None or "All" means
            no restriction.

    Returns:
        Shuffled list of vocabulary entry dicts.
    """
    all_words = load_vocab()
    if category and category != "All":
        pool = [w for w in all_words if w["category"] == category]
    else:
        pool = all_words

    # Words the scheduler marks as due take priority.
    due_ids = db.get_due_words(limit=count)
    batch = [w for w in pool if w["id"] in due_ids]

    shortfall = count - len(batch)
    if shortfall > 0:
        batch_ids = {w["id"] for w in batch}
        # Prefer words that have never been reviewed.
        fresh = [
            w
            for w in pool
            if w["id"] not in batch_ids and not db.get_word_progress(w["id"])
        ]
        if len(fresh) >= shortfall:
            extra = random.sample(fresh, shortfall)
        else:
            # Take every unseen word, then top up from the remaining pool.
            extra = fresh
            needed = shortfall - len(extra)
            leftovers = [
                w for w in pool if w["id"] not in batch_ids and w not in extra
            ]
            if leftovers:
                extra.extend(random.sample(leftovers, min(needed, len(leftovers))))
        batch.extend(extra)

    random.shuffle(batch)
    return batch
|
||||
|
||||
|
||||
def check_answer(word_id, user_answer, direction="en_to_fa", vocab_data=None):
    """Check if user's answer matches the target word.

    Args:
        word_id: Vocabulary entry ID.
        user_answer: What the user typed.
        direction: "en_to_fa" (user writes Persian) or "fa_to_en"
            (user writes English; compared case-insensitively).
        vocab_data: Optional pre-loaded vocabulary list. Defaults to the
            bundled vocabulary via load_vocab(). Added (with a default, so
            existing callers are unaffected) for consistency with search().

    Returns:
        (is_correct, answer_text, entry) — ``answer_text`` is the expected
        answer when wrong, or the user's own (stripped) input when right,
        so the UI can echo exactly what was accepted. ``entry`` is None
        (with an empty answer_text) for an unknown word_id.
    """
    vocab = vocab_data if vocab_data is not None else load_vocab()
    entry = next((e for e in vocab if e["id"] == word_id), None)
    if not entry:
        return False, "", None

    user_answer = user_answer.strip()

    if direction == "en_to_fa":
        # Persian comparison is exact: lowercasing has no effect on the script.
        correct = entry["persian"].strip()
        is_correct = user_answer == correct
    else:
        correct = entry["english"].strip().lower()
        is_correct = user_answer.lower() == correct

    return is_correct, correct if not is_correct else user_answer, entry
|
||||
|
||||
|
||||
def format_word_card(entry, show_transliteration="off"):
    """Render a vocabulary entry as centered, RTL-safe HTML markup.

    Emits the Persian word (dir="rtl"), the English gloss, an optional
    transliteration line (only when ``show_transliteration`` is not "off"
    and the entry has a non-empty "finglish" field), and the category.
    Lines are joined with newlines.
    """
    persian_line = f'<div dir="rtl" style="font-size:2em; text-align:center">{entry["persian"]}</div>'
    english_line = f'<div style="font-size:1.3em; text-align:center">{entry["english"]}</div>'
    lines = [persian_line, english_line]

    finglish = entry.get("finglish")
    if show_transliteration != "off" and finglish:
        lines.append(f'<div style="text-align:center; color:#666; font-style:italic">{finglish}</div>')

    lines.append(f'<div style="text-align:center; color:#999; font-size:0.9em">{entry.get("category", "")}</div>')
    return "\n".join(lines)
|
||||
|
||||
|
||||
def get_word_status(word_id):
    """Classify a word's learning state as "new", "learning", or "mastered"."""
    progress = db.get_word_progress(word_id)
    if not progress:
        # No review record at all -> never studied.
        return "new"
    stability = progress["stability"]
    # Stability above 10 is treated as the mastery threshold here.
    return "mastered" if stability and stability > 10 else "learning"
|
||||
Reference in New Issue
Block a user