Add persian-tutor: Gradio-based GCSE Persian language learning app

Vocabulary study with FSRS spaced repetition, AI tutoring (Ollama/Claude),
essay marking, idioms browser, Anki export, and dashboard. 918 vocabulary
entries across 39 categories. 41 tests passing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
local
2026-02-08 01:57:44 +00:00
parent 104da381fb
commit 2e8c2c11d0
22 changed files with 10664 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""One-time script to generate/update vocabulary.json with AI-assisted transliterations.
Usage:
python scripts/generate_vocab.py
This reads an existing vocabulary.json, finds entries missing finglish
transliterations, and uses Ollama to generate them.
"""
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from ai import ask_ollama
# Vocabulary file this script reads and rewrites: data/vocabulary.json under
# the parent of the directory that contains this script.
VOCAB_PATH = Path(__file__).parent.parent / "data" / "vocabulary.json"
def _clean_transliteration(line):
    """Return the bare transliteration contained in one line of model output.

    Strips leading list decoration ("1. ", "2) ", "3- ", "- ", "* "), keeps
    only the last field when the model echoes the prompt ("word = meaning =
    translit" or "word: translit" or "word - translit"), and drops trailing
    punctuation. Hyphens inside a word are preserved. Returns "" when nothing
    usable is left.
    """
    import re  # local import so this block does not depend on file-level imports

    text = line.strip()
    # Leading numbering or bullet; a bare "-"/"*" only counts when followed by whitespace.
    text = re.sub(r"^(?:\d+\s*[.):\-]|[-*•](?=\s))\s*", "", text)
    # "=" and ":" always separate fields; a dash only does so when it is
    # surrounded by whitespace, so "be-khâter" is not cut in half.
    fields = [part.strip() for part in re.split(r"[=:]|\s+[-–—]\s+", text)]
    fields = [part for part in fields if part]
    if not fields:
        return ""
    return fields[-1].rstrip(".,;").strip()


def generate_transliterations(vocab, *, batch_size=20, model="qwen2.5:7b", ask=None):
    """Fill in missing finglish transliterations using AI.

    Entries whose "finglish" value is absent or empty are sent to the model in
    batches; each entry dict is updated in place.

    Args:
        vocab: List of entry dicts with "persian" and "english" keys and an
            optional "finglish" key.
        batch_size: Number of entries sent per request (must be >= 1).
        model: Model name passed to the ask callable.
        ask: Callable ``ask(prompt, model=...)`` returning the reply text.
            Defaults to ``ask_ollama``.

    Returns:
        The same ``vocab`` list that was passed in.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    A batch is skipped (its entries stay untouched, so a later run retries
    them) when the request fails or when the reply does not contain exactly
    one non-empty line per entry, because positional matching would then
    store transliterations against the wrong words.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    missing = [e for e in vocab if not e.get("finglish")]
    if not missing:
        print("All entries already have finglish transliterations.")
        return vocab

    if ask is None:
        # Resolved only when a request is really needed.
        ask = ask_ollama

    total = len(missing)
    print(f"Generating transliterations for {total} entries...")
    for start in range(0, total, batch_size):
        batch = missing[start : start + batch_size]
        pairs = "\n".join(f"{e['persian']} = {e['english']}" for e in batch)
        prompt = f"""For each Persian word below, provide the Finglish (romanized) transliteration.
Use these conventions: â for آ, kh for خ, sh for ش, zh for ژ, gh for ق/غ, ch for چ.
Reply with ONLY the transliterations, one per line, in the same order.
{pairs}"""
        try:
            response = ask(prompt, model=model)
            lines = [ln.strip() for ln in response.splitlines() if ln.strip()]
        except Exception as exc:
            # Best effort: one failed request must not abort the remaining batches.
            print(f" Error processing batch: {exc}")
            continue

        if len(lines) != len(batch):
            print(
                f" Skipping batch: expected {len(batch)} lines, got {len(lines)}"
            )
            continue

        for entry, raw in zip(batch, lines):
            cleaned = _clean_transliteration(raw)
            if cleaned:
                # Never overwrite with an empty value; the entry stays "missing".
                entry["finglish"] = cleaned
        print(f" Processed {min(start + batch_size, total)}/{total}")
    return vocab
def main():
    """Load the vocabulary file, fill in missing transliterations, save it back.

    Prints a notice and returns without writing anything when the file at
    VOCAB_PATH does not exist.
    """
    if not VOCAB_PATH.exists():
        print(f"No vocabulary file found at {VOCAB_PATH}")
        return

    with VOCAB_PATH.open(encoding="utf-8") as src:
        entries = json.load(src)
    print(f"Loaded {len(entries)} entries")

    entries = generate_transliterations(entries)

    # ensure_ascii=False writes the Persian text as-is instead of \u escapes.
    with VOCAB_PATH.open("w", encoding="utf-8") as dst:
        json.dump(entries, dst, ensure_ascii=False, indent=2)
    print(f"Saved {len(entries)} entries to {VOCAB_PATH}")


# Run the update only when this file is executed as a script.
if __name__ == "__main__":
    main()