Add persian-tutor: Gradio-based GCSE Persian language learning app

Vocabulary study with FSRS spaced repetition, AI tutoring (Ollama/Claude), essay marking, idioms browser, Anki export, and dashboard. 918 vocabulary entries across 39 categories. 41 tests passing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 01:57:44 +00:00
parent 104da381fb
commit 2e8c2c11d0
22 changed files with 10664 additions and 0 deletions
--- a/python/persian-tutor/stt.py
+++ b/python/persian-tutor/stt.py
@@ -0,0 +1,65 @@
+"""Persian speech-to-text wrapper using sttlib."""
+
+import sys
+
+import numpy as np
+
+sys.path.insert(0, "/home/ys/family-repo/Code/python/tool-speechtotext")
+from sttlib import load_whisper_model, transcribe, is_hallucination
+
+_model = None  # cached Whisper model; set by get_model() on its first call
+
+# Common Whisper hallucinations in Persian/silence; matched exactly after strip()
+PERSIAN_HALLUCINATIONS = [
+    "ممنون",  # "thank you" hallucination
+    "خداحافظ",  # "goodbye" hallucination
+    "تماشا کنید",  # "watch" hallucination
+    "لایک کنید",  # "like" hallucination
+]
+
+
+def get_model(size="medium"):
+    """Return the shared Whisper model, loading it on first use.
+    ``size`` is honoured only by the call that actually performs the load."""
+    global _model
+    _model = load_whisper_model(size) if _model is None else _model
+    return _model
+
+
+def transcribe_persian(audio_tuple):
+    """Transcribe Persian audio from a Gradio audio component.
+
+    Args:
+        audio_tuple: (sample_rate, numpy_array) from gr.Audio, or None.
+            Integer PCM is scaled to [-1, 1]; float input is cast to float32.
+
+    Returns:
+        Transcribed text, or "" for missing/empty audio or a hallucination.
+    """
+    if audio_tuple is None:
+        return ""
+
+    # NOTE(review): sample rate is not forwarded; confirm sttlib handles resampling.
+    _sr, audio = audio_tuple
+    audio = np.asarray(audio)
+    if audio.size == 0:
+        return ""  # nothing recorded; skip loading the model
+
+    # np.iinfo() raises ValueError on float dtypes (e.g. float64), so branch first.
+    if np.issubdtype(audio.dtype, np.floating):
+        audio_float = audio.astype(np.float32, copy=False)
+    elif audio.dtype == np.int16:
+        audio_float = audio.astype(np.float32) / 32768.0
+    else:
+        audio_float = audio.astype(np.float32) / np.iinfo(audio.dtype).max
+
+    # Mono conversion if stereo
+    if audio_float.ndim > 1:
+        audio_float = audio_float.mean(axis=1)
+
+    text = transcribe(get_model(), audio_float)
+
+    # Filter hallucinations: sttlib's own check first, then the Persian phrase list
+    if is_hallucination(text) or text.strip() in PERSIAN_HALLUCINATIONS:
+        return ""
+    return text