Add expression-evaluator: DAGs & state machines tutorial project

Educational calculator teaching FSMs (explicit transition table tokenizer)
and DAGs (recursive descent parser with AST evaluation). Includes CLI with
REPL, graphviz visualization, and 61 tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dl92
2026-02-08 18:09:42 +00:00
parent 3a8705ece8
commit 01d5532823
11 changed files with 1557 additions and 0 deletions

View File

@@ -0,0 +1,200 @@
"""
Part 4: Visualization -- Graphviz Dot Output
==============================================
Generate graphviz dot-format strings for:
1. The tokenizer's finite state machine (FSM)
2. Any expression's AST (DAG)
3. Text-based tree rendering for the terminal
No external dependencies -- outputs raw dot strings that can be piped
to the 'dot' command: python main.py --dot "3+4*2" | dot -Tpng -o ast.png
"""
from parser import NumberNode, BinOpNode, UnaryOpNode, Node
from tokenizer import TRANSITIONS, State, CharClass, Action, TokenType
# ---------- FSM diagram ----------
# Human-readable labels for character classes.
# fsm_to_dot() uses these as the "input" half of each edge label; a class
# missing from this table falls back to its enum member name there.
_CHAR_LABELS = {
    CharClass.DIGIT: "digit",
    CharClass.DOT: "'.'",
    CharClass.OPERATOR: "op",
    CharClass.LPAREN: "'('",
    CharClass.RPAREN: "')'",
    CharClass.SPACE: "space",
    CharClass.EOF: "EOF",
}
# Short labels for actions.
# fsm_to_dot() uses these as the "action" half of each edge label; an action
# missing from this table falls back to its enum member name there.
_ACTION_LABELS = {
    Action.ACCUMULATE: "accum",
    Action.EMIT_NUMBER: "emit num",
    Action.EMIT_OPERATOR: "emit op",
    Action.EMIT_LPAREN: "emit '('",
    Action.EMIT_RPAREN: "emit ')'",
    Action.EMIT_NUMBER_THEN_OP: "emit num+op",
    Action.EMIT_NUMBER_THEN_LPAREN: "emit num+'('",
    Action.EMIT_NUMBER_THEN_RPAREN: "emit num+')'",
    Action.EMIT_NUMBER_THEN_DONE: "emit num, done",
    Action.SKIP: "skip",
    Action.DONE: "done",
    Action.ERROR: "ERROR",
}
def fsm_to_dot():
    """
    Render the tokenizer's state machine as a graphviz dot string.

    Walks every entry of the TRANSITIONS table and draws one edge per
    (source state, destination state) pair. Transitions that share a pair
    are merged into a single edge whose label lists each
    "<char class> / <action>" entry on its own line.

    Because the FSM is plain data, no knowledge of the tokenizer's control
    flow is needed here -- the table alone describes the whole diagram.

    Returns:
        The complete ``digraph FSM { ... }`` source as one string.
    """
    # Bucket the per-transition labels by (source name, destination name).
    grouped = {}
    for (from_state, trigger), rule in TRANSITIONS.items():
        trigger_text = _CHAR_LABELS.get(trigger, trigger.name)
        action_text = _ACTION_LABELS.get(rule.action, rule.action.name)
        key = (from_state.name, rule.next_state.name)
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(f"{trigger_text} / {action_text}")

    header = [
        'digraph FSM {',
        ' rankdir=LR;',
        ' node [shape=circle, fontname="Helvetica"];',
        ' edge [fontname="Helvetica", fontsize=10];',
        '',
        ' // Start indicator',
        ' __start__ [shape=point, width=0.2];',
        ' __start__ -> START;',
        '',
    ]

    # Edges are sorted by (source, destination) so the output is stable.
    # "\\n" is a literal backslash-n: dot's own line-break escape in labels.
    edges = [
        ' {} -> {} [label="{}"];'.format(src, dst, "\\n".join(texts))
        for (src, dst), texts in sorted(grouped.items())
    ]

    return '\n'.join(header + edges + ['}'])
# ---------- AST diagram ----------
# Display symbol for each operator token type, shared by ast_to_dot() and
# ast_to_text(). Both fall back to the enum member name for a token type
# that is not listed here.
_OP_LABELS = {
    TokenType.PLUS: '+',
    TokenType.MINUS: '-',
    TokenType.MULTIPLY: '*',
    TokenType.DIVIDE: '/',
    TokenType.POWER: '^',
    TokenType.UNARY_MINUS: 'neg',
}
def ast_to_dot(node):
"""
Generate a graphviz dot diagram of an AST (expression tree / DAG).
Each node gets a unique ID. Edges go from parent to children,
showing the directed acyclic structure. Leaves are boxed,
operators are ellipses.
"""
lines = [
'digraph AST {',
' node [fontname="Helvetica"];',
' edge [fontname="Helvetica"];',
'',
]
counter = [0]
def _visit(node):
nid = f"n{counter[0]}"
counter[0] += 1
match node:
case NumberNode(value=v):
label = _format_number(v)
lines.append(f' {nid} [label="{label}", shape=box, style=rounded];')
return nid
case UnaryOpNode(op=op, operand=child):
label = _OP_LABELS.get(op, op.name)
lines.append(f' {nid} [label="{label}", shape=ellipse];')
child_id = _visit(child)
lines.append(f' {nid} -> {child_id};')
return nid
case BinOpNode(op=op, left=left, right=right):
label = _OP_LABELS.get(op, op.name)
lines.append(f' {nid} [label="{label}", shape=ellipse];')
left_id = _visit(left)
right_id = _visit(right)
lines.append(f' {nid} -> {left_id} [label="L"];')
lines.append(f' {nid} -> {right_id} [label="R"];')
return nid
_visit(node)
lines.append('}')
return '\n'.join(lines)
# ---------- Text-based tree ----------
def ast_to_text(node, prefix="", connector=""):
"""
Render the AST as an indented text tree for terminal display.
Example output for (2 + 3) * 4:
*
+-- +
| +-- 2
| +-- 3
+-- 4
"""
match node:
case NumberNode(value=v):
label = _format_number(v)
case UnaryOpNode(op=op):
label = _OP_LABELS.get(op, op.name)
case BinOpNode(op=op):
label = _OP_LABELS.get(op, op.name)
lines = [f"{prefix}{connector}{label}"]
children = _get_children(node)
for i, child in enumerate(children):
is_last_child = (i == len(children) - 1)
if connector:
# Extend the prefix: if we used "+-- " then next children
# see "| " (continuing) or " " (last child)
child_prefix = prefix + ("| " if connector == "+-- " else " ")
else:
child_prefix = prefix
child_connector = "+-- " if is_last_child else "+-- "
# Use a different lead for non-last: the vertical bar continues
child_connector = "`-- " if is_last_child else "+-- "
child_lines = ast_to_text(child, child_prefix, child_connector)
lines.append(child_lines)
return '\n'.join(lines)
def _get_children(node):
match node:
case NumberNode():
return []
case UnaryOpNode(operand=child):
return [child]
case BinOpNode(left=left, right=right):
return [left, right]
return []
def _format_number(v):
if isinstance(v, float) and v == int(v):
return str(int(v))
return str(v)