Educational calculator teaching FSMs (explicit transition table tokenizer) and DAGs (recursive descent parser with AST evaluation). Includes CLI with REPL, graphviz visualization, and 61 tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
201 lines
6.1 KiB
Python
201 lines
6.1 KiB
Python
"""
|
|
Part 4: Visualization -- Graphviz Dot Output
|
|
==============================================
|
|
Generate graphviz dot-format strings for:
|
|
1. The tokenizer's finite state machine (FSM)
|
|
2. Any expression's AST (DAG)
|
|
3. Text-based tree rendering for the terminal
|
|
|
|
No external dependencies -- outputs raw dot strings that can be piped
|
|
to the 'dot' command: python main.py --dot "3+4*2" | dot -Tpng -o ast.png
|
|
"""
|
|
|
|
from parser import NumberNode, BinOpNode, UnaryOpNode, Node
|
|
from tokenizer import TRANSITIONS, State, CharClass, Action, TokenType
|
|
|
|
|
|
# ---------- FSM diagram ----------
|
|
|
|
# Edge-label text for each tokenizer character class. fsm_to_dot() looks
# classes up here and falls back to the enum member's name when a class
# has no entry.
_CHAR_LABELS = {
    # Characters that can be part of a number
    CharClass.DIGIT: "digit",
    CharClass.DOT: "'.'",
    # Operators and grouping
    CharClass.OPERATOR: "op",
    CharClass.LPAREN: "'('",
    CharClass.RPAREN: "')'",
    # Whitespace and end of input
    CharClass.SPACE: "space",
    CharClass.EOF: "EOF",
}
|
|
|
|
# Abbreviated text for each tokenizer action, shown after the "/" on an
# FSM edge label. fsm_to_dot() falls back to the enum member's name for
# any action without an entry.
_ACTION_LABELS = {
    # Keep building the current token
    Action.ACCUMULATE: "accum",
    # Emit a single token
    Action.EMIT_NUMBER: "emit num",
    Action.EMIT_OPERATOR: "emit op",
    Action.EMIT_LPAREN: "emit '('",
    Action.EMIT_RPAREN: "emit ')'",
    # Emit the pending number, then handle the current character
    Action.EMIT_NUMBER_THEN_OP: "emit num+op",
    Action.EMIT_NUMBER_THEN_LPAREN: "emit num+'('",
    Action.EMIT_NUMBER_THEN_RPAREN: "emit num+')'",
    Action.EMIT_NUMBER_THEN_DONE: "emit num, done",
    # No token produced
    Action.SKIP: "skip",
    Action.DONE: "done",
    Action.ERROR: "ERROR",
}
|
|
|
|
|
|
def fsm_to_dot():
    """
    Render the tokenizer's transition table as a graphviz dot digraph.

    Every (state, character class) entry of TRANSITIONS contributes one
    "<input> / <action>" label. Entries that share the same source and
    destination state are folded into a single edge whose label lists
    them one per line, in table order. Edges are written sorted by
    (source name, destination name).

    Because the state machine is plain data, the diagram is derived from
    the very table the tokenizer runs on rather than maintained by hand.

    Returns:
        The complete dot source as one newline-joined string.
    """
    preamble = [
        'digraph FSM {',
        ' rankdir=LR;',
        ' node [shape=circle, fontname="Helvetica"];',
        ' edge [fontname="Helvetica", fontsize=10];',
        '',
        ' // Start indicator',
        ' __start__ [shape=point, width=0.2];',
        ' __start__ -> START;',
        '',
    ]

    # (source name, destination name) -> labels collected for that edge.
    grouped = {}
    for key, rule in TRANSITIONS.items():
        from_state, input_class = key
        endpoints = (from_state.name, rule.next_state.name)
        trigger = _CHAR_LABELS.get(input_class, input_class.name)
        effect = _ACTION_LABELS.get(rule.action, rule.action.name)
        if endpoints not in grouped:
            grouped[endpoints] = []
        grouped[endpoints].append(f"{trigger} / {effect}")

    # A literal backslash followed by "n" is dot's line break inside a label.
    separator = "\\n"
    edges = [
        f' {origin} -> {target} [label="{separator.join(entries)}"];'
        for (origin, target), entries in sorted(grouped.items())
    ]

    return '\n'.join([*preamble, *edges, '}'])
|
|
|
|
|
|
# ---------- AST diagram ----------
|
|
|
|
# Display text for operator token types in both the dot and the text
# renderings of an AST. Lookups fall back to the token type's name.
_OP_LABELS = {
    # Binary operators
    TokenType.PLUS: '+',
    TokenType.MINUS: '-',
    TokenType.MULTIPLY: '*',
    TokenType.DIVIDE: '/',
    TokenType.POWER: '^',
    # Unary operator
    TokenType.UNARY_MINUS: 'neg',
}
|
|
|
|
|
|
def ast_to_dot(node):
|
|
"""
|
|
Generate a graphviz dot diagram of an AST (expression tree / DAG).
|
|
|
|
Each node gets a unique ID. Edges go from parent to children,
|
|
showing the directed acyclic structure. Leaves are boxed,
|
|
operators are ellipses.
|
|
"""
|
|
lines = [
|
|
'digraph AST {',
|
|
' node [fontname="Helvetica"];',
|
|
' edge [fontname="Helvetica"];',
|
|
'',
|
|
]
|
|
counter = [0]
|
|
|
|
def _visit(node):
|
|
nid = f"n{counter[0]}"
|
|
counter[0] += 1
|
|
|
|
match node:
|
|
case NumberNode(value=v):
|
|
label = _format_number(v)
|
|
lines.append(f' {nid} [label="{label}", shape=box, style=rounded];')
|
|
return nid
|
|
|
|
case UnaryOpNode(op=op, operand=child):
|
|
label = _OP_LABELS.get(op, op.name)
|
|
lines.append(f' {nid} [label="{label}", shape=ellipse];')
|
|
child_id = _visit(child)
|
|
lines.append(f' {nid} -> {child_id};')
|
|
return nid
|
|
|
|
case BinOpNode(op=op, left=left, right=right):
|
|
label = _OP_LABELS.get(op, op.name)
|
|
lines.append(f' {nid} [label="{label}", shape=ellipse];')
|
|
left_id = _visit(left)
|
|
right_id = _visit(right)
|
|
lines.append(f' {nid} -> {left_id} [label="L"];')
|
|
lines.append(f' {nid} -> {right_id} [label="R"];')
|
|
return nid
|
|
|
|
_visit(node)
|
|
lines.append('}')
|
|
return '\n'.join(lines)
|
|
|
|
|
|
# ---------- Text-based tree ----------
|
|
|
|
def ast_to_text(node, prefix="", connector=""):
|
|
"""
|
|
Render the AST as an indented text tree for terminal display.
|
|
|
|
Example output for (2 + 3) * 4:
|
|
*
|
|
+-- +
|
|
| +-- 2
|
|
| +-- 3
|
|
+-- 4
|
|
"""
|
|
match node:
|
|
case NumberNode(value=v):
|
|
label = _format_number(v)
|
|
case UnaryOpNode(op=op):
|
|
label = _OP_LABELS.get(op, op.name)
|
|
case BinOpNode(op=op):
|
|
label = _OP_LABELS.get(op, op.name)
|
|
|
|
lines = [f"{prefix}{connector}{label}"]
|
|
|
|
children = _get_children(node)
|
|
for i, child in enumerate(children):
|
|
is_last_child = (i == len(children) - 1)
|
|
if connector:
|
|
# Extend the prefix: if we used "+-- " then next children
|
|
# see "| " (continuing) or " " (last child)
|
|
child_prefix = prefix + ("| " if connector == "+-- " else " ")
|
|
else:
|
|
child_prefix = prefix
|
|
child_connector = "+-- " if is_last_child else "+-- "
|
|
# Use a different lead for non-last: the vertical bar continues
|
|
child_connector = "`-- " if is_last_child else "+-- "
|
|
child_lines = ast_to_text(child, child_prefix, child_connector)
|
|
lines.append(child_lines)
|
|
|
|
return '\n'.join(lines)
|
|
|
|
|
|
def _get_children(node):
|
|
match node:
|
|
case NumberNode():
|
|
return []
|
|
case UnaryOpNode(operand=child):
|
|
return [child]
|
|
case BinOpNode(left=left, right=right):
|
|
return [left, right]
|
|
return []
|
|
|
|
|
|
def _format_number(v):
|
|
if isinstance(v, float) and v == int(v):
|
|
return str(int(v))
|
|
return str(v)
|