""" Part 4: Visualization -- Graphviz Dot Output ============================================== Generate graphviz dot-format strings for: 1. The tokenizer's finite state machine (FSM) 2. Any expression's AST (DAG) 3. Text-based tree rendering for the terminal No external dependencies -- outputs raw dot strings that can be piped to the 'dot' command: python main.py --dot "3+4*2" | dot -Tpng -o ast.png """ from parser import NumberNode, BinOpNode, UnaryOpNode, Node from tokenizer import TRANSITIONS, State, CharClass, Action, TokenType # ---------- FSM diagram ---------- # Human-readable labels for character classes _CHAR_LABELS = { CharClass.DIGIT: "digit", CharClass.DOT: "'.'", CharClass.OPERATOR: "op", CharClass.LPAREN: "'('", CharClass.RPAREN: "')'", CharClass.SPACE: "space", CharClass.EOF: "EOF", } # Short labels for actions _ACTION_LABELS = { Action.ACCUMULATE: "accum", Action.EMIT_NUMBER: "emit num", Action.EMIT_OPERATOR: "emit op", Action.EMIT_LPAREN: "emit '('", Action.EMIT_RPAREN: "emit ')'", Action.EMIT_NUMBER_THEN_OP: "emit num+op", Action.EMIT_NUMBER_THEN_LPAREN: "emit num+'('", Action.EMIT_NUMBER_THEN_RPAREN: "emit num+')'", Action.EMIT_NUMBER_THEN_DONE: "emit num, done", Action.SKIP: "skip", Action.DONE: "done", Action.ERROR: "ERROR", } def fsm_to_dot(): """ Generate a graphviz dot diagram of the tokenizer's state machine. Reads the TRANSITIONS table directly -- because the FSM is data (a dict), we can programmatically inspect and visualize it. This is a key advantage of explicit state machines over implicit if/else control flow. """ lines = [ 'digraph FSM {', ' rankdir=LR;', ' node [shape=circle, fontname="Helvetica"];', ' edge [fontname="Helvetica", fontsize=10];', '', ' // Start indicator', ' __start__ [shape=point, width=0.2];', ' __start__ -> START;', '', ] # Collect edges grouped by (src, dst) to merge labels edge_labels = {} for (state, char_class), transition in TRANSITIONS.items(): src = state.name dst = transition.next_state.name char_label = _CHAR_LABELS.get(char_class, char_class.name) action_label = _ACTION_LABELS.get(transition.action, transition.action.name) label = f"{char_label} / {action_label}" edge_labels.setdefault((src, dst), []).append(label) # Emit edges for (src, dst), labels in sorted(edge_labels.items()): combined = "\\n".join(labels) lines.append(f' {src} -> {dst} [label="{combined}"];') lines.append('}') return '\n'.join(lines) # ---------- AST diagram ---------- _OP_LABELS = { TokenType.PLUS: '+', TokenType.MINUS: '-', TokenType.MULTIPLY: '*', TokenType.DIVIDE: '/', TokenType.POWER: '^', TokenType.UNARY_MINUS: 'neg', } def ast_to_dot(node): """ Generate a graphviz dot diagram of an AST (expression tree / DAG). Each node gets a unique ID. Edges go from parent to children, showing the directed acyclic structure. Leaves are boxed, operators are ellipses. """ lines = [ 'digraph AST {', ' node [fontname="Helvetica"];', ' edge [fontname="Helvetica"];', '', ] counter = [0] def _visit(node): nid = f"n{counter[0]}" counter[0] += 1 match node: case NumberNode(value=v): label = _format_number(v) lines.append(f' {nid} [label="{label}", shape=box, style=rounded];') return nid case UnaryOpNode(op=op, operand=child): label = _OP_LABELS.get(op, op.name) lines.append(f' {nid} [label="{label}", shape=ellipse];') child_id = _visit(child) lines.append(f' {nid} -> {child_id};') return nid case BinOpNode(op=op, left=left, right=right): label = _OP_LABELS.get(op, op.name) lines.append(f' {nid} [label="{label}", shape=ellipse];') left_id = _visit(left) right_id = _visit(right) lines.append(f' {nid} -> {left_id} [label="L"];') lines.append(f' {nid} -> {right_id} [label="R"];') return nid _visit(node) lines.append('}') return '\n'.join(lines) # ---------- Text-based tree ---------- def ast_to_text(node, prefix="", connector=""): """ Render the AST as an indented text tree for terminal display. Example output for (2 + 3) * 4: * +-- + | +-- 2 | +-- 3 +-- 4 """ match node: case NumberNode(value=v): label = _format_number(v) case UnaryOpNode(op=op): label = _OP_LABELS.get(op, op.name) case BinOpNode(op=op): label = _OP_LABELS.get(op, op.name) lines = [f"{prefix}{connector}{label}"] children = _get_children(node) for i, child in enumerate(children): is_last_child = (i == len(children) - 1) if connector: # Extend the prefix: if we used "+-- " then next children # see "| " (continuing) or " " (last child) child_prefix = prefix + ("| " if connector == "+-- " else " ") else: child_prefix = prefix child_connector = "+-- " if is_last_child else "+-- " # Use a different lead for non-last: the vertical bar continues child_connector = "`-- " if is_last_child else "+-- " child_lines = ast_to_text(child, child_prefix, child_connector) lines.append(child_lines) return '\n'.join(lines) def _get_children(node): match node: case NumberNode(): return [] case UnaryOpNode(operand=child): return [child] case BinOpNode(left=left, right=right): return [left, right] return [] def _format_number(v): if isinstance(v, float) and v == int(v): return str(int(v)) return str(v)