diff --git a/python/expression-evaluator/CLAUDE.md b/python/expression-evaluator/CLAUDE.md new file mode 100644 index 0000000..5315196 --- /dev/null +++ b/python/expression-evaluator/CLAUDE.md @@ -0,0 +1,42 @@ +# Expression Evaluator + +## Overview +Educational project teaching DAGs and state machines through a calculator. +Pure Python, no external dependencies. + +## Running +```bash +python main.py "3 + 4 * 2" # single expression +python main.py # REPL mode +python main.py --show-tokens --show-ast --trace "expr" # show internals +python main.py --dot "3+4*2" | dot -Tpng -o ast.png # AST diagram +python main.py --dot-fsm | dot -Tpng -o fsm.png # FSM diagram +``` + +## Testing +```bash +python -m pytest tests/ -v +``` + +## Architecture +- `tokenizer.py` -- Explicit finite state machine (Mealy machine) tokenizer +- `parser.py` -- Recursive descent parser building an AST (DAG) +- `evaluator.py` -- Post-order tree walker (topological sort evaluation) +- `visualize.py` -- Graphviz dot generation for AST and FSM diagrams +- `main.py` -- CLI entry point with argparse, REPL mode + +## Key Design Decisions +- State machine uses an explicit transition table (dict), not implicit if/else +- Unary minus resolved by examining previous token context +- Power operator (`^`) is right-associative (grammar uses right-recursion) +- AST nodes are dataclasses; evaluation uses structural pattern matching +- Graphviz output is raw dot strings (no graphviz Python package needed) + +## Grammar +``` +expression ::= term ((PLUS | MINUS) term)* +term ::= unary ((MULTIPLY | DIVIDE) unary)* +unary ::= UNARY_MINUS unary | power +power ::= atom (POWER power)? 
+atom ::= NUMBER | LPAREN expression RPAREN +``` diff --git a/python/expression-evaluator/README.md b/python/expression-evaluator/README.md new file mode 100644 index 0000000..698a2ac --- /dev/null +++ b/python/expression-evaluator/README.md @@ -0,0 +1,87 @@ +# Expression Evaluator -- DAGs & State Machines Tutorial + +A calculator that teaches two fundamental CS patterns by building them from scratch: + +1. **Finite State Machine** -- the tokenizer processes input character-by-character using an explicit transition table +2. **Directed Acyclic Graph (DAG)** -- the parser builds an expression tree, evaluated bottom-up in topological order + +## What You'll Learn + +| File | CS Concept | What it does | +|------|-----------|-------------| +| `tokenizer.py` | **State Machine** (Mealy machine) | Converts `"3 + 4 * 2"` into tokens using a transition table | +| `parser.py` | **DAG construction** | Builds an expression tree with operator precedence | +| `evaluator.py` | **Topological evaluation** | Walks the tree bottom-up (leaves before parents) | +| `visualize.py` | **Visualization** | Generates graphviz diagrams of both the FSM and AST | + +## Quick Start + +```bash +# Evaluate an expression +python main.py "3 + 4 * 2" +# => 11 + +# Interactive REPL +python main.py + +# See how the state machine tokenizes +python main.py --show-tokens "(2 + 3) * -4" + +# See the expression tree (DAG) +python main.py --show-ast "(2 + 3) * 4" +# * +# +-- + +# | +-- 2 +# | `-- 3 +# `-- 4 + +# Watch evaluation in topological order +python main.py --trace "(2 + 3) * 4" +# Step 1: 2 => 2 +# Step 2: 3 => 3 +# Step 3: 2 + 3 => 5 +# Step 4: 4 => 4 +# Step 5: 5 * 4 => 20 + +# Generate graphviz diagrams +python main.py --dot "(2 + 3) * 4" | dot -Tpng -o ast.png +python main.py --dot-fsm | dot -Tpng -o fsm.png +``` + +## Features + +- Arithmetic: `+`, `-`, `*`, `/`, `^` (power) +- Parentheses: `(2 + 3) * 4` +- Unary minus: `-3`, `-(2 + 1)`, `2 * -3` +- Decimals: `3.14`, `.5` +- Standard precedence: 
parens > `^` > `*`/`/` > `+`/`-` +- Right-associative power: `2^3^4` = `2^(3^4)` +- Correct unary minus: `-3^2` = `-(3^2)` = `-9` + +## Running Tests + +```bash +python -m pytest tests/ -v +``` + +## How the State Machine Works + +The tokenizer in `tokenizer.py` uses an **explicit transition table** -- a dictionary mapping `(current_state, character_class)` to `(next_state, action)`. This is the same pattern used in network protocol parsers, regex engines, and compiler lexers. + +The three states are: +- `START` -- between tokens, dispatching based on the next character +- `INTEGER` -- accumulating digits (e.g., `"12"` so far) +- `DECIMAL` -- accumulating digits after a decimal point (e.g., `"12.3"`) + +Use `--dot-fsm` to generate a visual diagram of the state machine. + +## How the DAG Works + +The parser in `parser.py` builds an **expression tree** (AST) where: +- **Leaf nodes** are numbers (no dependencies) +- **Interior nodes** are operators with edges to their operands +- **Edges** represent "depends on" relationships + +Evaluation in `evaluator.py` walks this tree **bottom-up** -- children before parents. This is exactly a **topological sort** of the DAG: you can only compute a node after all its dependencies are resolved. + +Use `--show-ast` to see the tree structure, or `--dot` to generate a graphviz diagram. diff --git a/python/expression-evaluator/evaluator.py b/python/expression-evaluator/evaluator.py new file mode 100644 index 0000000..177ffd4 --- /dev/null +++ b/python/expression-evaluator/evaluator.py @@ -0,0 +1,147 @@ +""" +Part 3: DAG Evaluation -- Tree Walker +======================================= +Evaluating the AST bottom-up is equivalent to topological-sort +evaluation of a DAG. We must evaluate a node's children before +the node itself -- just like in any dependency graph. + +For a tree, post-order traversal gives a topological ordering. +The recursive evaluate() function naturally does this: + 1. 
Recursively evaluate all children (dependencies) + 2. Combine the results (compute this node's value) + 3. Return the result (make it available to the parent) + +This is the same pattern as: + - make: build dependencies before the target + - pip/npm install: install dependencies before the package + - Spreadsheet recalculation: compute referenced cells first +""" + +from parser import NumberNode, BinOpNode, UnaryOpNode, Node +from tokenizer import TokenType + + +# ---------- Errors ---------- + +class EvalError(Exception): + pass + + +# ---------- Evaluator ---------- + +OP_SYMBOLS = { + TokenType.PLUS: '+', + TokenType.MINUS: '-', + TokenType.MULTIPLY: '*', + TokenType.DIVIDE: '/', + TokenType.POWER: '^', + TokenType.UNARY_MINUS: 'neg', +} + + +def evaluate(node): + """ + Evaluate an AST by walking it bottom-up (post-order traversal). + + This is a recursive function that mirrors the DAG structure: + each recursive call follows a DAG edge to a child node. + Children are evaluated before parents -- topological order. + """ + match node: + case NumberNode(value=v): + return v + + case UnaryOpNode(op=TokenType.UNARY_MINUS, operand=child): + return -evaluate(child) + + case BinOpNode(op=op, left=left, right=right): + left_val = evaluate(left) + right_val = evaluate(right) + match op: + case TokenType.PLUS: + return left_val + right_val + case TokenType.MINUS: + return left_val - right_val + case TokenType.MULTIPLY: + return left_val * right_val + case TokenType.DIVIDE: + if right_val == 0: + raise EvalError("division by zero") + return left_val / right_val + case TokenType.POWER: + return left_val ** right_val + + raise EvalError(f"unknown node type: {type(node)}") + + +def evaluate_traced(node): + """ + Like evaluate(), but records each step for educational display. + Returns (result, list_of_trace_lines). + + The trace shows the topological evaluation order -- how the DAG + is evaluated from leaves to root. 
Each step shows a node being + evaluated after all its dependencies are resolved. + """ + steps = [] + counter = [0] # mutable counter for step numbering + + def _walk(node, depth): + indent = " " * depth + counter[0] += 1 + step = counter[0] + + match node: + case NumberNode(value=v): + result = v + display = _format_number(v) + steps.append(f"{indent}Step {step}: {display} => {_format_number(result)}") + return result + + case UnaryOpNode(op=TokenType.UNARY_MINUS, operand=child): + child_val = _walk(child, depth + 1) + result = -child_val + counter[0] += 1 + step = counter[0] + steps.append( + f"{indent}Step {step}: neg({_format_number(child_val)}) " + f"=> {_format_number(result)}" + ) + return result + + case BinOpNode(op=op, left=left, right=right): + left_val = _walk(left, depth + 1) + right_val = _walk(right, depth + 1) + sym = OP_SYMBOLS[op] + match op: + case TokenType.PLUS: + result = left_val + right_val + case TokenType.MINUS: + result = left_val - right_val + case TokenType.MULTIPLY: + result = left_val * right_val + case TokenType.DIVIDE: + if right_val == 0: + raise EvalError("division by zero") + result = left_val / right_val + case TokenType.POWER: + result = left_val ** right_val + counter[0] += 1 + step = counter[0] + steps.append( + f"{indent}Step {step}: {_format_number(left_val)} {sym} " + f"{_format_number(right_val)} => {_format_number(result)}" + ) + return result + + raise EvalError(f"unknown node type: {type(node)}") + + result = _walk(node, 0) + return result, steps + + +def _format_number(v): + """Display a number as integer when possible.""" + if isinstance(v, float) and v == int(v): + return str(int(v)) + return str(v) diff --git a/python/expression-evaluator/main.py b/python/expression-evaluator/main.py new file mode 100644 index 0000000..1eb844c --- /dev/null +++ b/python/expression-evaluator/main.py @@ -0,0 +1,163 @@ +""" +Expression Evaluator -- Learn DAGs & State Machines +==================================================== +CLI 
import argparse
import sys

from tokenizer import tokenize, TokenError
from parser import Parser, ParseError
from evaluator import evaluate, evaluate_traced, EvalError
from visualize import ast_to_dot, fsm_to_dot, ast_to_text

# Commands that terminate the REPL (matched case-insensitively).
_QUIT_COMMANDS = frozenset({"quit", "exit", "q"})


def process_expression(expr, args):
    """Run *expr* through the tokenize -> parse -> evaluate pipeline,
    printing whatever output the CLI flags request."""
    try:
        tokens = tokenize(expr)
    except TokenError as err:
        _print_error(expr, err)
        return

    if args.show_tokens:
        print("\nTokens:")
        for token in tokens:
            print(f" {token}")

    try:
        ast = Parser(tokens).parse()
    except ParseError as err:
        _print_error(expr, err)
        return

    if args.show_ast:
        print("\nAST (text tree):")
        print(ast_to_text(ast))

    if args.dot:
        # Raw graphviz dot goes to stdout; suppress the numeric result
        # so the output can be piped straight into `dot`.
        print(ast_to_dot(ast))
        return

    if not args.trace:
        try:
            print(_format_result(evaluate(ast)))
        except EvalError as err:
            print(f"Eval error: {err}")
        return

    try:
        result, steps = evaluate_traced(ast)
    except EvalError as err:
        print(f"Eval error: {err}")
        return
    print("\nEvaluation trace (topological order):")
    for line in steps:
        print(line)
    print(f"\nResult: {_format_result(result)}")


def repl(args):
    """Interactive read-eval-print loop."""
    print("Expression Evaluator REPL")
    print("Type an expression, or 'quit' to exit.")
    enabled = [name for active, name in (
        (args.show_tokens, "--show-tokens"),
        (args.show_ast, "--show-ast"),
        (args.trace, "--trace"),
    ) if active]
    if enabled:
        print(f"Active flags: {' '.join(enabled)}")
    print()

    while True:
        try:
            line = input(">>> ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            break
        if line.lower() in _QUIT_COMMANDS:
            break
        if not line:
            continue
        process_expression(line, args)
        print()


def _print_error(expr, error):
    """Print an error message, plus a caret under the offending
    character when the exception carries a position."""
    print(f"Error: {error}")
    position = getattr(error, 'position', None)
    if position is not None:
        print(f" {expr}")
        print(f" {' ' * position}^")


def _format_result(v):
    """Format a numeric result: show as int when possible."""
    if isinstance(v, float) and v == int(v) and abs(v) < 1e15:
        return str(int(v))
    return str(v)


def main():
    """Parse CLI arguments and dispatch to the requested mode."""
    cli = argparse.ArgumentParser(
        description="Expression Evaluator -- learn DAGs and state machines",
        epilog="Examples:\n"
               " python main.py '3 + 4 * 2'\n"
               " python main.py --show-tokens --trace '-(3 + 4) ^ 2'\n"
               " python main.py --dot '(2+3)*4' | dot -Tpng -o ast.png\n"
               " python main.py --dot-fsm | dot -Tpng -o fsm.png",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "expression", nargs="?",
        help="Expression to evaluate (omit for REPL mode)",
    )
    cli.add_argument(
        "--show-tokens", action="store_true",
        help="Display tokenizer output",
    )
    cli.add_argument(
        "--show-ast", action="store_true",
        help="Display AST as indented text tree",
    )
    cli.add_argument(
        "--trace", action="store_true",
        help="Show step-by-step evaluation trace",
    )
    cli.add_argument(
        "--dot", action="store_true",
        help="Output AST as graphviz dot (pipe to: dot -Tpng -o ast.png)",
    )
    cli.add_argument(
        "--dot-fsm", action="store_true",
        help="Output tokenizer FSM as graphviz dot",
    )

    args = cli.parse_args()

    # Special mode: just print the FSM diagram and exit.
    if args.dot_fsm:
        print(fsm_to_dot())
        return

    # No expression on the command line means interactive mode.
    if args.expression is None:
        repl(args)
    else:
        process_expression(args.expression, args)


if __name__ == "__main__":
    main()
"""
Part 2: DAG Construction -- Recursive Descent Parser
=====================================================
Converts a flat token list into an AST: a tree (a special case of a
DAG) whose nodes are operations (BinOpNode / UnaryOpNode) or values
(NumberNode), and whose edges point from an operation to the operands
it depends on. The graph is acyclic because an operation's inputs are
always simpler sub-expressions; post-order (topological) traversal is
the natural evaluation order -- the same structure as spreadsheet
dependency graphs, build systems, and task schedulers.

Grammar (BNF) -- precedence is encoded by nesting depth:
    expression ::= term ((PLUS | MINUS) term)*        # lowest precedence
    term       ::= unary ((MULTIPLY | DIVIDE) unary)*
    unary      ::= UNARY_MINUS unary | power
    power      ::= atom (POWER power)?                # right-associative
    atom       ::= NUMBER | LPAREN expression RPAREN  # highest precedence

Call chain: expression -> term -> unary -> power -> atom.
This means +/- bind loosest, then * and /, then unary -, then ^, then
parens. So -3^2 = -(3^2) = -9, matching standard math convention.
"""

from dataclasses import dataclass

from tokenizer import Token, TokenType


# ---------- AST node types ----------
# These are the nodes of our DAG: either a leaf (NumberNode) or an
# interior node with edges pointing to its children (operands).

@dataclass
class NumberNode:
    """Leaf node: a numeric literal. In DAG terms, a node with no outgoing edges."""
    value: float

    def __repr__(self) -> str:
        # Robustness fix: use float.is_integer() instead of
        # `self.value == int(self.value)` -- int() raises
        # OverflowError/ValueError for inf/nan, so repr() of a node
        # holding a non-finite value no longer crashes.
        if isinstance(self.value, float) and self.value.is_integer():
            return f"NumberNode({int(self.value)})"
        return f"NumberNode({self.value})"


@dataclass
class BinOpNode:
    """
    Interior node: a binary operation with two children.

    DAG edges: this node -> left, this node -> right.
    The edges represent "depends on": to compute this node's value,
    we must first compute left and right.
    """
    op: TokenType
    left: 'NumberNode | BinOpNode | UnaryOpNode'
    right: 'NumberNode | BinOpNode | UnaryOpNode'

    def __repr__(self) -> str:
        return f"BinOpNode({self.op.name}, {self.left}, {self.right})"


@dataclass
class UnaryOpNode:
    """Interior node: a unary operation (negation) with one child."""
    op: TokenType
    operand: 'NumberNode | BinOpNode | UnaryOpNode'

    def __repr__(self) -> str:
        return f"UnaryOpNode({self.op.name}, {self.operand})"


# Union type for any AST node.
Node = NumberNode | BinOpNode | UnaryOpNode


# ---------- Errors ----------

class ParseError(Exception):
    """Raised on malformed input; carries the offending token position
    (or None) so the CLI can print a caret under the bad character."""

    def __init__(self, message: str, position: int | None = None) -> None:
        self.position = position
        pos_info = f" at position {position}" if position is not None else ""
        super().__init__(f"Parse error{pos_info}: {message}")


# ---------- Recursive descent parser ----------

class Parser:
    """
    Converts a list of tokens into an AST (expression tree / DAG).

    Each grammar rule becomes a method. The call tree mirrors the shape
    of the AST being built: when a deeper method returns a node, it
    becomes a child of the node built by the caller -- this is how the
    DAG edges form. Precedence is encoded by nesting: lower-precedence
    operators are parsed at outer levels, so they end up closer to the
    root and are evaluated last.
    """

    def __init__(self, tokens: list[Token]) -> None:
        self.tokens = tokens  # token stream, terminated by an EOF token
        self.pos = 0          # index of the next unconsumed token

    def peek(self) -> Token:
        """Look at the current token without consuming it."""
        return self.tokens[self.pos]

    def consume(self, expected: TokenType | None = None) -> Token:
        """Consume and return the current token, optionally asserting its type.

        Raises:
            ParseError: if *expected* is given and the current token's
                type does not match.
        """
        token = self.tokens[self.pos]
        if expected is not None and token.type != expected:
            raise ParseError(
                f"expected {expected.name}, got {token.type.name}",
                token.position,
            )
        self.pos += 1
        return token

    def parse(self) -> Node:
        """Entry point: parse the full expression and verify we consumed everything.

        Raises:
            ParseError: on empty input or trailing tokens.
        """
        if self.peek().type == TokenType.EOF:
            raise ParseError("empty expression")
        node = self.expression()
        self.consume(TokenType.EOF)  # reject trailing garbage like "3 4"
        return node

    # --- Grammar rules ---
    # Each method corresponds to one production in the grammar.
    # The nesting encodes operator precedence.

    def expression(self) -> Node:
        """expression ::= term ((PLUS | MINUS) term)*"""
        node = self.term()
        while self.peek().type in (TokenType.PLUS, TokenType.MINUS):
            op_token = self.consume()
            right = self.term()
            # Building a new BinOpNode creates DAG edges from the new
            # node to both 'node' (left) and 'right'.
            node = BinOpNode(op_token.type, node, right)
        return node

    def term(self) -> Node:
        """term ::= unary ((MULTIPLY | DIVIDE) unary)*"""
        node = self.unary()
        while self.peek().type in (TokenType.MULTIPLY, TokenType.DIVIDE):
            op_token = self.consume()
            right = self.unary()
            node = BinOpNode(op_token.type, node, right)
        return node

    def unary(self) -> Node:
        """
        unary ::= UNARY_MINUS unary | power

        Unary minus is parsed here, between term and power, so it binds
        looser than ^ but tighter than * and /. This gives the standard
        math behavior: -3^2 = -(3^2) = -9.

        The recursion (unary calls itself) handles double negation: --3 = 3.
        """
        if self.peek().type == TokenType.UNARY_MINUS:
            op_token = self.consume()
            operand = self.unary()
            return UnaryOpNode(op_token.type, operand)
        return self.power()

    def power(self) -> Node:
        """
        power ::= atom (POWER power)?

        Right-recursive for right-associativity: 2^3^4 = 2^(3^4) = 2^81.
        Compare with term() which uses a while loop for LEFT-associativity.
        """
        node = self.atom()
        if self.peek().type == TokenType.POWER:
            op_token = self.consume()
            right = self.power()  # recurse (not loop) for right-associativity
            node = BinOpNode(op_token.type, node, right)
        return node

    def atom(self) -> Node:
        """
        atom ::= NUMBER | LPAREN expression RPAREN

        The base case: either a literal number or a parenthesized
        sub-expression. Parentheses work by recursing back to
        expression(), which restarts precedence parsing from the top.
        """
        token = self.peek()

        if token.type == TokenType.NUMBER:
            self.consume()
            return NumberNode(float(token.value))

        if token.type == TokenType.LPAREN:
            self.consume()
            node = self.expression()
            self.consume(TokenType.RPAREN)
            return node

        raise ParseError(
            f"expected number or '(', got {token.type.name}",
            token.position,
        )
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

import pytest
from tokenizer import tokenize
from parser import Parser
from evaluator import evaluate, evaluate_traced, EvalError


def eval_expr(expr):
    """Run the full tokenize -> parse -> evaluate pipeline on *expr*."""
    return evaluate(Parser(tokenize(expr)).parse())


# ---------- Basic arithmetic ----------

def test_addition():
    assert eval_expr("3 + 4") == 7

def test_subtraction():
    assert eval_expr("10 - 3") == 7

def test_multiplication():
    assert eval_expr("3 * 4") == 12

def test_division():
    assert eval_expr("10 / 4") == 2.5

def test_power():
    assert eval_expr("2 ^ 10") == 1024


# ---------- Precedence ----------

def test_standard_precedence():
    """* binds tighter than +."""
    assert eval_expr("3 + 4 * 2") == 11

def test_parentheses():
    """Parens override precedence."""
    assert eval_expr("(3 + 4) * 2") == 14

def test_power_precedence():
    """^ binds tighter than *."""
    assert eval_expr("2 * 3 ^ 2") == 18

def test_right_associative_power():
    """2^(2^3) = 2^8 = 256."""
    assert eval_expr("2 ^ 2 ^ 3") == 256


# ---------- Unary minus ----------

def test_negation():
    assert eval_expr("-5") == -5

def test_double_negation():
    assert eval_expr("--5") == 5

def test_negation_with_power():
    """-(3^2) = -9, not (-3)^2 = 9."""
    assert eval_expr("-3 ^ 2") == -9

def test_negation_in_parens():
    assert eval_expr("(-3) ^ 2") == 9


# ---------- Decimals ----------

def test_decimal_addition():
    assert eval_expr("0.1 + 0.2") == pytest.approx(0.3)

def test_leading_dot():
    assert eval_expr(".5 + .5") == 1


# ---------- Edge cases ----------

def test_nested_parens():
    assert eval_expr("((((3))))") == 3

def test_complex_expression():
    assert eval_expr("(2 + 3) * (7 - 2) / 5 ^ 1") == 5

def test_long_chain():
    assert eval_expr("1 + 2 + 3 + 4 + 5") == 15

def test_mixed_operations():
    assert eval_expr("2 + 3 * 4 - 6 / 2") == 11


# ---------- Division by zero ----------

def test_division_by_zero():
    with pytest.raises(EvalError):
        eval_expr("1 / 0")

def test_division_by_zero_in_expression():
    """The zero divisor may itself be a sub-expression."""
    with pytest.raises(EvalError):
        eval_expr("5 + 3 / (2 - 2)")


# ---------- Traced evaluation ----------

def test_traced_returns_correct_result():
    """Tracing must not change the computed value."""
    tree = Parser(tokenize("3 + 4 * 2")).parse()
    result, steps = evaluate_traced(tree)
    assert result == 11
    assert steps  # at least one recorded step

def test_traced_step_count():
    """A simple binary op has 3 evaluation events: left, right, combine."""
    result, steps = evaluate_traced(Parser(tokenize("3 + 4")).parse())
    assert result == 7
    # NumberNode(3), NumberNode(4), BinOp(+)
    assert len(steps) == 3
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

import pytest
from tokenizer import tokenize, TokenType
from parser import Parser, ParseError, NumberNode, BinOpNode, UnaryOpNode


def parse(expr):
    """Tokenize *expr* and parse it into an AST in one step."""
    return Parser(tokenize(expr)).parse()


# ---------- Basic parsing ----------

def test_parse_number():
    tree = parse("42")
    assert isinstance(tree, NumberNode)
    assert tree.value == 42

def test_parse_decimal():
    tree = parse("3.14")
    assert isinstance(tree, NumberNode)
    assert tree.value == 3.14

def test_parse_addition():
    tree = parse("3 + 4")
    assert isinstance(tree, BinOpNode)
    assert tree.op == TokenType.PLUS
    assert isinstance(tree.left, NumberNode)
    assert isinstance(tree.right, NumberNode)


# ---------- Precedence ----------

def test_multiply_before_add():
    """3 + 4 * 2 parses as 3 + (4 * 2): * nests under +."""
    tree = parse("3 + 4 * 2")
    assert tree.op == TokenType.PLUS
    assert isinstance(tree.right, BinOpNode)
    assert tree.right.op == TokenType.MULTIPLY

def test_power_before_multiply():
    """2 * 3 ^ 4 parses as 2 * (3 ^ 4): ^ nests under *."""
    tree = parse("2 * 3 ^ 4")
    assert tree.op == TokenType.MULTIPLY
    assert isinstance(tree.right, BinOpNode)
    assert tree.right.op == TokenType.POWER

def test_parentheses_override_precedence():
    """(3 + 4) * 2 keeps the + grouped on the left."""
    tree = parse("(3 + 4) * 2")
    assert tree.op == TokenType.MULTIPLY
    assert isinstance(tree.left, BinOpNode)
    assert tree.left.op == TokenType.PLUS


# ---------- Associativity ----------

def test_left_associative_subtraction():
    """10 - 3 - 2 parses as (10 - 3) - 2: nesting on the left."""
    tree = parse("10 - 3 - 2")
    assert tree.op == TokenType.MINUS
    assert isinstance(tree.left, BinOpNode)
    assert tree.left.op == TokenType.MINUS
    assert isinstance(tree.right, NumberNode)

def test_power_right_associative():
    """2 ^ 3 ^ 4 parses as 2 ^ (3 ^ 4): nesting on the right."""
    tree = parse("2 ^ 3 ^ 4")
    assert tree.op == TokenType.POWER
    assert isinstance(tree.left, NumberNode)
    assert isinstance(tree.right, BinOpNode)
    assert tree.right.op == TokenType.POWER


# ---------- Unary minus ----------

def test_unary_minus():
    tree = parse("-3")
    assert isinstance(tree, UnaryOpNode)
    assert tree.operand.value == 3

def test_double_negation():
    tree = parse("--3")
    assert isinstance(tree, UnaryOpNode)
    assert isinstance(tree.operand, UnaryOpNode)
    assert tree.operand.operand.value == 3

def test_unary_minus_precedence():
    """-3^2 parses as -(3^2), not (-3)^2."""
    tree = parse("-3 ^ 2")
    assert isinstance(tree, UnaryOpNode)
    assert isinstance(tree.operand, BinOpNode)
    assert tree.operand.op == TokenType.POWER

def test_unary_minus_in_expression():
    """2 * -3 parses as 2 * (-(3))."""
    tree = parse("2 * -3")
    assert tree.op == TokenType.MULTIPLY
    assert isinstance(tree.right, UnaryOpNode)


# ---------- Nested parentheses ----------

def test_nested_parens():
    """Redundant parens collapse to the inner expression."""
    tree = parse("((3))")
    assert isinstance(tree, NumberNode)
    assert tree.value == 3

def test_complex_nesting():
    """((2 + 3) * (7 - 2)) is a multiply at the root."""
    tree = parse("((2 + 3) * (7 - 2))")
    assert isinstance(tree, BinOpNode)
    assert tree.op == TokenType.MULTIPLY


# ---------- Errors ----------

def test_missing_rparen():
    with pytest.raises(ParseError):
        parse("(3 + 4")

def test_empty_expression():
    with pytest.raises(ParseError):
        parse("")

def test_trailing_operator():
    with pytest.raises(ParseError):
        parse("3 +")

def test_empty_parens():
    with pytest.raises(ParseError):
        parse("()")
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

import pytest
from tokenizer import tokenize, TokenType, Token, TokenError


def _types(expr):
    """Token types for *expr*, excluding the trailing EOF token."""
    return [t.type for t in tokenize(expr) if t.type != TokenType.EOF]


# ---------- Basic tokens ----------

def test_single_integer():
    tok = tokenize("42")[0]
    assert tok.type == TokenType.NUMBER
    assert tok.value == "42"

def test_decimal_number():
    tok = tokenize("3.14")[0]
    assert tok.type == TokenType.NUMBER
    assert tok.value == "3.14"

def test_leading_dot():
    tok = tokenize(".5")[0]
    assert tok.type == TokenType.NUMBER
    assert tok.value == ".5"

def test_all_operators():
    """Operators between numbers are all binary."""
    ops = [t for t in _types("1 + 1 - 1 * 1 / 1 ^ 1") if t != TokenType.NUMBER]
    assert ops == [
        TokenType.PLUS, TokenType.MINUS, TokenType.MULTIPLY,
        TokenType.DIVIDE, TokenType.POWER,
    ]

def test_operators_between_numbers():
    ops = [t for t in _types("1 + 2 - 3 * 4 / 5 ^ 6") if t != TokenType.NUMBER]
    assert ops == [
        TokenType.PLUS, TokenType.MINUS, TokenType.MULTIPLY,
        TokenType.DIVIDE, TokenType.POWER,
    ]

def test_parentheses():
    toks = tokenize("()")
    assert toks[0].type == TokenType.LPAREN
    assert toks[1].type == TokenType.RPAREN


# ---------- Unary minus ----------

def test_unary_minus_at_start():
    toks = tokenize("-3")
    assert toks[0].type == TokenType.UNARY_MINUS
    assert toks[1].type == TokenType.NUMBER

def test_unary_minus_after_lparen():
    assert tokenize("(-3)")[1].type == TokenType.UNARY_MINUS

def test_unary_minus_after_operator():
    assert tokenize("2 * -3")[2].type == TokenType.UNARY_MINUS

def test_binary_minus():
    assert tokenize("5 - 3")[1].type == TokenType.MINUS

def test_double_unary_minus():
    toks = tokenize("--3")
    assert toks[0].type == TokenType.UNARY_MINUS
    assert toks[1].type == TokenType.UNARY_MINUS
    assert toks[2].type == TokenType.NUMBER


# ---------- Whitespace handling ----------

def test_no_spaces():
    assert len(_types("3+4")) == 3

def test_extra_spaces():
    assert len(_types("  3  +  4  ")) == 3


# ---------- Position tracking ----------

def test_positions():
    toks = tokenize("3 + 4")
    assert toks[0].position == 0  # '3'
    assert toks[1].position == 2  # '+'
    assert toks[2].position == 4  # '4'


# ---------- Errors ----------

def test_invalid_character():
    with pytest.raises(TokenError):
        tokenize("3 & 4")

def test_double_dot():
    """A second decimal point in one number is rejected."""
    with pytest.raises(TokenError):
        tokenize("3.14.15")


# ---------- EOF token ----------

def test_eof_always_present():
    assert tokenize("42")[-1].type == TokenType.EOF

def test_empty_input():
    toks = tokenize("")
    assert len(toks) == 1
    assert toks[0].type == TokenType.EOF


# ---------- Complex expressions ----------

def test_complex_expression():
    assert _types("(3 + 4.5) * -2 ^ 3") == [
        TokenType.LPAREN, TokenType.NUMBER, TokenType.PLUS,
        TokenType.NUMBER, TokenType.RPAREN, TokenType.MULTIPLY,
        TokenType.UNARY_MINUS, TokenType.NUMBER, TokenType.POWER,
        TokenType.NUMBER,
    ]

def test_adjacent_parens():
    assert _types("(3)(4)") == [
        TokenType.LPAREN, TokenType.NUMBER, TokenType.RPAREN,
        TokenType.LPAREN, TokenType.NUMBER, TokenType.RPAREN,
    ]
    position: int  # character offset in original expression

    def __repr__(self):
        return f"Token({self.type.name}, {self.value!r}, pos={self.position})"


# Single-character operators and the token type each one produces.
OPERATOR_MAP = {
    '+': TokenType.PLUS,
    '-': TokenType.MINUS,
    '*': TokenType.MULTIPLY,
    '/': TokenType.DIVIDE,
    '^': TokenType.POWER,
}


# ---------- FSM state definitions ----------

class State(Enum):
    """
    The tokenizer's finite set of states.

    START   -- idle / between tokens, deciding what comes next
    INTEGER -- accumulating digits of an integer (e.g. "12" so far)
    DECIMAL -- accumulating digits after a decimal point (e.g. "12.3" so far)
    """
    START = "START"
    INTEGER = "INTEGER"
    DECIMAL = "DECIMAL"


class CharClass(Enum):
    """
    Character classification -- groups raw characters into categories
    so the transition table stays small and readable.
    """
    DIGIT = "DIGIT"
    DOT = "DOT"
    OPERATOR = "OPERATOR"
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    SPACE = "SPACE"
    EOF = "EOF"
    UNKNOWN = "UNKNOWN"


class Action(Enum):
    """
    What the FSM does on a transition. In a Mealy machine, the output
    (action) depends on both the current state AND the input.
    """
    ACCUMULATE = "ACCUMULATE"
    EMIT_NUMBER = "EMIT_NUMBER"
    EMIT_OPERATOR = "EMIT_OPERATOR"
    EMIT_LPAREN = "EMIT_LPAREN"
    EMIT_RPAREN = "EMIT_RPAREN"
    EMIT_NUMBER_THEN_OP = "EMIT_NUMBER_THEN_OP"
    EMIT_NUMBER_THEN_LPAREN = "EMIT_NUMBER_THEN_LPAREN"
    EMIT_NUMBER_THEN_RPAREN = "EMIT_NUMBER_THEN_RPAREN"
    EMIT_NUMBER_THEN_DONE = "EMIT_NUMBER_THEN_DONE"
    SKIP = "SKIP"
    DONE = "DONE"
    ERROR = "ERROR"


@dataclass(frozen=True)
class Transition:
    # (next_state, action) pair -- the value side of the transition table.
    next_state: State
    action: Action


# ---------- Transition table ----------
# This is the heart of the state machine. Every (state, char_class) pair
# maps to exactly one transition: a next state and an action to perform.
# Making this a data structure (not nested if/else) means we can:
#   1. Inspect it programmatically (e.g. to generate a diagram)
#   2. Verify completeness (every combination is covered)
#   3. Understand the FSM at a glance

TRANSITIONS = {
    # --- START: between tokens, dispatch based on character class ---
    (State.START, CharClass.DIGIT): Transition(State.INTEGER, Action.ACCUMULATE),
    (State.START, CharClass.DOT): Transition(State.DECIMAL, Action.ACCUMULATE),
    (State.START, CharClass.OPERATOR): Transition(State.START, Action.EMIT_OPERATOR),
    (State.START, CharClass.LPAREN): Transition(State.START, Action.EMIT_LPAREN),
    (State.START, CharClass.RPAREN): Transition(State.START, Action.EMIT_RPAREN),
    (State.START, CharClass.SPACE): Transition(State.START, Action.SKIP),
    (State.START, CharClass.EOF): Transition(State.START, Action.DONE),

    # --- INTEGER: accumulating digits like "123" ---
    (State.INTEGER, CharClass.DIGIT): Transition(State.INTEGER, Action.ACCUMULATE),
    (State.INTEGER, CharClass.DOT): Transition(State.DECIMAL, Action.ACCUMULATE),
    (State.INTEGER, CharClass.OPERATOR): Transition(State.START, Action.EMIT_NUMBER_THEN_OP),
    (State.INTEGER, CharClass.LPAREN): Transition(State.START, Action.EMIT_NUMBER_THEN_LPAREN),
    (State.INTEGER, CharClass.RPAREN): Transition(State.START, Action.EMIT_NUMBER_THEN_RPAREN),
    (State.INTEGER, CharClass.SPACE): Transition(State.START, Action.EMIT_NUMBER),
    (State.INTEGER, CharClass.EOF): Transition(State.START, Action.EMIT_NUMBER_THEN_DONE),

    # --- DECIMAL: accumulating digits after "." like "123.45" ---
    (State.DECIMAL, CharClass.DIGIT): Transition(State.DECIMAL, Action.ACCUMULATE),
    # a second '.' inside a number ("3.14.15") is malformed
    (State.DECIMAL, CharClass.DOT): Transition(State.START, Action.ERROR),
    (State.DECIMAL, CharClass.OPERATOR): Transition(State.START, Action.EMIT_NUMBER_THEN_OP),
    (State.DECIMAL, CharClass.LPAREN): Transition(State.START, Action.EMIT_NUMBER_THEN_LPAREN),
    (State.DECIMAL, CharClass.RPAREN): Transition(State.START, Action.EMIT_NUMBER_THEN_RPAREN),
    (State.DECIMAL, CharClass.SPACE): Transition(State.START, Action.EMIT_NUMBER),
    (State.DECIMAL, CharClass.EOF): Transition(State.START, Action.EMIT_NUMBER_THEN_DONE),
}


# ---------- Errors ----------

class TokenError(Exception):
    """Raised for any malformed input; carries the offending character offset."""

    def __init__(self, message: str, position: int):
        self.position = position
        super().__init__(f"Token error at position {position}: {message}")


# ---------- Character classification ----------

def classify(ch: str) -> CharClass:
    """Map a single character to its CharClass."""
    if ch.isdigit():
        return CharClass.DIGIT
    if ch == '.':
        return CharClass.DOT
    if ch in OPERATOR_MAP:
        return CharClass.OPERATOR
    if ch == '(':
        return CharClass.LPAREN
    if ch == ')':
        return CharClass.RPAREN
    if ch.isspace():
        return CharClass.SPACE
    return CharClass.UNKNOWN


# ---------- Main tokenize function ----------

def tokenize(expression: str) -> list:
    """
    Process an expression string through the state machine, producing tokens.

    The main loop:
      1. Classify the current character
      2. Look up (state, char_class) in the transition table
      3. Execute the action (accumulate, emit, skip, etc.)
      4. Move to the next state
      5. Advance to the next character

    After all tokens are emitted, a post-processing step resolves
    unary minus: if a MINUS token appears at the start, after an operator,
    or after LPAREN, it is re-classified as UNARY_MINUS.
+ """ + state = State.START + buffer = [] # characters accumulated for the current token + buffer_start = 0 # position where the current buffer started + tokens = [] + pos = 0 + + # Append a sentinel so EOF is handled uniformly in the loop + chars = expression + '\0' + + while pos <= len(expression): + ch = chars[pos] + char_class = CharClass.EOF if pos == len(expression) else classify(ch) + + if char_class == CharClass.UNKNOWN: + raise TokenError(f"unexpected character {ch!r}", pos) + + # Look up the transition + key = (state, char_class) + transition = TRANSITIONS.get(key) + if transition is None: + raise TokenError(f"no transition for state={state.name}, input={char_class.name}", pos) + + action = transition.action + next_state = transition.next_state + + # --- Execute the action --- + + if action == Action.ACCUMULATE: + if not buffer: + buffer_start = pos + buffer.append(ch) + + elif action == Action.EMIT_NUMBER: + tokens.append(Token(TokenType.NUMBER, ''.join(buffer), buffer_start)) + buffer.clear() + + elif action == Action.EMIT_OPERATOR: + tokens.append(Token(OPERATOR_MAP[ch], ch, pos)) + + elif action == Action.EMIT_LPAREN: + tokens.append(Token(TokenType.LPAREN, ch, pos)) + + elif action == Action.EMIT_RPAREN: + tokens.append(Token(TokenType.RPAREN, ch, pos)) + + elif action == Action.EMIT_NUMBER_THEN_OP: + tokens.append(Token(TokenType.NUMBER, ''.join(buffer), buffer_start)) + buffer.clear() + tokens.append(Token(OPERATOR_MAP[ch], ch, pos)) + + elif action == Action.EMIT_NUMBER_THEN_LPAREN: + tokens.append(Token(TokenType.NUMBER, ''.join(buffer), buffer_start)) + buffer.clear() + tokens.append(Token(TokenType.LPAREN, ch, pos)) + + elif action == Action.EMIT_NUMBER_THEN_RPAREN: + tokens.append(Token(TokenType.NUMBER, ''.join(buffer), buffer_start)) + buffer.clear() + tokens.append(Token(TokenType.RPAREN, ch, pos)) + + elif action == Action.EMIT_NUMBER_THEN_DONE: + tokens.append(Token(TokenType.NUMBER, ''.join(buffer), buffer_start)) + buffer.clear() + + 
elif action == Action.SKIP: + pass + + elif action == Action.DONE: + pass + + elif action == Action.ERROR: + raise TokenError(f"unexpected {ch!r} in state {state.name}", pos) + + state = next_state + pos += 1 + + # --- Post-processing: resolve unary minus --- + # A MINUS is unary if it appears: + # - at the very start of the token stream + # - immediately after an operator (+, -, *, /, ^) or LPAREN + # This context-sensitivity cannot be captured by the FSM alone -- + # it requires looking at previously emitted tokens. + _resolve_unary_minus(tokens) + + tokens.append(Token(TokenType.EOF, '', len(expression))) + return tokens + + +def _resolve_unary_minus(tokens): + """ + Convert binary MINUS tokens to UNARY_MINUS where appropriate. + + Why this isn't in the FSM: the FSM processes characters one at a time + and only tracks what kind of token it's currently building (its state). + But whether '-' is unary or binary depends on the PREVIOUS TOKEN -- + information the FSM doesn't track. This is a common real-world pattern: + the lexer handles most work, then a lightweight post-pass adds context. + """ + unary_predecessor = { + TokenType.PLUS, TokenType.MINUS, TokenType.MULTIPLY, + TokenType.DIVIDE, TokenType.POWER, TokenType.LPAREN, + TokenType.UNARY_MINUS, + } + for i, token in enumerate(tokens): + if token.type != TokenType.MINUS: + continue + if i == 0 or tokens[i - 1].type in unary_predecessor: + tokens[i] = Token(TokenType.UNARY_MINUS, token.value, token.position) diff --git a/python/expression-evaluator/visualize.py b/python/expression-evaluator/visualize.py new file mode 100644 index 0000000..822f078 --- /dev/null +++ b/python/expression-evaluator/visualize.py @@ -0,0 +1,200 @@ +""" +Part 4: Visualization -- Graphviz Dot Output +============================================== +Generate graphviz dot-format strings for: + 1. The tokenizer's finite state machine (FSM) + 2. Any expression's AST (DAG) + 3. 
     Text-based tree rendering for the terminal

No external dependencies -- outputs raw dot strings that can be piped
to the 'dot' command: python main.py --dot "3+4*2" | dot -Tpng -o ast.png
"""

from parser import NumberNode, BinOpNode, UnaryOpNode, Node
from tokenizer import TRANSITIONS, State, CharClass, Action, TokenType


# ---------- FSM diagram ----------

# Human-readable labels for character classes
_CHAR_LABELS = {
    CharClass.DIGIT: "digit",
    CharClass.DOT: "'.'",
    CharClass.OPERATOR: "op",
    CharClass.LPAREN: "'('",
    CharClass.RPAREN: "')'",
    CharClass.SPACE: "space",
    CharClass.EOF: "EOF",
}

# Short labels for actions
_ACTION_LABELS = {
    Action.ACCUMULATE: "accum",
    Action.EMIT_NUMBER: "emit num",
    Action.EMIT_OPERATOR: "emit op",
    Action.EMIT_LPAREN: "emit '('",
    Action.EMIT_RPAREN: "emit ')'",
    Action.EMIT_NUMBER_THEN_OP: "emit num+op",
    Action.EMIT_NUMBER_THEN_LPAREN: "emit num+'('",
    Action.EMIT_NUMBER_THEN_RPAREN: "emit num+')'",
    Action.EMIT_NUMBER_THEN_DONE: "emit num, done",
    Action.SKIP: "skip",
    Action.DONE: "done",
    Action.ERROR: "ERROR",
}


def fsm_to_dot() -> str:
    """
    Generate a graphviz dot diagram of the tokenizer's state machine.

    Reads the TRANSITIONS table directly -- because the FSM is data (a dict),
    we can programmatically inspect and visualize it. This is a key advantage
    of explicit state machines over implicit if/else control flow.
    """
    lines = [
        'digraph FSM {',
        ' rankdir=LR;',
        ' node [shape=circle, fontname="Helvetica"];',
        ' edge [fontname="Helvetica", fontsize=10];',
        '',
        ' // Start indicator',
        ' __start__ [shape=point, width=0.2];',
        ' __start__ -> START;',
        '',
    ]

    # Collect edges grouped by (src, dst) to merge labels
    edge_labels = {}
    for (state, char_class), transition in TRANSITIONS.items():
        src = state.name
        dst = transition.next_state.name
        char_label = _CHAR_LABELS.get(char_class, char_class.name)
        action_label = _ACTION_LABELS.get(transition.action, transition.action.name)
        label = f"{char_label} / {action_label}"
        edge_labels.setdefault((src, dst), []).append(label)

    # Emit edges -- sorted for deterministic output; '\\n' is a literal
    # backslash-n, which dot renders as a line break inside the label.
    for (src, dst), labels in sorted(edge_labels.items()):
        combined = "\\n".join(labels)
        lines.append(f' {src} -> {dst} [label="{combined}"];')

    lines.append('}')
    return '\n'.join(lines)


# ---------- AST diagram ----------

_OP_LABELS = {
    TokenType.PLUS: '+',
    TokenType.MINUS: '-',
    TokenType.MULTIPLY: '*',
    TokenType.DIVIDE: '/',
    TokenType.POWER: '^',
    TokenType.UNARY_MINUS: 'neg',
}


def ast_to_dot(node) -> str:
    """
    Generate a graphviz dot diagram of an AST (expression tree / DAG).

    Each node gets a unique ID. Edges go from parent to children,
    showing the directed acyclic structure. Leaves are boxed,
    operators are ellipses.
+ """ + lines = [ + 'digraph AST {', + ' node [fontname="Helvetica"];', + ' edge [fontname="Helvetica"];', + '', + ] + counter = [0] + + def _visit(node): + nid = f"n{counter[0]}" + counter[0] += 1 + + match node: + case NumberNode(value=v): + label = _format_number(v) + lines.append(f' {nid} [label="{label}", shape=box, style=rounded];') + return nid + + case UnaryOpNode(op=op, operand=child): + label = _OP_LABELS.get(op, op.name) + lines.append(f' {nid} [label="{label}", shape=ellipse];') + child_id = _visit(child) + lines.append(f' {nid} -> {child_id};') + return nid + + case BinOpNode(op=op, left=left, right=right): + label = _OP_LABELS.get(op, op.name) + lines.append(f' {nid} [label="{label}", shape=ellipse];') + left_id = _visit(left) + right_id = _visit(right) + lines.append(f' {nid} -> {left_id} [label="L"];') + lines.append(f' {nid} -> {right_id} [label="R"];') + return nid + + _visit(node) + lines.append('}') + return '\n'.join(lines) + + +# ---------- Text-based tree ---------- + +def ast_to_text(node, prefix="", connector=""): + """ + Render the AST as an indented text tree for terminal display. 
+ + Example output for (2 + 3) * 4: + * + +-- + + | +-- 2 + | +-- 3 + +-- 4 + """ + match node: + case NumberNode(value=v): + label = _format_number(v) + case UnaryOpNode(op=op): + label = _OP_LABELS.get(op, op.name) + case BinOpNode(op=op): + label = _OP_LABELS.get(op, op.name) + + lines = [f"{prefix}{connector}{label}"] + + children = _get_children(node) + for i, child in enumerate(children): + is_last_child = (i == len(children) - 1) + if connector: + # Extend the prefix: if we used "+-- " then next children + # see "| " (continuing) or " " (last child) + child_prefix = prefix + ("| " if connector == "+-- " else " ") + else: + child_prefix = prefix + child_connector = "+-- " if is_last_child else "+-- " + # Use a different lead for non-last: the vertical bar continues + child_connector = "`-- " if is_last_child else "+-- " + child_lines = ast_to_text(child, child_prefix, child_connector) + lines.append(child_lines) + + return '\n'.join(lines) + + +def _get_children(node): + match node: + case NumberNode(): + return [] + case UnaryOpNode(operand=child): + return [child] + case BinOpNode(left=left, right=right): + return [left, right] + return [] + + +def _format_number(v): + if isinstance(v, float) and v == int(v): + return str(int(v)) + return str(v)