Add expression-evaluator: DAGs & state machines tutorial project
Educational calculator teaching FSMs (explicit transition table tokenizer) and DAGs (recursive descent parser with AST evaluation). Includes CLI with REPL, graphviz visualization, and 61 tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
217
python/expression-evaluator/parser.py
Normal file
217
python/expression-evaluator/parser.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""
|
||||
Part 2: DAG Construction -- Recursive Descent Parser
|
||||
=====================================================
|
||||
A parser converts a flat list of tokens into a tree structure (AST).
|
||||
The AST is a DAG (Directed Acyclic Graph) where:
|
||||
|
||||
- Nodes are operations (BinOpNode) or values (NumberNode)
|
||||
- Edges point from parent operations to their operands
|
||||
- The graph is acyclic because an operation's inputs are always
|
||||
"simpler" sub-expressions (no circular dependencies)
|
||||
- It is a tree (a special case of DAG) because no node is shared
|
||||
|
||||
This is the same structure as:
|
||||
- Spreadsheet dependency graphs (cell A1 depends on B1, B2...)
|
||||
- Build systems (Makefile targets depend on other targets)
|
||||
- Task scheduling (some tasks must finish before others start)
|
||||
- Neural network computation graphs (forward pass is a DAG)
|
||||
|
||||
Key DAG concepts demonstrated:
|
||||
- Nodes: operations and values
|
||||
- Directed edges: from operation to its inputs (dependencies)
|
||||
- Acyclic: no circular dependencies
|
||||
- Topological ordering: natural evaluation order (leaves first)
|
||||
|
||||
Grammar (BNF) -- precedence is encoded by nesting depth:
|
||||
expression ::= term ((PLUS | MINUS) term)* # lowest precedence
|
||||
term ::= unary ((MULTIPLY | DIVIDE) unary)*
|
||||
unary ::= UNARY_MINUS unary | power
|
||||
power ::= atom (POWER power)? # right-associative
|
||||
atom ::= NUMBER | LPAREN expression RPAREN # highest precedence
|
||||
|
||||
Call chain: expression -> term -> unary -> power -> atom
|
||||
This means: +/- binds loosest, then *//, then unary -, then ^, then parens.
|
||||
So -3^2 = -(3^2) = -9, matching standard math convention.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from tokenizer import Token, TokenType
|
||||
|
||||
|
||||
# ---------- AST node types ----------
|
||||
# These are the nodes of our DAG. Each node is either a leaf (NumberNode)
|
||||
# or an interior node with edges pointing to its children (operands).
|
||||
|
||||
@dataclass
|
||||
class NumberNode:
|
||||
"""Leaf node: a numeric literal. In DAG terms, a node with no outgoing edges."""
|
||||
value: float
|
||||
|
||||
def __repr__(self):
|
||||
if self.value == int(self.value):
|
||||
return f"NumberNode({int(self.value)})"
|
||||
return f"NumberNode({self.value})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class BinOpNode:
|
||||
"""
|
||||
Interior node: a binary operation with two children.
|
||||
|
||||
DAG edges: this node -> left, this node -> right
|
||||
The edges represent "depends on": to compute this node's value,
|
||||
we must first compute left and right.
|
||||
"""
|
||||
op: TokenType
|
||||
left: 'NumberNode | BinOpNode | UnaryOpNode'
|
||||
right: 'NumberNode | BinOpNode | UnaryOpNode'
|
||||
|
||||
def __repr__(self):
|
||||
return f"BinOpNode({self.op.name}, {self.left}, {self.right})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnaryOpNode:
|
||||
"""Interior node: a unary operation (negation) with one child."""
|
||||
op: TokenType
|
||||
operand: 'NumberNode | BinOpNode | UnaryOpNode'
|
||||
|
||||
def __repr__(self):
|
||||
return f"UnaryOpNode({self.op.name}, {self.operand})"
|
||||
|
||||
|
||||
# Union type for any AST node
|
||||
Node = NumberNode | BinOpNode | UnaryOpNode
|
||||
|
||||
|
||||
# ---------- Errors ----------
|
||||
|
||||
class ParseError(Exception):
|
||||
def __init__(self, message, position=None):
|
||||
self.position = position
|
||||
pos_info = f" at position {position}" if position is not None else ""
|
||||
super().__init__(f"Parse error{pos_info}: {message}")
|
||||
|
||||
|
||||
# ---------- Recursive descent parser ----------
|
||||
|
||||
class Parser:
|
||||
"""
|
||||
Converts a list of tokens into an AST (expression tree / DAG).
|
||||
|
||||
Each grammar rule becomes a method. The call tree mirrors the shape
|
||||
of the AST being built. When a deeper method returns a node, it
|
||||
becomes a child of the node built by the caller -- this is how
|
||||
the DAG edges form.
|
||||
|
||||
Precedence is encoded by nesting: lower-precedence operators are
|
||||
parsed at higher (outer) levels, so they become closer to the root
|
||||
of the tree and are evaluated last.
|
||||
"""
|
||||
|
||||
def __init__(self, tokens):
|
||||
self.tokens = tokens
|
||||
self.pos = 0
|
||||
|
||||
def peek(self):
|
||||
"""Look at the current token without consuming it."""
|
||||
return self.tokens[self.pos]
|
||||
|
||||
def consume(self, expected=None):
|
||||
"""Consume and return the current token, optionally asserting its type."""
|
||||
token = self.tokens[self.pos]
|
||||
if expected is not None and token.type != expected:
|
||||
raise ParseError(
|
||||
f"expected {expected.name}, got {token.type.name}",
|
||||
token.position,
|
||||
)
|
||||
self.pos += 1
|
||||
return token
|
||||
|
||||
def parse(self):
|
||||
"""Entry point: parse the full expression and verify we consumed everything."""
|
||||
if self.peek().type == TokenType.EOF:
|
||||
raise ParseError("empty expression")
|
||||
node = self.expression()
|
||||
self.consume(TokenType.EOF)
|
||||
return node
|
||||
|
||||
# --- Grammar rules ---
|
||||
# Each method corresponds to one production in the grammar.
|
||||
# The nesting encodes operator precedence.
|
||||
|
||||
def expression(self):
|
||||
"""expression ::= term ((PLUS | MINUS) term)*"""
|
||||
node = self.term()
|
||||
while self.peek().type in (TokenType.PLUS, TokenType.MINUS):
|
||||
op_token = self.consume()
|
||||
right = self.term()
|
||||
# Build a new BinOpNode -- this creates a DAG edge from
|
||||
# the new node to both 'node' (left) and 'right'
|
||||
node = BinOpNode(op_token.type, node, right)
|
||||
return node
|
||||
|
||||
def term(self):
|
||||
"""term ::= unary ((MULTIPLY | DIVIDE) unary)*"""
|
||||
node = self.unary()
|
||||
while self.peek().type in (TokenType.MULTIPLY, TokenType.DIVIDE):
|
||||
op_token = self.consume()
|
||||
right = self.unary()
|
||||
node = BinOpNode(op_token.type, node, right)
|
||||
return node
|
||||
|
||||
def unary(self):
|
||||
"""
|
||||
unary ::= UNARY_MINUS unary | power
|
||||
|
||||
Unary minus is parsed here, between term and power, so it binds
|
||||
looser than ^ but tighter than * and /. This gives the standard
|
||||
math behavior: -3^2 = -(3^2) = -9.
|
||||
|
||||
The recursion (unary calls itself) handles double negation: --3 = 3.
|
||||
"""
|
||||
if self.peek().type == TokenType.UNARY_MINUS:
|
||||
op_token = self.consume()
|
||||
operand = self.unary()
|
||||
return UnaryOpNode(op_token.type, operand)
|
||||
return self.power()
|
||||
|
||||
def power(self):
|
||||
"""
|
||||
power ::= atom (POWER power)?
|
||||
|
||||
Right-recursive for right-associativity: 2^3^4 = 2^(3^4) = 2^81.
|
||||
Compare with term() which uses a while loop for LEFT-associativity.
|
||||
"""
|
||||
node = self.atom()
|
||||
if self.peek().type == TokenType.POWER:
|
||||
op_token = self.consume()
|
||||
right = self.power() # recurse (not loop) for right-associativity
|
||||
node = BinOpNode(op_token.type, node, right)
|
||||
return node
|
||||
|
||||
def atom(self):
|
||||
"""
|
||||
atom ::= NUMBER | LPAREN expression RPAREN
|
||||
|
||||
The base case: either a literal number or a parenthesized
|
||||
sub-expression. Parentheses work by recursing back to
|
||||
expression(), which restarts precedence parsing from the top.
|
||||
"""
|
||||
token = self.peek()
|
||||
|
||||
if token.type == TokenType.NUMBER:
|
||||
self.consume()
|
||||
return NumberNode(float(token.value))
|
||||
|
||||
if token.type == TokenType.LPAREN:
|
||||
self.consume()
|
||||
node = self.expression()
|
||||
self.consume(TokenType.RPAREN)
|
||||
return node
|
||||
|
||||
raise ParseError(
|
||||
f"expected number or '(', got {token.type.name}",
|
||||
token.position,
|
||||
)
|
||||
Reference in New Issue
Block a user