""" Part 2: DAG Construction -- Recursive Descent Parser ===================================================== A parser converts a flat list of tokens into a tree structure (AST). The AST is a DAG (Directed Acyclic Graph) where: - Nodes are operations (BinOpNode) or values (NumberNode) - Edges point from parent operations to their operands - The graph is acyclic because an operation's inputs are always "simpler" sub-expressions (no circular dependencies) - It is a tree (a special case of DAG) because no node is shared This is the same structure as: - Spreadsheet dependency graphs (cell A1 depends on B1, B2...) - Build systems (Makefile targets depend on other targets) - Task scheduling (some tasks must finish before others start) - Neural network computation graphs (forward pass is a DAG) Key DAG concepts demonstrated: - Nodes: operations and values - Directed edges: from operation to its inputs (dependencies) - Acyclic: no circular dependencies - Topological ordering: natural evaluation order (leaves first) Grammar (BNF) -- precedence is encoded by nesting depth: expression ::= term ((PLUS | MINUS) term)* # lowest precedence term ::= unary ((MULTIPLY | DIVIDE) unary)* unary ::= UNARY_MINUS unary | power power ::= atom (POWER power)? # right-associative atom ::= NUMBER | LPAREN expression RPAREN # highest precedence Call chain: expression -> term -> unary -> power -> atom This means: +/- binds loosest, then *//, then unary -, then ^, then parens. So -3^2 = -(3^2) = -9, matching standard math convention. """ from dataclasses import dataclass from tokenizer import Token, TokenType # ---------- AST node types ---------- # These are the nodes of our DAG. Each node is either a leaf (NumberNode) # or an interior node with edges pointing to its children (operands). @dataclass class NumberNode: """Leaf node: a numeric literal. In DAG terms, a node with no outgoing edges.""" value: float def __repr__(self): if self.value == int(self.value): return f"NumberNode({int(self.value)})" return f"NumberNode({self.value})" @dataclass class BinOpNode: """ Interior node: a binary operation with two children. DAG edges: this node -> left, this node -> right The edges represent "depends on": to compute this node's value, we must first compute left and right. """ op: TokenType left: 'NumberNode | BinOpNode | UnaryOpNode' right: 'NumberNode | BinOpNode | UnaryOpNode' def __repr__(self): return f"BinOpNode({self.op.name}, {self.left}, {self.right})" @dataclass class UnaryOpNode: """Interior node: a unary operation (negation) with one child.""" op: TokenType operand: 'NumberNode | BinOpNode | UnaryOpNode' def __repr__(self): return f"UnaryOpNode({self.op.name}, {self.operand})" # Union type for any AST node Node = NumberNode | BinOpNode | UnaryOpNode # ---------- Errors ---------- class ParseError(Exception): def __init__(self, message, position=None): self.position = position pos_info = f" at position {position}" if position is not None else "" super().__init__(f"Parse error{pos_info}: {message}") # ---------- Recursive descent parser ---------- class Parser: """ Converts a list of tokens into an AST (expression tree / DAG). Each grammar rule becomes a method. The call tree mirrors the shape of the AST being built. When a deeper method returns a node, it becomes a child of the node built by the caller -- this is how the DAG edges form. Precedence is encoded by nesting: lower-precedence operators are parsed at higher (outer) levels, so they become closer to the root of the tree and are evaluated last. """ def __init__(self, tokens): self.tokens = tokens self.pos = 0 def peek(self): """Look at the current token without consuming it.""" return self.tokens[self.pos] def consume(self, expected=None): """Consume and return the current token, optionally asserting its type.""" token = self.tokens[self.pos] if expected is not None and token.type != expected: raise ParseError( f"expected {expected.name}, got {token.type.name}", token.position, ) self.pos += 1 return token def parse(self): """Entry point: parse the full expression and verify we consumed everything.""" if self.peek().type == TokenType.EOF: raise ParseError("empty expression") node = self.expression() self.consume(TokenType.EOF) return node # --- Grammar rules --- # Each method corresponds to one production in the grammar. # The nesting encodes operator precedence. def expression(self): """expression ::= term ((PLUS | MINUS) term)*""" node = self.term() while self.peek().type in (TokenType.PLUS, TokenType.MINUS): op_token = self.consume() right = self.term() # Build a new BinOpNode -- this creates a DAG edge from # the new node to both 'node' (left) and 'right' node = BinOpNode(op_token.type, node, right) return node def term(self): """term ::= unary ((MULTIPLY | DIVIDE) unary)*""" node = self.unary() while self.peek().type in (TokenType.MULTIPLY, TokenType.DIVIDE): op_token = self.consume() right = self.unary() node = BinOpNode(op_token.type, node, right) return node def unary(self): """ unary ::= UNARY_MINUS unary | power Unary minus is parsed here, between term and power, so it binds looser than ^ but tighter than * and /. This gives the standard math behavior: -3^2 = -(3^2) = -9. The recursion (unary calls itself) handles double negation: --3 = 3. """ if self.peek().type == TokenType.UNARY_MINUS: op_token = self.consume() operand = self.unary() return UnaryOpNode(op_token.type, operand) return self.power() def power(self): """ power ::= atom (POWER power)? Right-recursive for right-associativity: 2^3^4 = 2^(3^4) = 2^81. Compare with term() which uses a while loop for LEFT-associativity. """ node = self.atom() if self.peek().type == TokenType.POWER: op_token = self.consume() right = self.power() # recurse (not loop) for right-associativity node = BinOpNode(op_token.type, node, right) return node def atom(self): """ atom ::= NUMBER | LPAREN expression RPAREN The base case: either a literal number or a parenthesized sub-expression. Parentheses work by recursing back to expression(), which restarts precedence parsing from the top. """ token = self.peek() if token.type == TokenType.NUMBER: self.consume() return NumberNode(float(token.value)) if token.type == TokenType.LPAREN: self.consume() node = self.expression() self.consume(TokenType.RPAREN) return node raise ParseError( f"expected number or '(', got {token.type.name}", token.position, )