"""Tests for the arithmetic-expression tokenizer."""

import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

import pytest

from tokenizer import tokenize, TokenType, Token, TokenError

# Token types that are not operators in the streams exercised below.
_NON_OPERATORS = (TokenType.NUMBER, TokenType.EOF)


def _drop_eof(tokens):
    """Return every token except EOF markers."""
    return [tok for tok in tokens if tok.type != TokenType.EOF]


def _operator_types(tokens):
    """Return the types of all operator tokens, in stream order."""
    return [tok.type for tok in tokens if tok.type not in _NON_OPERATORS]


# ---------- Basic tokens ----------


def test_single_integer():
    first = tokenize("42")[0]
    assert first.type == TokenType.NUMBER
    assert first.value == "42"


def test_decimal_number():
    first = tokenize("3.14")[0]
    assert first.type == TokenType.NUMBER
    assert first.value == "3.14"


def test_leading_dot():
    first = tokenize(".5")[0]
    assert first.type == TokenType.NUMBER
    assert first.value == ".5"


def test_all_operators():
    """Every operator written between numbers tokenizes as its binary form."""
    assert _operator_types(tokenize("1 + 1 - 1 * 1 / 1 ^ 1")) == [
        TokenType.PLUS,
        TokenType.MINUS,
        TokenType.MULTIPLY,
        TokenType.DIVIDE,
        TokenType.POWER,
    ]


def test_operators_between_numbers():
    assert _operator_types(tokenize("1 + 2 - 3 * 4 / 5 ^ 6")) == [
        TokenType.PLUS,
        TokenType.MINUS,
        TokenType.MULTIPLY,
        TokenType.DIVIDE,
        TokenType.POWER,
    ]


def test_parentheses():
    stream = tokenize("()")
    assert stream[0].type == TokenType.LPAREN
    assert stream[1].type == TokenType.RPAREN


# ---------- Unary minus ----------


def test_unary_minus_at_start():
    stream = tokenize("-3")
    assert stream[0].type == TokenType.UNARY_MINUS
    assert stream[1].type == TokenType.NUMBER


def test_unary_minus_after_lparen():
    # Token index 1 sits just inside the opening parenthesis.
    assert tokenize("(-3)")[1].type == TokenType.UNARY_MINUS


def test_unary_minus_after_operator():
    assert tokenize("2 * -3")[2].type == TokenType.UNARY_MINUS


def test_binary_minus():
    assert tokenize("5 - 3")[1].type == TokenType.MINUS


def test_double_unary_minus():
    head = [tok.type for tok in tokenize("--3")[:3]]
    assert head == [
        TokenType.UNARY_MINUS,
        TokenType.UNARY_MINUS,
        TokenType.NUMBER,
    ]


# ---------- Whitespace handling ----------


def test_no_spaces():
    assert len(_drop_eof(tokenize("3+4"))) == 3


def test_extra_spaces():
    assert len(_drop_eof(tokenize("  3   +   4  "))) == 3


# ---------- Position tracking ----------


def test_positions():
    stream = tokenize("3 + 4")
    # Offsets of '3', '+', '4' within the input string.
    assert [tok.position for tok in stream[:3]] == [0, 2, 4]


# ---------- Errors ----------


def test_invalid_character():
    with pytest.raises(TokenError):
        tokenize("3 & 4")


def test_double_dot():
    with pytest.raises(TokenError):
        tokenize("3.14.15")


# ---------- EOF token ----------


def test_eof_always_present():
    assert tokenize("42")[-1].type == TokenType.EOF


def test_empty_input():
    stream = tokenize("")
    assert len(stream) == 1
    assert stream[0].type == TokenType.EOF


# ---------- Complex expressions ----------


def test_complex_expression():
    observed = [tok.type for tok in _drop_eof(tokenize("(3 + 4.5) * -2 ^ 3"))]
    assert observed == [
        TokenType.LPAREN,
        TokenType.NUMBER,
        TokenType.PLUS,
        TokenType.NUMBER,
        TokenType.RPAREN,
        TokenType.MULTIPLY,
        TokenType.UNARY_MINUS,
        TokenType.NUMBER,
        TokenType.POWER,
        TokenType.NUMBER,
    ]


def test_adjacent_parens():
    observed = [tok.type for tok in _drop_eof(tokenize("(3)(4)"))]
    assert observed == [
        TokenType.LPAREN,
        TokenType.NUMBER,
        TokenType.RPAREN,
        TokenType.LPAREN,
        TokenType.NUMBER,
        TokenType.RPAREN,
    ]