const std = @import("std");
const tokenizer = @import("tokenizer.zig");

const ParserError = error{
    ParsingError,
    OutOfMemory,
};

const NodeType = enum {
    PROGRAM,
    STATEMENT,
    ASSIGNMENT_STATEMENT,
    PRINT_STATEMENT,
    FUNCTION_CALL_STATEMENT,
    EXPRESSION,
    ADDITIVE_EXPRESSION,
    PRIMARY_EXPRESSION,
    FUNCTION_DEFINITION,
    RETURN_STATEMENT,
};

pub const Node = union(NodeType) {
    PROGRAM: struct {
        statements: []*Node,
    },
    STATEMENT: struct {
        statement: *Node,
    },
    ASSIGNMENT_STATEMENT: struct {
        is_declaration: bool,
        name: []const u8,
        expression: *Node,
    },
    PRINT_STATEMENT: struct {
        expression: *Node,
    },
    FUNCTION_CALL_STATEMENT: struct {
        name: []const u8,
    },
    // Currently unused: parse_expression returns the underlying
    // AdditiveExpression or FunctionDefinition node directly.
    EXPRESSION: struct {
        expression: *Node,
    },
    ADDITIVE_EXPRESSION: struct {
        lhs: *Node,
        rhs: *Node,
    },
    PRIMARY_EXPRESSION: union(enum) {
        NUMBER: struct {
            value: i64,
        },
        IDENTIFIER: struct {
            name: []const u8,
        },
        FUNCTION_CALL: struct {
            name: []const u8,
        },
    },
    FUNCTION_DEFINITION: struct {
        statements: []*Node,
    },
    RETURN_STATEMENT: struct {
        expression: *Node,
    },
};
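// Illustrative example (not produced by any code in this file): assuming the
// tokenizer emits LET, IDENTIFIER, EQUALS, NUMBER, PLUS and SEMICOLON tokens,
// the statement `let i = 1 + 2;` parses into the following tree:
//
//   PROGRAM
//   └── STATEMENT
//       └── ASSIGNMENT_STATEMENT (is_declaration = true, name = "i")
//           └── ADDITIVE_EXPRESSION
//               ├── lhs: PRIMARY_EXPRESSION.NUMBER (value = 1)
//               └── rhs: PRIMARY_EXPRESSION.NUMBER (value = 2)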
pub const Parser = struct {
    tokens: []tokenizer.Token,
    offset: u32,
    allocator: std.mem.Allocator,
    try_context: bool, //TODO: I dont like this

    pub fn init(tokens: []tokenizer.Token, allocator: std.mem.Allocator) ParserError!*Parser {
        const parser = try allocator.create(Parser);
        parser.* = .{
            .tokens = tokens,
            .offset = 0,
            .allocator = allocator,
            .try_context = false,
        };
        return parser;
    }

    pub fn parse(self: *Parser) !*Node {
        return self.parse_program();
    }

    // Program ::= Statement+
    fn parse_program(self: *Parser) !*Node {
        var nodes = std.ArrayList(*Node).init(self.allocator);
        while (self.offset < self.tokens.len) {
            try nodes.append(try self.parse_statement());
        }
        return self.create_node(.{ .PROGRAM = .{
            .statements = try nodes.toOwnedSlice(),
        } });
    }

    // Statement ::= (AssignmentStatement | PrintStatement | FunctionCallStatement) SEMICOLON
    fn parse_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing statement\n", .{});

        const statement = self.accept_parse(parse_print_statement) orelse
            self.accept_parse(parse_function_call_statement) orelse
            try self.parse_assignment_statement();

        _ = try self.accept_token(tokenizer.TokenType.SEMICOLON);

        return self.create_node(.{ .STATEMENT = .{
            .statement = statement,
        } });
    }

    // AssignmentStatement ::= "let"? IDENTIFIER EQUALS Expression
    fn parse_assignment_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing assignment statement\n", .{});

        var is_declaration: bool = false;
        if (self.match_token(.LET)) {
            is_declaration = true;
        }

        const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER);
        _ = try self.accept_token(tokenizer.TokenType.EQUALS);
        const expression = try self.parse_expression();

        return self.create_node(.{ .ASSIGNMENT_STATEMENT = .{
            .is_declaration = is_declaration,
            .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
            .expression = expression,
        } });
    }

    // PrintStatement ::= PRINT LPAREN Expression RPAREN
    fn parse_print_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing print statement\n", .{});

        _ = try self.accept_token(tokenizer.TokenType.PRINT);
        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
        const expression = try self.parse_expression();
        _ = try self.accept_token(tokenizer.TokenType.RPAREN);

        return self.create_node(.{ .PRINT_STATEMENT = .{
            .expression = expression,
        } });
    }

    // FunctionCallStatement ::= IDENTIFIER LPAREN RPAREN
    fn parse_function_call_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing function call statement\n", .{});

        const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER);
        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
        _ = try self.accept_token(tokenizer.TokenType.RPAREN);

        return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{
            .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
        } });
    }

    // Expression ::= AdditiveExpression | FunctionDefinition
    fn parse_expression(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing expression\n", .{});

        if (self.accept_parse(parse_additive_expression)) |expression| {
            return expression;
        } else if (self.accept_parse(parse_function_definition)) |expression| {
            return expression;
        }

        return ParserError.ParsingError;
    }

    // AdditiveExpression ::= PrimaryExpression ("+" AdditiveExpression)?
    fn parse_additive_expression(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing additive expression\n", .{});

        const lhs = try self.parse_primary_expression();

        if (self.match_token(tokenizer.TokenType.PLUS)) {
            const rhs = try self.parse_additive_expression();
            return self.create_node(.{ .ADDITIVE_EXPRESSION = .{
                .lhs = lhs,
                .rhs = rhs,
            } });
        }

        return lhs;
    }

    // PrimaryExpression ::= FunctionCallStatement | NUMBER | IDENTIFIER
    fn parse_primary_expression(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing primary expression\n", .{});

        // Try a function call before consuming the leading token; consuming it
        // first would make the speculative call parse start one token too late.
        if (self.accept_parse(parse_function_call_statement)) |stmt| return stmt;

        const token = self.consume_token() orelse return ParserError.ParsingError;

        return switch (token) {
            .NUMBER => |number_token| try self.create_node(.{ .PRIMARY_EXPRESSION = .{
                .NUMBER = .{
                    .value = number_token,
                },
            } }),
            .IDENTIFIER => |identifier_token| try self.create_node(.{ .PRIMARY_EXPRESSION = .{
                .IDENTIFIER = .{
                    .name = try self.allocator.dupe(u8, identifier_token),
                },
            } }),
            else => ParserError.ParsingError,
        };
    }

    // FunctionDefinition ::= LPAREN RPAREN ARROW LBRACE Statement* ReturnStatement RBRACE
    fn parse_function_definition(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing function definition\n", .{});

        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
        _ = try self.accept_token(tokenizer.TokenType.RPAREN);
        _ = try self.accept_token(tokenizer.TokenType.ARROW);
        _ = try self.accept_token(tokenizer.TokenType.LBRACE);

        var nodes = std.ArrayList(*Node).init(self.allocator);
        while (self.accept_parse(parse_statement)) |statement| {
            try nodes.append(statement);
        }
        try nodes.append(try self.parse_return_statement());

        _ = try self.accept_token(tokenizer.TokenType.RBRACE);

        return self.create_node(.{ .FUNCTION_DEFINITION = .{
            .statements = try nodes.toOwnedSlice(),
        } });
    }

    // ReturnStatement ::= RETURN Expression SEMICOLON
    fn parse_return_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing return statement\n", .{});

        _ = try self.accept_token(tokenizer.TokenType.RETURN);
        const expression = try self.parse_expression();
        _ = try self.accept_token(tokenizer.TokenType.SEMICOLON); //TODO: I dont like this

        return self.create_node(.{ .RETURN_STATEMENT = .{
            .expression = expression,
        } });
    }

    fn accept_parse(self: *Parser, parsing_func: *const fn (_: *Parser) ParserError!*Node) ?*Node {
        const prev_offset = self.offset;
        // Save and restore try_context so a nested accept_parse does not
        // re-enable error printing for a still-speculative outer parse.
        const prev_try_context = self.try_context;
        self.try_context = true;
        defer self.try_context = prev_try_context;

        return parsing_func(self) catch {
            self.offset = prev_offset;
            return null;
        };
    }

    fn accept_token(self: *Parser, expected_token: tokenizer.TokenType) ParserError!tokenizer.Token {
        errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token});

        const token = self.peek_token() orelse return ParserError.ParsingError;
        if (token != expected_token) {
            if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token });
            return ParserError.ParsingError;
        }

        return self.consume_token() orelse unreachable;
    }

    fn match_token(self: *Parser, token: tokenizer.TokenType) bool {
        const curr_token = self.peek_token() orelse return false;
        if (curr_token == token) {
            _ = self.consume_token();
            return true;
        }
        return false;
    }

    fn consume_token(self: *Parser) ?tokenizer.Token {
        if (self.offset >= self.tokens.len) return null;
        defer self.offset += 1;
        return self.tokens[self.offset];
    }

    fn peek_token(self: *Parser) ?tokenizer.Token {
        if (self.offset >= self.tokens.len) return null;
        return self.tokens[self.offset];
    }

    fn create_node(self: *Parser, node_value: Node) !*Node {
        const node = try self.allocator.create(Node);
        node.* = node_value;
        return node;
    }
};
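// A minimal sketch of the backtracking contract of accept_parse: on failure the
// offset is restored so callers can try the next alternative. This exercises a
// file-private helper, which is accessible to tests in the same file.
test "accept_parse restores offset on failure" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .NUMBER = 1 },
        tokenizer.Token{ .SEMICOLON = void{} },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    // A NUMBER token cannot start a print statement, so the speculative
    // parse fails and the offset must be rewound to where it started.
    try std.testing.expect(parser.accept_parse(Parser.parse_print_statement) == null);
    try std.testing.expectEqual(@as(u32, 0), parser.offset);
}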
test "parse print" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .PRINT = void{} },
        tokenizer.Token{ .LPAREN = void{} },
        tokenizer.Token{ .NUMBER = 7 },
        tokenizer.Token{ .RPAREN = void{} },
        tokenizer.Token{ .SEMICOLON = void{} },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    const actualNode = try parser.parse_print_statement();

    const expectedNode = Node{ .PRINT_STATEMENT = .{
        .expression = @constCast(&Node{ .PRIMARY_EXPRESSION = .{
            .NUMBER = .{ .value = 7 },
        } }),
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}

test "parse identifier" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .IDENTIFIER = @constCast("i") },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    const actualNode = try parser.parse_expression();

    const expectedNode = Node{ .PRIMARY_EXPRESSION = .{
        .IDENTIFIER = .{
            .name = "i",
        },
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}

test "parse number" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .NUMBER = 12 },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    const actualNode = try parser.parse_expression();

    const expectedNode = Node{ .PRIMARY_EXPRESSION = .{
        .NUMBER = .{
            .value = 12,
        },
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}
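// A hedged sketch of parsing a full function definition, `() => { return 5; }`.
// It assumes ARROW, LBRACE, RBRACE and RETURN are void-payload Token variants,
// mirroring how LPAREN and RPAREN are constructed in the tests above.
test "parse function definition" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .LPAREN = void{} },
        tokenizer.Token{ .RPAREN = void{} },
        tokenizer.Token{ .ARROW = void{} },
        tokenizer.Token{ .LBRACE = void{} },
        tokenizer.Token{ .RETURN = void{} },
        tokenizer.Token{ .NUMBER = 5 },
        tokenizer.Token{ .SEMICOLON = void{} },
        tokenizer.Token{ .RBRACE = void{} },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    const actualNode = try parser.parse_expression();

    // The body collects zero statements plus the mandatory return statement.
    const expectedNode = Node{ .FUNCTION_DEFINITION = .{
        .statements = @constCast(&[_]*Node{@constCast(&Node{ .RETURN_STATEMENT = .{
            .expression = @constCast(&Node{ .PRIMARY_EXPRESSION = .{
                .NUMBER = .{ .value = 5 },
            } }),
        } })}),
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}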
test "simple e2e" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .LET = void{} },
        tokenizer.Token{ .IDENTIFIER = @constCast("i") },
        tokenizer.Token{ .EQUALS = void{} },
        tokenizer.Token{ .NUMBER = 2 },
        tokenizer.Token{ .SEMICOLON = void{} },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    const ast = try parser.parse();

    const expected_ast = Node{ .PROGRAM = .{
        .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{
            .statement = @constCast(&Node{ .ASSIGNMENT_STATEMENT = .{
                .is_declaration = true,
                .name = "i",
                .expression = @constCast(&Node{ .PRIMARY_EXPRESSION = .{
                    .NUMBER = .{ .value = 2 },
                } }),
            } }),
        } })}),
    } };
    try std.testing.expectEqualDeep(&expected_ast, ast);
}
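// A hedged sketch of the right-leaning chain produced by the recursive
// parse_additive_expression: `1 + 2` becomes ADDITIVE_EXPRESSION(lhs: 1, rhs: 2),
// assuming PLUS is a void-payload Token variant like the delimiters above.
test "parse additive expression" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .NUMBER = 1 },
        tokenizer.Token{ .PLUS = void{} },
        tokenizer.Token{ .NUMBER = 2 },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const parser = try Parser.init(tokens, arena.allocator());

    const actualNode = try parser.parse_expression();

    const expectedNode = Node{ .ADDITIVE_EXPRESSION = .{
        .lhs = @constCast(&Node{ .PRIMARY_EXPRESSION = .{ .NUMBER = .{ .value = 1 } } }),
        .rhs = @constCast(&Node{ .PRIMARY_EXPRESSION = .{ .NUMBER = .{ .value = 2 } } }),
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}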