summary refs log blame commit diff
path: root/src/parser.zig
blob: 55c3e28b4e9727e7fc06ad6c68b81d80bf0449f0 (plain) (tree)
1
2
3
4
5
6
7
8
9
10


                                           
                          

                 

  

                       
              
                         
                    
                            
               



                        

  


                                  
      


                         
                                  
                             
                         




                             

                                     
      












                                     





                             

                               
          
      





                                 
  

                           


                              

                                 



                                                                                                           
                     

                             

                                                     

                      

     
                                        


                                    
                             
                                            

                                                              
                                                                     

         
                                                
                                                   
             
     
 
                                                                                             
                                                         









                                                                                          



                                                                 
                                         

              
     
 


                                                                                                     

                                         
                                     
                                  







                                                                                 
                                  
                                      
                                                 
                                                                           

                                                     
           

     
                                                        
                                                               
                                                                                                
                                                             
 
                                                              
 
                                                       
 
                                                              
 
                                  
                                 
                                                     
              
           

     


                                                                                                        
 
                                                                                 
 



                                                                                                                                 

     
                                                               
                                                          































                                                                                                    
                                                                                  
 




                                                                                 
                                
                                              

                      
               

                                                                     
                                    
                                                                              

                      
               
                                             
          
     
 


                                                                                                    
 







                                                                 

         

































                                                                                                   


                                                                                                     
                                                                                                             

                                                                               



                                                                                                                 

                                                       

     








                                                                    







                                                        
                                                   



                                                        





                                                            

  







                                                                     



                                                                    
                                                    

                                                        

             
                                                               

 












































                                                                     
                                                                                                                                                                           







                                                        
const std = @import("std");
const tokenizer = @import("tokenizer.zig");

const ParserError = error{
    ParsingError,
    OutOfMemory,
};

const NodeType = enum {
    PROGRAM,
    STATEMENT,
    ASSIGNMENT_STATEMENT,
    PRINT_STATEMENT,
    FUNCTION_CALL_STATEMENT,
    EXPRESSION,
    ADDITIVE_EXPRESSION,
    PRIMARY_EXPRESSION,
    FUNCTION_DEFINITION,
    RETURN_STATEMENT,
};

pub const Node = union(NodeType) {
    PROGRAM: struct {
        statements: []*Node,
    },
    STATEMENT: struct {
        statement: *Node,
    },
    ASSIGNMENT_STATEMENT: struct {
        is_declaration: bool,
        name: []const u8,
        expression: *Node,
    },
    PRINT_STATEMENT: struct {
        expression: *Node,
    },
    FUNCTION_CALL_STATEMENT: struct {
        name: []const u8,
    },
    EXPRESSION: struct {
        ADDITIVE_EXPRESSION: struct {
            expression: *Node,
        },
        FUNCTION_DEFINITION: struct {
            expression: *Node,
        },
    },
    ADDITIVE_EXPRESSION: struct {
        lhs: *Node,
        rhs: *Node,
    },
    PRIMARY_EXPRESSION: union(enum) {
        NUMBER: struct {
            value: i64,
        },
        IDENTIFIER: struct {
            name: []const u8,
        },
        FUNCTION_CALL: struct {
            name: []const u8,
        },
    },
    FUNCTION_DEFINITION: struct {
        statements: []*Node,
    },
    RETURN_STATEMENT: struct {
        expression: *Node,
    },
};

pub const Parser = struct {
    tokens: []tokenizer.Token,
    offset: u32,

    allocator: std.mem.Allocator,

    try_context: bool, //TODO: I dont like this

    pub fn init(tokens: []tokenizer.Token, arena_allocator: *std.heap.ArenaAllocator) ParserError!*Parser {
        const parser = try arena_allocator.allocator().create(Parser);
        parser.* = .{
            .tokens = tokens,
            .offset = 0,
            .allocator = arena_allocator.allocator(),
            .try_context = false,
        };
        return parser;
    }

    pub fn parse(self: *Parser) !*Node {
        return self.parse_program();
    }

    // Program ::= Statement+
    fn parse_program(self: *Parser) !*Node {
        var nodes = std.ArrayList(*Node).init(self.allocator);
        while (self.offset < self.tokens.len) {
            try nodes.append(@constCast(try self.parse_statement()));
        }

        return self.create_node(.{ .PROGRAM = .{
            .statements = try nodes.toOwnedSlice(),
        } });
    }

    // Statement ::= (AssignmentStatement | PrintStatement | FunctionCallStatement) SEMICOLON
    fn parse_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing statement\n", .{});

        var statement: ?*Node = undefined;
        if (self.accept_parse(parse_print_statement)) |stmt| {
            statement = stmt;
        } else if (self.accept_parse(parse_function_call_statement)) |stmt| {
            statement = stmt;
        } else {
            statement = try self.parse_assignment_statement();
        }
        _ = try self.accept_token(tokenizer.TokenType.SEMICOLON);

        return self.create_node(.{
            .STATEMENT = .{
                .statement = statement.?,
            },
        });
    }

    // AssignmentStatement ::= "let" IDENTIFIER EQUALS Expression
    fn parse_assignment_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing assignment statement\n", .{});

        var is_declaration: bool = false;
        if (self.match_token(.LET)) {
            is_declaration = true;
        }

        const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER);

        _ = try self.accept_token(tokenizer.TokenType.EQUALS);

        const expression = try self.parse_expression();

        return self.create_node(.{
            .ASSIGNMENT_STATEMENT = .{
                .is_declaration = is_declaration,
                .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
                .expression = @constCast(expression),
            },
        });
    }

    // PrintStatement :== PRINT LPAREN Expression RPAREN
    fn parse_print_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing print statement\n", .{});
        _ = try self.accept_token(tokenizer.TokenType.PRINT);

        _ = try self.accept_token(tokenizer.TokenType.LPAREN);

        const expression = try self.parse_expression();

        _ = try self.accept_token(tokenizer.TokenType.RPAREN);

        return self.create_node(.{
            .PRINT_STATEMENT = .{
                .expression = @constCast(expression),
            },
        });
    }

    // FunctionCallStatement ::= IDENTIFIER LPAREN RPAREN
    fn parse_function_call_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing function call statement\n", .{});

        const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER);

        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
        _ = try self.accept_token(tokenizer.TokenType.RPAREN);

        return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ .name = try self.allocator.dupe(u8, identifier.IDENTIFIER) } });
    }

    // Expression   ::= AdditiveExpression | FunctionDefinition
    fn parse_expression(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing expression\n", .{});

        if (self.accept_parse(parse_additive_expression)) |expression| {
            return expression;
        } else if (self.accept_parse(parse_function_definition)) |expression| {
            return expression;
        }

        return ParserError.ParsingError;
    }

    // AdditiveExpression ::= PrimaryExpression ("+" AdditiveExpression)
    fn parse_additive_expression(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing additive expression\n", .{});

        const lhs = try self.parse_primary_expression();

        if (self.match_token(tokenizer.TokenType.PLUS)) {
            const rhs = try self.parse_additive_expression();
            return self.create_node(.{ .ADDITIVE_EXPRESSION = .{
                .lhs = lhs,
                .rhs = rhs,
            } });
        }

        return lhs;
    }

    // PrimaryExpression ::= NUMBER | IDENTIFIER | FunctionCallStatement
    fn parse_primary_expression(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing primary expression\n", .{});

        const token = self.consume_token() orelse return ParserError.ParsingError;

        if (self.accept_parse(parse_function_call_statement)) |stmt| return stmt;

        return switch (token) {
            .NUMBER => |number_token| try self.create_node(.{
                .PRIMARY_EXPRESSION = .{
                    .NUMBER = .{
                        .value = number_token,
                    },
                },
            }),
            .IDENTIFIER => |identifier_token| try self.create_node(.{
                .PRIMARY_EXPRESSION = .{
                    .IDENTIFIER = .{
                        .name = try self.allocator.dupe(u8, identifier_token),
                    },
                },
            }),
            else => ParserError.ParsingError,
        };
    }

    // FunctionDefinition ::= ARROW LBRACE Statement* ReturnStatement RBRACE
    fn parse_function_definition(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing function definition\n", .{});

        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
        _ = try self.accept_token(tokenizer.TokenType.RPAREN);
        _ = try self.accept_token(tokenizer.TokenType.ARROW);
        _ = try self.accept_token(tokenizer.TokenType.LBRACE);

        var nodes = std.ArrayList(*Node).init(self.allocator);
        while (self.accept_parse(parse_statement)) |expression| {
            try nodes.append(expression);
        }

        try nodes.append(try self.parse_return_statement());

        _ = try self.accept_token(tokenizer.TokenType.RBRACE);

        return self.create_node(.{ .FUNCTION_DEFINITION = .{
            .statements = nodes.items,
        } });
    }

    // ReturnStatement :== RETURN Expression
    fn parse_return_statement(self: *Parser) ParserError!*Node {
        errdefer if (!self.try_context) std.debug.print("Error parsing return statement\n", .{});
        _ = try self.accept_token(tokenizer.TokenType.RETURN);

        const expression = try self.parse_expression();

        _ = try self.accept_token(tokenizer.TokenType.SEMICOLON); //TODO: I dont like this

        return self.create_node(.{
            .RETURN_STATEMENT = .{
                .expression = @constCast(expression),
            },
        });
    }

    fn accept_parse(self: *Parser, parsing_func: *const fn (_: *Parser) ParserError!*Node) ?*Node {
        const prev_offset = self.offset;
        self.try_context = true;
        defer self.try_context = false;
        const node = parsing_func(self) catch {
            self.offset = prev_offset;
            return null;
        };
        return node;
    }

    fn accept_token(self: *Parser, expected_token: tokenizer.TokenType) ParserError!tokenizer.Token {
        errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token});
        const token = self.peek_token() orelse return ParserError.ParsingError;

        if (token != expected_token) {
            if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token });
            return ParserError.ParsingError;
        }

        return self.consume_token() orelse unreachable;
    }

    fn match_token(self: *Parser, token: tokenizer.TokenType) bool {
        const curr_token = self.peek_token() orelse return false;
        if (curr_token == token) {
            _ = self.consume_token();
            return true;
        }
        return false;
    }

    fn consume_token(self: *Parser) ?tokenizer.Token {
        if (self.offset >= self.tokens.len) return null;

        defer self.offset += 1;

        return self.tokens[self.offset];
    }

    fn peek_token(self: *Parser) ?tokenizer.Token {
        if (self.offset >= self.tokens.len) return null;

        return self.tokens[self.offset];
    }

    fn create_node(self: *Parser, node_value: Node) !*Node {
        const node = try self.allocator.create(Node);
        node.* = node_value;
        return node;
    }
};

test "parse print" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .PRINT = void{} },
        tokenizer.Token{ .LPAREN = void{} },
        tokenizer.Token{ .NUMBER = 7 },
        tokenizer.Token{ .RPAREN = void{} },
        tokenizer.Token{ .SEMICOLON = void{} },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = try Parser.init(tokens, arena.allocator());
    const actualNode = try parser.parse_print_statement();
    const expectedNode = Node{ .PRINT_STATEMENT = .{
        .expression = @constCast(&Node{ .EXPRESSION = .{
            .NUMBER = .{ .value = 7 },
        } }),
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}

test "parse identifier" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .IDENTIFIER = @constCast("i") },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = try Parser.init(tokens, arena.allocator());
    const actualNode = try parser.parse_expression();
    const expectedNode = Node{ .EXPRESSION = .{
        .IDENTIFIER = .{
            .name = @constCast("i"),
        },
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}

test "parse number" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .NUMBER = 12 },
    });
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = try Parser.init(tokens, arena.allocator());
    const actualNode = try parser.parse_expression();
    const expectedNode = Node{ .EXPRESSION = .{
        .NUMBER = .{
            .value = 12,
        },
    } };
    try std.testing.expectEqualDeep(&expectedNode, actualNode);
}

test "simple e2e" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .LET = void{} },
        tokenizer.Token{ .IDENTIFIER = @constCast("i") },
        tokenizer.Token{ .EQUALS = void{} },
        tokenizer.Token{ .NUMBER = 2 },
        tokenizer.Token{ .SEMICOLON = void{} },
    });

    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var parser = try Parser.init(tokens, arena.allocator());
    const ast = try parser.parse();
    const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .ASSIGNMENT_STATEMENT = .{
        .is_declaration = true,
        .name = @constCast("i"),
        .expression = @constCast(&Node{ .EXPRESSION = .{
            .NUMBER = .{ .value = 2 },
        } }),
    } }) } })}) } };
    try std.testing.expectEqualDeep(&expected_ast, ast);
}