summary refs log blame commit diff
path: root/src/parser.zig
blob: d5649248ecefcc94e12290569178bb041a0c0b9d (plain) (tree)
1
2
3
4
5
6
7
8
9
10


                                           
                          

                 

  

                       
              

                       
               

  









                                                    
                            
                    
                   



                         
    

                           


                              

                                 


                                                                                              

                             
                                   

                      

     
                                        


                                    
                             
                                            
                                                              
                             
                                               
                                                                     

         
                                                
                                                   
             
     
 
                                                                   
                                                         
                                                                               
 











                                                                 
     
 
                                                                              














                                                                                 
                                  

                                                 
                                                                           

                                                     
           

     
                                                        
                                                               
                                                             
 
                                                              
 
                                                       
 
                                                              
 
                                  
                                 
                                                     
              
           

     

                                                          
                                                                                  
 

                                                         

                                
                                              

                      

                                                                 

                                    
                                                                              

                      


                                







                                                                                                     

     







                                                        
                                                   



                                                        





                                                            

  









                                                                     
                                                
                       










                                                                                                               

 






































                                                                                                                                      
const std = @import("std");
const tokenizer = @import("tokenizer.zig");

const ParserError = error{
    ParsingError,
    OutOfMemory,
};

const NodeType = enum {
    PROGRAM,
    STATEMENT,
    VARIABLE_STATEMENT,
    PRINT_STATEMENT,
    EXPRESSION,
};

pub const Node = union(NodeType) { PROGRAM: struct {
    statements: []*Node,
}, STATEMENT: struct {
    statement: *Node,
}, VARIABLE_STATEMENT: struct {
    is_declaration: bool,
    name: []const u8,
    expression: *Node,
}, PRINT_STATEMENT: struct {
    expression: *Node,
}, EXPRESSION: union(enum) {
    NUMBER: struct {
        value: i64,
    },
    IDENTIFIER: struct {
        name: []const u8,
    },
} };

pub const Parser = struct {
    tokens: []tokenizer.Token,
    offset: u32,

    allocator: std.mem.Allocator,

    pub fn init(tokens: []tokenizer.Token, allocator: std.mem.Allocator) ParserError!*Parser {
        const parser = try allocator.create(Parser);
        parser.* = .{
            .tokens = tokens,
            .offset = 0,
            .allocator = allocator,
        };
        return parser;
    }

    pub fn parse(self: *Parser) !*Node {
        return self.parse_program();
    }

    // Program ::= Statement+
    fn parse_program(self: *Parser) !*Node {
        var nodes = std.ArrayList(*Node).init(self.allocator);
        defer nodes.deinit();
        while (self.offset < self.tokens.len) {
            try nodes.append(@constCast(try self.parse_statement()));
        }

        return self.create_node(.{ .PROGRAM = .{
            .statements = try nodes.toOwnedSlice(),
        } });
    }

    // Statement ::= (VariableStatement | PrintStatement) SEMICOLON
    fn parse_statement(self: *Parser) ParserError!*Node {
        const token = self.peek_token() orelse return ParserError.ParsingError;

        const statement = switch (token) {
            .PRINT => try self.parse_print_statement(),
            else => try self.parse_variable_statement(),
        };

        _ = try self.accept_token(tokenizer.TokenType.SEMICOLON);

        return self.create_node(.{
            .STATEMENT = .{
                .statement = statement,
            },
        });
    }

    // VariableStatement ::= ("let" IDENTIFIER | IDENTIFIER) EQUALS Expression
    fn parse_variable_statement(self: *Parser) ParserError!*Node {
        const token = self.peek_token() orelse return ParserError.ParsingError;

        var is_declaration: bool = false;
        if (token == .LET) {
            is_declaration = true;
            _ = self.consume_token() orelse return ParserError.ParsingError;
        }

        const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER);

        _ = try self.accept_token(tokenizer.TokenType.EQUALS);

        const expression = try self.parse_expression();

        return self.create_node(.{
            .VARIABLE_STATEMENT = .{
                .is_declaration = is_declaration,
                .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
                .expression = @constCast(expression),
            },
        });
    }

    // PrintStatement :== PRINT LPAREN Expression RPAREN
    fn parse_print_statement(self: *Parser) ParserError!*Node {
        _ = try self.accept_token(tokenizer.TokenType.PRINT);

        _ = try self.accept_token(tokenizer.TokenType.LPAREN);

        const expression = try self.parse_expression();

        _ = try self.accept_token(tokenizer.TokenType.RPAREN);

        return self.create_node(.{
            .PRINT_STATEMENT = .{
                .expression = @constCast(expression),
            },
        });
    }

    // Expression :== NUMBER | IDENTIFIER
    fn parse_expression(self: *Parser) ParserError!*Node {
        const token = self.consume_token() orelse return ParserError.ParsingError;

        return switch (token) {
            .NUMBER => |number_token| self.create_node(.{
                .EXPRESSION = .{
                    .NUMBER = .{
                        .value = number_token,
                    },
                },
            }),
            .IDENTIFIER => |identifier_token| self.create_node(.{
                .EXPRESSION = .{
                    .IDENTIFIER = .{
                        .name = try self.allocator.dupe(u8, identifier_token),
                    },
                },
            }),
            else => unreachable,
        };
    }

    fn accept_token(self: *Parser, expected_token: tokenizer.TokenType) ParserError!tokenizer.Token {
        const token = self.peek_token() orelse return ParserError.ParsingError;

        if (token != expected_token) return ParserError.ParsingError;

        return self.consume_token() orelse unreachable;
    }

    fn consume_token(self: *Parser) ?tokenizer.Token {
        if (self.offset >= self.tokens.len) return null;

        defer self.offset += 1;

        return self.tokens[self.offset];
    }

    fn peek_token(self: *Parser) ?tokenizer.Token {
        if (self.offset >= self.tokens.len) return null;

        return self.tokens[self.offset];
    }

    fn create_node(self: *Parser, node_value: Node) !*Node {
        const node = try self.allocator.create(Node);
        node.* = node_value;
        return node;
    }
};

test "parse print" {
    const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
        tokenizer.Token{ .PRINT = void{} },
        tokenizer.Token{ .LPAREN = void{} },
        tokenizer.Token{ .NUMBER = 7 },
        tokenizer.Token{ .RPAREN = void{} },
        tokenizer.Token{ .SEMICOLON = void{} },
    });
    var parser = Parser.init(tokens);
    const print = try parser.parse_print_statement();
    std.debug.print("PRINT: {any}\n", .{print});
    //TODO: Warning ptr
    // try std.testing.expectEqualDeep(Node{ .PRINT_STATEMENT = .{ .expression = @constCast(&Node{ .NUMBER = .{
    //     .value = 7,
    // } }) } }, print);
    const expectedNode = Node{ .PRINT_STATEMENT = .{
        .expression = @constCast(&Node{ .NUMBER = .{
            .value = 9,
        } }),
    } };
    std.debug.print("EXPECTED: {any}\n", .{expectedNode});
    // TODO: This seems bugged with recursive types maybe?
    // try std.testing.expectEqualDeep(expectedNode, print);
}

// test "parse identifier" {
//     const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
//         tokenizer.Token{ .IDENTIFIER = @constCast("i") },
//     });
//     var parser = Parser.init(tokens);
//     const ident = try parser.parse_identifier();
//     try std.testing.expectEqualDeep(Node{ .IDENTIFIER = .{
//         .name = @constCast("i"),
//     } }, ident);
// }
//
// test "narse number" {
//     const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
//         tokenizer.Token{ .NUMBER = 7 },
//     });
//     var parser = Parser.init(tokens);
//     const number = try parser.parse_number();
//     try std.testing.expectEqualDeep(Node{ .NUMBER = .{
//         .value = 7,
//     } }, number);
// }
//
// test "simple e2e" {
//     const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{
//         tokenizer.Token{ .LET = void{} },
//         tokenizer.Token{ .IDENTIFIER = @constCast("i") },
//         tokenizer.Token{ .EQUALS = void{} },
//         tokenizer.Token{ .NUMBER = 2 },
//         tokenizer.Token{ .SEMICOLON = void{} },
//     });
//
//     const ast = try Parser.init(tokens).parse();
//
//     try std.testing.expectEqualDeep(Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{
//         @constCast(&Node{ .VARIABLE_STATEMENT = .{ .is_declaration = true, .name = @constCast("i"), .expression = @constCast(&Node{
//             .NUMBER = .{ .value = 2 },
//         }) } }),
//     }) } }, ast);
// }