diff options
author | Baitinq <manuelpalenzuelamerino@gmail.com> | 2025-01-15 00:46:15 +0100 |
---|---|---|
committer | Baitinq <manuelpalenzuelamerino@gmail.com> | 2025-01-15 00:46:15 +0100 |
commit | 875efb6a01514996ed7fff9d6714acc472928767 (patch) | |
tree | f389b076a0bcf2451f7a88595fc63016b1725e7c | |
parent | Parser: clean (diff) | |
download | interpreter-875efb6a01514996ed7fff9d6714acc472928767.tar.gz interpreter-875efb6a01514996ed7fff9d6714acc472928767.tar.bz2 interpreter-875efb6a01514996ed7fff9d6714acc472928767.zip |
Add basic support for parsing functions
-rw-r--r-- | examples/4.src | 7 | ||||
-rw-r--r-- | examples/5.src | 7 | ||||
-rw-r--r-- | grammar.ebnf | 20 | ||||
-rw-r--r-- | src/main.zig | 16 | ||||
-rw-r--r-- | src/parser.zig | 212 |
5 files changed, 181 insertions, 81 deletions
diff --git a/examples/4.src b/examples/4.src index e1ebcdc..25dcc7a 100644 --- a/examples/4.src +++ b/examples/4.src @@ -1,3 +1,6 @@ -let x = 2; +let print_one = () => { + print(1); + return 1; +}; -return x+1; +print_one(); diff --git a/examples/5.src b/examples/5.src deleted file mode 100644 index 521b9d7..0000000 --- a/examples/5.src +++ /dev/null @@ -1,7 +0,0 @@ -let print_one = () => { - print(1); -}; - -print_one(); - -return 0; diff --git a/grammar.ebnf b/grammar.ebnf index 5340ba4..65cd74e 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -1,11 +1,19 @@ -Program ::= Statement+ +Program ::= Statement+ -Statement ::= (VariableStatement | PrintStatement | ReturnStatement) SEMICOLON +Statement ::= (AssignmentStatement | PrintStatement | FunctionCallStatement) SEMICOLON -VariableStatement ::= ("let" IDENTIFIER | IDENTIFIER) EQUALS Expression +AssignmentStatement ::= "let" IDENTIFIER EQUALS Expression -PrintStatement :== PRINT LPAREN Expression RPAREN +PrintStatement ::= PRINT LPAREN Expression RPAREN -- TODO: this won't be needed once functions support arguments -ReturnStatement :== RETURN Expression +FunctionCallStatement ::= IDENTIFIER LPAREN RPAREN -Expression :== NUMBER | IDENTIFIER | Expression + Expression +Expression ::= AdditiveExpression | FunctionDefinition + +AdditiveExpression ::= PrimaryExpression ("+" AdditiveExpression) + +PrimaryExpression ::= NUMBER | IDENTIFIER | FunctionCallStatement + +FunctionDefinition ::= ARROW LBRACE Statement* ReturnStatement RBRACE + +ReturnStatement ::= RETURN Expression SEMICOLON --TODO: I dont like this diff --git a/src/main.zig b/src/main.zig index e7ebba3..da72d72 100644 --- a/src/main.zig +++ b/src/main.zig @@ -17,8 +17,8 @@ pub fn main() !void { if (deinit_status == .leak) @panic("Memory leak detected!"); } - const source_evaluator = try evaluator.Evaluator.init(allocator); - defer source_evaluator.deinit(); + // const source_evaluator = try evaluator.Evaluator.init(allocator); + // defer source_evaluator.deinit(); var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); @@ -30,7 +30,7 @@ pub fn main() !void { const buf = try stdin.readUntilDelimiterAlloc(allocator, '\n', 1024); defer allocator.free(buf); - process_buf(buf, allocator, &arena, source_evaluator) catch |err| { + process_buf(buf, allocator, &arena) catch |err| { try stdout.print("Error processing line: {any}\n", .{err}); }; } @@ -39,11 +39,11 @@ pub fn main() !void { const file = try std.fs.cwd().openFile(path, .{}); const buf = try file.readToEndAlloc(allocator, 1 * 1024 * 1024); defer allocator.free(buf); - try process_buf(buf, allocator, &arena, source_evaluator); + try process_buf(buf, allocator, &arena); } } -fn process_buf(buf: []u8, allocator: std.mem.Allocator, arena: *std.heap.ArenaAllocator, source_evaluator: *evaluator.Evaluator) !void { +fn process_buf(buf: []u8, allocator: std.mem.Allocator, arena: *std.heap.ArenaAllocator) !void { std.debug.print("Buf:\n{s}\n", .{buf}); var token_list = std.ArrayList(tokenizer.Token).init(allocator); @@ -55,12 +55,12 @@ fn process_buf(buf: []u8, allocator: std.mem.Allocator, arena: *std.heap.ArenaAl try token_list.append(token); } - const source_parser = try parser.Parser.init(token_list.items, arena.allocator()); + const source_parser = try parser.Parser.init(token_list.items, arena); const ast = try source_parser.parse(); std.debug.print("AST: {any}\n", .{ast}); - const result = try source_evaluator.evaluate_ast(ast); - std.debug.print("Evaluation result: {any}\n", .{result}); + // const result = try source_evaluator.evaluate_ast(ast); + // std.debug.print("Evaluation result: {any}\n", .{result}); } test { diff --git a/src/parser.zig b/src/parser.zig index 7476ee1..55c3e28 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -9,10 +9,14 @@ const ParserError = error{ const NodeType = enum { PROGRAM, STATEMENT, - VARIABLE_STATEMENT, + ASSIGNMENT_STATEMENT, PRINT_STATEMENT, - RETURN_STATEMENT, + FUNCTION_CALL_STATEMENT, EXPRESSION, + ADDITIVE_EXPRESSION, + PRIMARY_EXPRESSION, + FUNCTION_DEFINITION, + RETURN_STATEMENT, }; pub const Node = union(NodeType) { @@ -22,7 +26,7 @@ pub const Node = union(NodeType) { STATEMENT: struct { statement: *Node, }, - VARIABLE_STATEMENT: struct { + ASSIGNMENT_STATEMENT: struct { is_declaration: bool, name: []const u8, expression: *Node, @@ -30,22 +34,38 @@ pub const Node = union(NodeType) { PRINT_STATEMENT: struct { expression: *Node, }, - RETURN_STATEMENT: struct { - expression: *Node, + FUNCTION_CALL_STATEMENT: struct { + name: []const u8, }, - EXPRESSION: union(enum) { + EXPRESSION: struct { + ADDITIVE_EXPRESSION: struct { + expression: *Node, + }, + FUNCTION_DEFINITION: struct { + expression: *Node, + }, + }, + ADDITIVE_EXPRESSION: struct { + lhs: *Node, + rhs: *Node, + }, + PRIMARY_EXPRESSION: union(enum) { NUMBER: struct { value: i64, }, IDENTIFIER: struct { name: []const u8, }, - BINARY: struct { - //TODO: For now, this just represents sum - lhs: *Node, - rhs: *Node, + FUNCTION_CALL: struct { + name: []const u8, }, }, + FUNCTION_DEFINITION: struct { + statements: []*Node, + }, + RETURN_STATEMENT: struct { + expression: *Node, + }, }; pub const Parser = struct { @@ -54,12 +74,15 @@ pub const Parser = struct { allocator: std.mem.Allocator, - pub fn init(tokens: []tokenizer.Token, allocator: std.mem.Allocator) ParserError!*Parser { - const parser = try allocator.create(Parser); + try_context: bool, //TODO: I dont like this + + pub fn init(tokens: []tokenizer.Token, arena_allocator: *std.heap.ArenaAllocator) ParserError!*Parser { + const parser = try arena_allocator.allocator().create(Parser); parser.* = .{ .tokens = tokens, .offset = 0, - .allocator = allocator, + .allocator = arena_allocator.allocator(), + .try_context = false, }; return parser; } @@ -71,7 +94,6 @@ pub const Parser = struct { // Program ::= Statement+ fn parse_program(self: *Parser) !*Node { var nodes = std.ArrayList(*Node).init(self.allocator); - defer nodes.deinit(); while (self.offset < self.tokens.len) { try nodes.append(@constCast(try self.parse_statement())); } @@ -81,29 +103,30 @@ pub const Parser = struct { } }); } - // Statement ::= (VariableStatement | PrintStatement) SEMICOLON + // Statement ::= (AssignmentStatement | PrintStatement | FunctionCallStatement) SEMICOLON fn parse_statement(self: *Parser) ParserError!*Node { - errdefer std.debug.print("Error parsing statement\n", .{}); - const token = self.peek_token() orelse return ParserError.ParsingError; - - const statement = switch (token) { - .PRINT => try self.parse_print_statement(), - .RETURN => try self.parse_return_statement(), - else => try self.parse_variable_statement(), - }; - + errdefer if (!self.try_context) std.debug.print("Error parsing statement\n", .{}); + + var statement: ?*Node = undefined; + if (self.accept_parse(parse_print_statement)) |stmt| { + statement = stmt; + } else if (self.accept_parse(parse_function_call_statement)) |stmt| { + statement = stmt; + } else { + statement = try self.parse_assignment_statement(); + } _ = try self.accept_token(tokenizer.TokenType.SEMICOLON); return self.create_node(.{ .STATEMENT = .{ - .statement = statement, + .statement = statement.?, }, }); } - // VariableStatement ::= ("let" IDENTIFIER | IDENTIFIER) EQUALS Expression - fn parse_variable_statement(self: *Parser) ParserError!*Node { - errdefer std.debug.print("Error parsing variable statement\n", .{}); + // AssignmentStatement ::= "let" IDENTIFIER EQUALS Expression + fn parse_assignment_statement(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing assignment statement\n", .{}); var is_declaration: bool = false; if (self.match_token(.LET)) { @@ -117,7 +140,7 @@ pub const Parser = struct { const expression = try self.parse_expression(); return self.create_node(.{ - .VARIABLE_STATEMENT = .{ + .ASSIGNMENT_STATEMENT = .{ .is_declaration = is_declaration, .name = try self.allocator.dupe(u8, identifier.IDENTIFIER), .expression = @constCast(expression), @@ -127,7 +150,7 @@ pub const Parser = struct { // PrintStatement :== PRINT LPAREN Expression RPAREN fn parse_print_statement(self: *Parser) ParserError!*Node { - errdefer std.debug.print("Error parsing print statement\n", .{}); + errdefer if (!self.try_context) std.debug.print("Error parsing print statement\n", .{}); _ = try self.accept_token(tokenizer.TokenType.PRINT); _ = try self.accept_token(tokenizer.TokenType.LPAREN); @@ -143,60 +166,133 @@ pub const Parser = struct { }); } - // ReturnStatement :== RETURN Expression - fn parse_return_statement(self: *Parser) ParserError!*Node { - errdefer std.debug.print("Error parsing return statement\n", .{}); - _ = try self.accept_token(tokenizer.TokenType.RETURN); + // FunctionCallStatement ::= IDENTIFIER LPAREN RPAREN + fn parse_function_call_statement(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing function call statement\n", .{}); - const expression = try self.parse_expression(); + const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER); - return self.create_node(.{ - .RETURN_STATEMENT = .{ - .expression = @constCast(expression), - }, - }); + _ = try self.accept_token(tokenizer.TokenType.LPAREN); + _ = try self.accept_token(tokenizer.TokenType.RPAREN); + + return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ .name = try self.allocator.dupe(u8, identifier.IDENTIFIER) } }); } - // Expression :== NUMBER | IDENTIFIER | Expression + Expression + // Expression ::= AdditiveExpression | FunctionDefinition fn parse_expression(self: *Parser) ParserError!*Node { - errdefer std.debug.print("Error parsing expression\n", .{}); + errdefer if (!self.try_context) std.debug.print("Error parsing expression\n", .{}); + + if (self.accept_parse(parse_additive_expression)) |expression| { + return expression; + } else if (self.accept_parse(parse_function_definition)) |expression| { + return expression; + } + + return ParserError.ParsingError; + } + + // AdditiveExpression ::= PrimaryExpression ("+" AdditiveExpression) + fn parse_additive_expression(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing additive expression\n", .{}); + + const lhs = try self.parse_primary_expression(); + + if (self.match_token(tokenizer.TokenType.PLUS)) { + const rhs = try self.parse_additive_expression(); + return self.create_node(.{ .ADDITIVE_EXPRESSION = .{ + .lhs = lhs, + .rhs = rhs, + } }); + } + + return lhs; + } + + // PrimaryExpression ::= NUMBER | IDENTIFIER | FunctionCallStatement + fn parse_primary_expression(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing primary expression\n", .{}); + const token = self.consume_token() orelse return ParserError.ParsingError; - const lhs = try switch (token) { - .NUMBER => |number_token| self.create_node(.{ - .EXPRESSION = .{ + if (self.accept_parse(parse_function_call_statement)) |stmt| return stmt; + + return switch (token) { + .NUMBER => |number_token| try self.create_node(.{ + .PRIMARY_EXPRESSION = .{ .NUMBER = .{ .value = number_token, }, }, }), - .IDENTIFIER => |identifier_token| self.create_node(.{ - .EXPRESSION = .{ + .IDENTIFIER => |identifier_token| try self.create_node(.{ + .PRIMARY_EXPRESSION = .{ .IDENTIFIER = .{ .name = try self.allocator.dupe(u8, identifier_token), }, }, }), - else => unreachable, + else => ParserError.ParsingError, }; + } - if (self.match_token(tokenizer.TokenType.PLUS)) { - const rhs = try self.parse_expression(); + // FunctionDefinition ::= ARROW LBRACE Statement* ReturnStatement RBRACE + fn parse_function_definition(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing function definition\n", .{}); - return self.create_node(.{ .EXPRESSION = .{ .BINARY = .{ - .lhs = lhs, - .rhs = rhs, - } } }); + _ = try self.accept_token(tokenizer.TokenType.LPAREN); + _ = try self.accept_token(tokenizer.TokenType.RPAREN); + _ = try self.accept_token(tokenizer.TokenType.ARROW); + _ = try self.accept_token(tokenizer.TokenType.LBRACE); + + var nodes = std.ArrayList(*Node).init(self.allocator); + while (self.accept_parse(parse_statement)) |expression| { + try nodes.append(expression); } - return lhs; + try nodes.append(try self.parse_return_statement()); + + _ = try self.accept_token(tokenizer.TokenType.RBRACE); + + return self.create_node(.{ .FUNCTION_DEFINITION = .{ + .statements = nodes.items, + } }); + } + + // ReturnStatement :== RETURN Expression + fn parse_return_statement(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing return statement\n", .{}); + _ = try self.accept_token(tokenizer.TokenType.RETURN); + + const expression = try self.parse_expression(); + + _ = try self.accept_token(tokenizer.TokenType.SEMICOLON); //TODO: I dont like this + + return self.create_node(.{ + .RETURN_STATEMENT = .{ + .expression = @constCast(expression), + }, + }); + } + + fn accept_parse(self: *Parser, parsing_func: *const fn (_: *Parser) ParserError!*Node) ?*Node { + const prev_offset = self.offset; + self.try_context = true; + defer self.try_context = false; + const node = parsing_func(self) catch { + self.offset = prev_offset; + return null; + }; + return node; } fn accept_token(self: *Parser, expected_token: tokenizer.TokenType) ParserError!tokenizer.Token { - errdefer std.debug.print("Error accepting token: {any}\n", .{expected_token}); + errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token}); const token = self.peek_token() orelse return ParserError.ParsingError; - if (token != expected_token) return ParserError.ParsingError; + if (token != expected_token) { + if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token }); + return ParserError.ParsingError; + } return self.consume_token() orelse unreachable; } @@ -296,7 +392,7 @@ test "simple e2e" { defer arena.deinit(); var parser = try Parser.init(tokens, arena.allocator()); const ast = try parser.parse(); - const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .VARIABLE_STATEMENT = .{ + const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .ASSIGNMENT_STATEMENT = .{ .is_declaration = true, .name = @constCast("i"), .expression = @constCast(&Node{ .EXPRESSION = .{ |