summary refs log tree commit diff
diff options
context:
space:
mode:
author Baitinq <manuelpalenzuelamerino@gmail.com> 2025-01-15 00:46:15 +0100
committer Baitinq <manuelpalenzuelamerino@gmail.com> 2025-01-15 00:46:15 +0100
commit 875efb6a01514996ed7fff9d6714acc472928767 (patch)
tree f389b076a0bcf2451f7a88595fc63016b1725e7c
parent Parser: clean (diff)
download interpreter-875efb6a01514996ed7fff9d6714acc472928767.tar.gz
interpreter-875efb6a01514996ed7fff9d6714acc472928767.tar.bz2
interpreter-875efb6a01514996ed7fff9d6714acc472928767.zip
Add basic support for parsing functions
-rw-r--r-- examples/4.src 7
-rw-r--r-- examples/5.src 7
-rw-r--r-- grammar.ebnf 20
-rw-r--r-- src/main.zig 16
-rw-r--r-- src/parser.zig 212
5 files changed, 181 insertions, 81 deletions
diff --git a/examples/4.src b/examples/4.src
index e1ebcdc..25dcc7a 100644
--- a/examples/4.src
+++ b/examples/4.src
@@ -1,3 +1,6 @@
-let x = 2;
+let print_one = () => {
+	print(1);
+	return 1;
+};
 
-return x+1;
+print_one();
diff --git a/examples/5.src b/examples/5.src
deleted file mode 100644
index 521b9d7..0000000
--- a/examples/5.src
+++ /dev/null
@@ -1,7 +0,0 @@
-let print_one = () => {
-	print(1);
-};
-
-print_one();
-
-return 0;
diff --git a/grammar.ebnf b/grammar.ebnf
index 5340ba4..65cd74e 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -1,11 +1,19 @@
-Program ::= Statement+
+Program      ::= Statement+
 
-Statement ::= (VariableStatement | PrintStatement | ReturnStatement) SEMICOLON
+Statement    ::= (AssignmentStatement | PrintStatement | FunctionCallStatement) SEMICOLON
 
-VariableStatement ::= ("let" IDENTIFIER | IDENTIFIER) EQUALS Expression
+AssignmentStatement ::= "let" IDENTIFIER EQUALS Expression
 
-PrintStatement :== PRINT LPAREN Expression RPAREN
+PrintStatement ::= PRINT LPAREN Expression RPAREN -- TODO: this won't be needed once functions support arguments
 
-ReturnStatement :== RETURN Expression
+FunctionCallStatement ::= IDENTIFIER LPAREN RPAREN
 
-Expression :== NUMBER | IDENTIFIER | Expression + Expression
+Expression   ::= AdditiveExpression | FunctionDefinition
+
+AdditiveExpression ::= PrimaryExpression ("+" AdditiveExpression)?
+
+PrimaryExpression ::= NUMBER | IDENTIFIER | FunctionCallStatement
+
+FunctionDefinition ::= LPAREN RPAREN ARROW LBRACE Statement* ReturnStatement RBRACE
+
+ReturnStatement ::= RETURN Expression SEMICOLON -- TODO: I don't like this
diff --git a/src/main.zig b/src/main.zig
index e7ebba3..da72d72 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -17,8 +17,8 @@ pub fn main() !void {
         if (deinit_status == .leak) @panic("Memory leak detected!");
     }
 
-    const source_evaluator = try evaluator.Evaluator.init(allocator);
-    defer source_evaluator.deinit();
+    // const source_evaluator = try evaluator.Evaluator.init(allocator);
+    // defer source_evaluator.deinit();
 
     var arena = std.heap.ArenaAllocator.init(allocator);
     defer arena.deinit();
@@ -30,7 +30,7 @@ pub fn main() !void {
             const buf = try stdin.readUntilDelimiterAlloc(allocator, '\n', 1024);
             defer allocator.free(buf);
 
-            process_buf(buf, allocator, &arena, source_evaluator) catch |err| {
+            process_buf(buf, allocator, &arena) catch |err| {
                 try stdout.print("Error processing line: {any}\n", .{err});
             };
         }
@@ -39,11 +39,11 @@ pub fn main() !void {
         const file = try std.fs.cwd().openFile(path, .{});
         const buf = try file.readToEndAlloc(allocator, 1 * 1024 * 1024);
         defer allocator.free(buf);
-        try process_buf(buf, allocator, &arena, source_evaluator);
+        try process_buf(buf, allocator, &arena);
     }
 }
 
-fn process_buf(buf: []u8, allocator: std.mem.Allocator, arena: *std.heap.ArenaAllocator, source_evaluator: *evaluator.Evaluator) !void {
+fn process_buf(buf: []u8, allocator: std.mem.Allocator, arena: *std.heap.ArenaAllocator) !void {
     std.debug.print("Buf:\n{s}\n", .{buf});
 
     var token_list = std.ArrayList(tokenizer.Token).init(allocator);
@@ -55,12 +55,12 @@ fn process_buf(buf: []u8, allocator: std.mem.Allocator, arena: *std.heap.ArenaAl
         try token_list.append(token);
     }
 
-    const source_parser = try parser.Parser.init(token_list.items, arena.allocator());
+    const source_parser = try parser.Parser.init(token_list.items, arena);
     const ast = try source_parser.parse();
     std.debug.print("AST: {any}\n", .{ast});
 
-    const result = try source_evaluator.evaluate_ast(ast);
-    std.debug.print("Evaluation result: {any}\n", .{result});
+    // const result = try source_evaluator.evaluate_ast(ast);
+    // std.debug.print("Evaluation result: {any}\n", .{result});
 }
 
 test {
diff --git a/src/parser.zig b/src/parser.zig
index 7476ee1..55c3e28 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -9,10 +9,14 @@ const ParserError = error{
 const NodeType = enum {
     PROGRAM,
     STATEMENT,
-    VARIABLE_STATEMENT,
+    ASSIGNMENT_STATEMENT,
     PRINT_STATEMENT,
-    RETURN_STATEMENT,
+    FUNCTION_CALL_STATEMENT,
     EXPRESSION,
+    ADDITIVE_EXPRESSION,
+    PRIMARY_EXPRESSION,
+    FUNCTION_DEFINITION,
+    RETURN_STATEMENT,
 };
 
 pub const Node = union(NodeType) {
@@ -22,7 +26,7 @@ pub const Node = union(NodeType) {
     STATEMENT: struct {
         statement: *Node,
     },
-    VARIABLE_STATEMENT: struct {
+    ASSIGNMENT_STATEMENT: struct {
         is_declaration: bool,
         name: []const u8,
         expression: *Node,
@@ -30,22 +34,38 @@ pub const Node = union(NodeType) {
     PRINT_STATEMENT: struct {
         expression: *Node,
     },
-    RETURN_STATEMENT: struct {
-        expression: *Node,
+    FUNCTION_CALL_STATEMENT: struct {
+        name: []const u8,
     },
-    EXPRESSION: union(enum) {
+    EXPRESSION: struct {
+        ADDITIVE_EXPRESSION: struct {
+            expression: *Node,
+        },
+        FUNCTION_DEFINITION: struct {
+            expression: *Node,
+        },
+    },
+    ADDITIVE_EXPRESSION: struct {
+        lhs: *Node,
+        rhs: *Node,
+    },
+    PRIMARY_EXPRESSION: union(enum) {
         NUMBER: struct {
             value: i64,
         },
         IDENTIFIER: struct {
             name: []const u8,
         },
-        BINARY: struct {
-            //TODO: For now, this just represents sum
-            lhs: *Node,
-            rhs: *Node,
+        FUNCTION_CALL: struct {
+            name: []const u8,
         },
     },
+    FUNCTION_DEFINITION: struct {
+        statements: []*Node,
+    },
+    RETURN_STATEMENT: struct {
+        expression: *Node,
+    },
 };
 
 pub const Parser = struct {
@@ -54,12 +74,15 @@ pub const Parser = struct {
 
     allocator: std.mem.Allocator,
 
-    pub fn init(tokens: []tokenizer.Token, allocator: std.mem.Allocator) ParserError!*Parser {
-        const parser = try allocator.create(Parser);
+    try_context: bool, //TODO: I dont like this
+
+    pub fn init(tokens: []tokenizer.Token, arena_allocator: *std.heap.ArenaAllocator) ParserError!*Parser {
+        const parser = try arena_allocator.allocator().create(Parser);
         parser.* = .{
             .tokens = tokens,
             .offset = 0,
-            .allocator = allocator,
+            .allocator = arena_allocator.allocator(),
+            .try_context = false,
         };
         return parser;
     }
@@ -71,7 +94,6 @@ pub const Parser = struct {
     // Program ::= Statement+
     fn parse_program(self: *Parser) !*Node {
         var nodes = std.ArrayList(*Node).init(self.allocator);
-        defer nodes.deinit();
         while (self.offset < self.tokens.len) {
             try nodes.append(@constCast(try self.parse_statement()));
         }
@@ -81,29 +103,30 @@ pub const Parser = struct {
         } });
     }
 
-    // Statement ::= (VariableStatement | PrintStatement) SEMICOLON
+    // Statement ::= (AssignmentStatement | PrintStatement | FunctionCallStatement) SEMICOLON
     fn parse_statement(self: *Parser) ParserError!*Node {
-        errdefer std.debug.print("Error parsing statement\n", .{});
-        const token = self.peek_token() orelse return ParserError.ParsingError;
-
-        const statement = switch (token) {
-            .PRINT => try self.parse_print_statement(),
-            .RETURN => try self.parse_return_statement(),
-            else => try self.parse_variable_statement(),
-        };
-
+        errdefer if (!self.try_context) std.debug.print("Error parsing statement\n", .{});
+
+        var statement: ?*Node = undefined;
+        if (self.accept_parse(parse_print_statement)) |stmt| {
+            statement = stmt;
+        } else if (self.accept_parse(parse_function_call_statement)) |stmt| {
+            statement = stmt;
+        } else {
+            statement = try self.parse_assignment_statement();
+        }
         _ = try self.accept_token(tokenizer.TokenType.SEMICOLON);
 
         return self.create_node(.{
             .STATEMENT = .{
-                .statement = statement,
+                .statement = statement.?,
             },
         });
     }
 
-    // VariableStatement ::= ("let" IDENTIFIER | IDENTIFIER) EQUALS Expression
-    fn parse_variable_statement(self: *Parser) ParserError!*Node {
-        errdefer std.debug.print("Error parsing variable statement\n", .{});
+    // AssignmentStatement ::= "let" IDENTIFIER EQUALS Expression
+    fn parse_assignment_statement(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing assignment statement\n", .{});
 
         var is_declaration: bool = false;
         if (self.match_token(.LET)) {
@@ -117,7 +140,7 @@ pub const Parser = struct {
         const expression = try self.parse_expression();
 
         return self.create_node(.{
-            .VARIABLE_STATEMENT = .{
+            .ASSIGNMENT_STATEMENT = .{
                 .is_declaration = is_declaration,
                 .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
                 .expression = @constCast(expression),
@@ -127,7 +150,7 @@ pub const Parser = struct {
 
     // PrintStatement :== PRINT LPAREN Expression RPAREN
     fn parse_print_statement(self: *Parser) ParserError!*Node {
-        errdefer std.debug.print("Error parsing print statement\n", .{});
+        errdefer if (!self.try_context) std.debug.print("Error parsing print statement\n", .{});
         _ = try self.accept_token(tokenizer.TokenType.PRINT);
 
         _ = try self.accept_token(tokenizer.TokenType.LPAREN);
@@ -143,60 +166,133 @@ pub const Parser = struct {
         });
     }
 
-    // ReturnStatement :== RETURN Expression
-    fn parse_return_statement(self: *Parser) ParserError!*Node {
-        errdefer std.debug.print("Error parsing return statement\n", .{});
-        _ = try self.accept_token(tokenizer.TokenType.RETURN);
+    // FunctionCallStatement ::= IDENTIFIER LPAREN RPAREN
+    fn parse_function_call_statement(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing function call statement\n", .{});
 
-        const expression = try self.parse_expression();
+        const identifier = try self.accept_token(tokenizer.TokenType.IDENTIFIER);
 
-        return self.create_node(.{
-            .RETURN_STATEMENT = .{
-                .expression = @constCast(expression),
-            },
-        });
+        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
+        _ = try self.accept_token(tokenizer.TokenType.RPAREN);
+
+        return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ .name = try self.allocator.dupe(u8, identifier.IDENTIFIER) } });
     }
 
-    // Expression :== NUMBER | IDENTIFIER | Expression + Expression
+    // Expression   ::= AdditiveExpression | FunctionDefinition
     fn parse_expression(self: *Parser) ParserError!*Node {
-        errdefer std.debug.print("Error parsing expression\n", .{});
+        errdefer if (!self.try_context) std.debug.print("Error parsing expression\n", .{});
+
+        if (self.accept_parse(parse_additive_expression)) |expression| {
+            return expression;
+        } else if (self.accept_parse(parse_function_definition)) |expression| {
+            return expression;
+        }
+
+        return ParserError.ParsingError;
+    }
+
+    // AdditiveExpression ::= PrimaryExpression ("+" AdditiveExpression)?
+    fn parse_additive_expression(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing additive expression\n", .{});
+
+        const lhs = try self.parse_primary_expression();
+
+        if (self.match_token(tokenizer.TokenType.PLUS)) {
+            const rhs = try self.parse_additive_expression();
+            return self.create_node(.{ .ADDITIVE_EXPRESSION = .{
+                .lhs = lhs,
+                .rhs = rhs,
+            } });
+        }
+
+        return lhs;
+    }
+
+    // PrimaryExpression ::= NUMBER | IDENTIFIER | FunctionCallStatement
+    fn parse_primary_expression(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing primary expression\n", .{});
+
         const token = self.consume_token() orelse return ParserError.ParsingError;
 
-        const lhs = try switch (token) {
-            .NUMBER => |number_token| self.create_node(.{
-                .EXPRESSION = .{
+        if (self.accept_parse(parse_function_call_statement)) |stmt| return stmt;
+
+        return switch (token) {
+            .NUMBER => |number_token| try self.create_node(.{
+                .PRIMARY_EXPRESSION = .{
                     .NUMBER = .{
                         .value = number_token,
                     },
                 },
             }),
-            .IDENTIFIER => |identifier_token| self.create_node(.{
-                .EXPRESSION = .{
+            .IDENTIFIER => |identifier_token| try self.create_node(.{
+                .PRIMARY_EXPRESSION = .{
                     .IDENTIFIER = .{
                         .name = try self.allocator.dupe(u8, identifier_token),
                     },
                 },
             }),
-            else => unreachable,
+            else => ParserError.ParsingError,
         };
+    }
 
-        if (self.match_token(tokenizer.TokenType.PLUS)) {
-            const rhs = try self.parse_expression();
+    // FunctionDefinition ::= LPAREN RPAREN ARROW LBRACE Statement* ReturnStatement RBRACE
+    fn parse_function_definition(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing function definition\n", .{});
 
-            return self.create_node(.{ .EXPRESSION = .{ .BINARY = .{
-                .lhs = lhs,
-                .rhs = rhs,
-            } } });
+        _ = try self.accept_token(tokenizer.TokenType.LPAREN);
+        _ = try self.accept_token(tokenizer.TokenType.RPAREN);
+        _ = try self.accept_token(tokenizer.TokenType.ARROW);
+        _ = try self.accept_token(tokenizer.TokenType.LBRACE);
+
+        var nodes = std.ArrayList(*Node).init(self.allocator);
+        while (self.accept_parse(parse_statement)) |expression| {
+            try nodes.append(expression);
         }
 
-        return lhs;
+        try nodes.append(try self.parse_return_statement());
+
+        _ = try self.accept_token(tokenizer.TokenType.RBRACE);
+
+        return self.create_node(.{ .FUNCTION_DEFINITION = .{
+            .statements = nodes.items,
+        } });
+    }
+
+    // ReturnStatement ::= RETURN Expression SEMICOLON
+    fn parse_return_statement(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing return statement\n", .{});
+        _ = try self.accept_token(tokenizer.TokenType.RETURN);
+
+        const expression = try self.parse_expression();
+
+        _ = try self.accept_token(tokenizer.TokenType.SEMICOLON); //TODO: I dont like this
+
+        return self.create_node(.{
+            .RETURN_STATEMENT = .{
+                .expression = @constCast(expression),
+            },
+        });
+    }
+
+    fn accept_parse(self: *Parser, parsing_func: *const fn (_: *Parser) ParserError!*Node) ?*Node {
+        const prev_offset = self.offset;
+        self.try_context = true;
+        defer self.try_context = false;
+        const node = parsing_func(self) catch {
+            self.offset = prev_offset;
+            return null;
+        };
+        return node;
     }
 
     fn accept_token(self: *Parser, expected_token: tokenizer.TokenType) ParserError!tokenizer.Token {
-        errdefer std.debug.print("Error accepting token: {any}\n", .{expected_token});
+        errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token});
         const token = self.peek_token() orelse return ParserError.ParsingError;
 
-        if (token != expected_token) return ParserError.ParsingError;
+        if (token != expected_token) {
+            if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token });
+            return ParserError.ParsingError;
+        }
 
         return self.consume_token() orelse unreachable;
     }
@@ -296,7 +392,7 @@ test "simple e2e" {
     defer arena.deinit();
     var parser = try Parser.init(tokens, arena.allocator());
     const ast = try parser.parse();
-    const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .VARIABLE_STATEMENT = .{
+    const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .ASSIGNMENT_STATEMENT = .{
         .is_declaration = true,
         .name = @constCast("i"),
         .expression = @constCast(&Node{ .EXPRESSION = .{