author     Baitinq <manuelpalenzuelamerino@gmail.com>    2025-01-22 23:42:57 +0100
committer  Baitinq <manuelpalenzuelamerino@gmail.com>    2025-01-22 23:42:57 +0100
commit     b53e0f2add02cde1a1618332db89431101902c2f (patch)
tree       05e8b362ba9f44916e2cc46fc926e7aa0adb67ef
parent     Feature: Add support for negation (diff)
Tokenizer: Improve Token type
-rw-r--r--  src/parser.zig     34
-rw-r--r--  src/tokenizer.zig  101
2 files changed, 56 insertions, 79 deletions
diff --git a/src/parser.zig b/src/parser.zig
index b407c42..a13f1cc 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -6,23 +6,7 @@ const ParserError = error{
     OutOfMemory,
 };
 
-const NodeType = enum {
-    PROGRAM,
-    STATEMENT,
-    ASSIGNMENT_STATEMENT,
-    FUNCTION_CALL_STATEMENT,
-    IF_STATEMENT,
-    WHILE_STATEMENT,
-    EQUALITY_EXPRESSION,
-    ADDITIVE_EXPRESSION,
-    MULTIPLICATIVE_EXPRESSION,
-    UNARY_EXPRESSION,
-    PRIMARY_EXPRESSION,
-    FUNCTION_DEFINITION,
-    RETURN_STATEMENT,
-};
-
-pub const Node = union(NodeType) {
+pub const Node = union(enum) {
     PROGRAM: struct {
         statements: []*Node,
     },
@@ -159,7 +143,7 @@ pub const Parser = struct {
         return self.create_node(.{
             .ASSIGNMENT_STATEMENT = .{
                 .is_declaration = is_declaration,
-                .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
+                .name = try self.allocator.dupe(u8, identifier.type.IDENTIFIER),
                 .expression = @constCast(expression),
             },
         });
@@ -178,7 +162,7 @@ pub const Parser = struct {
         _ = try self.parse_token(tokenizer.TokenType.RPAREN);
 
         return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{
-            .name = try self.allocator.dupe(u8, identifier.IDENTIFIER),
+            .name = try self.allocator.dupe(u8, identifier.type.IDENTIFIER),
             .arguments = arguments,
         } });
     }
@@ -353,7 +337,7 @@ pub const Parser = struct {
 
         const token = self.consume_token() orelse return ParserError.ParsingError;
 
-        return switch (token) {
+        return switch (token.type) {
             .NUMBER => |number_token| try self.create_node(.{
                 .PRIMARY_EXPRESSION = .{
                     .NUMBER = .{
@@ -421,7 +405,7 @@ pub const Parser = struct {
             try node_list.append(try self.create_node(.{
                 .PRIMARY_EXPRESSION = .{
                     .IDENTIFIER = .{
-                        .name = try self.allocator.dupe(u8, ident.IDENTIFIER),
+                        .name = try self.allocator.dupe(u8, ident.type.IDENTIFIER),
                     },
                 },
             }));
@@ -444,11 +428,11 @@ pub const Parser = struct {
         });
     }
 
-    fn parse_token(self: *Parser, expected_token: tokenizer.TokenType) ParserError!tokenizer.Token {
+    fn parse_token(self: *Parser, expected_token: std.meta.Tag(tokenizer.TokenType)) ParserError!tokenizer.Token {
         errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token});
         const token = self.peek_token() orelse return ParserError.ParsingError;
 
-        if (token != expected_token) {
+        if (expected_token != std.meta.activeTag(token.type)) {
             if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token });
             return ParserError.ParsingError;
         }
@@ -469,9 +453,9 @@ pub const Parser = struct {
         return node;
     }
 
-    fn accept_token(self: *Parser, token: tokenizer.TokenType) ?tokenizer.Token {
+    fn accept_token(self: *Parser, token_type: std.meta.Tag(tokenizer.TokenType)) ?tokenizer.Token {
         const curr_token = self.peek_token() orelse return null;
-        if (curr_token == token) {
+        if (std.meta.activeTag(curr_token.type) == token_type) {
             return self.consume_token();
         }
         return null;
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index e125110..e377e31 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -4,53 +4,30 @@ const TokenizerError = error{
     TokenizingError,
 };
 
-pub const TokenType = enum {
+pub const TokenType = union(enum) {
     // Keywords
-    LET,
-    IF,
-    WHILE,
-    RETURN,
-    ARROW,
-
-    // Identifiers
-    IDENTIFIER,
-
-    // Literals
-    NUMBER,
-    BOOLEAN,
-
-    // Operators
-    EQUALS,
-    PLUS,
-    MINUS,
-    MUL,
-    DIV,
-    BANG,
-
-    // Punctuation
-    SEMICOLON,
-    COMMA,
-    LPAREN,
-    RPAREN,
-    LBRACE,
-    RBRACE,
-};
-
-pub const Token = union(TokenType) {
     LET: void,
     IF: void,
     WHILE: void,
     RETURN: void,
    ARROW: void,
+
+    // Identifiers
     IDENTIFIER: []u8,
+
+    // Literals
     NUMBER: i64,
     BOOLEAN: bool,
+
+    // Operators
     EQUALS: void,
     PLUS: void,
     MINUS: void,
     MUL: void,
     DIV: void,
     BANG: void,
+
+    // Punctuation
     SEMICOLON: void,
     COMMA: void,
     LPAREN: void,
@@ -59,6 +36,14 @@ pub const Token = union(TokenType) {
     RBRACE: void,
 };
 
+pub const Token = struct {
+    //TODO: Add source code info
+    col: u64,
+    row: u64,
+
+    type: TokenType,
+};
+
 pub const Tokenizer = struct {
     buf: []u8,
     offset: u64,
@@ -77,33 +62,33 @@ pub const Tokenizer = struct {
 
         const c = self.buf[self.offset];
 
-        if (self.accept_substr("let")) return Token{ .LET = void{} };
-        if (self.accept_substr("if")) return Token{ .IF = void{} };
-        if (self.accept_substr("while")) return Token{ .WHILE = void{} };
-        if (self.accept_substr("return")) return Token{ .RETURN = void{} };
-        if (self.accept_substr("true")) return Token{ .BOOLEAN = true };
-        if (self.accept_substr("false")) return Token{ .BOOLEAN = false };
-
-        if (self.accept_substr("=>")) return Token{ .ARROW = void{} };
-        if (c == ';') return Token{ .SEMICOLON = void{} };
-        if (c == ',') return Token{ .COMMA = void{} };
-        if (c == '(') return Token{ .LPAREN = void{} };
-        if (c == ')') return Token{ .RPAREN = void{} };
-        if (c == '{') return Token{ .LBRACE = void{} };
-        if (c == '}') return Token{ .RBRACE = void{} };
-        if (c == '=') return Token{ .EQUALS = void{} };
-        if (c == '+') return Token{ .PLUS = void{} };
-        if (c == '-') return Token{ .MINUS = void{} };
-        if (c == '*') return Token{ .MUL = void{} };
-        if (c == '/') return Token{ .DIV = void{} };
-        if (c == '!') return Token{ .BANG = void{} };
+        if (self.accept_substr("let")) return self.create_token(.{ .LET = void{} });
+        if (self.accept_substr("if")) return self.create_token(.{ .IF = void{} });
+        if (self.accept_substr("while")) return self.create_token(.{ .WHILE = void{} });
+        if (self.accept_substr("return")) return self.create_token(.{ .RETURN = void{} });
+        if (self.accept_substr("true")) return self.create_token(.{ .BOOLEAN = true });
+        if (self.accept_substr("false")) return self.create_token(.{ .BOOLEAN = false });
+
+        if (self.accept_substr("=>")) return self.create_token(.{ .ARROW = void{} });
+        if (c == ';') return self.create_token(.{ .SEMICOLON = void{} });
+        if (c == ',') return self.create_token(.{ .COMMA = void{} });
+        if (c == '(') return self.create_token(.{ .LPAREN = void{} });
+        if (c == ')') return self.create_token(.{ .RPAREN = void{} });
+        if (c == '{') return self.create_token(.{ .LBRACE = void{} });
+        if (c == '}') return self.create_token(.{ .RBRACE = void{} });
+        if (c == '=') return self.create_token(.{ .EQUALS = void{} });
+        if (c == '+') return self.create_token(.{ .PLUS = void{} });
+        if (c == '-') return self.create_token(.{ .MINUS = void{} });
+        if (c == '*') return self.create_token(.{ .MUL = void{} });
+        if (c == '/') return self.create_token(.{ .DIV = void{} });
+        if (c == '!') return self.create_token(.{ .BANG = void{} });
 
         const string = self.consume_string();
         if (string.len == 0) return TokenizerError.TokenizingError;
 
-        if (std.fmt.parseInt(i32, string, 10) catch null) |i| return Token{ .NUMBER = i };
+        if (std.fmt.parseInt(i32, string, 10) catch null) |i| return self.create_token(.{ .NUMBER = i });
 
-        return Token{ .IDENTIFIER = string };
+        return self.create_token(.{ .IDENTIFIER = string });
     }
 
     fn skip_comments(self: *Tokenizer) void {
@@ -145,6 +130,14 @@
         }
         return false;
     }
+
+    fn create_token(self: *Tokenizer, token_type: TokenType) Token {
+        return Token{
+            .col = self.offset,
+            .row = self.offset,
+            .type = token_type,
+        };
+    }
 };
 
 test "simple" {