summary refs log tree commit diff
path: root/src/tokenizer.zig
diff options
context:
space:
mode:
author Baitinq <manuelpalenzuelamerino@gmail.com> 2025-01-22 23:42:57 +0100
committer Baitinq <manuelpalenzuelamerino@gmail.com> 2025-01-22 23:42:57 +0100
commit b53e0f2add02cde1a1618332db89431101902c2f (patch)
tree 05e8b362ba9f44916e2cc46fc926e7aa0adb67ef /src/tokenizer.zig
parent Feature: Add support for negation (diff)
download interpreter-b53e0f2add02cde1a1618332db89431101902c2f.tar.gz
interpreter-b53e0f2add02cde1a1618332db89431101902c2f.tar.bz2
interpreter-b53e0f2add02cde1a1618332db89431101902c2f.zip
Tokenizer: Improve Token type
Diffstat (limited to 'src/tokenizer.zig')
-rw-r--r-- src/tokenizer.zig 101
1 files changed, 47 insertions, 54 deletions
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index e125110..e377e31 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -4,53 +4,30 @@ const TokenizerError = error{
     TokenizingError,
 };
 
-pub const TokenType = enum {
+pub const TokenType = union(enum) {
     // Keywords
-    LET,
-    IF,
-    WHILE,
-    RETURN,
-    ARROW,
-
-    // Identifiers
-    IDENTIFIER,
-
-    // Literals
-    NUMBER,
-    BOOLEAN,
-
-    // Operators
-    EQUALS,
-    PLUS,
-    MINUS,
-    MUL,
-    DIV,
-    BANG,
-
-    // Punctuation
-    SEMICOLON,
-    COMMA,
-    LPAREN,
-    RPAREN,
-    LBRACE,
-    RBRACE,
-};
-
-pub const Token = union(TokenType) {
     LET: void,
     IF: void,
     WHILE: void,
     RETURN: void,
     ARROW: void,
+
+    // Identifiers
     IDENTIFIER: []u8,
+
+    // Literals
     NUMBER: i64,
     BOOLEAN: bool,
+
+    // Operators
     EQUALS: void,
     PLUS: void,
     MINUS: void,
     MUL: void,
     DIV: void,
     BANG: void,
+
+    // Punctuation
     SEMICOLON: void,
     COMMA: void,
     LPAREN: void,
@@ -59,6 +36,14 @@ pub const Token = union(TokenType) {
     RBRACE: void,
 };
 
+pub const Token = struct {
+    // TODO: Add source code info (col/row below are both filled from the tokenizer's offset for now)
+    col: u64, // set to Tokenizer.offset by create_token
+    row: u64, // set to Tokenizer.offset by create_token, i.e. the same value as col
+
+    type: TokenType, // the token's kind together with its payload, if any (identifier text, number, boolean)
+};
+
 pub const Tokenizer = struct {
     buf: []u8,
     offset: u64,
@@ -77,33 +62,33 @@ pub const Tokenizer = struct {
 
         const c = self.buf[self.offset];
 
-        if (self.accept_substr("let")) return Token{ .LET = void{} };
-        if (self.accept_substr("if")) return Token{ .IF = void{} };
-        if (self.accept_substr("while")) return Token{ .WHILE = void{} };
-        if (self.accept_substr("return")) return Token{ .RETURN = void{} };
-        if (self.accept_substr("true")) return Token{ .BOOLEAN = true };
-        if (self.accept_substr("false")) return Token{ .BOOLEAN = false };
-
-        if (self.accept_substr("=>")) return Token{ .ARROW = void{} };
-        if (c == ';') return Token{ .SEMICOLON = void{} };
-        if (c == ',') return Token{ .COMMA = void{} };
-        if (c == '(') return Token{ .LPAREN = void{} };
-        if (c == ')') return Token{ .RPAREN = void{} };
-        if (c == '{') return Token{ .LBRACE = void{} };
-        if (c == '}') return Token{ .RBRACE = void{} };
-        if (c == '=') return Token{ .EQUALS = void{} };
-        if (c == '+') return Token{ .PLUS = void{} };
-        if (c == '-') return Token{ .MINUS = void{} };
-        if (c == '*') return Token{ .MUL = void{} };
-        if (c == '/') return Token{ .DIV = void{} };
-        if (c == '!') return Token{ .BANG = void{} };
+        if (self.accept_substr("let")) return self.create_token(.{ .LET = void{} });
+        if (self.accept_substr("if")) return self.create_token(.{ .IF = void{} });
+        if (self.accept_substr("while")) return self.create_token(.{ .WHILE = void{} });
+        if (self.accept_substr("return")) return self.create_token(.{ .RETURN = void{} });
+        if (self.accept_substr("true")) return self.create_token(.{ .BOOLEAN = true });
+        if (self.accept_substr("false")) return self.create_token(.{ .BOOLEAN = false });
+
+        if (self.accept_substr("=>")) return self.create_token(.{ .ARROW = void{} });
+        if (c == ';') return self.create_token(.{ .SEMICOLON = void{} });
+        if (c == ',') return self.create_token(.{ .COMMA = void{} });
+        if (c == '(') return self.create_token(.{ .LPAREN = void{} });
+        if (c == ')') return self.create_token(.{ .RPAREN = void{} });
+        if (c == '{') return self.create_token(.{ .LBRACE = void{} });
+        if (c == '}') return self.create_token(.{ .RBRACE = void{} });
+        if (c == '=') return self.create_token(.{ .EQUALS = void{} });
+        if (c == '+') return self.create_token(.{ .PLUS = void{} });
+        if (c == '-') return self.create_token(.{ .MINUS = void{} });
+        if (c == '*') return self.create_token(.{ .MUL = void{} });
+        if (c == '/') return self.create_token(.{ .DIV = void{} });
+        if (c == '!') return self.create_token(.{ .BANG = void{} });
 
         const string = self.consume_string();
         if (string.len == 0) return TokenizerError.TokenizingError;
 
-        if (std.fmt.parseInt(i32, string, 10) catch null) |i| return Token{ .NUMBER = i };
+        if (std.fmt.parseInt(i32, string, 10) catch null) |i| return self.create_token(.{ .NUMBER = i });
 
-        return Token{ .IDENTIFIER = string };
+        return self.create_token(.{ .IDENTIFIER = string });
     }
 
     fn skip_comments(self: *Tokenizer) void {
@@ -145,6 +130,14 @@ pub const Tokenizer = struct {
         }
         return false;
     }
+
+    fn create_token(self: *Tokenizer, token_type: TokenType) Token { // wraps token_type in a Token stamped with the current offset
+        return Token{
+            .col = self.offset, // NOTE(review): this is the index into buf, not a column within the current line
+            .row = self.offset, // NOTE(review): same offset as col; no line counting happens here, presumably a placeholder (confirm)
+            .type = token_type,
+        };
+    }
 };
 
 test "simple" {