about summary refs log tree commit diff
path: root/src/tokenizer.zig
diff options
context:
space:
mode:
author Baitinq <[email protected]> 2025-04-06 16:17:22 +0200
committer Baitinq <[email protected]> 2025-04-06 16:17:22 +0200
commit bd42f4899d09c8f6317ae1af1747ae5b6ac81650 (patch)
tree ed21afbbcca7f0be0751e4bf7104bf7cdc9f36e6 /src/tokenizer.zig
parent Codegen: Fix GEP type (diff)
downloadinterpreter-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.tar.gz
interpreter-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.tar.bz2
interpreter-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.zip
Feature: Add char type and support underlying pointer values
Diffstat (limited to 'src/tokenizer.zig')
-rw-r--r--src/tokenizer.zig38
1 files changed, 38 insertions, 0 deletions
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index 1c88bf2..2b57b8d 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -19,6 +19,7 @@ pub const TokenType = union(enum) {
     // Literals
     NUMBER: i64,
     BOOLEAN: bool,
+    CHAR: u8,
     STRING: []u8,
 
     // Operators
@@ -97,6 +98,7 @@ pub const Tokenizer = struct {
         if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} });
 
         if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i });
+        if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c });
         if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s });
 
         const string = self.consume_until_condition(struct {
@@ -158,6 +160,42 @@ pub const Tokenizer = struct {
         return std.fmt.parseInt(i64, res, 10) catch null;
     }
 
+    /// Lexes a char literal: one raw byte (`'a'`) or an escape (`'\n'`, `'\t'`, `'\0'`).
+    /// Returns null with `self.offset` restored when the input is not a valid literal.
+    fn accept_char_type(self: *Tokenizer) ?u8 {
+        const prev_offset = self.offset;
+        if (!self.accept_string("'")) {
+            self.offset = prev_offset;
+            return null;
+        }
+
+        const string = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return c == '\'';
+            }
+        }.condition);
+
+        // Check the length before indexing so `''`, a dangling backslash, over-long
+        // bodies and unknown escapes yield null instead of a crash or `unreachable`.
+        const res: ?u8 = if (string.len == 1 and string[0] != '\\')
+            string[0]
+        else if (string.len == 2 and string[0] == '\\')
+            switch (string[1]) {
+                'n' => '\n',
+                't' => '\t',
+                '0' => 0,
+                else => null,
+            }
+        else
+            null;
+
+        if (res == null or !self.accept_string("'")) {
+            self.offset = prev_offset;
+            return null;
+        }
+        return res;
+    }
+
     fn accept_string_type(self: *Tokenizer) ?[]u8 {
         const prev_offset = self.offset;
         if (!self.accept_string("\"")) {