about summary refs log tree commit diff
path: root/src/tokenizer.zig
diff options
context:
space:
mode:
authorBaitinq <[email protected]>2025-03-23 23:57:05 +0100
committerBaitinq <[email protected]>2025-03-24 00:17:07 +0100
commit31fa32743a5ed25724868cfa476e3af028adb118 (patch)
tree97afd19643223af3784baef5c2b57e48467dcce9 /src/tokenizer.zig
parentParser: Fix ambiguity with symbol declaration (diff)
downloadpry-lang-31fa32743a5ed25724868cfa476e3af028adb118.tar.gz
pry-lang-31fa32743a5ed25724868cfa476e3af028adb118.tar.bz2
pry-lang-31fa32743a5ed25724868cfa476e3af028adb118.zip
Feature: Add support for strings
Diffstat (limited to '')
-rw-r--r--src/tokenizer.zig47
1 files changed, 42 insertions, 5 deletions
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index 138ad69..b959738 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -19,6 +19,7 @@ pub const TokenType = union(enum) {
     // Literals
     NUMBER: i64,
     BOOLEAN: bool,
+    STRING: []u8,
 
     // Operators
     EQUALS: void,
@@ -93,10 +94,15 @@ pub const Tokenizer = struct {
         if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} });
         if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} });
 
-        const string = self.consume_string();
-        if (string.len == 0) return TokenizerError.TokenizingError;
+        if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i });
+        if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s });
 
-        if (std.fmt.parseInt(i32, string, 10) catch null) |i| return self.create_token(.{ .NUMBER = i });
+        const string = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return !std.ascii.isAlphanumeric(c) and c != '_';
+            }
+        }.condition);
+        if (string.len == 0) return TokenizerError.TokenizingError;
 
         return self.create_token(.{ .IDENTIFIER = string });
     }
@@ -118,7 +124,7 @@ pub const Tokenizer = struct {
         }
     }
 
-    fn consume_string(self: *Tokenizer) []u8 {
+    fn consume_until_condition(self: *Tokenizer, condition: fn (c: u8) bool) []u8 {
         defer self.offset = if (self.offset > 0) self.offset - 1 else self.offset;
         const start = self.offset;
         while (true) {
@@ -127,7 +133,7 @@ pub const Tokenizer = struct {
 
             const c = self.buf[self.offset];
 
-            if (!std.ascii.isAlphanumeric(c) and c != '_') return self.buf[start..self.offset];
+            if (condition(c)) return self.buf[start..self.offset];
         }
     }
 
@@ -140,6 +146,37 @@ pub const Tokenizer = struct {
         return false;
     }
 
+    fn accept_int_type(self: *Tokenizer) ?i64 {
+        const res = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return !std.ascii.isDigit(c);
+            }
+        }.condition);
+
+        return std.fmt.parseInt(i64, res, 10) catch null;
+    }
+
+    fn accept_string_type(self: *Tokenizer) ?[]u8 {
+        const prev_offset = self.offset;
+        if (!self.accept_string("\"")) {
+            self.offset = prev_offset;
+            return null;
+        }
+
+        const res = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return c == '"';
+            }
+        }.condition);
+
+        if (!self.accept_string("\"")) {
+            self.offset = prev_offset;
+            return null;
+        }
+
+        return res;
+    }
+
     fn create_token(self: *Tokenizer, token_type: TokenType) Token {
         return Token{
             .location = self.compute_location(),