summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/main.zig4
-rw-r--r--src/tokenizer.zig56
2 files changed, 49 insertions, 11 deletions
diff --git a/src/main.zig b/src/main.zig
index 41c33fa..3be7b14 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -21,7 +21,9 @@ pub fn main() !void {
     std.debug.print("Buf:\n{s}\n", .{buf});
 
     var sourceTokenizer = try tokenizer.Tokenizer.init(buf);
-    std.debug.print("Next: {any}\n", .{sourceTokenizer.next()});
+    while (sourceTokenizer.next()) |token| {
+        std.debug.print("{any}\n", .{token});
+    }
 }
 
 test {
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index f2b013e..e7683e9 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -8,11 +8,48 @@ pub const Tokenizer = struct {
         return Tokenizer{ .buf = buf, .offset = 0 };
     }
 
-    pub fn next(_: *Tokenizer) ?Token {
-        return null;
-        // return Token{
-        //     .LET = void{},
-        // };
+    pub fn next(self: *Tokenizer) ?Token {
+        while (true) {
+            self.skip_whitespace();
+            if (self.offset >= self.buf.len) return null;
+
+            const c = self.buf[self.offset];
+            if (std.ascii.isAlphanumeric(c)) break;
+            // Consume unconditionally so the loop always makes progress.
+            self.offset += 1;
+            switch (c) {
+                ';' => return Token{ .SEMICOLON = void{} },
+                '(' => return Token{ .LPAREN = void{} },
+                ')' => return Token{ .RPAREN = void{} },
+                '=' => return Token{ .EQUALS = void{} },
+                else => {}, // unrecognized character: skip it
+            }
+        }
+
+        // Alphanumeric run: keyword, integer literal, or identifier.
+        const string = self.consume_string();
+
+        if (std.mem.eql(u8, string, "let")) return Token{ .LET = void{} };
+        if (std.mem.eql(u8, string, "print")) return Token{ .PRINT = void{} };
+
+        if (std.fmt.parseInt(i64, string, 10) catch null) |i| return Token{ .NUMBER = i };
+
+        return Token{ .IDENTIFIER = string };
+    }
+
+    fn skip_whitespace(self: *Tokenizer) void {
+        while (self.offset < self.buf.len and std.ascii.isWhitespace(self.buf[self.offset])) {
+            self.offset += 1;
+        }
+    }
+
+    // Consume a maximal alphanumeric run and return it as a slice of buf.
+    fn consume_string(self: *Tokenizer) []u8 {
+        const start = self.offset;
+        while (self.offset < self.buf.len and std.ascii.isAlphanumeric(self.buf[self.offset])) {
+            self.offset += 1;
+        }
+        return self.buf[start..self.offset];
     }
 };
 
@@ -26,7 +63,6 @@ const TokenType = enum {
 
     // Literals
     NUMBER,
-    STRING,
 
     // Operators
     EQUALS,
@@ -42,7 +78,6 @@ pub const Token = union(TokenType) {
     PRINT: void,
     IDENTIFIER: []u8,
     NUMBER: i64,
-    STRING: []u8,
     EQUALS: void,
     SEMICOLON: void,
     LPAREN: void,
@@ -56,13 +91,14 @@ test "simple" {
         \\ print(i);
     ;
 
-    var tokenizer = try Tokenizer.init(@constCast(buf));
-
     var token_list = std.ArrayList(Token).init(std.testing.allocator);
+    defer token_list.deinit();
+
+    var tokenizer = try Tokenizer.init(@constCast(buf));
     while (tokenizer.next()) |token| {
         try token_list.append(token);
     }
-    try std.testing.expectEqualSlices(Token, &.{
+    try std.testing.expectEqualDeep(&.{
         Token{ .LET = void{} },
         Token{ .IDENTIFIER = @constCast("i") },
         Token{ .EQUALS = void{} },