| author | Baitinq <[email protected]> | 2025-05-17 23:07:17 +0200 |
|---|---|---|
| committer | Baitinq <[email protected]> | 2025-05-17 23:07:17 +0200 |
| commit | c664f315c62e86be8458488e60d91d13281920ff (patch) | |
| tree | 563ec23f2da637b41de6a9968ec43e93bcd6bc56 /src | |
| parent | Codegen: Fix bug with if generation in nested functions (diff) | |
| download | interpreter-c664f315c62e86be8458488e60d91d13281920ff.tar.gz interpreter-c664f315c62e86be8458488e60d91d13281920ff.tar.bz2 interpreter-c664f315c62e86be8458488e60d91d13281920ff.zip | |
Tokenizer: Cleanup consuming logic
Diffstat (limited to 'src')
| -rw-r--r-- | src/bootstrap/tokenizer.src | 76 |
| -rw-r--r-- | src/tokenizer.zig | 58 |
2 files changed, 60 insertions, 74 deletions
```diff
diff --git a/src/bootstrap/tokenizer.src b/src/bootstrap/tokenizer.src
index f097a9d..dee4a40 100644
--- a/src/bootstrap/tokenizer.src
+++ b/src/bootstrap/tokenizer.src
@@ -104,14 +104,50 @@ let tokenizer_consume_until_condition = (condition: (i8) => bool) => *i8 {
         };
 
         let c = (*(buf + offset));
-        if condition(c) {
-            return res;
-        };
+
+        if c == '\\' {
+            let next_c = (*(buf + (offset + 1)));
 
-        (*(res + (offset - start))) = c;
-        (*(res + (offset - start + 1))) = '\0';
+            let any = false;
+            if next_c == 'n' {
+                (*(res + (offset - start))) = '\n';
+                any = true;
+            };
+            if next_c == 't' {
+                (*(res + (offset - start))) = '\t';
+                any = true;
+            };
+            if next_c == 'r' {
+                (*(res + (offset - start))) = '\r';
+                any = true;
+            };
+            if next_c == '0' {
+                (*(res + (offset - start))) = '\0';
+                any = true;
+            };
+            if next_c == '\\' {
+                (*(res + (offset - start))) = '\\';
+                any = true;
+            };
+            if !any {
+                (*(res + (offset - start))) = next_c;
+            };
+
+            offset = offset + 1;
+            offset = offset + 1;
+        };
 
-        offset = offset + 1;
+        /* else / continue */
+        if !(c == '\\') {
+            if condition(c) {
+                return res;
+            };
+
+            (*(res + (offset - start))) = c;
+            (*(res + (offset - start + 1))) = '\0';
+
+            offset = offset + 1;
+        };
     };
 
     return null;
@@ -143,34 +179,6 @@ let tokenizer_accept_char_type = () => *i8 {
         return c == '\'';
     });
 
-    /*let string_len = strlen(string);
-    let i = 0;
-
-    while i < string_len {
-        let c = (*(string + i));
-        if c == '\' {
-            i = i + 1;
-            let nc = (*(string + i));
-            let res = malloc(1);
-            if nc == 'n' {
-                *res = '\n';
-            };
-            if nc == 't' {
-                *res = '\t';
-            };
-            if nc == 'r' {
-                *res = '\r';
-            };
-            if nc == '0' {
-                *res = '\0';
-            };
-            unreachable
-            return res;
-        };
-        i = i + 1;
-    };
-    */
-
     if !tokenizer_accept_string("'") {
         offset = prev_offset;
         return null;
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index c9345fe..4ae6316 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -148,21 +148,29 @@ pub const Tokenizer = struct {
     fn consume_until_condition(self: *Tokenizer, condition: fn (c: u8) bool) []u8 {
         var res = std.ArrayList(u8).init(self.arena);
 
-        var prev_c: ?u8 = null;
         while (true) : (self.offset += 1) {
             if (self.offset >= self.buf.len) {
                 return res.items;
             }
 
             const c = self.buf[self.offset];
-            defer prev_c = c;
+
+            if (c == '\\') {
+                const next_c = self.buf[self.offset + 1];
+                res.append(switch (next_c) {
+                    'n' => '\n',
+                    't' => '\t',
+                    'r' => '\r',
+                    '0' => 0,
+                    '\\' => '\\',
+                    else => |x| x,
+                }) catch unreachable;
+                self.offset += 1;
+                continue;
+            }
 
             if (condition(c)) {
-                if (prev_c == null or prev_c.? != '\\') {
-                    return res.items;
-                } else {
-                    _ = res.pop();
-                }
+                return res.items;
             }
 
             res.append(c) catch unreachable;
@@ -202,28 +210,14 @@ pub const Tokenizer = struct {
             }
         }.condition);
 
-        var res: u8 = string[0];
-        var i: usize = 0;
-        while (i < string.len) : (i += 1) {
-            if (string[i] == '\\') {
-                i += 1;
-                res = switch (string[i]) {
-                    'n' => '\n',
-                    't' => '\t',
-                    'r' => '\r',
-                    '0' => 0,
-                    else => unreachable,
-                };
-                break;
-            }
-        }
+        std.debug.assert(string.len == 1);
 
         if (!self.accept_string("'")) {
             self.offset = prev_offset;
             return null;
         }
 
-        return res;
+        return string[0];
     }
 
     fn accept_string_type(self: *Tokenizer) ?[]u8 {
@@ -239,28 +233,12 @@ pub const Tokenizer = struct {
             }
         }.condition);
 
-        var res = std.ArrayList(u8).init(self.arena);
-
-        var i: usize = 0;
-        while (i < string.len) : (i += 1) {
-            if (string[i] == '\\') {
-                i += 1;
-                switch (string[i]) {
-                    'n' => res.append('\n') catch unreachable,
-                    't' => res.append('\t') catch unreachable,
-                    else => unreachable,
-                }
-                continue;
-            }
-            res.append(string[i]) catch unreachable;
-        }
-
         if (!self.accept_string("\"")) {
             self.offset = prev_offset;
             return null;
         }
 
-        return res.items;
+        return string;
     }
 
     fn create_token(self: *Tokenizer, token_type: TokenType) Token {
```
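For reference, here is a minimal standalone sketch of the escape-handling strategy the reworked `consume_until_condition` uses: when a `\` is encountered, the following character is translated (`n`, `t`, `r`, `0`, `\`) and both bytes are consumed in one step, so `accept_char_type` and `accept_string_type` no longer need a second unescaping pass. The helper name `unescapeInto` is hypothetical and not part of the repository; the sketch assumes the same managed `std.ArrayList` API that `src/tokenizer.zig` already uses.

```zig
const std = @import("std");

// Hypothetical helper (not in the repository) mirroring the escape
// translation that consume_until_condition now performs inline:
// on '\', map the next character and consume both bytes at once.
fn unescapeInto(out: *std.ArrayList(u8), buf: []const u8) !void {
    var i: usize = 0;
    while (i < buf.len) : (i += 1) {
        const c = buf[i];
        if (c == '\\' and i + 1 < buf.len) {
            const next_c = buf[i + 1];
            try out.append(switch (next_c) {
                'n' => '\n',
                't' => '\t',
                'r' => '\r',
                '0' => 0,
                '\\' => '\\',
                else => next_c, // unknown escapes pass through unchanged
            });
            i += 1; // skip the escaped character as well
            continue;
        }
        try out.append(c);
    }
}

test "escapes are translated while consuming" {
    var out = std.ArrayList(u8).init(std.testing.allocator);
    defer out.deinit();
    try unescapeInto(&out, "a\\tb\\\\c");
    try std.testing.expectEqualStrings("a\tb\\c", out.items);
}
```

The commit itself performs this translation inline while scanning `self.buf` rather than through a separate helper; the test above only illustrates the intended mapping.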