diff options
| -rw-r--r-- | src/bootstrap/tokenizer.src | 108 |
1 files changed, 98 insertions, 10 deletions
diff --git a/src/bootstrap/tokenizer.src b/src/bootstrap/tokenizer.src index 3c420d5..faeac4d 100644 --- a/src/bootstrap/tokenizer.src +++ b/src/bootstrap/tokenizer.src @@ -85,7 +85,7 @@ let tokenizer_accept_string = (str: *i8) => bool { let s = malloc(1000); memcpy(s, buf + offset, str_len); - printf("Accept string: %s vs %s\n", str, s); + /* printf("Accept string: %s vs %s\n", str, s); */ if strcmp(s, str) { offset = offset + str_len; return true; @@ -99,18 +99,18 @@ let tokenizer_consume_until_condition = (condition: (i8) => bool) => *i8 { let res = malloc(1000); while true { - memcpy(res, buf + start, offset); - (*(res + (offset - start))) = '\0'; - if offset >= file_size { return res; }; let c = (*(buf + offset)); - /* TODO: calling condition breaks */ - if !isdigit(c) { + if condition(c) { return res; }; + + memcpy(res, buf + start, offset); + (*(res + (offset - start))) = '\0'; + offset = offset + 1; }; @@ -122,18 +122,90 @@ let isnt_digit = (c: i8) => bool { }; let tokenizer_accept_int_type = () => *i64 { - let res = tokenizer_consume_until_condition(isnt_digit); - if res == null { + let string = tokenizer_consume_until_condition(isnt_digit); + printf("INT STRING: %s\n", string); + if string == null { return null; }; - if strlen(res) == 0 { + if strlen(string) == 0 { return null; }; let x = malloc(8); - *x = atoi(res); + *x = atoi(string); + printf("Int: %d\n", *x); return x; }; +let is_backtick = (c: i8) => bool { + return c == '\''; +}; + +let tokenizer_accept_char_type = () => *i8 { + let prev_offset = offset; + if !tokenizer_accept_string("'") { + offset = prev_offset; + return null; + }; + + let string = tokenizer_consume_until_condition(is_backtick); + + /*let string_len = strlen(string); + let i = 0; + + while i < string_len { + let c = (*(string + i)); + if c == '\' { + i = i + 1; + let nc = (*(string + i)); + let res = malloc(1); + if nc == 'n' { + *res = '\n'; + }; + if nc == 't' { + *res = '\t'; + }; + if nc == 'r' { + *res = '\r'; + }; + if nc == '0' { + *res = '\0'; + }; + unreachable + return res; + }; + i = i + 1; + }; + */ + + if !tokenizer_accept_string("'") { + offset = prev_offset; + return null; + }; + + return string; +}; + +let is_quote = (c: i8) => bool { + return c == '"'; +}; + +let tokenizer_accept_string_type = () => *i8 { + let prev_offset = offset; + if !tokenizer_accept_string("\"") { + offset = prev_offset; + return null; + }; + + let string = tokenizer_consume_until_condition(is_quote); + + if !tokenizer_accept_string("\"") { + offset = prev_offset; + return null; + }; + + return string; +}; + let tokenizer_skip_comments = () => void { if !tokenizer_accept_string("/*") { return; }; @@ -241,6 +313,22 @@ let tokenizer_next = () => *i8 { return t; }; + let maybe_char = tokenizer_accept_char_type(); + if !(maybe_char == null) { + let t = malloc(1000); + sprintf(t, "char:%d", *maybe_char); + + return t; + }; + + let maybe_string = tokenizer_accept_string_type(); + if !(maybe_string == null) { + let t = malloc(1000); + sprintf(t, "string:%s", maybe_string); + + return t; + }; + let c = (*(buf + offset)); offset = offset + 1; |