From fa92a157746ae17f295d31b7a047dfeb99624a13 Mon Sep 17 00:00:00 2001 From: Baitinq Date: Wed, 11 Jun 2025 00:16:17 +0200 Subject: Misc: Rename lang --- src/bootstrap/tokenizer.src | 553 -------------------------------------------- 1 file changed, 553 deletions(-) delete mode 100644 src/bootstrap/tokenizer.src (limited to 'src/bootstrap/tokenizer.src') diff --git a/src/bootstrap/tokenizer.src b/src/bootstrap/tokenizer.src deleted file mode 100644 index 8d7f997..0000000 --- a/src/bootstrap/tokenizer.src +++ /dev/null @@ -1,553 +0,0 @@ -extern strlen = (*i8) => i64; -extern memcpy = (*void, *void, i64) => void; -extern sprintf = (*i8, *i8, varargs) => void; -extern atoi = (*i8) => i64; - -import "!stdlib.src"; -import "!mem.src"; - -/* Keywords */ -let TOKEN_IMPORT = 1; -let TOKEN_LET = 2; -let TOKEN_EXTERN = 3; -let TOKEN_IF = 4; -let TOKEN_WHILE = 5; -let TOKEN_RETURN = 6; -let TOKEN_BREAK = 7; -let TOKEN_CONTINUE = 8; -let TOKEN_ARROW = 9; -let TOKEN_STRUCT = 10; -let TOKEN_TYPE = 34; - -/* Identifiers */ -let TOKEN_IDENTIFIER = 11; - -/* Literals */ -let TOKEN_NUMBER = 12; -let TOKEN_BOOLEAN = 13; -let TOKEN_NULL = 14; -let TOKEN_CHAR = 15; -let TOKEN_STRING = 16; - -/* Operators */ -let TOKEN_EQUALS = 17; -let TOKEN_PLUS = 18; -let TOKEN_MINUS = 19; -let TOKEN_MUL = 20; -let TOKEN_DIV = 21; -let TOKEN_MOD = 22; -let TOKEN_BANG = 23; -let TOKEN_LESS = 24; -let TOKEN_GREATER = 25; -let TOKEN_DOT = 26; - -/* Punctuation */ -let TOKEN_SEMICOLON = 27; -let TOKEN_COMMA = 28; -let TOKEN_COLON = 29; -let TOKEN_LPAREN = 30; -let TOKEN_RPAREN = 31; -let TOKEN_LBRACE = 32; -let TOKEN_RBRACE = 33; - -let token = struct { - type: i64, - data: *void, -}; - -let tokenizer = struct { - buf: *i8, - buf_len: i64, - offset: i64, - - arena: *arena, -}; - -let print_tokens = (ts: *token, ts_len: i64) => i64 { - let i = 0; - while i < ts_len { - let to = (*(ts + cast(*token, i))); - - if (to.type == TOKEN_IMPORT) { - printf("Import\n"); - }; - if (to.type == TOKEN_LET) { - printf("Let\n"); - }; - if (to.type == TOKEN_EXTERN) { - printf("Extern\n"); - }; - if (to.type == TOKEN_IF) { - printf("If\n"); - }; - if (to.type == TOKEN_WHILE) { - printf("While\n"); - }; - if (to.type == TOKEN_RETURN) { - printf("Return\n"); - }; - if (to.type == TOKEN_BREAK) { - printf("Break\n"); - }; - if (to.type == TOKEN_CONTINUE) { - printf("Continue\n"); - }; - if (to.type == TOKEN_ARROW) { - printf("Arrow\n"); - }; - if (to.type == TOKEN_STRUCT) { - printf("Struct\n"); - }; - if (to.type == TOKEN_TYPE) { - printf("Type\n"); - }; - if (to.type == TOKEN_IDENTIFIER) { - printf("Identifier: %s\n", cast(*i8, to.data)); - }; - if (to.type == TOKEN_NUMBER) { - printf("Number: %d\n", *cast(*i64, to.data)); - }; - if (to.type == TOKEN_BOOLEAN) { - printf("Boolean: %d\n", *cast(*bool, to.data)); - }; - if (to.type == TOKEN_NULL) { - printf("Null\n"); - }; - if (to.type == TOKEN_CHAR) { - printf("Char: %c\n", *cast(*i8, to.data)); - }; - if (to.type == TOKEN_STRING) { - printf("String: %s\n", cast(*i8, to.data)); - }; - if (to.type == TOKEN_EQUALS) { - printf("Equals\n"); - }; - if (to.type == TOKEN_PLUS) { - printf("Plus\n"); - }; - if (to.type == TOKEN_MINUS) { - printf("Minus\n"); - }; - if (to.type == TOKEN_MUL) { - printf("Mul\n"); - }; - if (to.type == TOKEN_DIV) { - printf("Div\n"); - }; - if (to.type == TOKEN_MOD) { - printf("Mod\n"); - }; - if (to.type == TOKEN_BANG) { - printf("Bang\n"); - }; - if (to.type == TOKEN_LESS) { - printf("Less\n"); - }; - if (to.type == TOKEN_GREATER) { - printf("Greater\n"); - }; - if (to.type == TOKEN_DOT) { - printf("Dot\n"); - }; - if (to.type == TOKEN_SEMICOLON) { - printf("Semicolon\n"); - }; - if (to.type == TOKEN_COMMA) { - printf("Comma\n"); - }; - if (to.type == TOKEN_COLON) { - printf("Colon\n"); - }; - if (to.type == TOKEN_LPAREN) { - printf("LParen\n"); - }; - if (to.type == TOKEN_RPAREN) { - printf("RParen\n"); - }; - if (to.type == TOKEN_LBRACE) { - printf("LBrace\n"); - }; - if (to.type == TOKEN_RBRACE) { - printf("RBrace\n"); - }; - - i = i + 1; - }; - - return 0; -}; - -let tokenizer_skip_whitespace = (t: *tokenizer) => void { - while true { - if (*t).offset >= (*t).buf_len { return; }; - let c = (*((*t).buf + cast(*i8, (*t).offset))); - if !iswhitespace(c) { - return; - }; - (*t).offset = (*t).offset + 1; - }; - - return; -}; - -let tokenizer_accept_string = (t: *tokenizer, str: *i8) => bool { - let str_len = strlen(str); - if (*t).offset + str_len > (*t).buf_len { return false; }; - - let s = cast(*i8, arena_alloc((*t).arena, 1000)); - memcpy(cast(*void, s), cast(*void, (*t).buf + cast(*i8, (*t).offset)), str_len); - - if strcmp(s, str) { - (*t).offset = (*t).offset + str_len; - return true; - }; - - return false; -}; - -let tokenizer_consume_until_condition = (t: *tokenizer, condition: (i8) => bool) => *i8 { - let start = (*t).offset; - let res = cast(*i8, arena_alloc((*t).arena, 1000)); - - while true { - if (*t).offset >= (*t).buf_len { - return res; - }; - - let c = (*((*t).buf + cast(*i8, (*t).offset))); - - let offset = (*t).offset; - if c == '\\' { - let next_c = (*((*t).buf + cast(*i8, offset + 1))); - - let any = false; - if next_c == 'n' { - (*(res + cast(*i8, offset - start))) = '\n'; - any = true; - }; - if next_c == 't' { - (*(res + cast(*i8, offset - start))) = '\t'; - any = true; - }; - if next_c == 'r' { - (*(res + cast(*i8, offset - start))) = '\r'; - any = true; - }; - if next_c == '0' { - (*(res + cast(*i8, offset - start))) = '\0'; - any = true; - }; - if next_c == '\\' { - (*(res + cast(*i8, offset - start))) = '\\'; - any = true; - }; - if !any { - (*(res + cast(*i8, offset - start))) = next_c; - }; - - offset = offset + 1; - offset = offset + 1; - (*t).offset = offset; - - continue; - }; - - if condition(c) { - return res; - }; - - (*(res + cast(*i8, offset - start))) = c; - (*(res + cast(*i8, offset - start + 1))) = '\0'; - - offset = offset + 1; - (*t).offset = offset; - }; - - return cast(*i8, null); -}; - -let tokenizer_accept_int_type = (t: *tokenizer) => *i64 { - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return !isdigit(c); - }); - if string == cast(*i8, null) { - return cast(*i64, null); - }; - if strlen(string) == 0 { - return cast(*i64, null); - }; - let x = cast(*i64, arena_alloc((*t).arena, sizeof(i64))); - *x = atoi(string); - return x; -}; - -let tokenizer_accept_char_type = (t: *tokenizer) => *i8 { - let prev_offset = (*t).offset; - if !tokenizer_accept_string(t, "'") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return c == '\''; - }); - - if !tokenizer_accept_string(t, "'") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - return string; -}; - -let tokenizer_accept_string_type = (t: *tokenizer) => *i8 { - let prev_offset = (*t).offset; - if !tokenizer_accept_string(t, "\"") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return c == '"'; - }); - - if !tokenizer_accept_string(t, "\"") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - return string; -}; - -let tokenizer_skip_comments = (t: *tokenizer) => void { - if !tokenizer_accept_string(t, "/*") { return; }; - - while !tokenizer_accept_string(t, "*/") { - (*t).offset = (*t).offset + 1; - }; - - return; -}; - -let tokenizer_next = (t: *tokenizer) => *token { - tokenizer_skip_whitespace(t); - tokenizer_skip_comments(t); - tokenizer_skip_whitespace(t); - - if (*t).offset >= (*t).buf_len { - return cast(*token, null); - }; - - let to = cast(*token, arena_alloc((*t).arena, sizeof(token))); - - if tokenizer_accept_string(t, "import") { - (*to).type = TOKEN_IMPORT; - return to; - }; - if tokenizer_accept_string(t, "let") { - (*to).type = TOKEN_LET; - return to; - }; - if tokenizer_accept_string(t, "extern") { - (*to).type = TOKEN_EXTERN; - return to; - }; - if tokenizer_accept_string(t, "if") { - (*to).type = TOKEN_IF; - return to; - }; - if tokenizer_accept_string(t, "while") { - (*to).type = TOKEN_WHILE; - return to; - }; - if tokenizer_accept_string(t, "return") { - (*to).type = TOKEN_RETURN; - return to; - }; - if tokenizer_accept_string(t, "break") { - (*to).type = TOKEN_BREAK; - return to; - }; - if tokenizer_accept_string(t, "continue") { - (*to).type = TOKEN_CONTINUE; - return to; - }; - if tokenizer_accept_string(t, "true") { - (*to).type = TOKEN_BOOLEAN; - let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); - *data = true; - (*to).data = cast(*void, data); - return to; - }; - if tokenizer_accept_string(t, "false") { - (*to).type = TOKEN_BOOLEAN; - let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); - *data = false; - (*to).data = cast(*void, data); - return to; - }; - if tokenizer_accept_string(t, "null") { - (*to).type = TOKEN_NULL; - return to; - }; - if tokenizer_accept_string(t, "struct") { - (*to).type = TOKEN_STRUCT; - return to; - }; - if tokenizer_accept_string(t, "newtype") { - (*to).type = TOKEN_TYPE; - return to; - }; - - if tokenizer_accept_string(t, "=>") { - (*to).type = TOKEN_ARROW; - return to; - }; - if tokenizer_accept_string(t, ";") { - (*to).type = TOKEN_SEMICOLON; - return to; - }; - if tokenizer_accept_string(t, ",") { - (*to).type = TOKEN_COMMA; - return to; - }; - if tokenizer_accept_string(t, ":") { - (*to).type = TOKEN_COLON; - return to; - }; - if tokenizer_accept_string(t, "(") { - (*to).type = TOKEN_LPAREN; - return to; - }; - if tokenizer_accept_string(t, ")") { - (*to).type = TOKEN_RPAREN; - return to; - }; - if tokenizer_accept_string(t, "{") { - (*to).type = TOKEN_LBRACE; - return to; - }; - if tokenizer_accept_string(t, "}") { - (*to).type = TOKEN_RBRACE; - return to; - }; - if tokenizer_accept_string(t, "=") { - (*to).type = TOKEN_EQUALS; - return to; - }; - if tokenizer_accept_string(t, "+") { - (*to).type = TOKEN_PLUS; - return to; - }; - if tokenizer_accept_string(t, "-") { - (*to).type = TOKEN_MINUS; - return to; - }; - if tokenizer_accept_string(t, "*") { - (*to).type = TOKEN_MUL; - return to; - }; - if tokenizer_accept_string(t, "/") { - (*to).type = TOKEN_DIV; - return to; - }; - if tokenizer_accept_string(t, "%") { - (*to).type = TOKEN_MOD; - return to; - }; - if tokenizer_accept_string(t, "!") { - (*to).type = TOKEN_BANG; - return to; - }; - if tokenizer_accept_string(t, "<") { - (*to).type = TOKEN_LESS; - return to; - }; - if tokenizer_accept_string(t, ">") { - (*to).type = TOKEN_GREATER; - return to; - }; - if tokenizer_accept_string(t, ".") { - (*to).type = TOKEN_DOT; - return to; - }; - - let maybe_int = tokenizer_accept_int_type(t); - if maybe_int != cast(*i64, null) { - (*to).type = TOKEN_NUMBER; - (*to).data = cast(*void, maybe_int); - return to; - }; - - let maybe_char = tokenizer_accept_char_type(t); - if maybe_char != cast(*i8, null) { - (*to).type = TOKEN_CHAR; - (*to).data = cast(*void, maybe_char); - return to; - }; - - let maybe_string = tokenizer_accept_string_type(t); - if maybe_string != cast(*i8, null) { - (*to).type = TOKEN_STRING; - (*to).data = cast(*void, maybe_string); - return to; - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - if isalphanum(c) { - return false; - }; - if c == '_' { - return false; - }; - return true; - }); - if strlen(string) == 0 { - printf("NO IDENT!\n"); - return cast(*token, null); - }; - - (*to).type = TOKEN_IDENTIFIER; - (*to).data = cast(*void, string); - - return to; -}; - -let tokenizer_init = (alloc: *arena, file: slice) => *tokenizer { - let t = cast(*tokenizer, arena_alloc(alloc, sizeof(tokenizer))); - (*t).arena = alloc; - (*t).offset = 0; - (*t).buf = cast(*i8, file.data); - (*t).buf_len = file.data_len; - - printf("File size: %d\n", (*t).buf_len); - - printf("%s\n", (*t).buf); - - return t; -}; - -let tokenizer_tokenize = (t: *tokenizer) => slice { - let tokens = cast(*token, arena_alloc((*t).arena, sizeof(token) * 1000)); /* why does it not care about type here */ - let tokens_len = 0; - - while true { - let tk = tokenizer_next(t); - if tk == cast(*token, null) { - break; - }; - printf("Add token: %d\n", (*tk).type); - - (*(tokens + cast(*token, tokens_len))) = *tk; - tokens_len = tokens_len + 1; - }; - - printf("PRINT TOKENS: %d\n", tokens_len); - - print_tokens(tokens, tokens_len); - - let res = slice{}; - res.data = cast(*void, tokens); - res.data_len = tokens_len; - return res; -}; -- cgit 1.4.1