/*
 * Tokenizer module.
 *
 * Reads a source file into memory and splits it into tokens. Tokens are
 * stored back to back in one flat buffer, each one terminated by a NUL byte.
 * All scanner state lives in the globals declared below.
 */
extern strlen = (*i8) => i64;
extern memcpy = (*void, *void, i64) => void;
extern sprintf = (*i8, *i8, varargs) => void;
extern atoi = (*i8) => i64;
extern fopen = (*i8, *i8) => *i8;
extern fgets = (*i8, i64, *i8) => void;
extern feof = (*i8) => bool;
extern fseek = (*i8, i64, i64) => i64;
extern ftell = (*i8) => i64;
extern fread = (*i8, i64, i64, *i8) => i64;
extern fclose = (*i8) => *i8;
extern malloc = (i64) => *void;
extern free = (*void) => void;

import "!stdlib.src";

/*
 * Scanner state:
 *   offset     - index of the next unread byte in buf
 *   buf        - NUL terminated contents of the file being tokenized
 *   file_size  - number of valid bytes in buf (terminator not counted)
 *   tokens     - flat list of NUL terminated token strings
 *   tokens_len - number of bytes currently used in tokens
 */
let offset = 0;
let buf = cast(*i8, null);
let file_size = 0;
let tokens = cast(*i8, null);
let tokens_len = 0;

/*
 * Loads the whole file into a freshly allocated, NUL terminated buffer.
 * Stores the buffer in the global buf and its length in file_size.
 * Returns the buffer, or null (with file_size set to 0) when the file
 * cannot be opened.
 */
let read_file = (filename: *i8) => *i8 {
	let file = fopen(filename, "r");
	if file == cast(*i8, null) {
		file_size = 0;
		return cast(*i8, null);
	};

	/* Seek to the end (whence 2) to learn the size, then rewind (whence 0). */
	fseek(file, 0, 2);
	file_size = ftell(file);
	fseek(file, 0, 0);

	buf = cast(*i8, malloc(file_size + 1));
	let bytes_read = fread(buf, 1, file_size, file);

	/* Trust what was actually read so the scanner never touches uninitialised bytes. */
	file_size = bytes_read;
	(*(buf + cast(*i8, bytes_read))) = '\0';

	fclose(file);
	return buf;
};

/*
 * Appends token, including its NUL terminator, to the token list and
 * advances tokens_len accordingly. Always returns 0.
 * NOTE(review): no capacity check is done here, the caller must make sure
 * the list is large enough.
 */
let add_token = (tokens: *i8, token: *i8) => i64 {
	println("Add token: %s", token);

	let i = 0;
	while true {
		let c = (*(token + cast(*i8, i)));
		(*(tokens + cast(*i8, tokens_len))) = c;
		tokens_len = tokens_len + 1;
		i = i + 1;
		if c == '\0' {
			return 0;
		};
	};

	return 0;
};

/*
 * Prints every stored token on its own line by emitting a newline in
 * place of each NUL separator. Always returns 0.
 */
let print_tokens = (tokens: *i8) => i64 {
	let i = 0;
	while i < tokens_len {
		let c = (*(tokens + cast(*i8, i)));
		if c == '\0' {
			c = '\n';
		};
		printf("%c", c);
		i = i + 1;
	};

	return 0;
};

/* Advances offset past any run of whitespace, stopping at the end of input. */
let tokenizer_skip_whitespace = () => void {
	while true {
		if offset >= file_size {
			return;
		};
		let c = (*(buf + cast(*i8, offset)));
		if !iswhitespace(c) {
			return;
		};
		offset = offset + 1;
	};

	return;
};

/*
 * Consumes str when the input at the current offset starts with it.
 * Returns true and advances offset on a match, otherwise leaves offset
 * untouched and returns false. The comparison is done in place, so no
 * scratch buffer is allocated.
 */
let tokenizer_accept_string = (str: *i8) => bool {
	let str_len = strlen(str);
	if offset + str_len > file_size {
		return false;
	};

	let i = 0;
	while i < str_len {
		let have = (*(buf + cast(*i8, offset + i)));
		let want = (*(str + cast(*i8, i)));
		if have == want {
			i = i + 1;
			continue;
		};
		return false;
	};

	offset = offset + str_len;
	return true;
};

/*
 * Like tokenizer_accept_string, but only matches when str is not
 * immediately followed by an identifier character (alphanumeric or
 * underscore). This keeps a word such as letter from being split into the
 * keyword let followed by ter.
 */
let tokenizer_accept_keyword = (str: *i8) => bool {
	let prev_offset = offset;
	if !tokenizer_accept_string(str) {
		return false;
	};
	if offset >= file_size {
		return true;
	};

	let c = (*(buf + cast(*i8, offset)));
	if isalphanum(c) {
		offset = prev_offset;
		return false;
	};
	if c == '_' {
		offset = prev_offset;
		return false;
	};

	return true;
};

/*
 * Collects input bytes into a newly allocated, NUL terminated string until
 * condition returns true for the next byte or the input ends. The byte that
 * stopped the scan is not consumed.
 *
 * A backslash always consumes the byte after it: n, t, r, 0 and backslash
 * are translated to the matching control character, any other byte is
 * copied verbatim. Escaped bytes are never passed to condition.
 *
 * The caller owns the returned string.
 */
let tokenizer_consume_until_condition = (condition: (i8) => bool) => *i8 {
	/* The result can never be longer than the remaining input, so size it from that. */
	let res = cast(*i8, malloc(file_size - offset + 2));
	let res_len = 0;
	(*(res + cast(*i8, 0))) = '\0';

	while true {
		if offset >= file_size {
			return res;
		};

		let c = (*(buf + cast(*i8, offset)));

		if c == '\\' {
			/* Reading one past the backslash is safe because buf is NUL terminated. */
			let next_c = (*(buf + cast(*i8, offset + 1)));
			let out = next_c;
			if next_c == 'n' {
				out = '\n';
			};
			if next_c == 't' {
				out = '\t';
			};
			if next_c == 'r' {
				out = '\r';
			};
			if next_c == '0' {
				out = '\0';
			};

			(*(res + cast(*i8, res_len))) = out;
			(*(res + cast(*i8, res_len + 1))) = '\0';
			res_len = res_len + 1;
			offset = offset + 2;
			continue;
		};

		if condition(c) {
			return res;
		};

		(*(res + cast(*i8, res_len))) = c;
		(*(res + cast(*i8, res_len + 1))) = '\0';
		res_len = res_len + 1;
		offset = offset + 1;
	};

	return cast(*i8, null);
};

/*
 * Parses a run of decimal digits at the current offset.
 * Returns a newly allocated i64 holding the value (owned by the caller),
 * or null when the input does not start with a digit.
 */
let tokenizer_accept_int_type = () => *i64 {
	let string = tokenizer_consume_until_condition((c: i8) => bool {
		return !isdigit(c);
	});
	if string == cast(*i8, null) {
		return cast(*i64, null);
	};
	if strlen(string) == 0 {
		free(cast(*void, string));
		return cast(*i64, null);
	};

	let x = cast(*i64, malloc(8));
	*x = atoi(string);
	free(cast(*void, string));
	return x;
};

/*
 * Parses a character literal delimited by single quotes.
 * Returns the unescaped contents as a newly allocated string (owned by the
 * caller), or null with offset restored when no complete literal is found.
 */
let tokenizer_accept_char_type = () => *i8 {
	let prev_offset = offset;
	if !tokenizer_accept_string("'") {
		offset = prev_offset;
		return cast(*i8, null);
	};

	let string = tokenizer_consume_until_condition((c: i8) => bool {
		return c == '\'';
	});

	if !tokenizer_accept_string("'") {
		free(cast(*void, string));
		offset = prev_offset;
		return cast(*i8, null);
	};

	return string;
};

/*
 * Parses a string literal delimited by double quotes.
 * Returns the unescaped contents as a newly allocated string (owned by the
 * caller), or null with offset restored when no complete literal is found.
 */
let tokenizer_accept_string_type = () => *i8 {
	let prev_offset = offset;
	if !tokenizer_accept_string("\"") {
		offset = prev_offset;
		return cast(*i8, null);
	};

	let string = tokenizer_consume_until_condition((c: i8) => bool {
		return c == '"';
	});

	if !tokenizer_accept_string("\"") {
		free(cast(*void, string));
		offset = prev_offset;
		return cast(*i8, null);
	};

	return string;
};

/*
 * Skips one block comment when the input at the current offset starts
 * with a comment opener. An unterminated comment runs to the end of the
 * input instead of scanning past it.
 */
let tokenizer_skip_comments = () => void {
	if !tokenizer_accept_string("/*") {
		return;
	};

	while !tokenizer_accept_string("*/") {
		if offset >= file_size {
			return;
		};
		offset = offset + 1;
	};

	return;
};

/*
 * Scans and returns the next token.
 *
 * Keywords and punctuation are returned as static strings. Literals and
 * identifiers are returned as newly allocated strings with a kind prefix
 * (bool:, int:, char:, string:, identifier:). Returns "EOF" at the end of
 * the input and null when the input cannot be tokenized.
 */
let tokenizer_next = () => *i8 {
	/* Keep skipping until neither whitespace nor a comment moved the offset. */
	while true {
		let before = offset;
		tokenizer_skip_whitespace();
		tokenizer_skip_comments();
		if offset == before {
			break;
		};
	};

	if offset >= file_size {
		return "EOF";
	};

	if tokenizer_accept_keyword("import") {
		return "import";
	};
	if tokenizer_accept_keyword("let") {
		return "let";
	};
	if tokenizer_accept_keyword("extern") {
		return "extern";
	};
	if tokenizer_accept_keyword("if") {
		return "if";
	};
	if tokenizer_accept_keyword("while") {
		return "while";
	};
	if tokenizer_accept_keyword("return") {
		return "return";
	};
	if tokenizer_accept_keyword("break") {
		return "break";
	};
	if tokenizer_accept_keyword("true") {
		return "bool:true";
	};
	if tokenizer_accept_keyword("false") {
		return "bool:false";
	};

	/* The two character arrow must be tried before the single equals sign. */
	if tokenizer_accept_string("=>") {
		return "=>";
	};
	if tokenizer_accept_string(";") {
		return ";";
	};
	if tokenizer_accept_string(",") {
		return ",";
	};
	if tokenizer_accept_string(":") {
		return ":";
	};
	if tokenizer_accept_string("(") {
		return "(";
	};
	if tokenizer_accept_string(")") {
		return ")";
	};
	if tokenizer_accept_string("{") {
		return "{";
	};
	if tokenizer_accept_string("}") {
		return "}";
	};
	if tokenizer_accept_string("=") {
		return "=";
	};
	if tokenizer_accept_string("+") {
		return "+";
	};
	if tokenizer_accept_string("-") {
		return "-";
	};
	if tokenizer_accept_string("*") {
		return "*";
	};
	if tokenizer_accept_string("/") {
		return "/";
	};
	if tokenizer_accept_string("%") {
		return "%";
	};
	if tokenizer_accept_string("!") {
		return "!";
	};
	if tokenizer_accept_string("<") {
		return "<";
	};
	if tokenizer_accept_string(">") {
		return ">";
	};

	let maybe_int = tokenizer_accept_int_type();
	if maybe_int != cast(*i64, null) {
		let t = cast(*i8, malloc(1000));
		sprintf(t, "int:%d", *maybe_int);
		free(cast(*void, maybe_int));
		return t;
	};

	let maybe_char = tokenizer_accept_char_type();
	if maybe_char != cast(*i8, null) {
		let t = cast(*i8, malloc(1000));
		sprintf(t, "char:%d", *maybe_char);
		free(cast(*void, maybe_char));
		return t;
	};

	let maybe_string = tokenizer_accept_string_type();
	if maybe_string != cast(*i8, null) {
		/* Size the token from the literal so long strings cannot overflow it. */
		let t = cast(*i8, malloc(strlen(maybe_string) + 16));
		sprintf(t, "string:%s", maybe_string);
		free(cast(*void, maybe_string));
		return t;
	};

	let string = tokenizer_consume_until_condition((c: i8) => bool {
		if isalphanum(c) {
			return false;
		};
		if c == '_' {
			return false;
		};
		return true;
	});
	if strlen(string) == 0 {
		free(cast(*void, string));
		return cast(*i8, null);
	};

	let t = cast(*i8, malloc(strlen(string) + 16));
	sprintf(t, "identifier:%s", string);
	free(cast(*void, string));
	return t;
};

/*
 * Reads filename, tokenizes it into the global token list and prints the
 * result. Returns 0 on success and 1 when the file cannot be opened or an
 * unrecognised token is met.
 * NOTE(review): the token list has a fixed capacity of 100000 bytes and is
 * not grown, very large inputs can still overflow it.
 */
let tokenizer_init = (filename: *i8) => i64 {
	/* Start from a clean state so the tokenizer can be initialised more than once. */
	offset = 0;
	tokens_len = 0;

	let contents = read_file(filename);
	if contents == cast(*i8, null) {
		println("Could not open file: %s", filename);
		return 1;
	};

	println("File size: %d", file_size);
	println("%s", contents);

	tokens = cast(*i8, malloc(100000));

	while true {
		let t = tokenizer_next();
		if t == cast(*i8, null) {
			println("NULL TOKEN!");
			return 1;
		};
		if strcmp(t, "EOF") {
			break;
		};
		add_token(tokens, t);
	};

	println("PRINT TOKENS");
	print_tokens(tokens);

	return 0;
};

/*
 * Releases the token list and the file buffer and resets the scanner
 * state so stale pointers cannot be reused. Always returns 0.
 */
let tokenizer_deinit = () => i64 {
	free(cast(*void, tokens));
	free(cast(*void, buf));

	tokens = cast(*i8, null);
	buf = cast(*i8, null);
	tokens_len = 0;
	file_size = 0;
	offset = 0;

	return 0;
};