From cc56ed42486c2636af50bae451825ad90cfd4b6c Mon Sep 17 00:00:00 2001 From: Baitinq Date: Tue, 15 Jul 2025 17:34:39 +0200 Subject: Finish bootstrapping :^) --- src/bootstrap/codegen.pry | 1450 ------------------------------------------ src/bootstrap/llvm.pry | 353 ----------- src/bootstrap/main.pry | 80 --- src/bootstrap/parser.pry | 1456 ------------------------------------------- src/bootstrap/tokenizer.pry | 553 ---------------- src/codegen.pry | 1450 ++++++++++++++++++++++++++++++++++++++++++ src/codegen.zig | 1101 -------------------------------- src/llvm.pry | 353 +++++++++++ src/main.pry | 80 +++ src/main.zig | 49 -- src/parser.pry | 1456 +++++++++++++++++++++++++++++++++++++++++++ src/parser.zig | 1055 ------------------------------- src/tokenizer.pry | 553 ++++++++++++++++ src/tokenizer.zig | 327 ---------- 14 files changed, 3892 insertions(+), 6424 deletions(-) delete mode 100644 src/bootstrap/codegen.pry delete mode 100644 src/bootstrap/llvm.pry delete mode 100644 src/bootstrap/main.pry delete mode 100644 src/bootstrap/parser.pry delete mode 100644 src/bootstrap/tokenizer.pry create mode 100644 src/codegen.pry delete mode 100644 src/codegen.zig create mode 100644 src/llvm.pry create mode 100644 src/main.pry delete mode 100644 src/main.zig create mode 100644 src/parser.pry delete mode 100644 src/parser.zig create mode 100644 src/tokenizer.pry delete mode 100644 src/tokenizer.zig (limited to 'src') diff --git a/src/bootstrap/codegen.pry b/src/bootstrap/codegen.pry deleted file mode 100644 index cb054ec..0000000 --- a/src/bootstrap/codegen.pry +++ /dev/null @@ -1,1450 +0,0 @@ -import "!hashmap.pry"; - -import "llvm.pry"; - -let Variable = struct { - value: LLVMValueRef, - type: LLVMTypeRef, - node: *Node, - node_type: *Node, - stack_level: *i64, -}; - -let Scope = struct { - variables: *HashMap, -}; - -let Environment = struct { - scope_stack: **Scope, - scope_stack_len: i64, - arena: *arena, -}; - -let environment_create_scope = (e: *Environment) => void 
{ - let scope = cast(*Scope, arena_alloc((*e).arena, sizeof(Scope))); - (*scope).variables = hashmap_init(16, (*e).arena); - (*((*e).scope_stack + cast(**Scope, (*e).scope_stack_len))) = scope; - (*e).scope_stack_len = (*e).scope_stack_len + 1; - - return; -}; - -let environment_drop_scope = (e: *Environment) => void { - (*e).scope_stack_len = (*e).scope_stack_len - 1; - - return; -}; - -let environment_get_variable = (e: *Environment, name: *i8) => *Variable { - let i = (*e).scope_stack_len; - let variable = cast(*Variable, null); - - while i > 0 { - i = i - 1; - let scope = *(((*e).scope_stack + cast(**Scope, i))); - assert(scope != cast(*Scope, null)); - let v = cast(*Variable, hashmap_get((*scope).variables, name)); - if v != cast(*Variable, null) { - if variable == cast(*Variable, null) { - variable = v; - }; - let stack_level = cast(*i64, arena_alloc((*e).arena, sizeof(i64))); - (*stack_level) = i; - (*variable).stack_level = stack_level; - }; - }; - - return variable; -}; - -let environment_add_variable = (e: *Environment, name: *i8, variable: *Variable) => void { - /* TODO: Dont allow shadowing if value != value or type != type (across things) */ - let top_scope = *(((*e).scope_stack + cast(**Scope, (*e).scope_stack_len - 1))); - hashmap_put((*top_scope).variables, name, cast(*void, variable)); - - return; -}; - -let environment_set_variable = (e: *Environment, name: *i8, variable: *Variable) => void { - let existing = environment_get_variable(e, name); - (*existing) = (*variable); - - return; -}; - -let environment_init = (alloc: *arena) => *Environment { - let e = cast(*Environment, arena_alloc(alloc, sizeof(Environment))); - (*e).scope_stack = cast(**Scope, arena_alloc(alloc, sizeof(*Scope) * 40)); - (*e).scope_stack_len = 0; - (*e).arena = alloc; - - environment_create_scope(e); - - return e; -}; - -let codegen = struct { - llvm_module: LLVMModuleRef, - llvm_context: LLVMContextRef, - builder: LLVMBuilderRef, - arena: *arena, - environment: 
*Environment, - - whil_loop_exit: LLVMBasicBlockRef, - whil_block: LLVMBasicBlockRef, - current_function: LLVMValueRef, - current_function_retur_type: *Node, - llvm_target_data: LLVMTargetDataRef, -}; - -let codegen_init = (alloc: *arena) => *codegen { - LLVMInitializeAllTargetInfos(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllTargets(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllAsmParsers(); - - let module = LLVMModuleCreateWithName("module"); - let context = LLVMGetGlobalContext(); - let builder = LLVMCreateBuilder(); - - let c = cast(*codegen, arena_alloc(alloc, sizeof(codegen))); - - (*c).llvm_module = module; - (*c).llvm_target_data = LLVMGetModuleDataLayout(module); - (*c).llvm_context = context; - (*c).builder = builder; - (*c).arena = alloc; - (*c).environment = environment_init(alloc); - - return c; -}; - -let create_node = (c: *codegen, n: Node) => *Node { - let res = cast(*Node, arena_alloc((*c).arena, sizeof(Node))); - *res = n; - return res; -}; - -let codegen_create_variable = (c: *codegen, variable: Variable) => *Variable { - let v = cast(*Variable, arena_alloc((*c).arena, sizeof(Variable))); - *v = variable; - return v; -}; - -let compare_types = (c: *codegen, a: *Node, b: *Node, is_dereference: bool) => bool { - assert((*a).type >= NODE_TYPE_SIMPLE_TYPE); - assert((*a).type <= NODE_TYPE_STRUCT_TYPE); - assert((*b).type >= NODE_TYPE_SIMPLE_TYPE); - assert((*b).type <= NODE_TYPE_STRUCT_TYPE); - - if (*a).type == NODE_TYPE_SIMPLE_TYPE { - let simple_type_a = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*a).data); - if strcmp(simple_type_a.name, "varargs") { - return true; - }; - }; - - if is_dereference { - assert((*a).type == NODE_TYPE_POINTER_TYPE); - let pointer_type_a = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*a).data); - a = pointer_type_a.type; - }; - - if (*a).type != (*b).type { - printf("Types do not match: %d != ", (*a).type); - printf("%d\n", (*b).type); - return false; - }; - - if (*a).type == NODE_TYPE_SIMPLE_TYPE { - 
assert((*b).type == NODE_TYPE_SIMPLE_TYPE); - let simple_type_a = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*a).data); - let simple_type_b = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*b).data); - let eql = strcmp(simple_type_a.name, simple_type_b.name); - if !eql { - printf("Simple types do not match: %s != ", simple_type_a.name); - printf("%s\n", simple_type_b.name); - }; - return eql; - }; - - if (*a).type == NODE_TYPE_FUNCTION_TYPE { - assert((*b).type == NODE_TYPE_FUNCTION_TYPE); - let function_type_a = *cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*a).data); - let function_type_b = *cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*b).data); - - - if !compare_types(c, function_type_a.retur_type, function_type_b.retur_type, false) { - printf("Function return types do not match\n"); - return false; - }; - - if function_type_a.parameters_len != function_type_b.parameters_len { - printf("Function parameter lengths do not match\n"); - return false; - }; - - let i = 0; - while i < function_type_a.parameters_len { - let param_a = *(function_type_a.parameters + cast(**Node, i)); - let param_b = *(function_type_b.parameters + cast(**Node, i)); - if !compare_types(c, param_a, param_b, false) { - printf("Function parameter types do not match\n"); - return false; - }; - i = i + 1; - }; - - return true; - }; - - if (*a).type == NODE_TYPE_POINTER_TYPE { - assert((*b).type == NODE_TYPE_POINTER_TYPE); - let pointer_type_a = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*a).data); - let pointer_type_b = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*b).data); - if !compare_types(c, pointer_type_a.type, pointer_type_b.type, false) { - printf("Pointer types do not match\n"); - return false; - }; - return true; - }; - - if (*a).type == NODE_TYPE_STRUCT_TYPE { - assert((*b).type == NODE_TYPE_STRUCT_TYPE); - let struc_type_a = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*a).data); - let struc_type_b = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*b).data); - - if struc_type_a.fields_len != struc_type_b.fields_len { - printf("Struct field lengths 
do not match\n"); - return false; - }; - - let i = 0; - while i < struc_type_a.fields_len { - let field_a = *(struc_type_a.fields + cast(**Node, i)); - let field_b = *(struc_type_b.fields + cast(**Node, i)); - if !compare_types(c, field_a, field_b, false) { - printf("Struct field types do not match\n"); - return false; - }; - i = i + 1; - }; - - return true; - }; - - return false; -}; - -let codegen_get_llvm_type = (c: *codegen, node: *Node) => *LLVMTypeRef { - assert((*node).type >= NODE_TYPE_SIMPLE_TYPE); - assert((*node).type <= NODE_TYPE_STRUCT_TYPE); - - if (*node).type == NODE_TYPE_SIMPLE_TYPE { - let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*node).data); - - if strcmp(simple_type.name, "i8") { - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = LLVMInt8Type(); - return r; - }; - - if strcmp(simple_type.name, "i64") { - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = LLVMInt64Type(); - return r; - }; - - if strcmp(simple_type.name, "bool") { - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = LLVMInt1Type(); - return r; - }; - - if strcmp(simple_type.name, "void") { - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = LLVMVoidType(); - return r; - }; - - if strcmp(simple_type.name, "varargs") { /* Hack for varargs (only used for printf) */ - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = LLVMPointerType(LLVMInt64Type(), 0); - return r; - }; - - let v = environment_get_variable((*c).environment, simple_type.name); - if (v != cast(*Variable, null)) { - assert((*v).type != cast(LLVMTypeRef, null)); - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = (*v).type; - return r; - }; - - printf("NO SIMPLE TYPE %s!\n", simple_type.name); - assert(false); - }; - - if (*node).type == NODE_TYPE_FUNCTION_TYPE { - let function_type = *cast(*NODE_TYPE_FUNCTION_TYPE_DATA, 
(*node).data); - let f_retur = function_type.retur_type; - let retur_type = codegen_get_llvm_type(c, f_retur); - assert(retur_type != cast(*LLVMTypeRef, null)); - if (*f_retur).type == NODE_TYPE_FUNCTION_TYPE { - (*retur_type) = LLVMPointerType(*retur_type, 0); - }; - - let paramtypes = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef) * 20)); - let paramtypes_len = 0; - let is_varargs = 0; - - let i = 0; - while i < function_type.parameters_len { - let param = *(function_type.parameters + cast(**Node, i)); - if (*param).type == NODE_TYPE_SIMPLE_TYPE { - let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*param).data); - if strcmp(simple_type.name, "varargs") { - is_varargs = 1; - i = i + 1; - continue; - }; - }; - let typ = codegen_get_llvm_type(c, param); - assert(typ != cast(*LLVMTypeRef, null)); - if (*param).type == NODE_TYPE_FUNCTION_TYPE { - *typ = LLVMPointerType(*typ, 0); - }; - - (*(paramtypes + cast(*LLVMTypeRef, paramtypes_len))) = *typ; - paramtypes_len = paramtypes_len + 1; - - i = i + 1; - }; - let function_type = LLVMFunctionType(*retur_type, paramtypes, paramtypes_len, is_varargs); - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = function_type; - return r; - }; - - if (*node).type == NODE_TYPE_POINTER_TYPE { - let pointer_type = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*node).data); - let inner_type = codegen_get_llvm_type(c, pointer_type.type); - assert(inner_type != cast(*LLVMTypeRef, null)); - let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); - *r = LLVMPointerType(*inner_type, 0); - return r; - }; - - printf("NO TYPEEE BOI %d\n", (*node).type); - assert(false); - - return cast(*LLVMTypeRef, null); -}; - -let codegen_generate_literal = (c: *codegen, literal_val: LLVMValueRef, name: *i8, node: *Node, node_type: *Node) => *Variable { - if name != cast(*i8, null) { - let e = (*c).environment; - if (*e).scope_stack_len == 1 { - let lt = codegen_get_llvm_type(c, node_type); - 
assert(lt != cast(*LLVMTypeRef, null)); - let v = Variable{}; - v.value = LLVMAddGlobal((*c).llvm_module, *lt, name); - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = node; - v.node_type = node_type; - LLVMSetInitializer(v.value, literal_val); - return codegen_create_variable(c, v); - }; - }; - - - let v = Variable{}; - v.value = literal_val; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = node; - v.node_type = node_type; - return codegen_create_variable(c, v); -}; - -extern codegen_generate_statement = (*codegen, *Node) => i64; -extern codegen_generate_function_call_statement = (*codegen, *Node) => *Variable; -extern codegen_generate_expression_value = (*codegen, *Node, *i8) => *Variable; - -let StructField = struct { - value: LLVMValueRef, - type: *Node, -}; - -let codegen_get_struct_field = (c: *codegen, node: *Node, name: *i8) => *StructField { - let ptr = cast(*Variable, null); - if (*node).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER { - let identifier = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*node).data); - ptr = environment_get_variable((*c).environment, identifier.name); - }; - if (*node).type == NODE_UNARY_EXPRESSION { - let xd = (*cast(*NODE_UNARY_EXPRESSION_DATA, (*node).data)).expression; - ptr = codegen_generate_expression_value(c, xd, ""); - }; - - assert(ptr != cast(*Variable, null)); - - let typ = cast(*Node, null); - let ptr_typ = (*ptr).node_type; - if (*ptr_typ).type == NODE_TYPE_STRUCT_TYPE { - typ = ptr_typ; - }; - if (*ptr_typ).type == NODE_TYPE_POINTER_TYPE { - let pt = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*ptr_typ).data); - let pt_type = pt.type; - assert((*pt_type).type == NODE_TYPE_SIMPLE_TYPE); - let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*pt_type).data); - let v = environment_get_variable((*c).environment, simple_type.name); - assert(v != cast(*Variable, null)); - typ = (*v).node_type; /* TODO: we shouldnt be able to get fields of pointers, we have 
to dref first */ - }; - if (*ptr_typ).type == NODE_TYPE_SIMPLE_TYPE { - let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*ptr_typ).data); - let v = environment_get_variable((*c).environment, simple_type.name); - assert(v != cast(*Variable, null)); - typ = (*v).node_type; - }; - - assert(typ != cast(*Node, null)); - assert((*typ).type == NODE_TYPE_SIMPLE_TYPE); - let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*typ).data); - assert((*simple_type.underlying_type).type == NODE_TYPE_STRUCT_TYPE); - let struc_type = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*simple_type.underlying_type).data); - - let fieldIndex = cast(*i64, null); - - let i = 0; - while i < struc_type.fields_len { - let field = *(struc_type.fields + cast(**Node, i)); - assert((*field).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); - let field_data = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*field).data); - if strcmp(name, field_data.name) { - let ii = cast(*i64, arena_alloc((*c).arena, sizeof(i64))); - *ii = i; - fieldIndex = ii; - break; - }; - i = i + 1; - }; - - assert(fieldIndex != cast(*i64, null)); - - let zero = LLVMConstInt(LLVMInt32Type(), 0, 0); - let llvmFieldIndex = LLVMConstInt(LLVMInt32Type(), *fieldIndex, 0); - let indices = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * 2)); - (*(indices + cast(*LLVMValueRef, 0))) = zero; - (*(indices + cast(*LLVMValueRef, 1))) = llvmFieldIndex; - - let res = cast(*StructField, arena_alloc((*c).arena, sizeof(StructField))); - - let x = codegen_get_llvm_type(c, typ); - assert(x != cast(*LLVMTypeRef, null)); - (*res).value = LLVMBuildGEP2((*c).builder, *x, (*ptr).value, indices, 2, name); - let no = *(struc_type.fields + cast(**Node, *fieldIndex)); - assert((*no).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); - let no_d = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*no).data); - (*res).type = no_d.type; - - return res; -}; - -let codegen_generate_expression_value = (c: *codegen, expression: *Node, name: *i8) => *Variable 
{ - if ((*expression).type == NODE_PRIMARY_EXPRESSION_NULL) { - let inner_type_data = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*inner_type_data).name = "void"; - let inner_type = Node{}; - inner_type.type = NODE_TYPE_SIMPLE_TYPE; - inner_type.data = cast(*void, inner_type_data); - - let node_type_data = cast(*NODE_TYPE_POINTER_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_POINTER_TYPE_DATA))); - (*node_type_data).type = create_node(c, inner_type); - let node_type = Node{}; - node_type.type = NODE_TYPE_POINTER_TYPE; - node_type.data = cast(*void, node_type_data); - - return codegen_generate_literal(c, LLVMConstNull(LLVMPointerType(LLVMInt8Type(), 0)), name, expression, create_node(c, node_type)); - }; - - if ((*expression).type == NODE_PRIMARY_EXPRESSION_NUMBER) { - let n = (*cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, (*expression).data)).value; - - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "i64"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - return codegen_generate_literal(c, LLVMConstInt(LLVMInt64Type(), n, 0), name, expression, create_node(c, node_type)); - }; - - if ((*expression).type == NODE_PRIMARY_EXPRESSION_BOOLEAN) { - let b = (*cast(*NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA, (*expression).data)).value; - - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "bool"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - let int_value = 0; - if b == true { - int_value = 1; - }; - - return codegen_generate_literal(c, LLVMConstInt(LLVMInt1Type(), int_value, 0), name, expression, create_node(c, node_type)); - }; - - if ((*expression).type == 
NODE_PRIMARY_EXPRESSION_CHAR) { - let ch = cast(i64, (*cast(*NODE_PRIMARY_EXPRESSION_CHAR_DATA, (*expression).data)).value); - - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "i8"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - return codegen_generate_literal(c, LLVMConstInt(LLVMInt8Type(), cast(i64, ch), 0), name, expression, create_node(c, node_type)); - }; - - if ((*expression).type == NODE_PRIMARY_EXPRESSION_STRING) { - let str = (*cast(*NODE_PRIMARY_EXPRESSION_STRING_DATA, (*expression).data)).value; - - let x = LLVMBuildGlobalStringPtr((*c).builder, str, ""); - - let inner_type_data = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*inner_type_data).name = "i8"; - let inner_type = Node{}; - inner_type.type = NODE_TYPE_SIMPLE_TYPE; - inner_type.data = cast(*void, inner_type_data); - - let node_type_data = cast(*NODE_TYPE_POINTER_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_POINTER_TYPE_DATA))); - (*node_type_data).type = create_node(c, inner_type); - let node_type = Node{}; - node_type.type = NODE_TYPE_POINTER_TYPE; - node_type.data = cast(*void, node_type_data); - - let v = Variable{}; - v.value = x; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = create_node(c, node_type); - - return codegen_create_variable(c, v); - }; - - if ((*expression).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER) { - let identifier = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*expression).data); - let variable = environment_get_variable((*c).environment, identifier.name); - assert(variable != cast(*Variable, null)); - let param_value = (*variable).value; - let v_type = (*variable).node_type; - - let done = false; - if (*v_type).type != NODE_TYPE_FUNCTION_TYPE { - let param_type = 
codegen_get_llvm_type(c, v_type); - assert(param_type != cast(*LLVMTypeRef, null)); - if (*v_type).type == NODE_TYPE_FUNCTION_TYPE { - (*param_type) = LLVMPointerType(*param_type, 0); - }; - param_value = LLVMBuildLoad2((*c).builder, *param_type, (*variable).value, ""); - done = true; - }; - - if !done { - if (*(*variable).stack_level) != 0 { - let param_type = codegen_get_llvm_type(c, v_type); - assert(param_type != cast(*LLVMTypeRef, null)); - if (*v_type).type == NODE_TYPE_FUNCTION_TYPE { - (*param_type) = LLVMPointerType(*param_type, 0); - }; - param_value = LLVMBuildLoad2((*c).builder, *param_type, (*variable).value, ""); - done = true; - }; - - }; - - return codegen_generate_literal(c, param_value, name, expression, (*variable).node_type); - }; - - if ((*expression).type == NODE_FUNCTION_DEFINITION) { - /* Functions should be declared "globally" */ - let builder_pos = LLVMGetInsertBlock((*c).builder); - - let llvm_param_types = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef) * 20)); - let param_types = cast(**Node, arena_alloc((*c).arena, sizeof(*Node) * 20)); - - let function_definition = *cast(*NODE_FUNCTION_DEFINITION_DATA, (*expression).data); - - let i = 0; - let is_varargs = 0; - while i < function_definition.parameters_len { - let node = *(function_definition.parameters + cast(**Node, i)); - assert((*node).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); - let param = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*node).data); - let param_type = param.type; - if (*param_type).type == NODE_TYPE_SIMPLE_TYPE { - let simple_type = *(cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*param_type).data)); - if strcmp(simple_type.name, "varargs") { - is_varargs = 1; - }; - }; - let llvm_param_type = codegen_get_llvm_type(c, param_type); - assert(llvm_param_type != cast(*LLVMTypeRef, null)); - if (*param_type).type == NODE_TYPE_FUNCTION_TYPE { - (*llvm_param_type) = LLVMPointerType(*llvm_param_type, 0); - }; - - (*(llvm_param_types + cast(*LLVMTypeRef, i))) = 
*llvm_param_type; - (*(param_types + cast(**Node, i))) = param_type; - i = i + 1; - }; - - let f_ret = function_definition.retur_type; - let retur_type = codegen_get_llvm_type(c, f_ret); - assert(retur_type != cast(*LLVMTypeRef, null)); - if (*f_ret).type == NODE_TYPE_FUNCTION_TYPE { - (*retur_type) = LLVMPointerType(*retur_type, 0); - }; - - let function = cast(LLVMValueRef, null); - if name != cast(*i8, null) { - let v = environment_get_variable((*c).environment, name); - if (v != cast(*Variable, null)) { - function = (*v).value; - }; - }; - if function == cast(LLVMValueRef, null) { - let function_type = LLVMFunctionType(*retur_type, llvm_param_types, i, is_varargs); - let n_name = name; - if name == cast(*i8, null) { - n_name = "unnamed_func"; - }; - function = LLVMAddFunction((*c).llvm_module, n_name, function_type); - }; - - let function_entry = LLVMAppendBasicBlock(function, "entrypoint"); - LLVMPositionBuilderAtEnd((*c).builder, function_entry); - - environment_create_scope((*c).environment); - let last_function = (*c).current_function; - (*c).current_function = function; - let last_function_retur_type = (*c).current_function_retur_type; - (*c).current_function_retur_type = function_definition.retur_type; - - /* TODO: Defer. 
For now we do at the end */ - - let d = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_FUNCTION_TYPE_DATA))); - (*d).parameters = param_types; - (*d).parameters_len = i; - (*d).retur_type = function_definition.retur_type; - let n = Node{}; - let node_type = create_node(c, n); - (*node_type).type = NODE_TYPE_FUNCTION_TYPE; - (*node_type).data = cast(*void, d); - - /* Needed for recursive functions */ - if name != cast(*i8, null) { - let v = Variable{}; - v.value = function; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = node_type; - environment_add_variable((*c).environment, name, codegen_create_variable(c, v)); - }; - - let params = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * function_definition.parameters_len)); - LLVMGetParams(function, params); - - let parameters_index = 0; - while parameters_index < function_definition.parameters_len { - let p = (*(params + cast(*LLVMValueRef, parameters_index))); - let param_node = *(function_definition.parameters + cast(**Node, parameters_index)); - assert((*param_node).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); - let param = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*param_node).data); - let param_type = param.type; - let llvm_param_type = codegen_get_llvm_type(c, param_type); - assert(llvm_param_type != cast(*LLVMTypeRef, null)); - if (*param_type).type == NODE_TYPE_FUNCTION_TYPE { - (*llvm_param_type) = LLVMPointerType(*llvm_param_type, 0); - }; - let alloca = LLVMBuildAlloca((*c).builder, *llvm_param_type, param.name); - LLVMBuildStore((*c).builder, p, alloca); - - let v = Variable{}; - v.value = alloca; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = param_node; - v.node_type = param_type; - environment_add_variable((*c).environment, param.name, codegen_create_variable(c, v)); - parameters_index = parameters_index + 1; - }; - - i = 0; - while i < 
function_definition.statements_len { - let stmt = *(function_definition.statements + cast(**Node, i)); - - let res = codegen_generate_statement(c, stmt); - assert(res == 0); - - i = i + 1; - }; - - LLVMPositionBuilderAtEnd((*c).builder, builder_pos); - - let v = Variable{}; - v.value = function; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = node_type; - - /* TODO: Move to defer */ - (*c).current_function = last_function; - (*c).current_function_retur_type = last_function_retur_type; - environment_drop_scope((*c).environment); - - return codegen_create_variable(c, v); - }; - - if ((*expression).type == NODE_EQUALITY_EXPRESSION) { - let exp = (*(cast(*NODE_EQUALITY_EXPRESSION_DATA, (*expression).data))); - let lhs_value = codegen_generate_expression_value(c, exp.lhs, cast(*i8, null)); - assert(lhs_value != cast(*Variable, null)); - let rhs_value = codegen_generate_expression_value(c, exp.rhs, cast(*i8, null)); - assert(rhs_value != cast(*Variable, null)); - - assert(compare_types(c, (*lhs_value).node_type, (*rhs_value).node_type, false)); - - let op = -1; - - if exp.typ == EQUALITY_EXPRESSION_TYPE_EQ { - op = LLVMIntEQ; - }; - if exp.typ == EQUALITY_EXPRESSION_TYPE_NE { - op = LLVMIntNE; - }; - if exp.typ == EQUALITY_EXPRESSION_TYPE_GE { - op = LLVMIntSGE; - }; - if exp.typ == EQUALITY_EXPRESSION_TYPE_LE { - op = LLVMIntSLE; - }; - if exp.typ == EQUALITY_EXPRESSION_TYPE_LT { - op = LLVMIntSLT; - }; - if exp.typ == EQUALITY_EXPRESSION_TYPE_GT { - op = LLVMIntSGT; - }; - - assert(op != -1); - - let cmp = LLVMBuildICmp((*c).builder, cast(LLVMIntPredicate, op), (*lhs_value).value, (*rhs_value).value, ""); - - - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "bool"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - return 
codegen_generate_literal(c, cmp, name, expression, create_node(c, node_type)); - }; - - if ((*expression).type == NODE_ADDITIVE_EXPRESSION) { - let exp = (*(cast(*NODE_ADDITIVE_EXPRESSION_DATA, (*expression).data))); - let lhs_value = codegen_generate_expression_value(c, exp.lhs, cast(*i8, null)); - assert(lhs_value != cast(*Variable, null)); - let rhs_value = codegen_generate_expression_value(c, exp.rhs, cast(*i8, null)); - assert(rhs_value != cast(*Variable, null)); - - assert(compare_types(c, (*lhs_value).node_type, (*rhs_value).node_type, false)); - - let result = cast(LLVMValueRef, null); - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "i64"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - let pnode_type = create_node(c, node_type); - - if exp.addition { - let nt = (*lhs_value).node_type; - if (*nt).type == NODE_TYPE_POINTER_TYPE { - let ipt = cast(*NODE_TYPE_POINTER_TYPE_DATA, (*nt).data); - let llvmipt = codegen_get_llvm_type(c, (*ipt).type); - assert(llvmipt != cast(*LLVMTypeRef, null)); - let arr = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * 1)); - (*(arr + cast(*LLVMValueRef, 0))) = (*rhs_value).value; - result = LLVMBuildGEP2((*c).builder, *llvmipt, (*lhs_value).value, arr, 1, ""); - pnode_type = (*lhs_value).node_type; - }; - if (*nt).type != NODE_TYPE_POINTER_TYPE { - result = LLVMBuildAdd((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); - }; - - }; - if !exp.addition { - result = LLVMBuildSub((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); - }; - - return codegen_generate_literal(c, result, name, expression, pnode_type); - }; - - if ((*expression).type == NODE_MULTIPLICATIVE_EXPRESSION) { - let exp = (*(cast(*NODE_MULTIPLICATIVE_EXPRESSION_DATA, (*expression).data))); - let lhs_value = codegen_generate_expression_value(c, exp.lhs, 
cast(*i8, null)); - assert(lhs_value != cast(*Variable, null)); - let rhs_value = codegen_generate_expression_value(c, exp.rhs, cast(*i8, null)); - assert(rhs_value != cast(*Variable, null)); - - assert(compare_types(c, (*lhs_value).node_type, (*rhs_value).node_type, false)); - - let result = cast(LLVMValueRef, null); - - if exp.typ == MULTIPLICATIVE_EXPRESSION_TYPE_MUL { - result = LLVMBuildMul((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); - }; - if exp.typ == MULTIPLICATIVE_EXPRESSION_TYPE_DIV { - result = LLVMBuildSDiv((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); - }; - if exp.typ == MULTIPLICATIVE_EXPRESSION_TYPE_MOD { - result = LLVMBuildSRem((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); - }; - assert(result != cast(LLVMValueRef, null)); - - return codegen_generate_literal(c, result, name, expression, (*lhs_value).node_type); - }; - - if ((*expression).type == NODE_UNARY_EXPRESSION) { - let exp = (*(cast(*NODE_UNARY_EXPRESSION_DATA, (*expression).data))); - let k = codegen_generate_expression_value(c, exp.expression, cast(*i8, null)); - assert(k != cast(*Variable, null)); - - let r = cast(LLVMValueRef, null); - let typ = (*k).node_type; - - if exp.typ == UNARY_EXPRESSION_TYPE_NOT { - assert((*typ).type == NODE_TYPE_SIMPLE_TYPE); - let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*typ).data); - assert(strcmp(simple_type.name, "bool")); - r = LLVMBuildICmp((*c).builder, cast(LLVMIntPredicate, LLVMIntEQ), (*k).value, LLVMConstInt(LLVMInt1Type(), 0, 0), ""); - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "bool"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - typ = create_node(c, node_type); - }; - - if exp.typ == UNARY_EXPRESSION_TYPE_MINUS { - r = LLVMBuildNeg((*c).builder, (*k).value, ""); - let node_type = Node{}; - node_type.type = 
NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "i64"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - typ = create_node(c, node_type); - }; - - if exp.typ == UNARY_EXPRESSION_TYPE_STAR { - assert((*typ).type == NODE_TYPE_POINTER_TYPE); - let n = (*k).node_type; - typ = (*cast(*NODE_TYPE_POINTER_TYPE_DATA, (*n).data)).type; - let ptr_type = codegen_get_llvm_type(c, typ); - assert(ptr_type != cast(*LLVMTypeRef, null)); - r = LLVMBuildLoad2((*c).builder, *ptr_type, (*k).value, ""); - }; - - return codegen_generate_literal(c, r, name, expression, typ); - }; - - if ((*expression).type == NODE_TYPE_FUNCTION_TYPE) { - let e = *((*c).environment); - assert(e.scope_stack_len == 1); - - let existing = environment_get_variable((*c).environment, name); - if (existing != cast(*Variable, null)) { - return existing; - }; - - let function_type = codegen_get_llvm_type(c, expression); - assert(function_type != cast(*LLVMTypeRef, null)); - let function = LLVMAddFunction((*c).llvm_module, name, *function_type); - let v = Variable{}; - v.value = function; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = expression; - return codegen_create_variable(c, v); - }; - - if ((*expression).type == NODE_FUNCTION_CALL_STATEMENT) { - return codegen_generate_function_call_statement(c, expression); - }; - - if ((*expression).type == NODE_CAST_STATEMENT) { - let exp = *cast(*NODE_CAST_STATEMENT_DATA, (*expression).data); - let val = codegen_generate_expression_value(c, exp.expression, ""); - assert(val != cast(*Variable, null)); - let v = Variable{}; - v.value = (*val).value; /* TODO: Do real casting */ - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = exp.typ; - return codegen_create_variable(c, v); - }; - - if ((*expression).type == 
NODE_SIZEOF_STATEMENT) { - let exp = *cast(*NODE_SIZEOF_STATEMENT_DATA, (*expression).data); - let typ = codegen_get_llvm_type(c, exp.typ); - assert(typ != cast(*LLVMTypeRef, null)); - let size_in_bytes = LLVMStoreSizeOfType((*c).llvm_target_data, *typ); - let size_val = LLVMConstInt(LLVMInt64Type(), size_in_bytes, 0); - - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "i64"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - let v = Variable{}; - v.value = size_val; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = create_node(c, node_type); - return codegen_create_variable(c, v); - }; - - if ((*expression).type == NODE_TYPE_STRUCT_TYPE) { - let struc_data = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*expression).data); - let dd = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*dd).name = name; - (*dd).underlying_type = expression; - let n = Node{}; - n.type = NODE_TYPE_SIMPLE_TYPE; - n.data = cast(*void, dd); - let simple_type_node = create_node(c, n); - - let struc_type = LLVMStructCreateNamed((*c).llvm_context, name); - - if name != cast(*i8, null) { - let v = Variable{}; - v.value = cast(LLVMValueRef, null); - v.type = struc_type; - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = simple_type_node; - environment_add_variable((*c).environment, name, codegen_create_variable(c, v)); - }; - - let llvm_types = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef) * 20)); - let i = 0; - while i < struc_data.fields_len { - let field = *(struc_data.fields + cast(**Node, i)); - assert((*field).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); - let t = (*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*field).data)).type; - let lt = codegen_get_llvm_type(c, t); - 
assert(lt != cast(*LLVMTypeRef, null)); - (*(llvm_types + cast(*LLVMTypeRef, i))) = *lt; - i = i + 1; - }; - - LLVMStructSetBody(struc_type, llvm_types, i, 0); - - let v = Variable{}; - v.value = cast(LLVMValueRef, null); - v.type = struc_type; - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = simple_type_node; - return codegen_create_variable(c, v); - }; - - if ((*expression).type == NODE_TYPE_SIMPLE_TYPE) { - let simple_type_data = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*expression).data); - let typ = codegen_get_llvm_type(c, simple_type_data.underlying_type); - assert(typ != cast(*LLVMTypeRef, null)); - let v = Variable{}; - v.value = cast(LLVMValueRef, null); - v.type = *typ; - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = simple_type_data.underlying_type; - return codegen_create_variable(c, v); - }; - - if ((*expression).type == NODE_STRUCT_INSTANCIATION) { - let struc_data = *cast(*NODE_STRUCT_INSTANCIATION_DATA, (*expression).data); - let v = environment_get_variable((*c).environment, struc_data.typ); - assert(v != cast(*Variable, null)); - return v; - }; - - if ((*expression).type == NODE_FIELD_ACCESS) { - let field_access = *cast(*NODE_FIELD_ACCESS_DATA, (*expression).data); - let x = codegen_get_struct_field(c, field_access.expression, field_access.name); - assert(x != cast(*StructField, null)); - let t = codegen_get_llvm_type(c, (*x).type); - assert(t != cast(*LLVMTypeRef, null)); - let loaded = LLVMBuildLoad2((*c).builder, *t, (*x).value, ""); - let v = Variable{}; - v.value = loaded; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = expression; - v.node_type = (*x).type; - return codegen_create_variable(c, v); - }; - - printf("ASSERT 1: %d\n", (*expression).type); - assert(false); - - return cast(*Variable, null); -}; - -let codegen_generate_assignment_statement = (c: *codegen, stmt: *NODE_ASSIGNMENT_STATEMENT_DATA) => i64 { - let lhs = *((*stmt).lhs); - let prhs = 
(*stmt).rhs; - - if (lhs.type == NODE_PRIMARY_EXPRESSION_IDENTIFIER) { - let identifier = (*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, lhs.data)).name; - let variable = codegen_generate_expression_value(c, prhs, identifier); - assert(variable != cast(*Variable, null)); - - let env = (*(*c).environment); - if env.scope_stack_len == 1 { - environment_add_variable((*c).environment, identifier, variable); - return 0; - }; - - let ptr = cast(LLVMValueRef, null); - let typ = (*variable).node_type; - - if (*stmt).is_declaration { - let x = codegen_get_llvm_type(c, typ); - assert(x != cast(*LLVMTypeRef, null)); - if (*typ).type == NODE_TYPE_FUNCTION_TYPE { - *x = LLVMPointerType(*x, 0); - }; - ptr = LLVMBuildAlloca((*c).builder, *x, identifier); - }; - if !(*stmt).is_declaration { - let v = environment_get_variable((*c).environment, identifier); - assert(v != cast(*Variable, null)); - ptr = (*v).value; - typ = (*v).node_type; - /* TODO: Do this in more places! (everywhere get_llvm_type or get_variable?) 
Also check types in return and cmp */ - assert(compare_types(c, typ, (*variable).node_type, (*stmt).is_dereference)); - }; - - if (*stmt).is_dereference { - let ltyp = codegen_get_llvm_type(c, typ); - assert(ltyp != cast(*LLVMTypeRef, null)); - ptr = LLVMBuildLoad2((*c).builder, *ltyp, ptr, ""); - }; - - /* NOTE: structs have a null variable.value */ - if (*variable).value != cast(LLVMValueRef, null) { - LLVMBuildStore((*c).builder, (*variable).value, ptr); - }; - - if (*stmt).is_dereference { - let v = environment_get_variable((*c).environment, identifier); - assert(v != cast(*Variable, null)); - ptr = (*v).value; - }; - - let new_variable = Variable{}; - - new_variable.value = ptr; - new_variable.type = (*variable).type; - new_variable.stack_level = cast(*i64, null); - new_variable.node = (*variable).node; - new_variable.node_type = typ; - - if (*stmt).is_declaration { - environment_add_variable((*c).environment, identifier, codegen_create_variable(c, new_variable)); - }; - if !(*stmt).is_declaration { - environment_set_variable((*c).environment, identifier, codegen_create_variable(c, new_variable)); - }; - - return 0; - }; - - if (lhs.type == NODE_UNARY_EXPRESSION) { - let xd = (*cast(*NODE_UNARY_EXPRESSION_DATA, lhs.data)).expression; - let a = codegen_generate_expression_value(c, xd, cast(*i8, null)); - assert(a != cast(*Variable, null)); - let variable = codegen_generate_expression_value(c, prhs, cast(*i8, null)); - assert(variable != cast(*Variable, null)); - assert(compare_types(c, (*a).node_type, (*variable).node_type, true)); - LLVMBuildStore((*c).builder, (*variable).value, (*a).value); - - return 0; - }; - - if (lhs.type == NODE_FIELD_ACCESS) { - let field_access = (*cast(*NODE_FIELD_ACCESS_DATA, lhs.data)); - let xd = field_access.expression; - let name = field_access.name; - - let x = codegen_get_struct_field(c, xd, name); - assert(x != cast(*StructField, null)); - - let variable = codegen_generate_expression_value(c, prhs, cast(*i8, null)); - 
assert(compare_types(c, (*x).type, (*variable).node_type, (*stmt).is_dereference)); - LLVMBuildStore((*c).builder, (*variable).value, (*x).value); - - return 0; - }; - - printf("ASSERT 2 %d\n", lhs.type); - assert(false); - return 0; -}; - -let codegen_generate_return_statement = (c: *codegen, stmt: *NODE_RETURN_STATEMENT_DATA) => i64 { - let expression = (*stmt).expression; - - if expression == cast(*Node, null) { - LLVMBuildRetVoid((*c).builder); - return 0; - }; - - let val = codegen_generate_expression_value(c, expression, cast(*i8, null)); - assert(val != cast(*Variable, null)); - - assert(compare_types(c, (*c).current_function_retur_type, (*val).node_type, false)); - - LLVMBuildRet((*c).builder, (*val).value); - - return 0; -}; - -let get_function_return_type = (ic: *codegen, fun: *Node) => *Node { - if (*fun).type == NODE_FUNCTION_DEFINITION { - let d = cast(*NODE_FUNCTION_DEFINITION_DATA, (*fun).data); - return (*d).retur_type; - }; - if (*fun).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER { - let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*fun).data); - let f = environment_get_variable((*ic).environment, (*d).name); - if f == cast(*Variable, null) { - printf("NO variable 2: %s\n", (*d).name); - assert(false); - }; - let f_type = (*f).node_type; - assert((*f_type).type == NODE_TYPE_FUNCTION_TYPE); - return get_function_return_type(ic, f_type); - }; - if (*fun).type == NODE_TYPE_FUNCTION_TYPE { - let d = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*fun).data); - return (*d).retur_type; - }; - assert(false); - return cast(*Node, null); -}; - -let codegen_generate_function_call_statement = (c: *codegen, statement: *Node) => *Variable { - assert((*statement).type == NODE_FUNCTION_CALL_STATEMENT); - let stmt = cast(*NODE_FUNCTION_CALL_STATEMENT_DATA, (*statement).data); - let expression = (*stmt).expression; - - let node = statement; - let function = cast(*Variable, null); - - if (*expression).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER { - let ident = 
(*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*expression).data)); - function = environment_get_variable((*c).environment, ident.name); - if function == cast(*Variable, null) { - printf("NO variable 1: %s\n", ident.name); - assert(false); - }; - - if LLVMGetValueKind((*function).value) != LLVMFunctionValueKind { - let lt = codegen_get_llvm_type(c, (*function).node_type); - assert(lt != cast(*LLVMTypeRef, null)); - (*function).value = LLVMBuildLoad2((*c).builder, LLVMPointerType(*lt, 0), (*function).value, ""); - node = (*function).node; - }; - }; - if (*expression).type == NODE_FUNCTION_DEFINITION { - function = codegen_generate_expression_value(c, expression, cast(*i8, null)); - }; - - assert(function != cast(*Variable, null)); - assert((*function).node_type != cast(*Node, null)); - let function_type = (*function).node_type; - assert((*function_type).type == NODE_TYPE_FUNCTION_TYPE); - let function_type_data = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*function_type).data); - /* assert((*function_type_data).parameters_len == (*stmt).arguments_len); TODO: Varargs */ - - let arguments = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * (*stmt).arguments_len)); - - let i = 0; - while i < (*stmt).arguments_len { - let argument = (*((*stmt).arguments + cast(**Node, i))); - let arg = codegen_generate_expression_value(c, argument, cast(*i8, null)); - assert(arg != cast(*Variable, null)); - let expected_type = *((*function_type_data).parameters + cast(**Node, i)); /* TODO: If varargs we shouldn't do this */ - - assert(compare_types(c, expected_type, (*arg).node_type, false)); - - (*(arguments + cast(*LLVMValueRef, i))) = (*arg).value; - - i = i + 1; - }; - - let function_type = codegen_get_llvm_type(c, (*function).node_type); - assert(function_type != cast(*LLVMTypeRef, null)); - - let res = LLVMBuildCall2((*c).builder, *function_type, (*function).value, arguments, i, ""); - - let function_return_type = get_function_return_type(c, (*function).node_type); - 
- let v = Variable{}; - - v.value = res; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = node; - v.node_type = function_return_type; - - return codegen_create_variable(c, v); -}; - -let codegen_generate_break_statement = (c: *codegen) => i64 { - assert((*c).whil_loop_exit != cast(LLVMBasicBlockRef, null)); - LLVMBuildBr((*c).builder, (*c).whil_loop_exit); - return 0; -}; - -let codegen_generate_continue_statement = (c: *codegen) => i64 { - assert((*c).whil_block != cast(LLVMBasicBlockRef, null)); - LLVMBuildBr((*c).builder, (*c).whil_block); - return 0; -}; - -let codegen_generate_if_statement = (c: *codegen, statement: *NODE_IF_STATEMENT_DATA) => *void { - let condition_value = codegen_generate_expression_value(c, (*statement).condition, cast(*i8, null)); - assert(condition_value != cast(*Variable, null)); - - let current_block = LLVMGetInsertBlock((*c).builder); - - let then_block = LLVMAppendBasicBlock((*c).current_function, "then_block"); - LLVMPositionBuilderAtEnd((*c).builder, then_block); - - let i = 0; - while i < (*statement).statements_len { - let stmt = (*((*statement).statements + cast(**Node, i))); - let res = codegen_generate_statement(c, stmt); - assert(res == 0); - i = i + 1; - }; - - let merge_block = LLVMAppendBasicBlock((*c).current_function, "merge_block"); - let last_instr = LLVMGetLastInstruction(LLVMGetInsertBlock((*c).builder)); - if last_instr == cast(LLVMValueRef, null) { - LLVMBuildBr((*c).builder, merge_block); - }; - if last_instr != cast(LLVMValueRef, null) { - if LLVMIsATerminatorInst(last_instr) == cast(LLVMValueRef, null) { - LLVMBuildBr((*c).builder, merge_block); - }; - }; - LLVMPositionBuilderAtEnd((*c).builder, current_block); - LLVMBuildCondBr((*c).builder, (*condition_value).value, then_block, merge_block); - LLVMPositionBuilderAtEnd((*c).builder, merge_block); - - return null; -}; - -let codegen_generate_while_statement = (c: *codegen, statement: *NODE_WHILE_STATEMENT_DATA) => *void { - let 
whil_block = LLVMAppendBasicBlock((*c).current_function, "while_block"); - LLVMBuildBr((*c).builder, whil_block); - LLVMPositionBuilderAtEnd((*c).builder, whil_block); - - let condition_value = codegen_generate_expression_value(c, (*statement).condition, cast(*i8, null)); - assert(condition_value != cast(*Variable, null)); - - let inner_block = LLVMAppendBasicBlock((*c).current_function, "inner_block"); - let outer_block = LLVMAppendBasicBlock((*c).current_function, "outer_block"); - LLVMBuildCondBr((*c).builder, (*condition_value).value, inner_block, outer_block); - - (*c).whil_loop_exit = outer_block; - (*c).whil_block = whil_block; - - LLVMPositionBuilderAtEnd((*c).builder, inner_block); - let i = 0; - while i < (*statement).statements_len { - let stmt = (*((*statement).statements + cast(**Node, i))); - let res = codegen_generate_statement(c, stmt); - assert(res == 0); - i = i + 1; - }; - - LLVMBuildBr((*c).builder, whil_block); - LLVMPositionBuilderAtEnd((*c).builder, outer_block); - - (*c).whil_loop_exit = cast(LLVMBasicBlockRef, null); - (*c).whil_block = cast(LLVMBasicBlockRef, null); - - return null; -}; - -extern codegen_generate = (*codegen, *Node) => i64; - -let codegen_generate_import_declaration = (c: *codegen, statement: *NODE_IMPORT_DECLARATION_DATA) => i64 { - return codegen_generate(c, (*statement).program); -}; - -let codegen_generate_statement = (c: *codegen, statement: *Node) => i64 { - let stmt = *statement; - - if stmt.type == NODE_ASSIGNMENT_STATEMENT { - return codegen_generate_assignment_statement(c, cast(*NODE_ASSIGNMENT_STATEMENT_DATA, stmt.data)); - }; - - if stmt.type == NODE_RETURN_STATEMENT { - return codegen_generate_return_statement(c, cast(*NODE_RETURN_STATEMENT_DATA, stmt.data)); - }; - - if stmt.type == NODE_FUNCTION_CALL_STATEMENT { - codegen_generate_function_call_statement(c, statement); - return 0; - }; - - if stmt.type == NODE_IF_STATEMENT { - codegen_generate_if_statement(c, cast(*NODE_IF_STATEMENT_DATA, stmt.data)); - 
return 0; - }; - - if stmt.type == NODE_WHILE_STATEMENT { - codegen_generate_while_statement(c, cast(*NODE_WHILE_STATEMENT_DATA, stmt.data)); - return 0; - }; - - if stmt.type == NODE_IMPORT_DECLARATION { - return codegen_generate_import_declaration(c, cast(*NODE_IMPORT_DECLARATION_DATA, stmt.data)); - }; - - if stmt.type == NODE_CONTINUE_STATEMENT { - return codegen_generate_continue_statement(c); - }; - - if stmt.type == NODE_BREAK_STATEMENT { - return codegen_generate_break_statement(c); - }; - - printf("ASSERT 3 %d\n", stmt.type); - assert(false); - - return 0; -}; - -let codegen_generate = (c: *codegen, ast: *Node) => i64 { - assert((*ast).type == NODE_PROGRAM); - - let program = *cast(*NODE_PROGRAM_DATA, (*ast).data); - - let i = 0; - while i < program.statements_len { - let stmt = *(program.statements + cast(**Node, i)); - - let res = codegen_generate_statement(c, stmt); - if res != 0 { - return 1; - }; - - i = i + 1; - }; - - return 0; -}; - -let codegen_compile = (c: *codegen, dump_ir: bool) => i64 { - /* Dump module */ - LLVMDumpModule((*c).llvm_module); - let message = cast(**i8, null); - - if dump_ir { - LLVMPrintModuleToFile((*c).llvm_module, "output.ll", message); - return 0; - }; - - /* Generate code */ - let triple = LLVMGetDefaultTargetTriple(); - let target_ref = cast(*LLVMTargetRef, arena_alloc((*c).arena, sizeof(*LLVMTargetRef))); - let result = LLVMGetTargetFromTriple(triple, target_ref, message); - if result != 0 { - printf("Target output: %s\n", *message); - LLVMDisposeMessage(*message); - }; - let target_machine = LLVMCreateTargetMachine( - *target_ref, - triple, - "", - "", - LLVMCodeGenLevelDefault, - LLVMRelocDefault, - LLVMCodeModelDefault, - ); - LLVMDisposeMessage(triple); - result = LLVMVerifyModule((*c).llvm_module, LLVMAbortProcessAction, message); - if result != 0 { - printf("Verification output: %s\n", *message); - LLVMDisposeMessage(*message); - }; - - /* Generate the object file */ - let filename = "bootstrap_output.o"; - 
LLVMTargetMachineEmitToFile( - target_machine, - (*c).llvm_module, - filename, - LLVMObjectFile, - cast(**i8, null), - ); - LLVMDisposeTargetMachine(target_machine); - printf("Object file generated: %s\n", filename); - - return 0; -}; - -let codegen_deinit = (c: *codegen) => void { - LLVMDisposeModule((*c).llvm_module); - LLVMShutdown(); - LLVMDisposeBuilder((*c).builder); - return; -}; diff --git a/src/bootstrap/llvm.pry b/src/bootstrap/llvm.pry deleted file mode 100644 index 2feb815..0000000 --- a/src/bootstrap/llvm.pry +++ /dev/null @@ -1,353 +0,0 @@ -extern LLVMInitializeAArch64TargetInfo = () => void; -extern LLVMInitializeAMDGPUTargetInfo = () => void; -extern LLVMInitializeARMTargetInfo = () => void; -extern LLVMInitializeAVRTargetInfo = () => void; -extern LLVMInitializeBPFTargetInfo = () => void; -extern LLVMInitializeHexagonTargetInfo = () => void; -extern LLVMInitializeLanaiTargetInfo = () => void; -extern LLVMInitializeLoongArchTargetInfo = () => void; -extern LLVMInitializeMipsTargetInfo = () => void; -extern LLVMInitializeMSP430TargetInfo = () => void; -extern LLVMInitializeNVPTXTargetInfo = () => void; -extern LLVMInitializePowerPCTargetInfo = () => void; -extern LLVMInitializeRISCVTargetInfo = () => void; -extern LLVMInitializeSparcTargetInfo = () => void; -extern LLVMInitializeSystemZTargetInfo = () => void; -extern LLVMInitializeVETargetInfo = () => void; -extern LLVMInitializeWebAssemblyTargetInfo = () => void; -extern LLVMInitializeX86TargetInfo = () => void; -extern LLVMInitializeXCoreTargetInfo = () => void; - -let LLVMInitializeAllTargetInfos = () => void { - LLVMInitializeAArch64TargetInfo(); - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeARMTargetInfo(); - LLVMInitializeAVRTargetInfo(); - LLVMInitializeBPFTargetInfo(); - LLVMInitializeHexagonTargetInfo(); - LLVMInitializeLanaiTargetInfo(); - LLVMInitializeLoongArchTargetInfo(); - LLVMInitializeMipsTargetInfo(); - LLVMInitializeMSP430TargetInfo(); - LLVMInitializeNVPTXTargetInfo(); - 
LLVMInitializePowerPCTargetInfo(); - LLVMInitializeRISCVTargetInfo(); - LLVMInitializeSparcTargetInfo(); - LLVMInitializeSystemZTargetInfo(); - LLVMInitializeVETargetInfo(); - LLVMInitializeWebAssemblyTargetInfo(); - LLVMInitializeX86TargetInfo(); - LLVMInitializeXCoreTargetInfo(); - return; -}; - -extern LLVMInitializeAArch64Target = () => void; -extern LLVMInitializeAMDGPUTarget = () => void; -extern LLVMInitializeARMTarget = () => void; -extern LLVMInitializeAVRTarget = () => void; -extern LLVMInitializeBPFTarget = () => void; -extern LLVMInitializeHexagonTarget = () => void; -extern LLVMInitializeLanaiTarget = () => void; -extern LLVMInitializeLoongArchTarget = () => void; -extern LLVMInitializeMipsTarget = () => void; -extern LLVMInitializeMSP430Target = () => void; -extern LLVMInitializeNVPTXTarget = () => void; -extern LLVMInitializePowerPCTarget = () => void; -extern LLVMInitializeRISCVTarget = () => void; -extern LLVMInitializeSparcTarget = () => void; -extern LLVMInitializeSystemZTarget = () => void; -extern LLVMInitializeVETarget = () => void; -extern LLVMInitializeWebAssemblyTarget = () => void; -extern LLVMInitializeX86Target = () => void; -extern LLVMInitializeXCoreTarget = () => void; - -let LLVMInitializeAllTargets = () => void { - LLVMInitializeAArch64Target(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeARMTarget(); - LLVMInitializeAVRTarget(); - LLVMInitializeBPFTarget(); - LLVMInitializeHexagonTarget(); - LLVMInitializeLanaiTarget(); - LLVMInitializeLoongArchTarget(); - LLVMInitializeMipsTarget(); - LLVMInitializeMSP430Target(); - LLVMInitializeNVPTXTarget(); - LLVMInitializePowerPCTarget(); - LLVMInitializeRISCVTarget(); - LLVMInitializeSparcTarget(); - LLVMInitializeSystemZTarget(); - LLVMInitializeVETarget(); - LLVMInitializeWebAssemblyTarget(); - LLVMInitializeX86Target(); - LLVMInitializeXCoreTarget(); - return; -}; - -extern LLVMInitializeAArch64TargetMC = () => void; -extern LLVMInitializeAMDGPUTargetMC = () => void; -extern 
LLVMInitializeARMTargetMC = () => void; -extern LLVMInitializeAVRTargetMC = () => void; -extern LLVMInitializeBPFTargetMC = () => void; -extern LLVMInitializeHexagonTargetMC = () => void; -extern LLVMInitializeLanaiTargetMC = () => void; -extern LLVMInitializeLoongArchTargetMC = () => void; -extern LLVMInitializeMipsTargetMC = () => void; -extern LLVMInitializeMSP430TargetMC = () => void; -extern LLVMInitializeNVPTXTargetMC = () => void; -extern LLVMInitializePowerPCTargetMC = () => void; -extern LLVMInitializeRISCVTargetMC = () => void; -extern LLVMInitializeSparcTargetMC = () => void; -extern LLVMInitializeSystemZTargetMC = () => void; -extern LLVMInitializeVETargetMC = () => void; -extern LLVMInitializeWebAssemblyTargetMC = () => void; -extern LLVMInitializeX86TargetMC = () => void; -extern LLVMInitializeXCoreTargetMC = () => void; - -let LLVMInitializeAllTargetMCs = () => void { - LLVMInitializeAArch64TargetMC(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeARMTargetMC(); - LLVMInitializeAVRTargetMC(); - LLVMInitializeBPFTargetMC(); - LLVMInitializeHexagonTargetMC(); - LLVMInitializeLanaiTargetMC(); - LLVMInitializeLoongArchTargetMC(); - LLVMInitializeMipsTargetMC(); - LLVMInitializeMSP430TargetMC(); - LLVMInitializeNVPTXTargetMC(); - LLVMInitializePowerPCTargetMC(); - LLVMInitializeRISCVTargetMC(); - LLVMInitializeSparcTargetMC(); - LLVMInitializeSystemZTargetMC(); - LLVMInitializeVETargetMC(); - LLVMInitializeWebAssemblyTargetMC(); - LLVMInitializeX86TargetMC(); - LLVMInitializeXCoreTargetMC(); - return; -}; - -extern LLVMInitializeAArch64AsmPrinter = () => void; -extern LLVMInitializeAMDGPUAsmPrinter = () => void; -extern LLVMInitializeARMAsmPrinter = () => void; -extern LLVMInitializeAVRAsmPrinter = () => void; -extern LLVMInitializeBPFAsmPrinter = () => void; -extern LLVMInitializeHexagonAsmPrinter = () => void; -extern LLVMInitializeLanaiAsmPrinter = () => void; -extern LLVMInitializeLoongArchAsmPrinter = () => void; -extern 
LLVMInitializeMipsAsmPrinter = () => void; -extern LLVMInitializeMSP430AsmPrinter = () => void; -extern LLVMInitializeNVPTXAsmPrinter = () => void; -extern LLVMInitializePowerPCAsmPrinter = () => void; -extern LLVMInitializeRISCVAsmPrinter = () => void; -extern LLVMInitializeSparcAsmPrinter = () => void; -extern LLVMInitializeSystemZAsmPrinter = () => void; -extern LLVMInitializeVEAsmPrinter = () => void; -extern LLVMInitializeWebAssemblyAsmPrinter = () => void; -extern LLVMInitializeX86AsmPrinter = () => void; -extern LLVMInitializeXCoreAsmPrinter = () => void; - -let LLVMInitializeAllAsmPrinters = () => void { - LLVMInitializeAArch64AsmPrinter(); - LLVMInitializeAMDGPUAsmPrinter(); - LLVMInitializeARMAsmPrinter(); - LLVMInitializeAVRAsmPrinter(); - LLVMInitializeBPFAsmPrinter(); - LLVMInitializeHexagonAsmPrinter(); - LLVMInitializeLanaiAsmPrinter(); - LLVMInitializeLoongArchAsmPrinter(); - LLVMInitializeMipsAsmPrinter(); - LLVMInitializeMSP430AsmPrinter(); - LLVMInitializeNVPTXAsmPrinter(); - LLVMInitializePowerPCAsmPrinter(); - LLVMInitializeRISCVAsmPrinter(); - LLVMInitializeSparcAsmPrinter(); - LLVMInitializeSystemZAsmPrinter(); - LLVMInitializeVEAsmPrinter(); - LLVMInitializeWebAssemblyAsmPrinter(); - LLVMInitializeX86AsmPrinter(); - LLVMInitializeXCoreAsmPrinter(); - return; -}; - -extern LLVMInitializeAArch64AsmParser = () => void; -extern LLVMInitializeAMDGPUAsmParser = () => void; -extern LLVMInitializeARMAsmParser = () => void; -extern LLVMInitializeAVRAsmParser = () => void; -extern LLVMInitializeBPFAsmParser = () => void; -extern LLVMInitializeHexagonAsmParser = () => void; -extern LLVMInitializeLanaiAsmParser = () => void; -extern LLVMInitializeLoongArchAsmParser = () => void; -extern LLVMInitializeMipsAsmParser = () => void; -extern LLVMInitializeMSP430AsmParser = () => void; -extern LLVMInitializePowerPCAsmParser = () => void; -extern LLVMInitializeRISCVAsmParser = () => void; -extern LLVMInitializeSparcAsmParser = () => void; -extern 
LLVMInitializeSystemZAsmParser = () => void; -extern LLVMInitializeVEAsmParser = () => void; -extern LLVMInitializeWebAssemblyAsmParser = () => void; -extern LLVMInitializeX86AsmParser = () => void; - -let LLVMInitializeAllAsmParsers = () => void { - LLVMInitializeAArch64AsmParser(); - LLVMInitializeAMDGPUAsmParser(); - LLVMInitializeARMAsmParser(); - LLVMInitializeAVRAsmParser(); - LLVMInitializeBPFAsmParser(); - LLVMInitializeHexagonAsmParser(); - LLVMInitializeLanaiAsmParser(); - LLVMInitializeLoongArchAsmParser(); - LLVMInitializeMipsAsmParser(); - LLVMInitializeMSP430AsmParser(); - LLVMInitializePowerPCAsmParser(); - LLVMInitializeRISCVAsmParser(); - LLVMInitializeSparcAsmParser(); - LLVMInitializeSystemZAsmParser(); - LLVMInitializeVEAsmParser(); - LLVMInitializeWebAssemblyAsmParser(); - LLVMInitializeX86AsmParser(); - return; -}; - -extern LLVMInitializeAArch64Disassembler = () => void; -extern LLVMInitializeAMDGPUDisassembler = () => void; -extern LLVMInitializeARMDisassembler = () => void; -extern LLVMInitializeAVRDisassembler = () => void; -extern LLVMInitializeBPFDisassembler = () => void; -extern LLVMInitializeHexagonDisassembler = () => void; -extern LLVMInitializeLanaiDisassembler = () => void; -extern LLVMInitializeLoongArchDisassembler = () => void; -extern LLVMInitializeMipsDisassembler = () => void; -extern LLVMInitializeMSP430Disassembler = () => void; -extern LLVMInitializePowerPCDisassembler = () => void; -extern LLVMInitializeRISCVDisassembler = () => void; -extern LLVMInitializeSparcDisassembler = () => void; -extern LLVMInitializeSystemZDisassembler = () => void; -extern LLVMInitializeVEDisassembler = () => void; -extern LLVMInitializeWebAssemblyDisassembler = () => void; -extern LLVMInitializeX86Disassembler = () => void; -extern LLVMInitializeXCoreDisassembler = () => void; - -let LLVMInitializeAllDisassemblers = () => void { - LLVMInitializeAArch64Disassembler(); - LLVMInitializeAMDGPUDisassembler(); - LLVMInitializeARMDisassembler(); - 
LLVMInitializeAVRDisassembler(); - LLVMInitializeBPFDisassembler(); - LLVMInitializeHexagonDisassembler(); - LLVMInitializeLanaiDisassembler(); - LLVMInitializeLoongArchDisassembler(); - LLVMInitializeMipsDisassembler(); - LLVMInitializeMSP430Disassembler(); - LLVMInitializePowerPCDisassembler(); - LLVMInitializeRISCVDisassembler(); - LLVMInitializeSparcDisassembler(); - LLVMInitializeSystemZDisassembler(); - LLVMInitializeVEDisassembler(); - LLVMInitializeWebAssemblyDisassembler(); - LLVMInitializeX86Disassembler(); - LLVMInitializeXCoreDisassembler(); - return; -}; - -let LLVMBuilderRef = newtype *void; -let LLVMModuleRef = newtype *void; -let LLVMTargetDataRef = newtype *void; -let LLVMTargetMachineRef = newtype *void; -let LLVMContextRef = newtype *void; -let LLVMTargetRef = newtype *void; -let LLVMIntPredicate = newtype i64; - -let LLVMValueRef = newtype *void; -let LLVMValueKind = newtype i64; -let LLVMTypeRef = newtype *void; -let LLVMBasicBlockRef = newtype *void; - -extern LLVMGetModuleDataLayout = (LLVMModuleRef) => LLVMTargetDataRef; -extern LLVMConstInt = (LLVMTypeRef, i64, i64) => LLVMValueRef; -extern LLVMConstNull = (LLVMTypeRef) => LLVMValueRef; -extern LLVMInt64Type = () => LLVMTypeRef; -extern LLVMInt32Type = () => LLVMTypeRef; -extern LLVMInt1Type = () => LLVMTypeRef; -extern LLVMInt8Type = () => LLVMTypeRef; -extern LLVMVoidType = () => LLVMTypeRef; - -extern LLVMModuleCreateWithName = (*i8) => LLVMModuleRef; -extern LLVMGetGlobalContext = () => LLVMContextRef; -extern LLVMCreateBuilder = () => LLVMBuilderRef; -extern LLVMDisposeModule = (LLVMModuleRef) => void; -extern LLVMShutdown = () => void; -extern LLVMDisposeBuilder = (LLVMBuilderRef) => void; - -extern LLVMGetInsertBlock = (LLVMBuilderRef) => LLVMBasicBlockRef; - -extern LLVMDumpModule = (LLVMModuleRef) => void; -extern LLVMPrintModuleToFile = (LLVMModuleRef, *i8, **i8) => i64; -extern LLVMGetDefaultTargetTriple = () => *i8; -extern LLVMGetTargetFromTriple = (*i8, *LLVMTargetRef, **i8) 
=> i64; -extern LLVMDisposeMessage = (*i8) => void; -extern LLVMCreateTargetMachine = (LLVMTargetRef, *i8, *i8, *i8, i64, i64, i64) => LLVMTargetMachineRef; -extern LLVMDisposeTargetMachine = (LLVMTargetMachineRef) => void; - -let LLVMCodeGenLevelDefault = 2; -let LLVMRelocDefault = 0; -let LLVMCodeModelDefault = 0; - -extern LLVMVerifyModule = (LLVMModuleRef, i64, **i8) => i64; - -let LLVMAbortProcessAction = 0; - -extern LLVMTargetMachineEmitToFile = (LLVMTargetMachineRef, LLVMModuleRef, *i8, i64, **i8) => i64; - -let LLVMObjectFile = 1; - -extern LLVMFunctionType = (LLVMTypeRef, *LLVMTypeRef, i64, i64) => LLVMTypeRef; -extern LLVMAddFunction = (LLVMModuleRef, *i8, LLVMTypeRef) => LLVMValueRef; -extern LLVMAppendBasicBlock = (LLVMValueRef, *i8) => LLVMBasicBlockRef; -extern LLVMPositionBuilderAtEnd = (LLVMBuilderRef, LLVMBasicBlockRef) => void; - -extern LLVMGetParams = (LLVMValueRef, *LLVMValueRef) => void; - -extern LLVMBuildRetVoid = (LLVMBuilderRef) => void; -extern LLVMBuildRet = (LLVMBuilderRef, LLVMValueRef) => void; -extern LLVMPointerType = (LLVMTypeRef, i64) => LLVMTypeRef; - -extern LLVMBuildCall2 = (LLVMBuilderRef, LLVMTypeRef, LLVMValueRef, *LLVMValueRef, i64, *i8) => LLVMValueRef; -extern LLVMBuildGlobalStringPtr = (LLVMBuilderRef, *i8, *i8) => LLVMValueRef; -extern LLVMBuildAlloca = (LLVMBuilderRef, LLVMTypeRef, *i8) => LLVMValueRef; -extern LLVMBuildStore = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef) => LLVMValueRef; -extern LLVMBuildLoad2 = (LLVMBuilderRef, LLVMTypeRef, LLVMValueRef, *i8) => LLVMValueRef; -extern LLVMGetLastInstruction = (LLVMBasicBlockRef) => LLVMValueRef; - -extern LLVMBuildBr = (LLVMBuilderRef, LLVMBasicBlockRef) => LLVMValueRef; -extern LLVMIsATerminatorInst = (LLVMValueRef) => LLVMValueRef; -extern LLVMBuildCondBr = (LLVMBuilderRef, LLVMValueRef, LLVMBasicBlockRef, LLVMBasicBlockRef) => LLVMValueRef; -extern LLVMBuildICmp = (LLVMBuilderRef, LLVMIntPredicate, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; -extern 
LLVMBuildNeg = (LLVMBuilderRef, LLVMValueRef, *i8) => LLVMValueRef; -extern LLVMBuildSub = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; -extern LLVMBuildAdd = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; -extern LLVMBuildMul = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; -extern LLVMBuildSDiv = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; -extern LLVMBuildSRem = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; - -extern LLVMBuildGEP2 = (LLVMBuilderRef, LLVMTypeRef, LLVMValueRef, *LLVMValueRef, i64, *i8) => LLVMValueRef; -extern LLVMAddGlobal = (LLVMModuleRef, LLVMTypeRef, *i8) => LLVMValueRef; -extern LLVMSetInitializer = (LLVMValueRef, LLVMValueRef) => void; -extern LLVMGetValueKind = (LLVMValueRef) => LLVMValueKind; -let LLVMFunctionValueKind = cast(LLVMValueKind, 5); - -let LLVMIntEQ = 32; -let LLVMIntNE = 33; -let LLVMIntUGT = 34; -let LLVMIntUGE = 35; -let LLVMIntULT = 36; -let LLVMIntULE = 37; -let LLVMIntSGT = 38; -let LLVMIntSGE = 39; -let LLVMIntSLT = 40; -let LLVMIntSLE = 41; - -extern LLVMStoreSizeOfType = (LLVMTargetDataRef, LLVMTypeRef) => i64; -extern LLVMStructCreateNamed = (LLVMContextRef, *i8) => LLVMTypeRef; -extern LLVMStructSetBody = (LLVMTypeRef, *LLVMTypeRef, i64, i64) => void; - diff --git a/src/bootstrap/main.pry b/src/bootstrap/main.pry deleted file mode 100644 index a564965..0000000 --- a/src/bootstrap/main.pry +++ /dev/null @@ -1,80 +0,0 @@ -import "!stdlib.pry"; -import "!mem.pry"; - -let slice = struct { - data: *void, - data_len: i64, -}; - -import "tokenizer.pry"; -import "parser.pry"; -import "codegen.pry"; - -let read_file = (filename: *i8, alloc: *arena) => slice { - let file = fopen(filename, "r"); - - fseek(file, 0, 2); - let file_size = ftell(file); - fseek(file, 0, 0); - - let buf = cast(*i8, arena_alloc(alloc, file_size + 1)); - - let bytes_read = fread(buf, 1, file_size, file); - (*(buf + cast(*i8, bytes_read))) = '\0'; - 
- fclose(file); - - let sl = slice{}; - sl.data = cast(*void, buf); - sl.data_len = file_size; - return sl; -}; - -let main = (argc: i64, argv: **i8) => i64 { - if argc < 2 { - printf("Need filename!\n"); - return 1; - }; - - let generate_ir = false; - let filename = cast(*i8, null); - - let i = 0; - while i < (argc - 1) { - i = i + 1; - let arg = *(argv + cast(**i8, i)); - - if strcmp(arg, "--generate-ir") { - generate_ir = true; - continue; - }; - - if filename == cast(*i8, null) { - filename = arg; - continue; - }; - - assert(false); - }; - - printf("%s\n", filename); - - let alloc = arena_init(1024 * 1024 * 1024); - - let file = read_file(filename, alloc); - - let t = tokenizer_init(alloc, file); - let ts = tokenizer_tokenize(t); - - let p = parser_init(cast(*token, ts.data), ts.data_len, alloc, filename); - let ast = parse(p); - - let c = codegen_init(alloc); - let res = codegen_generate(c, ast); - let res = codegen_compile(c, generate_ir); - codegen_deinit(c); - - arena_free(alloc); - - return 0; -}; diff --git a/src/bootstrap/parser.pry b/src/bootstrap/parser.pry deleted file mode 100644 index 0b448d0..0000000 --- a/src/bootstrap/parser.pry +++ /dev/null @@ -1,1456 +0,0 @@ -import "tokenizer.pry"; - -extern fopen = (*i8, *i8) => *i8; -extern fgets = (*i8, i64, *i8) => void; -extern feof = (*i8) => bool; -extern fseek = (*i8, i64, i64) => i64; -extern lseek = (i64, i64, i64) => i64; -extern ftell = (*i8) => i64; -extern fread = (*i8, i64, i64, *i8) => i64; -extern fclose = (*i8) => *i8; - -extern strcpy = (*i8, *i8) => *i8; -extern dirname = (*i8) => *i8; -extern open = (*i8, i64) => i64; -extern openat = (i64, *i8, i64) => i64; -extern read = (i64, *i8, i64) => i64; -extern realpath = (*i8, *i8) => *i8; -extern snprintf = (*i8, i64, *i8, *i8, *i8) => i64; -extern strcpy = (*i8, *i8) => *i8; -extern strlen = (*i8) => i64; - -let Node = struct { - type: i64, - data: *void, -}; - -let NODE_PROGRAM = 1; -let NODE_STATEMENT = 2; -let NODE_ASSIGNMENT_STATEMENT = 
3; -let NODE_IMPORT_DECLARATION = 4; -let NODE_FUNCTION_CALL_STATEMENT = 5; -let NODE_IF_STATEMENT = 6; -let NODE_WHILE_STATEMENT = 7; -let NODE_EQUALITY_EXPRESSION = 8; -let NODE_ADDITIVE_EXPRESSION = 9; -let NODE_MULTIPLICATIVE_EXPRESSION = 10; -let NODE_UNARY_EXPRESSION = 11; -let NODE_POSTFIX_EXPRESSION = 12; -let NODE_PRIMARY_EXPRESSION_NUMBER = 13; -let NODE_PRIMARY_EXPRESSION_BOOLEAN = 14; -let NODE_PRIMARY_EXPRESSION_NULL = 15; -let NODE_PRIMARY_EXPRESSION_CHAR = 16; -let NODE_PRIMARY_EXPRESSION_STRING = 17; -let NODE_PRIMARY_EXPRESSION_IDENTIFIER = 18; -let NODE_FUNCTION_DEFINITION = 19; -let NODE_STRUCT_INSTANCIATION = 20; -let NODE_FIELD_ACCESS = 21; -let NODE_TYPE_SIMPLE_TYPE = 22; -let NODE_TYPE_FUNCTION_TYPE = 23; -let NODE_TYPE_POINTER_TYPE = 24; -let NODE_TYPE_STRUCT_TYPE = 25; -let NODE_RETURN_STATEMENT = 26; -let NODE_CAST_STATEMENT = 27; -let NODE_SIZEOF_STATEMENT = 28; -let NODE_BREAK_STATEMENT = 29; -let NODE_CONTINUE_STATEMENT = 30; - -let EQUALITY_EXPRESSION_TYPE_EQ = 0; -let EQUALITY_EXPRESSION_TYPE_NE = 1; -let EQUALITY_EXPRESSION_TYPE_GE = 2; -let EQUALITY_EXPRESSION_TYPE_LE = 3; -let EQUALITY_EXPRESSION_TYPE_LT = 4; -let EQUALITY_EXPRESSION_TYPE_GT = 5; - -let MULTIPLICATIVE_EXPRESSION_TYPE_MUL = 0; -let MULTIPLICATIVE_EXPRESSION_TYPE_DIV = 1; -let MULTIPLICATIVE_EXPRESSION_TYPE_MOD = 2; - -let UNARY_EXPRESSION_TYPE_NOT = 0; -let UNARY_EXPRESSION_TYPE_MINUS = 1; -let UNARY_EXPRESSION_TYPE_STAR = 2; - -let NODE_PROGRAM_DATA = struct { - statements: **Node, - statements_len: i64, -}; - -let NODE_STATEMENT_DATA = struct { - statement: *Node, -}; - -let NODE_ASSIGNMENT_STATEMENT_DATA = struct { - is_declaration: bool, - is_dereference: bool, - lhs: *Node, - rhs: *Node, -}; - -let NODE_IMPORT_DECLARATION_DATA = struct { - filename: *i8, - program: *Node, -}; - -let NODE_FUNCTION_CALL_STATEMENT_DATA = struct { - expression: *Node, - arguments: **Node, - arguments_len: i64, -}; - -let NODE_IF_STATEMENT_DATA = struct { - condition: *Node, - 
statements: **Node, - statements_len: i64, -}; - -let NODE_WHILE_STATEMENT_DATA = struct { - condition: *Node, - statements: **Node, - statements_len: i64, -}; - -let NODE_EQUALITY_EXPRESSION_DATA = struct { - lhs: *Node, - rhs: *Node, - typ: i64, -}; - -let NODE_ADDITIVE_EXPRESSION_DATA = struct { - addition: bool, - lhs: *Node, - rhs: *Node, -}; - -let NODE_MULTIPLICATIVE_EXPRESSION_DATA = struct { - lhs: *Node, - rhs: *Node, - typ: i64, -}; - -let NODE_UNARY_EXPRESSION_DATA = struct { - typ: i64, - expression: *Node, -}; - -let NODE_POSTFIX_EXPRESSION_DATA = struct { - lhs: *Node, - rhs: *Node, -}; - -let NODE_PRIMARY_EXPRESSION_NUMBER_DATA = struct { - value: i64, -}; - -let NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA = struct { - value: bool, -}; - -let NODE_PRIMARY_EXPRESSION_CHAR_DATA = struct { - value: i8, -}; - -let NODE_PRIMARY_EXPRESSION_STRING_DATA = struct { - value: *i8, -}; - -let NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA = struct { - name: *i8, - type: *Node, -}; - -let NODE_FUNCTION_DEFINITION_DATA = struct { - statements: **Node, - statements_len: i64, - parameters: **Node, - parameters_len: i64, - retur_type: *Node, -}; - -let NODE_STRUCT_INSTANCIATION_DATA = struct { - typ: *i8, -}; - -let NODE_FIELD_ACCESS_DATA = struct { - expression: *Node, - name: *i8, -}; - -let NODE_TYPE_SIMPLE_TYPE_DATA = struct { - name: *i8, - underlying_type: *Node, -}; - -let NODE_TYPE_FUNCTION_TYPE_DATA = struct { - parameters: **Node, - parameters_len: i64, - retur_type: *Node, -}; - -let NODE_TYPE_POINTER_TYPE_DATA = struct { - type: *Node, -}; - -let NODE_TYPE_STRUCT_TYPE_DATA = struct { - fields: **Node, - fields_len: i64, -}; - -let NODE_RETURN_STATEMENT_DATA = struct { - expression: *Node, -}; - -let NODE_CAST_STATEMENT_DATA = struct { - typ: *Node, - expression: *Node, -}; - -let NODE_SIZEOF_STATEMENT_DATA = struct { - typ: *Node, -}; - - -let parser = struct { - tokens: *token, - tokens_len: i64, - - offset: i64, - - arena: *arena, - filename: *i8, -}; - -extern 
parser_parse_statement = (*parser) => *Node; -extern parser_parse_expression = (*parser) => *Node; -extern parse_function_call_statement = (*parser) => *Node; -extern parser_parse_additive_expression = (*parser) => *Node; - -let parser_init = (ts: *token, ts_len: i64, ar: *arena, filename: *i8) => *parser { - let p = cast(*parser, arena_alloc(ar, sizeof(parser))); - - (*p).tokens = ts; - (*p).tokens_len = ts_len; - (*p).offset = 0; - (*p).arena = ar; - (*p).filename = filename; - - return p; -}; - -let create_node = (p: *parser, n: Node) => *Node { - let res = cast(*Node, arena_alloc((*p).arena, sizeof(Node))); - *res = n; - return res; -}; - -let parser_peek_token = (p: *parser) => *token { - if (*p).offset >= (*p).tokens_len { - return cast(*token, null); - }; - - return ((*p).tokens + cast(*token, (*p).offset)); -}; - - let parser_consume_token = (p: *parser) => *token { - if (*p).offset >= (*p).tokens_len { - return cast(*token, null); - }; - - let t = ((*p).tokens + cast(*token, (*p).offset)); - (*p).offset = (*p).offset + 1; - return t; -}; - -let parser_accept_token = (p: *parser, t: i64) => *token { - let curr_token = parser_peek_token(p); - if curr_token == cast(*token, null) { - return cast(*token, null); - }; - - if (*curr_token).type == t { - return parser_consume_token(p); - }; - return cast(*token, null); -}; - -let parser_accept_parse = (p: *parser, parsing_func: (*parser) => *Node) => *Node { - let prev_offset = (*p).offset; - let node = parsing_func(p); - if node == cast(*Node, null) { - (*p).offset = prev_offset; - }; - return node; -}; - -/* ReturnStatement ::= RETURN (Expression)? 
*/ -let parser_parse_return_statement = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_RETURN) == cast(*token, null) { - return cast(*Node, null); - }; - - let maybe_expr = parser_accept_parse(p, parser_parse_expression); - - let d = cast(*NODE_RETURN_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_RETURN_STATEMENT_DATA ))); - (*d).expression = maybe_expr; - - let r = Node{}; - r.type = NODE_RETURN_STATEMENT; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -extern parser_parse_type = (*parser) => *Node; - -/* CastStatement ::= "cast" LPAREN TYPE "," Expression RPAREN */ -let parser_parse_cast_statement = (p: *parser) => *Node { - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - return cast(*Node, null); - }; - - if !strcmp(cast(*i8, (*ident).data), "cast") { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let typ = parser_parse_type(p); - if typ == cast(*Node, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_COMMA) == cast(*token, null) { - return cast(*Node, null); - }; - - let expression = parser_parse_expression(p); - if expression == cast(*Node, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_CAST_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_CAST_STATEMENT_DATA ))); - (*d).typ = typ; - (*d).expression = expression; - - let r = Node{}; - r.type = NODE_CAST_STATEMENT; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -/* SizeOfStatement ::= "sizeof" LPAREN TYPE RPAREN */ -let parser_parse_sizeof_statement = (p: *parser) => *Node { - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - return cast(*Node, null); - }; - - if !strcmp(cast(*i8, (*ident).data), "sizeof") { - return cast(*Node, null); - }; - - 
if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let typ = parser_parse_type(p); - if typ == cast(*Node, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_SIZEOF_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_SIZEOF_STATEMENT_DATA ))); - (*d).typ = typ; - - let r = Node{}; - r.type = NODE_SIZEOF_STATEMENT; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -/* FunctionType ::= LPAREN (Type ("," Type)*)? RPAREN ARROW Type */ -let parser_parse_function_type = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let parameters = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20)); - let parameters_len = 0; - let first = true; - while true { - if !first { - parser_accept_token(p, TOKEN_COMMA); - }; - first = false; - let type_annotation = parser_accept_parse(p, parser_parse_type); - if type_annotation == cast(*Node, null) { - break; - }; - (*(parameters + cast(**Node, parameters_len))) = type_annotation; - parameters_len = parameters_len + 1; - - }; - - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) { - return cast(*Node, null); - }; - - let retur_typ = parser_parse_type(p); - if retur_typ == cast(*Node, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_FUNCTION_TYPE_DATA))); - (*d).parameters = parameters; - (*d).parameters_len = parameters_len; - (*d).retur_type = retur_typ; - - let r = Node{}; - r.type = NODE_TYPE_FUNCTION_TYPE; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -/* Type ::= IDENTIFIER | FunctionType */ -let parser_parse_type = (p: *parser) => *Node { - let typ = parser_accept_parse(p, 
parser_parse_function_type); - if typ != cast(*Node, null) { - return typ; - }; - - let to = parser_consume_token(p); - assert(to != cast(*token, null)); - - /* TODO: we should only accept specific type identifiers */ - if (*to).type == TOKEN_IDENTIFIER { - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = cast(*i8, (*to).data); - (*d).underlying_type = cast(*Node, null); - - let r = Node{}; - r.type = NODE_TYPE_SIMPLE_TYPE; - r.data = cast(*void, d); - - return create_node(p, r); - }; - - if (*to).type == TOKEN_MUL { - let d = cast(*NODE_TYPE_POINTER_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_POINTER_TYPE_DATA))); - (*d).type = parser_parse_type(p); - - let r = Node{}; - r.type = NODE_TYPE_POINTER_TYPE; - r.data = cast(*void, d); - - return create_node(p, r); - }; - - return cast(*Node, null); -}; - -/* IfStatement ::= "if" Expression LBRACE Statement* RBRACE */ -let parser_parse_if_statement = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_IF) == cast(*token, null) { - return cast(*Node, null); - }; - - let expression = parser_parse_expression(p); - if expression == cast(*Node, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100)); - let i = 0; - while true { - let n = parser_accept_parse(p, parser_parse_statement); - if n == cast(*Node, null) { - break; - }; - (*(statements + cast(**Node, i))) = n; - i = i + 1; - }; - - if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let dd = cast(*NODE_IF_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_IF_STATEMENT_DATA))); - (*dd).condition = expression; - (*dd).statements = statements; - (*dd).statements_len = i; - - let r = Node{}; - r.type = NODE_IF_STATEMENT; - r.data = cast(*void, dd); - - return create_node(p, r); 
-}; - -/* WhileStatement ::= "while" Expression LBRACE Statement* RBRACE */ -let parser_parse_while_statement = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_WHILE) == cast(*token, null) { - return cast(*Node, null); - }; - - let expression = parser_parse_expression(p); - if expression == cast(*Node, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100)); - let i = 0; - while true { - let n = parser_accept_parse(p, parser_parse_statement); - if n == cast(*Node, null) { - break; - }; - (*(statements + cast(**Node, i))) = n; - i = i + 1; - }; - - if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let dd = cast(*NODE_WHILE_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_WHILE_STATEMENT_DATA))); - (*dd).condition = expression; - (*dd).statements = statements; - (*dd).statements_len = i; - - let r = Node{}; - r.type = NODE_WHILE_STATEMENT; - r.data = cast(*void, dd); - - return create_node(p, r); -}; - -/* ExternDeclaration ::= "extern" IDENTIFIER EQUALS Type */ -let parser_parse_extern_declaration = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_EXTERN) == cast(*token, null) { - return cast(*Node, null); - }; - - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) { - return cast(*Node, null); - }; - let typ = parser_parse_type(p); - if typ == cast(*Node, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); - (*d).name = cast(*i8, (*ident).data); - (*d).type = cast(*Node, null); - - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; - n.data = cast(*void, d); - - let 
dd = cast(*NODE_ASSIGNMENT_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA))); - (*dd).is_declaration = false; - (*dd).is_dereference = false; - (*dd).lhs = create_node(p, n); - (*dd).rhs = typ; - - let r = Node{}; - r.type = NODE_ASSIGNMENT_STATEMENT; - r.data = cast(*void, dd); - - return create_node(p, r); -}; - -/* FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* */ -let parser_parse_function_parameters = (p: *parser) => *slice { - let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(**Node) * 20)); - let i = 0; - while true { - if i != 0 { - parser_accept_token(p, TOKEN_COMMA); - }; - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - break; - }; - if parser_accept_token(p, TOKEN_COLON) == cast(*token, null) { - return cast(*slice, null); - }; - let type_annotation = parser_parse_type(p); - if type_annotation == cast(*Node, null) { - return cast(*slice, null); - }; - - let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); - (*d).name = cast(*i8, (*ident).data); - (*d).type = type_annotation; - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; - n.data = cast(*void, d); - (*(node_list + cast(**Node, i))) = create_node(p, n); - - i = i + 1; - }; - - let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice))); - (*s).data = cast(*void, node_list); - (*s).data_len = i; - return s; -}; - -/* TypeDefinition ::= "newtype" Type */ -let parser_parse_type_definition = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_TYPE) == cast(*token, null) { - return cast(*Node, null); - }; - - let typ = parser_parse_type(p); - if typ == cast(*Node, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = ""; - (*d).underlying_type = typ; - - let r = Node{}; - r.type = 
NODE_TYPE_SIMPLE_TYPE; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -/* StructDefinition ::= "struct" LBRACE StructFields? RBRACE */ -let parser_parse_struct_definition = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_STRUCT) == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let fields = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20)); - let i = 0; - while true { - let field = parser_accept_parse(p, (ip: *parser) => *Node { - let ident = parser_accept_token(ip, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(ip, TOKEN_COLON) == cast(*token, null) { - return cast(*Node, null); - }; - let typ_annotation = parser_parse_type(ip); - if typ_annotation == cast(*Node, null) { - return cast(*Node, null); - }; - printf("STRUCT TYP: %d\n" (*typ_annotation).type); - let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*ip).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); - (*d).name = cast(*i8, (*ident).data); - (*d).type = typ_annotation; - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; - n.data = cast(*void, d); - return create_node(ip, n); - }); - if field == cast(*Node, null) { - break; - }; - - parser_accept_token(p, TOKEN_COMMA); - - (*(fields + cast(**Node, i))) = field; - i = i + 1; - }; - - if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_TYPE_STRUCT_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_STRUCT_TYPE_DATA))); - (*d).fields = fields; - (*d).fields_len = i; - let n = Node{}; - n.type = NODE_TYPE_STRUCT_TYPE; - n.data = cast(*void, d); - return create_node(p, n); -}; - -/* FunctionDefinition ::= LPAREN FunctionParameters? 
RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE */ -let parser_parse_function_definition = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - let params = parser_parse_function_parameters(p); - if params == cast(*slice, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) { - return cast(*Node, null); - }; - let retur_type = parser_parse_type(p); - if retur_type == cast(*Node, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100)); - let i = 0; - while true { - let n = parser_accept_parse(p, parser_parse_statement); - if n == cast(*Node, null) { - break; - }; - (*(statements + cast(**Node, i))) = n; - i = i + 1; - }; - - - if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - - let d = cast(*NODE_FUNCTION_DEFINITION_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_DEFINITION_DATA))); - (*d).statements = statements; - (*d).statements_len = i; - (*d).parameters = cast(**Node, (*params).data); - (*d).parameters_len = (*params).data_len; - (*d).retur_type = retur_type; - - let n = Node{}; - n.type = NODE_FUNCTION_DEFINITION; - n.data = cast(*void, d); - - return create_node(p, n); -}; - -/* StructInstantiation ::= IDENTIFIER LBRACE RBRACE */ -let parser_parse_struct_instanciation = (p: *parser) => *Node { - let typ = parser_accept_token(p, TOKEN_IDENTIFIER); - if typ == cast(*token, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { - return cast(*Node, 
null); - }; - - let d = cast(*NODE_STRUCT_INSTANCIATION_DATA, arena_alloc((*p).arena, sizeof(NODE_STRUCT_INSTANCIATION_DATA))); - (*d).typ = cast(*i8, (*typ).data); - - let n = Node{}; - n.type = NODE_STRUCT_INSTANCIATION; - n.data = cast(*void, d); - - return create_node(p, n); -}; - -extern parser_parse_primary_expression = (*parser) => *Node; - -/* FieldAccess ::= PrimaryExpression DOT IDENTIFIER */ -let parser_parse_field_access = (p: *parser) => *Node { - let expression = parser_parse_primary_expression(p); - if expression == cast(*Node, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_DOT) == cast(*token, null) { - return cast(*Node, null); - }; - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_FIELD_ACCESS_DATA, arena_alloc((*p).arena, sizeof(NODE_FIELD_ACCESS_DATA))); - (*d).expression = expression; - (*d).name = cast(*i8, (*ident).data); - - let n = Node{}; - n.type = NODE_FIELD_ACCESS; - n.data = cast(*void, d); - - return create_node(p, n); -}; - -/* PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN */ -let parser_parse_primary_expression = (p: *parser) => *Node { - let stmt = parser_accept_parse(p, parser_parse_function_definition); - if stmt != cast(*Node, null) { - return stmt; - }; - let stmt = parser_accept_parse(p, parser_parse_type_definition); - if stmt != cast(*Node, null) { - return stmt; - }; - let stmt = parser_accept_parse(p, parser_parse_struct_definition); - if stmt != cast(*Node, null) { - return stmt; - }; - let stmt = parser_accept_parse(p, parser_parse_struct_instanciation); - if stmt != cast(*Node, null) { - return stmt; - }; - - if parser_accept_token(p, TOKEN_LPAREN) != cast(*token, null) { - let expr = parser_parse_expression(p); - if expr == cast(*Node, null) { - return cast(*Node, 
null); - }; - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - return expr; - }; - - let tok = parser_consume_token(p); - if tok == cast(*token, null) { - printf("NO TOK\n"); - return cast(*Node, null); - }; - - if (*tok).type == TOKEN_NULL { - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_NULL; - return create_node(p, n); - }; - - if (*tok).type == TOKEN_IDENTIFIER { - let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); - (*d).name = cast(*i8, (*tok).data); - (*d).type = cast(*Node, null); - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; - n.data = cast(*void, d); - return create_node(p, n); - }; - - if (*tok).type == TOKEN_NUMBER { - let d = cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_NUMBER_DATA))); - (*d).value = *(cast(*i64, (*tok).data)); - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_NUMBER; - n.data = cast(*void, d); - return create_node(p, n); - }; - - if (*tok).type == TOKEN_BOOLEAN { - let d = cast(*NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA))); - (*d).value = *(cast(*bool, (*tok).data)); - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_BOOLEAN; - n.data = cast(*void, d); - return create_node(p, n); - }; - - if (*tok).type == TOKEN_CHAR { - let d = cast(*NODE_PRIMARY_EXPRESSION_CHAR_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_CHAR_DATA))); - (*d).value = *(cast(*i8, (*tok).data)); - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_CHAR; - n.data = cast(*void, d); - return create_node(p, n); - }; - - if (*tok).type == TOKEN_STRING { - let d = cast(*NODE_PRIMARY_EXPRESSION_STRING_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_STRING_DATA))); - (*d).value = cast(*i8, (*tok).data); - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_STRING; - n.data = 
cast(*void, d); - return create_node(p, n); - }; - - return cast(*Node, null); -}; - -/* EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression */ -let parser_parse_equality_expression = (p: *parser) => *Node { - let lhs = parser_parse_additive_expression(p); - if lhs == cast(*Node, null) { - return cast(*Node, null); - }; - - let typ = -1; - let ex = parser_accept_parse(p, (ip: *parser) => *Node { - if parser_accept_token(ip, TOKEN_EQUALS) == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(ip, TOKEN_EQUALS) == cast(*token, null) { - return cast(*Node, null); - }; - let n = Node{}; - return create_node(ip, n); - }); - if ex != cast(*Node, null) { - typ = EQUALITY_EXPRESSION_TYPE_EQ; - }; - - if (typ == -1) { - ex = parser_accept_parse(p, (ip: *parser) => *Node { - if (parser_accept_token(ip, TOKEN_BANG) == cast(*token, null)) { - return cast(*Node, null); - }; - if (parser_accept_token(ip, TOKEN_EQUALS) == cast(*token, null)) { - return cast(*Node, null); - }; - let n = Node{}; - return create_node(ip, n); - }); - if (ex != cast(*Node, null)) { - typ = EQUALITY_EXPRESSION_TYPE_NE; - }; - }; - - if (typ == -1) { - ex = parser_accept_parse(p, (ip: *parser) => *Node { - if (parser_accept_token(ip, TOKEN_LESS) == cast(*token, null)) { - return cast(*Node, null); - }; - if (parser_accept_token(ip, TOKEN_EQUALS) == cast(*token, null)) { - return cast(*Node, null); - }; - let n = Node{}; - return create_node(ip, n); - }); - if (ex != cast(*Node, null)) { - typ = EQUALITY_EXPRESSION_TYPE_LE; - }; - }; - - if (typ == -1) { - ex = parser_accept_parse(p, (ip: *parser) => *Node { - if (parser_accept_token(ip, TOKEN_GREATER) == cast(*token, null)) { - return cast(*Node, null); - }; - if (parser_accept_token(ip, TOKEN_EQUALS) == cast(*token, null)) { - return cast(*Node, null); - }; - let n = Node{}; - return create_node(ip, n); - }); - if (ex != cast(*Node, null)) { - typ = EQUALITY_EXPRESSION_TYPE_GE; 
- }; - }; - - if (typ == -1) { - if (parser_accept_token(p, TOKEN_LESS) != cast(*token, null)) { - typ = EQUALITY_EXPRESSION_TYPE_LT; - }; - }; - - if (typ == -1) { - if (parser_accept_token(p, TOKEN_GREATER) != cast(*token, null)) { - typ = EQUALITY_EXPRESSION_TYPE_GT; - }; - }; - - if typ == -1 { - return cast(*Node, null); - }; - - let rhs = parser_parse_additive_expression(p); - if rhs == cast(*Node, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_EQUALITY_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_EQUALITY_EXPRESSION_DATA))); - (*d).lhs = lhs; - (*d).rhs = rhs; - (*d).typ = typ; - - let n = Node{}; - n.type = NODE_EQUALITY_EXPRESSION ; - n.data = cast(*void, d); - - return create_node(p, n); -}; - -/* PostfixExpression ::= PrimaryExpression (CastStatement | SizeOfStatement | FunctionCallStatement | FieldAccess )* */ -let parser_parse_postfix_expression = (p: *parser) => *Node { - let ex = parser_accept_parse(p, parser_parse_cast_statement); - if ex != cast(*Node, null) { - return ex; - }; - let ex = parser_accept_parse(p, parser_parse_sizeof_statement); - if ex != cast(*Node, null) { - return ex; - }; - let ex = parser_accept_parse(p, parse_function_call_statement); - if ex != cast(*Node, null) { - return ex; - }; - let ex = parser_accept_parse(p, parser_parse_field_access); - if ex != cast(*Node, null) { - return ex; - }; - return parser_parse_primary_expression(p); -}; - -/* UnaryExpression ::= ("!" 
| "-" | "*") UnaryExpression | PostfixExpression */ -let parser_parse_unary_expression = (p: *parser) => *Node { - let typ = -1; - if parser_accept_token(p, TOKEN_BANG) != cast(*token, null) { - typ = UNARY_EXPRESSION_TYPE_NOT; - }; - if typ == -1 { - if parser_accept_token(p, TOKEN_MINUS) != cast(*token, null) { - typ = UNARY_EXPRESSION_TYPE_MINUS; - }; - }; - if typ == -1 { - if parser_accept_token(p, TOKEN_MUL) != cast(*token, null) { - typ = UNARY_EXPRESSION_TYPE_STAR; - }; - }; - if typ == -1 { - return parser_parse_postfix_expression(p); - }; - - let new_lhs_data = cast(*NODE_UNARY_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_UNARY_EXPRESSION_DATA))); - ((*new_lhs_data).typ) = typ; - let e = parser_parse_unary_expression(p); - if e == cast(*Node, null) { - return cast(*Node, null); - }; - ((*new_lhs_data).expression) = e; - let new_lhs = Node{}; - new_lhs.type = NODE_UNARY_EXPRESSION; - new_lhs.data = cast(*void, new_lhs_data); - return create_node(p, new_lhs); -}; - -/* MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)* */ -let parser_parse_multiplicative_expression = (p: *parser) => *Node { - let lhs = parser_parse_unary_expression(p); - if lhs == cast(*Node, null) { - return cast(*Node, null); - }; - - while true { - let typ = -1; - if parser_accept_token(p, TOKEN_MUL) != cast(*token, null) { - typ = MULTIPLICATIVE_EXPRESSION_TYPE_MUL; - }; - if parser_accept_token(p, TOKEN_DIV) != cast(*token, null) { - typ = MULTIPLICATIVE_EXPRESSION_TYPE_DIV; - }; - if parser_accept_token(p, TOKEN_MOD) != cast(*token, null) { - typ = MULTIPLICATIVE_EXPRESSION_TYPE_MOD; - }; - if typ == -1 { - break; - }; - - let rhs = parser_parse_unary_expression(p); - if rhs == cast(*Node, null) { - return cast(*Node, null); - }; - - let new_lhs_data = cast(*NODE_MULTIPLICATIVE_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_MULTIPLICATIVE_EXPRESSION_DATA))); - ((*new_lhs_data).lhs) = lhs; - ((*new_lhs_data).rhs) = rhs; - 
((*new_lhs_data).typ) = typ; - let new_lhs = Node{}; - new_lhs.type = NODE_MULTIPLICATIVE_EXPRESSION; - new_lhs.data = cast(*void, new_lhs_data); - lhs = create_node(p, new_lhs); - }; - return lhs; -}; - -/* AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* */ -let parser_parse_additive_expression = (p: *parser) => *Node { - let lhs = parser_parse_multiplicative_expression(p); - if lhs == cast(*Node, null) { - return cast(*Node, null); - }; - - while true { - let plus = parser_accept_token(p, TOKEN_PLUS); - let minus = parser_accept_token(p, TOKEN_MINUS); - - if plus == cast(*token, null) { - if minus == cast(*token, null) { - break; - }; - }; - - let rhs = parser_parse_multiplicative_expression(p); - if rhs == cast(*Node, null) { - return cast(*Node, null); - }; - - let new_lhs_data = cast(*NODE_ADDITIVE_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_ADDITIVE_EXPRESSION_DATA))); - ((*new_lhs_data).addition) = plus != cast(*token, null); - ((*new_lhs_data).lhs) = lhs; - ((*new_lhs_data).rhs) = rhs; - let new_lhs = Node{}; - new_lhs.type = NODE_ADDITIVE_EXPRESSION; - new_lhs.data = cast(*void, new_lhs_data); - lhs = create_node(p, new_lhs); - }; - return lhs; -}; - -/* Expression ::= EqualityExpression | AdditiveExpression */ -let parser_parse_expression = (p: *parser) => *Node { - let ex = parser_accept_parse(p, parser_parse_equality_expression); - if ex != cast(*Node, null) { - return ex; - }; - let ax = parser_accept_parse(p, parser_parse_additive_expression); - if ax != cast(*Node, null) { - return ax; - }; - - return cast(*Node, null); -}; - -/* AssignmentStatement ::= ("let")? ("*")? 
Expression EQUALS Expression */ -let parse_assignment_statement = (p: *parser) => *Node { - let is_declaration = false; - if parser_accept_token(p, TOKEN_LET) != cast(*token, null) { - is_declaration = true; - }; - - let is_dereference = false; - if parser_accept_token(p, TOKEN_MUL) != cast(*token, null) { - is_dereference = true; - }; - - let lhs = parser_parse_expression(p); - if lhs == cast(*Node, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) { - return cast(*Node, null); - }; - - let rhs = parser_parse_expression(p); - if rhs == cast(*Node, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_ASSIGNMENT_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA))); - (*d).is_declaration = is_declaration; - (*d).is_dereference = is_dereference; - (*d).lhs = lhs; - (*d).rhs = rhs; - let n = Node{}; - n.type = NODE_ASSIGNMENT_STATEMENT; - n.data = cast(*void, d); - return create_node(p, n); -}; - -extern parse = (*parser) => *Node; - -let parser_parse_import_declaration = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_IMPORT) == cast(*token, null) { - return cast(*Node, null); - }; - - let expr = parser_parse_primary_expression(p); - if expr == cast(*Node, null) { - return cast(*Node, null); - }; - - assert((*expr).type == NODE_PRIMARY_EXPRESSION_STRING); - - let impor_filename = (*cast(*NODE_PRIMARY_EXPRESSION_STRING_DATA, (*expr).data)).value; - let current_file = cast(*i8, arena_alloc((*p).arena, sizeof(i8) * 70)); - strcpy(current_file, (*p).filename); - - /* stdlib. TODO: this is very hacky and won't work if running the compiler binary by itself */ - if (*impor_filename) == '!' 
{ - let buf = cast(*i8, arena_alloc((*p).arena, sizeof(i8) * 70)); - sprintf(buf, "./std/%s", (impor_filename + cast(*i8, 1))); - impor_filename = buf; - current_file = "."; - }; - - let dirpath = dirname(current_file); - let dir = open(dirpath, 0); - assert(dir != -1); - - let file = openat(dir, impor_filename, 0); - assert(file != -1); - - let file_size = lseek(file, 0, 2); - lseek(file, 0, 0); - let file_contents = cast(*i8, arena_alloc((*p).arena, file_size + 1)); - - let bytes_read = read(file, file_contents, file_size); - (*(file_contents + cast(*i8, bytes_read))) = '\0'; - - let f = slice{}; - f.data = cast(*void, file_contents); - f.data_len = file_size; - let inner_tokenizer = tokenizer_init((*p).arena, f); - let tokens = tokenizer_tokenize(inner_tokenizer); - - let buf2 = cast(*i8, arena_alloc((*p).arena, 90)); - strcpy(buf2, dirpath); - (*(buf2 + cast(*i8, strlen(dirpath)))) = '/'; - strcpy(buf2 + cast(*i8, strlen(dirpath) + 1), impor_filename); - let full_path = realpath(buf2, cast(*i8, null)); - - let inner_parser = parser_init(cast(*token, tokens.data), tokens.data_len, (*p).arena, full_path); - let ast = parse(inner_parser); - - let d = cast(*NODE_IMPORT_DECLARATION_DATA , arena_alloc((*p).arena, sizeof(NODE_IMPORT_DECLARATION_DATA))); - (*d).filename = impor_filename; - (*d).program = ast; - let n = Node{}; - n.type = NODE_IMPORT_DECLARATION; - n.data = cast(*void, d); - return create_node(p, n); -}; - -/* FunctionArguments ::= Expression ("," Expression)* */ -let parser_parse_function_arguments = (p: *parser) => *slice { - let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20)); - let first = true; - - let i = 0; - while true { - if !first { - parser_accept_token(p, TOKEN_COMMA); - }; - first = false; - let maybe_expr = parser_accept_parse(p, parser_parse_expression); - if maybe_expr == cast(*Node, null) { - let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice))); - (*s).data = cast(*void, node_list); - (*s).data_len = i; - 
return s; - }; - - (*(node_list + cast(**Node, i))) = maybe_expr; - i = i + 1; - }; - - let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice))); - (*s).data = cast(*void, node_list); - (*s).data_len = i; - return s; -}; - -/* FunctionCallStatement ::= (IDENTIFIER | FunctionDefinition) LPAREN FunctionArguments? RPAREN */ -let parse_function_call_statement = (p: *parser) => *Node { - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - let fn_def = parser_accept_parse(p, parser_parse_function_definition); - - if ident == cast(*token, null) { - if fn_def == cast(*Node, null) { - return cast(*Node, null); - }; - }; - - if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let arguments = parser_parse_function_arguments(p); - if arguments == cast(*slice, null) { - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - - let d = cast(*NODE_FUNCTION_CALL_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_FUNCTION_CALL_STATEMENT_DATA))); - (*d).arguments = cast(**Node, (*arguments).data); - (*d).arguments_len = (*arguments).data_len; - - if fn_def != cast(*Node, null) { - (*d).expression = fn_def; - }; - if fn_def == cast(*Node, null) { - let expression_data = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); - (*expression_data).name = cast(*i8, (*ident).data); - let expression = cast(*Node, arena_alloc((*p).arena, sizeof(Node))); - (*expression).type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; - (*expression).data = cast(*void, expression_data); - (*d).expression = expression; - }; - - let n = Node{}; - n.type = NODE_FUNCTION_CALL_STATEMENT; - n.data = cast(*void, d); - return create_node(p, n); -}; - -/* Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | 
ReturnStatement | "break" | "continue") SEMICOLON */ -let parser_parse_statement = (p: *parser) => *Node { - /* TODO: Can we not deal with cast / sizeof in parser? */ - let fn_call = parser_accept_parse(p, parser_parse_cast_statement); - if fn_call != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return fn_call; - }; - - let fn_call = parser_accept_parse(p, parser_parse_sizeof_statement); - if fn_call != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return fn_call; - }; - - let fn_call = parser_accept_parse(p, parse_function_call_statement); - if fn_call != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return fn_call; - }; - - let assignment = parser_accept_parse(p, parse_assignment_statement); - if assignment != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return assignment; - }; - - let retu = parser_accept_parse(p, parser_parse_return_statement); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - let retu = parser_accept_parse(p, parser_parse_import_declaration); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - let retu = parser_accept_parse(p, parser_parse_extern_declaration); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - let retu = parser_accept_parse(p, parser_parse_if_statement); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; 
- }; - - let retu = parser_accept_parse(p, parser_parse_while_statement); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - /* Break and continue */ - let retu = parser_accept_parse(p, (ip: *parser) => *Node { - if parser_accept_token(ip, TOKEN_BREAK) == cast(*token, null) { - return cast(*Node, null); - }; - - let n = Node{}; - n.type = NODE_BREAK_STATEMENT; - return create_node(ip, n); - }); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - let retu = parser_accept_parse(p, (ip: *parser) => *Node { - if parser_accept_token(ip, TOKEN_CONTINUE) == cast(*token, null) { - return cast(*Node, null); - }; - - let n = Node{}; - n.type = NODE_CONTINUE_STATEMENT; - return create_node(ip, n); - }); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - printf("None\n"); - - return cast(*Node, null); -}; - -/* Program ::= Statement+ */ -let parse_program = (p: *parser) => *Node { - let nodes = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 50000)); - - let i = 0; - while (*p).offset < (*p).tokens_len { - let s = parser_parse_statement(p); - assert(s != cast(*Node, null)); - (*(nodes + cast(**Node, i))) = s; - i = i + 1; - }; - - let d = cast(*NODE_PROGRAM_DATA, arena_alloc((*p).arena, sizeof(NODE_PROGRAM_DATA))); - (*d).statements = nodes; - (*d).statements_len = i; - let n = Node{}; - n.type = NODE_PROGRAM; - n.data = cast(*void, d); - return create_node(p, n); -}; - -let parse = (p: *parser) => *Node { - return parse_program(p); -}; diff --git a/src/bootstrap/tokenizer.pry b/src/bootstrap/tokenizer.pry deleted file mode 100644 index ddc2cef..0000000 --- a/src/bootstrap/tokenizer.pry +++ /dev/null @@ -1,553 +0,0 @@ -extern strlen = (*i8) => i64; 
-extern memcpy = (*void, *void, i64) => void; -extern sprintf = (*i8, *i8, varargs) => void; -extern atoi = (*i8) => i64; - -import "!stdlib.pry"; -import "!mem.pry"; - -/* Keywords */ -let TOKEN_IMPORT = 1; -let TOKEN_LET = 2; -let TOKEN_EXTERN = 3; -let TOKEN_IF = 4; -let TOKEN_WHILE = 5; -let TOKEN_RETURN = 6; -let TOKEN_BREAK = 7; -let TOKEN_CONTINUE = 8; -let TOKEN_ARROW = 9; -let TOKEN_STRUCT = 10; -let TOKEN_TYPE = 34; - -/* Identifiers */ -let TOKEN_IDENTIFIER = 11; - -/* Literals */ -let TOKEN_NUMBER = 12; -let TOKEN_BOOLEAN = 13; -let TOKEN_NULL = 14; -let TOKEN_CHAR = 15; -let TOKEN_STRING = 16; - -/* Operators */ -let TOKEN_EQUALS = 17; -let TOKEN_PLUS = 18; -let TOKEN_MINUS = 19; -let TOKEN_MUL = 20; -let TOKEN_DIV = 21; -let TOKEN_MOD = 22; -let TOKEN_BANG = 23; -let TOKEN_LESS = 24; -let TOKEN_GREATER = 25; -let TOKEN_DOT = 26; - -/* Punctuation */ -let TOKEN_SEMICOLON = 27; -let TOKEN_COMMA = 28; -let TOKEN_COLON = 29; -let TOKEN_LPAREN = 30; -let TOKEN_RPAREN = 31; -let TOKEN_LBRACE = 32; -let TOKEN_RBRACE = 33; - -let token = struct { - type: i64, - data: *void, -}; - -let tokenizer = struct { - buf: *i8, - buf_len: i64, - offset: i64, - - arena: *arena, -}; - -let print_tokens = (ts: *token, ts_len: i64) => i64 { - let i = 0; - while i < ts_len { - let to = (*(ts + cast(*token, i))); - - if (to.type == TOKEN_IMPORT) { - printf("Import\n"); - }; - if (to.type == TOKEN_LET) { - printf("Let\n"); - }; - if (to.type == TOKEN_EXTERN) { - printf("Extern\n"); - }; - if (to.type == TOKEN_IF) { - printf("If\n"); - }; - if (to.type == TOKEN_WHILE) { - printf("While\n"); - }; - if (to.type == TOKEN_RETURN) { - printf("Return\n"); - }; - if (to.type == TOKEN_BREAK) { - printf("Break\n"); - }; - if (to.type == TOKEN_CONTINUE) { - printf("Continue\n"); - }; - if (to.type == TOKEN_ARROW) { - printf("Arrow\n"); - }; - if (to.type == TOKEN_STRUCT) { - printf("Struct\n"); - }; - if (to.type == TOKEN_TYPE) { - printf("Type\n"); - }; - if (to.type == 
TOKEN_IDENTIFIER) { - printf("Identifier: %s\n", cast(*i8, to.data)); - }; - if (to.type == TOKEN_NUMBER) { - printf("Number: %d\n", *cast(*i64, to.data)); - }; - if (to.type == TOKEN_BOOLEAN) { - printf("Boolean: %d\n", *cast(*bool, to.data)); - }; - if (to.type == TOKEN_NULL) { - printf("Null\n"); - }; - if (to.type == TOKEN_CHAR) { - printf("Char: %c\n", *cast(*i8, to.data)); - }; - if (to.type == TOKEN_STRING) { - printf("String: %s\n", cast(*i8, to.data)); - }; - if (to.type == TOKEN_EQUALS) { - printf("Equals\n"); - }; - if (to.type == TOKEN_PLUS) { - printf("Plus\n"); - }; - if (to.type == TOKEN_MINUS) { - printf("Minus\n"); - }; - if (to.type == TOKEN_MUL) { - printf("Mul\n"); - }; - if (to.type == TOKEN_DIV) { - printf("Div\n"); - }; - if (to.type == TOKEN_MOD) { - printf("Mod\n"); - }; - if (to.type == TOKEN_BANG) { - printf("Bang\n"); - }; - if (to.type == TOKEN_LESS) { - printf("Less\n"); - }; - if (to.type == TOKEN_GREATER) { - printf("Greater\n"); - }; - if (to.type == TOKEN_DOT) { - printf("Dot\n"); - }; - if (to.type == TOKEN_SEMICOLON) { - printf("Semicolon\n"); - }; - if (to.type == TOKEN_COMMA) { - printf("Comma\n"); - }; - if (to.type == TOKEN_COLON) { - printf("Colon\n"); - }; - if (to.type == TOKEN_LPAREN) { - printf("LParen\n"); - }; - if (to.type == TOKEN_RPAREN) { - printf("RParen\n"); - }; - if (to.type == TOKEN_LBRACE) { - printf("LBrace\n"); - }; - if (to.type == TOKEN_RBRACE) { - printf("RBrace\n"); - }; - - i = i + 1; - }; - - return 0; -}; - -let tokenizer_skip_whitespace = (t: *tokenizer) => void { - while true { - if (*t).offset >= (*t).buf_len { return; }; - let c = (*((*t).buf + cast(*i8, (*t).offset))); - if !iswhitespace(c) { - return; - }; - (*t).offset = (*t).offset + 1; - }; - - return; -}; - -let tokenizer_accept_string = (t: *tokenizer, str: *i8) => bool { - let str_len = strlen(str); - if (*t).offset + str_len > (*t).buf_len { return false; }; - - let s = cast(*i8, arena_alloc((*t).arena, 1000)); - memcpy(cast(*void, s), 
cast(*void, (*t).buf + cast(*i8, (*t).offset)), str_len); - - if strcmp(s, str) { - (*t).offset = (*t).offset + str_len; - return true; - }; - - return false; -}; - -let tokenizer_consume_until_condition = (t: *tokenizer, condition: (i8) => bool) => *i8 { - let start = (*t).offset; - let res = cast(*i8, arena_alloc((*t).arena, 1000)); - - while true { - if (*t).offset >= (*t).buf_len { - return res; - }; - - let c = (*((*t).buf + cast(*i8, (*t).offset))); - - let offset = (*t).offset; - if c == '\\' { - let next_c = (*((*t).buf + cast(*i8, offset + 1))); - - let any = false; - if next_c == 'n' { - (*(res + cast(*i8, offset - start))) = '\n'; - any = true; - }; - if next_c == 't' { - (*(res + cast(*i8, offset - start))) = '\t'; - any = true; - }; - if next_c == 'r' { - (*(res + cast(*i8, offset - start))) = '\r'; - any = true; - }; - if next_c == '0' { - (*(res + cast(*i8, offset - start))) = '\0'; - any = true; - }; - if next_c == '\\' { - (*(res + cast(*i8, offset - start))) = '\\'; - any = true; - }; - if !any { - (*(res + cast(*i8, offset - start))) = next_c; - }; - - offset = offset + 1; - offset = offset + 1; - (*t).offset = offset; - - continue; - }; - - if condition(c) { - return res; - }; - - (*(res + cast(*i8, offset - start))) = c; - (*(res + cast(*i8, offset - start + 1))) = '\0'; - - offset = offset + 1; - (*t).offset = offset; - }; - - return cast(*i8, null); -}; - -let tokenizer_accept_int_type = (t: *tokenizer) => *i64 { - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return !isdigit(c); - }); - if string == cast(*i8, null) { - return cast(*i64, null); - }; - if strlen(string) == 0 { - return cast(*i64, null); - }; - let x = cast(*i64, arena_alloc((*t).arena, sizeof(i64))); - *x = atoi(string); - return x; -}; - -let tokenizer_accept_char_type = (t: *tokenizer) => *i8 { - let prev_offset = (*t).offset; - if !tokenizer_accept_string(t, "'") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - let string = 
tokenizer_consume_until_condition(t, (c: i8) => bool { - return c == '\''; - }); - - if !tokenizer_accept_string(t, "'") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - return string; -}; - -let tokenizer_accept_string_type = (t: *tokenizer) => *i8 { - let prev_offset = (*t).offset; - if !tokenizer_accept_string(t, "\"") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return c == '"'; - }); - - if !tokenizer_accept_string(t, "\"") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - return string; -}; - -let tokenizer_skip_comments = (t: *tokenizer) => void { - if !tokenizer_accept_string(t, "/*") { return; }; - - while !tokenizer_accept_string(t, "*/") { - (*t).offset = (*t).offset + 1; - }; - - return; -}; - -let tokenizer_next = (t: *tokenizer) => *token { - tokenizer_skip_whitespace(t); - tokenizer_skip_comments(t); - tokenizer_skip_whitespace(t); - - if (*t).offset >= (*t).buf_len { - return cast(*token, null); - }; - - let to = cast(*token, arena_alloc((*t).arena, sizeof(token))); - - if tokenizer_accept_string(t, "import") { - (*to).type = TOKEN_IMPORT; - return to; - }; - if tokenizer_accept_string(t, "let") { - (*to).type = TOKEN_LET; - return to; - }; - if tokenizer_accept_string(t, "extern") { - (*to).type = TOKEN_EXTERN; - return to; - }; - if tokenizer_accept_string(t, "if") { - (*to).type = TOKEN_IF; - return to; - }; - if tokenizer_accept_string(t, "while") { - (*to).type = TOKEN_WHILE; - return to; - }; - if tokenizer_accept_string(t, "return") { - (*to).type = TOKEN_RETURN; - return to; - }; - if tokenizer_accept_string(t, "break") { - (*to).type = TOKEN_BREAK; - return to; - }; - if tokenizer_accept_string(t, "continue") { - (*to).type = TOKEN_CONTINUE; - return to; - }; - if tokenizer_accept_string(t, "true") { - (*to).type = TOKEN_BOOLEAN; - let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); - *data = true; - 
(*to).data = cast(*void, data); - return to; - }; - if tokenizer_accept_string(t, "false") { - (*to).type = TOKEN_BOOLEAN; - let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); - *data = false; - (*to).data = cast(*void, data); - return to; - }; - if tokenizer_accept_string(t, "null") { - (*to).type = TOKEN_NULL; - return to; - }; - if tokenizer_accept_string(t, "struct") { - (*to).type = TOKEN_STRUCT; - return to; - }; - if tokenizer_accept_string(t, "newtype") { - (*to).type = TOKEN_TYPE; - return to; - }; - - if tokenizer_accept_string(t, "=>") { - (*to).type = TOKEN_ARROW; - return to; - }; - if tokenizer_accept_string(t, ";") { - (*to).type = TOKEN_SEMICOLON; - return to; - }; - if tokenizer_accept_string(t, ",") { - (*to).type = TOKEN_COMMA; - return to; - }; - if tokenizer_accept_string(t, ":") { - (*to).type = TOKEN_COLON; - return to; - }; - if tokenizer_accept_string(t, "(") { - (*to).type = TOKEN_LPAREN; - return to; - }; - if tokenizer_accept_string(t, ")") { - (*to).type = TOKEN_RPAREN; - return to; - }; - if tokenizer_accept_string(t, "{") { - (*to).type = TOKEN_LBRACE; - return to; - }; - if tokenizer_accept_string(t, "}") { - (*to).type = TOKEN_RBRACE; - return to; - }; - if tokenizer_accept_string(t, "=") { - (*to).type = TOKEN_EQUALS; - return to; - }; - if tokenizer_accept_string(t, "+") { - (*to).type = TOKEN_PLUS; - return to; - }; - if tokenizer_accept_string(t, "-") { - (*to).type = TOKEN_MINUS; - return to; - }; - if tokenizer_accept_string(t, "*") { - (*to).type = TOKEN_MUL; - return to; - }; - if tokenizer_accept_string(t, "/") { - (*to).type = TOKEN_DIV; - return to; - }; - if tokenizer_accept_string(t, "%") { - (*to).type = TOKEN_MOD; - return to; - }; - if tokenizer_accept_string(t, "!") { - (*to).type = TOKEN_BANG; - return to; - }; - if tokenizer_accept_string(t, "<") { - (*to).type = TOKEN_LESS; - return to; - }; - if tokenizer_accept_string(t, ">") { - (*to).type = TOKEN_GREATER; - return to; - }; - if 
tokenizer_accept_string(t, ".") { - (*to).type = TOKEN_DOT; - return to; - }; - - let maybe_int = tokenizer_accept_int_type(t); - if maybe_int != cast(*i64, null) { - (*to).type = TOKEN_NUMBER; - (*to).data = cast(*void, maybe_int); - return to; - }; - - let maybe_char = tokenizer_accept_char_type(t); - if maybe_char != cast(*i8, null) { - (*to).type = TOKEN_CHAR; - (*to).data = cast(*void, maybe_char); - return to; - }; - - let maybe_string = tokenizer_accept_string_type(t); - if maybe_string != cast(*i8, null) { - (*to).type = TOKEN_STRING; - (*to).data = cast(*void, maybe_string); - return to; - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - if isalphanum(c) { - return false; - }; - if c == '_' { - return false; - }; - return true; - }); - if strlen(string) == 0 { - printf("NO IDENT!\n"); - return cast(*token, null); - }; - - (*to).type = TOKEN_IDENTIFIER; - (*to).data = cast(*void, string); - - return to; -}; - -let tokenizer_init = (alloc: *arena, file: slice) => *tokenizer { - let t = cast(*tokenizer, arena_alloc(alloc, sizeof(tokenizer))); - (*t).arena = alloc; - (*t).offset = 0; - (*t).buf = cast(*i8, file.data); - (*t).buf_len = file.data_len; - - printf("File size: %d\n", (*t).buf_len); - - printf("%s\n", (*t).buf); - - return t; -}; - -let tokenizer_tokenize = (t: *tokenizer) => slice { - let tokens = cast(*token, arena_alloc((*t).arena, sizeof(token) * 40000)); /* why does it not care about type here */ - let tokens_len = 0; - - while true { - let tk = tokenizer_next(t); - if tk == cast(*token, null) { - break; - }; - printf("Add token: %d\n", (*tk).type); - - (*(tokens + cast(*token, tokens_len))) = *tk; - tokens_len = tokens_len + 1; - }; - - printf("PRINT TOKENS: %d\n", tokens_len); - - print_tokens(tokens, tokens_len); - - let res = slice{}; - res.data = cast(*void, tokens); - res.data_len = tokens_len; - return res; -}; diff --git a/src/codegen.pry b/src/codegen.pry new file mode 100644 index 0000000..cb054ec --- 
/dev/null +++ b/src/codegen.pry @@ -0,0 +1,1450 @@ +import "!hashmap.pry"; + +import "llvm.pry"; + +let Variable = struct { + value: LLVMValueRef, + type: LLVMTypeRef, + node: *Node, + node_type: *Node, + stack_level: *i64, +}; + +let Scope = struct { + variables: *HashMap, +}; + +let Environment = struct { + scope_stack: **Scope, + scope_stack_len: i64, + arena: *arena, +}; + +let environment_create_scope = (e: *Environment) => void { + let scope = cast(*Scope, arena_alloc((*e).arena, sizeof(Scope))); + (*scope).variables = hashmap_init(16, (*e).arena); + (*((*e).scope_stack + cast(**Scope, (*e).scope_stack_len))) = scope; + (*e).scope_stack_len = (*e).scope_stack_len + 1; + + return; +}; + +let environment_drop_scope = (e: *Environment) => void { + (*e).scope_stack_len = (*e).scope_stack_len - 1; + + return; +}; + +let environment_get_variable = (e: *Environment, name: *i8) => *Variable { + let i = (*e).scope_stack_len; + let variable = cast(*Variable, null); + + while i > 0 { + i = i - 1; + let scope = *(((*e).scope_stack + cast(**Scope, i))); + assert(scope != cast(*Scope, null)); + let v = cast(*Variable, hashmap_get((*scope).variables, name)); + if v != cast(*Variable, null) { + if variable == cast(*Variable, null) { + variable = v; + }; + let stack_level = cast(*i64, arena_alloc((*e).arena, sizeof(i64))); + (*stack_level) = i; + (*variable).stack_level = stack_level; + }; + }; + + return variable; +}; + +let environment_add_variable = (e: *Environment, name: *i8, variable: *Variable) => void { + /* TODO: Dont allow shadowing if value != value or type != type (across things) */ + let top_scope = *(((*e).scope_stack + cast(**Scope, (*e).scope_stack_len - 1))); + hashmap_put((*top_scope).variables, name, cast(*void, variable)); + + return; +}; + +let environment_set_variable = (e: *Environment, name: *i8, variable: *Variable) => void { + let existing = environment_get_variable(e, name); + (*existing) = (*variable); + + return; +}; + +let environment_init = 
(alloc: *arena) => *Environment { + let e = cast(*Environment, arena_alloc(alloc, sizeof(Environment))); + (*e).scope_stack = cast(**Scope, arena_alloc(alloc, sizeof(*Scope) * 40)); + (*e).scope_stack_len = 0; + (*e).arena = alloc; + + environment_create_scope(e); + + return e; +}; + +let codegen = struct { + llvm_module: LLVMModuleRef, + llvm_context: LLVMContextRef, + builder: LLVMBuilderRef, + arena: *arena, + environment: *Environment, + + whil_loop_exit: LLVMBasicBlockRef, + whil_block: LLVMBasicBlockRef, + current_function: LLVMValueRef, + current_function_retur_type: *Node, + llvm_target_data: LLVMTargetDataRef, +}; + +let codegen_init = (alloc: *arena) => *codegen { + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllTargets(); + LLVMInitializeAllAsmPrinters(); + LLVMInitializeAllAsmParsers(); + + let module = LLVMModuleCreateWithName("module"); + let context = LLVMGetGlobalContext(); + let builder = LLVMCreateBuilder(); + + let c = cast(*codegen, arena_alloc(alloc, sizeof(codegen))); + + (*c).llvm_module = module; + (*c).llvm_target_data = LLVMGetModuleDataLayout(module); + (*c).llvm_context = context; + (*c).builder = builder; + (*c).arena = alloc; + (*c).environment = environment_init(alloc); + + return c; +}; + +let create_node = (c: *codegen, n: Node) => *Node { + let res = cast(*Node, arena_alloc((*c).arena, sizeof(Node))); + *res = n; + return res; +}; + +let codegen_create_variable = (c: *codegen, variable: Variable) => *Variable { + let v = cast(*Variable, arena_alloc((*c).arena, sizeof(Variable))); + *v = variable; + return v; +}; + +let compare_types = (c: *codegen, a: *Node, b: *Node, is_dereference: bool) => bool { + assert((*a).type >= NODE_TYPE_SIMPLE_TYPE); + assert((*a).type <= NODE_TYPE_STRUCT_TYPE); + assert((*b).type >= NODE_TYPE_SIMPLE_TYPE); + assert((*b).type <= NODE_TYPE_STRUCT_TYPE); + + if (*a).type == NODE_TYPE_SIMPLE_TYPE { + let simple_type_a = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*a).data); + if 
strcmp(simple_type_a.name, "varargs") { + return true; + }; + }; + + if is_dereference { + assert((*a).type == NODE_TYPE_POINTER_TYPE); + let pointer_type_a = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*a).data); + a = pointer_type_a.type; + }; + + if (*a).type != (*b).type { + printf("Types do not match: %d != ", (*a).type); + printf("%d\n", (*b).type); + return false; + }; + + if (*a).type == NODE_TYPE_SIMPLE_TYPE { + assert((*b).type == NODE_TYPE_SIMPLE_TYPE); + let simple_type_a = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*a).data); + let simple_type_b = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*b).data); + let eql = strcmp(simple_type_a.name, simple_type_b.name); + if !eql { + printf("Simple types do not match: %s != ", simple_type_a.name); + printf("%s\n", simple_type_b.name); + }; + return eql; + }; + + if (*a).type == NODE_TYPE_FUNCTION_TYPE { + assert((*b).type == NODE_TYPE_FUNCTION_TYPE); + let function_type_a = *cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*a).data); + let function_type_b = *cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*b).data); + + + if !compare_types(c, function_type_a.retur_type, function_type_b.retur_type, false) { + printf("Function return types do not match\n"); + return false; + }; + + if function_type_a.parameters_len != function_type_b.parameters_len { + printf("Function parameter lengths do not match\n"); + return false; + }; + + let i = 0; + while i < function_type_a.parameters_len { + let param_a = *(function_type_a.parameters + cast(**Node, i)); + let param_b = *(function_type_b.parameters + cast(**Node, i)); + if !compare_types(c, param_a, param_b, false) { + printf("Function parameter types do not match\n"); + return false; + }; + i = i + 1; + }; + + return true; + }; + + if (*a).type == NODE_TYPE_POINTER_TYPE { + assert((*b).type == NODE_TYPE_POINTER_TYPE); + let pointer_type_a = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*a).data); + let pointer_type_b = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*b).data); + if !compare_types(c, pointer_type_a.type, 
pointer_type_b.type, false) { + printf("Pointer types do not match\n"); + return false; + }; + return true; + }; + + if (*a).type == NODE_TYPE_STRUCT_TYPE { + assert((*b).type == NODE_TYPE_STRUCT_TYPE); + let struc_type_a = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*a).data); + let struc_type_b = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*b).data); + + if struc_type_a.fields_len != struc_type_b.fields_len { + printf("Struct field lengths do not match\n"); + return false; + }; + + let i = 0; + while i < struc_type_a.fields_len { + let field_a = *(struc_type_a.fields + cast(**Node, i)); + let field_b = *(struc_type_b.fields + cast(**Node, i)); + if !compare_types(c, field_a, field_b, false) { + printf("Struct field types do not match\n"); + return false; + }; + i = i + 1; + }; + + return true; + }; + + return false; +}; + +let codegen_get_llvm_type = (c: *codegen, node: *Node) => *LLVMTypeRef { + assert((*node).type >= NODE_TYPE_SIMPLE_TYPE); + assert((*node).type <= NODE_TYPE_STRUCT_TYPE); + + if (*node).type == NODE_TYPE_SIMPLE_TYPE { + let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*node).data); + + if strcmp(simple_type.name, "i8") { + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = LLVMInt8Type(); + return r; + }; + + if strcmp(simple_type.name, "i64") { + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = LLVMInt64Type(); + return r; + }; + + if strcmp(simple_type.name, "bool") { + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = LLVMInt1Type(); + return r; + }; + + if strcmp(simple_type.name, "void") { + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = LLVMVoidType(); + return r; + }; + + if strcmp(simple_type.name, "varargs") { /* Hack for varargs (only used for printf) */ + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = LLVMPointerType(LLVMInt64Type(), 0); + return r; + }; + + let v = 
environment_get_variable((*c).environment, simple_type.name); + if (v != cast(*Variable, null)) { + assert((*v).type != cast(LLVMTypeRef, null)); + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = (*v).type; + return r; + }; + + printf("NO SIMPLE TYPE %s!\n", simple_type.name); + assert(false); + }; + + if (*node).type == NODE_TYPE_FUNCTION_TYPE { + let function_type = *cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*node).data); + let f_retur = function_type.retur_type; + let retur_type = codegen_get_llvm_type(c, f_retur); + assert(retur_type != cast(*LLVMTypeRef, null)); + if (*f_retur).type == NODE_TYPE_FUNCTION_TYPE { + (*retur_type) = LLVMPointerType(*retur_type, 0); + }; + + let paramtypes = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef) * 20)); + let paramtypes_len = 0; + let is_varargs = 0; + + let i = 0; + while i < function_type.parameters_len { + let param = *(function_type.parameters + cast(**Node, i)); + if (*param).type == NODE_TYPE_SIMPLE_TYPE { + let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*param).data); + if strcmp(simple_type.name, "varargs") { + is_varargs = 1; + i = i + 1; + continue; + }; + }; + let typ = codegen_get_llvm_type(c, param); + assert(typ != cast(*LLVMTypeRef, null)); + if (*param).type == NODE_TYPE_FUNCTION_TYPE { + *typ = LLVMPointerType(*typ, 0); + }; + + (*(paramtypes + cast(*LLVMTypeRef, paramtypes_len))) = *typ; + paramtypes_len = paramtypes_len + 1; + + i = i + 1; + }; + let function_type = LLVMFunctionType(*retur_type, paramtypes, paramtypes_len, is_varargs); + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + *r = function_type; + return r; + }; + + if (*node).type == NODE_TYPE_POINTER_TYPE { + let pointer_type = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*node).data); + let inner_type = codegen_get_llvm_type(c, pointer_type.type); + assert(inner_type != cast(*LLVMTypeRef, null)); + let r = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef))); + 
*r = LLVMPointerType(*inner_type, 0); + return r; + }; + + printf("NO TYPEEE BOI %d\n", (*node).type); + assert(false); + + return cast(*LLVMTypeRef, null); +}; + +let codegen_generate_literal = (c: *codegen, literal_val: LLVMValueRef, name: *i8, node: *Node, node_type: *Node) => *Variable { + if name != cast(*i8, null) { + let e = (*c).environment; + if (*e).scope_stack_len == 1 { + let lt = codegen_get_llvm_type(c, node_type); + assert(lt != cast(*LLVMTypeRef, null)); + let v = Variable{}; + v.value = LLVMAddGlobal((*c).llvm_module, *lt, name); + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = node; + v.node_type = node_type; + LLVMSetInitializer(v.value, literal_val); + return codegen_create_variable(c, v); + }; + }; + + + let v = Variable{}; + v.value = literal_val; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = node; + v.node_type = node_type; + return codegen_create_variable(c, v); +}; + +extern codegen_generate_statement = (*codegen, *Node) => i64; +extern codegen_generate_function_call_statement = (*codegen, *Node) => *Variable; +extern codegen_generate_expression_value = (*codegen, *Node, *i8) => *Variable; + +let StructField = struct { + value: LLVMValueRef, + type: *Node, +}; + +let codegen_get_struct_field = (c: *codegen, node: *Node, name: *i8) => *StructField { + let ptr = cast(*Variable, null); + if (*node).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER { + let identifier = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*node).data); + ptr = environment_get_variable((*c).environment, identifier.name); + }; + if (*node).type == NODE_UNARY_EXPRESSION { + let xd = (*cast(*NODE_UNARY_EXPRESSION_DATA, (*node).data)).expression; + ptr = codegen_generate_expression_value(c, xd, ""); + }; + + assert(ptr != cast(*Variable, null)); + + let typ = cast(*Node, null); + let ptr_typ = (*ptr).node_type; + if (*ptr_typ).type == NODE_TYPE_STRUCT_TYPE { + typ = ptr_typ; + }; + if (*ptr_typ).type == 
NODE_TYPE_POINTER_TYPE { + let pt = *cast(*NODE_TYPE_POINTER_TYPE_DATA, (*ptr_typ).data); + let pt_type = pt.type; + assert((*pt_type).type == NODE_TYPE_SIMPLE_TYPE); + let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*pt_type).data); + let v = environment_get_variable((*c).environment, simple_type.name); + assert(v != cast(*Variable, null)); + typ = (*v).node_type; /* TODO: we shouldnt be able to get fields of pointers, we have to dref first */ + }; + if (*ptr_typ).type == NODE_TYPE_SIMPLE_TYPE { + let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*ptr_typ).data); + let v = environment_get_variable((*c).environment, simple_type.name); + assert(v != cast(*Variable, null)); + typ = (*v).node_type; + }; + + assert(typ != cast(*Node, null)); + assert((*typ).type == NODE_TYPE_SIMPLE_TYPE); + let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*typ).data); + assert((*simple_type.underlying_type).type == NODE_TYPE_STRUCT_TYPE); + let struc_type = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*simple_type.underlying_type).data); + + let fieldIndex = cast(*i64, null); + + let i = 0; + while i < struc_type.fields_len { + let field = *(struc_type.fields + cast(**Node, i)); + assert((*field).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); + let field_data = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*field).data); + if strcmp(name, field_data.name) { + let ii = cast(*i64, arena_alloc((*c).arena, sizeof(i64))); + *ii = i; + fieldIndex = ii; + break; + }; + i = i + 1; + }; + + assert(fieldIndex != cast(*i64, null)); + + let zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + let llvmFieldIndex = LLVMConstInt(LLVMInt32Type(), *fieldIndex, 0); + let indices = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * 2)); + (*(indices + cast(*LLVMValueRef, 0))) = zero; + (*(indices + cast(*LLVMValueRef, 1))) = llvmFieldIndex; + + let res = cast(*StructField, arena_alloc((*c).arena, sizeof(StructField))); + + let x = codegen_get_llvm_type(c, typ); + assert(x != 
cast(*LLVMTypeRef, null)); + (*res).value = LLVMBuildGEP2((*c).builder, *x, (*ptr).value, indices, 2, name); + let no = *(struc_type.fields + cast(**Node, *fieldIndex)); + assert((*no).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); + let no_d = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*no).data); + (*res).type = no_d.type; + + return res; +}; + +let codegen_generate_expression_value = (c: *codegen, expression: *Node, name: *i8) => *Variable { + if ((*expression).type == NODE_PRIMARY_EXPRESSION_NULL) { + let inner_type_data = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*inner_type_data).name = "void"; + let inner_type = Node{}; + inner_type.type = NODE_TYPE_SIMPLE_TYPE; + inner_type.data = cast(*void, inner_type_data); + + let node_type_data = cast(*NODE_TYPE_POINTER_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_POINTER_TYPE_DATA))); + (*node_type_data).type = create_node(c, inner_type); + let node_type = Node{}; + node_type.type = NODE_TYPE_POINTER_TYPE; + node_type.data = cast(*void, node_type_data); + + return codegen_generate_literal(c, LLVMConstNull(LLVMPointerType(LLVMInt8Type(), 0)), name, expression, create_node(c, node_type)); + }; + + if ((*expression).type == NODE_PRIMARY_EXPRESSION_NUMBER) { + let n = (*cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, (*expression).data)).value; + + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "i64"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + return codegen_generate_literal(c, LLVMConstInt(LLVMInt64Type(), n, 0), name, expression, create_node(c, node_type)); + }; + + if ((*expression).type == NODE_PRIMARY_EXPRESSION_BOOLEAN) { + let b = (*cast(*NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA, (*expression).data)).value; + + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + 
let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "bool"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + let int_value = 0; + if b == true { + int_value = 1; + }; + + return codegen_generate_literal(c, LLVMConstInt(LLVMInt1Type(), int_value, 0), name, expression, create_node(c, node_type)); + }; + + if ((*expression).type == NODE_PRIMARY_EXPRESSION_CHAR) { + let ch = cast(i64, (*cast(*NODE_PRIMARY_EXPRESSION_CHAR_DATA, (*expression).data)).value); + + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "i8"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + return codegen_generate_literal(c, LLVMConstInt(LLVMInt8Type(), cast(i64, ch), 0), name, expression, create_node(c, node_type)); + }; + + if ((*expression).type == NODE_PRIMARY_EXPRESSION_STRING) { + let str = (*cast(*NODE_PRIMARY_EXPRESSION_STRING_DATA, (*expression).data)).value; + + let x = LLVMBuildGlobalStringPtr((*c).builder, str, ""); + + let inner_type_data = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*inner_type_data).name = "i8"; + let inner_type = Node{}; + inner_type.type = NODE_TYPE_SIMPLE_TYPE; + inner_type.data = cast(*void, inner_type_data); + + let node_type_data = cast(*NODE_TYPE_POINTER_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_POINTER_TYPE_DATA))); + (*node_type_data).type = create_node(c, inner_type); + let node_type = Node{}; + node_type.type = NODE_TYPE_POINTER_TYPE; + node_type.data = cast(*void, node_type_data); + + let v = Variable{}; + v.value = x; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = create_node(c, node_type); + + return codegen_create_variable(c, v); + }; + + if 
((*expression).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER) { + let identifier = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*expression).data); + let variable = environment_get_variable((*c).environment, identifier.name); + assert(variable != cast(*Variable, null)); + let param_value = (*variable).value; + let v_type = (*variable).node_type; + + let done = false; + if (*v_type).type != NODE_TYPE_FUNCTION_TYPE { + let param_type = codegen_get_llvm_type(c, v_type); + assert(param_type != cast(*LLVMTypeRef, null)); + if (*v_type).type == NODE_TYPE_FUNCTION_TYPE { + (*param_type) = LLVMPointerType(*param_type, 0); + }; + param_value = LLVMBuildLoad2((*c).builder, *param_type, (*variable).value, ""); + done = true; + }; + + if !done { + if (*(*variable).stack_level) != 0 { + let param_type = codegen_get_llvm_type(c, v_type); + assert(param_type != cast(*LLVMTypeRef, null)); + if (*v_type).type == NODE_TYPE_FUNCTION_TYPE { + (*param_type) = LLVMPointerType(*param_type, 0); + }; + param_value = LLVMBuildLoad2((*c).builder, *param_type, (*variable).value, ""); + done = true; + }; + + }; + + return codegen_generate_literal(c, param_value, name, expression, (*variable).node_type); + }; + + if ((*expression).type == NODE_FUNCTION_DEFINITION) { + /* Functions should be declared "globally" */ + let builder_pos = LLVMGetInsertBlock((*c).builder); + + let llvm_param_types = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef) * 20)); + let param_types = cast(**Node, arena_alloc((*c).arena, sizeof(*Node) * 20)); + + let function_definition = *cast(*NODE_FUNCTION_DEFINITION_DATA, (*expression).data); + + let i = 0; + let is_varargs = 0; + while i < function_definition.parameters_len { + let node = *(function_definition.parameters + cast(**Node, i)); + assert((*node).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); + let param = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*node).data); + let param_type = param.type; + if (*param_type).type == NODE_TYPE_SIMPLE_TYPE { 
+ let simple_type = *(cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*param_type).data)); + if strcmp(simple_type.name, "varargs") { + is_varargs = 1; + }; + }; + let llvm_param_type = codegen_get_llvm_type(c, param_type); + assert(llvm_param_type != cast(*LLVMTypeRef, null)); + if (*param_type).type == NODE_TYPE_FUNCTION_TYPE { + (*llvm_param_type) = LLVMPointerType(*llvm_param_type, 0); + }; + + (*(llvm_param_types + cast(*LLVMTypeRef, i))) = *llvm_param_type; + (*(param_types + cast(**Node, i))) = param_type; + i = i + 1; + }; + + let f_ret = function_definition.retur_type; + let retur_type = codegen_get_llvm_type(c, f_ret); + assert(retur_type != cast(*LLVMTypeRef, null)); + if (*f_ret).type == NODE_TYPE_FUNCTION_TYPE { + (*retur_type) = LLVMPointerType(*retur_type, 0); + }; + + let function = cast(LLVMValueRef, null); + if name != cast(*i8, null) { + let v = environment_get_variable((*c).environment, name); + if (v != cast(*Variable, null)) { + function = (*v).value; + }; + }; + if function == cast(LLVMValueRef, null) { + let function_type = LLVMFunctionType(*retur_type, llvm_param_types, i, is_varargs); + let n_name = name; + if name == cast(*i8, null) { + n_name = "unnamed_func"; + }; + function = LLVMAddFunction((*c).llvm_module, n_name, function_type); + }; + + let function_entry = LLVMAppendBasicBlock(function, "entrypoint"); + LLVMPositionBuilderAtEnd((*c).builder, function_entry); + + environment_create_scope((*c).environment); + let last_function = (*c).current_function; + (*c).current_function = function; + let last_function_retur_type = (*c).current_function_retur_type; + (*c).current_function_retur_type = function_definition.retur_type; + + /* TODO: Defer. 
For now we do at the end */ + + let d = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_FUNCTION_TYPE_DATA))); + (*d).parameters = param_types; + (*d).parameters_len = i; + (*d).retur_type = function_definition.retur_type; + let n = Node{}; + let node_type = create_node(c, n); + (*node_type).type = NODE_TYPE_FUNCTION_TYPE; + (*node_type).data = cast(*void, d); + + /* Needed for recursive functions */ + if name != cast(*i8, null) { + let v = Variable{}; + v.value = function; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = node_type; + environment_add_variable((*c).environment, name, codegen_create_variable(c, v)); + }; + + let params = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * function_definition.parameters_len)); + LLVMGetParams(function, params); + + let parameters_index = 0; + while parameters_index < function_definition.parameters_len { + let p = (*(params + cast(*LLVMValueRef, parameters_index))); + let param_node = *(function_definition.parameters + cast(**Node, parameters_index)); + assert((*param_node).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); + let param = *cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*param_node).data); + let param_type = param.type; + let llvm_param_type = codegen_get_llvm_type(c, param_type); + assert(llvm_param_type != cast(*LLVMTypeRef, null)); + if (*param_type).type == NODE_TYPE_FUNCTION_TYPE { + (*llvm_param_type) = LLVMPointerType(*llvm_param_type, 0); + }; + let alloca = LLVMBuildAlloca((*c).builder, *llvm_param_type, param.name); + LLVMBuildStore((*c).builder, p, alloca); + + let v = Variable{}; + v.value = alloca; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = param_node; + v.node_type = param_type; + environment_add_variable((*c).environment, param.name, codegen_create_variable(c, v)); + parameters_index = parameters_index + 1; + }; + + i = 0; + while i < 
function_definition.statements_len { + let stmt = *(function_definition.statements + cast(**Node, i)); + + let res = codegen_generate_statement(c, stmt); + assert(res == 0); + + i = i + 1; + }; + + LLVMPositionBuilderAtEnd((*c).builder, builder_pos); + + let v = Variable{}; + v.value = function; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = node_type; + + /* TODO: Move to defer */ + (*c).current_function = last_function; + (*c).current_function_retur_type = last_function_retur_type; + environment_drop_scope((*c).environment); + + return codegen_create_variable(c, v); + }; + + if ((*expression).type == NODE_EQUALITY_EXPRESSION) { + let exp = (*(cast(*NODE_EQUALITY_EXPRESSION_DATA, (*expression).data))); + let lhs_value = codegen_generate_expression_value(c, exp.lhs, cast(*i8, null)); + assert(lhs_value != cast(*Variable, null)); + let rhs_value = codegen_generate_expression_value(c, exp.rhs, cast(*i8, null)); + assert(rhs_value != cast(*Variable, null)); + + assert(compare_types(c, (*lhs_value).node_type, (*rhs_value).node_type, false)); + + let op = -1; + + if exp.typ == EQUALITY_EXPRESSION_TYPE_EQ { + op = LLVMIntEQ; + }; + if exp.typ == EQUALITY_EXPRESSION_TYPE_NE { + op = LLVMIntNE; + }; + if exp.typ == EQUALITY_EXPRESSION_TYPE_GE { + op = LLVMIntSGE; + }; + if exp.typ == EQUALITY_EXPRESSION_TYPE_LE { + op = LLVMIntSLE; + }; + if exp.typ == EQUALITY_EXPRESSION_TYPE_LT { + op = LLVMIntSLT; + }; + if exp.typ == EQUALITY_EXPRESSION_TYPE_GT { + op = LLVMIntSGT; + }; + + assert(op != -1); + + let cmp = LLVMBuildICmp((*c).builder, cast(LLVMIntPredicate, op), (*lhs_value).value, (*rhs_value).value, ""); + + + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "bool"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + return 
codegen_generate_literal(c, cmp, name, expression, create_node(c, node_type)); + }; + + if ((*expression).type == NODE_ADDITIVE_EXPRESSION) { + let exp = (*(cast(*NODE_ADDITIVE_EXPRESSION_DATA, (*expression).data))); + let lhs_value = codegen_generate_expression_value(c, exp.lhs, cast(*i8, null)); + assert(lhs_value != cast(*Variable, null)); + let rhs_value = codegen_generate_expression_value(c, exp.rhs, cast(*i8, null)); + assert(rhs_value != cast(*Variable, null)); + + assert(compare_types(c, (*lhs_value).node_type, (*rhs_value).node_type, false)); + + let result = cast(LLVMValueRef, null); + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "i64"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + let pnode_type = create_node(c, node_type); + + if exp.addition { + let nt = (*lhs_value).node_type; + if (*nt).type == NODE_TYPE_POINTER_TYPE { + let ipt = cast(*NODE_TYPE_POINTER_TYPE_DATA, (*nt).data); + let llvmipt = codegen_get_llvm_type(c, (*ipt).type); + assert(llvmipt != cast(*LLVMTypeRef, null)); + let arr = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * 1)); + (*(arr + cast(*LLVMValueRef, 0))) = (*rhs_value).value; + result = LLVMBuildGEP2((*c).builder, *llvmipt, (*lhs_value).value, arr, 1, ""); + pnode_type = (*lhs_value).node_type; + }; + if (*nt).type != NODE_TYPE_POINTER_TYPE { + result = LLVMBuildAdd((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); + }; + + }; + if !exp.addition { + result = LLVMBuildSub((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); + }; + + return codegen_generate_literal(c, result, name, expression, pnode_type); + }; + + if ((*expression).type == NODE_MULTIPLICATIVE_EXPRESSION) { + let exp = (*(cast(*NODE_MULTIPLICATIVE_EXPRESSION_DATA, (*expression).data))); + let lhs_value = codegen_generate_expression_value(c, exp.lhs, 
cast(*i8, null)); + assert(lhs_value != cast(*Variable, null)); + let rhs_value = codegen_generate_expression_value(c, exp.rhs, cast(*i8, null)); + assert(rhs_value != cast(*Variable, null)); + + assert(compare_types(c, (*lhs_value).node_type, (*rhs_value).node_type, false)); + + let result = cast(LLVMValueRef, null); + + if exp.typ == MULTIPLICATIVE_EXPRESSION_TYPE_MUL { + result = LLVMBuildMul((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); + }; + if exp.typ == MULTIPLICATIVE_EXPRESSION_TYPE_DIV { + result = LLVMBuildSDiv((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); + }; + if exp.typ == MULTIPLICATIVE_EXPRESSION_TYPE_MOD { + result = LLVMBuildSRem((*c).builder, (*lhs_value).value, (*rhs_value).value, ""); + }; + assert(result != cast(LLVMValueRef, null)); + + return codegen_generate_literal(c, result, name, expression, (*lhs_value).node_type); + }; + + if ((*expression).type == NODE_UNARY_EXPRESSION) { + let exp = (*(cast(*NODE_UNARY_EXPRESSION_DATA, (*expression).data))); + let k = codegen_generate_expression_value(c, exp.expression, cast(*i8, null)); + assert(k != cast(*Variable, null)); + + let r = cast(LLVMValueRef, null); + let typ = (*k).node_type; + + if exp.typ == UNARY_EXPRESSION_TYPE_NOT { + assert((*typ).type == NODE_TYPE_SIMPLE_TYPE); + let simple_type = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*typ).data); + assert(strcmp(simple_type.name, "bool")); + r = LLVMBuildICmp((*c).builder, cast(LLVMIntPredicate, LLVMIntEQ), (*k).value, LLVMConstInt(LLVMInt1Type(), 0, 0), ""); + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "bool"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + typ = create_node(c, node_type); + }; + + if exp.typ == UNARY_EXPRESSION_TYPE_MINUS { + r = LLVMBuildNeg((*c).builder, (*k).value, ""); + let node_type = Node{}; + node_type.type = 
NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "i64"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + typ = create_node(c, node_type); + }; + + if exp.typ == UNARY_EXPRESSION_TYPE_STAR { + assert((*typ).type == NODE_TYPE_POINTER_TYPE); + let n = (*k).node_type; + typ = (*cast(*NODE_TYPE_POINTER_TYPE_DATA, (*n).data)).type; + let ptr_type = codegen_get_llvm_type(c, typ); + assert(ptr_type != cast(*LLVMTypeRef, null)); + r = LLVMBuildLoad2((*c).builder, *ptr_type, (*k).value, ""); + }; + + return codegen_generate_literal(c, r, name, expression, typ); + }; + + if ((*expression).type == NODE_TYPE_FUNCTION_TYPE) { + let e = *((*c).environment); + assert(e.scope_stack_len == 1); + + let existing = environment_get_variable((*c).environment, name); + if (existing != cast(*Variable, null)) { + return existing; + }; + + let function_type = codegen_get_llvm_type(c, expression); + assert(function_type != cast(*LLVMTypeRef, null)); + let function = LLVMAddFunction((*c).llvm_module, name, *function_type); + let v = Variable{}; + v.value = function; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = expression; + return codegen_create_variable(c, v); + }; + + if ((*expression).type == NODE_FUNCTION_CALL_STATEMENT) { + return codegen_generate_function_call_statement(c, expression); + }; + + if ((*expression).type == NODE_CAST_STATEMENT) { + let exp = *cast(*NODE_CAST_STATEMENT_DATA, (*expression).data); + let val = codegen_generate_expression_value(c, exp.expression, ""); + assert(val != cast(*Variable, null)); + let v = Variable{}; + v.value = (*val).value; /* TODO: Do real casting */ + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = exp.typ; + return codegen_create_variable(c, v); + }; + + if ((*expression).type == 
NODE_SIZEOF_STATEMENT) { + let exp = *cast(*NODE_SIZEOF_STATEMENT_DATA, (*expression).data); + let typ = codegen_get_llvm_type(c, exp.typ); + assert(typ != cast(*LLVMTypeRef, null)); + let size_in_bytes = LLVMStoreSizeOfType((*c).llvm_target_data, *typ); + let size_val = LLVMConstInt(LLVMInt64Type(), size_in_bytes, 0); + + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "i64"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + let v = Variable{}; + v.value = size_val; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = create_node(c, node_type); + return codegen_create_variable(c, v); + }; + + if ((*expression).type == NODE_TYPE_STRUCT_TYPE) { + let struc_data = *cast(*NODE_TYPE_STRUCT_TYPE_DATA, (*expression).data); + let dd = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*dd).name = name; + (*dd).underlying_type = expression; + let n = Node{}; + n.type = NODE_TYPE_SIMPLE_TYPE; + n.data = cast(*void, dd); + let simple_type_node = create_node(c, n); + + let struc_type = LLVMStructCreateNamed((*c).llvm_context, name); + + if name != cast(*i8, null) { + let v = Variable{}; + v.value = cast(LLVMValueRef, null); + v.type = struc_type; + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = simple_type_node; + environment_add_variable((*c).environment, name, codegen_create_variable(c, v)); + }; + + let llvm_types = cast(*LLVMTypeRef, arena_alloc((*c).arena, sizeof(LLVMTypeRef) * 20)); + let i = 0; + while i < struc_data.fields_len { + let field = *(struc_data.fields + cast(**Node, i)); + assert((*field).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER); + let t = (*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*field).data)).type; + let lt = codegen_get_llvm_type(c, t); + 
assert(lt != cast(*LLVMTypeRef, null)); + (*(llvm_types + cast(*LLVMTypeRef, i))) = *lt; + i = i + 1; + }; + + LLVMStructSetBody(struc_type, llvm_types, i, 0); + + let v = Variable{}; + v.value = cast(LLVMValueRef, null); + v.type = struc_type; + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = simple_type_node; + return codegen_create_variable(c, v); + }; + + if ((*expression).type == NODE_TYPE_SIMPLE_TYPE) { + let simple_type_data = *cast(*NODE_TYPE_SIMPLE_TYPE_DATA, (*expression).data); + let typ = codegen_get_llvm_type(c, simple_type_data.underlying_type); + assert(typ != cast(*LLVMTypeRef, null)); + let v = Variable{}; + v.value = cast(LLVMValueRef, null); + v.type = *typ; + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = simple_type_data.underlying_type; + return codegen_create_variable(c, v); + }; + + if ((*expression).type == NODE_STRUCT_INSTANCIATION) { + let struc_data = *cast(*NODE_STRUCT_INSTANCIATION_DATA, (*expression).data); + let v = environment_get_variable((*c).environment, struc_data.typ); + assert(v != cast(*Variable, null)); + return v; + }; + + if ((*expression).type == NODE_FIELD_ACCESS) { + let field_access = *cast(*NODE_FIELD_ACCESS_DATA, (*expression).data); + let x = codegen_get_struct_field(c, field_access.expression, field_access.name); + assert(x != cast(*StructField, null)); + let t = codegen_get_llvm_type(c, (*x).type); + assert(t != cast(*LLVMTypeRef, null)); + let loaded = LLVMBuildLoad2((*c).builder, *t, (*x).value, ""); + let v = Variable{}; + v.value = loaded; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = expression; + v.node_type = (*x).type; + return codegen_create_variable(c, v); + }; + + printf("ASSERT 1: %d\n", (*expression).type); + assert(false); + + return cast(*Variable, null); +}; + +let codegen_generate_assignment_statement = (c: *codegen, stmt: *NODE_ASSIGNMENT_STATEMENT_DATA) => i64 { + let lhs = *((*stmt).lhs); + let prhs = 
(*stmt).rhs; + + if (lhs.type == NODE_PRIMARY_EXPRESSION_IDENTIFIER) { + let identifier = (*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, lhs.data)).name; + let variable = codegen_generate_expression_value(c, prhs, identifier); + assert(variable != cast(*Variable, null)); + + let env = (*(*c).environment); + if env.scope_stack_len == 1 { + environment_add_variable((*c).environment, identifier, variable); + return 0; + }; + + let ptr = cast(LLVMValueRef, null); + let typ = (*variable).node_type; + + if (*stmt).is_declaration { + let x = codegen_get_llvm_type(c, typ); + assert(x != cast(*LLVMTypeRef, null)); + if (*typ).type == NODE_TYPE_FUNCTION_TYPE { + *x = LLVMPointerType(*x, 0); + }; + ptr = LLVMBuildAlloca((*c).builder, *x, identifier); + }; + if !(*stmt).is_declaration { + let v = environment_get_variable((*c).environment, identifier); + assert(v != cast(*Variable, null)); + ptr = (*v).value; + typ = (*v).node_type; + /* TODO: Do this in more places! (everywhere get_llvm_type or get_variable?) 
Also check types in return and cmp */ + assert(compare_types(c, typ, (*variable).node_type, (*stmt).is_dereference)); + }; + + if (*stmt).is_dereference { + let ltyp = codegen_get_llvm_type(c, typ); + assert(ltyp != cast(*LLVMTypeRef, null)); + ptr = LLVMBuildLoad2((*c).builder, *ltyp, ptr, ""); + }; + + /* NOTE: structs have a null variable.value */ + if (*variable).value != cast(LLVMValueRef, null) { + LLVMBuildStore((*c).builder, (*variable).value, ptr); + }; + + if (*stmt).is_dereference { + let v = environment_get_variable((*c).environment, identifier); + assert(v != cast(*Variable, null)); + ptr = (*v).value; + }; + + let new_variable = Variable{}; + + new_variable.value = ptr; + new_variable.type = (*variable).type; + new_variable.stack_level = cast(*i64, null); + new_variable.node = (*variable).node; + new_variable.node_type = typ; + + if (*stmt).is_declaration { + environment_add_variable((*c).environment, identifier, codegen_create_variable(c, new_variable)); + }; + if !(*stmt).is_declaration { + environment_set_variable((*c).environment, identifier, codegen_create_variable(c, new_variable)); + }; + + return 0; + }; + + if (lhs.type == NODE_UNARY_EXPRESSION) { + let xd = (*cast(*NODE_UNARY_EXPRESSION_DATA, lhs.data)).expression; + let a = codegen_generate_expression_value(c, xd, cast(*i8, null)); + assert(a != cast(*Variable, null)); + let variable = codegen_generate_expression_value(c, prhs, cast(*i8, null)); + assert(variable != cast(*Variable, null)); + assert(compare_types(c, (*a).node_type, (*variable).node_type, true)); + LLVMBuildStore((*c).builder, (*variable).value, (*a).value); + + return 0; + }; + + if (lhs.type == NODE_FIELD_ACCESS) { + let field_access = (*cast(*NODE_FIELD_ACCESS_DATA, lhs.data)); + let xd = field_access.expression; + let name = field_access.name; + + let x = codegen_get_struct_field(c, xd, name); + assert(x != cast(*StructField, null)); + + let variable = codegen_generate_expression_value(c, prhs, cast(*i8, null)); + 
assert(compare_types(c, (*x).type, (*variable).node_type, (*stmt).is_dereference)); + LLVMBuildStore((*c).builder, (*variable).value, (*x).value); + + return 0; + }; + + printf("ASSERT 2 %d\n", lhs.type); + assert(false); + return 0; +}; + +let codegen_generate_return_statement = (c: *codegen, stmt: *NODE_RETURN_STATEMENT_DATA) => i64 { + let expression = (*stmt).expression; + + if expression == cast(*Node, null) { + LLVMBuildRetVoid((*c).builder); + return 0; + }; + + let val = codegen_generate_expression_value(c, expression, cast(*i8, null)); + assert(val != cast(*Variable, null)); + + assert(compare_types(c, (*c).current_function_retur_type, (*val).node_type, false)); + + LLVMBuildRet((*c).builder, (*val).value); + + return 0; +}; + +let get_function_return_type = (ic: *codegen, fun: *Node) => *Node { + if (*fun).type == NODE_FUNCTION_DEFINITION { + let d = cast(*NODE_FUNCTION_DEFINITION_DATA, (*fun).data); + return (*d).retur_type; + }; + if (*fun).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER { + let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*fun).data); + let f = environment_get_variable((*ic).environment, (*d).name); + if f == cast(*Variable, null) { + printf("NO variable 2: %s\n", (*d).name); + assert(false); + }; + let f_type = (*f).node_type; + assert((*f_type).type == NODE_TYPE_FUNCTION_TYPE); + return get_function_return_type(ic, f_type); + }; + if (*fun).type == NODE_TYPE_FUNCTION_TYPE { + let d = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*fun).data); + return (*d).retur_type; + }; + assert(false); + return cast(*Node, null); +}; + +let codegen_generate_function_call_statement = (c: *codegen, statement: *Node) => *Variable { + assert((*statement).type == NODE_FUNCTION_CALL_STATEMENT); + let stmt = cast(*NODE_FUNCTION_CALL_STATEMENT_DATA, (*statement).data); + let expression = (*stmt).expression; + + let node = statement; + let function = cast(*Variable, null); + + if (*expression).type == NODE_PRIMARY_EXPRESSION_IDENTIFIER { + let ident = 
(*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, (*expression).data)); + function = environment_get_variable((*c).environment, ident.name); + if function == cast(*Variable, null) { + printf("NO variable 1: %s\n", ident.name); + assert(false); + }; + + if LLVMGetValueKind((*function).value) != LLVMFunctionValueKind { + let lt = codegen_get_llvm_type(c, (*function).node_type); + assert(lt != cast(*LLVMTypeRef, null)); + (*function).value = LLVMBuildLoad2((*c).builder, LLVMPointerType(*lt, 0), (*function).value, ""); + node = (*function).node; + }; + }; + if (*expression).type == NODE_FUNCTION_DEFINITION { + function = codegen_generate_expression_value(c, expression, cast(*i8, null)); + }; + + assert(function != cast(*Variable, null)); + assert((*function).node_type != cast(*Node, null)); + let function_type = (*function).node_type; + assert((*function_type).type == NODE_TYPE_FUNCTION_TYPE); + let function_type_data = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, (*function_type).data); + /* assert((*function_type_data).parameters_len == (*stmt).arguments_len); TODO: Varargs */ + + let arguments = cast(*LLVMValueRef, arena_alloc((*c).arena, sizeof(LLVMValueRef) * (*stmt).arguments_len)); + + let i = 0; + while i < (*stmt).arguments_len { + let argument = (*((*stmt).arguments + cast(**Node, i))); + let arg = codegen_generate_expression_value(c, argument, cast(*i8, null)); + assert(arg != cast(*Variable, null)); + let expected_type = *((*function_type_data).parameters + cast(**Node, i)); /* TODO: If varargs we shouldn't do this */ + + assert(compare_types(c, expected_type, (*arg).node_type, false)); + + (*(arguments + cast(*LLVMValueRef, i))) = (*arg).value; + + i = i + 1; + }; + + let function_type = codegen_get_llvm_type(c, (*function).node_type); + assert(function_type != cast(*LLVMTypeRef, null)); + + let res = LLVMBuildCall2((*c).builder, *function_type, (*function).value, arguments, i, ""); + + let function_return_type = get_function_return_type(c, (*function).node_type); + 
+ let v = Variable{}; + + v.value = res; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = node; + v.node_type = function_return_type; + + return codegen_create_variable(c, v); +}; + +let codegen_generate_break_statement = (c: *codegen) => i64 { + assert((*c).whil_loop_exit != cast(LLVMBasicBlockRef, null)); + LLVMBuildBr((*c).builder, (*c).whil_loop_exit); + return 0; +}; + +let codegen_generate_continue_statement = (c: *codegen) => i64 { + assert((*c).whil_block != cast(LLVMBasicBlockRef, null)); + LLVMBuildBr((*c).builder, (*c).whil_block); + return 0; +}; + +let codegen_generate_if_statement = (c: *codegen, statement: *NODE_IF_STATEMENT_DATA) => *void { + let condition_value = codegen_generate_expression_value(c, (*statement).condition, cast(*i8, null)); + assert(condition_value != cast(*Variable, null)); + + let current_block = LLVMGetInsertBlock((*c).builder); + + let then_block = LLVMAppendBasicBlock((*c).current_function, "then_block"); + LLVMPositionBuilderAtEnd((*c).builder, then_block); + + let i = 0; + while i < (*statement).statements_len { + let stmt = (*((*statement).statements + cast(**Node, i))); + let res = codegen_generate_statement(c, stmt); + assert(res == 0); + i = i + 1; + }; + + let merge_block = LLVMAppendBasicBlock((*c).current_function, "merge_block"); + let last_instr = LLVMGetLastInstruction(LLVMGetInsertBlock((*c).builder)); + if last_instr == cast(LLVMValueRef, null) { + LLVMBuildBr((*c).builder, merge_block); + }; + if last_instr != cast(LLVMValueRef, null) { + if LLVMIsATerminatorInst(last_instr) == cast(LLVMValueRef, null) { + LLVMBuildBr((*c).builder, merge_block); + }; + }; + LLVMPositionBuilderAtEnd((*c).builder, current_block); + LLVMBuildCondBr((*c).builder, (*condition_value).value, then_block, merge_block); + LLVMPositionBuilderAtEnd((*c).builder, merge_block); + + return null; +}; + +let codegen_generate_while_statement = (c: *codegen, statement: *NODE_WHILE_STATEMENT_DATA) => *void { + let 
whil_block = LLVMAppendBasicBlock((*c).current_function, "while_block"); + LLVMBuildBr((*c).builder, whil_block); + LLVMPositionBuilderAtEnd((*c).builder, whil_block); + + let condition_value = codegen_generate_expression_value(c, (*statement).condition, cast(*i8, null)); + assert(condition_value != cast(*Variable, null)); + + let inner_block = LLVMAppendBasicBlock((*c).current_function, "inner_block"); + let outer_block = LLVMAppendBasicBlock((*c).current_function, "outer_block"); + LLVMBuildCondBr((*c).builder, (*condition_value).value, inner_block, outer_block); + + (*c).whil_loop_exit = outer_block; + (*c).whil_block = whil_block; + + LLVMPositionBuilderAtEnd((*c).builder, inner_block); + let i = 0; + while i < (*statement).statements_len { + let stmt = (*((*statement).statements + cast(**Node, i))); + let res = codegen_generate_statement(c, stmt); + assert(res == 0); + i = i + 1; + }; + + LLVMBuildBr((*c).builder, whil_block); + LLVMPositionBuilderAtEnd((*c).builder, outer_block); + + (*c).whil_loop_exit = cast(LLVMBasicBlockRef, null); + (*c).whil_block = cast(LLVMBasicBlockRef, null); + + return null; +}; + +extern codegen_generate = (*codegen, *Node) => i64; + +let codegen_generate_import_declaration = (c: *codegen, statement: *NODE_IMPORT_DECLARATION_DATA) => i64 { + return codegen_generate(c, (*statement).program); +}; + +let codegen_generate_statement = (c: *codegen, statement: *Node) => i64 { + let stmt = *statement; + + if stmt.type == NODE_ASSIGNMENT_STATEMENT { + return codegen_generate_assignment_statement(c, cast(*NODE_ASSIGNMENT_STATEMENT_DATA, stmt.data)); + }; + + if stmt.type == NODE_RETURN_STATEMENT { + return codegen_generate_return_statement(c, cast(*NODE_RETURN_STATEMENT_DATA, stmt.data)); + }; + + if stmt.type == NODE_FUNCTION_CALL_STATEMENT { + codegen_generate_function_call_statement(c, statement); + return 0; + }; + + if stmt.type == NODE_IF_STATEMENT { + codegen_generate_if_statement(c, cast(*NODE_IF_STATEMENT_DATA, stmt.data)); + 
return 0; + }; + + if stmt.type == NODE_WHILE_STATEMENT { + codegen_generate_while_statement(c, cast(*NODE_WHILE_STATEMENT_DATA, stmt.data)); + return 0; + }; + + if stmt.type == NODE_IMPORT_DECLARATION { + return codegen_generate_import_declaration(c, cast(*NODE_IMPORT_DECLARATION_DATA, stmt.data)); + }; + + if stmt.type == NODE_CONTINUE_STATEMENT { + return codegen_generate_continue_statement(c); + }; + + if stmt.type == NODE_BREAK_STATEMENT { + return codegen_generate_break_statement(c); + }; + + printf("ASSERT 3 %d\n", stmt.type); + assert(false); + + return 0; +}; + +let codegen_generate = (c: *codegen, ast: *Node) => i64 { + assert((*ast).type == NODE_PROGRAM); + + let program = *cast(*NODE_PROGRAM_DATA, (*ast).data); + + let i = 0; + while i < program.statements_len { + let stmt = *(program.statements + cast(**Node, i)); + + let res = codegen_generate_statement(c, stmt); + if res != 0 { + return 1; + }; + + i = i + 1; + }; + + return 0; +}; + +let codegen_compile = (c: *codegen, dump_ir: bool) => i64 { + /* Dump module */ + LLVMDumpModule((*c).llvm_module); + let message = cast(**i8, null); + + if dump_ir { + LLVMPrintModuleToFile((*c).llvm_module, "output.ll", message); + return 0; + }; + + /* Generate code */ + let triple = LLVMGetDefaultTargetTriple(); + let target_ref = cast(*LLVMTargetRef, arena_alloc((*c).arena, sizeof(*LLVMTargetRef))); + let result = LLVMGetTargetFromTriple(triple, target_ref, message); + if result != 0 { + printf("Target output: %s\n", *message); + LLVMDisposeMessage(*message); + }; + let target_machine = LLVMCreateTargetMachine( + *target_ref, + triple, + "", + "", + LLVMCodeGenLevelDefault, + LLVMRelocDefault, + LLVMCodeModelDefault, + ); + LLVMDisposeMessage(triple); + result = LLVMVerifyModule((*c).llvm_module, LLVMAbortProcessAction, message); + if result != 0 { + printf("Verification output: %s\n", *message); + LLVMDisposeMessage(*message); + }; + + /* Generate the object file */ + let filename = "bootstrap_output.o"; + 
LLVMTargetMachineEmitToFile( + target_machine, + (*c).llvm_module, + filename, + LLVMObjectFile, + cast(**i8, null), + ); + LLVMDisposeTargetMachine(target_machine); + printf("Object file generated: %s\n", filename); + + return 0; +}; + +let codegen_deinit = (c: *codegen) => void { + LLVMDisposeModule((*c).llvm_module); + LLVMShutdown(); + LLVMDisposeBuilder((*c).builder); + return; +}; diff --git a/src/codegen.zig b/src/codegen.zig deleted file mode 100644 index 2ece01e..0000000 --- a/src/codegen.zig +++ /dev/null @@ -1,1101 +0,0 @@ -const std = @import("std"); - -const llvm = @cImport({ - @cInclude("llvm-c/Core.h"); - @cInclude("llvm-c/TargetMachine.h"); - @cInclude("llvm-c/Types.h"); - @cInclude("llvm-c/Analysis.h"); - @cInclude("llvm-c/Target.h"); -}); - -const parser = @import("parser.zig"); - -pub const CodeGenError = error{ - CompilationError, - OutOfMemory, -}; - -pub const CodeGen = struct { - llvm_module: llvm.LLVMModuleRef, - llvm_target_data: llvm.LLVMTargetDataRef, - llvm_context: llvm.LLVMContextRef, - builder: llvm.LLVMBuilderRef, - environment: *Environment, - - arena: std.mem.Allocator, - - while_loop_exit: ?llvm.LLVMBasicBlockRef, - while_block: ?llvm.LLVMBasicBlockRef, - current_function: ?llvm.LLVMValueRef, - current_function_return_type: ?*parser.Node, - - pub fn init(arena: std.mem.Allocator) !*CodeGen { - // Initialize LLVM - llvm.LLVMInitializeAllTargetInfos(); - llvm.LLVMInitializeAllTargetMCs(); - llvm.LLVMInitializeAllTargets(); - llvm.LLVMInitializeAllAsmPrinters(); - llvm.LLVMInitializeAllAsmParsers(); - - const module: llvm.LLVMModuleRef = llvm.LLVMModuleCreateWithName("module"); - const context = llvm.LLVMGetGlobalContext(); - const builder = llvm.LLVMCreateBuilder(); - - const self = try arena.create(CodeGen); - self.* = .{ - .llvm_module = module, - .llvm_target_data = llvm.LLVMGetModuleDataLayout(module), - .llvm_context = context, - .builder = builder, - .environment = try Environment.init(arena), - - .arena = arena, - - 
.while_loop_exit = null, - .while_block = null, - .current_function = null, - .current_function_return_type = null, - }; - - return self; - } - - pub fn compile(self: *CodeGen) void { - // Dump module - llvm.LLVMDumpModule(self.llvm_module); - - // Generate code - const triple = llvm.LLVMGetDefaultTargetTriple(); - var target_ref: llvm.LLVMTargetRef = undefined; - var message: [*c]u8 = undefined; - var result = llvm.LLVMGetTargetFromTriple(triple, &target_ref, &message); - if (result != 0) { - std.debug.print("Target output: {s}.\n", .{message}); - llvm.LLVMDisposeMessage(message.?); - } - const target_machine = llvm.LLVMCreateTargetMachine( - target_ref, - triple, - "", - "", - llvm.LLVMCodeGenLevelDefault, - llvm.LLVMRelocDefault, - llvm.LLVMCodeModelDefault, - ); - - result = llvm.LLVMVerifyModule(self.llvm_module, llvm.LLVMAbortProcessAction, &message); - if (result != 0) { - std.debug.print("Verification output: {any}.\n", .{message}); - llvm.LLVMDisposeMessage(message); - } - - // Generate the object file - const filename = "output.o"; - _ = llvm.LLVMTargetMachineEmitToFile( - target_machine, - self.llvm_module, - filename, - llvm.LLVMObjectFile, - null, - ); - std.debug.print("Object file generated: {s}\n", .{filename}); - } - - pub fn deinit(self: *CodeGen) void { - defer llvm.LLVMDisposeBuilder(self.builder); - llvm.LLVMDisposeModule(self.llvm_module); - llvm.LLVMShutdown(); - } - - pub fn generate(self: *CodeGen, ast: *parser.Node) CodeGenError!void { - std.debug.assert(ast.* == parser.Node.PROGRAM); - - const program = ast.PROGRAM; - - for (program.statements) |stmt| { - _ = try self.generate_statement(stmt); - } - } - - fn generate_statement(self: *CodeGen, statement: *parser.Node) CodeGenError!void { - errdefer std.debug.print("Error generating statement\n", .{}); - std.debug.assert(statement.* == parser.Node.STATEMENT); - - switch (statement.STATEMENT.statement.*) { - .ASSIGNMENT_STATEMENT => |*assignment_statement| { - try 
self.generate_assignment_statement(@ptrCast(assignment_statement)); - }, - .FUNCTION_CALL_STATEMENT => |*function_call_statement| { - _ = try self.generate_function_call_statement(@ptrCast(function_call_statement)); - }, - .RETURN_STATEMENT => |*return_statement| return try self.generate_return_statement(@ptrCast(return_statement)), - .BREAK_STATEMENT => |*break_statement| return try self.generate_break_statement(@ptrCast(@alignCast(break_statement))), - .CONTINUE_STATEMENT => |*continue_statement| return try self.generate_continue_statement(@ptrCast(@alignCast(continue_statement))), - .IF_STATEMENT => |*if_statement| return try self.generate_if_statement(@ptrCast(if_statement)), - .WHILE_STATEMENT => |*while_statement| return try self.generate_while_statement(@ptrCast(while_statement)), - .IMPORT_DECLARATION => |*import_declaration| return try self.generate_import_declaration(@ptrCast(import_declaration)), - else => unreachable, - } - } - - fn generate_assignment_statement(self: *CodeGen, statement: *parser.Node) CodeGenError!void { - errdefer std.debug.print("Error generating assignment statement\n", .{}); - std.debug.assert(statement.* == parser.Node.ASSIGNMENT_STATEMENT); - const assignment_statement = statement.ASSIGNMENT_STATEMENT; - - switch (assignment_statement.lhs.*) { - .PRIMARY_EXPRESSION => { - const identifier = assignment_statement.lhs.PRIMARY_EXPRESSION.IDENTIFIER; - const variable = try self.generate_expression_value(assignment_statement.rhs, identifier.name); - - if (self.environment.scope_stack.items.len == 1) { - try self.environment.add_variable(identifier.name, try self.create_variable(.{ - .value = variable.value, - .type = variable.type, - .node = variable.node, - .node_type = variable.node_type, - .stack_level = null, - })); - return; - } - - var ptr: llvm.LLVMValueRef = undefined; - var typ = variable.node_type; - if (assignment_statement.is_declaration) { - var x = try self.get_llvm_type(variable.node_type); - if (variable.node_type.TYPE 
== .FUNCTION_TYPE) { - x = llvm.LLVMPointerType(x, 0); - } - ptr = llvm.LLVMBuildAlloca(self.builder, x, try std.fmt.allocPrintZ(self.arena, "{s}", .{identifier.name})); - } else { - ptr = self.environment.get_variable(identifier.name).?.value; - typ = self.environment.get_variable(identifier.name).?.node_type; - // TODO: Do this in more places! (everywhere get_llvm_type or get_variable?) Also check types in return and cmp - std.debug.print("TYP {s}: {any} vs {any} -- {any}\n", .{ identifier.name, typ.TYPE, variable.node_type.TYPE, variable.node }); - std.debug.assert(self.compare_types(typ, variable.node_type, assignment_statement.is_dereference)); - } - - if (assignment_statement.is_dereference) { - ptr = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(typ), ptr, ""); - } - - // NOTE: structs have a null variable.value - if (variable.value != null) { - _ = llvm.LLVMBuildStore(self.builder, variable.value, ptr); - } - - if (assignment_statement.is_dereference) { - ptr = self.environment.get_variable(identifier.name).?.value; - } - - const new_variable = try self.create_variable(.{ - .value = ptr, - .type = variable.type, - .node = variable.node, - .node_type = typ, - .stack_level = null, - }); - // Adding variable doesnt actually replace the variable of previous scope - if (assignment_statement.is_declaration) { - try self.environment.add_variable(identifier.name, new_variable); - } else { - try self.environment.set_variable(identifier.name, new_variable); - } - }, - .UNARY_EXPRESSION => { - const xd = assignment_statement.lhs.UNARY_EXPRESSION.expression; - const a = try self.generate_expression_value(xd, null); - const variable = try self.generate_expression_value(assignment_statement.rhs, null); - std.debug.assert(self.compare_types(a.node_type, variable.node_type, true)); - _ = llvm.LLVMBuildStore(self.builder, variable.value, a.value); - }, - .FIELD_ACCESS => |field_access| { - const xd = assignment_statement.lhs.FIELD_ACCESS.expression; - const name 
= field_access.name; - - const x = try self.get_struct_field(xd, name); - - const variable = try self.generate_expression_value(assignment_statement.rhs, null); - std.debug.print("7TYP {s}: {any} vs {any} -- {any}\n", .{ name, x.type, variable.node_type.TYPE, variable.node }); - std.debug.assert(self.compare_types(x.type, variable.node_type, assignment_statement.is_dereference)); - _ = llvm.LLVMBuildStore(self.builder, variable.value, x.value); - }, - else => unreachable, - } - } - - fn generate_function_call_statement(self: *CodeGen, statement: *parser.Node) CodeGenError!*Variable { - errdefer std.debug.print("Error generating function call statement\n", .{}); - std.debug.assert(statement.* == parser.Node.FUNCTION_CALL_STATEMENT); - const function_call_statement = statement.FUNCTION_CALL_STATEMENT; - - var node = statement; - - var function: *Variable = undefined; - switch (function_call_statement.expression.*) { - .PRIMARY_EXPRESSION => |primary_expression| { - std.debug.assert(primary_expression == .IDENTIFIER); - function = self.environment.get_variable(primary_expression.IDENTIFIER.name) orelse return CodeGenError.CompilationError; - if (llvm.LLVMGetValueKind(function.value) != llvm.LLVMFunctionValueKind) { - function.value = llvm.LLVMBuildLoad2(self.builder, llvm.LLVMPointerType(try self.get_llvm_type(function.node_type), 0), function.value, ""); - node = function.node; - } - }, - .FUNCTION_DEFINITION => |*function_definition| { - function = try self.generate_expression_value(@ptrCast(function_definition), null); - }, - else => unreachable, - } - - var arguments = std.ArrayList(llvm.LLVMValueRef).init(self.arena); - - for (0.., function_call_statement.arguments) |i, argument| { - const arg = try self.generate_expression_value(argument, null); - const expected_type = function.node_type.TYPE.FUNCTION_TYPE.parameters[i]; //TODO: If varargs we shouldnt do this - std.debug.print("2 TYP {s}: {any} vs {any}\n", .{ 
function_call_statement.expression.PRIMARY_EXPRESSION.IDENTIFIER.name, expected_type.TYPE, arg.node_type.TYPE }); - std.debug.assert(self.compare_types(expected_type, arg.node_type, false)); - try arguments.append(arg.value); - } - - const res = llvm.LLVMBuildCall2(self.builder, try self.get_llvm_type(function.node_type), function.value, @ptrCast(arguments.items), @intCast(arguments.items.len), "") orelse return CodeGenError.CompilationError; - - const get_function_return_type = struct { - fn call(iSelf: *CodeGen, fun: *parser.Node) *parser.Node { - switch (fun.*) { - .FUNCTION_DEFINITION => |x| { - return x.return_type; - }, - .PRIMARY_EXPRESSION => |x| { - const f = iSelf.environment.get_variable(x.IDENTIFIER.name).?.node_type; - std.debug.assert(f.TYPE == .FUNCTION_TYPE); - return call(iSelf, f); - }, - .TYPE => |x| { - return x.FUNCTION_TYPE.return_type; - }, - else => unreachable, - } - } - }; - - const function_return_type = get_function_return_type.call(self, function.node_type); - - std.debug.print("FN: {s} -> ret: {any}\n", .{ function_call_statement.expression.PRIMARY_EXPRESSION.IDENTIFIER.name, function_return_type }); - - return self.create_variable(.{ - .value = res, - .type = null, - .stack_level = null, - .node = node, - .node_type = function_return_type, - }) catch return CodeGenError.CompilationError; - } - - fn generate_return_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating return statement\n", .{}); - std.debug.assert(statement.* == parser.Node.RETURN_STATEMENT); - - const expression = statement.RETURN_STATEMENT.expression; - - if (expression == null) { - _ = llvm.LLVMBuildRetVoid(self.builder); - return; - } - - const val = try self.generate_expression_value(expression.?, null); - - std.debug.print("3TYP : {any} vs {any}\n", .{ self.current_function_return_type.?, val.node_type }); - std.debug.assert(self.compare_types(self.current_function_return_type.?, val.node_type, false)); - - _ = 
llvm.LLVMBuildRet(self.builder, val.value); - } - - fn generate_break_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating break statement\n", .{}); - std.debug.assert(statement.* == parser.Node.BREAK_STATEMENT); - std.debug.assert(self.while_loop_exit != null); - - _ = llvm.LLVMBuildBr(self.builder, self.while_loop_exit.?); - } - - fn generate_continue_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating continue statement\n", .{}); - std.debug.assert(statement.* == parser.Node.CONTINUE_STATEMENT); - std.debug.assert(self.while_block != null); - - _ = llvm.LLVMBuildBr(self.builder, self.while_block.?); - } - - fn generate_if_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating if statement\n", .{}); - std.debug.assert(statement.* == parser.Node.IF_STATEMENT); - - const if_statement = statement.IF_STATEMENT; - - const condition_value = try self.generate_expression_value(if_statement.condition, null); - - const current_block = llvm.LLVMGetInsertBlock(self.builder); - - const then_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "then_block"); - _ = llvm.LLVMPositionBuilderAtEnd(self.builder, then_block); - for (if_statement.statements) |stmt| { - try self.generate_statement(stmt); - } - const merge_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "merge_block"); - const last_instr = llvm.LLVMGetLastInstruction(llvm.LLVMGetInsertBlock(self.builder)); - if (last_instr == null or llvm.LLVMIsATerminatorInst(last_instr) == null) { - _ = llvm.LLVMBuildBr(self.builder, merge_block); - } - llvm.LLVMPositionBuilderAtEnd(self.builder, current_block); - - _ = llvm.LLVMBuildCondBr(self.builder, condition_value.value, then_block, merge_block); - llvm.LLVMPositionBuilderAtEnd(self.builder, merge_block); - } - - fn generate_while_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer 
std.debug.print("Error generating while statement\n", .{}); - std.debug.assert(statement.* == parser.Node.WHILE_STATEMENT); - - const while_statement = statement.WHILE_STATEMENT; - - const while_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "while_block"); - _ = llvm.LLVMBuildBr(self.builder, while_block); - _ = llvm.LLVMPositionBuilderAtEnd(self.builder, while_block); - const condition_value = try self.generate_expression_value(while_statement.condition, null); - - const inner_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "inner_block"); - const outer_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "outer_block"); - _ = llvm.LLVMBuildCondBr(self.builder, condition_value.value, inner_block, outer_block); - - self.while_loop_exit = outer_block; - self.while_block = while_block; - defer { - self.while_block = null; - self.while_loop_exit = null; - } - - _ = llvm.LLVMPositionBuilderAtEnd(self.builder, inner_block); - for (while_statement.statements) |stmt| { - try self.generate_statement(stmt); - } - - _ = llvm.LLVMBuildBr(self.builder, while_block); - - llvm.LLVMPositionBuilderAtEnd(self.builder, outer_block); - } - - fn generate_import_declaration(self: *CodeGen, declaration: *parser.Node) !void { - errdefer std.debug.print("Error generating import declaration\n", .{}); - std.debug.assert(declaration.* == parser.Node.IMPORT_DECLARATION); - - const import_declaration = declaration.IMPORT_DECLARATION; - - try self.generate(import_declaration.program); - } - - fn generate_expression_value(self: *CodeGen, expression: *parser.Node, name: ?[]const u8) CodeGenError!*Variable { - errdefer std.debug.print("Error generating statement value\n", .{}); - return switch (expression.*) { - .FUNCTION_DEFINITION => |function_definition| { - - // Functions should be declared "globally" - const builder_pos = llvm.LLVMGetInsertBlock(self.builder); - - var llvm_param_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - var param_types = 
std.ArrayList(*parser.Node).init(self.arena); - var is_varargs: i8 = 0; - for (function_definition.parameters) |param| { - std.debug.assert(param.PRIMARY_EXPRESSION == .IDENTIFIER); - const param_type = param.PRIMARY_EXPRESSION.IDENTIFIER.type.?; - if (param_type.TYPE == .SIMPLE_TYPE and std.mem.eql(u8, param_type.TYPE.SIMPLE_TYPE.name, "varargs")) { - is_varargs = 1; - } - var llvm_param_type = try self.get_llvm_type(param_type); - if (param_type.TYPE == .FUNCTION_TYPE) { - llvm_param_type = llvm.LLVMPointerType(llvm_param_type, 0); - } - try llvm_param_types.append(llvm_param_type); - try param_types.append(param_type); - } - var return_type = try self.get_llvm_type(function_definition.return_type); - if (function_definition.return_type.TYPE == .FUNCTION_TYPE) { - return_type = llvm.LLVMPointerType(return_type, 0); - } - var function: llvm.LLVMValueRef = null; - if (name != null) { - if (self.environment.get_variable(name.?)) |x| { - // If the function has been forward declared, we reuse its declaration - function = x.value; - } - } - if (function == null) { - const function_type = llvm.LLVMFunctionType(return_type, llvm_param_types.items.ptr, @intCast(llvm_param_types.items.len), is_varargs) orelse return CodeGenError.CompilationError; - function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name orelse "unnamed_func"}), function_type) orelse return CodeGenError.CompilationError; - } - const function_entry = llvm.LLVMAppendBasicBlock(function, "entrypoint") orelse return CodeGenError.CompilationError; - llvm.LLVMPositionBuilderAtEnd(self.builder, function_entry); - - try self.environment.create_scope(); - const last_function = self.current_function; - self.current_function = function; - const last_return_type = self.current_function_return_type; - self.current_function_return_type = function_definition.return_type; - defer { - self.current_function = last_function; - self.current_function_return_type = last_return_type; - 
self.environment.drop_scope(); - } - - const node_type = try self.create_node(.{ - .TYPE = .{ - .FUNCTION_TYPE = .{ - .parameters = param_types.items, - .return_type = function_definition.return_type, - }, - }, - }); - - // Needed for recursive functions - if (name != null) { - try self.environment.add_variable(name.?, try self.create_variable(.{ - .value = function, - .type = null, - .stack_level = null, - .node = expression, - .node_type = node_type, - })); - } - - const params = try self.arena.alloc(llvm.LLVMValueRef, function_definition.parameters.len); - llvm.LLVMGetParams(function, params.ptr); - - var parameters_index: usize = 0; - for (params) |p| { - defer parameters_index += 1; - const param_node = function_definition.parameters[parameters_index]; - std.debug.assert(param_node.* == .PRIMARY_EXPRESSION); - - const param_type = param_node.PRIMARY_EXPRESSION.IDENTIFIER.type.?; - var llvm_param_type = try self.get_llvm_type(param_type); - if (param_node.PRIMARY_EXPRESSION.IDENTIFIER.type.?.TYPE == .FUNCTION_TYPE) { - llvm_param_type = llvm.LLVMPointerType(llvm_param_type.?, 0); - } - // We need to alloca params because we assume all identifiers are alloca - const alloca = llvm.LLVMBuildAlloca(self.builder, llvm_param_type, try std.fmt.allocPrintZ(self.arena, "{s}", .{param_node.PRIMARY_EXPRESSION.IDENTIFIER.name})); - _ = llvm.LLVMBuildStore(self.builder, p, alloca); - - try self.environment.add_variable(param_node.PRIMARY_EXPRESSION.IDENTIFIER.name, try self.create_variable(.{ - .value = alloca, - .type = null, - .stack_level = null, - .node = param_node, - .node_type = param_type, - })); - } - - for (function_definition.statements) |stmt| { - try self.generate_statement(stmt); - } - - // TODO: This should be done with a defer when `builder_pos` is declared, but for some reason it doesn't work - llvm.LLVMPositionBuilderAtEnd(self.builder, builder_pos); - - return try self.create_variable(.{ - .value = function, - .type = null, - .stack_level = null, - .node 
= expression, - .node_type = node_type, - }); - }, - .FUNCTION_CALL_STATEMENT => |*fn_call| { - return try self.generate_function_call_statement(@ptrCast(fn_call)); - }, - .STRUCT_INSTANCIATION => |struct_instanciation| { - return self.environment.get_variable(struct_instanciation.typ).?; - }, - .PRIMARY_EXPRESSION => |primary_expression| switch (primary_expression) { - .NULL => { - return try self.generate_literal(llvm.LLVMConstNull(llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0)), name, expression, try self.create_node(.{ - .TYPE = .{ - .POINTER_TYPE = .{ - .type = try self.create_node(.{ - .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "void", - .underlying_type = null, - } }, - }), - }, - }, - })); - }, - .NUMBER => |n| { - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt64Type(), @intCast(n.value), 0), name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - }, - }, - })); - }, - .BOOLEAN => |b| { - const int_value: i64 = switch (b.value) { - false => 0, - true => 1, - }; - - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "bool", - .underlying_type = null, - }, - }, - })); - }, - .CHAR => |c| { - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt8Type(), @intCast(c.value), 0), name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i8", - .underlying_type = null, - }, - }, - })); - }, - .STRING => |s| { - const x = llvm.LLVMBuildGlobalStringPtr(self.builder, try std.fmt.allocPrintZ(self.arena, "{s}", .{s.value}), ""); - return self.create_variable( - .{ - .value = x, - .type = null, - .stack_level = null, - .node = expression, - .node_type = try self.create_node(.{ - .TYPE = .{ - .POINTER_TYPE = .{ - .type = try self.create_node(.{ - .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "i8", - .underlying_type = null, - } }, - }), - 
}, - }, - }), - }, - ); - }, - .IDENTIFIER => |i| { - const variable = self.environment.get_variable(i.name).?; - var param_value = variable.value; - if (variable.node_type.TYPE != .FUNCTION_TYPE or variable.stack_level != 0) { - var param_type = try self.get_llvm_type(variable.node_type); - if (variable.node_type.TYPE == .FUNCTION_TYPE) { - param_type = llvm.LLVMPointerType(param_type.?, 0); - } - param_value = llvm.LLVMBuildLoad2(self.builder, param_type, variable.value, ""); - } - - return self.generate_literal(param_value, name, expression, variable.node_type); - }, - }, - .ADDITIVE_EXPRESSION => |exp| { - const lhs_value = try self.generate_expression_value(exp.lhs, null); - const rhs_value = try self.generate_expression_value(exp.rhs, null); - - std.debug.print("4 TYP {s}: {any} vs {any}\n", .{ name orelse "unknown", lhs_value.node_type.TYPE, rhs_value.node_type.TYPE }); - std.debug.assert(self.compare_types(lhs_value.node_type, rhs_value.node_type, false)); - - var result: llvm.LLVMValueRef = undefined; - var node_type: *parser.Node = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - } } }); - - if (exp.addition) { - if (lhs_value.node_type.TYPE == .POINTER_TYPE) { - result = llvm.LLVMBuildGEP2(self.builder, try self.get_llvm_type(lhs_value.node_type.TYPE.POINTER_TYPE.type), lhs_value.value, @constCast(&[_]llvm.LLVMValueRef{rhs_value.value}), 1, ""); - node_type = lhs_value.node_type; - } else { - result = llvm.LLVMBuildAdd(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - } - } else { - result = llvm.LLVMBuildSub(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - } - - return self.generate_literal(result, name, expression, node_type); - }, - .MULTIPLICATIVE_EXPRESSION => |exp| { - const lhs_value = try self.generate_expression_value(exp.lhs, null); - const rhs_value = try self.generate_expression_value(exp.rhs, 
null); - - std.debug.print("5 TYP {s}: {any} vs {any}\n", .{ name orelse "unknown", lhs_value.node_type.TYPE, rhs_value.node_type.TYPE }); - std.debug.assert(self.compare_types(lhs_value.node_type, rhs_value.node_type, false)); - - var result: llvm.LLVMValueRef = undefined; - switch (exp.typ) { - .MUL => { - result = llvm.LLVMBuildMul(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - }, - .DIV => { - result = llvm.LLVMBuildSDiv(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - }, - .MOD => { - result = llvm.LLVMBuildSRem(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - }, - } - - return self.generate_literal(result, name, expression, lhs_value.node_type); - }, - .UNARY_EXPRESSION => |exp| { - const k = try self.generate_expression_value(exp.expression, null); - - var r: llvm.LLVMValueRef = undefined; - var typ: *parser.Node = k.node_type; - switch (exp.typ) { - .NOT => { - std.debug.assert(std.mem.eql(u8, k.node_type.TYPE.SIMPLE_TYPE.name, "bool")); - r = llvm.LLVMBuildICmp(self.builder, llvm.LLVMIntEQ, k.value, llvm.LLVMConstInt(llvm.LLVMInt1Type(), 0, 0), ""); - typ = try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "bool", - .underlying_type = null, - }, - }, - }); - }, - .MINUS => { - r = llvm.LLVMBuildNeg(self.builder, k.value, ""); - typ = try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - }, - }, - }); - }, - .STAR => { - std.debug.assert(k.node_type.TYPE == .POINTER_TYPE); - typ = k.node_type.TYPE.POINTER_TYPE.type; - r = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(typ), k.value, ""); - }, - } - - return self.generate_literal(r, name, expression, typ); - }, - .EQUALITY_EXPRESSION => |exp| { - const lhs_value = try self.generate_expression_value(exp.lhs, null); - const rhs_value = try self.generate_expression_value(exp.rhs, null); 
- - std.debug.print("6 TYP {s}: {any} vs {any}\n", .{ name orelse "unknown", lhs_value.node_type.TYPE, rhs_value.node_type.TYPE }); - std.debug.assert(self.compare_types(lhs_value.node_type, rhs_value.node_type, false)); - - const op: c_uint = switch (exp.typ) { - .EQ => llvm.LLVMIntEQ, - .NE => llvm.LLVMIntNE, - .GE => llvm.LLVMIntSGE, - .LE => llvm.LLVMIntSLE, - .LT => llvm.LLVMIntSLT, - .GT => llvm.LLVMIntSGT, - }; - const cmp = llvm.LLVMBuildICmp(self.builder, op, lhs_value.value, rhs_value.value, ""); - - return self.generate_literal(cmp, name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "bool", - .underlying_type = null, - }, - }, - })); - }, - .TYPE => |typ| { - switch (typ) { - .FUNCTION_TYPE => { - std.debug.assert(self.environment.scope_stack.items.len == 1); - - const variable = self.environment.get_variable(name.?); - if (variable) |v| { - return v; - } - - const function_type = try self.get_llvm_type(expression); - const function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}), function_type); - - return try self.create_variable(.{ - .value = function, - .type = null, - .stack_level = null, - .node = expression, - .node_type = expression, - }); - }, - .STRUCT_TYPE => |t| { - const simple_type_node = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{ - .name = name.?, - .underlying_type = expression, - } } }); - const struct_type = llvm.LLVMStructCreateNamed(self.llvm_context, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})); - - // Needed for recursive structs - if (name != null) { - try self.environment.add_variable(name.?, try self.create_variable(.{ - .value = null, - .type = struct_type, - .stack_level = null, - .node = expression, - .node_type = simple_type_node, - })); - } - - var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - - for (t.fields) |field| { - try llvm_types.append(try 
self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?)); - } - llvm.LLVMStructSetBody(struct_type, llvm_types.items.ptr, @intCast(llvm_types.items.len), 0); - return try self.create_variable(.{ - .value = null, - .type = struct_type, - .stack_level = null, - .node = expression, - .node_type = simple_type_node, - }); - }, - .SIMPLE_TYPE => |t| { - return try self.create_variable(.{ - .value = null, - .type = try self.get_llvm_type(t.underlying_type.?), - .stack_level = null, - .node = expression, - .node_type = t.underlying_type.?, - }); - }, - else => unreachable, - } - }, - .CAST_STATEMENT => |exp| { - const val = try self.generate_expression_value(exp.expression, ""); - return try self.create_variable(.{ - .value = val.value, //TODO: do real casting - .type = null, - .stack_level = null, - .node = expression, - .node_type = exp.typ, - }); - }, - .SIZEOF_STATEMENT => |exp| { - const typ = try self.get_llvm_type(exp.typ); - const size_in_bytes = llvm.LLVMStoreSizeOfType(self.llvm_target_data, typ); - - const size_val = llvm.LLVMConstInt(llvm.LLVMInt64Type(), size_in_bytes, 0); - - return try self.create_variable(.{ - .value = size_val, - .type = null, - .node_type = try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - }, - }, - }), - .stack_level = null, - .node = expression, - }); - }, - .FIELD_ACCESS => |exp| { - const x = try self.get_struct_field(exp.expression, exp.name); - const loaded = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(x.type), x.value, ""); - - return try self.create_variable(.{ - .value = loaded, - .type = null, - .stack_level = null, - .node = expression, - .node_type = x.type, - }); - }, - else => unreachable, - }; - } - - fn generate_literal(self: *CodeGen, literal_val: llvm.LLVMValueRef, name: ?[]const u8, node: *parser.Node, node_type: *parser.Node) !*Variable { - if (name != null and self.environment.scope_stack.items.len == 1) { - const ptr = try 
self.create_variable(.{ - .value = llvm.LLVMAddGlobal(self.llvm_module, try self.get_llvm_type(node_type), try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})), - .type = null, - .stack_level = null, - .node = node, - .node_type = node_type, - }); - llvm.LLVMSetInitializer(ptr.value, literal_val); - return ptr; - } - - return try self.create_variable(.{ - .value = literal_val, - .type = null, - .stack_level = null, - .node = node, - .node_type = node_type, - }); - } - - fn get_struct_field(self: *CodeGen, node: *parser.Node, name: []const u8) !struct { value: llvm.LLVMValueRef, type: *parser.Node } { - var ptr: *Variable = undefined; - switch (node.*) { - .PRIMARY_EXPRESSION => { - ptr = self.environment.get_variable(node.PRIMARY_EXPRESSION.IDENTIFIER.name).?; - }, - .UNARY_EXPRESSION => { - ptr = try self.generate_expression_value(node.UNARY_EXPRESSION.expression, ""); - }, - else => unreachable, - } - - var typ: *parser.Node = undefined; - if (ptr.node_type.TYPE == .STRUCT_TYPE) { - typ = ptr.node_type; - } else if (ptr.node_type.TYPE == .POINTER_TYPE) { - typ = self.environment.get_variable(ptr.node_type.TYPE.POINTER_TYPE.type.TYPE.SIMPLE_TYPE.name).?.node_type; //TODO: we shouldnt be able to get fields of pointers, we have to dref first. 
- } else if (ptr.node_type.TYPE == .SIMPLE_TYPE) { - typ = self.environment.get_variable(ptr.node_type.TYPE.SIMPLE_TYPE.name).?.node_type; - } else { - unreachable; - } - var fieldIndex: ?usize = null; - for (0.., typ.TYPE.SIMPLE_TYPE.underlying_type.?.TYPE.STRUCT_TYPE.fields) |i, field| { - if (std.mem.eql(u8, name, field.PRIMARY_EXPRESSION.IDENTIFIER.name)) { - fieldIndex = i; - break; - } - } - if (fieldIndex == null) unreachable; - - const zero = llvm.LLVMConstInt(llvm.LLVMInt32Type(), 0, 0); - const llvmFieldIndex = llvm.LLVMConstInt(llvm.LLVMInt32Type(), fieldIndex.?, 0); - const indices = @constCast(&[_]llvm.LLVMValueRef{ zero, llvmFieldIndex }); - - return .{ - .value = llvm.LLVMBuildGEP2(self.builder, try self.get_llvm_type(typ), ptr.value, indices, indices.len, try std.fmt.allocPrintZ(self.arena, "{s}", .{name})), - .type = typ.TYPE.SIMPLE_TYPE.underlying_type.?.TYPE.STRUCT_TYPE.fields[fieldIndex.?].PRIMARY_EXPRESSION.IDENTIFIER.type.?, - }; - } - - fn get_llvm_type(self: *CodeGen, node: *parser.Node) !llvm.LLVMTypeRef { - std.debug.assert(node.* == parser.Node.TYPE); - const type_node = node.TYPE; - - switch (type_node) { - .SIMPLE_TYPE => |t| { - if (std.mem.eql(u8, t.name, "i8")) return llvm.LLVMInt8Type(); - if (std.mem.eql(u8, t.name, "i64")) return llvm.LLVMInt64Type(); - if (std.mem.eql(u8, t.name, "bool")) return llvm.LLVMInt1Type(); - if (std.mem.eql(u8, t.name, "void")) return llvm.LLVMVoidType(); - if (std.mem.eql(u8, t.name, "varargs")) return llvm.LLVMPointerType(llvm.LLVMInt64Type(), 0); // Hack for varargs (only used for printf) - if (self.environment.get_variable(t.name)) |v| { - std.debug.assert(v.type != null); - return v.type; - } - std.debug.print("Unknown type: {s}\n", .{t.name}); - unreachable; - }, - .FUNCTION_TYPE => |t| { - var return_type = try self.get_llvm_type(t.return_type); - if (t.return_type.TYPE == .FUNCTION_TYPE) { - return_type = llvm.LLVMPointerType(return_type, 0); - } - var paramtypes = 
std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - var is_varargs: i8 = 0; - for (t.parameters) |param| { - if (param.TYPE == .SIMPLE_TYPE and std.mem.eql(u8, param.TYPE.SIMPLE_TYPE.name, "varargs")) { - is_varargs = 1; - continue; - } - var typ = try self.get_llvm_type(param); - if (param.TYPE == .FUNCTION_TYPE) { - typ = llvm.LLVMPointerType(typ, 0); - } - try paramtypes.append(typ); - } - const function_type = llvm.LLVMFunctionType(return_type, paramtypes.items.ptr, @intCast(paramtypes.items.len), is_varargs) orelse unreachable; - return function_type; - }, - .POINTER_TYPE => |t| { - const inner_type = try self.get_llvm_type(t.type); - return llvm.LLVMPointerType(inner_type, 0); - }, - .STRUCT_TYPE => |t| { - var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - - for (t.fields) |field| { - try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?)); - } - - return llvm.LLVMStructType(llvm_types.items.ptr, @intCast(llvm_types.items.len), 0); - }, - } - } - - fn compare_types(self: *CodeGen, a: *parser.Node, b: *parser.Node, is_dereference: bool) bool { - std.debug.assert(a.* == parser.Node.TYPE); - std.debug.assert(b.* == parser.Node.TYPE); - - var a_type = a.TYPE; - const b_type = b.TYPE; - - if (a_type == .SIMPLE_TYPE and std.mem.eql(u8, "varargs", a_type.SIMPLE_TYPE.name)) { - return true; - } - - if (is_dereference) { - a_type = a_type.POINTER_TYPE.type.TYPE; - } - - if (!std.mem.eql(u8, @tagName(a_type), @tagName(b_type))) { - std.debug.print("Tagname mismatch: {any} vs {any}\n", .{ a_type, b_type }); - return false; - } - - switch (a_type) { - .SIMPLE_TYPE => |a_simple| { - const b_simple = b_type.SIMPLE_TYPE; - const res = std.mem.eql(u8, a_simple.name, b_simple.name); - if (!res) { - std.debug.print("Simple type name mismatch: '{s}' vs '{s}'\n", .{ a_simple.name, b_simple.name }); - } - return res; - }, - .FUNCTION_TYPE => |a_func| { - const b_func = b_type.FUNCTION_TYPE; - - if 
(!self.compare_types(a_func.return_type, b_func.return_type, false)) { - std.debug.print("Function return type mismatch\n", .{}); - return false; - } - - if (a_func.parameters.len != b_func.parameters.len) { - std.debug.print("Parameter count mismatch: {} vs {}\n", .{ a_func.parameters.len, b_func.parameters.len }); - return false; - } - - for (a_func.parameters, b_func.parameters) |a_param, b_param| { - if (!self.compare_types(a_param, b_param, false)) { - std.debug.print("Parameter type mismatch\n", .{}); - return false; - } - } - - return true; - }, - .POINTER_TYPE => |a_ptr| { - const b_ptr = b_type.POINTER_TYPE; - - const res = self.compare_types(a_ptr.type, b_ptr.type, false); - if (!res) { - std.debug.print("Pointer base type mismatch\n", .{}); - } - return res; - }, - .STRUCT_TYPE => |a_struct| { - const b_struct = b_type.STRUCT_TYPE; - - if (a_struct.fields.len != b_struct.fields.len) return false; - - for (0.., a_struct.fields) |i, f| { - if (!self.compare_types(f, b_struct.fields[i], false)) { - return false; - } - } - return true; - }, - } - } - - fn create_variable(self: *CodeGen, variable_value: Variable) !*Variable { - const variable = try self.arena.create(Variable); - variable.* = variable_value; - return variable; - } - - fn create_node(self: *CodeGen, node_value: parser.Node) !*parser.Node { - const node = try self.arena.create(parser.Node); - node.* = node_value; - return node; - } -}; - -const Variable = struct { - value: llvm.LLVMValueRef, - type: llvm.LLVMTypeRef, - node: *parser.Node, - node_type: *parser.Node, - stack_level: ?usize, -}; - -const Scope = struct { - variables: std.StringHashMap(*Variable), -}; - -const Environment = struct { - scope_stack: std.ArrayList(*Scope), - - arena: std.mem.Allocator, - - fn init(arena_allocator: std.mem.Allocator) !*Environment { - const self = try arena_allocator.create(Environment); - - self.* = .{ - .scope_stack = std.ArrayList(*Scope).init(arena_allocator), - .arena = arena_allocator, - }; - - // 
Create global scope - try self.create_scope(); - - return self; - } - - fn create_scope(self: *Environment) !void { - const scope = try self.arena.create(Scope); - scope.* = .{ - .variables = std.StringHashMap(*Variable).init(self.arena), - }; - try self.scope_stack.append(scope); - } - - fn drop_scope(self: *Environment) void { - _ = self.scope_stack.pop(); - } - - fn add_variable(self: *Environment, name: []const u8, variable: *Variable) !void { - // TODO: Dont allow shadowing if value != value or type != type (across things) - try self.scope_stack.getLast().variables.put(name, variable); - } - - fn set_variable(self: *Environment, name: []const u8, variable: *Variable) !void { - self.get_variable(name).?.* = variable.*; - } - - fn get_variable(self: *Environment, name: []const u8) ?*Variable { - var i = self.scope_stack.items.len; - var variable: ?*Variable = null; - while (i > 0) { - i -= 1; - const scope = self.scope_stack.items[i]; - if (scope.variables.get(name)) |v| { - if (variable == null) { - variable = v; - } - variable.?.stack_level = i; - } - } - return variable; - } -}; diff --git a/src/llvm.pry b/src/llvm.pry new file mode 100644 index 0000000..2feb815 --- /dev/null +++ b/src/llvm.pry @@ -0,0 +1,353 @@ +extern LLVMInitializeAArch64TargetInfo = () => void; +extern LLVMInitializeAMDGPUTargetInfo = () => void; +extern LLVMInitializeARMTargetInfo = () => void; +extern LLVMInitializeAVRTargetInfo = () => void; +extern LLVMInitializeBPFTargetInfo = () => void; +extern LLVMInitializeHexagonTargetInfo = () => void; +extern LLVMInitializeLanaiTargetInfo = () => void; +extern LLVMInitializeLoongArchTargetInfo = () => void; +extern LLVMInitializeMipsTargetInfo = () => void; +extern LLVMInitializeMSP430TargetInfo = () => void; +extern LLVMInitializeNVPTXTargetInfo = () => void; +extern LLVMInitializePowerPCTargetInfo = () => void; +extern LLVMInitializeRISCVTargetInfo = () => void; +extern LLVMInitializeSparcTargetInfo = () => void; +extern 
LLVMInitializeSystemZTargetInfo = () => void; +extern LLVMInitializeVETargetInfo = () => void; +extern LLVMInitializeWebAssemblyTargetInfo = () => void; +extern LLVMInitializeX86TargetInfo = () => void; +extern LLVMInitializeXCoreTargetInfo = () => void; + +let LLVMInitializeAllTargetInfos = () => void { + LLVMInitializeAArch64TargetInfo(); + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeARMTargetInfo(); + LLVMInitializeAVRTargetInfo(); + LLVMInitializeBPFTargetInfo(); + LLVMInitializeHexagonTargetInfo(); + LLVMInitializeLanaiTargetInfo(); + LLVMInitializeLoongArchTargetInfo(); + LLVMInitializeMipsTargetInfo(); + LLVMInitializeMSP430TargetInfo(); + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializePowerPCTargetInfo(); + LLVMInitializeRISCVTargetInfo(); + LLVMInitializeSparcTargetInfo(); + LLVMInitializeSystemZTargetInfo(); + LLVMInitializeVETargetInfo(); + LLVMInitializeWebAssemblyTargetInfo(); + LLVMInitializeX86TargetInfo(); + LLVMInitializeXCoreTargetInfo(); + return; +}; + +extern LLVMInitializeAArch64Target = () => void; +extern LLVMInitializeAMDGPUTarget = () => void; +extern LLVMInitializeARMTarget = () => void; +extern LLVMInitializeAVRTarget = () => void; +extern LLVMInitializeBPFTarget = () => void; +extern LLVMInitializeHexagonTarget = () => void; +extern LLVMInitializeLanaiTarget = () => void; +extern LLVMInitializeLoongArchTarget = () => void; +extern LLVMInitializeMipsTarget = () => void; +extern LLVMInitializeMSP430Target = () => void; +extern LLVMInitializeNVPTXTarget = () => void; +extern LLVMInitializePowerPCTarget = () => void; +extern LLVMInitializeRISCVTarget = () => void; +extern LLVMInitializeSparcTarget = () => void; +extern LLVMInitializeSystemZTarget = () => void; +extern LLVMInitializeVETarget = () => void; +extern LLVMInitializeWebAssemblyTarget = () => void; +extern LLVMInitializeX86Target = () => void; +extern LLVMInitializeXCoreTarget = () => void; + +let LLVMInitializeAllTargets = () => void { + LLVMInitializeAArch64Target(); + 
LLVMInitializeAMDGPUTarget(); + LLVMInitializeARMTarget(); + LLVMInitializeAVRTarget(); + LLVMInitializeBPFTarget(); + LLVMInitializeHexagonTarget(); + LLVMInitializeLanaiTarget(); + LLVMInitializeLoongArchTarget(); + LLVMInitializeMipsTarget(); + LLVMInitializeMSP430Target(); + LLVMInitializeNVPTXTarget(); + LLVMInitializePowerPCTarget(); + LLVMInitializeRISCVTarget(); + LLVMInitializeSparcTarget(); + LLVMInitializeSystemZTarget(); + LLVMInitializeVETarget(); + LLVMInitializeWebAssemblyTarget(); + LLVMInitializeX86Target(); + LLVMInitializeXCoreTarget(); + return; +}; + +extern LLVMInitializeAArch64TargetMC = () => void; +extern LLVMInitializeAMDGPUTargetMC = () => void; +extern LLVMInitializeARMTargetMC = () => void; +extern LLVMInitializeAVRTargetMC = () => void; +extern LLVMInitializeBPFTargetMC = () => void; +extern LLVMInitializeHexagonTargetMC = () => void; +extern LLVMInitializeLanaiTargetMC = () => void; +extern LLVMInitializeLoongArchTargetMC = () => void; +extern LLVMInitializeMipsTargetMC = () => void; +extern LLVMInitializeMSP430TargetMC = () => void; +extern LLVMInitializeNVPTXTargetMC = () => void; +extern LLVMInitializePowerPCTargetMC = () => void; +extern LLVMInitializeRISCVTargetMC = () => void; +extern LLVMInitializeSparcTargetMC = () => void; +extern LLVMInitializeSystemZTargetMC = () => void; +extern LLVMInitializeVETargetMC = () => void; +extern LLVMInitializeWebAssemblyTargetMC = () => void; +extern LLVMInitializeX86TargetMC = () => void; +extern LLVMInitializeXCoreTargetMC = () => void; + +let LLVMInitializeAllTargetMCs = () => void { + LLVMInitializeAArch64TargetMC(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeARMTargetMC(); + LLVMInitializeAVRTargetMC(); + LLVMInitializeBPFTargetMC(); + LLVMInitializeHexagonTargetMC(); + LLVMInitializeLanaiTargetMC(); + LLVMInitializeLoongArchTargetMC(); + LLVMInitializeMipsTargetMC(); + LLVMInitializeMSP430TargetMC(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializePowerPCTargetMC(); + 
LLVMInitializeRISCVTargetMC(); + LLVMInitializeSparcTargetMC(); + LLVMInitializeSystemZTargetMC(); + LLVMInitializeVETargetMC(); + LLVMInitializeWebAssemblyTargetMC(); + LLVMInitializeX86TargetMC(); + LLVMInitializeXCoreTargetMC(); + return; +}; + +extern LLVMInitializeAArch64AsmPrinter = () => void; +extern LLVMInitializeAMDGPUAsmPrinter = () => void; +extern LLVMInitializeARMAsmPrinter = () => void; +extern LLVMInitializeAVRAsmPrinter = () => void; +extern LLVMInitializeBPFAsmPrinter = () => void; +extern LLVMInitializeHexagonAsmPrinter = () => void; +extern LLVMInitializeLanaiAsmPrinter = () => void; +extern LLVMInitializeLoongArchAsmPrinter = () => void; +extern LLVMInitializeMipsAsmPrinter = () => void; +extern LLVMInitializeMSP430AsmPrinter = () => void; +extern LLVMInitializeNVPTXAsmPrinter = () => void; +extern LLVMInitializePowerPCAsmPrinter = () => void; +extern LLVMInitializeRISCVAsmPrinter = () => void; +extern LLVMInitializeSparcAsmPrinter = () => void; +extern LLVMInitializeSystemZAsmPrinter = () => void; +extern LLVMInitializeVEAsmPrinter = () => void; +extern LLVMInitializeWebAssemblyAsmPrinter = () => void; +extern LLVMInitializeX86AsmPrinter = () => void; +extern LLVMInitializeXCoreAsmPrinter = () => void; + +let LLVMInitializeAllAsmPrinters = () => void { + LLVMInitializeAArch64AsmPrinter(); + LLVMInitializeAMDGPUAsmPrinter(); + LLVMInitializeARMAsmPrinter(); + LLVMInitializeAVRAsmPrinter(); + LLVMInitializeBPFAsmPrinter(); + LLVMInitializeHexagonAsmPrinter(); + LLVMInitializeLanaiAsmPrinter(); + LLVMInitializeLoongArchAsmPrinter(); + LLVMInitializeMipsAsmPrinter(); + LLVMInitializeMSP430AsmPrinter(); + LLVMInitializeNVPTXAsmPrinter(); + LLVMInitializePowerPCAsmPrinter(); + LLVMInitializeRISCVAsmPrinter(); + LLVMInitializeSparcAsmPrinter(); + LLVMInitializeSystemZAsmPrinter(); + LLVMInitializeVEAsmPrinter(); + LLVMInitializeWebAssemblyAsmPrinter(); + LLVMInitializeX86AsmPrinter(); + LLVMInitializeXCoreAsmPrinter(); + return; +}; + +extern 
LLVMInitializeAArch64AsmParser = () => void; +extern LLVMInitializeAMDGPUAsmParser = () => void; +extern LLVMInitializeARMAsmParser = () => void; +extern LLVMInitializeAVRAsmParser = () => void; +extern LLVMInitializeBPFAsmParser = () => void; +extern LLVMInitializeHexagonAsmParser = () => void; +extern LLVMInitializeLanaiAsmParser = () => void; +extern LLVMInitializeLoongArchAsmParser = () => void; +extern LLVMInitializeMipsAsmParser = () => void; +extern LLVMInitializeMSP430AsmParser = () => void; +extern LLVMInitializePowerPCAsmParser = () => void; +extern LLVMInitializeRISCVAsmParser = () => void; +extern LLVMInitializeSparcAsmParser = () => void; +extern LLVMInitializeSystemZAsmParser = () => void; +extern LLVMInitializeVEAsmParser = () => void; +extern LLVMInitializeWebAssemblyAsmParser = () => void; +extern LLVMInitializeX86AsmParser = () => void; + +let LLVMInitializeAllAsmParsers = () => void { + LLVMInitializeAArch64AsmParser(); + LLVMInitializeAMDGPUAsmParser(); + LLVMInitializeARMAsmParser(); + LLVMInitializeAVRAsmParser(); + LLVMInitializeBPFAsmParser(); + LLVMInitializeHexagonAsmParser(); + LLVMInitializeLanaiAsmParser(); + LLVMInitializeLoongArchAsmParser(); + LLVMInitializeMipsAsmParser(); + LLVMInitializeMSP430AsmParser(); + LLVMInitializePowerPCAsmParser(); + LLVMInitializeRISCVAsmParser(); + LLVMInitializeSparcAsmParser(); + LLVMInitializeSystemZAsmParser(); + LLVMInitializeVEAsmParser(); + LLVMInitializeWebAssemblyAsmParser(); + LLVMInitializeX86AsmParser(); + return; +}; + +extern LLVMInitializeAArch64Disassembler = () => void; +extern LLVMInitializeAMDGPUDisassembler = () => void; +extern LLVMInitializeARMDisassembler = () => void; +extern LLVMInitializeAVRDisassembler = () => void; +extern LLVMInitializeBPFDisassembler = () => void; +extern LLVMInitializeHexagonDisassembler = () => void; +extern LLVMInitializeLanaiDisassembler = () => void; +extern LLVMInitializeLoongArchDisassembler = () => void; +extern LLVMInitializeMipsDisassembler = () 
=> void; +extern LLVMInitializeMSP430Disassembler = () => void; +extern LLVMInitializePowerPCDisassembler = () => void; +extern LLVMInitializeRISCVDisassembler = () => void; +extern LLVMInitializeSparcDisassembler = () => void; +extern LLVMInitializeSystemZDisassembler = () => void; +extern LLVMInitializeVEDisassembler = () => void; +extern LLVMInitializeWebAssemblyDisassembler = () => void; +extern LLVMInitializeX86Disassembler = () => void; +extern LLVMInitializeXCoreDisassembler = () => void; + +let LLVMInitializeAllDisassemblers = () => void { + LLVMInitializeAArch64Disassembler(); + LLVMInitializeAMDGPUDisassembler(); + LLVMInitializeARMDisassembler(); + LLVMInitializeAVRDisassembler(); + LLVMInitializeBPFDisassembler(); + LLVMInitializeHexagonDisassembler(); + LLVMInitializeLanaiDisassembler(); + LLVMInitializeLoongArchDisassembler(); + LLVMInitializeMipsDisassembler(); + LLVMInitializeMSP430Disassembler(); + LLVMInitializePowerPCDisassembler(); + LLVMInitializeRISCVDisassembler(); + LLVMInitializeSparcDisassembler(); + LLVMInitializeSystemZDisassembler(); + LLVMInitializeVEDisassembler(); + LLVMInitializeWebAssemblyDisassembler(); + LLVMInitializeX86Disassembler(); + LLVMInitializeXCoreDisassembler(); + return; +}; + +let LLVMBuilderRef = newtype *void; +let LLVMModuleRef = newtype *void; +let LLVMTargetDataRef = newtype *void; +let LLVMTargetMachineRef = newtype *void; +let LLVMContextRef = newtype *void; +let LLVMTargetRef = newtype *void; +let LLVMIntPredicate = newtype i64; + +let LLVMValueRef = newtype *void; +let LLVMValueKind = newtype i64; +let LLVMTypeRef = newtype *void; +let LLVMBasicBlockRef = newtype *void; + +extern LLVMGetModuleDataLayout = (LLVMModuleRef) => LLVMTargetDataRef; +extern LLVMConstInt = (LLVMTypeRef, i64, i64) => LLVMValueRef; +extern LLVMConstNull = (LLVMTypeRef) => LLVMValueRef; +extern LLVMInt64Type = () => LLVMTypeRef; +extern LLVMInt32Type = () => LLVMTypeRef; +extern LLVMInt1Type = () => LLVMTypeRef; +extern LLVMInt8Type = 
() => LLVMTypeRef; +extern LLVMVoidType = () => LLVMTypeRef; + +extern LLVMModuleCreateWithName = (*i8) => LLVMModuleRef; +extern LLVMGetGlobalContext = () => LLVMContextRef; +extern LLVMCreateBuilder = () => LLVMBuilderRef; +extern LLVMDisposeModule = (LLVMModuleRef) => void; +extern LLVMShutdown = () => void; +extern LLVMDisposeBuilder = (LLVMBuilderRef) => void; + +extern LLVMGetInsertBlock = (LLVMBuilderRef) => LLVMBasicBlockRef; + +extern LLVMDumpModule = (LLVMModuleRef) => void; +extern LLVMPrintModuleToFile = (LLVMModuleRef, *i8, **i8) => i64; +extern LLVMGetDefaultTargetTriple = () => *i8; +extern LLVMGetTargetFromTriple = (*i8, *LLVMTargetRef, **i8) => i64; +extern LLVMDisposeMessage = (*i8) => void; +extern LLVMCreateTargetMachine = (LLVMTargetRef, *i8, *i8, *i8, i64, i64, i64) => LLVMTargetMachineRef; +extern LLVMDisposeTargetMachine = (LLVMTargetMachineRef) => void; + +let LLVMCodeGenLevelDefault = 2; +let LLVMRelocDefault = 0; +let LLVMCodeModelDefault = 0; + +extern LLVMVerifyModule = (LLVMModuleRef, i64, **i8) => i64; + +let LLVMAbortProcessAction = 0; + +extern LLVMTargetMachineEmitToFile = (LLVMTargetMachineRef, LLVMModuleRef, *i8, i64, **i8) => i64; + +let LLVMObjectFile = 1; + +extern LLVMFunctionType = (LLVMTypeRef, *LLVMTypeRef, i64, i64) => LLVMTypeRef; +extern LLVMAddFunction = (LLVMModuleRef, *i8, LLVMTypeRef) => LLVMValueRef; +extern LLVMAppendBasicBlock = (LLVMValueRef, *i8) => LLVMBasicBlockRef; +extern LLVMPositionBuilderAtEnd = (LLVMBuilderRef, LLVMBasicBlockRef) => void; + +extern LLVMGetParams = (LLVMValueRef, *LLVMValueRef) => void; + +extern LLVMBuildRetVoid = (LLVMBuilderRef) => void; +extern LLVMBuildRet = (LLVMBuilderRef, LLVMValueRef) => void; +extern LLVMPointerType = (LLVMTypeRef, i64) => LLVMTypeRef; + +extern LLVMBuildCall2 = (LLVMBuilderRef, LLVMTypeRef, LLVMValueRef, *LLVMValueRef, i64, *i8) => LLVMValueRef; +extern LLVMBuildGlobalStringPtr = (LLVMBuilderRef, *i8, *i8) => LLVMValueRef; +extern LLVMBuildAlloca = 
(LLVMBuilderRef, LLVMTypeRef, *i8) => LLVMValueRef; +extern LLVMBuildStore = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef) => LLVMValueRef; +extern LLVMBuildLoad2 = (LLVMBuilderRef, LLVMTypeRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMGetLastInstruction = (LLVMBasicBlockRef) => LLVMValueRef; + +extern LLVMBuildBr = (LLVMBuilderRef, LLVMBasicBlockRef) => LLVMValueRef; +extern LLVMIsATerminatorInst = (LLVMValueRef) => LLVMValueRef; +extern LLVMBuildCondBr = (LLVMBuilderRef, LLVMValueRef, LLVMBasicBlockRef, LLVMBasicBlockRef) => LLVMValueRef; +extern LLVMBuildICmp = (LLVMBuilderRef, LLVMIntPredicate, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMBuildNeg = (LLVMBuilderRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMBuildSub = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMBuildAdd = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMBuildMul = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMBuildSDiv = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; +extern LLVMBuildSRem = (LLVMBuilderRef, LLVMValueRef, LLVMValueRef, *i8) => LLVMValueRef; + +extern LLVMBuildGEP2 = (LLVMBuilderRef, LLVMTypeRef, LLVMValueRef, *LLVMValueRef, i64, *i8) => LLVMValueRef; +extern LLVMAddGlobal = (LLVMModuleRef, LLVMTypeRef, *i8) => LLVMValueRef; +extern LLVMSetInitializer = (LLVMValueRef, LLVMValueRef) => void; +extern LLVMGetValueKind = (LLVMValueRef) => LLVMValueKind; +let LLVMFunctionValueKind = cast(LLVMValueKind, 5); + +let LLVMIntEQ = 32; +let LLVMIntNE = 33; +let LLVMIntUGT = 34; +let LLVMIntUGE = 35; +let LLVMIntULT = 36; +let LLVMIntULE = 37; +let LLVMIntSGT = 38; +let LLVMIntSGE = 39; +let LLVMIntSLT = 40; +let LLVMIntSLE = 41; + +extern LLVMStoreSizeOfType = (LLVMTargetDataRef, LLVMTypeRef) => i64; +extern LLVMStructCreateNamed = (LLVMContextRef, *i8) => LLVMTypeRef; +extern LLVMStructSetBody = (LLVMTypeRef, *LLVMTypeRef, i64, i64) => void; + 
diff --git a/src/main.pry b/src/main.pry new file mode 100644 index 0000000..a564965 --- /dev/null +++ b/src/main.pry @@ -0,0 +1,80 @@ +import "!stdlib.pry"; +import "!mem.pry"; + +let slice = struct { + data: *void, + data_len: i64, +}; + +import "tokenizer.pry"; +import "parser.pry"; +import "codegen.pry"; + +let read_file = (filename: *i8, alloc: *arena) => slice { + let file = fopen(filename, "r"); + + fseek(file, 0, 2); + let file_size = ftell(file); + fseek(file, 0, 0); + + let buf = cast(*i8, arena_alloc(alloc, file_size + 1)); + + let bytes_read = fread(buf, 1, file_size, file); + (*(buf + cast(*i8, bytes_read))) = '\0'; + + fclose(file); + + let sl = slice{}; + sl.data = cast(*void, buf); + sl.data_len = file_size; + return sl; +}; + +let main = (argc: i64, argv: **i8) => i64 { + if argc < 2 { + printf("Need filename!\n"); + return 1; + }; + + let generate_ir = false; + let filename = cast(*i8, null); + + let i = 0; + while i < (argc - 1) { + i = i + 1; + let arg = *(argv + cast(**i8, i)); + + if strcmp(arg, "--generate-ir") { + generate_ir = true; + continue; + }; + + if filename == cast(*i8, null) { + filename = arg; + continue; + }; + + assert(false); + }; + + printf("%s\n", filename); + + let alloc = arena_init(1024 * 1024 * 1024); + + let file = read_file(filename, alloc); + + let t = tokenizer_init(alloc, file); + let ts = tokenizer_tokenize(t); + + let p = parser_init(cast(*token, ts.data), ts.data_len, alloc, filename); + let ast = parse(p); + + let c = codegen_init(alloc); + let res = codegen_generate(c, ast); + let res = codegen_compile(c, generate_ir); + codegen_deinit(c); + + arena_free(alloc); + + return 0; +}; diff --git a/src/main.zig b/src/main.zig deleted file mode 100644 index 79def61..0000000 --- a/src/main.zig +++ /dev/null @@ -1,49 +0,0 @@ -const std = @import("std"); -const tokenizer = @import("tokenizer.zig"); -const parser = @import("parser.zig"); -const codegen = @import("codegen.zig"); - -pub fn main() !void { - const pathLen = 
std.mem.len(std.os.argv[1]); - const path = std.os.argv[1][0..pathLen]; - - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - defer { - const deinit_status = gpa.deinit(); - if (deinit_status == .leak) @panic("Memory leak detected!"); - } - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - - std.debug.print("Tokenizing! {s}\n", .{path}); - const file = try std.fs.cwd().openFile(path, .{}); - const buf = try file.readToEndAlloc(allocator, 1 * 1024 * 1024); - defer allocator.free(buf); - const source_codegen = try codegen.CodeGen.init(arena.allocator()); - defer source_codegen.deinit(); - try process_buf( - buf, - arena.allocator(), - source_codegen, - path, - ); - source_codegen.compile(); -} - -fn process_buf(buf: []u8, arena: std.mem.Allocator, source_codegen: ?*codegen.CodeGen, filename: []const u8) !void { - std.debug.print("Buf:\n{s}\n", .{buf}); - - var source_tokenizer = try tokenizer.Tokenizer.init(buf, arena); - const token_list = try source_tokenizer.tokenize(); - const source_parser = try parser.Parser.init(token_list, arena, filename); - const ast = try source_parser.parse(); - std.debug.print("AST: {any}\n", .{ast}); - - try source_codegen.?.generate(ast); -} - -test { - std.testing.refAllDecls(@This()); -} diff --git a/src/parser.pry b/src/parser.pry new file mode 100644 index 0000000..0b448d0 --- /dev/null +++ b/src/parser.pry @@ -0,0 +1,1456 @@ +import "tokenizer.pry"; + +extern fopen = (*i8, *i8) => *i8; +extern fgets = (*i8, i64, *i8) => void; +extern feof = (*i8) => bool; +extern fseek = (*i8, i64, i64) => i64; +extern lseek = (i64, i64, i64) => i64; +extern ftell = (*i8) => i64; +extern fread = (*i8, i64, i64, *i8) => i64; +extern fclose = (*i8) => *i8; + +extern strcpy = (*i8, *i8) => *i8; +extern dirname = (*i8) => *i8; +extern open = (*i8, i64) => i64; +extern openat = (i64, *i8, i64) => i64; +extern read = (i64, *i8, i64) => i64; +extern realpath = (*i8, *i8) => *i8; 
+extern snprintf = (*i8, i64, *i8, *i8, *i8) => i64; +extern strcpy = (*i8, *i8) => *i8; +extern strlen = (*i8) => i64; + +let Node = struct { + type: i64, + data: *void, +}; + +let NODE_PROGRAM = 1; +let NODE_STATEMENT = 2; +let NODE_ASSIGNMENT_STATEMENT = 3; +let NODE_IMPORT_DECLARATION = 4; +let NODE_FUNCTION_CALL_STATEMENT = 5; +let NODE_IF_STATEMENT = 6; +let NODE_WHILE_STATEMENT = 7; +let NODE_EQUALITY_EXPRESSION = 8; +let NODE_ADDITIVE_EXPRESSION = 9; +let NODE_MULTIPLICATIVE_EXPRESSION = 10; +let NODE_UNARY_EXPRESSION = 11; +let NODE_POSTFIX_EXPRESSION = 12; +let NODE_PRIMARY_EXPRESSION_NUMBER = 13; +let NODE_PRIMARY_EXPRESSION_BOOLEAN = 14; +let NODE_PRIMARY_EXPRESSION_NULL = 15; +let NODE_PRIMARY_EXPRESSION_CHAR = 16; +let NODE_PRIMARY_EXPRESSION_STRING = 17; +let NODE_PRIMARY_EXPRESSION_IDENTIFIER = 18; +let NODE_FUNCTION_DEFINITION = 19; +let NODE_STRUCT_INSTANCIATION = 20; +let NODE_FIELD_ACCESS = 21; +let NODE_TYPE_SIMPLE_TYPE = 22; +let NODE_TYPE_FUNCTION_TYPE = 23; +let NODE_TYPE_POINTER_TYPE = 24; +let NODE_TYPE_STRUCT_TYPE = 25; +let NODE_RETURN_STATEMENT = 26; +let NODE_CAST_STATEMENT = 27; +let NODE_SIZEOF_STATEMENT = 28; +let NODE_BREAK_STATEMENT = 29; +let NODE_CONTINUE_STATEMENT = 30; + +let EQUALITY_EXPRESSION_TYPE_EQ = 0; +let EQUALITY_EXPRESSION_TYPE_NE = 1; +let EQUALITY_EXPRESSION_TYPE_GE = 2; +let EQUALITY_EXPRESSION_TYPE_LE = 3; +let EQUALITY_EXPRESSION_TYPE_LT = 4; +let EQUALITY_EXPRESSION_TYPE_GT = 5; + +let MULTIPLICATIVE_EXPRESSION_TYPE_MUL = 0; +let MULTIPLICATIVE_EXPRESSION_TYPE_DIV = 1; +let MULTIPLICATIVE_EXPRESSION_TYPE_MOD = 2; + +let UNARY_EXPRESSION_TYPE_NOT = 0; +let UNARY_EXPRESSION_TYPE_MINUS = 1; +let UNARY_EXPRESSION_TYPE_STAR = 2; + +let NODE_PROGRAM_DATA = struct { + statements: **Node, + statements_len: i64, +}; + +let NODE_STATEMENT_DATA = struct { + statement: *Node, +}; + +let NODE_ASSIGNMENT_STATEMENT_DATA = struct { + is_declaration: bool, + is_dereference: bool, + lhs: *Node, + rhs: *Node, +}; + +let 
NODE_IMPORT_DECLARATION_DATA = struct { + filename: *i8, + program: *Node, +}; + +let NODE_FUNCTION_CALL_STATEMENT_DATA = struct { + expression: *Node, + arguments: **Node, + arguments_len: i64, +}; + +let NODE_IF_STATEMENT_DATA = struct { + condition: *Node, + statements: **Node, + statements_len: i64, +}; + +let NODE_WHILE_STATEMENT_DATA = struct { + condition: *Node, + statements: **Node, + statements_len: i64, +}; + +let NODE_EQUALITY_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, + typ: i64, +}; + +let NODE_ADDITIVE_EXPRESSION_DATA = struct { + addition: bool, + lhs: *Node, + rhs: *Node, +}; + +let NODE_MULTIPLICATIVE_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, + typ: i64, +}; + +let NODE_UNARY_EXPRESSION_DATA = struct { + typ: i64, + expression: *Node, +}; + +let NODE_POSTFIX_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, +}; + +let NODE_PRIMARY_EXPRESSION_NUMBER_DATA = struct { + value: i64, +}; + +let NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA = struct { + value: bool, +}; + +let NODE_PRIMARY_EXPRESSION_CHAR_DATA = struct { + value: i8, +}; + +let NODE_PRIMARY_EXPRESSION_STRING_DATA = struct { + value: *i8, +}; + +let NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA = struct { + name: *i8, + type: *Node, +}; + +let NODE_FUNCTION_DEFINITION_DATA = struct { + statements: **Node, + statements_len: i64, + parameters: **Node, + parameters_len: i64, + retur_type: *Node, +}; + +let NODE_STRUCT_INSTANCIATION_DATA = struct { + typ: *i8, +}; + +let NODE_FIELD_ACCESS_DATA = struct { + expression: *Node, + name: *i8, +}; + +let NODE_TYPE_SIMPLE_TYPE_DATA = struct { + name: *i8, + underlying_type: *Node, +}; + +let NODE_TYPE_FUNCTION_TYPE_DATA = struct { + parameters: **Node, + parameters_len: i64, + retur_type: *Node, +}; + +let NODE_TYPE_POINTER_TYPE_DATA = struct { + type: *Node, +}; + +let NODE_TYPE_STRUCT_TYPE_DATA = struct { + fields: **Node, + fields_len: i64, +}; + +let NODE_RETURN_STATEMENT_DATA = struct { + expression: *Node, +}; + +let 
NODE_CAST_STATEMENT_DATA = struct { + typ: *Node, + expression: *Node, +}; + +let NODE_SIZEOF_STATEMENT_DATA = struct { + typ: *Node, +}; + + +let parser = struct { + tokens: *token, + tokens_len: i64, + + offset: i64, + + arena: *arena, + filename: *i8, +}; + +extern parser_parse_statement = (*parser) => *Node; +extern parser_parse_expression = (*parser) => *Node; +extern parse_function_call_statement = (*parser) => *Node; +extern parser_parse_additive_expression = (*parser) => *Node; + +let parser_init = (ts: *token, ts_len: i64, ar: *arena, filename: *i8) => *parser { + let p = cast(*parser, arena_alloc(ar, sizeof(parser))); + + (*p).tokens = ts; + (*p).tokens_len = ts_len; + (*p).offset = 0; + (*p).arena = ar; + (*p).filename = filename; + + return p; +}; + +let create_node = (p: *parser, n: Node) => *Node { + let res = cast(*Node, arena_alloc((*p).arena, sizeof(Node))); + *res = n; + return res; +}; + +let parser_peek_token = (p: *parser) => *token { + if (*p).offset >= (*p).tokens_len { + return cast(*token, null); + }; + + return ((*p).tokens + cast(*token, (*p).offset)); +}; + + let parser_consume_token = (p: *parser) => *token { + if (*p).offset >= (*p).tokens_len { + return cast(*token, null); + }; + + let t = ((*p).tokens + cast(*token, (*p).offset)); + (*p).offset = (*p).offset + 1; + return t; +}; + +let parser_accept_token = (p: *parser, t: i64) => *token { + let curr_token = parser_peek_token(p); + if curr_token == cast(*token, null) { + return cast(*token, null); + }; + + if (*curr_token).type == t { + return parser_consume_token(p); + }; + return cast(*token, null); +}; + +let parser_accept_parse = (p: *parser, parsing_func: (*parser) => *Node) => *Node { + let prev_offset = (*p).offset; + let node = parsing_func(p); + if node == cast(*Node, null) { + (*p).offset = prev_offset; + }; + return node; +}; + +/* ReturnStatement ::= RETURN (Expression)? 
*/
+/* Returns null when the next token is not RETURN; a bare `return` keeps a null expression. */
+let parser_parse_return_statement = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_RETURN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    /* The expression is optional: parser_accept_parse rewinds the offset when it fails. */
+    let maybe_expr = parser_accept_parse(p, parser_parse_expression);
+
+    let d = cast(*NODE_RETURN_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_RETURN_STATEMENT_DATA)));
+    (*d).expression = maybe_expr;
+
+    let r = Node{};
+    r.type = NODE_RETURN_STATEMENT;
+    r.data = cast(*void, d);
+
+    return create_node(p, r);
+};
+
+extern parser_parse_type = (*parser) => *Node;
+
+/* CastStatement ::= "cast" LPAREN TYPE "," Expression RPAREN */
+let parser_parse_cast_statement = (p: *parser) => *Node {
+    let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+    if ident == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    /* NOTE(review): strcmp is used as an "is equal" predicate here - presumably the project helper, not libc's. */
+    if !strcmp(cast(*i8, (*ident).data), "cast") {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let typ = parser_parse_type(p);
+    if typ == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_COMMA) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let expression = parser_parse_expression(p);
+    if expression == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_CAST_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_CAST_STATEMENT_DATA)));
+    (*d).typ = typ;
+    (*d).expression = expression;
+
+    let r = Node{};
+    r.type = NODE_CAST_STATEMENT;
+    r.data = cast(*void, d);
+
+    return create_node(p, r);
+};
+
+/* SizeOfStatement ::= "sizeof" LPAREN TYPE RPAREN */
+let parser_parse_sizeof_statement = (p: *parser) => *Node {
+    let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+    if ident == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    if !strcmp(cast(*i8, (*ident).data), "sizeof") {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let typ = parser_parse_type(p);
+    if typ == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_SIZEOF_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_SIZEOF_STATEMENT_DATA)));
+    (*d).typ = typ;
+
+    let r = Node{};
+    r.type = NODE_SIZEOF_STATEMENT;
+    r.data = cast(*void, d);
+
+    return create_node(p, r);
+};
+
+/* FunctionType ::= LPAREN (Type ("," Type)*)? RPAREN ARROW Type */
+let parser_parse_function_type = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let parameters = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20));
+    let parameters_len = 0;
+    let first = true;
+    while true {
+        /* Separating commas are accepted but not required. */
+        if !first {
+            parser_accept_token(p, TOKEN_COMMA);
+        };
+        first = false;
+        let type_annotation = parser_accept_parse(p, parser_parse_type);
+        if type_annotation == cast(*Node, null) {
+            break;
+        };
+        /* The parameter array holds 20 entries; fail loudly instead of writing past it. */
+        assert(parameters_len < 20);
+        (*(parameters + cast(**Node, parameters_len))) = type_annotation;
+        parameters_len = parameters_len + 1;
+    };
+
+    if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let retur_typ = parser_parse_type(p);
+    if retur_typ == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_TYPE_FUNCTION_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_FUNCTION_TYPE_DATA)));
+    (*d).parameters = parameters;
+    (*d).parameters_len = parameters_len;
+    (*d).retur_type = retur_typ;
+
+    let r = Node{};
+    r.type = NODE_TYPE_FUNCTION_TYPE;
+    r.data = cast(*void, d);
+
+    return create_node(p, r);
+};
+
+/* Type ::= IDENTIFIER | FunctionType */
+/* A leading "*" is also accepted and wraps the following type in a pointer type. */
+let parser_parse_type = (p: *parser) => *Node {
+    let typ = parser_accept_parse(p, parser_parse_function_type);
+    if typ != cast(*Node, null) {
+        return typ;
+    };
+
+    /* Running out of tokens is a parse failure for the caller to handle, not a reason to abort. */
+    let to = parser_consume_token(p);
+    if to == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    /* TODO: we should only accept specific type identifiers */
+    if (*to).type == TOKEN_IDENTIFIER {
+        let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA)));
+        (*d).name = cast(*i8, (*to).data);
+        (*d).underlying_type = cast(*Node, null);
+
+        let r = Node{};
+        r.type = NODE_TYPE_SIMPLE_TYPE;
+        r.data = cast(*void, d);
+
+        return create_node(p, r);
+    };
+
+    if (*to).type == TOKEN_MUL {
+        /* A pointer needs a pointee: reject "*" that is not followed by a type. */
+        let pointee = parser_parse_type(p);
+        if pointee == cast(*Node, null) {
+            return cast(*Node, null);
+        };
+
+        let d = cast(*NODE_TYPE_POINTER_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_POINTER_TYPE_DATA)));
+        (*d).type = pointee;
+
+        let r = Node{};
+        r.type = NODE_TYPE_POINTER_TYPE;
+        r.data = cast(*void, d);
+
+        return create_node(p, r);
+    };
+
+    return cast(*Node, null);
+};
+
+/* IfStatement ::= "if" Expression LBRACE Statement* RBRACE */
+let parser_parse_if_statement = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_IF) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let expression = parser_parse_expression(p);
+    if expression == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100));
+    let i = 0;
+    while true {
+        let n = parser_accept_parse(p, parser_parse_statement);
+        if n == cast(*Node, null) {
+            break;
+        };
+        /* The body array holds 100 statements; fail loudly instead of writing past it. */
+        assert(i < 100);
+        (*(statements + cast(**Node, i))) = n;
+        i = i + 1;
+    };
+
+    if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let dd = cast(*NODE_IF_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_IF_STATEMENT_DATA)));
+    (*dd).condition = expression;
+    (*dd).statements = statements;
+    (*dd).statements_len = i;
+
+    let r = Node{};
+    r.type = NODE_IF_STATEMENT;
+    r.data = cast(*void, dd);
+
+    return create_node(p, r);
+};
+
+/* WhileStatement ::= "while" Expression LBRACE Statement* RBRACE */
+let parser_parse_while_statement = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_WHILE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let expression = parser_parse_expression(p);
+    if expression == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100));
+    let i = 0;
+    while true {
+        let n = parser_accept_parse(p, parser_parse_statement);
+        if n == cast(*Node, null) {
+            break;
+        };
+        /* The body array holds 100 statements; fail loudly instead of writing past it. */
+        assert(i < 100);
+        (*(statements + cast(**Node, i))) = n;
+        i = i + 1;
+    };
+
+    if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let dd = cast(*NODE_WHILE_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_WHILE_STATEMENT_DATA)));
+    (*dd).condition = expression;
+    (*dd).statements = statements;
+    (*dd).statements_len = i;
+
+    let r = Node{};
+    r.type = NODE_WHILE_STATEMENT;
+    r.data = cast(*void, dd);
+
+    return create_node(p, r);
+};
+
+/* ExternDeclaration ::= "extern" IDENTIFIER EQUALS Type */
+/* The result is an assignment node: the identifier on the left, the declared type on the right. */
+let parser_parse_extern_declaration = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_EXTERN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+    if ident == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    let typ = parser_parse_type(p);
+    if typ == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA)));
+    (*d).name = cast(*i8, (*ident).data);
+    (*d).type = cast(*Node, null);
+
+    let n = Node{};
+    n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER;
+    n.data = cast(*void, d);
+
+    /* NOTE(review): is_declaration stays false although extern introduces a name - confirm codegen relies on this. */
+    let dd = cast(*NODE_ASSIGNMENT_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA)));
+    (*dd).is_declaration = false;
+    (*dd).is_dereference = false;
+    (*dd).lhs = create_node(p, n);
+    (*dd).rhs = typ;
+
+    let r = Node{};
+    r.type = NODE_ASSIGNMENT_STATEMENT;
+    r.data = cast(*void, dd);
+
+    return create_node(p, r);
+};
+
+/* FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* */
+/* Returns a slice of identifier nodes (possibly empty), or null when a parameter is malformed. */
+let parser_parse_function_parameters = (p: *parser) => *slice {
+    let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20));
+    let i = 0;
+    while true {
+        /* Separating commas are accepted but not required. */
+        if i != 0 {
+            parser_accept_token(p, TOKEN_COMMA);
+        };
+        let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+        if ident == cast(*token, null) {
+            break;
+        };
+        if parser_accept_token(p, TOKEN_COLON) == cast(*token, null) {
+            return cast(*slice, null);
+        };
+        let type_annotation = parser_parse_type(p);
+        if type_annotation == cast(*Node, null) {
+            return cast(*slice, null);
+        };
+
+        let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA)));
+        (*d).name = cast(*i8, (*ident).data);
+        (*d).type = type_annotation;
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER;
+        n.data = cast(*void, d);
+
+        /* The parameter array holds 20 entries; fail loudly instead of writing past it. */
+        assert(i < 20);
+        (*(node_list + cast(**Node, i))) = create_node(p, n);
+
+        i = i + 1;
+    };
+
+    let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice)));
+    (*s).data = cast(*void, node_list);
+    (*s).data_len = i;
+    return s;
+};
+
+/* TypeDefinition ::= "newtype" Type */
+let parser_parse_type_definition = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_TYPE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let typ = parser_parse_type(p);
+    if typ == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    /* The new type is created with an empty name and only records its underlying type. */
+    let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA)));
+    (*d).name = "";
+    (*d).underlying_type = typ;
+
+    let r = Node{};
+    r.type = NODE_TYPE_SIMPLE_TYPE;
+    r.data = cast(*void, d);
+
+    return create_node(p, r);
+};
+
+/* StructDefinition ::= "struct" LBRACE StructFields? RBRACE */
+let parser_parse_struct_definition = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_STRUCT) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let fields = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20));
+    let i = 0;
+    while true {
+        /* One field is IDENTIFIER ":" Type, stored as an identifier node carrying its type. */
+        let field = parser_accept_parse(p, (ip: *parser) => *Node {
+            let ident = parser_accept_token(ip, TOKEN_IDENTIFIER);
+            if ident == cast(*token, null) {
+                return cast(*Node, null);
+            };
+            if parser_accept_token(ip, TOKEN_COLON) == cast(*token, null) {
+                return cast(*Node, null);
+            };
+            let typ_annotation = parser_parse_type(ip);
+            if typ_annotation == cast(*Node, null) {
+                return cast(*Node, null);
+            };
+            let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*ip).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA)));
+            (*d).name = cast(*i8, (*ident).data);
+            (*d).type = typ_annotation;
+            let n = Node{};
+            n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER;
+            n.data = cast(*void, d);
+            return create_node(ip, n);
+        });
+        if field == cast(*Node, null) {
+            break;
+        };
+
+        /* The comma after a field is optional. */
+        parser_accept_token(p, TOKEN_COMMA);
+
+        /* The field array holds 20 entries; fail loudly instead of writing past it. */
+        assert(i < 20);
+        (*(fields + cast(**Node, i))) = field;
+        i = i + 1;
+    };
+
+    if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_TYPE_STRUCT_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_STRUCT_TYPE_DATA)));
+    (*d).fields = fields;
+    (*d).fields_len = i;
+    let n = Node{};
+    n.type = NODE_TYPE_STRUCT_TYPE;
+    n.data = cast(*void, d);
+    return create_node(p, n);
+};
+
+/* FunctionDefinition ::= LPAREN FunctionParameters? 
RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE */
+let parser_parse_function_definition = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    let params = parser_parse_function_parameters(p);
+    if params == cast(*slice, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    let retur_type = parser_parse_type(p);
+    if retur_type == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100));
+    let i = 0;
+    while true {
+        let n = parser_accept_parse(p, parser_parse_statement);
+        if n == cast(*Node, null) {
+            break;
+        };
+        /* The body array holds 100 statements; fail loudly instead of writing past it. */
+        assert(i < 100);
+        (*(statements + cast(**Node, i))) = n;
+        i = i + 1;
+    };
+
+    if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_FUNCTION_DEFINITION_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_DEFINITION_DATA)));
+    (*d).statements = statements;
+    (*d).statements_len = i;
+    (*d).parameters = cast(**Node, (*params).data);
+    (*d).parameters_len = (*params).data_len;
+    (*d).retur_type = retur_type;
+
+    let n = Node{};
+    n.type = NODE_FUNCTION_DEFINITION;
+    n.data = cast(*void, d);
+
+    return create_node(p, n);
+};
+
+/* StructInstantiation ::= IDENTIFIER LBRACE RBRACE */
+let parser_parse_struct_instanciation = (p: *parser) => *Node {
+    let typ = parser_accept_token(p, TOKEN_IDENTIFIER);
+    if typ == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_STRUCT_INSTANCIATION_DATA, arena_alloc((*p).arena, sizeof(NODE_STRUCT_INSTANCIATION_DATA)));
+    (*d).typ = cast(*i8, (*typ).data);
+
+    let n = Node{};
+    n.type = NODE_STRUCT_INSTANCIATION;
+    n.data = cast(*void, d);
+
+    return create_node(p, n);
+};
+
+extern parser_parse_primary_expression = (*parser) => *Node;
+
+/* FieldAccess ::= PrimaryExpression DOT IDENTIFIER */
+let parser_parse_field_access = (p: *parser) => *Node {
+    let expression = parser_parse_primary_expression(p);
+    if expression == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+    if parser_accept_token(p, TOKEN_DOT) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+    if ident == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_FIELD_ACCESS_DATA, arena_alloc((*p).arena, sizeof(NODE_FIELD_ACCESS_DATA)));
+    (*d).expression = expression;
+    (*d).name = cast(*i8, (*ident).data);
+
+    let n = Node{};
+    n.type = NODE_FIELD_ACCESS;
+    n.data = cast(*void, d);
+
+    return create_node(p, n);
+};
+
+/* PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN */
+let parser_parse_primary_expression = (p: *parser) => *Node {
+    /* Multi-token forms are tried first; each failed attempt is rewound by parser_accept_parse. */
+    let stmt = parser_accept_parse(p, parser_parse_function_definition);
+    if stmt != cast(*Node, null) {
+        return stmt;
+    };
+    let stmt = parser_accept_parse(p, parser_parse_type_definition);
+    if stmt != cast(*Node, null) {
+        return stmt;
+    };
+    let stmt = parser_accept_parse(p, parser_parse_struct_definition);
+    if stmt != cast(*Node, null) {
+        return stmt;
+    };
+    let stmt = parser_accept_parse(p, parser_parse_struct_instanciation);
+    if stmt != cast(*Node, null) {
+        return stmt;
+    };
+
+    /* Parenthesised expression: the inner node is returned as is, without a wrapper node. */
+    if parser_accept_token(p, TOKEN_LPAREN) != cast(*token, null) {
+        let expr = parser_parse_expression(p);
+        if expr == cast(*Node, null) {
+            return cast(*Node, null);
+        };
+        if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+            return cast(*Node, null);
+        };
+        return expr;
+    };
+
+    /* Everything left is a single-token literal or identifier. */
+    let tok = parser_consume_token(p);
+    if tok == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    if (*tok).type == TOKEN_NULL {
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_NULL;
+        return create_node(p, n);
+    };
+
+    if (*tok).type == TOKEN_IDENTIFIER {
+        let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA)));
+        (*d).name = cast(*i8, (*tok).data);
+        (*d).type = cast(*Node, null);
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER;
+        n.data = cast(*void, d);
+        return create_node(p, n);
+    };
+
+    if (*tok).type == TOKEN_NUMBER {
+        let d = cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_NUMBER_DATA)));
+        (*d).value = *(cast(*i64, (*tok).data));
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_NUMBER;
+        n.data = cast(*void, d);
+        return create_node(p, n);
+    };
+
+    if (*tok).type == TOKEN_BOOLEAN {
+        let d = cast(*NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA)));
+        (*d).value = *(cast(*bool, (*tok).data));
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_BOOLEAN;
+        n.data = cast(*void, d);
+        return create_node(p, n);
+    };
+
+    if (*tok).type == TOKEN_CHAR {
+        let d = cast(*NODE_PRIMARY_EXPRESSION_CHAR_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_CHAR_DATA)));
+        (*d).value = *(cast(*i8, (*tok).data));
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_CHAR;
+        n.data = cast(*void, d);
+        return create_node(p, n);
+    };
+
+    if (*tok).type == TOKEN_STRING {
+        let d = cast(*NODE_PRIMARY_EXPRESSION_STRING_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_STRING_DATA)));
+        (*d).value = cast(*i8, (*tok).data);
+        let n = Node{};
+        n.type = NODE_PRIMARY_EXPRESSION_STRING;
+        n.data = cast(*void, d);
+        return create_node(p, n);
+    };
+
+    return cast(*Node, null);
+};
+
+/* Accepts the two tokens `first` `second` in a row; consumes both or neither. */
+let parser_accept_token_pair = (p: *parser, first: i64, second: i64) => bool {
+    let prev_offset = (*p).offset;
+    if parser_accept_token(p, first) != cast(*token, null) {
+        if parser_accept_token(p, second) != cast(*token, null) {
+            return true;
+        };
+    };
+    (*p).offset = prev_offset;
+    return false;
+};
+
+/* EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression */
+let parser_parse_equality_expression = (p: *parser) => *Node {
+    let lhs = parser_parse_additive_expression(p);
+    if lhs == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    /* Two-token operators are tried before "<" and ">" so that "<=" and ">=" are not split. */
+    let typ = -1;
+    if parser_accept_token_pair(p, TOKEN_EQUALS, TOKEN_EQUALS) {
+        typ = EQUALITY_EXPRESSION_TYPE_EQ;
+    };
+    if typ == -1 {
+        if parser_accept_token_pair(p, TOKEN_BANG, TOKEN_EQUALS) {
+            typ = EQUALITY_EXPRESSION_TYPE_NE;
+        };
+    };
+    if typ == -1 {
+        if parser_accept_token_pair(p, TOKEN_LESS, TOKEN_EQUALS) {
+            typ = EQUALITY_EXPRESSION_TYPE_LE;
+        };
+    };
+    if typ == -1 {
+        if parser_accept_token_pair(p, TOKEN_GREATER, TOKEN_EQUALS) {
+            typ = EQUALITY_EXPRESSION_TYPE_GE;
+        };
+    };
+    if typ == -1 {
+        if parser_accept_token(p, TOKEN_LESS) != cast(*token, null) {
+            typ = EQUALITY_EXPRESSION_TYPE_LT;
+        };
+    };
+    if typ == -1 {
+        if parser_accept_token(p, TOKEN_GREATER) != cast(*token, null) {
+            typ = EQUALITY_EXPRESSION_TYPE_GT;
+        };
+    };
+
+    /* No comparison operator: this is not an equality expression. */
+    if typ == -1 {
+        return cast(*Node, null);
+    };
+
+    let rhs = parser_parse_additive_expression(p);
+    if rhs == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_EQUALITY_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_EQUALITY_EXPRESSION_DATA)));
+    (*d).lhs = lhs;
+    (*d).rhs = rhs;
+    (*d).typ = typ;
+
+    let n = Node{};
+    n.type = NODE_EQUALITY_EXPRESSION;
+    n.data = cast(*void, d);
+
+    return create_node(p, n);
+};
+
+/* PostfixExpression ::= PrimaryExpression (CastStatement | SizeOfStatement | FunctionCallStatement | FieldAccess )* */
+/* As implemented, exactly one alternative is parsed: the first of cast, sizeof, call, field access, primary. */
+let parser_parse_postfix_expression = (p: *parser) => *Node {
+    let ex = parser_accept_parse(p, parser_parse_cast_statement);
+    if ex != cast(*Node, null) {
+        return ex;
+    };
+    let ex = parser_accept_parse(p, parser_parse_sizeof_statement);
+    if ex != cast(*Node, null) {
+        return ex;
+    };
+    let ex = parser_accept_parse(p, parse_function_call_statement);
+    if ex != cast(*Node, null) {
+        return ex;
+    };
+    let ex = parser_accept_parse(p, parser_parse_field_access);
+    if ex != cast(*Node, null) {
+        return ex;
+    };
+    return parser_parse_primary_expression(p);
+};
+
+/* UnaryExpression ::= ("!" 
| "-" | "*") UnaryExpression | PostfixExpression */
+let parser_parse_unary_expression = (p: *parser) => *Node {
+    /* At most one prefix operator is taken here; further ones are handled by the recursive call. */
+    let typ = -1;
+    if parser_accept_token(p, TOKEN_BANG) != cast(*token, null) {
+        typ = UNARY_EXPRESSION_TYPE_NOT;
+    };
+    if typ == -1 {
+        if parser_accept_token(p, TOKEN_MINUS) != cast(*token, null) {
+            typ = UNARY_EXPRESSION_TYPE_MINUS;
+        };
+    };
+    if typ == -1 {
+        if parser_accept_token(p, TOKEN_MUL) != cast(*token, null) {
+            typ = UNARY_EXPRESSION_TYPE_STAR;
+        };
+    };
+    if typ == -1 {
+        return parser_parse_postfix_expression(p);
+    };
+
+    let new_lhs_data = cast(*NODE_UNARY_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_UNARY_EXPRESSION_DATA)));
+    ((*new_lhs_data).typ) = typ;
+    let e = parser_parse_unary_expression(p);
+    if e == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+    ((*new_lhs_data).expression) = e;
+    let new_lhs = Node{};
+    new_lhs.type = NODE_UNARY_EXPRESSION;
+    new_lhs.data = cast(*void, new_lhs_data);
+    return create_node(p, new_lhs);
+};
+
+/* MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)* */
+/* Operators associate to the left: each new node takes the tree built so far as its lhs. */
+let parser_parse_multiplicative_expression = (p: *parser) => *Node {
+    let lhs = parser_parse_unary_expression(p);
+    if lhs == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    while true {
+        /* Consume exactly one operator per operand, so "a * / b" is rejected instead of read as "a / b". */
+        let typ = -1;
+        if parser_accept_token(p, TOKEN_MUL) != cast(*token, null) {
+            typ = MULTIPLICATIVE_EXPRESSION_TYPE_MUL;
+        };
+        if typ == -1 {
+            if parser_accept_token(p, TOKEN_DIV) != cast(*token, null) {
+                typ = MULTIPLICATIVE_EXPRESSION_TYPE_DIV;
+            };
+        };
+        if typ == -1 {
+            if parser_accept_token(p, TOKEN_MOD) != cast(*token, null) {
+                typ = MULTIPLICATIVE_EXPRESSION_TYPE_MOD;
+            };
+        };
+        if typ == -1 {
+            break;
+        };
+
+        let rhs = parser_parse_unary_expression(p);
+        if rhs == cast(*Node, null) {
+            return cast(*Node, null);
+        };
+
+        let new_lhs_data = cast(*NODE_MULTIPLICATIVE_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_MULTIPLICATIVE_EXPRESSION_DATA)));
+        ((*new_lhs_data).lhs) = lhs;
+        ((*new_lhs_data).rhs) = rhs;
+        ((*new_lhs_data).typ) = typ;
+        let new_lhs = Node{};
+        new_lhs.type = NODE_MULTIPLICATIVE_EXPRESSION;
+        new_lhs.data = cast(*void, new_lhs_data);
+        lhs = create_node(p, new_lhs);
+    };
+    return lhs;
+};
+
+/* AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* */
+/* Operators associate to the left: each new node takes the tree built so far as its lhs. */
+let parser_parse_additive_expression = (p: *parser) => *Node {
+    let lhs = parser_parse_multiplicative_expression(p);
+    if lhs == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    while true {
+        /* Only look for "-" when "+" did not match, so the "-" in "a + -b" stays with the operand. */
+        let plus = parser_accept_token(p, TOKEN_PLUS);
+        if plus == cast(*token, null) {
+            if parser_accept_token(p, TOKEN_MINUS) == cast(*token, null) {
+                break;
+            };
+        };
+
+        let rhs = parser_parse_multiplicative_expression(p);
+        if rhs == cast(*Node, null) {
+            return cast(*Node, null);
+        };
+
+        let new_lhs_data = cast(*NODE_ADDITIVE_EXPRESSION_DATA, arena_alloc((*p).arena, sizeof(NODE_ADDITIVE_EXPRESSION_DATA)));
+        ((*new_lhs_data).addition) = plus != cast(*token, null);
+        ((*new_lhs_data).lhs) = lhs;
+        ((*new_lhs_data).rhs) = rhs;
+        let new_lhs = Node{};
+        new_lhs.type = NODE_ADDITIVE_EXPRESSION;
+        new_lhs.data = cast(*void, new_lhs_data);
+        lhs = create_node(p, new_lhs);
+    };
+    return lhs;
+};
+
+/* Expression ::= EqualityExpression | AdditiveExpression */
+let parser_parse_expression = (p: *parser) => *Node {
+    /* Equality goes first: it starts with an additive expression and needs the operator to follow. */
+    let ex = parser_accept_parse(p, parser_parse_equality_expression);
+    if ex != cast(*Node, null) {
+        return ex;
+    };
+    let ax = parser_accept_parse(p, parser_parse_additive_expression);
+    if ax != cast(*Node, null) {
+        return ax;
+    };
+
+    return cast(*Node, null);
+};
+
+/* AssignmentStatement ::= ("let")? ("*")? 
Expression EQUALS Expression */
+let parse_assignment_statement = (p: *parser) => *Node {
+    let is_declaration = false;
+    if parser_accept_token(p, TOKEN_LET) != cast(*token, null) {
+        is_declaration = true;
+    };
+
+    let is_dereference = false;
+    if parser_accept_token(p, TOKEN_MUL) != cast(*token, null) {
+        is_dereference = true;
+    };
+
+    let lhs = parser_parse_expression(p);
+    if lhs == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let rhs = parser_parse_expression(p);
+    if rhs == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_ASSIGNMENT_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA)));
+    (*d).is_declaration = is_declaration;
+    (*d).is_dereference = is_dereference;
+    (*d).lhs = lhs;
+    (*d).rhs = rhs;
+    let n = Node{};
+    n.type = NODE_ASSIGNMENT_STATEMENT;
+    n.data = cast(*void, d);
+    return create_node(p, n);
+};
+
+extern parse = (*parser) => *Node;
+
+/* ImportDeclaration ::= "import" STRING */
+/* Reads, tokenizes and parses the imported file right away and stores its AST in the node. */
+let parser_parse_import_declaration = (p: *parser) => *Node {
+    if parser_accept_token(p, TOKEN_IMPORT) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let expr = parser_parse_primary_expression(p);
+    if expr == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    assert((*expr).type == NODE_PRIMARY_EXPRESSION_STRING);
+
+    let impor_filename = (*cast(*NODE_PRIMARY_EXPRESSION_STRING_DATA, (*expr).data)).value;
+    /* dirname() is given a private copy of the path, sized to fit rather than a fixed 70 bytes. */
+    let current_file = cast(*i8, arena_alloc((*p).arena, strlen((*p).filename) + 1));
+    strcpy(current_file, (*p).filename);
+
+    /* stdlib. TODO: this is very hacky and won't work if running the compiler binary by itself */
+    if (*impor_filename) == '!' {
+        /* "./std/" is 6 bytes; the name loses its leading '!' and gains a NUL, so strlen + 6 fits exactly. */
+        let buf = cast(*i8, arena_alloc((*p).arena, strlen(impor_filename) + 6));
+        sprintf(buf, "./std/%s", (impor_filename + cast(*i8, 1)));
+        impor_filename = buf;
+        current_file = ".";
+    };
+
+    /* NOTE(review): dir and file are never closed in this function - add close() calls if the binding is in scope. */
+    let dirpath = dirname(current_file);
+    let dir = open(dirpath, 0);
+    assert(dir != -1);
+
+    let file = openat(dir, impor_filename, 0);
+    assert(file != -1);
+
+    /* Seek to the end (whence 2) to learn the size, then rewind (whence 0) before reading. */
+    let file_size = lseek(file, 0, 2);
+    assert(file_size != -1);
+    lseek(file, 0, 0);
+    let file_contents = cast(*i8, arena_alloc((*p).arena, file_size + 1));
+
+    /* A failed read returns -1, which must not be used as the index of the terminator. */
+    let bytes_read = read(file, file_contents, file_size);
+    assert(bytes_read != -1);
+    (*(file_contents + cast(*i8, bytes_read))) = '\0';
+
+    /* Hand the tokenizer only the bytes that were actually read. */
+    let f = slice{};
+    f.data = cast(*void, file_contents);
+    f.data_len = bytes_read;
+    let inner_tokenizer = tokenizer_init((*p).arena, f);
+    let tokens = tokenizer_tokenize(inner_tokenizer);
+
+    /* dirpath + "/" + impor_filename + NUL, sized to fit rather than a fixed 90 bytes. */
+    let dirpath_len = strlen(dirpath);
+    let buf2 = cast(*i8, arena_alloc((*p).arena, dirpath_len + strlen(impor_filename) + 2));
+    strcpy(buf2, dirpath);
+    (*(buf2 + cast(*i8, dirpath_len))) = '/';
+    strcpy(buf2 + cast(*i8, dirpath_len + 1), impor_filename);
+    /* The resolved path becomes the inner parser's filename, which nested imports copy with strcpy. */
+    let full_path = realpath(buf2, cast(*i8, null));
+    assert(full_path != cast(*i8, null));
+
+    let inner_parser = parser_init(cast(*token, tokens.data), tokens.data_len, (*p).arena, full_path);
+    let ast = parse(inner_parser);
+
+    let d = cast(*NODE_IMPORT_DECLARATION_DATA, arena_alloc((*p).arena, sizeof(NODE_IMPORT_DECLARATION_DATA)));
+    (*d).filename = impor_filename;
+    (*d).program = ast;
+    let n = Node{};
+    n.type = NODE_IMPORT_DECLARATION;
+    n.data = cast(*void, d);
+    return create_node(p, n);
+};
+
+/* FunctionArguments ::= Expression ("," Expression)* */
+/* Always returns a slice; it is empty when no expression could be parsed. */
+let parser_parse_function_arguments = (p: *parser) => *slice {
+    let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20));
+    let first = true;
+
+    let i = 0;
+    while true {
+        /* Separating commas are accepted but not required. */
+        if !first {
+            parser_accept_token(p, TOKEN_COMMA);
+        };
+        first = false;
+        let maybe_expr = parser_accept_parse(p, parser_parse_expression);
+        if maybe_expr == cast(*Node, null) {
+            break;
+        };
+
+        /* The argument array holds 20 entries; fail loudly instead of writing past it. */
+        assert(i < 20);
+        (*(node_list + cast(**Node, i))) = maybe_expr;
+        i = i + 1;
+    };
+
+    let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice)));
+    (*s).data = cast(*void, node_list);
+    (*s).data_len = i;
+    return s;
+};
+
+/* FunctionCallStatement ::= (IDENTIFIER | FunctionDefinition) LPAREN FunctionArguments? RPAREN */
+let parse_function_call_statement = (p: *parser) => *Node {
+    let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+    let fn_def = parser_accept_parse(p, parser_parse_function_definition);
+
+    if ident == cast(*token, null) {
+        if fn_def == cast(*Node, null) {
+            return cast(*Node, null);
+        };
+    };
+
+    if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let arguments = parser_parse_function_arguments(p);
+    if arguments == cast(*slice, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+
+    let d = cast(*NODE_FUNCTION_CALL_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_CALL_STATEMENT_DATA)));
+    (*d).arguments = cast(**Node, (*arguments).data);
+    (*d).arguments_len = (*arguments).data_len;
+
+    /* The callee is the inline function definition when there is one, otherwise the identifier. */
+    if fn_def != cast(*Node, null) {
+        (*d).expression = fn_def;
+    };
+    if fn_def == cast(*Node, null) {
+        let expression_data = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA)));
+        (*expression_data).name = cast(*i8, (*ident).data);
+        /* Set the type field explicitly, as every other identifier node in this file does. */
+        (*expression_data).type = cast(*Node, null);
+        let expression = cast(*Node, arena_alloc((*p).arena, sizeof(Node)));
+        (*expression).type = NODE_PRIMARY_EXPRESSION_IDENTIFIER;
+        (*expression).data = cast(*void, expression_data);
+        (*d).expression = expression;
+    };
+
+    let n = Node{};
+    n.type = NODE_FUNCTION_CALL_STATEMENT;
+    n.data = cast(*void, d);
+    return create_node(p, n);
+};
+
+/* Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | 
ReturnStatement | "break" | "continue") SEMICOLON */
+/* Tries each alternative in order, keeps the first that parses, then requires the SEMICOLON. */
+let parser_parse_statement = (p: *parser) => *Node {
+    /* TODO: Can we not deal with cast / sizeof in parser? */
+    let stmt = parser_accept_parse(p, parser_parse_cast_statement);
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parser_parse_sizeof_statement);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parse_function_call_statement);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parse_assignment_statement);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parser_parse_return_statement);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parser_parse_import_declaration);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parser_parse_extern_declaration);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parser_parse_if_statement);
+    };
+    if stmt == cast(*Node, null) {
+        stmt = parser_accept_parse(p, parser_parse_while_statement);
+    };
+
+    /* Break and continue */
+    if stmt == cast(*Node, null) {
+        if parser_accept_token(p, TOKEN_BREAK) != cast(*token, null) {
+            let n = Node{};
+            n.type = NODE_BREAK_STATEMENT;
+            stmt = create_node(p, n);
+        };
+    };
+    if stmt == cast(*Node, null) {
+        if parser_accept_token(p, TOKEN_CONTINUE) != cast(*token, null) {
+            let n = Node{};
+            n.type = NODE_CONTINUE_STATEMENT;
+            stmt = create_node(p, n);
+        };
+    };
+
+    /* Nothing matched. This is the normal way a statement list ends, so it is not reported. */
+    if stmt == cast(*Node, null) {
+        return cast(*Node, null);
+    };
+
+    if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) {
+        return cast(*Node, null);
+    };
+    return stmt;
+};
+
+/* Program ::= Statement+ */
+/* Consumes every token; a statement that fails to parse aborts via assert. */
+let parse_program = (p: *parser) => *Node {
+    let nodes = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 50000));
+
+    let i = 0;
+    while (*p).offset < (*p).tokens_len {
+        let s = parser_parse_statement(p);
+        assert(s != cast(*Node, null));
+        /* The program array holds 50000 statements; fail loudly instead of writing past it. */
+        assert(i < 50000);
+        (*(nodes + cast(**Node, i))) = s;
+        i = i + 1;
+    };
+
+    let d = cast(*NODE_PROGRAM_DATA, arena_alloc((*p).arena, sizeof(NODE_PROGRAM_DATA)));
+    (*d).statements = nodes;
+    (*d).statements_len = i;
+    let n = Node{};
+    n.type = NODE_PROGRAM;
+    n.data = cast(*void, d);
+    return create_node(p, n);
+};
+
+/* Entry point of the parser: parses the whole token stream into a NODE_PROGRAM node. */
+let parse = (p: *parser) => *Node {
+    return parse_program(p);
+};
diff --git a/src/parser.zig b/src/parser.zig
deleted file mode 100644
index da3c71c..0000000
--- a/src/parser.zig
+++ /dev/null
@@ -1,1055 +0,0 @@
-const std = @import("std");
-const tokenizer = @import("tokenizer.zig");
-
-const ParserError = error{ - ParsingError, - OutOfMemory, -}; - -pub const Node = union(enum) { - PROGRAM: struct { - statements: []*Node, - }, - STATEMENT: struct { - statement: *Node, - }, - ASSIGNMENT_STATEMENT: struct { - is_declaration: bool, - is_dereference: bool, - lhs: *Node, - rhs: *Node, - }, - IMPORT_DECLARATION: struct { - filename: []const u8, - program: *Node, - }, - FUNCTION_CALL_STATEMENT: struct { - expression: *Node, - arguments: []*Node, - }, - IF_STATEMENT: struct { - condition: *Node, - statements: []*Node, - }, - WHILE_STATEMENT: struct { - condition: *Node, - statements: []*Node, - }, - EQUALITY_EXPRESSION: struct { lhs: *Node, rhs: *Node, typ: EqualityExpressionType }, - ADDITIVE_EXPRESSION: struct { - addition: bool, - lhs: *Node, - rhs: *Node, - }, - MULTIPLICATIVE_EXPRESSION: struct { - lhs: *Node, - rhs: *Node, - typ: MultiplicativeExpressionType, - }, - UNARY_EXPRESSION: struct { - typ: enum { - NOT, - MINUS, - STAR, - }, - expression: *Node, - }, - POSTFIX_EXPRESSION: struct { - lhs: *Node, - rhs: ?*Node, - }, - PRIMARY_EXPRESSION: union(enum) { - NUMBER: struct { - value: i64, - }, - BOOLEAN: struct { - value: bool, - }, - NULL: void, - CHAR: struct { - value: u8, - }, - STRING: struct { - value: []const u8, - }, - IDENTIFIER: struct { - name: []const u8, - type: ?*Node, - }, - }, - FUNCTION_DEFINITION: struct { - statements: []*Node, - parameters: []*Node, - return_type: *Node, - }, - STRUCT_INSTANCIATION: struct { - typ: []const u8, - }, - FIELD_ACCESS: struct { - expression: *Node, - name: []const u8, - }, - TYPE: union(enum) { - SIMPLE_TYPE: struct { - name: []const u8, - underlying_type: ?*Node, - }, - FUNCTION_TYPE: struct { - parameters: []*Node, - return_type: *Node, - }, - POINTER_TYPE: struct { - type: *Node, - }, - STRUCT_TYPE: struct { - fields: []*Node, - }, - }, - RETURN_STATEMENT: struct { - expression: ?*Node, - }, - CAST_STATEMENT: struct { - typ: *Node, - expression: *Node, - }, - SIZEOF_STATEMENT: struct { - typ: 
*Node, - }, - BREAK_STATEMENT: void, - CONTINUE_STATEMENT: void, -}; - -pub const EqualityExpressionType = enum { - EQ, - NE, - GE, - LE, - LT, - GT, -}; - -pub const MultiplicativeExpressionType = enum { - MUL, - DIV, - MOD, -}; - -pub const Parser = struct { - filename: []const u8, - - tokens: []tokenizer.Token, - offset: u32, - - arena: std.mem.Allocator, - - try_context: bool, //TODO: I dont like this - - pub fn init(tokens: []tokenizer.Token, arena_allocator: std.mem.Allocator, filename: []const u8) ParserError!*Parser { - const parser = try arena_allocator.create(Parser); - parser.* = .{ - .filename = filename, - .tokens = tokens, - .offset = 0, - .arena = arena_allocator, - .try_context = false, - }; - return parser; - } - - pub fn parse(self: *Parser) !*Node { - return try self.parse_program(); - } - - // Program ::= Statement+ - fn parse_program(self: *Parser) !*Node { - var nodes = std.ArrayList(*Node).init(self.arena); - while (self.offset < self.tokens.len) { - try nodes.append(@constCast(try self.parse_statement())); - } - - return self.create_node(.{ .PROGRAM = .{ - .statements = try nodes.toOwnedSlice(), - } }); - } - - // Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | ReturnStatement | "break" | "continue") SEMICOLON - fn parse_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing statement {any}\n", .{self.peek_token()}); - - const statement = - self.accept_parse(parse_cast_statement) orelse //TODO: Can we not deal with cast / sizeof in parser? 
- self.accept_parse(parse_sizeof_statement) orelse - self.accept_parse(parse_function_call_statement) orelse - self.accept_parse(parse_if_statement) orelse - self.accept_parse(parse_while_statement) orelse - self.accept_parse(parse_return_statement) orelse - self.accept_parse(parse_assignment_statement) orelse - self.accept_parse(parse_import_declaration) orelse - self.accept_parse(parse_extern_declaration) orelse - self.accept_parse(struct { - fn parse_break_statement(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.BREAK); - return try iself.create_node(.{ - .BREAK_STATEMENT = void{}, - }); - } - }.parse_break_statement) orelse - self.accept_parse(struct { - fn parse_continue_statement(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.CONTINUE); - return try iself.create_node(.{ - .CONTINUE_STATEMENT = void{}, - }); - } - }.parse_continue_statement); - - _ = try self.parse_token(tokenizer.TokenType.SEMICOLON); - - return self.create_node(.{ - .STATEMENT = .{ - .statement = statement.?, - }, - }); - } - - // AssignmentStatement ::= ("let")? ("*")? 
Expression EQUALS Expression - fn parse_assignment_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing assignment statement {any}\n", .{self.peek_token()}); - - var is_declaration = false; - if (self.accept_token(.LET) != null) { - is_declaration = true; - } - - var is_dereference = false; - if (self.accept_token(.MUL) != null) { - is_dereference = true; - } - - const lhs = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.EQUALS); - - const rhs = try self.parse_expression(); - - return self.create_node(.{ - .ASSIGNMENT_STATEMENT = .{ - .is_declaration = is_declaration, - .is_dereference = is_dereference, - .lhs = lhs, - .rhs = rhs, - }, - }); - } - - // ImportDeclaration ::= "import" STRING - fn parse_import_declaration(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing import declaration {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(.IMPORT); - - const expr = try self.parse_primary_expression(); - - std.debug.assert(expr.PRIMARY_EXPRESSION == .STRING); - - var import_filename = expr.PRIMARY_EXPRESSION.STRING.value; - var current_file = self.filename; - - // stdlib. 
TODO: this is very hacky and won't work if running the compiler binary by itself - if (import_filename.ptr[0] == '!') { - import_filename = std.fmt.allocPrint(self.arena, "./std/{s}", .{import_filename[1..]}) catch return ParserError.OutOfMemory; - current_file = "."; - } - - // Open the directory containing current_file - const dir_path = std.fs.path.dirname(current_file) orelse "."; - var dir = std.fs.cwd().openDir(dir_path, .{}) catch { - std.debug.print("Couldn't open directory {s}\n", .{current_file}); - return ParserError.OutOfMemory; - }; - defer dir.close(); - - // Open the target file - const file = dir.openFile(import_filename, .{}) catch { - std.debug.print("Couldn't open file {s}\n", .{import_filename}); - return ParserError.OutOfMemory; - }; - defer file.close(); - - // Read file contents - const buf = file.readToEndAlloc(self.arena, 1 * 1024 * 1024) catch return ParserError.OutOfMemory; - - // Initialize tokenizer and parse - var inner_tokenizer = try tokenizer.Tokenizer.init(buf, self.arena); - const tokens = inner_tokenizer.tokenize() catch return ParserError.OutOfMemory; - - // Resolve the full path of the imported file - const full_path = try std.fs.path.resolve(self.arena, &.{ dir_path, import_filename }); - - const inner_parser = try Parser.init(tokens, self.arena, full_path); - const ast = try inner_parser.parse(); - - return self.create_node(.{ - .IMPORT_DECLARATION = .{ - .filename = import_filename, - .program = ast, - }, - }); - } - - // ExternDeclaration ::= "extern" IDENTIFIER EQUALS Type - fn parse_extern_declaration(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing extern declaration {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(.EXTERN); - - const identifier = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - _ = try self.parse_token(tokenizer.TokenType.EQUALS); - - const typ = try self.parse_type(); - - return self.create_node(.{ - .ASSIGNMENT_STATEMENT = .{ - 
.is_declaration = true, - .is_dereference = false, - .lhs = try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, identifier.type.IDENTIFIER), - .type = null, - }, - }, - }), - .rhs = @constCast(typ), - }, - }); - } - - // FunctionCallStatement ::= (IDENTIFIER | FunctionDefinition) LPAREN FunctionArguments? RPAREN - fn parse_function_call_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function call statement {any}\n", .{self.peek_token()}); - - const identifier = self.accept_token(tokenizer.TokenType.IDENTIFIER); - const fn_def = self.accept_parse(parse_function_definition); - - if (identifier == null and fn_def == null) return ParserError.ParsingError; - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const arguments = try self.parse_function_arguments(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - if (fn_def != null) { - return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ - .expression = fn_def.?, - .arguments = arguments, - } }); - } - - return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ - .expression = try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, identifier.?.type.IDENTIFIER), - .type = null, - }, - }, - }), - .arguments = arguments, - } }); - } - - // FunctionArguments ::= Expression ("," Expression)* - fn parse_function_arguments(self: *Parser) ParserError![]*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function arguments {any}\n", .{self.peek_token()}); - - var node_list = std.ArrayList(*Node).init(self.arena); - - var first = true; - while (true) { - if (!first) { - _ = self.accept_token(tokenizer.TokenType.COMMA); - } - first = false; - const expr = self.accept_parse(parse_expression) orelse return node_list.items; - try node_list.append(expr); - } - - return node_list.items; - } - - // IfStatement ::= "if" Expression 
LBRACE Statement* RBRACE - fn parse_if_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing if statement {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.IF); - - const expression = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var statements = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_statement)) |expr| { - try statements.append(expr); - } - - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return try self.create_node(.{ .IF_STATEMENT = .{ - .condition = expression, - .statements = statements.items, - } }); - } - - // WhileStatement ::= "while" Expression LBRACE Statement* RBRACE - fn parse_while_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing while statement {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.WHILE); - - const expression = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var statements = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_statement)) |expr| { - try statements.append(expr); - } - - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return try self.create_node(.{ .WHILE_STATEMENT = .{ - .condition = expression, - .statements = statements.items, - } }); - } - - // Expression ::= EqualityExpression | AdditiveExpression - fn parse_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing expression {any}\n", .{self.peek_token()}); - - return self.accept_parse(parse_equality_expression) orelse - self.accept_parse(parse_additive_expression) orelse - return ParserError.ParsingError; - } - - // EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression - fn parse_equality_expression(self: *Parser) ParserError!*Node 
{ - errdefer if (!self.try_context) std.debug.print("Error parsing equality expression {any}\n", .{self.peek_token()}); - - const lhs = try self.parse_additive_expression(); - - var typ: EqualityExpressionType = undefined; - - if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .EQ; - } else if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.BANG); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .NE; - } else if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.LESS); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .LE; - } else if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.GREATER); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .GE; - } else if (self.accept_token(tokenizer.TokenType.LESS) != null) { - typ = .LT; - } else if (self.accept_token(tokenizer.TokenType.GREATER) != null) { - typ = .GT; - } else { - return ParserError.ParsingError; - } - - const rhs = try self.parse_additive_expression(); - - return self.create_node(.{ .EQUALITY_EXPRESSION = .{ - .lhs = lhs, - .rhs = rhs, - .typ = typ, - } }); - } - - // AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* - fn 
parse_additive_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing additive expression {any}\n", .{self.peek_token()}); - - var lhs = try self.parse_multiplicative_expression(); - - while (true) { - const plus = self.accept_token(tokenizer.TokenType.PLUS); - const minus = self.accept_token(tokenizer.TokenType.MINUS); - - if (plus == null and minus == null) break; - - const rhs = try self.parse_multiplicative_expression(); - - lhs = try self.create_node(.{ .ADDITIVE_EXPRESSION = .{ - .addition = plus != null, - .lhs = lhs, - .rhs = rhs, - } }); - } - - return lhs; - } - - // MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)* - fn parse_multiplicative_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing additive expression {any}\n", .{self.peek_token()}); - - var lhs = try self.parse_unary_expression(); - - while (true) { - var typ: MultiplicativeExpressionType = undefined; - if (self.accept_token(tokenizer.TokenType.MUL) != null) { - typ = .MUL; - } else if (self.accept_token(tokenizer.TokenType.DIV) != null) { - typ = .DIV; - } else if (self.accept_token(tokenizer.TokenType.MOD) != null) { - typ = .MOD; - } else { - break; - } - - const rhs = try self.parse_unary_expression(); - - lhs = try self.create_node(.{ .MULTIPLICATIVE_EXPRESSION = .{ - .lhs = lhs, - .rhs = rhs, - .typ = typ, - } }); - } - - return lhs; - } - - // UnaryExpression ::= ("!" 
| "-" | "*") UnaryExpression | PostfixExpression - fn parse_unary_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing unary expression {any}\n", .{self.peek_token()}); - - const not = self.accept_token(tokenizer.TokenType.BANG) != null; - const minus = self.accept_token(tokenizer.TokenType.MINUS) != null; - const star = self.accept_token(tokenizer.TokenType.MUL) != null; - - if (!not and !minus and !star) { - return try self.parse_postfix_expression(); - } - - return self.create_node(.{ .UNARY_EXPRESSION = .{ - .typ = if (not) .NOT else if (minus) .MINUS else .STAR, - .expression = try self.parse_unary_expression(), - } }); - } - - // PostfixExpression ::= PrimaryExpression (CastStatement | SizeOfStatement | FunctionCallStatement | FieldAccess )* - fn parse_postfix_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing postfix expression {any}\n", .{self.peek_token()}); - - if (self.accept_parse(parse_cast_statement)) |stmt| { - return stmt; - } else if (self.accept_parse(parse_sizeof_statement)) |stmt| { - return stmt; - } else if (self.accept_parse(parse_function_call_statement)) |stmt| { - return stmt; - } else if (self.accept_parse(parse_field_access)) |stmt| { - return stmt; - } else { - return try self.parse_primary_expression(); - } - } - - // PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | TypeDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN - fn parse_primary_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()}); - - if (self.accept_parse(parse_function_definition)) |stmt| return stmt; - if (self.accept_parse(parse_type_definition)) |stmt| return stmt; - if (self.accept_parse(parse_struct_definition)) |stmt| return stmt; - if 
(self.accept_parse(parse_struct_instanciation)) |stmt| return stmt; - - // LPAREN (Expression) RPAREN - if (self.accept_token(tokenizer.TokenType.LPAREN)) |_| { - const expr = try self.parse_expression(); - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - return expr; - } - - const token = self.consume_token() orelse return ParserError.ParsingError; - - return switch (token.type) { - .NULL => try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .NULL = void{} }, - }), - .NUMBER => |number_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .NUMBER = .{ - .value = number_token, - }, - }, - }), - .BOOLEAN => |boolean_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .BOOLEAN = .{ - .value = boolean_token, - } }, - }), - .CHAR => |char_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .CHAR = .{ - .value = char_token, - } }, - }), - .STRING => |string_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .STRING = .{ - .value = try self.arena.dupe(u8, string_token), - } }, - }), - .IDENTIFIER => |identifier_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, identifier_token), - .type = null, - }, - }, - }), - else => ParserError.ParsingError, - }; - } - - // FunctionDefinition ::= LPAREN FunctionParameters? 
RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE - fn parse_function_definition(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function definition {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const parameters = try self.parse_function_parameters(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - _ = try self.parse_token(tokenizer.TokenType.ARROW); - - const return_type = try self.parse_type(); - - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var nodes = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_statement)) |expression| { - try nodes.append(expression); - } - - if (nodes.items.len == 0 or nodes.getLast().STATEMENT.statement.* != .RETURN_STATEMENT) return ParserError.ParsingError; - - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return self.create_node(.{ .FUNCTION_DEFINITION = .{ - .statements = nodes.items, - .parameters = parameters, - .return_type = return_type, - } }); - } - - // FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* - fn parse_function_parameters(self: *Parser) ParserError![]*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function parameters {any}\n", .{self.peek_token()}); - - var node_list = std.ArrayList(*Node).init(self.arena); - - var first = true; - while (true) { - if (!first) { - _ = self.accept_token(tokenizer.TokenType.COMMA); - } - first = false; - const ident = self.accept_token(tokenizer.TokenType.IDENTIFIER) orelse return node_list.items; - - _ = try self.parse_token(tokenizer.TokenType.COLON); - const type_annotation = try self.parse_type(); - - try node_list.append(try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, ident.type.IDENTIFIER), - .type = type_annotation, - }, - }, - })); - } - - return node_list.items; - } - - // TypeDefinition ::= 
"newtype" Type - fn parse_type_definition(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing type definition {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.TYPE); - - const typ = try self.parse_type(); - - return self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "", - .underlying_type = typ, - }, - }, - }); - } - - // StructDefinition ::= "struct" LBRACE StructFields? RBRACE - fn parse_struct_definition(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing struct definition {any}\n", .{self.peek_token()}); - - // StructField ::= IDENTIFIER ":" Type - const parse_struct_field = struct { - fn call(iself: *Parser) ParserError!*Node { - const ident = try iself.parse_token(tokenizer.TokenType.IDENTIFIER); - _ = try iself.parse_token(tokenizer.TokenType.COLON); - const type_annotation = try iself.parse_type(); - - return iself.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try iself.arena.dupe(u8, ident.type.IDENTIFIER), - .type = type_annotation, - }, - }, - }); - } - }; - - _ = try self.parse_token(tokenizer.TokenType.STRUCT); - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var fields = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_struct_field.call)) |field| { - _ = self.accept_token(tokenizer.TokenType.COMMA); - try fields.append(field); - } - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return self.create_node(.{ - .TYPE = .{ - .STRUCT_TYPE = .{ - .fields = fields.items, - }, - }, - }); - } - - // StructInstantiation ::= IDENTIFIER LBRACE RBRACE - fn parse_struct_instanciation(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing struct instanciation {any}\n", .{self.peek_token()}); - - const typ = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - _ = try self.parse_token(tokenizer.TokenType.LBRACE); 
- _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return self.create_node(.{ - .STRUCT_INSTANCIATION = .{ - .typ = try self.arena.dupe(u8, typ.type.IDENTIFIER), - }, - }); - } - - // FieldAccess ::= PrimaryExpression DOT IDENTIFIER - fn parse_field_access(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing field access {any}\n", .{self.peek_token()}); - - const expression = try self.parse_primary_expression(); - _ = try self.parse_token(tokenizer.TokenType.DOT); - const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - return self.create_node(.{ - .FIELD_ACCESS = .{ - .expression = expression, - .name = try self.arena.dupe(u8, ident.type.IDENTIFIER), - }, - }); - } - - // ReturnStatement ::= RETURN (Expression)? - fn parse_return_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing return statement {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.RETURN); - - const maybe_expression = self.accept_parse(parse_expression); - - return self.create_node(.{ - .RETURN_STATEMENT = .{ - .expression = maybe_expression, - }, - }); - } - - // CastStatement ::= "cast" LPAREN TYPE "," Expression RPAREN - fn parse_cast_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing cast statement {any}\n", .{self.peek_token()}); - - const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - if (!std.mem.eql(u8, "cast", ident.type.IDENTIFIER)) { - return ParserError.ParsingError; - } - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const typ = try self.parse_type(); - - _ = try self.parse_token(tokenizer.TokenType.COMMA); - - const expression = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - return self.create_node(.{ - .CAST_STATEMENT = .{ - .typ = typ, - .expression = expression, - }, - }); - } - - // 
SizeOfStatement ::= "sizeof" LPAREN TYPE RPAREN - fn parse_sizeof_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing sizeof statement {any}\n", .{self.peek_token()}); - - const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - if (!std.mem.eql(u8, "sizeof", ident.type.IDENTIFIER)) { - return ParserError.ParsingError; - } - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const typ = try self.parse_type(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - return self.create_node(.{ - .SIZEOF_STATEMENT = .{ - .typ = typ, - }, - }); - } - - // Type ::= IDENTIFIER | FunctionType - fn parse_type(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing type annotation {any}\n", .{self.peek_token()}); - - return self.accept_parse(parse_function_type) orelse switch (self.consume_token().?.type) { - .MUL => { - return self.create_node(.{ - .TYPE = .{ - .POINTER_TYPE = .{ - .type = try self.parse_type(), - }, - }, - }); - }, - .IDENTIFIER => |ident| { - //TODO: we should only accept specific type identifiers - return try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = try self.arena.dupe(u8, ident), - .underlying_type = null, - }, - }, - }); - }, - else => ParserError.ParsingError, - }; - } - - // FunctionType ::= LPAREN (Type ("," Type)*)? 
RPAREN ARROW Type - fn parse_function_type(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function type {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - var parameters = std.ArrayList(*Node).init(self.arena); - var first = true; - while (true) { - if (!first) { - _ = self.accept_token(tokenizer.TokenType.COMMA); - } - first = false; - const type_annotation = self.accept_parse(parse_type) orelse break; - try parameters.append(type_annotation); - } - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - _ = try self.parse_token(tokenizer.TokenType.ARROW); - - const return_type = try self.parse_type(); - - return try self.create_node(.{ - .TYPE = .{ - .FUNCTION_TYPE = .{ - .parameters = parameters.items, - .return_type = return_type, - }, - }, - }); - } - - fn parse_token(self: *Parser, expected_token: std.meta.Tag(tokenizer.TokenType)) ParserError!tokenizer.Token { - errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token}); - const token = self.peek_token() orelse return ParserError.ParsingError; - - if (expected_token != std.meta.activeTag(token.type)) { - if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token }); - return ParserError.ParsingError; - } - - return self.consume_token() orelse unreachable; - } - - fn accept_parse(self: *Parser, parsing_func: *const fn (_: *Parser) ParserError!*Node) ?*Node { - const prev_offset = self.offset; - const prev_try_context = self.try_context; - self.try_context = true; - const node = parsing_func(self) catch { - self.offset = prev_offset; - self.try_context = prev_try_context; - return null; - }; - self.try_context = prev_try_context; - return node; - } - - fn accept_token(self: *Parser, token_type: std.meta.Tag(tokenizer.TokenType)) ?tokenizer.Token { - const curr_token = self.peek_token() orelse return null; - if 
(std.meta.activeTag(curr_token.type) == token_type) { - return self.consume_token(); - } - return null; - } - - fn consume_token(self: *Parser) ?tokenizer.Token { - if (self.offset >= self.tokens.len) return null; - - defer self.offset += 1; - - return self.tokens[self.offset]; - } - - fn peek_token(self: *Parser) ?tokenizer.Token { - if (self.offset >= self.tokens.len) return null; - - return self.tokens[self.offset]; - } - - fn create_node(self: *Parser, node_value: Node) !*Node { - const node = try self.arena.create(Node); - node.* = node_value; - return node; - } -}; - -test "parse print" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .PRINT = void{} }, - tokenizer.Token{ .LPAREN = void{} }, - tokenizer.Token{ .NUMBER = 7 }, - tokenizer.Token{ .RPAREN = void{} }, - tokenizer.Token{ .SEMICOLON = void{} }, - }); - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const actualNode = try parser.parse_print_statement(); - const expectedNode = Node{ .PRINT_STATEMENT = .{ - .expression = @constCast(&Node{ .EXPRESSION = .{ - .NUMBER = .{ .value = 7 }, - } }), - } }; - try std.testing.expectEqualDeep(&expectedNode, actualNode); -} - -test "parse identifier" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .IDENTIFIER = @constCast("i") }, - }); - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const actualNode = try parser.parse_expression(); - const expectedNode = Node{ .EXPRESSION = .{ - .IDENTIFIER = .{ - .name = @constCast("i"), - }, - } }; - try std.testing.expectEqualDeep(&expectedNode, actualNode); -} - -test "parse number" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .NUMBER = 12 }, - }); - var arena = 
std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const actualNode = try parser.parse_expression(); - const expectedNode = Node{ .EXPRESSION = .{ - .NUMBER = .{ - .value = 12, - }, - } }; - try std.testing.expectEqualDeep(&expectedNode, actualNode); -} - -test "simple e2e" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .LET = void{} }, - tokenizer.Token{ .IDENTIFIER = @constCast("i") }, - tokenizer.Token{ .EQUALS = void{} }, - tokenizer.Token{ .NUMBER = 2 }, - tokenizer.Token{ .SEMICOLON = void{} }, - }); - - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const ast = try parser.parse(); - const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .ASSIGNMENT_STATEMENT = .{ - .is_declaration = true, - .name = @constCast("i"), - .expression = @constCast(&Node{ .EXPRESSION = .{ - .NUMBER = .{ .value = 2 }, - } }), - } }) } })}) } }; - try std.testing.expectEqualDeep(&expected_ast, ast); -} diff --git a/src/tokenizer.pry b/src/tokenizer.pry new file mode 100644 index 0000000..ddc2cef --- /dev/null +++ b/src/tokenizer.pry @@ -0,0 +1,553 @@ +extern strlen = (*i8) => i64; +extern memcpy = (*void, *void, i64) => void; +extern sprintf = (*i8, *i8, varargs) => void; +extern atoi = (*i8) => i64; + +import "!stdlib.pry"; +import "!mem.pry"; + +/* Keywords */ +let TOKEN_IMPORT = 1; +let TOKEN_LET = 2; +let TOKEN_EXTERN = 3; +let TOKEN_IF = 4; +let TOKEN_WHILE = 5; +let TOKEN_RETURN = 6; +let TOKEN_BREAK = 7; +let TOKEN_CONTINUE = 8; +let TOKEN_ARROW = 9; +let TOKEN_STRUCT = 10; +let TOKEN_TYPE = 34; + +/* Identifiers */ +let TOKEN_IDENTIFIER = 11; + +/* Literals */ +let TOKEN_NUMBER = 12; +let TOKEN_BOOLEAN = 13; +let TOKEN_NULL = 14; +let TOKEN_CHAR = 15; +let 
TOKEN_STRING = 16; + +/* Operators */ +let TOKEN_EQUALS = 17; +let TOKEN_PLUS = 18; +let TOKEN_MINUS = 19; +let TOKEN_MUL = 20; +let TOKEN_DIV = 21; +let TOKEN_MOD = 22; +let TOKEN_BANG = 23; +let TOKEN_LESS = 24; +let TOKEN_GREATER = 25; +let TOKEN_DOT = 26; + +/* Punctuation */ +let TOKEN_SEMICOLON = 27; +let TOKEN_COMMA = 28; +let TOKEN_COLON = 29; +let TOKEN_LPAREN = 30; +let TOKEN_RPAREN = 31; +let TOKEN_LBRACE = 32; +let TOKEN_RBRACE = 33; + +let token = struct { + type: i64, + data: *void, +}; + +let tokenizer = struct { + buf: *i8, + buf_len: i64, + offset: i64, + + arena: *arena, +}; + +let print_tokens = (ts: *token, ts_len: i64) => i64 { + let i = 0; + while i < ts_len { + let to = (*(ts + cast(*token, i))); + + if (to.type == TOKEN_IMPORT) { + printf("Import\n"); + }; + if (to.type == TOKEN_LET) { + printf("Let\n"); + }; + if (to.type == TOKEN_EXTERN) { + printf("Extern\n"); + }; + if (to.type == TOKEN_IF) { + printf("If\n"); + }; + if (to.type == TOKEN_WHILE) { + printf("While\n"); + }; + if (to.type == TOKEN_RETURN) { + printf("Return\n"); + }; + if (to.type == TOKEN_BREAK) { + printf("Break\n"); + }; + if (to.type == TOKEN_CONTINUE) { + printf("Continue\n"); + }; + if (to.type == TOKEN_ARROW) { + printf("Arrow\n"); + }; + if (to.type == TOKEN_STRUCT) { + printf("Struct\n"); + }; + if (to.type == TOKEN_TYPE) { + printf("Type\n"); + }; + if (to.type == TOKEN_IDENTIFIER) { + printf("Identifier: %s\n", cast(*i8, to.data)); + }; + if (to.type == TOKEN_NUMBER) { + printf("Number: %d\n", *cast(*i64, to.data)); + }; + if (to.type == TOKEN_BOOLEAN) { + printf("Boolean: %d\n", *cast(*bool, to.data)); + }; + if (to.type == TOKEN_NULL) { + printf("Null\n"); + }; + if (to.type == TOKEN_CHAR) { + printf("Char: %c\n", *cast(*i8, to.data)); + }; + if (to.type == TOKEN_STRING) { + printf("String: %s\n", cast(*i8, to.data)); + }; + if (to.type == TOKEN_EQUALS) { + printf("Equals\n"); + }; + if (to.type == TOKEN_PLUS) { + printf("Plus\n"); + }; + if (to.type == 
TOKEN_MINUS) { + printf("Minus\n"); + }; + if (to.type == TOKEN_MUL) { + printf("Mul\n"); + }; + if (to.type == TOKEN_DIV) { + printf("Div\n"); + }; + if (to.type == TOKEN_MOD) { + printf("Mod\n"); + }; + if (to.type == TOKEN_BANG) { + printf("Bang\n"); + }; + if (to.type == TOKEN_LESS) { + printf("Less\n"); + }; + if (to.type == TOKEN_GREATER) { + printf("Greater\n"); + }; + if (to.type == TOKEN_DOT) { + printf("Dot\n"); + }; + if (to.type == TOKEN_SEMICOLON) { + printf("Semicolon\n"); + }; + if (to.type == TOKEN_COMMA) { + printf("Comma\n"); + }; + if (to.type == TOKEN_COLON) { + printf("Colon\n"); + }; + if (to.type == TOKEN_LPAREN) { + printf("LParen\n"); + }; + if (to.type == TOKEN_RPAREN) { + printf("RParen\n"); + }; + if (to.type == TOKEN_LBRACE) { + printf("LBrace\n"); + }; + if (to.type == TOKEN_RBRACE) { + printf("RBrace\n"); + }; + + i = i + 1; + }; + + return 0; +}; + +let tokenizer_skip_whitespace = (t: *tokenizer) => void { + while true { + if (*t).offset >= (*t).buf_len { return; }; + let c = (*((*t).buf + cast(*i8, (*t).offset))); + if !iswhitespace(c) { + return; + }; + (*t).offset = (*t).offset + 1; + }; + + return; +}; + +let tokenizer_accept_string = (t: *tokenizer, str: *i8) => bool { + let str_len = strlen(str); + if (*t).offset + str_len > (*t).buf_len { return false; }; + + let s = cast(*i8, arena_alloc((*t).arena, 1000)); + memcpy(cast(*void, s), cast(*void, (*t).buf + cast(*i8, (*t).offset)), str_len); + + if strcmp(s, str) { + (*t).offset = (*t).offset + str_len; + return true; + }; + + return false; +}; + +let tokenizer_consume_until_condition = (t: *tokenizer, condition: (i8) => bool) => *i8 { + let start = (*t).offset; + let res = cast(*i8, arena_alloc((*t).arena, 1000)); + + while true { + if (*t).offset >= (*t).buf_len { + return res; + }; + + let c = (*((*t).buf + cast(*i8, (*t).offset))); + + let offset = (*t).offset; + if c == '\\' { + let next_c = (*((*t).buf + cast(*i8, offset + 1))); + + let any = false; + if next_c == 'n' { + 
(*(res + cast(*i8, offset - start))) = '\n'; + any = true; + }; + if next_c == 't' { + (*(res + cast(*i8, offset - start))) = '\t'; + any = true; + }; + if next_c == 'r' { + (*(res + cast(*i8, offset - start))) = '\r'; + any = true; + }; + if next_c == '0' { + (*(res + cast(*i8, offset - start))) = '\0'; + any = true; + }; + if next_c == '\\' { + (*(res + cast(*i8, offset - start))) = '\\'; + any = true; + }; + if !any { + (*(res + cast(*i8, offset - start))) = next_c; + }; + + offset = offset + 1; + offset = offset + 1; + (*t).offset = offset; + + continue; + }; + + if condition(c) { + return res; + }; + + (*(res + cast(*i8, offset - start))) = c; + (*(res + cast(*i8, offset - start + 1))) = '\0'; + + offset = offset + 1; + (*t).offset = offset; + }; + + return cast(*i8, null); +}; + +let tokenizer_accept_int_type = (t: *tokenizer) => *i64 { + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + return !isdigit(c); + }); + if string == cast(*i8, null) { + return cast(*i64, null); + }; + if strlen(string) == 0 { + return cast(*i64, null); + }; + let x = cast(*i64, arena_alloc((*t).arena, sizeof(i64))); + *x = atoi(string); + return x; +}; + +let tokenizer_accept_char_type = (t: *tokenizer) => *i8 { + let prev_offset = (*t).offset; + if !tokenizer_accept_string(t, "'") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + return c == '\''; + }); + + if !tokenizer_accept_string(t, "'") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + return string; +}; + +let tokenizer_accept_string_type = (t: *tokenizer) => *i8 { + let prev_offset = (*t).offset; + if !tokenizer_accept_string(t, "\"") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + return c == '"'; + }); + + if !tokenizer_accept_string(t, "\"") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + return 
string; +}; + +let tokenizer_skip_comments = (t: *tokenizer) => void { + if !tokenizer_accept_string(t, "/*") { return; }; + + while !tokenizer_accept_string(t, "*/") { + (*t).offset = (*t).offset + 1; + }; + + return; +}; + +let tokenizer_next = (t: *tokenizer) => *token { + tokenizer_skip_whitespace(t); + tokenizer_skip_comments(t); + tokenizer_skip_whitespace(t); + + if (*t).offset >= (*t).buf_len { + return cast(*token, null); + }; + + let to = cast(*token, arena_alloc((*t).arena, sizeof(token))); + + if tokenizer_accept_string(t, "import") { + (*to).type = TOKEN_IMPORT; + return to; + }; + if tokenizer_accept_string(t, "let") { + (*to).type = TOKEN_LET; + return to; + }; + if tokenizer_accept_string(t, "extern") { + (*to).type = TOKEN_EXTERN; + return to; + }; + if tokenizer_accept_string(t, "if") { + (*to).type = TOKEN_IF; + return to; + }; + if tokenizer_accept_string(t, "while") { + (*to).type = TOKEN_WHILE; + return to; + }; + if tokenizer_accept_string(t, "return") { + (*to).type = TOKEN_RETURN; + return to; + }; + if tokenizer_accept_string(t, "break") { + (*to).type = TOKEN_BREAK; + return to; + }; + if tokenizer_accept_string(t, "continue") { + (*to).type = TOKEN_CONTINUE; + return to; + }; + if tokenizer_accept_string(t, "true") { + (*to).type = TOKEN_BOOLEAN; + let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); + *data = true; + (*to).data = cast(*void, data); + return to; + }; + if tokenizer_accept_string(t, "false") { + (*to).type = TOKEN_BOOLEAN; + let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); + *data = false; + (*to).data = cast(*void, data); + return to; + }; + if tokenizer_accept_string(t, "null") { + (*to).type = TOKEN_NULL; + return to; + }; + if tokenizer_accept_string(t, "struct") { + (*to).type = TOKEN_STRUCT; + return to; + }; + if tokenizer_accept_string(t, "newtype") { + (*to).type = TOKEN_TYPE; + return to; + }; + + if tokenizer_accept_string(t, "=>") { + (*to).type = TOKEN_ARROW; + return to; + }; + if 
tokenizer_accept_string(t, ";") { + (*to).type = TOKEN_SEMICOLON; + return to; + }; + if tokenizer_accept_string(t, ",") { + (*to).type = TOKEN_COMMA; + return to; + }; + if tokenizer_accept_string(t, ":") { + (*to).type = TOKEN_COLON; + return to; + }; + if tokenizer_accept_string(t, "(") { + (*to).type = TOKEN_LPAREN; + return to; + }; + if tokenizer_accept_string(t, ")") { + (*to).type = TOKEN_RPAREN; + return to; + }; + if tokenizer_accept_string(t, "{") { + (*to).type = TOKEN_LBRACE; + return to; + }; + if tokenizer_accept_string(t, "}") { + (*to).type = TOKEN_RBRACE; + return to; + }; + if tokenizer_accept_string(t, "=") { + (*to).type = TOKEN_EQUALS; + return to; + }; + if tokenizer_accept_string(t, "+") { + (*to).type = TOKEN_PLUS; + return to; + }; + if tokenizer_accept_string(t, "-") { + (*to).type = TOKEN_MINUS; + return to; + }; + if tokenizer_accept_string(t, "*") { + (*to).type = TOKEN_MUL; + return to; + }; + if tokenizer_accept_string(t, "/") { + (*to).type = TOKEN_DIV; + return to; + }; + if tokenizer_accept_string(t, "%") { + (*to).type = TOKEN_MOD; + return to; + }; + if tokenizer_accept_string(t, "!") { + (*to).type = TOKEN_BANG; + return to; + }; + if tokenizer_accept_string(t, "<") { + (*to).type = TOKEN_LESS; + return to; + }; + if tokenizer_accept_string(t, ">") { + (*to).type = TOKEN_GREATER; + return to; + }; + if tokenizer_accept_string(t, ".") { + (*to).type = TOKEN_DOT; + return to; + }; + + let maybe_int = tokenizer_accept_int_type(t); + if maybe_int != cast(*i64, null) { + (*to).type = TOKEN_NUMBER; + (*to).data = cast(*void, maybe_int); + return to; + }; + + let maybe_char = tokenizer_accept_char_type(t); + if maybe_char != cast(*i8, null) { + (*to).type = TOKEN_CHAR; + (*to).data = cast(*void, maybe_char); + return to; + }; + + let maybe_string = tokenizer_accept_string_type(t); + if maybe_string != cast(*i8, null) { + (*to).type = TOKEN_STRING; + (*to).data = cast(*void, maybe_string); + return to; + }; + + let string = 
tokenizer_consume_until_condition(t, (c: i8) => bool { + if isalphanum(c) { + return false; + }; + if c == '_' { + return false; + }; + return true; + }); + if strlen(string) == 0 { + printf("NO IDENT!\n"); + return cast(*token, null); + }; + + (*to).type = TOKEN_IDENTIFIER; + (*to).data = cast(*void, string); + + return to; +}; + +let tokenizer_init = (alloc: *arena, file: slice) => *tokenizer { + let t = cast(*tokenizer, arena_alloc(alloc, sizeof(tokenizer))); + (*t).arena = alloc; + (*t).offset = 0; + (*t).buf = cast(*i8, file.data); + (*t).buf_len = file.data_len; + + printf("File size: %d\n", (*t).buf_len); + + printf("%s\n", (*t).buf); + + return t; +}; + +let tokenizer_tokenize = (t: *tokenizer) => slice { + let tokens = cast(*token, arena_alloc((*t).arena, sizeof(token) * 40000)); /* why does it not care about type here */ + let tokens_len = 0; + + while true { + let tk = tokenizer_next(t); + if tk == cast(*token, null) { + break; + }; + printf("Add token: %d\n", (*tk).type); + + (*(tokens + cast(*token, tokens_len))) = *tk; + tokens_len = tokens_len + 1; + }; + + printf("PRINT TOKENS: %d\n", tokens_len); + + print_tokens(tokens, tokens_len); + + let res = slice{}; + res.data = cast(*void, tokens); + res.data_len = tokens_len; + return res; +}; diff --git a/src/tokenizer.zig b/src/tokenizer.zig deleted file mode 100644 index 5dacc75..0000000 --- a/src/tokenizer.zig +++ /dev/null @@ -1,327 +0,0 @@ -const std = @import("std"); - -const TokenizerError = error{ - TokenizingError, -}; - -pub const TokenType = union(enum) { - // Keywords - IMPORT: void, - LET: void, - EXTERN: void, - IF: void, - WHILE: void, - RETURN: void, - BREAK: void, - CONTINUE: void, - ARROW: void, - STRUCT: void, - TYPE: void, - - // Identifiers - IDENTIFIER: []u8, - - // Literals - NUMBER: i64, - BOOLEAN: bool, - NULL: void, - CHAR: u8, - STRING: []u8, - - // Operators - EQUALS: void, - PLUS: void, - MINUS: void, - MUL: void, - DIV: void, - MOD: void, - BANG: void, - LESS: void, - 
GREATER: void, - DOT: void, - - // Punctuation - SEMICOLON: void, - COMMA: void, - COLON: void, - LPAREN: void, - RPAREN: void, - LBRACE: void, - RBRACE: void, -}; - -const TokenLocation = struct { - col: u64, - row: u64, -}; - -pub const Token = struct { - location: TokenLocation, - offset: u64, - type: TokenType, -}; - -pub const Tokenizer = struct { - buf: []u8, - offset: u64, - - arena: std.mem.Allocator, - - pub fn init(buf: []u8, arena: std.mem.Allocator) !Tokenizer { - return Tokenizer{ .buf = buf, .offset = 0, .arena = arena }; - } - - pub fn tokenize(self: *Tokenizer) ![]Token { - var token_list = std.ArrayList(Token).init(self.arena); - - while (try self.next()) |token| { - std.debug.print("{any}\n", .{token}); - try token_list.append(token); - } - - return token_list.items; - } - - fn next(self: *Tokenizer) TokenizerError!?Token { - self.skip_whitespace(); - self.skip_comments(); - self.skip_whitespace(); - - if (self.offset >= self.buf.len) return null; - - if (self.accept_string("import")) return self.create_token(.{ .IMPORT = void{} }); - - if (self.accept_string("let")) return self.create_token(.{ .LET = void{} }); - if (self.accept_string("extern")) return self.create_token(.{ .EXTERN = void{} }); - if (self.accept_string("if")) return self.create_token(.{ .IF = void{} }); - if (self.accept_string("while")) return self.create_token(.{ .WHILE = void{} }); - if (self.accept_string("return")) return self.create_token(.{ .RETURN = void{} }); - if (self.accept_string("break")) return self.create_token(.{ .BREAK = void{} }); - if (self.accept_string("continue")) return self.create_token(.{ .CONTINUE = void{} }); - if (self.accept_string("true")) return self.create_token(.{ .BOOLEAN = true }); - if (self.accept_string("false")) return self.create_token(.{ .BOOLEAN = false }); - if (self.accept_string("null")) return self.create_token(.{ .NULL = void{} }); - if (self.accept_string("struct")) return self.create_token(.{ .STRUCT = void{} }); - if 
(self.accept_string("newtype")) return self.create_token(.{ .TYPE = void{} }); - - if (self.accept_string("=>")) return self.create_token(.{ .ARROW = void{} }); - if (self.accept_string(";")) return self.create_token(.{ .SEMICOLON = void{} }); - if (self.accept_string(",")) return self.create_token(.{ .COMMA = void{} }); - if (self.accept_string(":")) return self.create_token(.{ .COLON = void{} }); - if (self.accept_string("(")) return self.create_token(.{ .LPAREN = void{} }); - if (self.accept_string(")")) return self.create_token(.{ .RPAREN = void{} }); - if (self.accept_string("{")) return self.create_token(.{ .LBRACE = void{} }); - if (self.accept_string("}")) return self.create_token(.{ .RBRACE = void{} }); - if (self.accept_string("=")) return self.create_token(.{ .EQUALS = void{} }); - if (self.accept_string("+")) return self.create_token(.{ .PLUS = void{} }); - if (self.accept_string("-")) return self.create_token(.{ .MINUS = void{} }); - if (self.accept_string("*")) return self.create_token(.{ .MUL = void{} }); - if (self.accept_string("/")) return self.create_token(.{ .DIV = void{} }); - if (self.accept_string("%")) return self.create_token(.{ .MOD = void{} }); - if (self.accept_string("!")) return self.create_token(.{ .BANG = void{} }); - if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} }); - if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} }); - if (self.accept_string(".")) return self.create_token(.{ .DOT = void{} }); - - if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i }); - if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c }); - if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s }); - - const string = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return !std.ascii.isAlphanumeric(c) and c != '_'; - } - }.condition); - if (string.len == 0) return TokenizerError.TokenizingError; - - return self.create_token(.{ 
.IDENTIFIER = string }); - } - - fn skip_comments(self: *Tokenizer) void { - if (!self.accept_string("/*")) return; - - while (!self.accept_string("*/")) { - self.offset += 1; - } - } - - fn skip_whitespace(self: *Tokenizer) void { - while (true) { - if (self.offset >= self.buf.len) return; - const c = self.buf[self.offset]; - if (!std.ascii.isWhitespace(c)) return; - self.offset += 1; - } - } - - fn consume_until_condition(self: *Tokenizer, condition: fn (c: u8) bool) []u8 { - var res = std.ArrayList(u8).init(self.arena); - while (true) : (self.offset += 1) { - if (self.offset >= self.buf.len) { - return res.items; - } - - const c = self.buf[self.offset]; - - if (c == '\\') { - const next_c = self.buf[self.offset + 1]; - res.append(switch (next_c) { - 'n' => '\n', - 't' => '\t', - 'r' => '\r', - '0' => 0, - '\\' => '\\', - else => |x| x, - }) catch unreachable; - self.offset += 1; - continue; - } - - if (condition(c)) { - return res.items; - } - - res.append(c) catch unreachable; - } - return res.items; - } - - fn accept_string(self: *Tokenizer, substr: []const u8) bool { - if (self.offset + substr.len > self.buf.len) return false; - if (std.mem.eql(u8, self.buf[self.offset .. 
self.offset + substr.len], substr)) { - self.offset += substr.len; - return true; - } - return false; - } - - fn accept_int_type(self: *Tokenizer) ?i64 { - const res = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return !std.ascii.isDigit(c); - } - }.condition); - - return std.fmt.parseInt(i64, res, 10) catch null; - } - - fn accept_char_type(self: *Tokenizer) ?u8 { - const prev_offset = self.offset; - if (!self.accept_string("'")) { - self.offset = prev_offset; - return null; - } - - const string = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return c == '\''; - } - }.condition); - - std.debug.assert(string.len == 1); - - if (!self.accept_string("'")) { - self.offset = prev_offset; - return null; - } - - return string[0]; - } - - fn accept_string_type(self: *Tokenizer) ?[]u8 { - const prev_offset = self.offset; - if (!self.accept_string("\"")) { - self.offset = prev_offset; - return null; - } - - const string = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return c == '"'; - } - }.condition); - - if (!self.accept_string("\"")) { - self.offset = prev_offset; - return null; - } - - return string; - } - - fn create_token(self: *Tokenizer, token_type: TokenType) Token { - return Token{ - .location = self.compute_location(), - .offset = self.offset - 1, - .type = token_type, - }; - } - - fn compute_location(self: *Tokenizer) TokenLocation { - var location = TokenLocation{ .col = 1, .row = 1 }; - - var i: usize = 0; - while (i < self.offset) : (i += 1) { - if (self.buf[i] == '\n') { - location.row += 1; - location.col = 1; - } else { - location.col += 1; - } - } - - // We need to do this because we call this fn after we consume the token - location.row -= 1; - location.col -= 1; - - return location; - } -}; - -test "simple" { - const tests = [_]struct { - buf: []u8, - tokens: []const Token, - }{ - .{ - .buf = @constCast( - \\ let i = 2; - \\ - \\ print(i); - ), - .tokens = &[_]Token{ - Token{ .LET = 
{} }, - Token{ .IDENTIFIER = @constCast("i") }, - Token{ .EQUALS = {} }, - Token{ .NUMBER = 2 }, - Token{ .SEMICOLON = {} }, - Token{ .PRINT = {} }, - Token{ .LPAREN = {} }, - Token{ .IDENTIFIER = @constCast("i") }, - Token{ .RPAREN = {} }, - Token{ .SEMICOLON = {} }, - }, - }, - .{ - .buf = @constCast( - \\ - \\ let hello - ), - .tokens = &[_]Token{ - Token{ .LET = {} }, - Token{ .IDENTIFIER = @constCast("hello") }, - }, - }, - }; - - for (tests) |t| { - var token_list = std.ArrayList(Token).init(std.testing.allocator); - defer token_list.deinit(); - - var tokenizer = try Tokenizer.init(t.buf); - while (try tokenizer.next()) |token| { - try token_list.append(token); - } - try std.testing.expectEqualDeep(t.tokens, token_list.items); - } -} -- cgit 1.4.1