From fa92a157746ae17f295d31b7a047dfeb99624a13 Mon Sep 17 00:00:00 2001 From: Baitinq Date: Wed, 11 Jun 2025 00:16:17 +0200 Subject: Misc: Rename lang --- src/bootstrap/codegen.pry | 187 +++++++++++++++ src/bootstrap/codegen.src | 187 --------------- src/bootstrap/llvm.pry | 289 +++++++++++++++++++++++ src/bootstrap/llvm.src | 289 ----------------------- src/bootstrap/main.pry | 69 ++++++ src/bootstrap/main.src | 69 ------ src/bootstrap/parser.pry | 530 ++++++++++++++++++++++++++++++++++++++++++ src/bootstrap/parser.src | 530 ------------------------------------------ src/bootstrap/tokenizer.pry | 553 ++++++++++++++++++++++++++++++++++++++++++++ src/bootstrap/tokenizer.src | 553 -------------------------------------------- 10 files changed, 1628 insertions(+), 1628 deletions(-) create mode 100644 src/bootstrap/codegen.pry delete mode 100644 src/bootstrap/codegen.src create mode 100644 src/bootstrap/llvm.pry delete mode 100644 src/bootstrap/llvm.src create mode 100644 src/bootstrap/main.pry delete mode 100644 src/bootstrap/main.src create mode 100644 src/bootstrap/parser.pry delete mode 100644 src/bootstrap/parser.src create mode 100644 src/bootstrap/tokenizer.pry delete mode 100644 src/bootstrap/tokenizer.src (limited to 'src/bootstrap') diff --git a/src/bootstrap/codegen.pry b/src/bootstrap/codegen.pry new file mode 100644 index 0000000..7293c09 --- /dev/null +++ b/src/bootstrap/codegen.pry @@ -0,0 +1,187 @@ +import "llvm.pry"; + +let codegen = struct { + llvm_module: LLVMModuleRef, + llvm_context: LLVMContextRef, + builder: LLVMBuilderRef, + arena: *arena, +}; + +let codegen_init = (alloc: *arena) => *codegen { + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllTargets(); + LLVMInitializeAllAsmPrinters(); + LLVMInitializeAllAsmParsers(); + + let module = LLVMModuleCreateWithName("module"); + let context = LLVMGetGlobalContext(); + let builder = LLVMCreateBuilder(); + + let c = cast(*codegen, arena_alloc(alloc, sizeof(codegen))); + + (*c).llvm_module = module; + (*c).llvm_context = context; + (*c).builder = builder; + (*c).arena = alloc; + + return c; +}; + +let create_node = (c: *codegen, n: Node) => *Node { + let res = cast(*Node, arena_alloc((*c).arena, sizeof(Node))); + *res = n; + return res; +}; + +let Variable = struct { + value: LLVMValueRef, + type: LLVMTypeRef, + node: *Node, + node_type: *Node, + stack_level: *i64, +}; + +let codegen_create_variable = (c: *codegen, variable: Variable) => *Variable { + let v = cast(*Variable, arena_alloc((*c).arena, sizeof(Variable))); + *v = variable; + return v; +}; + +let codegen_generate_literal = (c: *codegen, literal_val: LLVMValueRef, name: *i8, node: *Node, node_type: *Node) => *Variable { + /* TODO: Global */ + let v = Variable{}; + v.value = literal_val; + v.type = cast(LLVMTypeRef, null); + v.stack_level = cast(*i64, null); + v.node = node; + v.node_type = node_type; + return codegen_create_variable(c, v); +}; + +let codegen_generate_expression_value = (c: *codegen, expression: *Node, name: *i8) => *Variable { + if ((*expression).type == NODE_PRIMARY_EXPRESSION_NUMBER) { + let n = (*cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, (*expression).data)).value; + + printf("X: %d\n", n); + + let node_type = Node{}; + node_type.type = NODE_TYPE_SIMPLE_TYPE; + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = "i64"; + (*d).underlying_type = cast(*Node, null); + node_type.data = cast(*void, d); + + return codegen_generate_literal(c, LLVMConstInt(LLVMInt64Type(), n, 0), name, expression, create_node(c, node_type)); + }; + + if ((*expression).type == NODE_FUNCTION_DEFINITION) { + printf("ASS %d\n", (*expression).type); + assert(false); /* TODO */ + printf("ERT\n"); + }; + + assert(false); + + return cast(*Variable, null); +}; + +let codegen_generate_assignment_statement = (c: *codegen, stmt: *NODE_ASSIGNMENT_STATEMENT_DATA) => i64 { + let lhs = *((*stmt).lhs); + let prhs = (*stmt).rhs; + + if (lhs.type == NODE_PRIMARY_EXPRESSION_IDENTIFIER) { + let identifier = (*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, lhs.data)).name; + printf("XX %s\n", identifier); + let variable = codegen_generate_expression_value(c, prhs, identifier); + assert(variable != cast(*Variable, null)); + return 0; + }; + + assert(false); + return 0; +}; + +let codegen_generate_statement = (c: *codegen, statement: *Node) => i64 { + let stmt = *statement; + assert(stmt.type == NODE_ASSIGNMENT_STATEMENT); /* TODO: generate other node types */ + + let res = codegen_generate_assignment_statement(c, cast(*NODE_ASSIGNMENT_STATEMENT_DATA, stmt.data)); + if res != 0 { + return 1; + }; + printf("STMT: %d\n", stmt.type); + return 0; +}; + +let codegen_generate = (c: *codegen, ast: *Node) => i64 { + assert((*ast).type == NODE_PROGRAM); + + let program = *cast(*NODE_PROGRAM_DATA, (*ast).data); + + let i = 0; + while i < program.statements_len { + let stmt = *(program.statements + cast(**Node, i)); + + let res = codegen_generate_statement(c, stmt); + if res != 0 { + return 1; + }; + + i = i + 1; + }; + + return 0; +}; + +let codegen_compile = (c: *codegen) => i64 { + /* Dump module */ + LLVMDumpModule((*c).llvm_module); + + /* Generate code */ + let triple = LLVMGetDefaultTargetTriple(); + let target_ref = cast(*LLVMTargetRef, arena_alloc((*c).arena, sizeof(*LLVMTargetRef))); + let message = cast(**i8, null); + let result = LLVMGetTargetFromTriple(triple, target_ref, message); + if result != 0 { + printf("Target output: %s\n", *message); + LLVMDisposeMessage(*message); + }; + let target_machine = LLVMCreateTargetMachine( + *target_ref, + triple, + "", + "", + LLVMCodeGenLevelDefault, + LLVMRelocDefault, + LLVMCodeModelDefault, + ); + LLVMDisposeMessage(triple); + result = LLVMVerifyModule((*c).llvm_module, LLVMAbortProcessAction, message); + if result != 0 { + printf("Verification output: %s\n", *message); + LLVMDisposeMessage(*message); + }; + + /* Generate the object file */ + let filename = "bootstrap_output.o"; + LLVMTargetMachineEmitToFile( + target_machine, + (*c).llvm_module, + filename, + LLVMObjectFile, + cast(**i8, null), + ); + LLVMDisposeTargetMachine(target_machine); + printf("Object file generated: %s\n", filename); + + return 0; +}; + +let codegen_deinit = (c: *codegen) => void { + LLVMDisposeModule((*c).llvm_module); + LLVMShutdown(); + LLVMDisposeBuilder((*c).builder); + return; +}; diff --git a/src/bootstrap/codegen.src b/src/bootstrap/codegen.src deleted file mode 100644 index 7059508..0000000 --- a/src/bootstrap/codegen.src +++ /dev/null @@ -1,187 +0,0 @@ -import "llvm.src"; - -let codegen = struct { - llvm_module: LLVMModuleRef, - llvm_context: LLVMContextRef, - builder: LLVMBuilderRef, - arena: *arena, -}; - -let codegen_init = (alloc: *arena) => *codegen { - LLVMInitializeAllTargetInfos(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllTargets(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllAsmParsers(); - - let module = LLVMModuleCreateWithName("module"); - let context = LLVMGetGlobalContext(); - let builder = LLVMCreateBuilder(); - - let c = cast(*codegen, arena_alloc(alloc, sizeof(codegen))); - - (*c).llvm_module = module; - (*c).llvm_context = context; - (*c).builder = builder; - (*c).arena = alloc; - - return c; -}; - -let create_node = (c: *codegen, n: Node) => *Node { - let res = cast(*Node, arena_alloc((*c).arena, sizeof(Node))); - *res = n; - return res; -}; - -let Variable = struct { - value: LLVMValueRef, - type: LLVMTypeRef, - node: *Node, - node_type: *Node, - stack_level: *i64, -}; - -let codegen_create_variable = (c: *codegen, variable: Variable) => *Variable { - let v = cast(*Variable, arena_alloc((*c).arena, sizeof(Variable))); - *v = variable; - return v; -}; - -let codegen_generate_literal = (c: *codegen, literal_val: LLVMValueRef, name: *i8, node: *Node, node_type: *Node) => *Variable { - /* TODO: Global */ - let v = Variable{}; - v.value = literal_val; - v.type = cast(LLVMTypeRef, null); - v.stack_level = cast(*i64, null); - v.node = node; - v.node_type = node_type; - return codegen_create_variable(c, v); -}; - -let codegen_generate_expression_value = (c: *codegen, expression: *Node, name: *i8) => *Variable { - if ((*expression).type == NODE_PRIMARY_EXPRESSION_NUMBER) { - let n = (*cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, (*expression).data)).value; - - printf("X: %d\n", n); - - let node_type = Node{}; - node_type.type = NODE_TYPE_SIMPLE_TYPE; - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*c).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = "i64"; - (*d).underlying_type = cast(*Node, null); - node_type.data = cast(*void, d); - - return codegen_generate_literal(c, LLVMConstInt(LLVMInt64Type(), n, 0), name, expression, create_node(c, node_type)); - }; - - if ((*expression).type == NODE_FUNCTION_DEFINITION) { - printf("ASS %d\n", (*expression).type); - assert(false); /* TODO */ - printf("ERT\n"); - }; - - assert(false); - - return cast(*Variable, null); -}; - -let codegen_generate_assignment_statement = (c: *codegen, stmt: *NODE_ASSIGNMENT_STATEMENT_DATA) => i64 { - let lhs = *((*stmt).lhs); - let prhs = (*stmt).rhs; - - if (lhs.type == NODE_PRIMARY_EXPRESSION_IDENTIFIER) { - let identifier = (*cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, lhs.data)).name; - printf("XX %s\n", identifier); - let variable = codegen_generate_expression_value(c, prhs, identifier); - assert(variable != cast(*Variable, null)); - return 0; - }; - - assert(false); - return 0; -}; - -let codegen_generate_statement = (c: *codegen, statement: *Node) => i64 { - let stmt = *statement; - assert(stmt.type == NODE_ASSIGNMENT_STATEMENT); /* TODO: generate other node types */ - - let res = codegen_generate_assignment_statement(c, cast(*NODE_ASSIGNMENT_STATEMENT_DATA, stmt.data)); - if res != 0 { - return 1; - }; - printf("STMT: %d\n", stmt.type); - return 0; -}; - -let codegen_generate = (c: *codegen, ast: *Node) => i64 { - assert((*ast).type == NODE_PROGRAM); - - let program = *cast(*NODE_PROGRAM_DATA, (*ast).data); - - let i = 0; - while i < program.statements_len { - let stmt = *(program.statements + cast(**Node, i)); - - let res = codegen_generate_statement(c, stmt); - if res != 0 { - return 1; - }; - - i = i + 1; - }; - - return 0; -}; - -let codegen_compile = (c: *codegen) => i64 { - /* Dump module */ - LLVMDumpModule((*c).llvm_module); - - /* Generate code */ - let triple = LLVMGetDefaultTargetTriple(); - let target_ref = cast(*LLVMTargetRef, arena_alloc((*c).arena, sizeof(*LLVMTargetRef))); - let message = cast(**i8, null); - let result = LLVMGetTargetFromTriple(triple, target_ref, message); - if result != 0 { - printf("Target output: %s\n", *message); - LLVMDisposeMessage(*message); - }; - let target_machine = LLVMCreateTargetMachine( - *target_ref, - triple, - "", - "", - LLVMCodeGenLevelDefault, - LLVMRelocDefault, - LLVMCodeModelDefault, - ); - LLVMDisposeMessage(triple); - result = LLVMVerifyModule((*c).llvm_module, LLVMAbortProcessAction, message); - if result != 0 { - printf("Verification output: %s\n", *message); - LLVMDisposeMessage(*message); - }; - - /* Generate the object file */ - let filename = "bootstrap_output.o"; - LLVMTargetMachineEmitToFile( - target_machine, - (*c).llvm_module, - filename, - LLVMObjectFile, - cast(**i8, null), - ); - LLVMDisposeTargetMachine(target_machine); - printf("Object file generated: %s\n", filename); - - return 0; -}; - -let codegen_deinit = (c: *codegen) => void { - LLVMDisposeModule((*c).llvm_module); - LLVMShutdown(); - LLVMDisposeBuilder((*c).builder); - return; -}; diff --git a/src/bootstrap/llvm.pry b/src/bootstrap/llvm.pry new file mode 100644 index 0000000..8cd600f --- /dev/null +++ b/src/bootstrap/llvm.pry @@ -0,0 +1,289 @@ +extern LLVMInitializeAArch64TargetInfo = () => void; +extern LLVMInitializeAMDGPUTargetInfo = () => void; +extern LLVMInitializeARMTargetInfo = () => void; +extern LLVMInitializeAVRTargetInfo = () => void; +extern LLVMInitializeBPFTargetInfo = () => void; +extern LLVMInitializeHexagonTargetInfo = () => void; +extern LLVMInitializeLanaiTargetInfo = () => void; +extern LLVMInitializeLoongArchTargetInfo = () => void; +extern LLVMInitializeMipsTargetInfo = () => void; +extern LLVMInitializeMSP430TargetInfo = () => void; +extern LLVMInitializeNVPTXTargetInfo = () => void; +extern LLVMInitializePowerPCTargetInfo = () => void; +extern LLVMInitializeRISCVTargetInfo = () => void; +extern LLVMInitializeSparcTargetInfo = () => void; +extern LLVMInitializeSystemZTargetInfo = () => void; +extern LLVMInitializeVETargetInfo = () => void; +extern LLVMInitializeWebAssemblyTargetInfo = () => void; +extern LLVMInitializeX86TargetInfo = () => void; +extern LLVMInitializeXCoreTargetInfo = () => void; + +let LLVMInitializeAllTargetInfos = () => void { + LLVMInitializeAArch64TargetInfo(); + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeARMTargetInfo(); + LLVMInitializeAVRTargetInfo(); + LLVMInitializeBPFTargetInfo(); + LLVMInitializeHexagonTargetInfo(); + LLVMInitializeLanaiTargetInfo(); + LLVMInitializeLoongArchTargetInfo(); + LLVMInitializeMipsTargetInfo(); + LLVMInitializeMSP430TargetInfo(); + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializePowerPCTargetInfo(); + LLVMInitializeRISCVTargetInfo(); + LLVMInitializeSparcTargetInfo(); + LLVMInitializeSystemZTargetInfo(); + LLVMInitializeVETargetInfo(); + LLVMInitializeWebAssemblyTargetInfo(); + LLVMInitializeX86TargetInfo(); + LLVMInitializeXCoreTargetInfo(); + return; +}; + +extern LLVMInitializeAArch64Target = () => void; +extern LLVMInitializeAMDGPUTarget = () => void; +extern LLVMInitializeARMTarget = () => void; +extern LLVMInitializeAVRTarget = () => void; +extern LLVMInitializeBPFTarget = () => void; +extern LLVMInitializeHexagonTarget = () => void; +extern LLVMInitializeLanaiTarget = () => void; +extern LLVMInitializeLoongArchTarget = () => void; +extern LLVMInitializeMipsTarget = () => void; +extern LLVMInitializeMSP430Target = () => void; +extern LLVMInitializeNVPTXTarget = () => void; +extern LLVMInitializePowerPCTarget = () => void; +extern LLVMInitializeRISCVTarget = () => void; +extern LLVMInitializeSparcTarget = () => void; +extern LLVMInitializeSystemZTarget = () => void; +extern LLVMInitializeVETarget = () => void; +extern LLVMInitializeWebAssemblyTarget = () => void; +extern LLVMInitializeX86Target = () => void; +extern LLVMInitializeXCoreTarget = () => void; + +let LLVMInitializeAllTargets = () => void { + LLVMInitializeAArch64Target(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeARMTarget(); + LLVMInitializeAVRTarget(); + LLVMInitializeBPFTarget(); + LLVMInitializeHexagonTarget(); + LLVMInitializeLanaiTarget(); + LLVMInitializeLoongArchTarget(); + LLVMInitializeMipsTarget(); + LLVMInitializeMSP430Target(); + LLVMInitializeNVPTXTarget(); + LLVMInitializePowerPCTarget(); + LLVMInitializeRISCVTarget(); + LLVMInitializeSparcTarget(); + LLVMInitializeSystemZTarget(); + LLVMInitializeVETarget(); + LLVMInitializeWebAssemblyTarget(); + LLVMInitializeX86Target(); + LLVMInitializeXCoreTarget(); + return; +}; + +extern LLVMInitializeAArch64TargetMC = () => void; +extern LLVMInitializeAMDGPUTargetMC = () => void; +extern LLVMInitializeARMTargetMC = () => void; +extern LLVMInitializeAVRTargetMC = () => void; +extern LLVMInitializeBPFTargetMC = () => void; +extern LLVMInitializeHexagonTargetMC = () => void; +extern LLVMInitializeLanaiTargetMC = () => void; +extern LLVMInitializeLoongArchTargetMC = () => void; +extern LLVMInitializeMipsTargetMC = () => void; +extern LLVMInitializeMSP430TargetMC = () => void; +extern LLVMInitializeNVPTXTargetMC = () => void; +extern LLVMInitializePowerPCTargetMC = () => void; +extern LLVMInitializeRISCVTargetMC = () => void; +extern LLVMInitializeSparcTargetMC = () => void; +extern LLVMInitializeSystemZTargetMC = () => void; +extern LLVMInitializeVETargetMC = () => void; +extern LLVMInitializeWebAssemblyTargetMC = () => void; +extern LLVMInitializeX86TargetMC = () => void; +extern LLVMInitializeXCoreTargetMC = () => void; + +let LLVMInitializeAllTargetMCs = () => void { + LLVMInitializeAArch64TargetMC(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeARMTargetMC(); + LLVMInitializeAVRTargetMC(); + LLVMInitializeBPFTargetMC(); + LLVMInitializeHexagonTargetMC(); + LLVMInitializeLanaiTargetMC(); + LLVMInitializeLoongArchTargetMC(); + LLVMInitializeMipsTargetMC(); + LLVMInitializeMSP430TargetMC(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializePowerPCTargetMC(); + LLVMInitializeRISCVTargetMC(); + LLVMInitializeSparcTargetMC(); + LLVMInitializeSystemZTargetMC(); + LLVMInitializeVETargetMC(); + LLVMInitializeWebAssemblyTargetMC(); + LLVMInitializeX86TargetMC(); + LLVMInitializeXCoreTargetMC(); + return; +}; + +extern LLVMInitializeAArch64AsmPrinter = () => void; +extern LLVMInitializeAMDGPUAsmPrinter = () => void; +extern LLVMInitializeARMAsmPrinter = () => void; +extern LLVMInitializeAVRAsmPrinter = () => void; +extern LLVMInitializeBPFAsmPrinter = () => void; +extern LLVMInitializeHexagonAsmPrinter = () => void; +extern LLVMInitializeLanaiAsmPrinter = () => void; +extern LLVMInitializeLoongArchAsmPrinter = () => void; +extern LLVMInitializeMipsAsmPrinter = () => void; +extern LLVMInitializeMSP430AsmPrinter = () => void; +extern LLVMInitializeNVPTXAsmPrinter = () => void; +extern LLVMInitializePowerPCAsmPrinter = () => void; +extern LLVMInitializeRISCVAsmPrinter = () => void; +extern LLVMInitializeSparcAsmPrinter = () => void; +extern LLVMInitializeSystemZAsmPrinter = () => void; +extern LLVMInitializeVEAsmPrinter = () => void; +extern LLVMInitializeWebAssemblyAsmPrinter = () => void; +extern LLVMInitializeX86AsmPrinter = () => void; +extern LLVMInitializeXCoreAsmPrinter = () => void; + +let LLVMInitializeAllAsmPrinters = () => void { + LLVMInitializeAArch64AsmPrinter(); + LLVMInitializeAMDGPUAsmPrinter(); + LLVMInitializeARMAsmPrinter(); + LLVMInitializeAVRAsmPrinter(); + LLVMInitializeBPFAsmPrinter(); + LLVMInitializeHexagonAsmPrinter(); + LLVMInitializeLanaiAsmPrinter(); + LLVMInitializeLoongArchAsmPrinter(); + LLVMInitializeMipsAsmPrinter(); + LLVMInitializeMSP430AsmPrinter(); + LLVMInitializeNVPTXAsmPrinter(); + LLVMInitializePowerPCAsmPrinter(); + LLVMInitializeRISCVAsmPrinter(); + LLVMInitializeSparcAsmPrinter(); + LLVMInitializeSystemZAsmPrinter(); + LLVMInitializeVEAsmPrinter(); + LLVMInitializeWebAssemblyAsmPrinter(); + LLVMInitializeX86AsmPrinter(); + LLVMInitializeXCoreAsmPrinter(); + return; +}; + +extern LLVMInitializeAArch64AsmParser = () => void; +extern LLVMInitializeAMDGPUAsmParser = () => void; +extern LLVMInitializeARMAsmParser = () => void; +extern LLVMInitializeAVRAsmParser = () => void; +extern LLVMInitializeBPFAsmParser = () => void; +extern LLVMInitializeHexagonAsmParser = () => void; +extern LLVMInitializeLanaiAsmParser = () => void; +extern LLVMInitializeLoongArchAsmParser = () => void; +extern LLVMInitializeMipsAsmParser = () => void; +extern LLVMInitializeMSP430AsmParser = () => void; +extern LLVMInitializePowerPCAsmParser = () => void; +extern LLVMInitializeRISCVAsmParser = () => void; +extern LLVMInitializeSparcAsmParser = () => void; +extern LLVMInitializeSystemZAsmParser = () => void; +extern LLVMInitializeVEAsmParser = () => void; +extern LLVMInitializeWebAssemblyAsmParser = () => void; +extern LLVMInitializeX86AsmParser = () => void; + +let LLVMInitializeAllAsmParsers = () => void { + LLVMInitializeAArch64AsmParser(); + LLVMInitializeAMDGPUAsmParser(); + LLVMInitializeARMAsmParser(); + LLVMInitializeAVRAsmParser(); + LLVMInitializeBPFAsmParser(); + LLVMInitializeHexagonAsmParser(); + LLVMInitializeLanaiAsmParser(); + LLVMInitializeLoongArchAsmParser(); + LLVMInitializeMipsAsmParser(); + LLVMInitializeMSP430AsmParser(); + LLVMInitializePowerPCAsmParser(); + LLVMInitializeRISCVAsmParser(); + LLVMInitializeSparcAsmParser(); + LLVMInitializeSystemZAsmParser(); + LLVMInitializeVEAsmParser(); + LLVMInitializeWebAssemblyAsmParser(); + LLVMInitializeX86AsmParser(); + return; +}; + +extern LLVMInitializeAArch64Disassembler = () => void; +extern LLVMInitializeAMDGPUDisassembler = () => void; +extern LLVMInitializeARMDisassembler = () => void; +extern LLVMInitializeAVRDisassembler = () => void; +extern LLVMInitializeBPFDisassembler = () => void; +extern LLVMInitializeHexagonDisassembler = () => void; +extern LLVMInitializeLanaiDisassembler = () => void; +extern LLVMInitializeLoongArchDisassembler = () => void; +extern LLVMInitializeMipsDisassembler = () => void; +extern LLVMInitializeMSP430Disassembler = () => void; +extern LLVMInitializePowerPCDisassembler = () => void; +extern LLVMInitializeRISCVDisassembler = () => void; +extern LLVMInitializeSparcDisassembler = () => void; +extern LLVMInitializeSystemZDisassembler = () => void; +extern LLVMInitializeVEDisassembler = () => void; +extern LLVMInitializeWebAssemblyDisassembler = () => void; +extern LLVMInitializeX86Disassembler = () => void; +extern LLVMInitializeXCoreDisassembler = () => void; + +let LLVMInitializeAllDisassemblers = () => void { + LLVMInitializeAArch64Disassembler(); + LLVMInitializeAMDGPUDisassembler(); + LLVMInitializeARMDisassembler(); + LLVMInitializeAVRDisassembler(); + LLVMInitializeBPFDisassembler(); + LLVMInitializeHexagonDisassembler(); + LLVMInitializeLanaiDisassembler(); + LLVMInitializeLoongArchDisassembler(); + LLVMInitializeMipsDisassembler(); + LLVMInitializeMSP430Disassembler(); + LLVMInitializePowerPCDisassembler(); + LLVMInitializeRISCVDisassembler(); + LLVMInitializeSparcDisassembler(); + LLVMInitializeSystemZDisassembler(); + LLVMInitializeVEDisassembler(); + LLVMInitializeWebAssemblyDisassembler(); + LLVMInitializeX86Disassembler(); + LLVMInitializeXCoreDisassembler(); + return; +}; + +let LLVMBuilderRef = newtype *void; +let LLVMModuleRef = newtype *void; +let LLVMTargetMachineRef = newtype *void; +let LLVMContextRef = newtype *void; +let LLVMTargetRef = newtype *void; + +let LLVMValueRef = newtype *void; +let LLVMTypeRef = newtype *void; + +extern LLVMConstInt = (LLVMTypeRef, i64, i64) => LLVMValueRef; +extern LLVMInt64Type = () => LLVMTypeRef; + +extern LLVMModuleCreateWithName = (*i8) => LLVMModuleRef; +extern LLVMGetGlobalContext = () => LLVMContextRef; +extern LLVMCreateBuilder = () => LLVMBuilderRef; +extern LLVMDisposeModule = (LLVMModuleRef) => void; +extern LLVMShutdown = () => void; +extern LLVMDisposeBuilder = (LLVMBuilderRef) => void; + +extern LLVMDumpModule = (LLVMModuleRef) => void; +extern LLVMGetDefaultTargetTriple = () => *i8; +extern LLVMGetTargetFromTriple = (*i8, *LLVMTargetRef, **i8) => i64; +extern LLVMDisposeMessage = (*i8) => void; +extern LLVMCreateTargetMachine = (LLVMTargetRef, *i8, *i8, *i8, i64, i64, i64) => LLVMTargetMachineRef; +extern LLVMDisposeTargetMachine = (LLVMTargetMachineRef) => void; + +let LLVMCodeGenLevelDefault = 2; +let LLVMRelocDefault = 0; +let LLVMCodeModelDefault = 0; + +extern LLVMVerifyModule = (LLVMModuleRef, i64, **i8) => i64; + +let LLVMAbortProcessAction = 0; + +extern LLVMTargetMachineEmitToFile = (LLVMTargetMachineRef, LLVMModuleRef, *i8, i64, **i8) => i64; + +let LLVMObjectFile = 1; diff --git a/src/bootstrap/llvm.src b/src/bootstrap/llvm.src deleted file mode 100644 index 8cd600f..0000000 --- a/src/bootstrap/llvm.src +++ /dev/null @@ -1,289 +0,0 @@ -extern LLVMInitializeAArch64TargetInfo = () => void; -extern LLVMInitializeAMDGPUTargetInfo = () => void; -extern LLVMInitializeARMTargetInfo = () => void; -extern LLVMInitializeAVRTargetInfo = () => void; -extern LLVMInitializeBPFTargetInfo = () => void; -extern LLVMInitializeHexagonTargetInfo = () => void; -extern LLVMInitializeLanaiTargetInfo = () => void; -extern LLVMInitializeLoongArchTargetInfo = () => void; -extern LLVMInitializeMipsTargetInfo = () => void; -extern LLVMInitializeMSP430TargetInfo = () => void; -extern LLVMInitializeNVPTXTargetInfo = () => void; -extern LLVMInitializePowerPCTargetInfo = () => void; -extern LLVMInitializeRISCVTargetInfo = () => void; -extern LLVMInitializeSparcTargetInfo = () => void; -extern LLVMInitializeSystemZTargetInfo = () => void; -extern LLVMInitializeVETargetInfo = () => void; -extern LLVMInitializeWebAssemblyTargetInfo = () => void; -extern LLVMInitializeX86TargetInfo = () => void; -extern LLVMInitializeXCoreTargetInfo = () => void; - -let LLVMInitializeAllTargetInfos = () => void { - LLVMInitializeAArch64TargetInfo(); - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeARMTargetInfo(); - LLVMInitializeAVRTargetInfo(); - LLVMInitializeBPFTargetInfo(); - LLVMInitializeHexagonTargetInfo(); - LLVMInitializeLanaiTargetInfo(); - LLVMInitializeLoongArchTargetInfo(); - LLVMInitializeMipsTargetInfo(); - LLVMInitializeMSP430TargetInfo(); - LLVMInitializeNVPTXTargetInfo(); - LLVMInitializePowerPCTargetInfo(); - LLVMInitializeRISCVTargetInfo(); - LLVMInitializeSparcTargetInfo(); - LLVMInitializeSystemZTargetInfo(); - LLVMInitializeVETargetInfo(); - LLVMInitializeWebAssemblyTargetInfo(); - LLVMInitializeX86TargetInfo(); - LLVMInitializeXCoreTargetInfo(); - return; -}; - -extern LLVMInitializeAArch64Target = () => void; -extern LLVMInitializeAMDGPUTarget = () => void; -extern LLVMInitializeARMTarget = () => void; -extern LLVMInitializeAVRTarget = () => void; -extern LLVMInitializeBPFTarget = () => void; -extern LLVMInitializeHexagonTarget = () => void; -extern LLVMInitializeLanaiTarget = () => void; -extern LLVMInitializeLoongArchTarget = () => void; -extern LLVMInitializeMipsTarget = () => void; -extern LLVMInitializeMSP430Target = () => void; -extern LLVMInitializeNVPTXTarget = () => void; -extern LLVMInitializePowerPCTarget = () => void; -extern LLVMInitializeRISCVTarget = () => void; -extern LLVMInitializeSparcTarget = () => void; -extern LLVMInitializeSystemZTarget = () => void; -extern LLVMInitializeVETarget = () => void; -extern LLVMInitializeWebAssemblyTarget = () => void; -extern LLVMInitializeX86Target = () => void; -extern LLVMInitializeXCoreTarget = () => void; - -let LLVMInitializeAllTargets = () => void { - LLVMInitializeAArch64Target(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeARMTarget(); - LLVMInitializeAVRTarget(); - LLVMInitializeBPFTarget(); - LLVMInitializeHexagonTarget(); - LLVMInitializeLanaiTarget(); - LLVMInitializeLoongArchTarget(); - LLVMInitializeMipsTarget(); - LLVMInitializeMSP430Target(); - LLVMInitializeNVPTXTarget(); - LLVMInitializePowerPCTarget(); - LLVMInitializeRISCVTarget(); - LLVMInitializeSparcTarget(); - LLVMInitializeSystemZTarget(); - LLVMInitializeVETarget(); - LLVMInitializeWebAssemblyTarget(); - LLVMInitializeX86Target(); - LLVMInitializeXCoreTarget(); - return; -}; - -extern LLVMInitializeAArch64TargetMC = () => void; -extern LLVMInitializeAMDGPUTargetMC = () => void; -extern LLVMInitializeARMTargetMC = () => void; -extern LLVMInitializeAVRTargetMC = () => void; -extern LLVMInitializeBPFTargetMC = () => void; -extern LLVMInitializeHexagonTargetMC = () => void; -extern LLVMInitializeLanaiTargetMC = () => void; -extern LLVMInitializeLoongArchTargetMC = () => void; -extern LLVMInitializeMipsTargetMC = () => void; -extern LLVMInitializeMSP430TargetMC = () => void; -extern LLVMInitializeNVPTXTargetMC = () => void; -extern LLVMInitializePowerPCTargetMC = () => void; -extern LLVMInitializeRISCVTargetMC = () => void; -extern LLVMInitializeSparcTargetMC = () => void; -extern LLVMInitializeSystemZTargetMC = () => void; -extern LLVMInitializeVETargetMC = () => void; -extern LLVMInitializeWebAssemblyTargetMC = () => void; -extern LLVMInitializeX86TargetMC = () => void; -extern LLVMInitializeXCoreTargetMC = () => void; - -let LLVMInitializeAllTargetMCs = () => void { - LLVMInitializeAArch64TargetMC(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeARMTargetMC(); - LLVMInitializeAVRTargetMC(); - LLVMInitializeBPFTargetMC(); - LLVMInitializeHexagonTargetMC(); - LLVMInitializeLanaiTargetMC(); - LLVMInitializeLoongArchTargetMC(); - LLVMInitializeMipsTargetMC(); - LLVMInitializeMSP430TargetMC(); - LLVMInitializeNVPTXTargetMC(); - LLVMInitializePowerPCTargetMC(); - LLVMInitializeRISCVTargetMC(); - LLVMInitializeSparcTargetMC(); - LLVMInitializeSystemZTargetMC(); - LLVMInitializeVETargetMC(); - LLVMInitializeWebAssemblyTargetMC(); - LLVMInitializeX86TargetMC(); - LLVMInitializeXCoreTargetMC(); - return; -}; - -extern LLVMInitializeAArch64AsmPrinter = () => void; -extern LLVMInitializeAMDGPUAsmPrinter = () => void; -extern LLVMInitializeARMAsmPrinter = () => void; -extern LLVMInitializeAVRAsmPrinter = () => void; -extern LLVMInitializeBPFAsmPrinter = () => void; -extern LLVMInitializeHexagonAsmPrinter = () => void; -extern LLVMInitializeLanaiAsmPrinter = () => void; -extern LLVMInitializeLoongArchAsmPrinter = () => void; -extern LLVMInitializeMipsAsmPrinter = () => void; -extern LLVMInitializeMSP430AsmPrinter = () => void; -extern LLVMInitializeNVPTXAsmPrinter = () => void; -extern LLVMInitializePowerPCAsmPrinter = () => void; -extern LLVMInitializeRISCVAsmPrinter = () => void; -extern LLVMInitializeSparcAsmPrinter = () => void; -extern LLVMInitializeSystemZAsmPrinter = () => void; -extern LLVMInitializeVEAsmPrinter = () => void; -extern LLVMInitializeWebAssemblyAsmPrinter = () => void; -extern LLVMInitializeX86AsmPrinter = () => void; -extern LLVMInitializeXCoreAsmPrinter = () => void; - -let LLVMInitializeAllAsmPrinters = () => void { - LLVMInitializeAArch64AsmPrinter(); - LLVMInitializeAMDGPUAsmPrinter(); - LLVMInitializeARMAsmPrinter(); - LLVMInitializeAVRAsmPrinter(); - LLVMInitializeBPFAsmPrinter(); - LLVMInitializeHexagonAsmPrinter(); - LLVMInitializeLanaiAsmPrinter(); - LLVMInitializeLoongArchAsmPrinter(); - LLVMInitializeMipsAsmPrinter(); - LLVMInitializeMSP430AsmPrinter(); - LLVMInitializeNVPTXAsmPrinter(); - LLVMInitializePowerPCAsmPrinter(); - LLVMInitializeRISCVAsmPrinter(); - LLVMInitializeSparcAsmPrinter(); - LLVMInitializeSystemZAsmPrinter(); - LLVMInitializeVEAsmPrinter(); - LLVMInitializeWebAssemblyAsmPrinter(); - LLVMInitializeX86AsmPrinter(); - LLVMInitializeXCoreAsmPrinter(); - return; -}; - -extern LLVMInitializeAArch64AsmParser = () => void; -extern LLVMInitializeAMDGPUAsmParser = () => void; -extern LLVMInitializeARMAsmParser = () => void; -extern LLVMInitializeAVRAsmParser = () => void; -extern LLVMInitializeBPFAsmParser = () => void; -extern LLVMInitializeHexagonAsmParser = () => void; -extern LLVMInitializeLanaiAsmParser = () => void; -extern LLVMInitializeLoongArchAsmParser = () => void; -extern LLVMInitializeMipsAsmParser = () => void; -extern LLVMInitializeMSP430AsmParser = () => void; -extern LLVMInitializePowerPCAsmParser = () => void; -extern LLVMInitializeRISCVAsmParser = () => void; -extern LLVMInitializeSparcAsmParser = () => void; -extern LLVMInitializeSystemZAsmParser = () => void; -extern LLVMInitializeVEAsmParser = () => void; -extern LLVMInitializeWebAssemblyAsmParser = () => void; -extern LLVMInitializeX86AsmParser = () => void; - -let LLVMInitializeAllAsmParsers = () => void { - LLVMInitializeAArch64AsmParser(); - LLVMInitializeAMDGPUAsmParser(); - LLVMInitializeARMAsmParser(); - LLVMInitializeAVRAsmParser(); - LLVMInitializeBPFAsmParser(); - LLVMInitializeHexagonAsmParser(); - LLVMInitializeLanaiAsmParser(); - LLVMInitializeLoongArchAsmParser(); - LLVMInitializeMipsAsmParser(); - LLVMInitializeMSP430AsmParser(); - LLVMInitializePowerPCAsmParser(); - LLVMInitializeRISCVAsmParser(); - LLVMInitializeSparcAsmParser(); - LLVMInitializeSystemZAsmParser(); - LLVMInitializeVEAsmParser(); - LLVMInitializeWebAssemblyAsmParser(); - LLVMInitializeX86AsmParser(); - return; -}; - -extern LLVMInitializeAArch64Disassembler = () => void; -extern LLVMInitializeAMDGPUDisassembler = () => void; -extern LLVMInitializeARMDisassembler = () => void; -extern LLVMInitializeAVRDisassembler = () => void; -extern LLVMInitializeBPFDisassembler = () => void; -extern LLVMInitializeHexagonDisassembler = () => void; -extern LLVMInitializeLanaiDisassembler = () => void; -extern LLVMInitializeLoongArchDisassembler = () => void; -extern LLVMInitializeMipsDisassembler = () => void; -extern LLVMInitializeMSP430Disassembler = () => void; -extern LLVMInitializePowerPCDisassembler = () => void; -extern LLVMInitializeRISCVDisassembler = () => void; -extern LLVMInitializeSparcDisassembler = () => void; -extern LLVMInitializeSystemZDisassembler = () => void; -extern LLVMInitializeVEDisassembler = () => void; -extern LLVMInitializeWebAssemblyDisassembler = () => void; -extern LLVMInitializeX86Disassembler = () => void; -extern LLVMInitializeXCoreDisassembler = () => void; - -let LLVMInitializeAllDisassemblers = () => void { - LLVMInitializeAArch64Disassembler(); - LLVMInitializeAMDGPUDisassembler(); - LLVMInitializeARMDisassembler(); - LLVMInitializeAVRDisassembler(); - LLVMInitializeBPFDisassembler(); - LLVMInitializeHexagonDisassembler(); - LLVMInitializeLanaiDisassembler(); - LLVMInitializeLoongArchDisassembler(); - LLVMInitializeMipsDisassembler(); - LLVMInitializeMSP430Disassembler(); - LLVMInitializePowerPCDisassembler(); - LLVMInitializeRISCVDisassembler(); - LLVMInitializeSparcDisassembler(); - LLVMInitializeSystemZDisassembler(); - LLVMInitializeVEDisassembler(); - LLVMInitializeWebAssemblyDisassembler(); - LLVMInitializeX86Disassembler(); - LLVMInitializeXCoreDisassembler(); - return; -}; - -let LLVMBuilderRef = newtype *void; -let LLVMModuleRef = newtype *void; -let LLVMTargetMachineRef = newtype *void; -let LLVMContextRef = newtype *void; -let LLVMTargetRef = newtype *void; - -let LLVMValueRef = newtype *void; -let LLVMTypeRef = newtype *void; - -extern LLVMConstInt = (LLVMTypeRef, i64, i64) => LLVMValueRef; -extern LLVMInt64Type = () => LLVMTypeRef; - -extern LLVMModuleCreateWithName = (*i8) => LLVMModuleRef; -extern LLVMGetGlobalContext = () => LLVMContextRef; -extern LLVMCreateBuilder = () => LLVMBuilderRef; -extern LLVMDisposeModule = (LLVMModuleRef) => void; -extern LLVMShutdown = () => void; -extern LLVMDisposeBuilder = (LLVMBuilderRef) => void; - -extern LLVMDumpModule = (LLVMModuleRef) => void; -extern LLVMGetDefaultTargetTriple = () => *i8; -extern LLVMGetTargetFromTriple = (*i8, *LLVMTargetRef, **i8) => i64; -extern LLVMDisposeMessage = (*i8) => void; -extern LLVMCreateTargetMachine = (LLVMTargetRef, *i8, *i8, *i8, i64, i64, i64) => LLVMTargetMachineRef; -extern LLVMDisposeTargetMachine = (LLVMTargetMachineRef) => void; - -let LLVMCodeGenLevelDefault = 2; -let LLVMRelocDefault = 0; -let LLVMCodeModelDefault = 0; - -extern LLVMVerifyModule = (LLVMModuleRef, i64, **i8) => i64; - -let LLVMAbortProcessAction = 0; - -extern LLVMTargetMachineEmitToFile = (LLVMTargetMachineRef, LLVMModuleRef, *i8, i64, **i8) => i64; - -let LLVMObjectFile = 1; diff --git a/src/bootstrap/main.pry b/src/bootstrap/main.pry new file mode 100644 index 0000000..29e41ff --- /dev/null +++ b/src/bootstrap/main.pry @@ -0,0 +1,69 @@ +extern fopen = (*i8, *i8) => *i8; +extern fgets = (*i8, i64, *i8) => void; +extern feof = (*i8) => bool; +extern fseek = (*i8, i64, i64) => i64; +extern ftell = (*i8) => i64; +extern fread = (*i8, i64, i64, *i8) => i64; +extern fclose = (*i8) => *i8; + +import "!stdlib.pry"; +import "!mem.pry"; + +let slice = struct { + data: *void, + data_len: i64, +}; + +import "tokenizer.pry"; +import "parser.pry"; +import "codegen.pry"; + +let read_file = (filename: *i8, alloc: *arena) => slice { + let file = fopen(filename, "r"); + + fseek(file, 0, 2); + let file_size = ftell(file); + fseek(file, 0, 0); + + let buf = cast(*i8, arena_alloc(alloc, file_size + 1)); + + let bytes_read = fread(buf, 1, file_size, file); + (*(buf + cast(*i8, bytes_read))) = '\0'; + + fclose(file); + + let sl = slice{}; + sl.data = cast(*void, buf); + sl.data_len = file_size; + return sl; +}; + +let main = (argc: i64, argv: **i8) => i64 { + if argc < 2 { + printf("Need filename!\n"); + return 1; + }; + + let filename = *(argv + cast(**i8, 1)); + + printf("%s\n", filename); + + let alloc = arena_init(999999999); + + let file = read_file(filename, alloc); + + let t = tokenizer_init(alloc, file); + let ts = tokenizer_tokenize(t); + + let p = parser_init(cast(*token, ts.data), ts.data_len, alloc); + let ast = parse(p); + + let c = codegen_init(alloc); + let res = codegen_generate(c, ast); + let res = codegen_compile(c); + codegen_deinit(c); + + arena_free(alloc); + + return 0; +}; diff --git a/src/bootstrap/main.src b/src/bootstrap/main.src deleted file mode 100644 index 571cc03..0000000 --- a/src/bootstrap/main.src +++ /dev/null @@ -1,69 +0,0 @@ -extern fopen = (*i8, *i8) => *i8; -extern fgets = (*i8, i64, *i8) => void; -extern feof = (*i8) => bool; -extern fseek = (*i8, i64, i64) => i64; -extern ftell = (*i8) => i64; -extern fread = (*i8, i64, i64, *i8) => i64; -extern fclose = (*i8) => *i8; - -import "!stdlib.src"; -import "!mem.src"; - -let slice = struct { - data: *void, - data_len: i64, -}; - -import "tokenizer.src"; -import "parser.src"; -import "codegen.src"; - -let read_file = (filename: *i8, alloc: *arena) => slice { - let file = fopen(filename, "r"); - - fseek(file, 0, 2); - let file_size = ftell(file); - fseek(file, 0, 0); - - let buf = cast(*i8, arena_alloc(alloc, file_size + 1)); - - let bytes_read = fread(buf, 1, file_size, file); - (*(buf + cast(*i8, bytes_read))) = '\0'; - - fclose(file); - - let sl = slice{}; - sl.data = cast(*void, buf); - sl.data_len = file_size; - return sl; -}; - -let main = (argc: i64, argv: **i8) => i64 { - if argc < 2 { - printf("Need filename!\n"); - return 1; - }; - - let filename = *(argv + cast(**i8, 1)); - - printf("%s\n", filename); - - let alloc = arena_init(999999999); - - let file = read_file(filename, alloc); - - let t = tokenizer_init(alloc, file); - let ts = tokenizer_tokenize(t); - - let p = parser_init(cast(*token, ts.data), ts.data_len, alloc); - let ast = parse(p); - - let c = codegen_init(alloc); - let res = codegen_generate(c, ast); - let res = codegen_compile(c); - codegen_deinit(c); - - arena_free(alloc); - - return 0; -}; diff --git a/src/bootstrap/parser.pry b/src/bootstrap/parser.pry new file mode 100644 index 0000000..daac296 --- /dev/null +++ b/src/bootstrap/parser.pry @@ -0,0 +1,530 @@ +let Node = struct { + type: i64, + data: *void, +}; + +let NODE_PROGRAM = 1; +let NODE_STATEMENT = 2; +let NODE_ASSIGNMENT_STATEMENT = 3; +let NODE_IMPORT_DECLARATION = 4; +let NODE_FUNCTION_CALL_STATEMENT = 5; +let NODE_IF_STATEMENT = 6; +let NODE_WHILE_STATEMENT = 7; +let NODE_EQUALITY_EXPRESSION = 8; +let NODE_ADDITIVE_EXPRESSION = 9; +let NODE_MULTIPLICATIVE_EXPRESSION = 10; +let NODE_UNARY_EXPRESSION = 11; +let NODE_POSTFIX_EXPRESSION = 12; +let NODE_PRIMARY_EXPRESSION_NUMBER = 13; +let NODE_PRIMARY_EXPRESSION_BOOLEAN = 14; +let NODE_PRIMARY_EXPRESSION_NULL = 15; +let NODE_PRIMARY_EXPRESSION_CHAR = 16; +let NODE_PRIMARY_EXPRESSION_STRING = 17; +let NODE_PRIMARY_EXPRESSION_IDENTIFIER = 18; +let NODE_FUNCTION_DEFINITION = 19; +let NODE_STRUCT_INSTANCIATION = 20; +let NODE_FIELD_ACCESS = 21; +let NODE_TYPE_SIMPLE_TYPE = 22; +let NODE_TYPE_FUNCTION_TYPE = 23; +let NODE_TYPE_POINTER_TYPE = 24; +let NODE_TYPE_STRUCT_TYPE = 25; +let NODE_RETURN_STATEMENT = 26; +let NODE_CAST_STATEMENT = 27; +let NODE_SIZEOF_STATEMENT = 28; +let NODE_BREAK_STATEMENT = 29; +let NODE_CONTINUE_STATEMENT = 30; + +let EQUALITY_EXPRESSION_TYPE_EQ = 0; +let EQUALITY_EXPRESSION_TYPE_NE = 1; +let EQUALITY_EXPRESSION_TYPE_GE = 2; +let EQUALITY_EXPRESSION_TYPE_LE = 3; +let EQUALITY_EXPRESSION_TYPE_LT = 4; +let EQUALITY_EXPRESSION_TYPE_GT = 5; + +let MULTIPLICATIVE_EXPRESSION_TYPE_MUL = 0; +let MULTIPLICATIVE_EXPRESSION_TYPE_DIV = 1; +let MULTIPLICATIVE_EXPRESSION_TYPE_MOD = 2; + +let UNARY_EXPRESSION_TYPE_NOT = 0; +let UNARY_EXPRESSION_TYPE_MINUS = 1; +let UNARY_EXPRESSION_TYPE_STAR = 2; + +let NODE_PROGRAM_DATA = struct { + statements: **Node, + statements_len: i64, +}; + +let NODE_STATEMENT_DATA = struct { + statement: *Node, +}; + +let NODE_ASSIGNMENT_STATEMENT_DATA = struct { + is_declaration: bool, + is_dereference: bool, + lhs: *Node, + rhs: *Node, +}; + +let NODE_IMPORT_DECLARATION_DATA = struct { + filename: *i8, + program: *Node, +}; + +let NODE_FUNCTION_CALL_STATEMENT_DATA = struct { + expression: *Node, + arguments: *Node, + arguments_len: i64, +}; + +let NODE_IF_STATEMENT_DATA = struct { + condition: *Node, + statements: **Node, + statements_len: i64, +}; + +let NODE_WHILE_STATEMENT_DATA = struct { + condition: *Node, + statements: **Node, + statements_len: i64, +}; + +let NODE_EQUALITY_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, + typ: i64, +}; + +let NODE_ADDITIVE_EXPRESSION_DATA = struct { + addition: bool, + lhs: *Node, + rhs: *Node, +}; + +let NODE_MULTIPLICATIVE_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, + typ: i64, +}; + +let NODE_UNARY_EXPRESSION_DATA = struct { + typ: i64, + expression: *Node, +}; + +let NODE_POSTFIX_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, +}; + +let NODE_PRIMARY_EXPRESSION_NUMBER_DATA = struct { + value: i64, +}; + +let NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA = struct { + value: bool, +}; + +let NODE_PRIMARY_EXPRESSION_CHAR_DATA = struct { + value: i8, +}; + +let NODE_PRIMARY_EXPRESSION_STRING_DATA = struct { + value: *i8, +}; + +let NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA = struct { + name: *i8, + type: *Node, +}; + +let NODE_FUNCTION_DEFINITION_DATA = struct { + statements: **Node, + statements_len: i64, + parameters: **Node, + parameters_len: i64, + retur_type: *Node, +}; + +let NODE_STRUCT_INSTANCIATION_DATA = struct { + typ: *i8, +}; + +let NODE_FIELD_ACCESS_DATA = struct { + expression: *Node, + name: *i8, +}; + +let NODE_TYPE_SIMPLE_TYPE_DATA = struct { + name: *i8, + underlying_type: *Node, +}; + +let NODE_TYPE_FUNCTION_TYPE_DATA = struct { + parameters: *Node, + parameters_len: i64, + retur_type: *Node, +}; + +let NODE_TYPE_POINTER_TYPE_DATA = struct { + type: *Node, +}; + +let NODE_TYPE_STRUCT_TYPE_DATA = struct { + fields: *Node, + fields_len: i64, +}; + +let NODE_RETURN_STATEMENT_DATA = struct { + expression: *Node, +}; + +let NODE_CAST_STATEMENT_DATA = struct { + typ: *Node, + expression: *Node, +}; + +let NODE_SIZEOF_STATEMENT_DATA = struct { + typ: *Node, +}; + + +let parser = struct { + tokens: *token, + tokens_len: i64, + + offset: i64, + + arena: *arena, +}; + +extern parser_parse_statement = (*parser) => *Node; +extern parser_parse_expression = (*parser) => *Node; + +let parser_init = (ts: *token, ts_len: i64, ar: *arena) => *parser { + let p = cast(*parser, arena_alloc(ar, sizeof(parser))); + + (*p).tokens = ts; + (*p).tokens_len = ts_len; + (*p).offset = 0; + (*p).arena = ar; + + return p; +}; + +let create_node = (p: *parser, n: Node) => *Node { + let res = cast(*Node, arena_alloc((*p).arena, sizeof(Node))); + *res = n; + return res; +}; + +let parser_peek_token = (p: *parser) => *token { + if (*p).offset >= (*p).tokens_len { + return cast(*token, null); + }; + + return ((*p).tokens + cast(*token, (*p).offset)); +}; + + let parser_consume_token = (p: *parser) => *token { + if (*p).offset >= (*p).tokens_len { + return cast(*token, null); + }; + + let t = ((*p).tokens + cast(*token, (*p).offset)); + (*p).offset = (*p).offset + 1; + return t; +}; + +let parser_accept_token = (p: *parser, t: i64) => *token { + let curr_token = parser_peek_token(p); + if curr_token == cast(*token, null) { + return cast(*token, null); + }; + + if (*curr_token).type == t { + return parser_consume_token(p); + }; + return cast(*token, null); +}; + +let parser_accept_parse = (p: *parser, parsing_func: (*parser) => *Node) => *Node { + let prev_offset = (*p).offset; + let node = parsing_func(p); + if node == cast(*Node, null) { + (*p).offset = prev_offset; + }; + return node; +}; + +/* ReturnStatement ::= RETURN (Expression)? */ +let parser_parse_return_statement = (p: *parser) => *Node { + if parser_accept_token(p, TOKEN_RETURN) == cast(*token, null) { + return cast(*Node, null); + }; + + let maybe_expr = parser_accept_parse(p, parser_parse_expression); + + let d = cast(*NODE_RETURN_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_RETURN_STATEMENT_DATA ))); + (*d).expression = maybe_expr; + + let r = Node{}; + r.type = NODE_RETURN_STATEMENT; + r.data = cast(*void, d); + + return create_node(p, r); +}; + +/* Type ::= IDENTIFIER | FunctionType */ +let parser_parse_type = (p: *parser) => *Node { + /* TODO: Function type */ + let to = parser_consume_token(p); + assert(to != cast(*token, null)); + assert((*to).type == TOKEN_IDENTIFIER); + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = cast(*i8, (*to).data); + (*d).underlying_type = cast(*Node, null); + + let r = Node{}; + r.type = NODE_TYPE_SIMPLE_TYPE; + r.data = cast(*void, d); + + return create_node(p, r); +}; + +/* FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* */ +let parser_parse_function_parameters = (p: *parser) => *slice { + /* TODO: Params */ + + let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(**Node) * 20)); + let i = 0; + while true { + if i != 0 { + parser_accept_token(p, TOKEN_COMMA); + }; + let ident = parser_accept_token(p, TOKEN_IDENTIFIER); + if ident == cast(*token, null) { + break; + }; + /* TODO: Rest */ + }; + + let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice))); + (*s).data = cast(*void, node_list); + (*s).data_len = 0; + return s; +}; + +/* FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE */ +let parser_parse_function_definition = (p: *parser) => *Node { + if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { + return cast(*Node, null); + }; + let params = parser_parse_function_parameters(p); + if params == cast(*slice, null) { + return cast(*Node, null); + }; + if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { + return cast(*Node, null); + }; + if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) { + return cast(*Node, null); + }; + let retur_type = parser_parse_type(p); + if retur_type == cast(*Node, null) { + return cast(*Node, null); + }; + if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { + return cast(*Node, null); + }; + + /* TODO: Body */ + let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100)); + let i = 0; + while true { + let n = parser_accept_parse(p, parser_parse_statement); + if n == cast(*Node, null) { + break; + }; + (*(statements + cast(**Node, i))) = n; + i = i + 1; + }; + + + if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { + return cast(*Node, null); + }; + + + let d = cast(*NODE_FUNCTION_DEFINITION_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_DEFINITION_DATA))); + (*d).statements = statements; + (*d).statements_len = i; + (*d).parameters = cast(**Node, params.data); + (*d).parameters_len = params.data_len; + (*d).retur_type = cast(*Node, null); + + let n = Node{}; + n.type = NODE_FUNCTION_DEFINITION; + n.data = cast(*void, d); + + return create_node(p, n); +}; + +/* PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN */ +let parser_parse_primary_expression = (p: *parser) => *Node { + let stmt = parser_accept_parse(p, parser_parse_function_definition); + if stmt != cast(*Node, null) { + return stmt; + }; + + let tok = parser_consume_token(p); + if tok == cast(*token, null) { + printf("NO TOK\n"); + return cast(*Node, null); + }; + + if (*tok).type == TOKEN_IDENTIFIER { + let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); + (*d).name = cast(*i8, (*tok).data); + (*d).type = cast(*Node, null); /* TODO */ + let n = Node{}; + n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; + n.data = cast(*void, d); + return create_node(p, n); + }; + + if (*tok).type == TOKEN_NUMBER { + let d = cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_NUMBER_DATA))); + (*d).value = *(cast(*i64, (*tok).data)); + let n = Node{}; + n.type = NODE_PRIMARY_EXPRESSION_NUMBER; + n.data = cast(*void, d); + return create_node(p, n); + }; + + printf("DIFF TYPE: %d\n", (*tok).type); + + return cast(*Node, null); +}; + +/* EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression */ +let parser_parse_equality_expression = (p: *parser) => *Node { + /* TODO */ + return cast(*Node, null); +}; + +/* AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* */ +let parser_parse_additive_expression = (p: *parser) => *Node { + /* TODO */ + return parser_parse_primary_expression(p); +}; + +/* Expression ::= EqualityExpression | AdditiveExpression */ +let parser_parse_expression = (p: *parser) => *Node { + let ex = parser_accept_parse(p, parser_parse_equality_expression); + if ex != cast(*Node, null) { + return ex; + }; + let ax = parser_accept_parse(p, parser_parse_additive_expression); + if ax != cast(*Node, null) { + return ax; + }; + + return cast(*Node, null); +}; + +/* AssignmentStatement ::= ("let")? ("*")? Expression EQUALS Expression */ +let parse_assignment_statement = (p: *parser) => *Node { + let is_declaration = false; + if parser_accept_token(p, TOKEN_LET) != cast(*token, null) { + printf("IS DECLARATION\n"); + is_declaration = true; + }; + + /* TODO: is_dereference */ + + let lhs = parser_parse_expression(p); /* TODO */ + if lhs == cast(*Node, null) { + printf("ANOTHER BNLL\n"); + return cast(*Node, null); + }; + + if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) { + return cast(*Node, null); + }; + + let rhs = parser_parse_expression(p); /* TODO */ + if rhs == cast(*Node, null) { + printf("NUL EXP\n"); + return cast(*Node, null); + }; + + let d = cast(*NODE_ASSIGNMENT_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA ))); + (*d).is_declaration = is_declaration; + (*d).is_dereference = false; + (*d).lhs = lhs; + (*d).rhs = rhs; + let n = Node{}; + n.type = NODE_ASSIGNMENT_STATEMENT; + n.data = cast(*void, d); + printf("CONTINUE\n"); + return create_node(p, n); +}; + +/* Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | ReturnStatement | "break" | "continue") SEMICOLON */ +let parser_parse_statement = (p: *parser) => *Node { + let assignment = parser_accept_parse(p, parse_assignment_statement); + if assignment != cast(*Node, null) { + if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { + return cast(*Node, null); + }; + return assignment; + }; + + let retu = parser_accept_parse(p, parser_parse_return_statement); + if retu != cast(*Node, null) { + if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { + return cast(*Node, null); + }; + return retu; + }; + + + printf("None\n"); + + return cast(*Node, null); +}; + +/* Program ::= Statement+ */ +let parse_program = (p: *parser) => *Node { + let nodes = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 1000)); + + let i = 0; + while (*p).offset < (*p).tokens_len { + let s = parser_parse_statement(p); + assert(s != cast(*Node, null)); + (*(nodes + cast(**Node, i))) = s; + i = i + 1; + }; + + let d = cast(*NODE_PROGRAM_DATA, arena_alloc((*p).arena, sizeof(NODE_PROGRAM_DATA))); + (*d).statements = nodes; + (*d).statements_len = i; + let n = Node{}; + n.type = NODE_PROGRAM; + n.data = cast(*void, d); + return create_node(p, n); +}; + +let parse = (p: *parser) => *Node { + return parse_program(p); +}; + +/* + +For example -2: + +* parsing assignment statement +* parsing ident and num literals + +*/ diff --git a/src/bootstrap/parser.src b/src/bootstrap/parser.src deleted file mode 100644 index daac296..0000000 --- a/src/bootstrap/parser.src +++ /dev/null @@ -1,530 +0,0 @@ -let Node = struct { - type: i64, - data: *void, -}; - -let NODE_PROGRAM = 1; -let NODE_STATEMENT = 2; -let NODE_ASSIGNMENT_STATEMENT = 3; -let NODE_IMPORT_DECLARATION = 4; -let NODE_FUNCTION_CALL_STATEMENT = 5; -let NODE_IF_STATEMENT = 6; -let NODE_WHILE_STATEMENT = 7; -let NODE_EQUALITY_EXPRESSION = 8; -let NODE_ADDITIVE_EXPRESSION = 9; -let NODE_MULTIPLICATIVE_EXPRESSION = 10; -let NODE_UNARY_EXPRESSION = 11; -let NODE_POSTFIX_EXPRESSION = 12; -let NODE_PRIMARY_EXPRESSION_NUMBER = 13; -let NODE_PRIMARY_EXPRESSION_BOOLEAN = 14; -let NODE_PRIMARY_EXPRESSION_NULL = 15; -let NODE_PRIMARY_EXPRESSION_CHAR = 16; -let NODE_PRIMARY_EXPRESSION_STRING = 17; -let NODE_PRIMARY_EXPRESSION_IDENTIFIER = 18; -let NODE_FUNCTION_DEFINITION = 19; -let NODE_STRUCT_INSTANCIATION = 20; -let NODE_FIELD_ACCESS = 21; -let NODE_TYPE_SIMPLE_TYPE = 22; -let NODE_TYPE_FUNCTION_TYPE = 23; -let NODE_TYPE_POINTER_TYPE = 24; -let NODE_TYPE_STRUCT_TYPE = 25; -let NODE_RETURN_STATEMENT = 26; -let NODE_CAST_STATEMENT = 27; -let NODE_SIZEOF_STATEMENT = 28; -let NODE_BREAK_STATEMENT = 29; -let NODE_CONTINUE_STATEMENT = 30; - -let EQUALITY_EXPRESSION_TYPE_EQ = 0; -let EQUALITY_EXPRESSION_TYPE_NE = 1; -let EQUALITY_EXPRESSION_TYPE_GE = 2; -let EQUALITY_EXPRESSION_TYPE_LE = 3; -let EQUALITY_EXPRESSION_TYPE_LT = 4; -let EQUALITY_EXPRESSION_TYPE_GT = 5; - -let MULTIPLICATIVE_EXPRESSION_TYPE_MUL = 0; -let MULTIPLICATIVE_EXPRESSION_TYPE_DIV = 1; -let MULTIPLICATIVE_EXPRESSION_TYPE_MOD = 2; - -let UNARY_EXPRESSION_TYPE_NOT = 0; -let UNARY_EXPRESSION_TYPE_MINUS = 1; -let UNARY_EXPRESSION_TYPE_STAR = 2; - -let NODE_PROGRAM_DATA = struct { - statements: **Node, - statements_len: i64, -}; - -let NODE_STATEMENT_DATA = struct { - statement: *Node, -}; - -let NODE_ASSIGNMENT_STATEMENT_DATA = struct { - is_declaration: bool, - is_dereference: bool, - lhs: *Node, - rhs: *Node, -}; - -let NODE_IMPORT_DECLARATION_DATA = struct { - filename: *i8, - program: *Node, -}; - -let NODE_FUNCTION_CALL_STATEMENT_DATA = struct { - expression: *Node, - arguments: *Node, - arguments_len: i64, -}; - -let NODE_IF_STATEMENT_DATA = struct { - condition: *Node, - statements: **Node, - statements_len: i64, -}; - -let NODE_WHILE_STATEMENT_DATA = struct { - condition: *Node, - statements: **Node, - statements_len: i64, -}; - -let NODE_EQUALITY_EXPRESSION_DATA = struct { - lhs: *Node, - rhs: *Node, - typ: i64, -}; - -let NODE_ADDITIVE_EXPRESSION_DATA = struct { - addition: bool, - lhs: *Node, - rhs: *Node, -}; - -let NODE_MULTIPLICATIVE_EXPRESSION_DATA = struct { - lhs: *Node, - rhs: *Node, - typ: i64, -}; - -let NODE_UNARY_EXPRESSION_DATA = struct { - typ: i64, - expression: *Node, -}; - -let NODE_POSTFIX_EXPRESSION_DATA = struct { - lhs: *Node, - rhs: *Node, -}; - -let NODE_PRIMARY_EXPRESSION_NUMBER_DATA = struct { - value: i64, -}; - -let NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA = struct { - value: bool, -}; - -let NODE_PRIMARY_EXPRESSION_CHAR_DATA = struct { - value: i8, -}; - -let NODE_PRIMARY_EXPRESSION_STRING_DATA = struct { - value: *i8, -}; - -let NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA = struct { - name: *i8, - type: *Node, -}; - -let NODE_FUNCTION_DEFINITION_DATA = struct { - statements: **Node, - statements_len: i64, - parameters: **Node, - parameters_len: i64, - retur_type: *Node, -}; - -let NODE_STRUCT_INSTANCIATION_DATA = struct { - typ: *i8, -}; - -let NODE_FIELD_ACCESS_DATA = struct { - expression: *Node, - name: *i8, -}; - -let NODE_TYPE_SIMPLE_TYPE_DATA = struct { - name: *i8, - underlying_type: *Node, -}; - -let NODE_TYPE_FUNCTION_TYPE_DATA = struct { - parameters: *Node, - parameters_len: i64, - retur_type: *Node, -}; - -let NODE_TYPE_POINTER_TYPE_DATA = struct { - type: *Node, -}; - -let NODE_TYPE_STRUCT_TYPE_DATA = struct { - fields: *Node, - fields_len: i64, -}; - -let NODE_RETURN_STATEMENT_DATA = struct { - expression: *Node, -}; - -let NODE_CAST_STATEMENT_DATA = struct { - typ: *Node, - expression: *Node, -}; - -let NODE_SIZEOF_STATEMENT_DATA = struct { - typ: *Node, -}; - - -let parser = struct { - tokens: *token, - tokens_len: i64, - - offset: i64, - - arena: *arena, -}; - -extern parser_parse_statement = (*parser) => *Node; -extern parser_parse_expression = (*parser) => *Node; - -let parser_init = (ts: *token, ts_len: i64, ar: *arena) => *parser { - let p = cast(*parser, arena_alloc(ar, sizeof(parser))); - - (*p).tokens = ts; - (*p).tokens_len = ts_len; - (*p).offset = 0; - (*p).arena = ar; - - return p; -}; - -let create_node = (p: *parser, n: Node) => *Node { - let res = cast(*Node, arena_alloc((*p).arena, sizeof(Node))); - *res = n; - return res; -}; - -let parser_peek_token = (p: *parser) => *token { - if (*p).offset >= (*p).tokens_len { - return cast(*token, null); - }; - - return ((*p).tokens + cast(*token, (*p).offset)); -}; - - let parser_consume_token = (p: *parser) => *token { - if (*p).offset >= (*p).tokens_len { - return cast(*token, null); - }; - - let t = ((*p).tokens + cast(*token, (*p).offset)); - (*p).offset = (*p).offset + 1; - return t; -}; - -let parser_accept_token = (p: *parser, t: i64) => *token { - let curr_token = parser_peek_token(p); - if curr_token == cast(*token, null) { - return cast(*token, null); - }; - - if (*curr_token).type == t { - return parser_consume_token(p); - }; - return cast(*token, null); -}; - -let parser_accept_parse = (p: *parser, parsing_func: (*parser) => *Node) => *Node { - let prev_offset = (*p).offset; - let node = parsing_func(p); - if node == cast(*Node, null) { - (*p).offset = prev_offset; - }; - return node; -}; - -/* ReturnStatement ::= RETURN (Expression)? */ -let parser_parse_return_statement = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_RETURN) == cast(*token, null) { - return cast(*Node, null); - }; - - let maybe_expr = parser_accept_parse(p, parser_parse_expression); - - let d = cast(*NODE_RETURN_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_RETURN_STATEMENT_DATA ))); - (*d).expression = maybe_expr; - - let r = Node{}; - r.type = NODE_RETURN_STATEMENT; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -/* Type ::= IDENTIFIER | FunctionType */ -let parser_parse_type = (p: *parser) => *Node { - /* TODO: Function type */ - let to = parser_consume_token(p); - assert(to != cast(*token, null)); - assert((*to).type == TOKEN_IDENTIFIER); - - let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); - (*d).name = cast(*i8, (*to).data); - (*d).underlying_type = cast(*Node, null); - - let r = Node{}; - r.type = NODE_TYPE_SIMPLE_TYPE; - r.data = cast(*void, d); - - return create_node(p, r); -}; - -/* FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* */ -let parser_parse_function_parameters = (p: *parser) => *slice { - /* TODO: Params */ - - let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(**Node) * 20)); - let i = 0; - while true { - if i != 0 { - parser_accept_token(p, TOKEN_COMMA); - }; - let ident = parser_accept_token(p, TOKEN_IDENTIFIER); - if ident == cast(*token, null) { - break; - }; - /* TODO: Rest */ - }; - - let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice))); - (*s).data = cast(*void, node_list); - (*s).data_len = 0; - return s; -}; - -/* FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE */ -let parser_parse_function_definition = (p: *parser) => *Node { - if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - let params = parser_parse_function_parameters(p); - if params == cast(*slice, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) { - return cast(*Node, null); - }; - let retur_type = parser_parse_type(p); - if retur_type == cast(*Node, null) { - return cast(*Node, null); - }; - if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - /* TODO: Body */ - let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100)); - let i = 0; - while true { - let n = parser_accept_parse(p, parser_parse_statement); - if n == cast(*Node, null) { - break; - }; - (*(statements + cast(**Node, i))) = n; - i = i + 1; - }; - - - if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { - return cast(*Node, null); - }; - - - let d = cast(*NODE_FUNCTION_DEFINITION_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_DEFINITION_DATA))); - (*d).statements = statements; - (*d).statements_len = i; - (*d).parameters = cast(**Node, params.data); - (*d).parameters_len = params.data_len; - (*d).retur_type = cast(*Node, null); - - let n = Node{}; - n.type = NODE_FUNCTION_DEFINITION; - n.data = cast(*void, d); - - return create_node(p, n); -}; - -/* PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN */ -let parser_parse_primary_expression = (p: *parser) => *Node { - let stmt = parser_accept_parse(p, parser_parse_function_definition); - if stmt != cast(*Node, null) { - return stmt; - }; - - let tok = parser_consume_token(p); - if tok == cast(*token, null) { - printf("NO TOK\n"); - return cast(*Node, null); - }; - - if (*tok).type == TOKEN_IDENTIFIER { - let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); - (*d).name = cast(*i8, (*tok).data); - (*d).type = cast(*Node, null); /* TODO */ - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; - n.data = cast(*void, d); - return create_node(p, n); - }; - - if (*tok).type == TOKEN_NUMBER { - let d = cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_NUMBER_DATA))); - (*d).value = *(cast(*i64, (*tok).data)); - let n = Node{}; - n.type = NODE_PRIMARY_EXPRESSION_NUMBER; - n.data = cast(*void, d); - return create_node(p, n); - }; - - printf("DIFF TYPE: %d\n", (*tok).type); - - return cast(*Node, null); -}; - -/* EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression */ -let parser_parse_equality_expression = (p: *parser) => *Node { - /* TODO */ - return cast(*Node, null); -}; - -/* AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* */ -let parser_parse_additive_expression = (p: *parser) => *Node { - /* TODO */ - return parser_parse_primary_expression(p); -}; - -/* Expression ::= EqualityExpression | AdditiveExpression */ -let parser_parse_expression = (p: *parser) => *Node { - let ex = parser_accept_parse(p, parser_parse_equality_expression); - if ex != cast(*Node, null) { - return ex; - }; - let ax = parser_accept_parse(p, parser_parse_additive_expression); - if ax != cast(*Node, null) { - return ax; - }; - - return cast(*Node, null); -}; - -/* AssignmentStatement ::= ("let")? ("*")? Expression EQUALS Expression */ -let parse_assignment_statement = (p: *parser) => *Node { - let is_declaration = false; - if parser_accept_token(p, TOKEN_LET) != cast(*token, null) { - printf("IS DECLARATION\n"); - is_declaration = true; - }; - - /* TODO: is_dereference */ - - let lhs = parser_parse_expression(p); /* TODO */ - if lhs == cast(*Node, null) { - printf("ANOTHER BNLL\n"); - return cast(*Node, null); - }; - - if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) { - return cast(*Node, null); - }; - - let rhs = parser_parse_expression(p); /* TODO */ - if rhs == cast(*Node, null) { - printf("NUL EXP\n"); - return cast(*Node, null); - }; - - let d = cast(*NODE_ASSIGNMENT_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA ))); - (*d).is_declaration = is_declaration; - (*d).is_dereference = false; - (*d).lhs = lhs; - (*d).rhs = rhs; - let n = Node{}; - n.type = NODE_ASSIGNMENT_STATEMENT; - n.data = cast(*void, d); - printf("CONTINUE\n"); - return create_node(p, n); -}; - -/* Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | ReturnStatement | "break" | "continue") SEMICOLON */ -let parser_parse_statement = (p: *parser) => *Node { - let assignment = parser_accept_parse(p, parse_assignment_statement); - if assignment != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return assignment; - }; - - let retu = parser_accept_parse(p, parser_parse_return_statement); - if retu != cast(*Node, null) { - if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { - return cast(*Node, null); - }; - return retu; - }; - - - printf("None\n"); - - return cast(*Node, null); -}; - -/* Program ::= Statement+ */ -let parse_program = (p: *parser) => *Node { - let nodes = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 1000)); - - let i = 0; - while (*p).offset < (*p).tokens_len { - let s = parser_parse_statement(p); - assert(s != cast(*Node, null)); - (*(nodes + cast(**Node, i))) = s; - i = i + 1; - }; - - let d = cast(*NODE_PROGRAM_DATA, arena_alloc((*p).arena, sizeof(NODE_PROGRAM_DATA))); - (*d).statements = nodes; - (*d).statements_len = i; - let n = Node{}; - n.type = NODE_PROGRAM; - n.data = cast(*void, d); - return create_node(p, n); -}; - -let parse = (p: *parser) => *Node { - return parse_program(p); -}; - -/* - -For example -2: - -* parsing assignment statement -* parsing ident and num literals - -*/ diff --git a/src/bootstrap/tokenizer.pry b/src/bootstrap/tokenizer.pry new file mode 100644 index 0000000..a2cafb4 --- /dev/null +++ b/src/bootstrap/tokenizer.pry @@ -0,0 +1,553 @@ +extern strlen = (*i8) => i64; +extern memcpy = (*void, *void, i64) => void; +extern sprintf = (*i8, *i8, varargs) => void; +extern atoi = (*i8) => i64; + +import "!stdlib.pry"; +import "!mem.pry"; + +/* Keywords */ +let TOKEN_IMPORT = 1; +let TOKEN_LET = 2; +let TOKEN_EXTERN = 3; +let TOKEN_IF = 4; +let TOKEN_WHILE = 5; +let TOKEN_RETURN = 6; +let TOKEN_BREAK = 7; +let TOKEN_CONTINUE = 8; +let TOKEN_ARROW = 9; +let TOKEN_STRUCT = 10; +let TOKEN_TYPE = 34; + +/* Identifiers */ +let TOKEN_IDENTIFIER = 11; + +/* Literals */ +let TOKEN_NUMBER = 12; +let TOKEN_BOOLEAN = 13; +let TOKEN_NULL = 14; +let TOKEN_CHAR = 15; +let TOKEN_STRING = 16; + +/* Operators */ +let TOKEN_EQUALS = 17; +let TOKEN_PLUS = 18; +let TOKEN_MINUS = 19; +let TOKEN_MUL = 20; +let TOKEN_DIV = 21; +let TOKEN_MOD = 22; +let TOKEN_BANG = 23; +let TOKEN_LESS = 24; +let TOKEN_GREATER = 25; +let TOKEN_DOT = 26; + +/* Punctuation */ +let TOKEN_SEMICOLON = 27; +let TOKEN_COMMA = 28; +let TOKEN_COLON = 29; +let TOKEN_LPAREN = 30; +let TOKEN_RPAREN = 31; +let TOKEN_LBRACE = 32; +let TOKEN_RBRACE = 33; + +let token = struct { + type: i64, + data: *void, +}; + +let tokenizer = struct { + buf: *i8, + buf_len: i64, + offset: i64, + + arena: *arena, +}; + +let print_tokens = (ts: *token, ts_len: i64) => i64 { + let i = 0; + while i < ts_len { + let to = (*(ts + cast(*token, i))); + + if (to.type == TOKEN_IMPORT) { + printf("Import\n"); + }; + if (to.type == TOKEN_LET) { + printf("Let\n"); + }; + if (to.type == TOKEN_EXTERN) { + printf("Extern\n"); + }; + if (to.type == TOKEN_IF) { + printf("If\n"); + }; + if (to.type == TOKEN_WHILE) { + printf("While\n"); + }; + if (to.type == TOKEN_RETURN) { + printf("Return\n"); + }; + if (to.type == TOKEN_BREAK) { + printf("Break\n"); + }; + if (to.type == TOKEN_CONTINUE) { + printf("Continue\n"); + }; + if (to.type == TOKEN_ARROW) { + printf("Arrow\n"); + }; + if (to.type == TOKEN_STRUCT) { + printf("Struct\n"); + }; + if (to.type == TOKEN_TYPE) { + printf("Type\n"); + }; + if (to.type == TOKEN_IDENTIFIER) { + printf("Identifier: %s\n", cast(*i8, to.data)); + }; + if (to.type == TOKEN_NUMBER) { + printf("Number: %d\n", *cast(*i64, to.data)); + }; + if (to.type == TOKEN_BOOLEAN) { + printf("Boolean: %d\n", *cast(*bool, to.data)); + }; + if (to.type == TOKEN_NULL) { + printf("Null\n"); + }; + if (to.type == TOKEN_CHAR) { + printf("Char: %c\n", *cast(*i8, to.data)); + }; + if (to.type == TOKEN_STRING) { + printf("String: %s\n", cast(*i8, to.data)); + }; + if (to.type == TOKEN_EQUALS) { + printf("Equals\n"); + }; + if (to.type == TOKEN_PLUS) { + printf("Plus\n"); + }; + if (to.type == TOKEN_MINUS) { + printf("Minus\n"); + }; + if (to.type == TOKEN_MUL) { + printf("Mul\n"); + }; + if (to.type == TOKEN_DIV) { + printf("Div\n"); + }; + if (to.type == TOKEN_MOD) { + printf("Mod\n"); + }; + if (to.type == TOKEN_BANG) { + printf("Bang\n"); + }; + if (to.type == TOKEN_LESS) { + printf("Less\n"); + }; + if (to.type == TOKEN_GREATER) { + printf("Greater\n"); + }; + if (to.type == TOKEN_DOT) { + printf("Dot\n"); + }; + if (to.type == TOKEN_SEMICOLON) { + printf("Semicolon\n"); + }; + if (to.type == TOKEN_COMMA) { + printf("Comma\n"); + }; + if (to.type == TOKEN_COLON) { + printf("Colon\n"); + }; + if (to.type == TOKEN_LPAREN) { + printf("LParen\n"); + }; + if (to.type == TOKEN_RPAREN) { + printf("RParen\n"); + }; + if (to.type == TOKEN_LBRACE) { + printf("LBrace\n"); + }; + if (to.type == TOKEN_RBRACE) { + printf("RBrace\n"); + }; + + i = i + 1; + }; + + return 0; +}; + +let tokenizer_skip_whitespace = (t: *tokenizer) => void { + while true { + if (*t).offset >= (*t).buf_len { return; }; + let c = (*((*t).buf + cast(*i8, (*t).offset))); + if !iswhitespace(c) { + return; + }; + (*t).offset = (*t).offset + 1; + }; + + return; +}; + +let tokenizer_accept_string = (t: *tokenizer, str: *i8) => bool { + let str_len = strlen(str); + if (*t).offset + str_len > (*t).buf_len { return false; }; + + let s = cast(*i8, arena_alloc((*t).arena, 1000)); + memcpy(cast(*void, s), cast(*void, (*t).buf + cast(*i8, (*t).offset)), str_len); + + if strcmp(s, str) { + (*t).offset = (*t).offset + str_len; + return true; + }; + + return false; +}; + +let tokenizer_consume_until_condition = (t: *tokenizer, condition: (i8) => bool) => *i8 { + let start = (*t).offset; + let res = cast(*i8, arena_alloc((*t).arena, 1000)); + + while true { + if (*t).offset >= (*t).buf_len { + return res; + }; + + let c = (*((*t).buf + cast(*i8, (*t).offset))); + + let offset = (*t).offset; + if c == '\\' { + let next_c = (*((*t).buf + cast(*i8, offset + 1))); + + let any = false; + if next_c == 'n' { + (*(res + cast(*i8, offset - start))) = '\n'; + any = true; + }; + if next_c == 't' { + (*(res + cast(*i8, offset - start))) = '\t'; + any = true; + }; + if next_c == 'r' { + (*(res + cast(*i8, offset - start))) = '\r'; + any = true; + }; + if next_c == '0' { + (*(res + cast(*i8, offset - start))) = '\0'; + any = true; + }; + if next_c == '\\' { + (*(res + cast(*i8, offset - start))) = '\\'; + any = true; + }; + if !any { + (*(res + cast(*i8, offset - start))) = next_c; + }; + + offset = offset + 1; + offset = offset + 1; + (*t).offset = offset; + + continue; + }; + + if condition(c) { + return res; + }; + + (*(res + cast(*i8, offset - start))) = c; + (*(res + cast(*i8, offset - start + 1))) = '\0'; + + offset = offset + 1; + (*t).offset = offset; + }; + + return cast(*i8, null); +}; + +let tokenizer_accept_int_type = (t: *tokenizer) => *i64 { + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + return !isdigit(c); + }); + if string == cast(*i8, null) { + return cast(*i64, null); + }; + if strlen(string) == 0 { + return cast(*i64, null); + }; + let x = cast(*i64, arena_alloc((*t).arena, sizeof(i64))); + *x = atoi(string); + return x; +}; + +let tokenizer_accept_char_type = (t: *tokenizer) => *i8 { + let prev_offset = (*t).offset; + if !tokenizer_accept_string(t, "'") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + return c == '\''; + }); + + if !tokenizer_accept_string(t, "'") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + return string; +}; + +let tokenizer_accept_string_type = (t: *tokenizer) => *i8 { + let prev_offset = (*t).offset; + if !tokenizer_accept_string(t, "\"") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + return c == '"'; + }); + + if !tokenizer_accept_string(t, "\"") { + (*t).offset = prev_offset; + return cast(*i8, null); + }; + + return string; +}; + +let tokenizer_skip_comments = (t: *tokenizer) => void { + if !tokenizer_accept_string(t, "/*") { return; }; + + while !tokenizer_accept_string(t, "*/") { + (*t).offset = (*t).offset + 1; + }; + + return; +}; + +let tokenizer_next = (t: *tokenizer) => *token { + tokenizer_skip_whitespace(t); + tokenizer_skip_comments(t); + tokenizer_skip_whitespace(t); + + if (*t).offset >= (*t).buf_len { + return cast(*token, null); + }; + + let to = cast(*token, arena_alloc((*t).arena, sizeof(token))); + + if tokenizer_accept_string(t, "import") { + (*to).type = TOKEN_IMPORT; + return to; + }; + if tokenizer_accept_string(t, "let") { + (*to).type = TOKEN_LET; + return to; + }; + if tokenizer_accept_string(t, "extern") { + (*to).type = TOKEN_EXTERN; + return to; + }; + if tokenizer_accept_string(t, "if") { + (*to).type = TOKEN_IF; + return to; + }; + if tokenizer_accept_string(t, "while") { + (*to).type = TOKEN_WHILE; + return to; + }; + if tokenizer_accept_string(t, "return") { + (*to).type = TOKEN_RETURN; + return to; + }; + if tokenizer_accept_string(t, "break") { + (*to).type = TOKEN_BREAK; + return to; + }; + if tokenizer_accept_string(t, "continue") { + (*to).type = TOKEN_CONTINUE; + return to; + }; + if tokenizer_accept_string(t, "true") { + (*to).type = TOKEN_BOOLEAN; + let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); + *data = true; + (*to).data = cast(*void, data); + return to; + }; + if tokenizer_accept_string(t, "false") { + (*to).type = TOKEN_BOOLEAN; + let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); + *data = false; + (*to).data = cast(*void, data); + return to; + }; + if tokenizer_accept_string(t, "null") { + (*to).type = TOKEN_NULL; + return to; + }; + if tokenizer_accept_string(t, "struct") { + (*to).type = TOKEN_STRUCT; + return to; + }; + if tokenizer_accept_string(t, "newtype") { + (*to).type = TOKEN_TYPE; + return to; + }; + + if tokenizer_accept_string(t, "=>") { + (*to).type = TOKEN_ARROW; + return to; + }; + if tokenizer_accept_string(t, ";") { + (*to).type = TOKEN_SEMICOLON; + return to; + }; + if tokenizer_accept_string(t, ",") { + (*to).type = TOKEN_COMMA; + return to; + }; + if tokenizer_accept_string(t, ":") { + (*to).type = TOKEN_COLON; + return to; + }; + if tokenizer_accept_string(t, "(") { + (*to).type = TOKEN_LPAREN; + return to; + }; + if tokenizer_accept_string(t, ")") { + (*to).type = TOKEN_RPAREN; + return to; + }; + if tokenizer_accept_string(t, "{") { + (*to).type = TOKEN_LBRACE; + return to; + }; + if tokenizer_accept_string(t, "}") { + (*to).type = TOKEN_RBRACE; + return to; + }; + if tokenizer_accept_string(t, "=") { + (*to).type = TOKEN_EQUALS; + return to; + }; + if tokenizer_accept_string(t, "+") { + (*to).type = TOKEN_PLUS; + return to; + }; + if tokenizer_accept_string(t, "-") { + (*to).type = TOKEN_MINUS; + return to; + }; + if tokenizer_accept_string(t, "*") { + (*to).type = TOKEN_MUL; + return to; + }; + if tokenizer_accept_string(t, "/") { + (*to).type = TOKEN_DIV; + return to; + }; + if tokenizer_accept_string(t, "%") { + (*to).type = TOKEN_MOD; + return to; + }; + if tokenizer_accept_string(t, "!") { + (*to).type = TOKEN_BANG; + return to; + }; + if tokenizer_accept_string(t, "<") { + (*to).type = TOKEN_LESS; + return to; + }; + if tokenizer_accept_string(t, ">") { + (*to).type = TOKEN_GREATER; + return to; + }; + if tokenizer_accept_string(t, ".") { + (*to).type = TOKEN_DOT; + return to; + }; + + let maybe_int = tokenizer_accept_int_type(t); + if maybe_int != cast(*i64, null) { + (*to).type = TOKEN_NUMBER; + (*to).data = cast(*void, maybe_int); + return to; + }; + + let maybe_char = tokenizer_accept_char_type(t); + if maybe_char != cast(*i8, null) { + (*to).type = TOKEN_CHAR; + (*to).data = cast(*void, maybe_char); + return to; + }; + + let maybe_string = tokenizer_accept_string_type(t); + if maybe_string != cast(*i8, null) { + (*to).type = TOKEN_STRING; + (*to).data = cast(*void, maybe_string); + return to; + }; + + let string = tokenizer_consume_until_condition(t, (c: i8) => bool { + if isalphanum(c) { + return false; + }; + if c == '_' { + return false; + }; + return true; + }); + if strlen(string) == 0 { + printf("NO IDENT!\n"); + return cast(*token, null); + }; + + (*to).type = TOKEN_IDENTIFIER; + (*to).data = cast(*void, string); + + return to; +}; + +let tokenizer_init = (alloc: *arena, file: slice) => *tokenizer { + let t = cast(*tokenizer, arena_alloc(alloc, sizeof(tokenizer))); + (*t).arena = alloc; + (*t).offset = 0; + (*t).buf = cast(*i8, file.data); + (*t).buf_len = file.data_len; + + printf("File size: %d\n", (*t).buf_len); + + printf("%s\n", (*t).buf); + + return t; +}; + +let tokenizer_tokenize = (t: *tokenizer) => slice { + let tokens = cast(*token, arena_alloc((*t).arena, sizeof(token) * 1000)); /* why does it not care about type here */ + let tokens_len = 0; + + while true { + let tk = tokenizer_next(t); + if tk == cast(*token, null) { + break; + }; + printf("Add token: %d\n", (*tk).type); + + (*(tokens + cast(*token, tokens_len))) = *tk; + tokens_len = tokens_len + 1; + }; + + printf("PRINT TOKENS: %d\n", tokens_len); + + print_tokens(tokens, tokens_len); + + let res = slice{}; + res.data = cast(*void, tokens); + res.data_len = tokens_len; + return res; +}; diff --git a/src/bootstrap/tokenizer.src b/src/bootstrap/tokenizer.src deleted file mode 100644 index 8d7f997..0000000 --- a/src/bootstrap/tokenizer.src +++ /dev/null @@ -1,553 +0,0 @@ -extern strlen = (*i8) => i64; -extern memcpy = (*void, *void, i64) => void; -extern sprintf = (*i8, *i8, varargs) => void; -extern atoi = (*i8) => i64; - -import "!stdlib.src"; -import "!mem.src"; - -/* Keywords */ -let TOKEN_IMPORT = 1; -let TOKEN_LET = 2; -let TOKEN_EXTERN = 3; -let TOKEN_IF = 4; -let TOKEN_WHILE = 5; -let TOKEN_RETURN = 6; -let TOKEN_BREAK = 7; -let TOKEN_CONTINUE = 8; -let TOKEN_ARROW = 9; -let TOKEN_STRUCT = 10; -let TOKEN_TYPE = 34; - -/* Identifiers */ -let TOKEN_IDENTIFIER = 11; - -/* Literals */ -let TOKEN_NUMBER = 12; -let TOKEN_BOOLEAN = 13; -let TOKEN_NULL = 14; -let TOKEN_CHAR = 15; -let TOKEN_STRING = 16; - -/* Operators */ -let TOKEN_EQUALS = 17; -let TOKEN_PLUS = 18; -let TOKEN_MINUS = 19; -let TOKEN_MUL = 20; -let TOKEN_DIV = 21; -let TOKEN_MOD = 22; -let TOKEN_BANG = 23; -let TOKEN_LESS = 24; -let TOKEN_GREATER = 25; -let TOKEN_DOT = 26; - -/* Punctuation */ -let TOKEN_SEMICOLON = 27; -let TOKEN_COMMA = 28; -let TOKEN_COLON = 29; -let TOKEN_LPAREN = 30; -let TOKEN_RPAREN = 31; -let TOKEN_LBRACE = 32; -let TOKEN_RBRACE = 33; - -let token = struct { - type: i64, - data: *void, -}; - -let tokenizer = struct { - buf: *i8, - buf_len: i64, - offset: i64, - - arena: *arena, -}; - -let print_tokens = (ts: *token, ts_len: i64) => i64 { - let i = 0; - while i < ts_len { - let to = (*(ts + cast(*token, i))); - - if (to.type == TOKEN_IMPORT) { - printf("Import\n"); - }; - if (to.type == TOKEN_LET) { - printf("Let\n"); - }; - if (to.type == TOKEN_EXTERN) { - printf("Extern\n"); - }; - if (to.type == TOKEN_IF) { - printf("If\n"); - }; - if (to.type == TOKEN_WHILE) { - printf("While\n"); - }; - if (to.type == TOKEN_RETURN) { - printf("Return\n"); - }; - if (to.type == TOKEN_BREAK) { - printf("Break\n"); - }; - if (to.type == TOKEN_CONTINUE) { - printf("Continue\n"); - }; - if (to.type == TOKEN_ARROW) { - printf("Arrow\n"); - }; - if (to.type == TOKEN_STRUCT) { - printf("Struct\n"); - }; - if (to.type == TOKEN_TYPE) { - printf("Type\n"); - }; - if (to.type == TOKEN_IDENTIFIER) { - printf("Identifier: %s\n", cast(*i8, to.data)); - }; - if (to.type == TOKEN_NUMBER) { - printf("Number: %d\n", *cast(*i64, to.data)); - }; - if (to.type == TOKEN_BOOLEAN) { - printf("Boolean: %d\n", *cast(*bool, to.data)); - }; - if (to.type == TOKEN_NULL) { - printf("Null\n"); - }; - if (to.type == TOKEN_CHAR) { - printf("Char: %c\n", *cast(*i8, to.data)); - }; - if (to.type == TOKEN_STRING) { - printf("String: %s\n", cast(*i8, to.data)); - }; - if (to.type == TOKEN_EQUALS) { - printf("Equals\n"); - }; - if (to.type == TOKEN_PLUS) { - printf("Plus\n"); - }; - if (to.type == TOKEN_MINUS) { - printf("Minus\n"); - }; - if (to.type == TOKEN_MUL) { - printf("Mul\n"); - }; - if (to.type == TOKEN_DIV) { - printf("Div\n"); - }; - if (to.type == TOKEN_MOD) { - printf("Mod\n"); - }; - if (to.type == TOKEN_BANG) { - printf("Bang\n"); - }; - if (to.type == TOKEN_LESS) { - printf("Less\n"); - }; - if (to.type == TOKEN_GREATER) { - printf("Greater\n"); - }; - if (to.type == TOKEN_DOT) { - printf("Dot\n"); - }; - if (to.type == TOKEN_SEMICOLON) { - printf("Semicolon\n"); - }; - if (to.type == TOKEN_COMMA) { - printf("Comma\n"); - }; - if (to.type == TOKEN_COLON) { - printf("Colon\n"); - }; - if (to.type == TOKEN_LPAREN) { - printf("LParen\n"); - }; - if (to.type == TOKEN_RPAREN) { - printf("RParen\n"); - }; - if (to.type == TOKEN_LBRACE) { - printf("LBrace\n"); - }; - if (to.type == TOKEN_RBRACE) { - printf("RBrace\n"); - }; - - i = i + 1; - }; - - return 0; -}; - -let tokenizer_skip_whitespace = (t: *tokenizer) => void { - while true { - if (*t).offset >= (*t).buf_len { return; }; - let c = (*((*t).buf + cast(*i8, (*t).offset))); - if !iswhitespace(c) { - return; - }; - (*t).offset = (*t).offset + 1; - }; - - return; -}; - -let tokenizer_accept_string = (t: *tokenizer, str: *i8) => bool { - let str_len = strlen(str); - if (*t).offset + str_len > (*t).buf_len { return false; }; - - let s = cast(*i8, arena_alloc((*t).arena, 1000)); - memcpy(cast(*void, s), cast(*void, (*t).buf + cast(*i8, (*t).offset)), str_len); - - if strcmp(s, str) { - (*t).offset = (*t).offset + str_len; - return true; - }; - - return false; -}; - -let tokenizer_consume_until_condition = (t: *tokenizer, condition: (i8) => bool) => *i8 { - let start = (*t).offset; - let res = cast(*i8, arena_alloc((*t).arena, 1000)); - - while true { - if (*t).offset >= (*t).buf_len { - return res; - }; - - let c = (*((*t).buf + cast(*i8, (*t).offset))); - - let offset = (*t).offset; - if c == '\\' { - let next_c = (*((*t).buf + cast(*i8, offset + 1))); - - let any = false; - if next_c == 'n' { - (*(res + cast(*i8, offset - start))) = '\n'; - any = true; - }; - if next_c == 't' { - (*(res + cast(*i8, offset - start))) = '\t'; - any = true; - }; - if next_c == 'r' { - (*(res + cast(*i8, offset - start))) = '\r'; - any = true; - }; - if next_c == '0' { - (*(res + cast(*i8, offset - start))) = '\0'; - any = true; - }; - if next_c == '\\' { - (*(res + cast(*i8, offset - start))) = '\\'; - any = true; - }; - if !any { - (*(res + cast(*i8, offset - start))) = next_c; - }; - - offset = offset + 1; - offset = offset + 1; - (*t).offset = offset; - - continue; - }; - - if condition(c) { - return res; - }; - - (*(res + cast(*i8, offset - start))) = c; - (*(res + cast(*i8, offset - start + 1))) = '\0'; - - offset = offset + 1; - (*t).offset = offset; - }; - - return cast(*i8, null); -}; - -let tokenizer_accept_int_type = (t: *tokenizer) => *i64 { - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return !isdigit(c); - }); - if string == cast(*i8, null) { - return cast(*i64, null); - }; - if strlen(string) == 0 { - return cast(*i64, null); - }; - let x = cast(*i64, arena_alloc((*t).arena, sizeof(i64))); - *x = atoi(string); - return x; -}; - -let tokenizer_accept_char_type = (t: *tokenizer) => *i8 { - let prev_offset = (*t).offset; - if !tokenizer_accept_string(t, "'") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return c == '\''; - }); - - if !tokenizer_accept_string(t, "'") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - return string; -}; - -let tokenizer_accept_string_type = (t: *tokenizer) => *i8 { - let prev_offset = (*t).offset; - if !tokenizer_accept_string(t, "\"") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - return c == '"'; - }); - - if !tokenizer_accept_string(t, "\"") { - (*t).offset = prev_offset; - return cast(*i8, null); - }; - - return string; -}; - -let tokenizer_skip_comments = (t: *tokenizer) => void { - if !tokenizer_accept_string(t, "/*") { return; }; - - while !tokenizer_accept_string(t, "*/") { - (*t).offset = (*t).offset + 1; - }; - - return; -}; - -let tokenizer_next = (t: *tokenizer) => *token { - tokenizer_skip_whitespace(t); - tokenizer_skip_comments(t); - tokenizer_skip_whitespace(t); - - if (*t).offset >= (*t).buf_len { - return cast(*token, null); - }; - - let to = cast(*token, arena_alloc((*t).arena, sizeof(token))); - - if tokenizer_accept_string(t, "import") { - (*to).type = TOKEN_IMPORT; - return to; - }; - if tokenizer_accept_string(t, "let") { - (*to).type = TOKEN_LET; - return to; - }; - if tokenizer_accept_string(t, "extern") { - (*to).type = TOKEN_EXTERN; - return to; - }; - if tokenizer_accept_string(t, "if") { - (*to).type = TOKEN_IF; - return to; - }; - if tokenizer_accept_string(t, "while") { - (*to).type = TOKEN_WHILE; - return to; - }; - if tokenizer_accept_string(t, "return") { - (*to).type = TOKEN_RETURN; - return to; - }; - if tokenizer_accept_string(t, "break") { - (*to).type = TOKEN_BREAK; - return to; - }; - if tokenizer_accept_string(t, "continue") { - (*to).type = TOKEN_CONTINUE; - return to; - }; - if tokenizer_accept_string(t, "true") { - (*to).type = TOKEN_BOOLEAN; - let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); - *data = true; - (*to).data = cast(*void, data); - return to; - }; - if tokenizer_accept_string(t, "false") { - (*to).type = TOKEN_BOOLEAN; - let data = cast(*bool, arena_alloc((*t).arena, sizeof(bool))); - *data = false; - (*to).data = cast(*void, data); - return to; - }; - if tokenizer_accept_string(t, "null") { - (*to).type = TOKEN_NULL; - return to; - }; - if tokenizer_accept_string(t, "struct") { - (*to).type = TOKEN_STRUCT; - return to; - }; - if tokenizer_accept_string(t, "newtype") { - (*to).type = TOKEN_TYPE; - return to; - }; - - if tokenizer_accept_string(t, "=>") { - (*to).type = TOKEN_ARROW; - return to; - }; - if tokenizer_accept_string(t, ";") { - (*to).type = TOKEN_SEMICOLON; - return to; - }; - if tokenizer_accept_string(t, ",") { - (*to).type = TOKEN_COMMA; - return to; - }; - if tokenizer_accept_string(t, ":") { - (*to).type = TOKEN_COLON; - return to; - }; - if tokenizer_accept_string(t, "(") { - (*to).type = TOKEN_LPAREN; - return to; - }; - if tokenizer_accept_string(t, ")") { - (*to).type = TOKEN_RPAREN; - return to; - }; - if tokenizer_accept_string(t, "{") { - (*to).type = TOKEN_LBRACE; - return to; - }; - if tokenizer_accept_string(t, "}") { - (*to).type = TOKEN_RBRACE; - return to; - }; - if tokenizer_accept_string(t, "=") { - (*to).type = TOKEN_EQUALS; - return to; - }; - if tokenizer_accept_string(t, "+") { - (*to).type = TOKEN_PLUS; - return to; - }; - if tokenizer_accept_string(t, "-") { - (*to).type = TOKEN_MINUS; - return to; - }; - if tokenizer_accept_string(t, "*") { - (*to).type = TOKEN_MUL; - return to; - }; - if tokenizer_accept_string(t, "/") { - (*to).type = TOKEN_DIV; - return to; - }; - if tokenizer_accept_string(t, "%") { - (*to).type = TOKEN_MOD; - return to; - }; - if tokenizer_accept_string(t, "!") { - (*to).type = TOKEN_BANG; - return to; - }; - if tokenizer_accept_string(t, "<") { - (*to).type = TOKEN_LESS; - return to; - }; - if tokenizer_accept_string(t, ">") { - (*to).type = TOKEN_GREATER; - return to; - }; - if tokenizer_accept_string(t, ".") { - (*to).type = TOKEN_DOT; - return to; - }; - - let maybe_int = tokenizer_accept_int_type(t); - if maybe_int != cast(*i64, null) { - (*to).type = TOKEN_NUMBER; - (*to).data = cast(*void, maybe_int); - return to; - }; - - let maybe_char = tokenizer_accept_char_type(t); - if maybe_char != cast(*i8, null) { - (*to).type = TOKEN_CHAR; - (*to).data = cast(*void, maybe_char); - return to; - }; - - let maybe_string = tokenizer_accept_string_type(t); - if maybe_string != cast(*i8, null) { - (*to).type = TOKEN_STRING; - (*to).data = cast(*void, maybe_string); - return to; - }; - - let string = tokenizer_consume_until_condition(t, (c: i8) => bool { - if isalphanum(c) { - return false; - }; - if c == '_' { - return false; - }; - return true; - }); - if strlen(string) == 0 { - printf("NO IDENT!\n"); - return cast(*token, null); - }; - - (*to).type = TOKEN_IDENTIFIER; - (*to).data = cast(*void, string); - - return to; -}; - -let tokenizer_init = (alloc: *arena, file: slice) => *tokenizer { - let t = cast(*tokenizer, arena_alloc(alloc, sizeof(tokenizer))); - (*t).arena = alloc; - (*t).offset = 0; - (*t).buf = cast(*i8, file.data); - (*t).buf_len = file.data_len; - - printf("File size: %d\n", (*t).buf_len); - - printf("%s\n", (*t).buf); - - return t; -}; - -let tokenizer_tokenize = (t: *tokenizer) => slice { - let tokens = cast(*token, arena_alloc((*t).arena, sizeof(token) * 1000)); /* why does it not care about type here */ - let tokens_len = 0; - - while true { - let tk = tokenizer_next(t); - if tk == cast(*token, null) { - break; - }; - printf("Add token: %d\n", (*tk).type); - - (*(tokens + cast(*token, tokens_len))) = *tk; - tokens_len = tokens_len + 1; - }; - - printf("PRINT TOKENS: %d\n", tokens_len); - - print_tokens(tokens, tokens_len); - - let res = slice{}; - res.data = cast(*void, tokens); - res.data_len = tokens_len; - return res; -}; -- cgit 1.4.1