diff options
| author | Baitinq <[email protected]> | 2025-07-15 17:34:39 +0200 |
|---|---|---|
| committer | Baitinq <[email protected]> | 2025-07-15 18:00:31 +0200 |
| commit | cc56ed42486c2636af50bae451825ad90cfd4b6c (patch) | |
| tree | 2307d6ced51f427405e4152b4ff1493e245a6b30 /src | |
| parent | Bootstrap: Support generating LLVM IR file (diff) | |
| download | pry-lang-cc56ed42486c2636af50bae451825ad90cfd4b6c.tar.gz pry-lang-cc56ed42486c2636af50bae451825ad90cfd4b6c.tar.bz2 pry-lang-cc56ed42486c2636af50bae451825ad90cfd4b6c.zip | |
Finish bootstrapping :^)
Diffstat (limited to 'src')
| -rw-r--r-- | src/codegen.pry (renamed from src/bootstrap/codegen.pry) | 0 | ||||
| -rw-r--r-- | src/codegen.zig | 1101 | ||||
| -rw-r--r-- | src/llvm.pry (renamed from src/bootstrap/llvm.pry) | 0 | ||||
| -rw-r--r-- | src/main.pry (renamed from src/bootstrap/main.pry) | 0 | ||||
| -rw-r--r-- | src/main.zig | 49 | ||||
| -rw-r--r-- | src/parser.pry (renamed from src/bootstrap/parser.pry) | 0 | ||||
| -rw-r--r-- | src/parser.zig | 1055 | ||||
| -rw-r--r-- | src/tokenizer.pry (renamed from src/bootstrap/tokenizer.pry) | 0 | ||||
| -rw-r--r-- | src/tokenizer.zig | 327 |
9 files changed, 0 insertions, 2532 deletions
diff --git a/src/bootstrap/codegen.pry b/src/codegen.pry index cb054ec..cb054ec 100644 --- a/src/bootstrap/codegen.pry +++ b/src/codegen.pry diff --git a/src/codegen.zig b/src/codegen.zig deleted file mode 100644 index 2ece01e..0000000 --- a/src/codegen.zig +++ /dev/null @@ -1,1101 +0,0 @@ -const std = @import("std"); - -const llvm = @cImport({ - @cInclude("llvm-c/Core.h"); - @cInclude("llvm-c/TargetMachine.h"); - @cInclude("llvm-c/Types.h"); - @cInclude("llvm-c/Analysis.h"); - @cInclude("llvm-c/Target.h"); -}); - -const parser = @import("parser.zig"); - -pub const CodeGenError = error{ - CompilationError, - OutOfMemory, -}; - -pub const CodeGen = struct { - llvm_module: llvm.LLVMModuleRef, - llvm_target_data: llvm.LLVMTargetDataRef, - llvm_context: llvm.LLVMContextRef, - builder: llvm.LLVMBuilderRef, - environment: *Environment, - - arena: std.mem.Allocator, - - while_loop_exit: ?llvm.LLVMBasicBlockRef, - while_block: ?llvm.LLVMBasicBlockRef, - current_function: ?llvm.LLVMValueRef, - current_function_return_type: ?*parser.Node, - - pub fn init(arena: std.mem.Allocator) !*CodeGen { - // Initialize LLVM - llvm.LLVMInitializeAllTargetInfos(); - llvm.LLVMInitializeAllTargetMCs(); - llvm.LLVMInitializeAllTargets(); - llvm.LLVMInitializeAllAsmPrinters(); - llvm.LLVMInitializeAllAsmParsers(); - - const module: llvm.LLVMModuleRef = llvm.LLVMModuleCreateWithName("module"); - const context = llvm.LLVMGetGlobalContext(); - const builder = llvm.LLVMCreateBuilder(); - - const self = try arena.create(CodeGen); - self.* = .{ - .llvm_module = module, - .llvm_target_data = llvm.LLVMGetModuleDataLayout(module), - .llvm_context = context, - .builder = builder, - .environment = try Environment.init(arena), - - .arena = arena, - - .while_loop_exit = null, - .while_block = null, - .current_function = null, - .current_function_return_type = null, - }; - - return self; - } - - pub fn compile(self: *CodeGen) void { - // Dump module - llvm.LLVMDumpModule(self.llvm_module); - - // Generate 
code - const triple = llvm.LLVMGetDefaultTargetTriple(); - var target_ref: llvm.LLVMTargetRef = undefined; - var message: [*c]u8 = undefined; - var result = llvm.LLVMGetTargetFromTriple(triple, &target_ref, &message); - if (result != 0) { - std.debug.print("Target output: {s}.\n", .{message}); - llvm.LLVMDisposeMessage(message.?); - } - const target_machine = llvm.LLVMCreateTargetMachine( - target_ref, - triple, - "", - "", - llvm.LLVMCodeGenLevelDefault, - llvm.LLVMRelocDefault, - llvm.LLVMCodeModelDefault, - ); - - result = llvm.LLVMVerifyModule(self.llvm_module, llvm.LLVMAbortProcessAction, &message); - if (result != 0) { - std.debug.print("Verification output: {any}.\n", .{message}); - llvm.LLVMDisposeMessage(message); - } - - // Generate the object file - const filename = "output.o"; - _ = llvm.LLVMTargetMachineEmitToFile( - target_machine, - self.llvm_module, - filename, - llvm.LLVMObjectFile, - null, - ); - std.debug.print("Object file generated: {s}\n", .{filename}); - } - - pub fn deinit(self: *CodeGen) void { - defer llvm.LLVMDisposeBuilder(self.builder); - llvm.LLVMDisposeModule(self.llvm_module); - llvm.LLVMShutdown(); - } - - pub fn generate(self: *CodeGen, ast: *parser.Node) CodeGenError!void { - std.debug.assert(ast.* == parser.Node.PROGRAM); - - const program = ast.PROGRAM; - - for (program.statements) |stmt| { - _ = try self.generate_statement(stmt); - } - } - - fn generate_statement(self: *CodeGen, statement: *parser.Node) CodeGenError!void { - errdefer std.debug.print("Error generating statement\n", .{}); - std.debug.assert(statement.* == parser.Node.STATEMENT); - - switch (statement.STATEMENT.statement.*) { - .ASSIGNMENT_STATEMENT => |*assignment_statement| { - try self.generate_assignment_statement(@ptrCast(assignment_statement)); - }, - .FUNCTION_CALL_STATEMENT => |*function_call_statement| { - _ = try self.generate_function_call_statement(@ptrCast(function_call_statement)); - }, - .RETURN_STATEMENT => |*return_statement| return try 
self.generate_return_statement(@ptrCast(return_statement)), - .BREAK_STATEMENT => |*break_statement| return try self.generate_break_statement(@ptrCast(@alignCast(break_statement))), - .CONTINUE_STATEMENT => |*continue_statement| return try self.generate_continue_statement(@ptrCast(@alignCast(continue_statement))), - .IF_STATEMENT => |*if_statement| return try self.generate_if_statement(@ptrCast(if_statement)), - .WHILE_STATEMENT => |*while_statement| return try self.generate_while_statement(@ptrCast(while_statement)), - .IMPORT_DECLARATION => |*import_declaration| return try self.generate_import_declaration(@ptrCast(import_declaration)), - else => unreachable, - } - } - - fn generate_assignment_statement(self: *CodeGen, statement: *parser.Node) CodeGenError!void { - errdefer std.debug.print("Error generating assignment statement\n", .{}); - std.debug.assert(statement.* == parser.Node.ASSIGNMENT_STATEMENT); - const assignment_statement = statement.ASSIGNMENT_STATEMENT; - - switch (assignment_statement.lhs.*) { - .PRIMARY_EXPRESSION => { - const identifier = assignment_statement.lhs.PRIMARY_EXPRESSION.IDENTIFIER; - const variable = try self.generate_expression_value(assignment_statement.rhs, identifier.name); - - if (self.environment.scope_stack.items.len == 1) { - try self.environment.add_variable(identifier.name, try self.create_variable(.{ - .value = variable.value, - .type = variable.type, - .node = variable.node, - .node_type = variable.node_type, - .stack_level = null, - })); - return; - } - - var ptr: llvm.LLVMValueRef = undefined; - var typ = variable.node_type; - if (assignment_statement.is_declaration) { - var x = try self.get_llvm_type(variable.node_type); - if (variable.node_type.TYPE == .FUNCTION_TYPE) { - x = llvm.LLVMPointerType(x, 0); - } - ptr = llvm.LLVMBuildAlloca(self.builder, x, try std.fmt.allocPrintZ(self.arena, "{s}", .{identifier.name})); - } else { - ptr = self.environment.get_variable(identifier.name).?.value; - typ = 
self.environment.get_variable(identifier.name).?.node_type; - // TODO: Do this in more places! (everywhere get_llvm_type or get_variable?) Also check types in return and cmp - std.debug.print("TYP {s}: {any} vs {any} -- {any}\n", .{ identifier.name, typ.TYPE, variable.node_type.TYPE, variable.node }); - std.debug.assert(self.compare_types(typ, variable.node_type, assignment_statement.is_dereference)); - } - - if (assignment_statement.is_dereference) { - ptr = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(typ), ptr, ""); - } - - // NOTE: structs have a null variable.value - if (variable.value != null) { - _ = llvm.LLVMBuildStore(self.builder, variable.value, ptr); - } - - if (assignment_statement.is_dereference) { - ptr = self.environment.get_variable(identifier.name).?.value; - } - - const new_variable = try self.create_variable(.{ - .value = ptr, - .type = variable.type, - .node = variable.node, - .node_type = typ, - .stack_level = null, - }); - // Adding variable doesnt actually replace the variable of previous scope - if (assignment_statement.is_declaration) { - try self.environment.add_variable(identifier.name, new_variable); - } else { - try self.environment.set_variable(identifier.name, new_variable); - } - }, - .UNARY_EXPRESSION => { - const xd = assignment_statement.lhs.UNARY_EXPRESSION.expression; - const a = try self.generate_expression_value(xd, null); - const variable = try self.generate_expression_value(assignment_statement.rhs, null); - std.debug.assert(self.compare_types(a.node_type, variable.node_type, true)); - _ = llvm.LLVMBuildStore(self.builder, variable.value, a.value); - }, - .FIELD_ACCESS => |field_access| { - const xd = assignment_statement.lhs.FIELD_ACCESS.expression; - const name = field_access.name; - - const x = try self.get_struct_field(xd, name); - - const variable = try self.generate_expression_value(assignment_statement.rhs, null); - std.debug.print("7TYP {s}: {any} vs {any} -- {any}\n", .{ name, x.type, 
variable.node_type.TYPE, variable.node }); - std.debug.assert(self.compare_types(x.type, variable.node_type, assignment_statement.is_dereference)); - _ = llvm.LLVMBuildStore(self.builder, variable.value, x.value); - }, - else => unreachable, - } - } - - fn generate_function_call_statement(self: *CodeGen, statement: *parser.Node) CodeGenError!*Variable { - errdefer std.debug.print("Error generating function call statement\n", .{}); - std.debug.assert(statement.* == parser.Node.FUNCTION_CALL_STATEMENT); - const function_call_statement = statement.FUNCTION_CALL_STATEMENT; - - var node = statement; - - var function: *Variable = undefined; - switch (function_call_statement.expression.*) { - .PRIMARY_EXPRESSION => |primary_expression| { - std.debug.assert(primary_expression == .IDENTIFIER); - function = self.environment.get_variable(primary_expression.IDENTIFIER.name) orelse return CodeGenError.CompilationError; - if (llvm.LLVMGetValueKind(function.value) != llvm.LLVMFunctionValueKind) { - function.value = llvm.LLVMBuildLoad2(self.builder, llvm.LLVMPointerType(try self.get_llvm_type(function.node_type), 0), function.value, ""); - node = function.node; - } - }, - .FUNCTION_DEFINITION => |*function_definition| { - function = try self.generate_expression_value(@ptrCast(function_definition), null); - }, - else => unreachable, - } - - var arguments = std.ArrayList(llvm.LLVMValueRef).init(self.arena); - - for (0.., function_call_statement.arguments) |i, argument| { - const arg = try self.generate_expression_value(argument, null); - const expected_type = function.node_type.TYPE.FUNCTION_TYPE.parameters[i]; //TODO: If varargs we shouldnt do this - std.debug.print("2 TYP {s}: {any} vs {any}\n", .{ function_call_statement.expression.PRIMARY_EXPRESSION.IDENTIFIER.name, expected_type.TYPE, arg.node_type.TYPE }); - std.debug.assert(self.compare_types(expected_type, arg.node_type, false)); - try arguments.append(arg.value); - } - - const res = llvm.LLVMBuildCall2(self.builder, try 
self.get_llvm_type(function.node_type), function.value, @ptrCast(arguments.items), @intCast(arguments.items.len), "") orelse return CodeGenError.CompilationError; - - const get_function_return_type = struct { - fn call(iSelf: *CodeGen, fun: *parser.Node) *parser.Node { - switch (fun.*) { - .FUNCTION_DEFINITION => |x| { - return x.return_type; - }, - .PRIMARY_EXPRESSION => |x| { - const f = iSelf.environment.get_variable(x.IDENTIFIER.name).?.node_type; - std.debug.assert(f.TYPE == .FUNCTION_TYPE); - return call(iSelf, f); - }, - .TYPE => |x| { - return x.FUNCTION_TYPE.return_type; - }, - else => unreachable, - } - } - }; - - const function_return_type = get_function_return_type.call(self, function.node_type); - - std.debug.print("FN: {s} -> ret: {any}\n", .{ function_call_statement.expression.PRIMARY_EXPRESSION.IDENTIFIER.name, function_return_type }); - - return self.create_variable(.{ - .value = res, - .type = null, - .stack_level = null, - .node = node, - .node_type = function_return_type, - }) catch return CodeGenError.CompilationError; - } - - fn generate_return_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating return statement\n", .{}); - std.debug.assert(statement.* == parser.Node.RETURN_STATEMENT); - - const expression = statement.RETURN_STATEMENT.expression; - - if (expression == null) { - _ = llvm.LLVMBuildRetVoid(self.builder); - return; - } - - const val = try self.generate_expression_value(expression.?, null); - - std.debug.print("3TYP : {any} vs {any}\n", .{ self.current_function_return_type.?, val.node_type }); - std.debug.assert(self.compare_types(self.current_function_return_type.?, val.node_type, false)); - - _ = llvm.LLVMBuildRet(self.builder, val.value); - } - - fn generate_break_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating break statement\n", .{}); - std.debug.assert(statement.* == parser.Node.BREAK_STATEMENT); - 
std.debug.assert(self.while_loop_exit != null); - - _ = llvm.LLVMBuildBr(self.builder, self.while_loop_exit.?); - } - - fn generate_continue_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating continue statement\n", .{}); - std.debug.assert(statement.* == parser.Node.CONTINUE_STATEMENT); - std.debug.assert(self.while_block != null); - - _ = llvm.LLVMBuildBr(self.builder, self.while_block.?); - } - - fn generate_if_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating if statement\n", .{}); - std.debug.assert(statement.* == parser.Node.IF_STATEMENT); - - const if_statement = statement.IF_STATEMENT; - - const condition_value = try self.generate_expression_value(if_statement.condition, null); - - const current_block = llvm.LLVMGetInsertBlock(self.builder); - - const then_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "then_block"); - _ = llvm.LLVMPositionBuilderAtEnd(self.builder, then_block); - for (if_statement.statements) |stmt| { - try self.generate_statement(stmt); - } - const merge_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "merge_block"); - const last_instr = llvm.LLVMGetLastInstruction(llvm.LLVMGetInsertBlock(self.builder)); - if (last_instr == null or llvm.LLVMIsATerminatorInst(last_instr) == null) { - _ = llvm.LLVMBuildBr(self.builder, merge_block); - } - llvm.LLVMPositionBuilderAtEnd(self.builder, current_block); - - _ = llvm.LLVMBuildCondBr(self.builder, condition_value.value, then_block, merge_block); - llvm.LLVMPositionBuilderAtEnd(self.builder, merge_block); - } - - fn generate_while_statement(self: *CodeGen, statement: *parser.Node) !void { - errdefer std.debug.print("Error generating while statement\n", .{}); - std.debug.assert(statement.* == parser.Node.WHILE_STATEMENT); - - const while_statement = statement.WHILE_STATEMENT; - - const while_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "while_block"); - _ = 
llvm.LLVMBuildBr(self.builder, while_block); - _ = llvm.LLVMPositionBuilderAtEnd(self.builder, while_block); - const condition_value = try self.generate_expression_value(while_statement.condition, null); - - const inner_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "inner_block"); - const outer_block = llvm.LLVMAppendBasicBlock(self.current_function.?, "outer_block"); - _ = llvm.LLVMBuildCondBr(self.builder, condition_value.value, inner_block, outer_block); - - self.while_loop_exit = outer_block; - self.while_block = while_block; - defer { - self.while_block = null; - self.while_loop_exit = null; - } - - _ = llvm.LLVMPositionBuilderAtEnd(self.builder, inner_block); - for (while_statement.statements) |stmt| { - try self.generate_statement(stmt); - } - - _ = llvm.LLVMBuildBr(self.builder, while_block); - - llvm.LLVMPositionBuilderAtEnd(self.builder, outer_block); - } - - fn generate_import_declaration(self: *CodeGen, declaration: *parser.Node) !void { - errdefer std.debug.print("Error generating import declaration\n", .{}); - std.debug.assert(declaration.* == parser.Node.IMPORT_DECLARATION); - - const import_declaration = declaration.IMPORT_DECLARATION; - - try self.generate(import_declaration.program); - } - - fn generate_expression_value(self: *CodeGen, expression: *parser.Node, name: ?[]const u8) CodeGenError!*Variable { - errdefer std.debug.print("Error generating statement value\n", .{}); - return switch (expression.*) { - .FUNCTION_DEFINITION => |function_definition| { - - // Functions should be declared "globally" - const builder_pos = llvm.LLVMGetInsertBlock(self.builder); - - var llvm_param_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - var param_types = std.ArrayList(*parser.Node).init(self.arena); - var is_varargs: i8 = 0; - for (function_definition.parameters) |param| { - std.debug.assert(param.PRIMARY_EXPRESSION == .IDENTIFIER); - const param_type = param.PRIMARY_EXPRESSION.IDENTIFIER.type.?; - if (param_type.TYPE == 
.SIMPLE_TYPE and std.mem.eql(u8, param_type.TYPE.SIMPLE_TYPE.name, "varargs")) { - is_varargs = 1; - } - var llvm_param_type = try self.get_llvm_type(param_type); - if (param_type.TYPE == .FUNCTION_TYPE) { - llvm_param_type = llvm.LLVMPointerType(llvm_param_type, 0); - } - try llvm_param_types.append(llvm_param_type); - try param_types.append(param_type); - } - var return_type = try self.get_llvm_type(function_definition.return_type); - if (function_definition.return_type.TYPE == .FUNCTION_TYPE) { - return_type = llvm.LLVMPointerType(return_type, 0); - } - var function: llvm.LLVMValueRef = null; - if (name != null) { - if (self.environment.get_variable(name.?)) |x| { - // If the function has been forward declared, we reuse its declaration - function = x.value; - } - } - if (function == null) { - const function_type = llvm.LLVMFunctionType(return_type, llvm_param_types.items.ptr, @intCast(llvm_param_types.items.len), is_varargs) orelse return CodeGenError.CompilationError; - function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name orelse "unnamed_func"}), function_type) orelse return CodeGenError.CompilationError; - } - const function_entry = llvm.LLVMAppendBasicBlock(function, "entrypoint") orelse return CodeGenError.CompilationError; - llvm.LLVMPositionBuilderAtEnd(self.builder, function_entry); - - try self.environment.create_scope(); - const last_function = self.current_function; - self.current_function = function; - const last_return_type = self.current_function_return_type; - self.current_function_return_type = function_definition.return_type; - defer { - self.current_function = last_function; - self.current_function_return_type = last_return_type; - self.environment.drop_scope(); - } - - const node_type = try self.create_node(.{ - .TYPE = .{ - .FUNCTION_TYPE = .{ - .parameters = param_types.items, - .return_type = function_definition.return_type, - }, - }, - }); - - // Needed for recursive functions - if (name != 
null) { - try self.environment.add_variable(name.?, try self.create_variable(.{ - .value = function, - .type = null, - .stack_level = null, - .node = expression, - .node_type = node_type, - })); - } - - const params = try self.arena.alloc(llvm.LLVMValueRef, function_definition.parameters.len); - llvm.LLVMGetParams(function, params.ptr); - - var parameters_index: usize = 0; - for (params) |p| { - defer parameters_index += 1; - const param_node = function_definition.parameters[parameters_index]; - std.debug.assert(param_node.* == .PRIMARY_EXPRESSION); - - const param_type = param_node.PRIMARY_EXPRESSION.IDENTIFIER.type.?; - var llvm_param_type = try self.get_llvm_type(param_type); - if (param_node.PRIMARY_EXPRESSION.IDENTIFIER.type.?.TYPE == .FUNCTION_TYPE) { - llvm_param_type = llvm.LLVMPointerType(llvm_param_type.?, 0); - } - // We need to alloca params because we assume all identifiers are alloca - const alloca = llvm.LLVMBuildAlloca(self.builder, llvm_param_type, try std.fmt.allocPrintZ(self.arena, "{s}", .{param_node.PRIMARY_EXPRESSION.IDENTIFIER.name})); - _ = llvm.LLVMBuildStore(self.builder, p, alloca); - - try self.environment.add_variable(param_node.PRIMARY_EXPRESSION.IDENTIFIER.name, try self.create_variable(.{ - .value = alloca, - .type = null, - .stack_level = null, - .node = param_node, - .node_type = param_type, - })); - } - - for (function_definition.statements) |stmt| { - try self.generate_statement(stmt); - } - - // TODO: This should be done with a defer when `builder_pos` is declared, but for some reason it doesn't work - llvm.LLVMPositionBuilderAtEnd(self.builder, builder_pos); - - return try self.create_variable(.{ - .value = function, - .type = null, - .stack_level = null, - .node = expression, - .node_type = node_type, - }); - }, - .FUNCTION_CALL_STATEMENT => |*fn_call| { - return try self.generate_function_call_statement(@ptrCast(fn_call)); - }, - .STRUCT_INSTANCIATION => |struct_instanciation| { - return 
self.environment.get_variable(struct_instanciation.typ).?; - }, - .PRIMARY_EXPRESSION => |primary_expression| switch (primary_expression) { - .NULL => { - return try self.generate_literal(llvm.LLVMConstNull(llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0)), name, expression, try self.create_node(.{ - .TYPE = .{ - .POINTER_TYPE = .{ - .type = try self.create_node(.{ - .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "void", - .underlying_type = null, - } }, - }), - }, - }, - })); - }, - .NUMBER => |n| { - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt64Type(), @intCast(n.value), 0), name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - }, - }, - })); - }, - .BOOLEAN => |b| { - const int_value: i64 = switch (b.value) { - false => 0, - true => 1, - }; - - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "bool", - .underlying_type = null, - }, - }, - })); - }, - .CHAR => |c| { - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt8Type(), @intCast(c.value), 0), name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i8", - .underlying_type = null, - }, - }, - })); - }, - .STRING => |s| { - const x = llvm.LLVMBuildGlobalStringPtr(self.builder, try std.fmt.allocPrintZ(self.arena, "{s}", .{s.value}), ""); - return self.create_variable( - .{ - .value = x, - .type = null, - .stack_level = null, - .node = expression, - .node_type = try self.create_node(.{ - .TYPE = .{ - .POINTER_TYPE = .{ - .type = try self.create_node(.{ - .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "i8", - .underlying_type = null, - } }, - }), - }, - }, - }), - }, - ); - }, - .IDENTIFIER => |i| { - const variable = self.environment.get_variable(i.name).?; - var param_value = variable.value; - if (variable.node_type.TYPE != .FUNCTION_TYPE or variable.stack_level != 0) { - 
var param_type = try self.get_llvm_type(variable.node_type); - if (variable.node_type.TYPE == .FUNCTION_TYPE) { - param_type = llvm.LLVMPointerType(param_type.?, 0); - } - param_value = llvm.LLVMBuildLoad2(self.builder, param_type, variable.value, ""); - } - - return self.generate_literal(param_value, name, expression, variable.node_type); - }, - }, - .ADDITIVE_EXPRESSION => |exp| { - const lhs_value = try self.generate_expression_value(exp.lhs, null); - const rhs_value = try self.generate_expression_value(exp.rhs, null); - - std.debug.print("4 TYP {s}: {any} vs {any}\n", .{ name orelse "unknown", lhs_value.node_type.TYPE, rhs_value.node_type.TYPE }); - std.debug.assert(self.compare_types(lhs_value.node_type, rhs_value.node_type, false)); - - var result: llvm.LLVMValueRef = undefined; - var node_type: *parser.Node = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - } } }); - - if (exp.addition) { - if (lhs_value.node_type.TYPE == .POINTER_TYPE) { - result = llvm.LLVMBuildGEP2(self.builder, try self.get_llvm_type(lhs_value.node_type.TYPE.POINTER_TYPE.type), lhs_value.value, @constCast(&[_]llvm.LLVMValueRef{rhs_value.value}), 1, ""); - node_type = lhs_value.node_type; - } else { - result = llvm.LLVMBuildAdd(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - } - } else { - result = llvm.LLVMBuildSub(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - } - - return self.generate_literal(result, name, expression, node_type); - }, - .MULTIPLICATIVE_EXPRESSION => |exp| { - const lhs_value = try self.generate_expression_value(exp.lhs, null); - const rhs_value = try self.generate_expression_value(exp.rhs, null); - - std.debug.print("5 TYP {s}: {any} vs {any}\n", .{ name orelse "unknown", lhs_value.node_type.TYPE, rhs_value.node_type.TYPE }); - std.debug.assert(self.compare_types(lhs_value.node_type, rhs_value.node_type, false)); - - 
var result: llvm.LLVMValueRef = undefined; - switch (exp.typ) { - .MUL => { - result = llvm.LLVMBuildMul(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - }, - .DIV => { - result = llvm.LLVMBuildSDiv(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - }, - .MOD => { - result = llvm.LLVMBuildSRem(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; - }, - } - - return self.generate_literal(result, name, expression, lhs_value.node_type); - }, - .UNARY_EXPRESSION => |exp| { - const k = try self.generate_expression_value(exp.expression, null); - - var r: llvm.LLVMValueRef = undefined; - var typ: *parser.Node = k.node_type; - switch (exp.typ) { - .NOT => { - std.debug.assert(std.mem.eql(u8, k.node_type.TYPE.SIMPLE_TYPE.name, "bool")); - r = llvm.LLVMBuildICmp(self.builder, llvm.LLVMIntEQ, k.value, llvm.LLVMConstInt(llvm.LLVMInt1Type(), 0, 0), ""); - typ = try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "bool", - .underlying_type = null, - }, - }, - }); - }, - .MINUS => { - r = llvm.LLVMBuildNeg(self.builder, k.value, ""); - typ = try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - }, - }, - }); - }, - .STAR => { - std.debug.assert(k.node_type.TYPE == .POINTER_TYPE); - typ = k.node_type.TYPE.POINTER_TYPE.type; - r = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(typ), k.value, ""); - }, - } - - return self.generate_literal(r, name, expression, typ); - }, - .EQUALITY_EXPRESSION => |exp| { - const lhs_value = try self.generate_expression_value(exp.lhs, null); - const rhs_value = try self.generate_expression_value(exp.rhs, null); - - std.debug.print("6 TYP {s}: {any} vs {any}\n", .{ name orelse "unknown", lhs_value.node_type.TYPE, rhs_value.node_type.TYPE }); - std.debug.assert(self.compare_types(lhs_value.node_type, rhs_value.node_type, false)); - - const 
op: c_uint = switch (exp.typ) { - .EQ => llvm.LLVMIntEQ, - .NE => llvm.LLVMIntNE, - .GE => llvm.LLVMIntSGE, - .LE => llvm.LLVMIntSLE, - .LT => llvm.LLVMIntSLT, - .GT => llvm.LLVMIntSGT, - }; - const cmp = llvm.LLVMBuildICmp(self.builder, op, lhs_value.value, rhs_value.value, ""); - - return self.generate_literal(cmp, name, expression, try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "bool", - .underlying_type = null, - }, - }, - })); - }, - .TYPE => |typ| { - switch (typ) { - .FUNCTION_TYPE => { - std.debug.assert(self.environment.scope_stack.items.len == 1); - - const variable = self.environment.get_variable(name.?); - if (variable) |v| { - return v; - } - - const function_type = try self.get_llvm_type(expression); - const function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}), function_type); - - return try self.create_variable(.{ - .value = function, - .type = null, - .stack_level = null, - .node = expression, - .node_type = expression, - }); - }, - .STRUCT_TYPE => |t| { - const simple_type_node = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{ - .name = name.?, - .underlying_type = expression, - } } }); - const struct_type = llvm.LLVMStructCreateNamed(self.llvm_context, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})); - - // Needed for recursive structs - if (name != null) { - try self.environment.add_variable(name.?, try self.create_variable(.{ - .value = null, - .type = struct_type, - .stack_level = null, - .node = expression, - .node_type = simple_type_node, - })); - } - - var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - - for (t.fields) |field| { - try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?)); - } - llvm.LLVMStructSetBody(struct_type, llvm_types.items.ptr, @intCast(llvm_types.items.len), 0); - return try self.create_variable(.{ - .value = null, - .type = struct_type, - .stack_level = null, - .node = expression, - 
.node_type = simple_type_node, - }); - }, - .SIMPLE_TYPE => |t| { - return try self.create_variable(.{ - .value = null, - .type = try self.get_llvm_type(t.underlying_type.?), - .stack_level = null, - .node = expression, - .node_type = t.underlying_type.?, - }); - }, - else => unreachable, - } - }, - .CAST_STATEMENT => |exp| { - const val = try self.generate_expression_value(exp.expression, ""); - return try self.create_variable(.{ - .value = val.value, //TODO: do real casting - .type = null, - .stack_level = null, - .node = expression, - .node_type = exp.typ, - }); - }, - .SIZEOF_STATEMENT => |exp| { - const typ = try self.get_llvm_type(exp.typ); - const size_in_bytes = llvm.LLVMStoreSizeOfType(self.llvm_target_data, typ); - - const size_val = llvm.LLVMConstInt(llvm.LLVMInt64Type(), size_in_bytes, 0); - - return try self.create_variable(.{ - .value = size_val, - .type = null, - .node_type = try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "i64", - .underlying_type = null, - }, - }, - }), - .stack_level = null, - .node = expression, - }); - }, - .FIELD_ACCESS => |exp| { - const x = try self.get_struct_field(exp.expression, exp.name); - const loaded = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(x.type), x.value, ""); - - return try self.create_variable(.{ - .value = loaded, - .type = null, - .stack_level = null, - .node = expression, - .node_type = x.type, - }); - }, - else => unreachable, - }; - } - - fn generate_literal(self: *CodeGen, literal_val: llvm.LLVMValueRef, name: ?[]const u8, node: *parser.Node, node_type: *parser.Node) !*Variable { - if (name != null and self.environment.scope_stack.items.len == 1) { - const ptr = try self.create_variable(.{ - .value = llvm.LLVMAddGlobal(self.llvm_module, try self.get_llvm_type(node_type), try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})), - .type = null, - .stack_level = null, - .node = node, - .node_type = node_type, - }); - llvm.LLVMSetInitializer(ptr.value, literal_val); - return 
ptr; - } - - return try self.create_variable(.{ - .value = literal_val, - .type = null, - .stack_level = null, - .node = node, - .node_type = node_type, - }); - } - - fn get_struct_field(self: *CodeGen, node: *parser.Node, name: []const u8) !struct { value: llvm.LLVMValueRef, type: *parser.Node } { - var ptr: *Variable = undefined; - switch (node.*) { - .PRIMARY_EXPRESSION => { - ptr = self.environment.get_variable(node.PRIMARY_EXPRESSION.IDENTIFIER.name).?; - }, - .UNARY_EXPRESSION => { - ptr = try self.generate_expression_value(node.UNARY_EXPRESSION.expression, ""); - }, - else => unreachable, - } - - var typ: *parser.Node = undefined; - if (ptr.node_type.TYPE == .STRUCT_TYPE) { - typ = ptr.node_type; - } else if (ptr.node_type.TYPE == .POINTER_TYPE) { - typ = self.environment.get_variable(ptr.node_type.TYPE.POINTER_TYPE.type.TYPE.SIMPLE_TYPE.name).?.node_type; //TODO: we shouldnt be able to get fields of pointers, we have to dref first. - } else if (ptr.node_type.TYPE == .SIMPLE_TYPE) { - typ = self.environment.get_variable(ptr.node_type.TYPE.SIMPLE_TYPE.name).?.node_type; - } else { - unreachable; - } - var fieldIndex: ?usize = null; - for (0.., typ.TYPE.SIMPLE_TYPE.underlying_type.?.TYPE.STRUCT_TYPE.fields) |i, field| { - if (std.mem.eql(u8, name, field.PRIMARY_EXPRESSION.IDENTIFIER.name)) { - fieldIndex = i; - break; - } - } - if (fieldIndex == null) unreachable; - - const zero = llvm.LLVMConstInt(llvm.LLVMInt32Type(), 0, 0); - const llvmFieldIndex = llvm.LLVMConstInt(llvm.LLVMInt32Type(), fieldIndex.?, 0); - const indices = @constCast(&[_]llvm.LLVMValueRef{ zero, llvmFieldIndex }); - - return .{ - .value = llvm.LLVMBuildGEP2(self.builder, try self.get_llvm_type(typ), ptr.value, indices, indices.len, try std.fmt.allocPrintZ(self.arena, "{s}", .{name})), - .type = typ.TYPE.SIMPLE_TYPE.underlying_type.?.TYPE.STRUCT_TYPE.fields[fieldIndex.?].PRIMARY_EXPRESSION.IDENTIFIER.type.?, - }; - } - - fn get_llvm_type(self: *CodeGen, node: *parser.Node) !llvm.LLVMTypeRef 
{ - std.debug.assert(node.* == parser.Node.TYPE); - const type_node = node.TYPE; - - switch (type_node) { - .SIMPLE_TYPE => |t| { - if (std.mem.eql(u8, t.name, "i8")) return llvm.LLVMInt8Type(); - if (std.mem.eql(u8, t.name, "i64")) return llvm.LLVMInt64Type(); - if (std.mem.eql(u8, t.name, "bool")) return llvm.LLVMInt1Type(); - if (std.mem.eql(u8, t.name, "void")) return llvm.LLVMVoidType(); - if (std.mem.eql(u8, t.name, "varargs")) return llvm.LLVMPointerType(llvm.LLVMInt64Type(), 0); // Hack for varargs (only used for printf) - if (self.environment.get_variable(t.name)) |v| { - std.debug.assert(v.type != null); - return v.type; - } - std.debug.print("Unknown type: {s}\n", .{t.name}); - unreachable; - }, - .FUNCTION_TYPE => |t| { - var return_type = try self.get_llvm_type(t.return_type); - if (t.return_type.TYPE == .FUNCTION_TYPE) { - return_type = llvm.LLVMPointerType(return_type, 0); - } - var paramtypes = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - var is_varargs: i8 = 0; - for (t.parameters) |param| { - if (param.TYPE == .SIMPLE_TYPE and std.mem.eql(u8, param.TYPE.SIMPLE_TYPE.name, "varargs")) { - is_varargs = 1; - continue; - } - var typ = try self.get_llvm_type(param); - if (param.TYPE == .FUNCTION_TYPE) { - typ = llvm.LLVMPointerType(typ, 0); - } - try paramtypes.append(typ); - } - const function_type = llvm.LLVMFunctionType(return_type, paramtypes.items.ptr, @intCast(paramtypes.items.len), is_varargs) orelse unreachable; - return function_type; - }, - .POINTER_TYPE => |t| { - const inner_type = try self.get_llvm_type(t.type); - return llvm.LLVMPointerType(inner_type, 0); - }, - .STRUCT_TYPE => |t| { - var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); - - for (t.fields) |field| { - try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?)); - } - - return llvm.LLVMStructType(llvm_types.items.ptr, @intCast(llvm_types.items.len), 0); - }, - } - } - - fn compare_types(self: *CodeGen, a: *parser.Node, b: 
*parser.Node, is_dereference: bool) bool { - std.debug.assert(a.* == parser.Node.TYPE); - std.debug.assert(b.* == parser.Node.TYPE); - - var a_type = a.TYPE; - const b_type = b.TYPE; - - if (a_type == .SIMPLE_TYPE and std.mem.eql(u8, "varargs", a_type.SIMPLE_TYPE.name)) { - return true; - } - - if (is_dereference) { - a_type = a_type.POINTER_TYPE.type.TYPE; - } - - if (!std.mem.eql(u8, @tagName(a_type), @tagName(b_type))) { - std.debug.print("Tagname mismatch: {any} vs {any}\n", .{ a_type, b_type }); - return false; - } - - switch (a_type) { - .SIMPLE_TYPE => |a_simple| { - const b_simple = b_type.SIMPLE_TYPE; - const res = std.mem.eql(u8, a_simple.name, b_simple.name); - if (!res) { - std.debug.print("Simple type name mismatch: '{s}' vs '{s}'\n", .{ a_simple.name, b_simple.name }); - } - return res; - }, - .FUNCTION_TYPE => |a_func| { - const b_func = b_type.FUNCTION_TYPE; - - if (!self.compare_types(a_func.return_type, b_func.return_type, false)) { - std.debug.print("Function return type mismatch\n", .{}); - return false; - } - - if (a_func.parameters.len != b_func.parameters.len) { - std.debug.print("Parameter count mismatch: {} vs {}\n", .{ a_func.parameters.len, b_func.parameters.len }); - return false; - } - - for (a_func.parameters, b_func.parameters) |a_param, b_param| { - if (!self.compare_types(a_param, b_param, false)) { - std.debug.print("Parameter type mismatch\n", .{}); - return false; - } - } - - return true; - }, - .POINTER_TYPE => |a_ptr| { - const b_ptr = b_type.POINTER_TYPE; - - const res = self.compare_types(a_ptr.type, b_ptr.type, false); - if (!res) { - std.debug.print("Pointer base type mismatch\n", .{}); - } - return res; - }, - .STRUCT_TYPE => |a_struct| { - const b_struct = b_type.STRUCT_TYPE; - - if (a_struct.fields.len != b_struct.fields.len) return false; - - for (0.., a_struct.fields) |i, f| { - if (!self.compare_types(f, b_struct.fields[i], false)) { - return false; - } - } - return true; - }, - } - } - - fn create_variable(self: 
*CodeGen, variable_value: Variable) !*Variable { - const variable = try self.arena.create(Variable); - variable.* = variable_value; - return variable; - } - - fn create_node(self: *CodeGen, node_value: parser.Node) !*parser.Node { - const node = try self.arena.create(parser.Node); - node.* = node_value; - return node; - } -}; - -const Variable = struct { - value: llvm.LLVMValueRef, - type: llvm.LLVMTypeRef, - node: *parser.Node, - node_type: *parser.Node, - stack_level: ?usize, -}; - -const Scope = struct { - variables: std.StringHashMap(*Variable), -}; - -const Environment = struct { - scope_stack: std.ArrayList(*Scope), - - arena: std.mem.Allocator, - - fn init(arena_allocator: std.mem.Allocator) !*Environment { - const self = try arena_allocator.create(Environment); - - self.* = .{ - .scope_stack = std.ArrayList(*Scope).init(arena_allocator), - .arena = arena_allocator, - }; - - // Create global scope - try self.create_scope(); - - return self; - } - - fn create_scope(self: *Environment) !void { - const scope = try self.arena.create(Scope); - scope.* = .{ - .variables = std.StringHashMap(*Variable).init(self.arena), - }; - try self.scope_stack.append(scope); - } - - fn drop_scope(self: *Environment) void { - _ = self.scope_stack.pop(); - } - - fn add_variable(self: *Environment, name: []const u8, variable: *Variable) !void { - // TODO: Dont allow shadowing if value != value or type != type (across things) - try self.scope_stack.getLast().variables.put(name, variable); - } - - fn set_variable(self: *Environment, name: []const u8, variable: *Variable) !void { - self.get_variable(name).?.* = variable.*; - } - - fn get_variable(self: *Environment, name: []const u8) ?*Variable { - var i = self.scope_stack.items.len; - var variable: ?*Variable = null; - while (i > 0) { - i -= 1; - const scope = self.scope_stack.items[i]; - if (scope.variables.get(name)) |v| { - if (variable == null) { - variable = v; - } - variable.?.stack_level = i; - } - } - return variable; - } -}; 
diff --git a/src/bootstrap/llvm.pry b/src/llvm.pry
index 2feb815..2feb815 100644
--- a/src/bootstrap/llvm.pry
+++ b/src/llvm.pry
diff --git a/src/bootstrap/main.pry b/src/main.pry
index a564965..a564965 100644
--- a/src/bootstrap/main.pry
+++ b/src/main.pry
diff --git a/src/main.zig b/src/main.zig
deleted file mode 100644
index 79def61..0000000
--- a/src/main.zig
+++ /dev/null
@@ -1,49 +0,0 @@
const std = @import("std");
const tokenizer = @import("tokenizer.zig");
const parser = @import("parser.zig");
const codegen = @import("codegen.zig");

/// Compiler entry point.
///
/// Reads the source file named by the first command-line argument, runs it
/// through `process_buf` and finally calls `CodeGen.compile`.
///
/// Returns `error.MissingSourcePath` when no path argument was supplied and
/// propagates any error raised while opening/reading the file or while
/// tokenizing, parsing or generating code.
pub fn main() !void {
    // argv[1] does not exist when the binary is started without arguments;
    // indexing it unconditionally would be an out-of-bounds access.
    if (std.os.argv.len < 2) {
        std.debug.print("Missing argument: expected the path of a source file\n", .{});
        return error.MissingSourcePath;
    }
    // argv entries are NUL-terminated C strings; span() turns one into a slice.
    const path = std.mem.span(std.os.argv[1]);

    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const allocator = gpa.allocator();
    defer {
        const deinit_status = gpa.deinit();
        if (deinit_status == .leak) @panic("Memory leak detected!");
    }

    // Everything handed to the tokenizer/parser/codegen is allocated from this
    // arena and released in one go when main returns.
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();

    std.debug.print("Tokenizing! {s}\n", .{path});
    const file = try std.fs.cwd().openFile(path, .{});
    // Release the file handle on every exit path (it was previously leaked).
    defer file.close();

    // The source buffer is owned by the general purpose allocator, not the
    // arena, so it is freed explicitly. Reads are capped at 1 MiB.
    const buf = try file.readToEndAlloc(allocator, 1 * 1024 * 1024);
    defer allocator.free(buf);

    const source_codegen = try codegen.CodeGen.init(arena.allocator());
    defer source_codegen.deinit();

    try process_buf(
        buf,
        arena.allocator(),
        source_codegen,
        path,
    );
    source_codegen.compile();
}

/// Tokenizes `buf`, parses the resulting tokens into an AST and passes that
/// AST to `source_codegen.generate`.
///
/// `arena` is the allocator given to the tokenizer and the parser; `filename`
/// is forwarded to the parser. Returns `error.MissingCodeGen` when
/// `source_codegen` is null; all other errors come from the tokenizer, parser
/// or code generator.
fn process_buf(buf: []u8, arena: std.mem.Allocator, source_codegen: ?*codegen.CodeGen, filename: []const u8) !void {
    std.debug.print("Buf:\n{s}\n", .{buf});

    var source_tokenizer = try tokenizer.Tokenizer.init(buf, arena);
    const token_list = try source_tokenizer.tokenize();
    const source_parser = try parser.Parser.init(token_list, arena, filename);
    const ast = try source_parser.parse();
    std.debug.print("AST: {any}\n", .{ast});

    // Report a missing code generator as an error instead of force-unwrapping
    // the optional, which is undefined behaviour in unchecked release builds.
    const generator = source_codegen orelse return error.MissingCodeGen;
    try generator.generate(ast);
}

test {
    std.testing.refAllDecls(@This());
}
diff --git a/src/bootstrap/parser.pry b/src/parser.pry
index 0b448d0..0b448d0 100644
--- 
a/src/bootstrap/parser.pry +++ b/src/parser.pry diff --git a/src/parser.zig b/src/parser.zig deleted file mode 100644 index da3c71c..0000000 --- a/src/parser.zig +++ /dev/null @@ -1,1055 +0,0 @@ -const std = @import("std"); -const tokenizer = @import("tokenizer.zig"); - -const ParserError = error{ - ParsingError, - OutOfMemory, -}; - -pub const Node = union(enum) { - PROGRAM: struct { - statements: []*Node, - }, - STATEMENT: struct { - statement: *Node, - }, - ASSIGNMENT_STATEMENT: struct { - is_declaration: bool, - is_dereference: bool, - lhs: *Node, - rhs: *Node, - }, - IMPORT_DECLARATION: struct { - filename: []const u8, - program: *Node, - }, - FUNCTION_CALL_STATEMENT: struct { - expression: *Node, - arguments: []*Node, - }, - IF_STATEMENT: struct { - condition: *Node, - statements: []*Node, - }, - WHILE_STATEMENT: struct { - condition: *Node, - statements: []*Node, - }, - EQUALITY_EXPRESSION: struct { lhs: *Node, rhs: *Node, typ: EqualityExpressionType }, - ADDITIVE_EXPRESSION: struct { - addition: bool, - lhs: *Node, - rhs: *Node, - }, - MULTIPLICATIVE_EXPRESSION: struct { - lhs: *Node, - rhs: *Node, - typ: MultiplicativeExpressionType, - }, - UNARY_EXPRESSION: struct { - typ: enum { - NOT, - MINUS, - STAR, - }, - expression: *Node, - }, - POSTFIX_EXPRESSION: struct { - lhs: *Node, - rhs: ?*Node, - }, - PRIMARY_EXPRESSION: union(enum) { - NUMBER: struct { - value: i64, - }, - BOOLEAN: struct { - value: bool, - }, - NULL: void, - CHAR: struct { - value: u8, - }, - STRING: struct { - value: []const u8, - }, - IDENTIFIER: struct { - name: []const u8, - type: ?*Node, - }, - }, - FUNCTION_DEFINITION: struct { - statements: []*Node, - parameters: []*Node, - return_type: *Node, - }, - STRUCT_INSTANCIATION: struct { - typ: []const u8, - }, - FIELD_ACCESS: struct { - expression: *Node, - name: []const u8, - }, - TYPE: union(enum) { - SIMPLE_TYPE: struct { - name: []const u8, - underlying_type: ?*Node, - }, - FUNCTION_TYPE: struct { - parameters: []*Node, - 
return_type: *Node, - }, - POINTER_TYPE: struct { - type: *Node, - }, - STRUCT_TYPE: struct { - fields: []*Node, - }, - }, - RETURN_STATEMENT: struct { - expression: ?*Node, - }, - CAST_STATEMENT: struct { - typ: *Node, - expression: *Node, - }, - SIZEOF_STATEMENT: struct { - typ: *Node, - }, - BREAK_STATEMENT: void, - CONTINUE_STATEMENT: void, -}; - -pub const EqualityExpressionType = enum { - EQ, - NE, - GE, - LE, - LT, - GT, -}; - -pub const MultiplicativeExpressionType = enum { - MUL, - DIV, - MOD, -}; - -pub const Parser = struct { - filename: []const u8, - - tokens: []tokenizer.Token, - offset: u32, - - arena: std.mem.Allocator, - - try_context: bool, //TODO: I dont like this - - pub fn init(tokens: []tokenizer.Token, arena_allocator: std.mem.Allocator, filename: []const u8) ParserError!*Parser { - const parser = try arena_allocator.create(Parser); - parser.* = .{ - .filename = filename, - .tokens = tokens, - .offset = 0, - .arena = arena_allocator, - .try_context = false, - }; - return parser; - } - - pub fn parse(self: *Parser) !*Node { - return try self.parse_program(); - } - - // Program ::= Statement+ - fn parse_program(self: *Parser) !*Node { - var nodes = std.ArrayList(*Node).init(self.arena); - while (self.offset < self.tokens.len) { - try nodes.append(@constCast(try self.parse_statement())); - } - - return self.create_node(.{ .PROGRAM = .{ - .statements = try nodes.toOwnedSlice(), - } }); - } - - // Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | ReturnStatement | "break" | "continue") SEMICOLON - fn parse_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing statement {any}\n", .{self.peek_token()}); - - const statement = - self.accept_parse(parse_cast_statement) orelse //TODO: Can we not deal with cast / sizeof in parser? 
- self.accept_parse(parse_sizeof_statement) orelse - self.accept_parse(parse_function_call_statement) orelse - self.accept_parse(parse_if_statement) orelse - self.accept_parse(parse_while_statement) orelse - self.accept_parse(parse_return_statement) orelse - self.accept_parse(parse_assignment_statement) orelse - self.accept_parse(parse_import_declaration) orelse - self.accept_parse(parse_extern_declaration) orelse - self.accept_parse(struct { - fn parse_break_statement(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.BREAK); - return try iself.create_node(.{ - .BREAK_STATEMENT = void{}, - }); - } - }.parse_break_statement) orelse - self.accept_parse(struct { - fn parse_continue_statement(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.CONTINUE); - return try iself.create_node(.{ - .CONTINUE_STATEMENT = void{}, - }); - } - }.parse_continue_statement); - - _ = try self.parse_token(tokenizer.TokenType.SEMICOLON); - - return self.create_node(.{ - .STATEMENT = .{ - .statement = statement.?, - }, - }); - } - - // AssignmentStatement ::= ("let")? ("*")? 
Expression EQUALS Expression - fn parse_assignment_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing assignment statement {any}\n", .{self.peek_token()}); - - var is_declaration = false; - if (self.accept_token(.LET) != null) { - is_declaration = true; - } - - var is_dereference = false; - if (self.accept_token(.MUL) != null) { - is_dereference = true; - } - - const lhs = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.EQUALS); - - const rhs = try self.parse_expression(); - - return self.create_node(.{ - .ASSIGNMENT_STATEMENT = .{ - .is_declaration = is_declaration, - .is_dereference = is_dereference, - .lhs = lhs, - .rhs = rhs, - }, - }); - } - - // ImportDeclaration ::= "import" STRING - fn parse_import_declaration(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing import declaration {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(.IMPORT); - - const expr = try self.parse_primary_expression(); - - std.debug.assert(expr.PRIMARY_EXPRESSION == .STRING); - - var import_filename = expr.PRIMARY_EXPRESSION.STRING.value; - var current_file = self.filename; - - // stdlib. 
TODO: this is very hacky and won't work if running the compiler binary by itself - if (import_filename.ptr[0] == '!') { - import_filename = std.fmt.allocPrint(self.arena, "./std/{s}", .{import_filename[1..]}) catch return ParserError.OutOfMemory; - current_file = "."; - } - - // Open the directory containing current_file - const dir_path = std.fs.path.dirname(current_file) orelse "."; - var dir = std.fs.cwd().openDir(dir_path, .{}) catch { - std.debug.print("Couldn't open directory {s}\n", .{current_file}); - return ParserError.OutOfMemory; - }; - defer dir.close(); - - // Open the target file - const file = dir.openFile(import_filename, .{}) catch { - std.debug.print("Couldn't open file {s}\n", .{import_filename}); - return ParserError.OutOfMemory; - }; - defer file.close(); - - // Read file contents - const buf = file.readToEndAlloc(self.arena, 1 * 1024 * 1024) catch return ParserError.OutOfMemory; - - // Initialize tokenizer and parse - var inner_tokenizer = try tokenizer.Tokenizer.init(buf, self.arena); - const tokens = inner_tokenizer.tokenize() catch return ParserError.OutOfMemory; - - // Resolve the full path of the imported file - const full_path = try std.fs.path.resolve(self.arena, &.{ dir_path, import_filename }); - - const inner_parser = try Parser.init(tokens, self.arena, full_path); - const ast = try inner_parser.parse(); - - return self.create_node(.{ - .IMPORT_DECLARATION = .{ - .filename = import_filename, - .program = ast, - }, - }); - } - - // ExternDeclaration ::= "extern" IDENTIFIER EQUALS Type - fn parse_extern_declaration(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing extern declaration {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(.EXTERN); - - const identifier = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - _ = try self.parse_token(tokenizer.TokenType.EQUALS); - - const typ = try self.parse_type(); - - return self.create_node(.{ - .ASSIGNMENT_STATEMENT = .{ - 
.is_declaration = true, - .is_dereference = false, - .lhs = try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, identifier.type.IDENTIFIER), - .type = null, - }, - }, - }), - .rhs = @constCast(typ), - }, - }); - } - - // FunctionCallStatement ::= (IDENTIFIER | FunctionDefinition) LPAREN FunctionArguments? RPAREN - fn parse_function_call_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function call statement {any}\n", .{self.peek_token()}); - - const identifier = self.accept_token(tokenizer.TokenType.IDENTIFIER); - const fn_def = self.accept_parse(parse_function_definition); - - if (identifier == null and fn_def == null) return ParserError.ParsingError; - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const arguments = try self.parse_function_arguments(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - if (fn_def != null) { - return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ - .expression = fn_def.?, - .arguments = arguments, - } }); - } - - return self.create_node(.{ .FUNCTION_CALL_STATEMENT = .{ - .expression = try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, identifier.?.type.IDENTIFIER), - .type = null, - }, - }, - }), - .arguments = arguments, - } }); - } - - // FunctionArguments ::= Expression ("," Expression)* - fn parse_function_arguments(self: *Parser) ParserError![]*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function arguments {any}\n", .{self.peek_token()}); - - var node_list = std.ArrayList(*Node).init(self.arena); - - var first = true; - while (true) { - if (!first) { - _ = self.accept_token(tokenizer.TokenType.COMMA); - } - first = false; - const expr = self.accept_parse(parse_expression) orelse return node_list.items; - try node_list.append(expr); - } - - return node_list.items; - } - - // IfStatement ::= "if" Expression 
LBRACE Statement* RBRACE - fn parse_if_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing if statement {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.IF); - - const expression = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var statements = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_statement)) |expr| { - try statements.append(expr); - } - - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return try self.create_node(.{ .IF_STATEMENT = .{ - .condition = expression, - .statements = statements.items, - } }); - } - - // WhileStatement ::= "while" Expression LBRACE Statement* RBRACE - fn parse_while_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing while statement {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.WHILE); - - const expression = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var statements = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_statement)) |expr| { - try statements.append(expr); - } - - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return try self.create_node(.{ .WHILE_STATEMENT = .{ - .condition = expression, - .statements = statements.items, - } }); - } - - // Expression ::= EqualityExpression | AdditiveExpression - fn parse_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing expression {any}\n", .{self.peek_token()}); - - return self.accept_parse(parse_equality_expression) orelse - self.accept_parse(parse_additive_expression) orelse - return ParserError.ParsingError; - } - - // EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression - fn parse_equality_expression(self: *Parser) ParserError!*Node 
{ - errdefer if (!self.try_context) std.debug.print("Error parsing equality expression {any}\n", .{self.peek_token()}); - - const lhs = try self.parse_additive_expression(); - - var typ: EqualityExpressionType = undefined; - - if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .EQ; - } else if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.BANG); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .NE; - } else if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.LESS); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .LE; - } else if (self.accept_parse(struct { - fn parse(iself: *Parser) ParserError!*Node { - _ = try iself.parse_token(tokenizer.TokenType.GREATER); - _ = try iself.parse_token(tokenizer.TokenType.EQUALS); - return try iself.create_node(.{ .PROGRAM = .{ - .statements = &[_]*Node{}, - } }); - } - }.parse) != null) { - typ = .GE; - } else if (self.accept_token(tokenizer.TokenType.LESS) != null) { - typ = .LT; - } else if (self.accept_token(tokenizer.TokenType.GREATER) != null) { - typ = .GT; - } else { - return ParserError.ParsingError; - } - - const rhs = try self.parse_additive_expression(); - - return self.create_node(.{ .EQUALITY_EXPRESSION = .{ - .lhs = lhs, - .rhs = rhs, - .typ = typ, - } }); - } - - // AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* - fn 
parse_additive_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing additive expression {any}\n", .{self.peek_token()}); - - var lhs = try self.parse_multiplicative_expression(); - - while (true) { - const plus = self.accept_token(tokenizer.TokenType.PLUS); - const minus = self.accept_token(tokenizer.TokenType.MINUS); - - if (plus == null and minus == null) break; - - const rhs = try self.parse_multiplicative_expression(); - - lhs = try self.create_node(.{ .ADDITIVE_EXPRESSION = .{ - .addition = plus != null, - .lhs = lhs, - .rhs = rhs, - } }); - } - - return lhs; - } - - // MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)* - fn parse_multiplicative_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing additive expression {any}\n", .{self.peek_token()}); - - var lhs = try self.parse_unary_expression(); - - while (true) { - var typ: MultiplicativeExpressionType = undefined; - if (self.accept_token(tokenizer.TokenType.MUL) != null) { - typ = .MUL; - } else if (self.accept_token(tokenizer.TokenType.DIV) != null) { - typ = .DIV; - } else if (self.accept_token(tokenizer.TokenType.MOD) != null) { - typ = .MOD; - } else { - break; - } - - const rhs = try self.parse_unary_expression(); - - lhs = try self.create_node(.{ .MULTIPLICATIVE_EXPRESSION = .{ - .lhs = lhs, - .rhs = rhs, - .typ = typ, - } }); - } - - return lhs; - } - - // UnaryExpression ::= ("!" 
| "-" | "*") UnaryExpression | PostfixExpression - fn parse_unary_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing unary expression {any}\n", .{self.peek_token()}); - - const not = self.accept_token(tokenizer.TokenType.BANG) != null; - const minus = self.accept_token(tokenizer.TokenType.MINUS) != null; - const star = self.accept_token(tokenizer.TokenType.MUL) != null; - - if (!not and !minus and !star) { - return try self.parse_postfix_expression(); - } - - return self.create_node(.{ .UNARY_EXPRESSION = .{ - .typ = if (not) .NOT else if (minus) .MINUS else .STAR, - .expression = try self.parse_unary_expression(), - } }); - } - - // PostfixExpression ::= PrimaryExpression (CastStatement | SizeOfStatement | FunctionCallStatement | FieldAccess )* - fn parse_postfix_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing postfix expression {any}\n", .{self.peek_token()}); - - if (self.accept_parse(parse_cast_statement)) |stmt| { - return stmt; - } else if (self.accept_parse(parse_sizeof_statement)) |stmt| { - return stmt; - } else if (self.accept_parse(parse_function_call_statement)) |stmt| { - return stmt; - } else if (self.accept_parse(parse_field_access)) |stmt| { - return stmt; - } else { - return try self.parse_primary_expression(); - } - } - - // PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | TypeDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN - fn parse_primary_expression(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()}); - - if (self.accept_parse(parse_function_definition)) |stmt| return stmt; - if (self.accept_parse(parse_type_definition)) |stmt| return stmt; - if (self.accept_parse(parse_struct_definition)) |stmt| return stmt; - if 
(self.accept_parse(parse_struct_instanciation)) |stmt| return stmt; - - // LPAREN (Expression) RPAREN - if (self.accept_token(tokenizer.TokenType.LPAREN)) |_| { - const expr = try self.parse_expression(); - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - return expr; - } - - const token = self.consume_token() orelse return ParserError.ParsingError; - - return switch (token.type) { - .NULL => try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .NULL = void{} }, - }), - .NUMBER => |number_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .NUMBER = .{ - .value = number_token, - }, - }, - }), - .BOOLEAN => |boolean_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .BOOLEAN = .{ - .value = boolean_token, - } }, - }), - .CHAR => |char_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .CHAR = .{ - .value = char_token, - } }, - }), - .STRING => |string_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ .STRING = .{ - .value = try self.arena.dupe(u8, string_token), - } }, - }), - .IDENTIFIER => |identifier_token| try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, identifier_token), - .type = null, - }, - }, - }), - else => ParserError.ParsingError, - }; - } - - // FunctionDefinition ::= LPAREN FunctionParameters? 
RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE - fn parse_function_definition(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function definition {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const parameters = try self.parse_function_parameters(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - _ = try self.parse_token(tokenizer.TokenType.ARROW); - - const return_type = try self.parse_type(); - - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var nodes = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_statement)) |expression| { - try nodes.append(expression); - } - - if (nodes.items.len == 0 or nodes.getLast().STATEMENT.statement.* != .RETURN_STATEMENT) return ParserError.ParsingError; - - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return self.create_node(.{ .FUNCTION_DEFINITION = .{ - .statements = nodes.items, - .parameters = parameters, - .return_type = return_type, - } }); - } - - // FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* - fn parse_function_parameters(self: *Parser) ParserError![]*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function parameters {any}\n", .{self.peek_token()}); - - var node_list = std.ArrayList(*Node).init(self.arena); - - var first = true; - while (true) { - if (!first) { - _ = self.accept_token(tokenizer.TokenType.COMMA); - } - first = false; - const ident = self.accept_token(tokenizer.TokenType.IDENTIFIER) orelse return node_list.items; - - _ = try self.parse_token(tokenizer.TokenType.COLON); - const type_annotation = try self.parse_type(); - - try node_list.append(try self.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try self.arena.dupe(u8, ident.type.IDENTIFIER), - .type = type_annotation, - }, - }, - })); - } - - return node_list.items; - } - - // TypeDefinition ::= 
"newtype" Type - fn parse_type_definition(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing type definition {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.TYPE); - - const typ = try self.parse_type(); - - return self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = "", - .underlying_type = typ, - }, - }, - }); - } - - // StructDefinition ::= "struct" LBRACE StructFields? RBRACE - fn parse_struct_definition(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing struct definition {any}\n", .{self.peek_token()}); - - // StructField ::= IDENTIFIER ":" Type - const parse_struct_field = struct { - fn call(iself: *Parser) ParserError!*Node { - const ident = try iself.parse_token(tokenizer.TokenType.IDENTIFIER); - _ = try iself.parse_token(tokenizer.TokenType.COLON); - const type_annotation = try iself.parse_type(); - - return iself.create_node(.{ - .PRIMARY_EXPRESSION = .{ - .IDENTIFIER = .{ - .name = try iself.arena.dupe(u8, ident.type.IDENTIFIER), - .type = type_annotation, - }, - }, - }); - } - }; - - _ = try self.parse_token(tokenizer.TokenType.STRUCT); - _ = try self.parse_token(tokenizer.TokenType.LBRACE); - - var fields = std.ArrayList(*Node).init(self.arena); - while (self.accept_parse(parse_struct_field.call)) |field| { - _ = self.accept_token(tokenizer.TokenType.COMMA); - try fields.append(field); - } - _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return self.create_node(.{ - .TYPE = .{ - .STRUCT_TYPE = .{ - .fields = fields.items, - }, - }, - }); - } - - // StructInstantiation ::= IDENTIFIER LBRACE RBRACE - fn parse_struct_instanciation(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing struct instanciation {any}\n", .{self.peek_token()}); - - const typ = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - _ = try self.parse_token(tokenizer.TokenType.LBRACE); 
- _ = try self.parse_token(tokenizer.TokenType.RBRACE); - - return self.create_node(.{ - .STRUCT_INSTANCIATION = .{ - .typ = try self.arena.dupe(u8, typ.type.IDENTIFIER), - }, - }); - } - - // FieldAccess ::= PrimaryExpression DOT IDENTIFIER - fn parse_field_access(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing field access {any}\n", .{self.peek_token()}); - - const expression = try self.parse_primary_expression(); - _ = try self.parse_token(tokenizer.TokenType.DOT); - const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - return self.create_node(.{ - .FIELD_ACCESS = .{ - .expression = expression, - .name = try self.arena.dupe(u8, ident.type.IDENTIFIER), - }, - }); - } - - // ReturnStatement ::= RETURN (Expression)? - fn parse_return_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing return statement {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.RETURN); - - const maybe_expression = self.accept_parse(parse_expression); - - return self.create_node(.{ - .RETURN_STATEMENT = .{ - .expression = maybe_expression, - }, - }); - } - - // CastStatement ::= "cast" LPAREN TYPE "," Expression RPAREN - fn parse_cast_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing cast statement {any}\n", .{self.peek_token()}); - - const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - if (!std.mem.eql(u8, "cast", ident.type.IDENTIFIER)) { - return ParserError.ParsingError; - } - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const typ = try self.parse_type(); - - _ = try self.parse_token(tokenizer.TokenType.COMMA); - - const expression = try self.parse_expression(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - return self.create_node(.{ - .CAST_STATEMENT = .{ - .typ = typ, - .expression = expression, - }, - }); - } - - // 
SizeOfStatement ::= "sizeof" LPAREN TYPE RPAREN - fn parse_sizeof_statement(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing sizeof statement {any}\n", .{self.peek_token()}); - - const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); - - if (!std.mem.eql(u8, "sizeof", ident.type.IDENTIFIER)) { - return ParserError.ParsingError; - } - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - const typ = try self.parse_type(); - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - return self.create_node(.{ - .SIZEOF_STATEMENT = .{ - .typ = typ, - }, - }); - } - - // Type ::= IDENTIFIER | FunctionType - fn parse_type(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing type annotation {any}\n", .{self.peek_token()}); - - return self.accept_parse(parse_function_type) orelse switch (self.consume_token().?.type) { - .MUL => { - return self.create_node(.{ - .TYPE = .{ - .POINTER_TYPE = .{ - .type = try self.parse_type(), - }, - }, - }); - }, - .IDENTIFIER => |ident| { - //TODO: we should only accept specific type identifiers - return try self.create_node(.{ - .TYPE = .{ - .SIMPLE_TYPE = .{ - .name = try self.arena.dupe(u8, ident), - .underlying_type = null, - }, - }, - }); - }, - else => ParserError.ParsingError, - }; - } - - // FunctionType ::= LPAREN (Type ("," Type)*)? 
RPAREN ARROW Type - fn parse_function_type(self: *Parser) ParserError!*Node { - errdefer if (!self.try_context) std.debug.print("Error parsing function type {any}\n", .{self.peek_token()}); - - _ = try self.parse_token(tokenizer.TokenType.LPAREN); - - var parameters = std.ArrayList(*Node).init(self.arena); - var first = true; - while (true) { - if (!first) { - _ = self.accept_token(tokenizer.TokenType.COMMA); - } - first = false; - const type_annotation = self.accept_parse(parse_type) orelse break; - try parameters.append(type_annotation); - } - - _ = try self.parse_token(tokenizer.TokenType.RPAREN); - - _ = try self.parse_token(tokenizer.TokenType.ARROW); - - const return_type = try self.parse_type(); - - return try self.create_node(.{ - .TYPE = .{ - .FUNCTION_TYPE = .{ - .parameters = parameters.items, - .return_type = return_type, - }, - }, - }); - } - - fn parse_token(self: *Parser, expected_token: std.meta.Tag(tokenizer.TokenType)) ParserError!tokenizer.Token { - errdefer if (!self.try_context) std.debug.print("Error accepting token: {any}\n", .{expected_token}); - const token = self.peek_token() orelse return ParserError.ParsingError; - - if (expected_token != std.meta.activeTag(token.type)) { - if (!self.try_context) std.debug.print("Expected {any} - found {any}\n", .{ expected_token, token }); - return ParserError.ParsingError; - } - - return self.consume_token() orelse unreachable; - } - - fn accept_parse(self: *Parser, parsing_func: *const fn (_: *Parser) ParserError!*Node) ?*Node { - const prev_offset = self.offset; - const prev_try_context = self.try_context; - self.try_context = true; - const node = parsing_func(self) catch { - self.offset = prev_offset; - self.try_context = prev_try_context; - return null; - }; - self.try_context = prev_try_context; - return node; - } - - fn accept_token(self: *Parser, token_type: std.meta.Tag(tokenizer.TokenType)) ?tokenizer.Token { - const curr_token = self.peek_token() orelse return null; - if 
(std.meta.activeTag(curr_token.type) == token_type) { - return self.consume_token(); - } - return null; - } - - fn consume_token(self: *Parser) ?tokenizer.Token { - if (self.offset >= self.tokens.len) return null; - - defer self.offset += 1; - - return self.tokens[self.offset]; - } - - fn peek_token(self: *Parser) ?tokenizer.Token { - if (self.offset >= self.tokens.len) return null; - - return self.tokens[self.offset]; - } - - fn create_node(self: *Parser, node_value: Node) !*Node { - const node = try self.arena.create(Node); - node.* = node_value; - return node; - } -}; - -test "parse print" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .PRINT = void{} }, - tokenizer.Token{ .LPAREN = void{} }, - tokenizer.Token{ .NUMBER = 7 }, - tokenizer.Token{ .RPAREN = void{} }, - tokenizer.Token{ .SEMICOLON = void{} }, - }); - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const actualNode = try parser.parse_print_statement(); - const expectedNode = Node{ .PRINT_STATEMENT = .{ - .expression = @constCast(&Node{ .EXPRESSION = .{ - .NUMBER = .{ .value = 7 }, - } }), - } }; - try std.testing.expectEqualDeep(&expectedNode, actualNode); -} - -test "parse identifier" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .IDENTIFIER = @constCast("i") }, - }); - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const actualNode = try parser.parse_expression(); - const expectedNode = Node{ .EXPRESSION = .{ - .IDENTIFIER = .{ - .name = @constCast("i"), - }, - } }; - try std.testing.expectEqualDeep(&expectedNode, actualNode); -} - -test "parse number" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .NUMBER = 12 }, - }); - var arena = 
std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const actualNode = try parser.parse_expression(); - const expectedNode = Node{ .EXPRESSION = .{ - .NUMBER = .{ - .value = 12, - }, - } }; - try std.testing.expectEqualDeep(&expectedNode, actualNode); -} - -test "simple e2e" { - const tokens: []tokenizer.Token = @constCast(&[_]tokenizer.Token{ - tokenizer.Token{ .LET = void{} }, - tokenizer.Token{ .IDENTIFIER = @constCast("i") }, - tokenizer.Token{ .EQUALS = void{} }, - tokenizer.Token{ .NUMBER = 2 }, - tokenizer.Token{ .SEMICOLON = void{} }, - }); - - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - var parser = try Parser.init(tokens, arena.allocator()); - const ast = try parser.parse(); - const expected_ast = Node{ .PROGRAM = .{ .statements = @constCast(&[_]*Node{@constCast(&Node{ .STATEMENT = .{ .statement = @constCast(&Node{ .ASSIGNMENT_STATEMENT = .{ - .is_declaration = true, - .name = @constCast("i"), - .expression = @constCast(&Node{ .EXPRESSION = .{ - .NUMBER = .{ .value = 2 }, - } }), - } }) } })}) } }; - try std.testing.expectEqualDeep(&expected_ast, ast); -} diff --git a/src/bootstrap/tokenizer.pry b/src/tokenizer.pry index ddc2cef..ddc2cef 100644 --- a/src/bootstrap/tokenizer.pry +++ b/src/tokenizer.pry diff --git a/src/tokenizer.zig b/src/tokenizer.zig deleted file mode 100644 index 5dacc75..0000000 --- a/src/tokenizer.zig +++ /dev/null @@ -1,327 +0,0 @@ -const std = @import("std"); - -const TokenizerError = error{ - TokenizingError, -}; - -pub const TokenType = union(enum) { - // Keywords - IMPORT: void, - LET: void, - EXTERN: void, - IF: void, - WHILE: void, - RETURN: void, - BREAK: void, - CONTINUE: void, - ARROW: void, - STRUCT: void, - TYPE: void, - - // Identifiers - IDENTIFIER: []u8, - - // Literals - NUMBER: i64, - BOOLEAN: bool, - NULL: void, - CHAR: u8, - STRING: []u8, - - // Operators - EQUALS: void, - PLUS: void, 
- MINUS: void, - MUL: void, - DIV: void, - MOD: void, - BANG: void, - LESS: void, - GREATER: void, - DOT: void, - - // Punctuation - SEMICOLON: void, - COMMA: void, - COLON: void, - LPAREN: void, - RPAREN: void, - LBRACE: void, - RBRACE: void, -}; - -const TokenLocation = struct { - col: u64, - row: u64, -}; - -pub const Token = struct { - location: TokenLocation, - offset: u64, - type: TokenType, -}; - -pub const Tokenizer = struct { - buf: []u8, - offset: u64, - - arena: std.mem.Allocator, - - pub fn init(buf: []u8, arena: std.mem.Allocator) !Tokenizer { - return Tokenizer{ .buf = buf, .offset = 0, .arena = arena }; - } - - pub fn tokenize(self: *Tokenizer) ![]Token { - var token_list = std.ArrayList(Token).init(self.arena); - - while (try self.next()) |token| { - std.debug.print("{any}\n", .{token}); - try token_list.append(token); - } - - return token_list.items; - } - - fn next(self: *Tokenizer) TokenizerError!?Token { - self.skip_whitespace(); - self.skip_comments(); - self.skip_whitespace(); - - if (self.offset >= self.buf.len) return null; - - if (self.accept_string("import")) return self.create_token(.{ .IMPORT = void{} }); - - if (self.accept_string("let")) return self.create_token(.{ .LET = void{} }); - if (self.accept_string("extern")) return self.create_token(.{ .EXTERN = void{} }); - if (self.accept_string("if")) return self.create_token(.{ .IF = void{} }); - if (self.accept_string("while")) return self.create_token(.{ .WHILE = void{} }); - if (self.accept_string("return")) return self.create_token(.{ .RETURN = void{} }); - if (self.accept_string("break")) return self.create_token(.{ .BREAK = void{} }); - if (self.accept_string("continue")) return self.create_token(.{ .CONTINUE = void{} }); - if (self.accept_string("true")) return self.create_token(.{ .BOOLEAN = true }); - if (self.accept_string("false")) return self.create_token(.{ .BOOLEAN = false }); - if (self.accept_string("null")) return self.create_token(.{ .NULL = void{} }); - if 
(self.accept_string("struct")) return self.create_token(.{ .STRUCT = void{} }); - if (self.accept_string("newtype")) return self.create_token(.{ .TYPE = void{} }); - - if (self.accept_string("=>")) return self.create_token(.{ .ARROW = void{} }); - if (self.accept_string(";")) return self.create_token(.{ .SEMICOLON = void{} }); - if (self.accept_string(",")) return self.create_token(.{ .COMMA = void{} }); - if (self.accept_string(":")) return self.create_token(.{ .COLON = void{} }); - if (self.accept_string("(")) return self.create_token(.{ .LPAREN = void{} }); - if (self.accept_string(")")) return self.create_token(.{ .RPAREN = void{} }); - if (self.accept_string("{")) return self.create_token(.{ .LBRACE = void{} }); - if (self.accept_string("}")) return self.create_token(.{ .RBRACE = void{} }); - if (self.accept_string("=")) return self.create_token(.{ .EQUALS = void{} }); - if (self.accept_string("+")) return self.create_token(.{ .PLUS = void{} }); - if (self.accept_string("-")) return self.create_token(.{ .MINUS = void{} }); - if (self.accept_string("*")) return self.create_token(.{ .MUL = void{} }); - if (self.accept_string("/")) return self.create_token(.{ .DIV = void{} }); - if (self.accept_string("%")) return self.create_token(.{ .MOD = void{} }); - if (self.accept_string("!")) return self.create_token(.{ .BANG = void{} }); - if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} }); - if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} }); - if (self.accept_string(".")) return self.create_token(.{ .DOT = void{} }); - - if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i }); - if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c }); - if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s }); - - const string = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return !std.ascii.isAlphanumeric(c) and c != '_'; - } - }.condition); - if 
(string.len == 0) return TokenizerError.TokenizingError; - - return self.create_token(.{ .IDENTIFIER = string }); - } - - fn skip_comments(self: *Tokenizer) void { - if (!self.accept_string("/*")) return; - - while (!self.accept_string("*/")) { - self.offset += 1; - } - } - - fn skip_whitespace(self: *Tokenizer) void { - while (true) { - if (self.offset >= self.buf.len) return; - const c = self.buf[self.offset]; - if (!std.ascii.isWhitespace(c)) return; - self.offset += 1; - } - } - - fn consume_until_condition(self: *Tokenizer, condition: fn (c: u8) bool) []u8 { - var res = std.ArrayList(u8).init(self.arena); - while (true) : (self.offset += 1) { - if (self.offset >= self.buf.len) { - return res.items; - } - - const c = self.buf[self.offset]; - - if (c == '\\') { - const next_c = self.buf[self.offset + 1]; - res.append(switch (next_c) { - 'n' => '\n', - 't' => '\t', - 'r' => '\r', - '0' => 0, - '\\' => '\\', - else => |x| x, - }) catch unreachable; - self.offset += 1; - continue; - } - - if (condition(c)) { - return res.items; - } - - res.append(c) catch unreachable; - } - return res.items; - } - - fn accept_string(self: *Tokenizer, substr: []const u8) bool { - if (self.offset + substr.len > self.buf.len) return false; - if (std.mem.eql(u8, self.buf[self.offset .. 
self.offset + substr.len], substr)) { - self.offset += substr.len; - return true; - } - return false; - } - - fn accept_int_type(self: *Tokenizer) ?i64 { - const res = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return !std.ascii.isDigit(c); - } - }.condition); - - return std.fmt.parseInt(i64, res, 10) catch null; - } - - fn accept_char_type(self: *Tokenizer) ?u8 { - const prev_offset = self.offset; - if (!self.accept_string("'")) { - self.offset = prev_offset; - return null; - } - - const string = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return c == '\''; - } - }.condition); - - std.debug.assert(string.len == 1); - - if (!self.accept_string("'")) { - self.offset = prev_offset; - return null; - } - - return string[0]; - } - - fn accept_string_type(self: *Tokenizer) ?[]u8 { - const prev_offset = self.offset; - if (!self.accept_string("\"")) { - self.offset = prev_offset; - return null; - } - - const string = self.consume_until_condition(struct { - fn condition(c: u8) bool { - return c == '"'; - } - }.condition); - - if (!self.accept_string("\"")) { - self.offset = prev_offset; - return null; - } - - return string; - } - - fn create_token(self: *Tokenizer, token_type: TokenType) Token { - return Token{ - .location = self.compute_location(), - .offset = self.offset - 1, - .type = token_type, - }; - } - - fn compute_location(self: *Tokenizer) TokenLocation { - var location = TokenLocation{ .col = 1, .row = 1 }; - - var i: usize = 0; - while (i < self.offset) : (i += 1) { - if (self.buf[i] == '\n') { - location.row += 1; - location.col = 1; - } else { - location.col += 1; - } - } - - // We need to do this because we call this fn after we consume the token - location.row -= 1; - location.col -= 1; - - return location; - } -}; - -test "simple" { - const tests = [_]struct { - buf: []u8, - tokens: []const Token, - }{ - .{ - .buf = @constCast( - \\ let i = 2; - \\ - \\ print(i); - ), - .tokens = &[_]Token{ - Token{ .LET = 
{} }, - Token{ .IDENTIFIER = @constCast("i") }, - Token{ .EQUALS = {} }, - Token{ .NUMBER = 2 }, - Token{ .SEMICOLON = {} }, - Token{ .PRINT = {} }, - Token{ .LPAREN = {} }, - Token{ .IDENTIFIER = @constCast("i") }, - Token{ .RPAREN = {} }, - Token{ .SEMICOLON = {} }, - }, - }, - .{ - .buf = @constCast( - \\ - \\ let hello - ), - .tokens = &[_]Token{ - Token{ .LET = {} }, - Token{ .IDENTIFIER = @constCast("hello") }, - }, - }, - }; - - for (tests) |t| { - var token_list = std.ArrayList(Token).init(std.testing.allocator); - defer token_list.deinit(); - - var tokenizer = try Tokenizer.init(t.buf); - while (try tokenizer.next()) |token| { - try token_list.append(token); - } - try std.testing.expectEqualDeep(t.tokens, token_list.items); - } -} |