diff options
| author | Baitinq <[email protected]> | 2025-04-06 16:17:22 +0200 |
|---|---|---|
| committer | Baitinq <[email protected]> | 2025-04-06 16:17:22 +0200 |
| commit | bd42f4899d09c8f6317ae1af1747ae5b6ac81650 (patch) | |
| tree | ed21afbbcca7f0be0751e4bf7104bf7cdc9f36e6 | |
| parent | Codegen: Fix GEP type (diff) | |
| download | interpreter-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.tar.gz interpreter-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.tar.bz2 interpreter-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.zip | |
Feature: Add char type and support underlying pointer values
| -rw-r--r-- | examples/19.src | 23 | ||||
| -rw-r--r-- | grammar.ebnf | 2 | ||||
| -rw-r--r-- | src/codegen.zig | 48 | ||||
| -rw-r--r-- | src/parser.zig | 10 | ||||
| -rw-r--r-- | src/tokenizer.zig | 38 |
5 files changed, 117 insertions, 4 deletions
diff --git a/examples/19.src b/examples/19.src new file mode 100644 index 0000000..2aa5dd7 --- /dev/null +++ b/examples/19.src @@ -0,0 +1,23 @@ +extern printf = (*i64, varargs) => void; +extern malloc = (i64) => *i8; +extern free = (*i64) => void; + +let main = () => i64 { + let buf = malloc(13); + (*(buf+0)) = 'h'; + (*(buf+1)) = 'e'; + (*(buf+2)) = 'l'; + (*(buf+3)) = 'l'; + (*(buf+4)) = 'o'; + (*(buf+5)) = ' '; + (*(buf+6)) = 'w'; + (*(buf+7)) = 'o'; + (*(buf+8)) = 'r'; + (*(buf+9)) = 'l'; + (*(buf+10)) = 'd'; + (*(buf+11)) = '\n'; + (*(buf+12)) = '\0'; + printf("%s", buf); + free(buf); + return 0; +}; diff --git a/grammar.ebnf b/grammar.ebnf index f421d28..18d71da 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -26,7 +26,7 @@ MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression) UnaryExpression ::= ("!" | "-" | "*") UnaryExpression | PrimaryExpression -PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN +PrimaryExpression ::= NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE diff --git a/src/codegen.zig b/src/codegen.zig index 77f5ea3..cbd33ac 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -137,6 +137,7 @@ pub const CodeGen = struct { .type = llvm.LLVMVoidType(), // This gets set to the correct type during the expression type resolution. ALTERNATIVE: Pass the alloca .stack_level = null, .node = statement, + .node_type = null, })); } @@ -150,6 +151,7 @@ pub const CodeGen = struct { .type = ptr.type, .stack_level = null, .node = statement, + .node_type = null, })); } @@ -207,11 +209,14 @@ pub const CodeGen = struct { else => unreachable, }; + const typ = try self.get_llvm_type(function_return_type); + return self.create_variable(.{ - .type = try self.get_llvm_type(function_return_type), + .type = typ, .value = res, .stack_level = null, .node = node, + .node_type = function_return_type, }) catch return CodeGenError.CompilationError; } @@ -315,6 +320,7 @@ pub const CodeGen = struct { .type = function_type, .stack_level = null, .node = expression, + .node_type = null, })); } @@ -341,6 +347,7 @@ pub const CodeGen = struct { .type = param_type, .stack_level = null, .node = param_node, + .node_type = null, })); } @@ -358,6 +365,7 @@ pub const CodeGen = struct { .type = function_type, .stack_level = null, .node = expression, + .node_type = null, }); } @@ -374,6 +382,7 @@ pub const CodeGen = struct { _ = llvm.LLVMBuildStore(self.builder, result.value, ptr.value) orelse return CodeGenError.CompilationError; ptr.type = result.type; ptr.node = result.node; + ptr.node_type = result.node_type; return ptr; } else { return try self.generate_function_call_statement(@ptrCast(fn_call)); @@ -391,6 +400,9 @@ pub const CodeGen = struct { return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), llvm.LLVMInt1Type(), name, expression); }, + .CHAR => |c| { + return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt8Type(), @intCast(c.value), 0), llvm.LLVMInt8Type(), name, expression); + }, .STRING => |s| { const x = llvm.LLVMBuildGlobalStringPtr(self.builder, try std.fmt.allocPrintZ(self.arena, "{s}", .{s.value}), ""); return self.create_variable( @@ -399,6 +411,7 @@ pub const CodeGen = struct { .type = llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0), .stack_level = null, .node = expression, + .node_type = null, }, ); }, @@ -430,7 +443,20 @@ pub const CodeGen = struct { var result: llvm.LLVMValueRef = undefined; if (exp.addition) { if (llvm.LLVMGetTypeKind(lhs_value.type.?) == llvm.LLVMPointerTypeKind) { - result = llvm.LLVMBuildGEP2(self.builder, rhs_value.type, lhs_value.value, @constCast(&[_]llvm.LLVMValueRef{rhs_value.value}), 1, ""); + var x: llvm.LLVMTypeRef = lhs_value.type.?; + const inner_type = lhs_value.node_type; + if (inner_type != null) { + x = try self.get_llvm_type(inner_type.?); + } + if (lhs_value.node.?.* == .PRIMARY_EXPRESSION and lhs_value.node.?.PRIMARY_EXPRESSION == .IDENTIFIER) { + //TODO: We can probably clean this up a lot. We could store the node_type in the identifier too! + const ptr = self.environment.get_variable(lhs_value.node.?.PRIMARY_EXPRESSION.IDENTIFIER.name) orelse unreachable; + const u = ptr.node_type; + if (u != null) { + x = try self.get_underlying_llvm_ptr_type(u.?); + } + } + result = llvm.LLVMBuildGEP2(self.builder, x, lhs_value.value, @constCast(&[_]llvm.LLVMValueRef{rhs_value.value}), 1, ""); } else { result = llvm.LLVMBuildAdd(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; } @@ -507,6 +533,7 @@ pub const CodeGen = struct { .type = function_type, .stack_level = null, .node = expression, + .node_type = null, }); } @@ -529,6 +556,7 @@ pub const CodeGen = struct { .type = literal_type, .stack_level = null, .node = node, + .node_type = null, }); llvm.LLVMSetInitializer(ptr.value, literal_val); return ptr; @@ -545,6 +573,7 @@ pub const CodeGen = struct { .type = literal_type, .stack_level = null, .node = node, + .node_type = null, }); } @@ -554,6 +583,7 @@ pub const CodeGen = struct { switch (type_node) { .SIMPLE_TYPE => |t| { + if (std.mem.eql(u8, t.name, "i8")) return llvm.LLVMInt8Type(); if (std.mem.eql(u8, t.name, "i64")) return llvm.LLVMInt64Type(); if (std.mem.eql(u8, t.name, "bool")) return llvm.LLVMInt1Type(); if (std.mem.eql(u8, t.name, "void")) return llvm.LLVMVoidType(); @@ -581,6 +611,19 @@ pub const CodeGen = struct { } } + fn get_underlying_llvm_ptr_type(self: *CodeGen, node: *parser.Node) !llvm.LLVMTypeRef { + std.debug.assert(node.* == .TYPE); + + switch (node.TYPE) { + .POINTER_TYPE => |t| { + return try self.get_underlying_llvm_ptr_type(t.type); + }, + else => { + return try self.get_llvm_type(node); + }, + } + } + fn create_variable(self: *CodeGen, variable_value: Variable) !*Variable { const variable = try self.arena.create(Variable); variable.* = variable_value; @@ -598,6 +641,7 @@ const Variable = struct { type: llvm.LLVMTypeRef, value: llvm.LLVMValueRef, node: ?*parser.Node, + node_type: ?*parser.Node, stack_level: ?usize, }; diff --git a/src/parser.zig b/src/parser.zig index 4f607be..7ed1cb3 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -57,6 +57,9 @@ pub const Node = union(enum) { BOOLEAN: struct { value: bool, }, + CHAR: struct { + value: u8, + }, STRING: struct { value: []const u8, }, @@ -429,7 +432,7 @@ pub const Parser = struct { } }); } - // PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN + // PrimaryExpression ::= NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN fn parse_primary_expression(self: *Parser) ParserError!*Node { errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()}); @@ -458,6 +461,11 @@ pub const Parser = struct { .value = boolean_token, } }, }), + .CHAR => |char_token| try self.create_node(.{ + .PRIMARY_EXPRESSION = .{ .CHAR = .{ + .value = char_token, + } }, + }), .STRING => |string_token| try self.create_node(.{ .PRIMARY_EXPRESSION = .{ .STRING = .{ .value = try self.arena.dupe(u8, string_token), diff --git a/src/tokenizer.zig b/src/tokenizer.zig index 1c88bf2..2b57b8d 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -19,6 +19,7 @@ pub const TokenType = union(enum) { // Literals NUMBER: i64, BOOLEAN: bool, + CHAR: u8, STRING: []u8, // Operators @@ -97,6 +98,7 @@ pub const Tokenizer = struct { if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} }); if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i }); + if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c }); if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s }); const string = self.consume_until_condition(struct { @@ -158,6 +160,42 @@ pub const Tokenizer = struct { return std.fmt.parseInt(i64, res, 10) catch null; } + fn accept_char_type(self: *Tokenizer) ?u8 { + const prev_offset = self.offset; + if (!self.accept_string("'")) { + self.offset = prev_offset; + return null; + } + + const string = self.consume_until_condition(struct { + fn condition(c: u8) bool { + return c == '\''; + } + }.condition); + + var res: u8 = string[0]; + var i: usize = 0; + while (i < string.len) : (i += 1) { + if (string[i] == '\\') { + i += 1; + res = switch (string[i]) { + 'n' => '\n', + 't' => '\t', + '0' => 0, + else => unreachable, + }; + break; + } + } + + if (!self.accept_string("'")) { + self.offset = prev_offset; + return null; + } + + return res; + } + fn accept_string_type(self: *Tokenizer) ?[]u8 { const prev_offset = self.offset; if (!self.accept_string("\"")) { |