about summary refs log tree commit diff
diff options
context:
space:
mode:
author Baitinq <[email protected]> 2025-04-06 16:17:22 +0200
committer Baitinq <[email protected]> 2025-04-06 16:17:22 +0200
commit bd42f4899d09c8f6317ae1af1747ae5b6ac81650 (patch)
tree ed21afbbcca7f0be0751e4bf7104bf7cdc9f36e6
parent Codegen: Fix GEP type (diff)
download pry-lang-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.tar.gz
pry-lang-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.tar.bz2
pry-lang-bd42f4899d09c8f6317ae1af1747ae5b6ac81650.zip
Feature: Add char type and support underlying pointer values
-rw-r--r-- examples/19.src 23
-rw-r--r-- grammar.ebnf 2
-rw-r--r-- src/codegen.zig 48
-rw-r--r-- src/parser.zig 10
-rw-r--r-- src/tokenizer.zig 38
5 files changed, 117 insertions, 4 deletions
diff --git a/examples/19.src b/examples/19.src
new file mode 100644
index 0000000..2aa5dd7
--- /dev/null
+++ b/examples/19.src
@@ -0,0 +1,23 @@
+extern printf = (*i64, varargs) => void;
+extern malloc = (i64) => *i8;
+extern free = (*i64) => void;
+
+let main = () => i64 {
+	let buf = malloc(13);
+	(*(buf+0)) = 'h';
+	(*(buf+1)) = 'e';
+	(*(buf+2)) = 'l';
+	(*(buf+3)) = 'l';
+	(*(buf+4)) = 'o';
+	(*(buf+5)) = ' ';
+	(*(buf+6)) = 'w';
+	(*(buf+7)) = 'o';
+	(*(buf+8)) = 'r';
+	(*(buf+9)) = 'l';
+	(*(buf+10)) = 'd';
+	(*(buf+11)) = '\n';
+	(*(buf+12)) = '\0';
+	printf("%s", buf);
+	free(buf);
+	return 0;
+};
diff --git a/grammar.ebnf b/grammar.ebnf
index f421d28..18d71da 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -26,7 +26,7 @@ MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)
 
 UnaryExpression ::= ("!" | "-" | "*") UnaryExpression | PrimaryExpression
 
-PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
+PrimaryExpression ::= NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
 
 FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE
 
diff --git a/src/codegen.zig b/src/codegen.zig
index 77f5ea3..cbd33ac 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -137,6 +137,7 @@ pub const CodeGen = struct {
                     .type = llvm.LLVMVoidType(), // This gets set to the correct type during the expression type resolution. ALTERNATIVE: Pass the alloca
                     .stack_level = null,
                     .node = statement,
+                    .node_type = null,
                 }));
             }
 
@@ -150,6 +151,7 @@ pub const CodeGen = struct {
                     .type = ptr.type,
                     .stack_level = null,
                     .node = statement,
+                    .node_type = null,
                 }));
             }
 
@@ -207,11 +209,14 @@ pub const CodeGen = struct {
             else => unreachable,
         };
 
+        const typ = try self.get_llvm_type(function_return_type);
+
         return self.create_variable(.{
-            .type = try self.get_llvm_type(function_return_type),
+            .type = typ,
             .value = res,
             .stack_level = null,
             .node = node,
+            .node_type = function_return_type,
         }) catch return CodeGenError.CompilationError;
     }
 
@@ -315,6 +320,7 @@ pub const CodeGen = struct {
                         .type = function_type,
                         .stack_level = null,
                         .node = expression,
+                        .node_type = null,
                     }));
                 }
 
@@ -341,6 +347,7 @@ pub const CodeGen = struct {
                         .type = param_type,
                         .stack_level = null,
                         .node = param_node,
+                        .node_type = null,
                     }));
                 }
 
@@ -358,6 +365,7 @@ pub const CodeGen = struct {
                         .type = function_type,
                         .stack_level = null,
                         .node = expression,
+                        .node_type = null,
                     });
                 }
 
@@ -374,6 +382,7 @@ pub const CodeGen = struct {
                     _ = llvm.LLVMBuildStore(self.builder, result.value, ptr.value) orelse return CodeGenError.CompilationError;
                     ptr.type = result.type;
                     ptr.node = result.node;
+                    ptr.node_type = result.node_type;
                     return ptr;
                 } else {
                     return try self.generate_function_call_statement(@ptrCast(fn_call));
@@ -391,6 +400,9 @@ pub const CodeGen = struct {
 
                     return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), llvm.LLVMInt1Type(), name, expression);
                 },
+                .CHAR => |c| {
+                    return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt8Type(), @intCast(c.value), 0), llvm.LLVMInt8Type(), name, expression);
+                },
                 .STRING => |s| {
                     const x = llvm.LLVMBuildGlobalStringPtr(self.builder, try std.fmt.allocPrintZ(self.arena, "{s}", .{s.value}), "");
                     return self.create_variable(
@@ -399,6 +411,7 @@ pub const CodeGen = struct {
                             .type = llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0),
                             .stack_level = null,
                             .node = expression,
+                            .node_type = null,
                         },
                     );
                 },
@@ -430,7 +443,20 @@ pub const CodeGen = struct {
                 var result: llvm.LLVMValueRef = undefined;
                 if (exp.addition) {
                     if (llvm.LLVMGetTypeKind(lhs_value.type.?) == llvm.LLVMPointerTypeKind) {
-                        result = llvm.LLVMBuildGEP2(self.builder, rhs_value.type, lhs_value.value, @constCast(&[_]llvm.LLVMValueRef{rhs_value.value}), 1, "");
+                        var x: llvm.LLVMTypeRef = lhs_value.type.?;
+                        const inner_type = lhs_value.node_type;
+                        if (inner_type != null) {
+                            x = try self.get_llvm_type(inner_type.?);
+                        }
+                        if (lhs_value.node.?.* == .PRIMARY_EXPRESSION and lhs_value.node.?.PRIMARY_EXPRESSION == .IDENTIFIER) {
+                            //TODO: We can probably clean this up a lot. We could store the node_type in the identifier too!
+                            const ptr = self.environment.get_variable(lhs_value.node.?.PRIMARY_EXPRESSION.IDENTIFIER.name) orelse unreachable;
+                            const u = ptr.node_type;
+                            if (u != null) {
+                                x = try self.get_underlying_llvm_ptr_type(u.?);
+                            }
+                        }
+                        result = llvm.LLVMBuildGEP2(self.builder, x, lhs_value.value, @constCast(&[_]llvm.LLVMValueRef{rhs_value.value}), 1, "");
                     } else {
                         result = llvm.LLVMBuildAdd(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError;
                     }
@@ -507,6 +533,7 @@ pub const CodeGen = struct {
                         .type = function_type,
                         .stack_level = null,
                         .node = expression,
+                        .node_type = null,
                     });
                 }
 
@@ -529,6 +556,7 @@ pub const CodeGen = struct {
                     .type = literal_type,
                     .stack_level = null,
                     .node = node,
+                    .node_type = null,
                 });
                 llvm.LLVMSetInitializer(ptr.value, literal_val);
                 return ptr;
@@ -545,6 +573,7 @@ pub const CodeGen = struct {
             .type = literal_type,
             .stack_level = null,
             .node = node,
+            .node_type = null,
         });
     }
 
@@ -554,6 +583,7 @@ pub const CodeGen = struct {
 
         switch (type_node) {
             .SIMPLE_TYPE => |t| {
+                if (std.mem.eql(u8, t.name, "i8")) return llvm.LLVMInt8Type();
                 if (std.mem.eql(u8, t.name, "i64")) return llvm.LLVMInt64Type();
                 if (std.mem.eql(u8, t.name, "bool")) return llvm.LLVMInt1Type();
                 if (std.mem.eql(u8, t.name, "void")) return llvm.LLVMVoidType();
@@ -581,6 +611,19 @@ pub const CodeGen = struct {
         }
     }
 
+    /// Follow POINTER_TYPE links from `node` to the first non-pointer TYPE node and return `get_llvm_type` of it.
+    fn get_underlying_llvm_ptr_type(self: *CodeGen, node: *parser.Node) !llvm.LLVMTypeRef {
+        var current: *parser.Node = node;
+        while (true) {
+            // Every level, including the innermost one, must be a TYPE node.
+            std.debug.assert(current.* == .TYPE);
+            switch (current.TYPE) {
+                .POINTER_TYPE => |pointer| current = pointer.type,
+                else => return try self.get_llvm_type(current),
+            }
+        }
+    }
+
     fn create_variable(self: *CodeGen, variable_value: Variable) !*Variable {
         const variable = try self.arena.create(Variable);
         variable.* = variable_value;
@@ -598,6 +641,7 @@ const Variable = struct {
     type: llvm.LLVMTypeRef,
     value: llvm.LLVMValueRef,
     node: ?*parser.Node,
+    node_type: ?*parser.Node,
     stack_level: ?usize,
 };
 
diff --git a/src/parser.zig b/src/parser.zig
index 4f607be..7ed1cb3 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -57,6 +57,9 @@ pub const Node = union(enum) {
         BOOLEAN: struct {
             value: bool,
         },
+        CHAR: struct {
+            value: u8,
+        },
         STRING: struct {
             value: []const u8,
         },
@@ -429,7 +432,7 @@ pub const Parser = struct {
         } });
     }
 
-    // PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
+    // PrimaryExpression ::= NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
     fn parse_primary_expression(self: *Parser) ParserError!*Node {
         errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()});
 
@@ -458,6 +461,11 @@ pub const Parser = struct {
                     .value = boolean_token,
                 } },
             }),
+            .CHAR => |char_token| try self.create_node(.{
+                .PRIMARY_EXPRESSION = .{ .CHAR = .{
+                    .value = char_token,
+                } },
+            }),
             .STRING => |string_token| try self.create_node(.{
                 .PRIMARY_EXPRESSION = .{ .STRING = .{
                     .value = try self.arena.dupe(u8, string_token),
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index 1c88bf2..2b57b8d 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -19,6 +19,7 @@ pub const TokenType = union(enum) {
     // Literals
     NUMBER: i64,
     BOOLEAN: bool,
+    CHAR: u8,
     STRING: []u8,
 
     // Operators
@@ -97,6 +98,7 @@ pub const Tokenizer = struct {
         if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} });
 
         if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i });
+        if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c });
         if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s });
 
         const string = self.consume_until_condition(struct {
@@ -158,6 +160,42 @@ pub const Tokenizer = struct {
         return std.fmt.parseInt(i64, res, 10) catch null;
     }
 
+    /// Accept a char literal ('x', '\n', '\t' or '\0') and return its byte value.
+    /// Returns null and rewinds `self.offset` when the input is not a well-formed
+    /// literal: empty, unterminated, longer than one character or unknown escape.
+    fn accept_char_type(self: *Tokenizer) ?u8 {
+        const prev_offset = self.offset;
+        if (!self.accept_string("'")) {
+            self.offset = prev_offset;
+            return null;
+        }
+
+        const string = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return c == '\'';
+            }
+        }.condition);
+
+        // Validate the literal's shape before indexing into it; malformed input
+        // must be rejected rather than trip an out-of-bounds read or `unreachable`.
+        const res: ?u8 = if (!self.accept_string("'"))
+            null
+        else if (string.len == 1 and string[0] != '\\')
+            string[0]
+        else if (string.len == 2 and string[0] == '\\')
+            switch (string[1]) {
+                'n' => '\n',
+                't' => '\t',
+                '0' => 0,
+                else => null,
+            }
+        else
+            null;
+
+        if (res == null) self.offset = prev_offset;
+        return res;
+    }
+
     fn accept_string_type(self: *Tokenizer) ?[]u8 {
         const prev_offset = self.offset;
         if (!self.accept_string("\"")) {