about summary refs log tree commit diff
diff options
context:
space:
mode:
author Baitinq <[email protected]> 2025-05-25 22:49:18 +0200
committer Baitinq <[email protected]> 2025-05-25 22:49:53 +0200
commit f5904b62012ca475c14ad55b7f2c0b0c5c48b362 (patch)
tree 890b3f1274b8dab2e7e2a2052d7d452fa3ca6db2
parent Examples: Fix example 21 (diff)
download pry-lang-f5904b62012ca475c14ad55b7f2c0b0c5c48b362.tar.gz
pry-lang-f5904b62012ca475c14ad55b7f2c0b0c5c48b362.tar.bz2
pry-lang-f5904b62012ca475c14ad55b7f2c0b0c5c48b362.zip
Feature: Start adding structs support
-rw-r--r-- examples/22.src 34
-rw-r--r-- grammar.ebnf 10
-rw-r--r-- src/bootstrap/tokenizer.src 3
-rw-r--r-- src/codegen.zig 95
-rw-r--r-- src/parser.zig 89
-rw-r--r-- src/tokenizer.zig 4
6 files changed, 218 insertions, 17 deletions
diff --git a/examples/22.src b/examples/22.src
new file mode 100644
index 0000000..9d73d1a
--- /dev/null
+++ b/examples/22.src
@@ -0,0 +1,34 @@
+import "!stdlib.src";
+
+/* declare new struct type */
+let test = struct {
+	 x: i64,
+	 y: *i8,
+	 z: bool
+};
+
+let main = () => i64 {
+	/* instantiate new struct. initializing fields at instantiation isn't supported yet */
+	let inst = test{};
+	inst.x = 2;
+	inst.y = "hello";
+	inst.z = true;
+
+	println("Inst x: %d", inst.x);
+	println("Inst y: %s", inst.y);
+	println("Inst z: %d", inst.z);
+
+	return 0;
+};
+
+/*
+
+Expected stdout:
+
+Inst x: 2
+Inst y: hello
+Inst z: 1
+
+Expected return: 0
+
+*/
diff --git a/grammar.ebnf b/grammar.ebnf
index ddb0cef..d203a69 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -30,7 +30,7 @@ MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)
 
 UnaryExpression ::= ("!" | "-" | "*") UnaryExpression | PrimaryExpression
 
-PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
+PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN
 
 FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE
 
@@ -41,3 +41,11 @@ Type ::= IDENTIFIER | FunctionType
 FunctionType ::= LPAREN (Type ("," Type)*)? RPAREN ARROW Type
 
 ParameterTypes ::= Type ("," Type)*
+
+StructDefinition ::= "struct" LBRACE (StructField ("," StructField)*)? RBRACE
+
+StructField ::= IDENTIFIER ":" Type
+
+StructInstantiation ::= IDENTIFIER LBRACE RBRACE
+
+FieldAccess ::= Expression DOT IDENTIFIER
diff --git a/src/bootstrap/tokenizer.src b/src/bootstrap/tokenizer.src
index 49a717b..a24d090 100644
--- a/src/bootstrap/tokenizer.src
+++ b/src/bootstrap/tokenizer.src
@@ -311,6 +311,9 @@ let tokenizer_next = () => *i8 {
 	if tokenizer_accept_string(">") {
 	    return ">";
 	};
+	if tokenizer_accept_string(".") {
+	    return ".";
+	};
 	
 	let maybe_int = tokenizer_accept_int_type();
 	if maybe_int != cast(*i64, null) {
diff --git a/src/codegen.zig b/src/codegen.zig
index b3fb24a..4f69218 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -17,6 +17,7 @@ pub const CodeGenError = error{
 
 pub const CodeGen = struct {
     llvm_module: llvm.LLVMModuleRef,
+    llvm_context: llvm.LLVMContextRef,
     builder: llvm.LLVMBuilderRef,
     environment: *Environment,
 
@@ -36,11 +37,13 @@ pub const CodeGen = struct {
         llvm.LLVMInitializeAllAsmParsers();
 
         const module: llvm.LLVMModuleRef = llvm.LLVMModuleCreateWithName("module");
+        const context = llvm.LLVMGetGlobalContext();
         const builder = llvm.LLVMCreateBuilder();
 
         const self = try arena.create(CodeGen);
         self.* = .{
             .llvm_module = module,
+            .llvm_context = context,
             .builder = builder,
             .environment = try Environment.init(arena),
 
@@ -145,6 +148,7 @@ pub const CodeGen = struct {
             if (self.environment.scope_stack.items.len == 1) {
                 try self.environment.add_variable(identifier.name, try self.create_variable(.{
                     .value = variable.value,
+                    .type = null,
                     .node = variable.node,
                     .node_type = variable.node_type,
                     .stack_level = null,
@@ -171,7 +175,11 @@ pub const CodeGen = struct {
             if (assignment_statement.is_dereference) {
                 ptr = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(typ), ptr, "");
             }
-            _ = llvm.LLVMBuildStore(self.builder, variable.value, ptr);
+
+            // NOTE: structs have a null variable.value
+            if (variable.value != null) {
+                _ = llvm.LLVMBuildStore(self.builder, variable.value, ptr);
+            }
 
             if (assignment_statement.is_dereference) {
                 ptr = self.environment.get_variable(identifier.name).?.value;
@@ -179,6 +187,7 @@ pub const CodeGen = struct {
 
             const new_variable = try self.create_variable(.{
                 .value = ptr,
+                .type = null,
                 .node = variable.node,
                 .node_type = typ,
                 .stack_level = null,
@@ -257,6 +266,7 @@ pub const CodeGen = struct {
 
         return self.create_variable(.{
             .value = res,
+            .type = null,
             .stack_level = null,
             .node = node,
             .node_type = function_return_type,
@@ -422,6 +432,7 @@ pub const CodeGen = struct {
                 if (name != null) {
                     try self.environment.add_variable(name.?, try self.create_variable(.{
                         .value = function,
+                        .type = null,
                         .stack_level = null,
                         .node = expression,
                         .node_type = node_type,
@@ -448,6 +459,7 @@ pub const CodeGen = struct {
 
                     try self.environment.add_variable(param_node.PRIMARY_EXPRESSION.IDENTIFIER.name, try self.create_variable(.{
                         .value = alloca,
+                        .type = null,
                         .stack_level = null,
                         .node = param_node,
                         .node_type = param_type,
@@ -465,6 +477,7 @@ pub const CodeGen = struct {
                 if (name == null or self.environment.scope_stack.items.len == 2) {
                     return try self.create_variable(.{
                         .value = function,
+                        .type = null,
                         .stack_level = null,
                         .node = expression,
                         .node_type = node_type,
@@ -473,6 +486,7 @@ pub const CodeGen = struct {
 
                 return try self.create_variable(.{
                     .value = function,
+                    .type = null,
                     .stack_level = null,
                     .node = expression,
                     .node_type = node_type,
@@ -481,6 +495,9 @@ pub const CodeGen = struct {
             .FUNCTION_CALL_STATEMENT => |*fn_call| {
                 return try self.generate_function_call_statement(@ptrCast(fn_call));
             },
+            .STRUCT_INSTANCIATION => |struct_instanciation| {
+                return self.environment.get_variable(struct_instanciation.typ).?;
+            },
             .PRIMARY_EXPRESSION => |primary_expression| switch (primary_expression) {
                 .NULL => {
                     return try self.generate_literal(llvm.LLVMConstNull(llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0)), name, expression, try self.create_node(.{
@@ -532,6 +549,7 @@ pub const CodeGen = struct {
                     return self.create_variable(
                         .{
                             .value = x,
+                            .type = null,
                             .stack_level = null,
                             .node = expression,
                             .node_type = try self.create_node(.{
@@ -671,28 +689,51 @@ pub const CodeGen = struct {
                 }));
             },
             .TYPE => |typ| {
-                std.debug.assert(typ == .FUNCTION_TYPE);
-                std.debug.assert(self.environment.scope_stack.items.len == 1);
+                switch (typ) {
+                    .FUNCTION_TYPE => {
+                        std.debug.assert(self.environment.scope_stack.items.len == 1);
 
-                const variable = self.environment.get_variable(name.?);
-                if (variable) |v| {
-                    return v;
-                }
+                        const variable = self.environment.get_variable(name.?);
+                        if (variable) |v| {
+                            return v;
+                        }
 
-                const function_type = try self.get_llvm_type(expression);
-                const function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}), function_type);
+                        const function_type = try self.get_llvm_type(expression);
+                        const function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}), function_type);
 
-                return try self.create_variable(.{
-                    .value = function,
-                    .stack_level = null,
-                    .node = expression,
-                    .node_type = expression,
-                });
+                        return try self.create_variable(.{
+                            .value = function,
+                            .type = null,
+                            .stack_level = null,
+                            .node = expression,
+                            .node_type = expression,
+                        });
+                    },
+                    .STRUCT_TYPE => |t| {
+                        const struct_type = llvm.LLVMStructCreateNamed(self.llvm_context, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}));
+
+                        var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena);
+
+                        for (t.fields) |field| {
+                            try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?));
+                        }
+                        llvm.LLVMStructSetBody(struct_type, llvm_types.items.ptr, @intCast(llvm_types.items.len), 0);
+                        return try self.create_variable(.{
+                            .value = null,
+                            .type = struct_type,
+                            .stack_level = null,
+                            .node = expression,
+                            .node_type = expression,
+                        });
+                    },
+                    else => unreachable,
+                }
             },
             .CAST_STATEMENT => |exp| {
                 const val = try self.generate_expression_value(exp.expression, "");
                 return try self.create_variable(.{
                     .value = val.value, //TODO: do real casting
+                    .type = null,
                     .stack_level = null,
                     .node = expression,
                     .node_type = exp.typ,
@@ -706,6 +747,7 @@ pub const CodeGen = struct {
         if (name != null and self.environment.scope_stack.items.len == 1) {
             const ptr = try self.create_variable(.{
                 .value = llvm.LLVMAddGlobal(self.llvm_module, try self.get_llvm_type(node_type), try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})),
+                .type = null,
                 .stack_level = null,
                 .node = node,
                 .node_type = node_type,
@@ -716,6 +758,7 @@ pub const CodeGen = struct {
 
         return try self.create_variable(.{
             .value = literal_val,
+            .type = null,
             .stack_level = null,
             .node = node,
             .node_type = node_type,
@@ -761,6 +804,15 @@ pub const CodeGen = struct {
                 const inner_type = try self.get_llvm_type(t.type);
                 return llvm.LLVMPointerType(inner_type, 0);
             },
+            .STRUCT_TYPE => |t| {
+                var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena);
+
+                for (t.fields) |field| {
+                    try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?));
+                }
+
+                return llvm.LLVMStructType(llvm_types.items.ptr, @intCast(llvm_types.items.len), 0);
+            },
         }
     }
 
@@ -824,6 +876,18 @@ pub const CodeGen = struct {
                 }
                 return res;
             },
+            .STRUCT_TYPE => |a_struct| {
+                const b_struct = b_type.STRUCT_TYPE;
+
+                if (a_struct.fields.len != b_struct.fields.len) return false;
+
+                for (0.., a_struct.fields) |i, f| {
+                    if (!self.compare_types(f, b_struct.fields[i], false)) {
+                        return false;
+                    }
+                }
+                return true;
+            },
         }
     }
 
@@ -842,6 +906,7 @@ pub const CodeGen = struct {
 
 const Variable = struct {
     value: llvm.LLVMValueRef,
+    type: llvm.LLVMTypeRef,
     node: *parser.Node,
     node_type: *parser.Node,
     stack_level: ?usize,
diff --git a/src/parser.zig b/src/parser.zig
index 25ab6aa..f92f0c5 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -78,6 +78,13 @@ pub const Node = union(enum) {
         parameters: []*Node,
         return_type: *Node,
     },
+    STRUCT_INSTANCIATION: struct {
+        typ: []const u8,
+    },
+    FIELD_ACCESS: struct {
+        expression: *Node,
+        name: []const u8,
+    },
     TYPE: union(enum) {
         SIMPLE_TYPE: struct {
             name: []const u8,
@@ -89,6 +96,9 @@ pub const Node = union(enum) {
         POINTER_TYPE: struct {
             type: *Node,
         },
+        STRUCT_TYPE: struct {
+            fields: []*Node,
+        },
     },
     RETURN_STATEMENT: struct {
         expression: ?*Node,
@@ -553,13 +563,15 @@ pub const Parser = struct {
         } });
     }
 
-    // PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
+    // PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN
     fn parse_primary_expression(self: *Parser) ParserError!*Node {
         errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()});
 
         if (self.accept_parse(parse_cast_statement)) |stmt| return stmt;
         if (self.accept_parse(parse_function_call_statement)) |stmt| return stmt;
         if (self.accept_parse(parse_function_definition)) |stmt| return stmt;
+        if (self.accept_parse(parse_struct_definition)) |stmt| return stmt;
+        if (self.accept_parse(parse_struct_instanciation)) |stmt| return stmt;
 
         // LPAREN (Expression) RPAREN
         if (self.accept_token(tokenizer.TokenType.LPAREN)) |_| {
@@ -571,6 +583,7 @@ pub const Parser = struct {
         const token = self.consume_token() orelse return ParserError.ParsingError;
 
         return switch (token.type) {
+            .DOT => try self.parse_field_access(),
             .NULL => try self.create_node(.{
                 .PRIMARY_EXPRESSION = .{ .NULL = void{} },
             }),
@@ -670,6 +683,80 @@ pub const Parser = struct {
         return node_list.items;
     }
 
+    // StructDefinition ::= "struct" LBRACE (StructField ("," StructField)*)? RBRACE
+    fn parse_struct_definition(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing struct definition {any}\n", .{self.peek_token()});
+        // Returns a TYPE node holding a STRUCT_TYPE whose fields are IDENTIFIER nodes carrying their type annotation.
+        // StructField ::= IDENTIFIER ":" Type
+        const parse_struct_field = struct {
+            fn call(iself: *Parser) ParserError!*Node {
+                const ident = try iself.parse_token(tokenizer.TokenType.IDENTIFIER);
+                _ = try iself.parse_token(tokenizer.TokenType.COLON);
+                const type_annotation = try iself.parse_type();
+
+                return iself.create_node(.{
+                    .PRIMARY_EXPRESSION = .{
+                        .IDENTIFIER = .{
+                            .name = try iself.arena.dupe(u8, ident.type.IDENTIFIER),
+                            .type = type_annotation,
+                        },
+                    },
+                });
+            }
+        };
+
+        _ = try self.parse_token(tokenizer.TokenType.STRUCT);
+        _ = try self.parse_token(tokenizer.TokenType.LBRACE);
+        // Fields must be comma-separated; a trailing comma before RBRACE is tolerated, a missing separator ends the list.
+        var fields = std.ArrayList(*Node).init(self.arena);
+        while (self.accept_parse(parse_struct_field.call)) |field| {
+            try fields.append(field);
+            if (self.accept_token(tokenizer.TokenType.COMMA) == null) break;
+        }
+        _ = try self.parse_token(tokenizer.TokenType.RBRACE);
+
+        return self.create_node(.{
+            .TYPE = .{
+                .STRUCT_TYPE = .{
+                    .fields = fields.items,
+                },
+            },
+        });
+    }
+
+    // StructInstantiation ::= IDENTIFIER LBRACE RBRACE
+    fn parse_struct_instanciation(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing struct instanciation {any}\n", .{self.peek_token()});
+
+        // Only the empty-brace form is parsed here: a type name immediately followed by LBRACE and RBRACE.
+        const name_token = try self.parse_token(tokenizer.TokenType.IDENTIFIER);
+        _ = try self.parse_token(tokenizer.TokenType.LBRACE);
+        _ = try self.parse_token(tokenizer.TokenType.RBRACE);
+
+        // The node stores its own copy of the type name, duplicated through self.arena.
+        const struct_name = try self.arena.dupe(u8, name_token.type.IDENTIFIER);
+
+        return self.create_node(.{ .STRUCT_INSTANCIATION = .{ .typ = struct_name } });
+    }
+
+    // FieldAccess ::= Expression DOT IDENTIFIER
+    fn parse_field_access(self: *Parser) ParserError!*Node {
+        errdefer if (!self.try_context) std.debug.print("Error parsing field access {any}\n", .{self.peek_token()});
+        // Parses an expression, a DOT and an identifier, and wraps them in a FIELD_ACCESS node.
+        const expression = try self.parse_expression();
+        // NOTE(review): parse_primary_expression calls this only after it has consumed a DOT token, yet a second DOT is required here — confirm intent.
+        _ = try self.parse_token(tokenizer.TokenType.DOT);
+
+        const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER);
+
+        return self.create_node(.{
+            .FIELD_ACCESS = .{
+                .expression = expression,
+                .name = try self.arena.dupe(u8, ident.type.IDENTIFIER),
+            },
+        });
+    }
+
     // ReturnStatement ::= RETURN (Expression)?
     fn parse_return_statement(self: *Parser) ParserError!*Node {
         errdefer if (!self.try_context) std.debug.print("Error parsing return statement {any}\n", .{self.peek_token()});
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index d150058..f6fbf11 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -15,6 +15,7 @@ pub const TokenType = union(enum) {
     BREAK: void,
     CONTINUE: void,
     ARROW: void,
+    STRUCT: void,
 
     // Identifiers
     IDENTIFIER: []u8,
@@ -36,6 +37,7 @@ pub const TokenType = union(enum) {
     BANG: void,
     LESS: void,
     GREATER: void,
+    DOT: void,
 
     // Punctuation
     SEMICOLON: void,
@@ -98,6 +100,7 @@ pub const Tokenizer = struct {
         if (self.accept_string("true")) return self.create_token(.{ .BOOLEAN = true });
         if (self.accept_string("false")) return self.create_token(.{ .BOOLEAN = false });
         if (self.accept_string("null")) return self.create_token(.{ .NULL = void{} });
+        if (self.accept_string("struct")) return self.create_token(.{ .STRUCT = void{} });
 
         if (self.accept_string("=>")) return self.create_token(.{ .ARROW = void{} });
         if (self.accept_string(";")) return self.create_token(.{ .SEMICOLON = void{} });
@@ -116,6 +119,7 @@ pub const Tokenizer = struct {
         if (self.accept_string("!")) return self.create_token(.{ .BANG = void{} });
         if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} });
         if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} });
+        if (self.accept_string(".")) return self.create_token(.{ .DOT = void{} });
 
         if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i });
         if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c });