diff options
| -rw-r--r-- | examples/22.src | 34 | ||||
| -rw-r--r-- | grammar.ebnf | 10 | ||||
| -rw-r--r-- | src/bootstrap/tokenizer.src | 3 | ||||
| -rw-r--r-- | src/codegen.zig | 95 | ||||
| -rw-r--r-- | src/parser.zig | 89 | ||||
| -rw-r--r-- | src/tokenizer.zig | 4 |
6 files changed, 218 insertions, 17 deletions
diff --git a/examples/22.src b/examples/22.src new file mode 100644 index 0000000..9d73d1a --- /dev/null +++ b/examples/22.src @@ -0,0 +1,34 @@ +import "!stdlib.src"; + +/* declare new struct type */ +let test = struct { + x: i64, + y: *i8, + z: bool +}; + +let main = () => i64 { + /* instantiate new struct. initializing fields inside the braces isn't supported here */ + let inst = test{}; + inst.x = 2; + inst.y = "hello"; + inst.z = true; + + println("Inst x: %d", inst.x); + println("Inst y: %s", inst.y); + println("Inst z: %d", inst.z); + + return 0; +}; + +/* + +Expected stdout: + +2 +hello +1 + +Expected return: 0 + +*/ diff --git a/grammar.ebnf b/grammar.ebnf index ddb0cef..d203a69 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -30,7 +30,7 @@ MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression) UnaryExpression ::= ("!" | "-" | "*") UnaryExpression | PrimaryExpression -PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN +PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE @@ -41,3 +41,11 @@ Type ::= IDENTIFIER | FunctionType FunctionType ::= LPAREN (Type ("," Type)*)? RPAREN ARROW Type ParameterTypes ::= Type ("," Type)* + +StructDefinition ::= "struct" LBRACE (StructField ("," StructField)*)? 
RBRACE + +StructField ::= IDENTIFIER ":" Type + +StructInstantiation ::= IDENTIFIER LBRACE RBRACE + +FieldAccess ::= Expression DOT IDENTIFIER diff --git a/src/bootstrap/tokenizer.src b/src/bootstrap/tokenizer.src index 49a717b..a24d090 100644 --- a/src/bootstrap/tokenizer.src +++ b/src/bootstrap/tokenizer.src @@ -311,6 +311,9 @@ let tokenizer_next = () => *i8 { if tokenizer_accept_string(">") { return ">"; }; + if tokenizer_accept_string(".") { + return "."; + }; let maybe_int = tokenizer_accept_int_type(); if maybe_int != cast(*i64, null) { diff --git a/src/codegen.zig b/src/codegen.zig index b3fb24a..4f69218 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -17,6 +17,7 @@ pub const CodeGenError = error{ pub const CodeGen = struct { llvm_module: llvm.LLVMModuleRef, + llvm_context: llvm.LLVMContextRef, builder: llvm.LLVMBuilderRef, environment: *Environment, @@ -36,11 +37,13 @@ pub const CodeGen = struct { llvm.LLVMInitializeAllAsmParsers(); const module: llvm.LLVMModuleRef = llvm.LLVMModuleCreateWithName("module"); + const context = llvm.LLVMGetGlobalContext(); const builder = llvm.LLVMCreateBuilder(); const self = try arena.create(CodeGen); self.* = .{ .llvm_module = module, + .llvm_context = context, .builder = builder, .environment = try Environment.init(arena), @@ -145,6 +148,7 @@ pub const CodeGen = struct { if (self.environment.scope_stack.items.len == 1) { try self.environment.add_variable(identifier.name, try self.create_variable(.{ .value = variable.value, + .type = null, .node = variable.node, .node_type = variable.node_type, .stack_level = null, @@ -171,7 +175,11 @@ pub const CodeGen = struct { if (assignment_statement.is_dereference) { ptr = llvm.LLVMBuildLoad2(self.builder, try self.get_llvm_type(typ), ptr, ""); } - _ = llvm.LLVMBuildStore(self.builder, variable.value, ptr); + + // NOTE: structs have a null variable.value + if (variable.value != null) { + _ = llvm.LLVMBuildStore(self.builder, variable.value, ptr); + } if 
(assignment_statement.is_dereference) { ptr = self.environment.get_variable(identifier.name).?.value; @@ -179,6 +187,7 @@ pub const CodeGen = struct { const new_variable = try self.create_variable(.{ .value = ptr, + .type = null, .node = variable.node, .node_type = typ, .stack_level = null, @@ -257,6 +266,7 @@ pub const CodeGen = struct { return self.create_variable(.{ .value = res, + .type = null, .stack_level = null, .node = node, .node_type = function_return_type, @@ -422,6 +432,7 @@ pub const CodeGen = struct { if (name != null) { try self.environment.add_variable(name.?, try self.create_variable(.{ .value = function, + .type = null, .stack_level = null, .node = expression, .node_type = node_type, @@ -448,6 +459,7 @@ pub const CodeGen = struct { try self.environment.add_variable(param_node.PRIMARY_EXPRESSION.IDENTIFIER.name, try self.create_variable(.{ .value = alloca, + .type = null, .stack_level = null, .node = param_node, .node_type = param_type, @@ -465,6 +477,7 @@ pub const CodeGen = struct { if (name == null or self.environment.scope_stack.items.len == 2) { return try self.create_variable(.{ .value = function, + .type = null, .stack_level = null, .node = expression, .node_type = node_type, @@ -473,6 +486,7 @@ pub const CodeGen = struct { return try self.create_variable(.{ .value = function, + .type = null, .stack_level = null, .node = expression, .node_type = node_type, @@ -481,6 +495,9 @@ pub const CodeGen = struct { .FUNCTION_CALL_STATEMENT => |*fn_call| { return try self.generate_function_call_statement(@ptrCast(fn_call)); }, + .STRUCT_INSTANCIATION => |struct_instanciation| { + return self.environment.get_variable(struct_instanciation.typ).?; + }, .PRIMARY_EXPRESSION => |primary_expression| switch (primary_expression) { .NULL => { return try self.generate_literal(llvm.LLVMConstNull(llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0)), name, expression, try self.create_node(.{ @@ -532,6 +549,7 @@ pub const CodeGen = struct { return self.create_variable( .{ 
.value = x, + .type = null, .stack_level = null, .node = expression, .node_type = try self.create_node(.{ @@ -671,28 +689,51 @@ pub const CodeGen = struct { })); }, .TYPE => |typ| { - std.debug.assert(typ == .FUNCTION_TYPE); - std.debug.assert(self.environment.scope_stack.items.len == 1); + switch (typ) { + .FUNCTION_TYPE => { + std.debug.assert(self.environment.scope_stack.items.len == 1); - const variable = self.environment.get_variable(name.?); - if (variable) |v| { - return v; - } + const variable = self.environment.get_variable(name.?); + if (variable) |v| { + return v; + } - const function_type = try self.get_llvm_type(expression); - const function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}), function_type); + const function_type = try self.get_llvm_type(expression); + const function = llvm.LLVMAddFunction(self.llvm_module, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?}), function_type); - return try self.create_variable(.{ - .value = function, - .stack_level = null, - .node = expression, - .node_type = expression, - }); + return try self.create_variable(.{ + .value = function, + .type = null, + .stack_level = null, + .node = expression, + .node_type = expression, + }); + }, + .STRUCT_TYPE => |t| { + const struct_type = llvm.LLVMStructCreateNamed(self.llvm_context, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})); + + var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); + + for (t.fields) |field| { + try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?)); + } + llvm.LLVMStructSetBody(struct_type, llvm_types.items.ptr, @intCast(llvm_types.items.len), 0); + return try self.create_variable(.{ + .value = null, + .type = struct_type, + .stack_level = null, + .node = expression, + .node_type = expression, + }); + }, + else => unreachable, + } }, .CAST_STATEMENT => |exp| { const val = try self.generate_expression_value(exp.expression, ""); return try 
self.create_variable(.{ .value = val.value, //TODO: do real casting + .type = null, .stack_level = null, .node = expression, .node_type = exp.typ, @@ -706,6 +747,7 @@ pub const CodeGen = struct { if (name != null and self.environment.scope_stack.items.len == 1) { const ptr = try self.create_variable(.{ .value = llvm.LLVMAddGlobal(self.llvm_module, try self.get_llvm_type(node_type), try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})), + .type = null, .stack_level = null, .node = node, .node_type = node_type, @@ -716,6 +758,7 @@ pub const CodeGen = struct { return try self.create_variable(.{ .value = literal_val, + .type = null, .stack_level = null, .node = node, .node_type = node_type, @@ -761,6 +804,15 @@ pub const CodeGen = struct { const inner_type = try self.get_llvm_type(t.type); return llvm.LLVMPointerType(inner_type, 0); }, + .STRUCT_TYPE => |t| { + var llvm_types = std.ArrayList(llvm.LLVMTypeRef).init(self.arena); + + for (t.fields) |field| { + try llvm_types.append(try self.get_llvm_type(field.PRIMARY_EXPRESSION.IDENTIFIER.type.?)); + } + + return llvm.LLVMStructType(llvm_types.items.ptr, @intCast(llvm_types.items.len), 0); + }, } } @@ -824,6 +876,18 @@ pub const CodeGen = struct { } return res; }, + .STRUCT_TYPE => |a_struct| { + const b_struct = b_type.STRUCT_TYPE; + + if (a_struct.fields.len != b_struct.fields.len) return false; + + for (0.., a_struct.fields) |i, f| { + if (!self.compare_types(f, b_struct.fields[i], false)) { + return false; + } + } + return true; + }, } } @@ -842,6 +906,7 @@ pub const CodeGen = struct { const Variable = struct { value: llvm.LLVMValueRef, + type: llvm.LLVMTypeRef, node: *parser.Node, node_type: *parser.Node, stack_level: ?usize, diff --git a/src/parser.zig b/src/parser.zig index 25ab6aa..f92f0c5 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -78,6 +78,13 @@ pub const Node = union(enum) { parameters: []*Node, return_type: *Node, }, + STRUCT_INSTANCIATION: struct { + typ: []const u8, + }, + FIELD_ACCESS: struct { + 
expression: *Node, + name: []const u8, + }, TYPE: union(enum) { SIMPLE_TYPE: struct { name: []const u8, @@ -89,6 +96,9 @@ pub const Node = union(enum) { POINTER_TYPE: struct { type: *Node, }, + STRUCT_TYPE: struct { + fields: []*Node, + }, }, RETURN_STATEMENT: struct { expression: ?*Node, @@ -553,13 +563,15 @@ pub const Parser = struct { } }); } - // PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN + // PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | CastStatement | FunctionCallStatement | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN fn parse_primary_expression(self: *Parser) ParserError!*Node { errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()}); if (self.accept_parse(parse_cast_statement)) |stmt| return stmt; if (self.accept_parse(parse_function_call_statement)) |stmt| return stmt; if (self.accept_parse(parse_function_definition)) |stmt| return stmt; + if (self.accept_parse(parse_struct_definition)) |stmt| return stmt; + if (self.accept_parse(parse_struct_instanciation)) |stmt| return stmt; // LPAREN (Expression) RPAREN if (self.accept_token(tokenizer.TokenType.LPAREN)) |_| { @@ -571,6 +583,7 @@ pub const Parser = struct { const token = self.consume_token() orelse return ParserError.ParsingError; return switch (token.type) { + .DOT => try self.parse_field_access(), .NULL => try self.create_node(.{ .PRIMARY_EXPRESSION = .{ .NULL = void{} }, }), @@ -670,6 +683,80 @@ pub const Parser = struct { return node_list.items; } + // StructDefinition ::= "struct" LBRACE (StructField ("," StructField)*)? 
RBRACE + fn parse_struct_definition(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing struct definition {any}\n", .{self.peek_token()}); + + // StructField ::= IDENTIFIER ":" Type + const parse_struct_field = struct { + fn call(iself: *Parser) ParserError!*Node { + const ident = try iself.parse_token(tokenizer.TokenType.IDENTIFIER); + _ = try iself.parse_token(tokenizer.TokenType.COLON); + const type_annotation = try iself.parse_type(); + + return iself.create_node(.{ + .PRIMARY_EXPRESSION = .{ + .IDENTIFIER = .{ + .name = try iself.arena.dupe(u8, ident.type.IDENTIFIER), + .type = type_annotation, + }, + }, + }); + } + }; + + _ = try self.parse_token(tokenizer.TokenType.STRUCT); + _ = try self.parse_token(tokenizer.TokenType.LBRACE); + + var fields = std.ArrayList(*Node).init(self.arena); + while (self.accept_parse(parse_struct_field.call)) |field| { + _ = self.accept_token(tokenizer.TokenType.COMMA); + try fields.append(field); + } + _ = try self.parse_token(tokenizer.TokenType.RBRACE); + + return self.create_node(.{ + .TYPE = .{ + .STRUCT_TYPE = .{ + .fields = fields.items, + }, + }, + }); + } + + // StructInstantiation ::= IDENTIFIER LBRACE RBRACE + fn parse_struct_instanciation(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing struct instanciation {any}\n", .{self.peek_token()}); + + const typ = try self.parse_token(tokenizer.TokenType.IDENTIFIER); + _ = try self.parse_token(tokenizer.TokenType.LBRACE); + _ = try self.parse_token(tokenizer.TokenType.RBRACE); + + return self.create_node(.{ + .STRUCT_INSTANCIATION = .{ + .typ = try self.arena.dupe(u8, typ.type.IDENTIFIER), + }, + }); + } + + // FieldAccess ::= Expression DOT IDENTIFIER + fn parse_field_access(self: *Parser) ParserError!*Node { + errdefer if (!self.try_context) std.debug.print("Error parsing field access {any}\n", .{self.peek_token()}); + + const expression = try self.parse_expression(); + + _ = try 
self.parse_token(tokenizer.TokenType.DOT); + + const ident = try self.parse_token(tokenizer.TokenType.IDENTIFIER); + + return self.create_node(.{ + .FIELD_ACCESS = .{ + .expression = expression, + .name = try self.arena.dupe(u8, ident.type.IDENTIFIER), + }, + }); + } + // ReturnStatement ::= RETURN (Expression)? fn parse_return_statement(self: *Parser) ParserError!*Node { errdefer if (!self.try_context) std.debug.print("Error parsing return statement {any}\n", .{self.peek_token()}); diff --git a/src/tokenizer.zig b/src/tokenizer.zig index d150058..f6fbf11 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -15,6 +15,7 @@ pub const TokenType = union(enum) { BREAK: void, CONTINUE: void, ARROW: void, + STRUCT: void, // Identifiers IDENTIFIER: []u8, @@ -36,6 +37,7 @@ pub const TokenType = union(enum) { BANG: void, LESS: void, GREATER: void, + DOT: void, // Punctuation SEMICOLON: void, @@ -98,6 +100,7 @@ pub const Tokenizer = struct { if (self.accept_string("true")) return self.create_token(.{ .BOOLEAN = true }); if (self.accept_string("false")) return self.create_token(.{ .BOOLEAN = false }); if (self.accept_string("null")) return self.create_token(.{ .NULL = void{} }); + if (self.accept_string("struct")) return self.create_token(.{ .STRUCT = void{} }); if (self.accept_string("=>")) return self.create_token(.{ .ARROW = void{} }); if (self.accept_string(";")) return self.create_token(.{ .SEMICOLON = void{} }); @@ -116,6 +119,7 @@ pub const Tokenizer = struct { if (self.accept_string("!")) return self.create_token(.{ .BANG = void{} }); if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} }); if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} }); + if (self.accept_string(".")) return self.create_token(.{ .DOT = void{} }); if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i }); if (self.accept_char_type()) |c| return self.create_token(.{ .CHAR = c }); |