diff options
| author | Baitinq <[email protected]> | 2025-03-23 23:57:05 +0100 |
|---|---|---|
| committer | Baitinq <[email protected]> | 2025-03-24 00:17:07 +0100 |
| commit | 31fa32743a5ed25724868cfa476e3af028adb118 (patch) | |
| tree | 97afd19643223af3784baef5c2b57e48467dcce9 /src | |
| parent | Parser: Fix ambiguity with symbol declaration (diff) | |
| download | interpreter-31fa32743a5ed25724868cfa476e3af028adb118.tar.gz interpreter-31fa32743a5ed25724868cfa476e3af028adb118.tar.bz2 interpreter-31fa32743a5ed25724868cfa476e3af028adb118.zip | |
Feature: Add support for strings
Diffstat (limited to 'src')
| -rw-r--r-- | src/codegen.zig | 111 | ||||
| -rw-r--r-- | src/evaluator.zig | 4 | ||||
| -rw-r--r-- | src/parser.zig | 10 | ||||
| -rw-r--r-- | src/tokenizer.zig | 47 |
4 files changed, 83 insertions, 89 deletions
diff --git a/src/codegen.zig b/src/codegen.zig index 4e2d897..b873842 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -59,9 +59,6 @@ pub const CodeGen = struct { } }), })); - try self.create_print_function(); - try self.create_printb_function(); - return self; } @@ -374,7 +371,7 @@ pub const CodeGen = struct { }, .PRIMARY_EXPRESSION => |primary_expression| switch (primary_expression) { .NUMBER => |n| { - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt64Type(), @intCast(n.value), 0), llvm.LLVMInt64Type(), name); + return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt64Type(), @intCast(n.value), 0), llvm.LLVMInt64Type(), name, expression); }, .BOOLEAN => |b| { const int_value: i64 = switch (b.value) { @@ -382,7 +379,18 @@ pub const CodeGen = struct { true => 1, }; - return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), llvm.LLVMInt1Type(), name); + return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), llvm.LLVMInt1Type(), name, expression); + }, + .STRING => |s| { + const x = llvm.LLVMBuildGlobalStringPtr(self.builder, try std.fmt.allocPrintZ(self.arena, "{s}", .{s.value}), ""); + return self.create_variable( + .{ + .value = x, + .type = llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0), + .stack_level = null, + .node = expression, + }, + ); }, .IDENTIFIER => |i| { const variable = self.environment.get_variable(i.name).?; @@ -390,9 +398,16 @@ pub const CodeGen = struct { if (llvm.LLVMGetTypeKind(param_type.?) == llvm.LLVMFunctionTypeKind) { param_type = llvm.LLVMPointerType(param_type.?, 0); } - const loaded = llvm.LLVMBuildLoad2(self.builder, param_type, variable.value, ""); - return self.generate_literal(loaded, variable.type, name); + var loaded: llvm.LLVMValueRef = undefined; + + if (variable.node.?.* == .PRIMARY_EXPRESSION and variable.node.?.PRIMARY_EXPRESSION == .STRING) { + loaded = variable.value; + } else { + loaded = llvm.LLVMBuildLoad2(self.builder, param_type, variable.value, ""); + } + + return self.generate_literal(loaded, variable.type, name, expression); }, }, .ADDITIVE_EXPRESSION => |exp| { @@ -406,7 +421,7 @@ pub const CodeGen = struct { result = llvm.LLVMBuildSub(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError; } - return self.generate_literal(result, llvm.LLVMInt64Type(), name); + return self.generate_literal(result, llvm.LLVMInt64Type(), name, expression); }, .MULTIPLICATIVE_EXPRESSION => |exp| { const lhs_value = try self.generate_expression_value(exp.lhs, null); @@ -425,7 +440,7 @@ pub const CodeGen = struct { }, } - return self.generate_literal(result, llvm.LLVMInt64Type(), name); + return self.generate_literal(result, llvm.LLVMInt64Type(), name, expression); }, .UNARY_EXPRESSION => |exp| { const k = try self.generate_expression_value(exp.expression, null); @@ -444,7 +459,7 @@ pub const CodeGen = struct { }, } - return self.generate_literal(r, t, name); + return self.generate_literal(r, t, name, expression); }, .EQUALITY_EXPRESSION => |exp| { const lhs_value = try self.generate_expression_value(exp.lhs, null); @@ -457,7 +472,7 @@ pub const CodeGen = struct { }; const cmp = llvm.LLVMBuildICmp(self.builder, op, lhs_value.value, rhs_value.value, ""); - return self.generate_literal(cmp, llvm.LLVMInt1Type(), name); + return self.generate_literal(cmp, llvm.LLVMInt1Type(), name, expression); }, .TYPE => |typ| { std.debug.assert(typ == .FUNCTION_TYPE); @@ -485,14 +500,14 @@ pub const CodeGen = struct { }; } - fn generate_literal(self: *CodeGen, literal_val: llvm.LLVMValueRef, literal_type: llvm.LLVMTypeRef, name: ?[]const u8) !*Variable { + fn generate_literal(self: *CodeGen, literal_val: llvm.LLVMValueRef, literal_type: llvm.LLVMTypeRef, name: ?[]const u8, node: *parser.Node) !*Variable { if (name != null) { if (self.environment.scope_stack.items.len == 1) { const ptr = try self.create_variable(.{ .value = llvm.LLVMAddGlobal(self.llvm_module, literal_type, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})), .type = literal_type, .stack_level = null, - .node = null, //TODO + .node = node, }); llvm.LLVMSetInitializer(ptr.value, literal_val); return ptr; @@ -500,6 +515,7 @@ pub const CodeGen = struct { const ptr = self.environment.get_variable(name.?) orelse unreachable; _ = llvm.LLVMBuildStore(self.builder, literal_val, ptr.value) orelse return CodeGenError.CompilationError; ptr.type = literal_type; + ptr.node = node; return ptr; } @@ -535,75 +551,6 @@ pub const CodeGen = struct { } } - fn create_print_function(self: *CodeGen) !void { - const print_function_type = llvm.LLVMFunctionType(llvm.LLVMVoidType(), @constCast(&[_]llvm.LLVMTypeRef{llvm.LLVMInt64Type()}), 1, 0); - const print_function = llvm.LLVMAddFunction(self.llvm_module, "print", print_function_type); - const print_function_entry = llvm.LLVMAppendBasicBlock(print_function, "entrypoint") orelse return CodeGenError.CompilationError; - llvm.LLVMPositionBuilderAtEnd(self.builder, print_function_entry); - - const format_str = "%d\n"; - const format_str_ptr = llvm.LLVMBuildGlobalStringPtr(self.builder, format_str, "format_str_ptr"); - - const arguments = @constCast(&[_]llvm.LLVMValueRef{ - format_str_ptr, - llvm.LLVMGetParam(print_function, 0), - }); - - const printf_function_var = self.environment.get_variable("printf") orelse return CodeGenError.CompilationError; - - _ = llvm.LLVMBuildCall2(self.builder, printf_function_var.type, printf_function_var.value, arguments, 2, "") orelse return CodeGenError.CompilationError; - _ = llvm.LLVMBuildRetVoid(self.builder); - - try self.environment.add_variable("print", try self.create_variable(.{ - .value = print_function, - .type = print_function_type, - .stack_level = null, - .node = try self.create_node(.{ .FUNCTION_DEFINITION = .{ - .statements = &[_]*parser.Node{}, - .parameters = &[_]*parser.Node{}, - .return_type = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "i64", - } } }), - } }), - })); - } - - fn create_printb_function(self: *CodeGen) !void { - const print_function_type = llvm.LLVMFunctionType(llvm.LLVMVoidType(), @constCast(&[_]llvm.LLVMTypeRef{llvm.LLVMInt1Type()}), 1, 0); - const print_function = llvm.LLVMAddFunction(self.llvm_module, "printb", print_function_type); - const print_function_entry = llvm.LLVMAppendBasicBlock(print_function, "entrypoint") orelse return CodeGenError.CompilationError; - llvm.LLVMPositionBuilderAtEnd(self.builder, print_function_entry); - - const format_str = "%d\n"; - const format_str_ptr = llvm.LLVMBuildGlobalStringPtr(self.builder, format_str, "format_str_ptr"); - - const p = llvm.LLVMGetParam(print_function, 0); - const x = llvm.LLVMBuildZExt(self.builder, p, llvm.LLVMInt64Type(), ""); - - const arguments = @constCast(&[_]llvm.LLVMValueRef{ - format_str_ptr, - x, - }); - - const printf_function_var = self.environment.get_variable("printf") orelse return CodeGenError.CompilationError; - - _ = llvm.LLVMBuildCall2(self.builder, printf_function_var.type, printf_function_var.value, arguments, 2, "") orelse return CodeGenError.CompilationError; - _ = llvm.LLVMBuildRetVoid(self.builder); - - try self.environment.add_variable("printb", try self.create_variable(.{ - .value = print_function, - .type = print_function_type, - .stack_level = null, - .node = try self.create_node(.{ .FUNCTION_DEFINITION = .{ - .statements = &[_]*parser.Node{}, - .parameters = &[_]*parser.Node{}, - .return_type = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{ - .name = "i64", - } } }), - } }), - })); - } - fn create_variable(self: *CodeGen, variable_value: Variable) !*Variable { const variable = try self.arena.create(Variable); variable.* = variable_value; diff --git a/src/evaluator.zig b/src/evaluator.zig index fef08a2..f3c0a6c 100644 --- a/src/evaluator.zig +++ b/src/evaluator.zig @@ -6,11 +6,12 @@ const EvaluatorError = error{ OutOfMemory, }; -const VariableType = enum { NUMBER, BOOLEAN, FUNCTION_DEFINITION }; +const VariableType = enum { NUMBER, BOOLEAN, STRING, FUNCTION_DEFINITION }; const Variable = union(VariableType) { NUMBER: i64, BOOLEAN: bool, + STRING: []const u8, FUNCTION_DEFINITION: *parser.Node, }; @@ -206,6 +207,7 @@ pub const Evaluator = struct { switch (x) { .NUMBER => |number| return self.create_variable(.{ .NUMBER = number.value }), .BOOLEAN => |b| return self.create_variable(.{ .BOOLEAN = b.value }), + .STRING => |s| return self.create_variable(.{ .STRING = s.value }), .IDENTIFIER => |identifier| { const val = self.environment.get_variable(identifier.name) orelse { std.debug.print("Identifier {any} not found\n", .{identifier.name}); diff --git a/src/parser.zig b/src/parser.zig index 64e670e..9d40c65 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -52,6 +52,9 @@ pub const Node = union(enum) { BOOLEAN: struct { value: bool, }, + STRING: struct { + value: []const u8, + }, IDENTIFIER: struct { name: []const u8, type: ?*Node, @@ -403,7 +406,7 @@ pub const Parser = struct { } }); } - // PrimaryExpression ::= NUMBER | BOOLEAN | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN + // PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN fn parse_primary_expression(self: *Parser) ParserError!*Node { errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()}); @@ -432,6 +435,11 @@ pub const Parser = struct { .value = boolean_token, } }, }), + .STRING => |string_token| try self.create_node(.{ + .PRIMARY_EXPRESSION = .{ .STRING = .{ + .value = try self.arena.dupe(u8, string_token), + } }, + }), .IDENTIFIER => |identifier_token| try self.create_node(.{ .PRIMARY_EXPRESSION = .{ .IDENTIFIER = .{ diff --git a/src/tokenizer.zig b/src/tokenizer.zig index 138ad69..b959738 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -19,6 +19,7 @@ pub const TokenType = union(enum) { // Literals NUMBER: i64, BOOLEAN: bool, + STRING: []u8, // Operators EQUALS: void, @@ -93,10 +94,15 @@ pub const Tokenizer = struct { if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} }); if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} }); - const string = self.consume_string(); - if (string.len == 0) return TokenizerError.TokenizingError; + if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i }); + if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s }); - if (std.fmt.parseInt(i32, string, 10) catch null) |i| return self.create_token(.{ .NUMBER = i }); + const string = self.consume_until_condition(struct { + fn condition(c: u8) bool { + return !std.ascii.isAlphanumeric(c) and c != '_'; + } + }.condition); + if (string.len == 0) return TokenizerError.TokenizingError; return self.create_token(.{ .IDENTIFIER = string }); } @@ -118,7 +124,7 @@ pub const Tokenizer = struct { } } - fn consume_string(self: *Tokenizer) []u8 { + fn consume_until_condition(self: *Tokenizer, condition: fn (c: u8) bool) []u8 { defer self.offset = if (self.offset > 0) self.offset - 1 else self.offset; const start = self.offset; while (true) { @@ -127,7 +133,7 @@ pub const Tokenizer = struct { const c = self.buf[self.offset]; - if (!std.ascii.isAlphanumeric(c) and c != '_') return self.buf[start..self.offset]; + if (condition(c)) return self.buf[start..self.offset]; } } @@ -140,6 +146,37 @@ pub const Tokenizer = struct { return false; } + fn accept_int_type(self: *Tokenizer) ?i64 { + const res = self.consume_until_condition(struct { + fn condition(c: u8) bool { + return !std.ascii.isDigit(c); + } + }.condition); + + return std.fmt.parseInt(i64, res, 10) catch null; + } + + fn accept_string_type(self: *Tokenizer) ?[]u8 { + const prev_offset = self.offset; + if (!self.accept_string("\"")) { + self.offset = prev_offset; + return null; + } + + const res = self.consume_until_condition(struct { + fn condition(c: u8) bool { + return c == '"'; + } + }.condition); + + if (!self.accept_string("\"")) { + self.offset = prev_offset; + return null; + } + + return res; + } + fn create_token(self: *Tokenizer, token_type: TokenType) Token { return Token{ .location = self.compute_location(), |