about summary refs log tree commit diff
diff options
context:
space:
mode:
authorBaitinq <[email protected]>2025-03-23 23:57:05 +0100
committerBaitinq <[email protected]>2025-03-24 00:17:07 +0100
commit31fa32743a5ed25724868cfa476e3af028adb118 (patch)
tree97afd19643223af3784baef5c2b57e48467dcce9
parentParser: Fix ambiguity with symbol declaration (diff)
downloadpry-lang-31fa32743a5ed25724868cfa476e3af028adb118.tar.gz
pry-lang-31fa32743a5ed25724868cfa476e3af028adb118.tar.bz2
pry-lang-31fa32743a5ed25724868cfa476e3af028adb118.zip
Feature: Add support for strings
Diffstat (limited to '')
-rw-r--r--examples/0.src2
-rw-r--r--examples/1.5.src4
-rw-r--r--examples/1.src4
-rw-r--r--examples/10.src2
-rw-r--r--examples/12.src4
-rw-r--r--examples/13.src4
-rw-r--r--examples/2.src2
-rw-r--r--examples/3.src2
-rw-r--r--examples/4.src4
-rw-r--r--examples/5.src6
-rw-r--r--examples/6.5.src2
-rw-r--r--examples/6.src6
-rw-r--r--examples/7.src2
-rw-r--r--examples/8.src2
-rw-r--r--examples/9.src2
-rw-r--r--grammar.ebnf2
-rw-r--r--src/codegen.zig111
-rw-r--r--src/evaluator.zig4
-rw-r--r--src/parser.zig10
-rw-r--r--src/tokenizer.zig47
20 files changed, 107 insertions, 115 deletions
diff --git a/examples/0.src b/examples/0.src
index a6e4b38..64e0b97 100644
--- a/examples/0.src
+++ b/examples/0.src
@@ -1,7 +1,7 @@
 /* HELLO! Welcome to the unnamed language */
 
 let main = (argc: i64) => i64 {
-	print(argc);
+	printf("%d", argc);
 	
 	return 2;
 };
diff --git a/examples/1.5.src b/examples/1.5.src
index 1f99ee3..ea4c4e0 100644
--- a/examples/1.5.src
+++ b/examples/1.5.src
@@ -1,12 +1,12 @@
 let x = () => i64 {
-	print(22);
+	printf("%d", 22);
 	return 11;
 };
 
 let main = () => i64 {
 	let i = 4;
 
-	print(i);
+	print("%d", i);
 
 	return x();
 };
diff --git a/examples/1.src b/examples/1.src
index cc9cfe7..d3c8482 100644
--- a/examples/1.src
+++ b/examples/1.src
@@ -1,7 +1,7 @@
 let main = () => i64 {
-	let i = 4;
+	let s = "hello";
 
-	print(i);
+	printf("%s", s);
 
 	return 0;
 };
diff --git a/examples/10.src b/examples/10.src
index 59f91e1..c0bb704 100644
--- a/examples/10.src
+++ b/examples/10.src
@@ -2,7 +2,7 @@ let main = () => i64 {
 	let counter = 0;
 
 	while counter < 10 {
-		print(counter);
+		printf("%d", counter);
 		counter = counter + 1;
 	};
 
diff --git a/examples/12.src b/examples/12.src
index a680efa..941682a 100644
--- a/examples/12.src
+++ b/examples/12.src
@@ -29,10 +29,10 @@ let main = () => i64 {
     };
 
     let fact_val = factorial(6);
-    print(fact_val);
+    printf("%d", fact_val);
 
     let even_sum = sum_if(is_even, 20);
-    print(even_sum);
+    printf("%d", even_sum);
 
     return 0;
 };
diff --git a/examples/13.src b/examples/13.src
index eeb5b32..c1f2fcb 100644
--- a/examples/13.src
+++ b/examples/13.src
@@ -22,10 +22,10 @@ let main = () => i64 {
     };
 
     let fib_val = fibonacci_iter(10);
-    print(fib_val);
+    printf("%d", fib_val);
 
     let gcd_val = gcd(48, 18);
-    print(gcd_val);
+    printf("%d", gcd_val);
 
     return 0;
 };
diff --git a/examples/2.src b/examples/2.src
index 646f1fa..766086d 100644
--- a/examples/2.src
+++ b/examples/2.src
@@ -3,7 +3,7 @@ let main = () => i64 {
 
 	let uwu = test;
 
-	print(uwu);
+	printf("%d", uwu);
 
 	return 0;
 };
diff --git a/examples/3.src b/examples/3.src
index 92fb50a..8ff1920 100644
--- a/examples/3.src
+++ b/examples/3.src
@@ -3,7 +3,7 @@ let main = () => i64 {
 
 	seventeen = seventeen + 1;
 
-	print(seventeen);
+	printf("%d", seventeen);
 
 	return seventeen;
 };
diff --git a/examples/4.src b/examples/4.src
index b6e8ab3..d1ae454 100644
--- a/examples/4.src
+++ b/examples/4.src
@@ -1,12 +1,12 @@
 let main = () => i64 {
 	let print_one = () => i64 {
-		print(1);
+		printf("%d", 1);
 		return 4;
 	};
 
 	let y = print_one() + 2;
 
-	print(y);
+	printf("%d", y);
 
 	return y;
 };
diff --git a/examples/5.src b/examples/5.src
index 7f4f38f..7c767ea 100644
--- a/examples/5.src
+++ b/examples/5.src
@@ -4,14 +4,14 @@ let x = 18;
 
 let foo = () => i64 {
 	let x = 1;
-	print(x);
+	printf("%d", x);
 	return x;
 };
 
 let main = () => i64 {
-	print(x);
+	printf("%d", x);
 	let x = 2;
 	let y = foo();
-	print(x);
+	printf("%d", x);
 	return x + y;
 };
diff --git a/examples/6.5.src b/examples/6.5.src
index 53abd86..e82393a 100644
--- a/examples/6.5.src
+++ b/examples/6.5.src
@@ -1,7 +1,7 @@
 let main = () => i64 {
 	let x = !(1 == 1);
 	if !x {
-		printb(x);
+		printf("%d", x);
 	};
 	return 0;
 };
diff --git a/examples/6.src b/examples/6.src
index f97587c..2492e76 100644
--- a/examples/6.src
+++ b/examples/6.src
@@ -1,6 +1,6 @@
 let print_input = (input_a: i64, input_b: i64) => i64 {
-	print(input_a);
-	print(input_b);
+	printf("%d", input_a);
+	printf("%d", input_b);
 	return input_a + input_b;
 };
 
@@ -8,5 +8,3 @@ let main = () => i64 {
 	let i = print_input(1,4);
 	return print_input(7, 2) + i;
 };
-
-/* TODO */
diff --git a/examples/7.src b/examples/7.src
index b10a350..fbb8d06 100644
--- a/examples/7.src
+++ b/examples/7.src
@@ -8,7 +8,7 @@ let main = () => i64 {
 	let i = 4;
 	
 	if (1 - -1 * 2) == 5 - (-1 + 1 + ten() / 2) + 3 {
-		print(i);
+		printf("%d", i);
 		return i;
 	};
 
diff --git a/examples/8.src b/examples/8.src
index 985ca77..f2d1b97 100644
--- a/examples/8.src
+++ b/examples/8.src
@@ -10,6 +10,6 @@ let main = () => i64 {
 	};
 
 	let result = fib(30);
-	print(result);
+	printf("%d", result);
 	return result;
 };
diff --git a/examples/9.src b/examples/9.src
index c226217..f36e92c 100644
--- a/examples/9.src
+++ b/examples/9.src
@@ -1,5 +1,5 @@
 let print_boolean = (b: bool) => i64 {
-	printb(b);
+	printf("%d", b);
 	return 0;
 };
 
diff --git a/grammar.ebnf b/grammar.ebnf
index cbc3f0a..83da90a 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -26,7 +26,7 @@ MultiplicativeExpression ::= UnaryExpression (("*" | "/" | "%") UnaryExpression)
 
 UnaryExpression ::= ("!" | "-") UnaryExpression | PrimaryExpression
 
-PrimaryExpression ::= NUMBER | BOOLEAN | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
+PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
 
 FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE
 
diff --git a/src/codegen.zig b/src/codegen.zig
index 4e2d897..b873842 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -59,9 +59,6 @@ pub const CodeGen = struct {
             } }),
         }));
 
-        try self.create_print_function();
-        try self.create_printb_function();
-
         return self;
     }
 
@@ -374,7 +371,7 @@ pub const CodeGen = struct {
             },
             .PRIMARY_EXPRESSION => |primary_expression| switch (primary_expression) {
                 .NUMBER => |n| {
-                    return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt64Type(), @intCast(n.value), 0), llvm.LLVMInt64Type(), name);
+                    return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt64Type(), @intCast(n.value), 0), llvm.LLVMInt64Type(), name, expression);
                 },
                 .BOOLEAN => |b| {
                     const int_value: i64 = switch (b.value) {
@@ -382,7 +379,18 @@ pub const CodeGen = struct {
                         true => 1,
                     };
 
-                    return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), llvm.LLVMInt1Type(), name);
+                    return try self.generate_literal(llvm.LLVMConstInt(llvm.LLVMInt1Type(), @intCast(int_value), 0), llvm.LLVMInt1Type(), name, expression);
+                },
+                .STRING => |s| {
+                    const x = llvm.LLVMBuildGlobalStringPtr(self.builder, try std.fmt.allocPrintZ(self.arena, "{s}", .{s.value}), "");
+                    return self.create_variable(
+                        .{
+                            .value = x,
+                            .type = llvm.LLVMPointerType(llvm.LLVMInt8Type(), 0),
+                            .stack_level = null,
+                            .node = expression,
+                        },
+                    );
                 },
                 .IDENTIFIER => |i| {
                     const variable = self.environment.get_variable(i.name).?;
@@ -390,9 +398,16 @@ pub const CodeGen = struct {
                     if (llvm.LLVMGetTypeKind(param_type.?) == llvm.LLVMFunctionTypeKind) {
                         param_type = llvm.LLVMPointerType(param_type.?, 0);
                     }
-                    const loaded = llvm.LLVMBuildLoad2(self.builder, param_type, variable.value, "");
 
-                    return self.generate_literal(loaded, variable.type, name);
+                    var loaded: llvm.LLVMValueRef = undefined;
+
+                    if (variable.node.?.* == .PRIMARY_EXPRESSION and variable.node.?.PRIMARY_EXPRESSION == .STRING) {
+                        loaded = variable.value;
+                    } else {
+                        loaded = llvm.LLVMBuildLoad2(self.builder, param_type, variable.value, "");
+                    }
+
+                    return self.generate_literal(loaded, variable.type, name, expression);
                 },
             },
             .ADDITIVE_EXPRESSION => |exp| {
@@ -406,7 +421,7 @@ pub const CodeGen = struct {
                     result = llvm.LLVMBuildSub(self.builder, lhs_value.value, rhs_value.value, "") orelse return CodeGenError.CompilationError;
                 }
 
-                return self.generate_literal(result, llvm.LLVMInt64Type(), name);
+                return self.generate_literal(result, llvm.LLVMInt64Type(), name, expression);
             },
             .MULTIPLICATIVE_EXPRESSION => |exp| {
                 const lhs_value = try self.generate_expression_value(exp.lhs, null);
@@ -425,7 +440,7 @@ pub const CodeGen = struct {
                     },
                 }
 
-                return self.generate_literal(result, llvm.LLVMInt64Type(), name);
+                return self.generate_literal(result, llvm.LLVMInt64Type(), name, expression);
             },
             .UNARY_EXPRESSION => |exp| {
                 const k = try self.generate_expression_value(exp.expression, null);
@@ -444,7 +459,7 @@ pub const CodeGen = struct {
                     },
                 }
 
-                return self.generate_literal(r, t, name);
+                return self.generate_literal(r, t, name, expression);
             },
             .EQUALITY_EXPRESSION => |exp| {
                 const lhs_value = try self.generate_expression_value(exp.lhs, null);
@@ -457,7 +472,7 @@ pub const CodeGen = struct {
                 };
                 const cmp = llvm.LLVMBuildICmp(self.builder, op, lhs_value.value, rhs_value.value, "");
 
-                return self.generate_literal(cmp, llvm.LLVMInt1Type(), name);
+                return self.generate_literal(cmp, llvm.LLVMInt1Type(), name, expression);
             },
             .TYPE => |typ| {
                 std.debug.assert(typ == .FUNCTION_TYPE);
@@ -485,14 +500,14 @@ pub const CodeGen = struct {
         };
     }
 
-    fn generate_literal(self: *CodeGen, literal_val: llvm.LLVMValueRef, literal_type: llvm.LLVMTypeRef, name: ?[]const u8) !*Variable {
+    fn generate_literal(self: *CodeGen, literal_val: llvm.LLVMValueRef, literal_type: llvm.LLVMTypeRef, name: ?[]const u8, node: *parser.Node) !*Variable {
         if (name != null) {
             if (self.environment.scope_stack.items.len == 1) {
                 const ptr = try self.create_variable(.{
                     .value = llvm.LLVMAddGlobal(self.llvm_module, literal_type, try std.fmt.allocPrintZ(self.arena, "{s}", .{name.?})),
                     .type = literal_type,
                     .stack_level = null,
-                    .node = null, //TODO
+                    .node = node,
                 });
                 llvm.LLVMSetInitializer(ptr.value, literal_val);
                 return ptr;
@@ -500,6 +515,7 @@ pub const CodeGen = struct {
             const ptr = self.environment.get_variable(name.?) orelse unreachable;
             _ = llvm.LLVMBuildStore(self.builder, literal_val, ptr.value) orelse return CodeGenError.CompilationError;
             ptr.type = literal_type;
+            ptr.node = node;
             return ptr;
         }
 
@@ -535,75 +551,6 @@ pub const CodeGen = struct {
         }
     }
 
-    fn create_print_function(self: *CodeGen) !void {
-        const print_function_type = llvm.LLVMFunctionType(llvm.LLVMVoidType(), @constCast(&[_]llvm.LLVMTypeRef{llvm.LLVMInt64Type()}), 1, 0);
-        const print_function = llvm.LLVMAddFunction(self.llvm_module, "print", print_function_type);
-        const print_function_entry = llvm.LLVMAppendBasicBlock(print_function, "entrypoint") orelse return CodeGenError.CompilationError;
-        llvm.LLVMPositionBuilderAtEnd(self.builder, print_function_entry);
-
-        const format_str = "%d\n";
-        const format_str_ptr = llvm.LLVMBuildGlobalStringPtr(self.builder, format_str, "format_str_ptr");
-
-        const arguments = @constCast(&[_]llvm.LLVMValueRef{
-            format_str_ptr,
-            llvm.LLVMGetParam(print_function, 0),
-        });
-
-        const printf_function_var = self.environment.get_variable("printf") orelse return CodeGenError.CompilationError;
-
-        _ = llvm.LLVMBuildCall2(self.builder, printf_function_var.type, printf_function_var.value, arguments, 2, "") orelse return CodeGenError.CompilationError;
-        _ = llvm.LLVMBuildRetVoid(self.builder);
-
-        try self.environment.add_variable("print", try self.create_variable(.{
-            .value = print_function,
-            .type = print_function_type,
-            .stack_level = null,
-            .node = try self.create_node(.{ .FUNCTION_DEFINITION = .{
-                .statements = &[_]*parser.Node{},
-                .parameters = &[_]*parser.Node{},
-                .return_type = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{
-                    .name = "i64",
-                } } }),
-            } }),
-        }));
-    }
-
-    fn create_printb_function(self: *CodeGen) !void {
-        const print_function_type = llvm.LLVMFunctionType(llvm.LLVMVoidType(), @constCast(&[_]llvm.LLVMTypeRef{llvm.LLVMInt1Type()}), 1, 0);
-        const print_function = llvm.LLVMAddFunction(self.llvm_module, "printb", print_function_type);
-        const print_function_entry = llvm.LLVMAppendBasicBlock(print_function, "entrypoint") orelse return CodeGenError.CompilationError;
-        llvm.LLVMPositionBuilderAtEnd(self.builder, print_function_entry);
-
-        const format_str = "%d\n";
-        const format_str_ptr = llvm.LLVMBuildGlobalStringPtr(self.builder, format_str, "format_str_ptr");
-
-        const p = llvm.LLVMGetParam(print_function, 0);
-        const x = llvm.LLVMBuildZExt(self.builder, p, llvm.LLVMInt64Type(), "");
-
-        const arguments = @constCast(&[_]llvm.LLVMValueRef{
-            format_str_ptr,
-            x,
-        });
-
-        const printf_function_var = self.environment.get_variable("printf") orelse return CodeGenError.CompilationError;
-
-        _ = llvm.LLVMBuildCall2(self.builder, printf_function_var.type, printf_function_var.value, arguments, 2, "") orelse return CodeGenError.CompilationError;
-        _ = llvm.LLVMBuildRetVoid(self.builder);
-
-        try self.environment.add_variable("printb", try self.create_variable(.{
-            .value = print_function,
-            .type = print_function_type,
-            .stack_level = null,
-            .node = try self.create_node(.{ .FUNCTION_DEFINITION = .{
-                .statements = &[_]*parser.Node{},
-                .parameters = &[_]*parser.Node{},
-                .return_type = try self.create_node(.{ .TYPE = .{ .SIMPLE_TYPE = .{
-                    .name = "i64",
-                } } }),
-            } }),
-        }));
-    }
-
     fn create_variable(self: *CodeGen, variable_value: Variable) !*Variable {
         const variable = try self.arena.create(Variable);
         variable.* = variable_value;
diff --git a/src/evaluator.zig b/src/evaluator.zig
index fef08a2..f3c0a6c 100644
--- a/src/evaluator.zig
+++ b/src/evaluator.zig
@@ -6,11 +6,12 @@ const EvaluatorError = error{
     OutOfMemory,
 };
 
-const VariableType = enum { NUMBER, BOOLEAN, FUNCTION_DEFINITION };
+const VariableType = enum { NUMBER, BOOLEAN, STRING, FUNCTION_DEFINITION };
 
 const Variable = union(VariableType) {
     NUMBER: i64,
     BOOLEAN: bool,
+    STRING: []const u8,
     FUNCTION_DEFINITION: *parser.Node,
 };
 
@@ -206,6 +207,7 @@ pub const Evaluator = struct {
                 switch (x) {
                     .NUMBER => |number| return self.create_variable(.{ .NUMBER = number.value }),
                     .BOOLEAN => |b| return self.create_variable(.{ .BOOLEAN = b.value }),
+                    .STRING => |s| return self.create_variable(.{ .STRING = s.value }),
                     .IDENTIFIER => |identifier| {
                         const val = self.environment.get_variable(identifier.name) orelse {
                             std.debug.print("Identifier {any} not found\n", .{identifier.name});
diff --git a/src/parser.zig b/src/parser.zig
index 64e670e..9d40c65 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -52,6 +52,9 @@ pub const Node = union(enum) {
         BOOLEAN: struct {
             value: bool,
         },
+        STRING: struct {
+            value: []const u8,
+        },
         IDENTIFIER: struct {
             name: []const u8,
             type: ?*Node,
@@ -403,7 +406,7 @@ pub const Parser = struct {
         } });
     }
 
-    // PrimaryExpression ::= NUMBER | BOOLEAN | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
+    // PrimaryExpression ::= NUMBER | BOOLEAN | STRING | IDENTIFIER | FunctionCallStatement | FunctionDefinition | LPAREN Expression RPAREN
     fn parse_primary_expression(self: *Parser) ParserError!*Node {
         errdefer if (!self.try_context) std.debug.print("Error parsing primary expression {any}\n", .{self.peek_token()});
 
@@ -432,6 +435,11 @@ pub const Parser = struct {
                     .value = boolean_token,
                 } },
             }),
+            .STRING => |string_token| try self.create_node(.{
+                .PRIMARY_EXPRESSION = .{ .STRING = .{
+                    .value = try self.arena.dupe(u8, string_token),
+                } },
+            }),
             .IDENTIFIER => |identifier_token| try self.create_node(.{
                 .PRIMARY_EXPRESSION = .{
                     .IDENTIFIER = .{
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
index 138ad69..b959738 100644
--- a/src/tokenizer.zig
+++ b/src/tokenizer.zig
@@ -19,6 +19,7 @@ pub const TokenType = union(enum) {
     // Literals
     NUMBER: i64,
     BOOLEAN: bool,
+    STRING: []u8,
 
     // Operators
     EQUALS: void,
@@ -93,10 +94,15 @@ pub const Tokenizer = struct {
         if (self.accept_string("<")) return self.create_token(.{ .LESS = void{} });
         if (self.accept_string(">")) return self.create_token(.{ .GREATER = void{} });
 
-        const string = self.consume_string();
-        if (string.len == 0) return TokenizerError.TokenizingError;
+        if (self.accept_int_type()) |i| return self.create_token(.{ .NUMBER = i });
+        if (self.accept_string_type()) |s| return self.create_token(.{ .STRING = s });
 
-        if (std.fmt.parseInt(i32, string, 10) catch null) |i| return self.create_token(.{ .NUMBER = i });
+        const string = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return !std.ascii.isAlphanumeric(c) and c != '_';
+            }
+        }.condition);
+        if (string.len == 0) return TokenizerError.TokenizingError;
 
         return self.create_token(.{ .IDENTIFIER = string });
     }
@@ -118,7 +124,7 @@ pub const Tokenizer = struct {
         }
     }
 
-    fn consume_string(self: *Tokenizer) []u8 {
+    fn consume_until_condition(self: *Tokenizer, condition: fn (c: u8) bool) []u8 {
         defer self.offset = if (self.offset > 0) self.offset - 1 else self.offset;
         const start = self.offset;
         while (true) {
@@ -127,7 +133,7 @@ pub const Tokenizer = struct {
 
             const c = self.buf[self.offset];
 
-            if (!std.ascii.isAlphanumeric(c) and c != '_') return self.buf[start..self.offset];
+            if (condition(c)) return self.buf[start..self.offset];
         }
     }
 
@@ -140,6 +146,37 @@ pub const Tokenizer = struct {
         return false;
     }
 
+    fn accept_int_type(self: *Tokenizer) ?i64 {
+        const res = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return !std.ascii.isDigit(c);
+            }
+        }.condition);
+
+        return std.fmt.parseInt(i64, res, 10) catch null;
+    }
+
+    fn accept_string_type(self: *Tokenizer) ?[]u8 {
+        const prev_offset = self.offset;
+        if (!self.accept_string("\"")) {
+            self.offset = prev_offset;
+            return null;
+        }
+
+        const res = self.consume_until_condition(struct {
+            fn condition(c: u8) bool {
+                return c == '"';
+            }
+        }.condition);
+
+        if (!self.accept_string("\"")) {
+            self.offset = prev_offset;
+            return null;
+        }
+
+        return res;
+    }
+
     fn create_token(self: *Tokenizer, token_type: TokenType) Token {
         return Token{
             .location = self.compute_location(),