about summary refs log tree commit diff
path: root/src/bootstrap/parser.pry
diff options
context:
space:
mode:
Diffstat (limited to 'src/bootstrap/parser.pry')
-rw-r--r--src/bootstrap/parser.pry530
1 files changed, 530 insertions, 0 deletions
diff --git a/src/bootstrap/parser.pry b/src/bootstrap/parser.pry
new file mode 100644
index 0000000..daac296
--- /dev/null
+++ b/src/bootstrap/parser.pry
@@ -0,0 +1,530 @@
+/* Generic AST node. `type` holds one of the NODE_* tags declared in this file and `data`
+   is an untyped pointer; the parse functions in this file store a pointer to the
+   NODE_*_DATA struct that belongs to that tag behind it. */
+let Node = struct {
+	type: i64,
+	data: *void,
+};
+
+/* Tags stored in Node.type. The parse functions in this file currently emit only some of
+   them (NODE_PROGRAM, NODE_ASSIGNMENT_STATEMENT, NODE_PRIMARY_EXPRESSION_NUMBER,
+   NODE_PRIMARY_EXPRESSION_IDENTIFIER, NODE_FUNCTION_DEFINITION, NODE_TYPE_SIMPLE_TYPE and
+   NODE_RETURN_STATEMENT); the remaining tags are declared but not yet produced here. */
+let NODE_PROGRAM = 1;
+let NODE_STATEMENT = 2;
+let NODE_ASSIGNMENT_STATEMENT = 3;
+let NODE_IMPORT_DECLARATION = 4;
+let NODE_FUNCTION_CALL_STATEMENT = 5;
+let NODE_IF_STATEMENT = 6;
+let NODE_WHILE_STATEMENT = 7;
+let NODE_EQUALITY_EXPRESSION = 8;
+let NODE_ADDITIVE_EXPRESSION = 9;
+let NODE_MULTIPLICATIVE_EXPRESSION = 10;
+let NODE_UNARY_EXPRESSION = 11;
+let NODE_POSTFIX_EXPRESSION = 12;
+let NODE_PRIMARY_EXPRESSION_NUMBER = 13;
+let NODE_PRIMARY_EXPRESSION_BOOLEAN = 14;
+let NODE_PRIMARY_EXPRESSION_NULL = 15;
+let NODE_PRIMARY_EXPRESSION_CHAR = 16;
+let NODE_PRIMARY_EXPRESSION_STRING = 17;
+let NODE_PRIMARY_EXPRESSION_IDENTIFIER = 18;
+let NODE_FUNCTION_DEFINITION = 19;
+let NODE_STRUCT_INSTANCIATION = 20;
+let NODE_FIELD_ACCESS = 21;
+let NODE_TYPE_SIMPLE_TYPE = 22;
+let NODE_TYPE_FUNCTION_TYPE = 23;
+let NODE_TYPE_POINTER_TYPE = 24;
+let NODE_TYPE_STRUCT_TYPE = 25;
+let NODE_RETURN_STATEMENT = 26;
+let NODE_CAST_STATEMENT = 27;
+let NODE_SIZEOF_STATEMENT = 28;
+let NODE_BREAK_STATEMENT = 29;
+let NODE_CONTINUE_STATEMENT = 30;
+
+/* Comparison kinds. Presumably the values of NODE_EQUALITY_EXPRESSION_DATA.typ; nothing in
+   this file reads or writes them yet - confirm against the consumer. */
+let EQUALITY_EXPRESSION_TYPE_EQ = 0;
+let EQUALITY_EXPRESSION_TYPE_NE = 1;
+let EQUALITY_EXPRESSION_TYPE_GE = 2;
+let EQUALITY_EXPRESSION_TYPE_LE = 3;
+let EQUALITY_EXPRESSION_TYPE_LT = 4;
+let EQUALITY_EXPRESSION_TYPE_GT = 5;
+
+/* Multiplicative operator kinds. Presumably the values of
+   NODE_MULTIPLICATIVE_EXPRESSION_DATA.typ; unused in this file so far. */
+let MULTIPLICATIVE_EXPRESSION_TYPE_MUL = 0;
+let MULTIPLICATIVE_EXPRESSION_TYPE_DIV = 1;
+let MULTIPLICATIVE_EXPRESSION_TYPE_MOD = 2;
+
+/* Unary operator kinds. Presumably the values of NODE_UNARY_EXPRESSION_DATA.typ; unused in
+   this file so far. */
+let UNARY_EXPRESSION_TYPE_NOT = 0;
+let UNARY_EXPRESSION_TYPE_MINUS = 1;
+let UNARY_EXPRESSION_TYPE_STAR = 2;
+
+/* Payload of a NODE_PROGRAM node. parse_program stores the array of parsed statement
+   pointers in `statements` and the number of filled entries in `statements_len`. */
+let NODE_PROGRAM_DATA = struct {
+    statements: **Node,
+    statements_len: i64,
+};
+
+/* Holds a pointer to a single statement node. No parse function in this file builds it yet. */
+let NODE_STATEMENT_DATA = struct {
+    statement: *Node,
+};
+
+/* Payload of a NODE_ASSIGNMENT_STATEMENT node, filled in by parse_assignment_statement:
+   is_declaration is true when the statement started with TOKEN_LET, is_dereference is
+   always written as false for now (see the TODO in that function), and lhs/rhs are the
+   expressions parsed on either side of TOKEN_EQUALS. */
+let NODE_ASSIGNMENT_STATEMENT_DATA = struct {
+    is_declaration: bool,
+    is_dereference: bool,
+    lhs: *Node,
+    rhs: *Node,
+};
+
+/* Presumably the payload for NODE_IMPORT_DECLARATION (file name plus the imported program's
+   node); not built anywhere in this file - confirm against the consumer. */
+let NODE_IMPORT_DECLARATION_DATA = struct {
+    filename: *i8,
+    program: *Node,
+};
+
+/* Presumably the payload for NODE_FUNCTION_CALL_STATEMENT; not built in this file.
+   NOTE(review): `arguments` is *Node while the other node lists in this file are **Node -
+   confirm which representation is intended. */
+let NODE_FUNCTION_CALL_STATEMENT_DATA = struct {
+    expression: *Node,
+    arguments: *Node,
+    arguments_len: i64,
+};
+
+/* Presumably the payload for NODE_IF_STATEMENT: a condition plus a list of body statements.
+   Not built in this file. */
+let NODE_IF_STATEMENT_DATA = struct {
+    condition: *Node,
+    statements: **Node,
+    statements_len: i64,
+};
+
+/* Presumably the payload for NODE_WHILE_STATEMENT: a condition plus a list of body
+   statements. Not built in this file. */
+let NODE_WHILE_STATEMENT_DATA = struct {
+    condition: *Node,
+    statements: **Node,
+    statements_len: i64,
+};
+
+/* Presumably the payload for NODE_EQUALITY_EXPRESSION, with `typ` taking one of the
+   EQUALITY_EXPRESSION_TYPE_* values; not built in this file (the equality parser is a stub). */
+let NODE_EQUALITY_EXPRESSION_DATA = struct {
+    lhs: *Node,
+    rhs: *Node,
+    typ: i64,
+};
+
+/* Presumably the payload for NODE_ADDITIVE_EXPRESSION; `addition` looks like it separates
+   "+" from "-" - confirm. Not built in this file. */
+let NODE_ADDITIVE_EXPRESSION_DATA = struct {
+    addition: bool,
+    lhs: *Node,
+    rhs: *Node,
+};
+
+/* Presumably the payload for NODE_MULTIPLICATIVE_EXPRESSION, with `typ` taking one of the
+   MULTIPLICATIVE_EXPRESSION_TYPE_* values. Not built in this file. */
+let NODE_MULTIPLICATIVE_EXPRESSION_DATA = struct {
+    lhs: *Node,
+    rhs: *Node,
+    typ: i64,
+};
+
+/* Presumably the payload for NODE_UNARY_EXPRESSION, with `typ` taking one of the
+   UNARY_EXPRESSION_TYPE_* values. Not built in this file. */
+let NODE_UNARY_EXPRESSION_DATA = struct {
+    typ: i64,
+    expression: *Node,
+};
+
+/* Presumably the payload for NODE_POSTFIX_EXPRESSION. Not built in this file. */
+let NODE_POSTFIX_EXPRESSION_DATA = struct {
+    lhs: *Node,
+    rhs: *Node,
+};
+
+/* Payload of a NODE_PRIMARY_EXPRESSION_NUMBER node. parser_parse_primary_expression copies
+   the i64 that the number token's data pointer refers to into `value`. */
+let NODE_PRIMARY_EXPRESSION_NUMBER_DATA = struct {
+    value: i64,
+};
+
+/* Presumably the payload for NODE_PRIMARY_EXPRESSION_BOOLEAN. Not built in this file. */
+let NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA = struct {
+    value: bool,
+};
+
+/* Presumably the payload for NODE_PRIMARY_EXPRESSION_CHAR. Not built in this file. */
+let NODE_PRIMARY_EXPRESSION_CHAR_DATA = struct {
+    value: i8,
+};
+
+/* Presumably the payload for NODE_PRIMARY_EXPRESSION_STRING. Not built in this file. */
+let NODE_PRIMARY_EXPRESSION_STRING_DATA = struct {
+    value: *i8,
+};
+
+/* Payload of a NODE_PRIMARY_EXPRESSION_IDENTIFIER node. parser_parse_primary_expression
+   takes `name` from the identifier token's data pointer and leaves `type` null for now. */
+let NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA = struct {
+    name: *i8,
+    type: *Node,
+};
+
+/* Payload of a NODE_FUNCTION_DEFINITION node, built by parser_parse_function_definition:
+   the body statements and their count, the parameter list and its count as returned by
+   parser_parse_function_parameters, and a slot for the return-type node. */
+let NODE_FUNCTION_DEFINITION_DATA = struct {
+    statements: **Node,
+    statements_len: i64,
+    parameters: **Node,
+    parameters_len: i64,
+    retur_type: *Node,
+};
+
+/* Presumably the payload for NODE_STRUCT_INSTANCIATION, with `typ` naming the struct type.
+   Not built in this file. */
+let NODE_STRUCT_INSTANCIATION_DATA = struct {
+    typ: *i8,
+};
+
+/* Presumably the payload for NODE_FIELD_ACCESS: the accessed expression and the field name.
+   Not built in this file. */
+let NODE_FIELD_ACCESS_DATA = struct {
+    expression: *Node,
+    name: *i8,
+};
+
+/* Payload of a NODE_TYPE_SIMPLE_TYPE node. parser_parse_type takes `name` from the
+   identifier token's data pointer and sets `underlying_type` to null. */
+let NODE_TYPE_SIMPLE_TYPE_DATA = struct {
+    name: *i8,
+    underlying_type: *Node,
+};
+
+/* Presumably the payload for NODE_TYPE_FUNCTION_TYPE; function types are still a TODO in
+   parser_parse_type. NOTE(review): `parameters` is *Node here but **Node in
+   NODE_FUNCTION_DEFINITION_DATA - confirm which representation is intended. */
+let NODE_TYPE_FUNCTION_TYPE_DATA = struct {
+    parameters: *Node,
+    parameters_len: i64,
+    retur_type: *Node,
+};
+
+/* Presumably the payload for NODE_TYPE_POINTER_TYPE, with `type` being the pointee type
+   node. Not built in this file. */
+let NODE_TYPE_POINTER_TYPE_DATA = struct {
+    type: *Node,
+};
+
+/* Presumably the payload for NODE_TYPE_STRUCT_TYPE. Not built in this file. */
+let NODE_TYPE_STRUCT_TYPE_DATA = struct {
+    fields: *Node,
+    fields_len: i64,
+};
+
+/* Payload of a NODE_RETURN_STATEMENT node. parser_parse_return_statement stores the parsed
+   expression here; it is null when no expression could be parsed after TOKEN_RETURN. */
+let NODE_RETURN_STATEMENT_DATA = struct {
+    expression: *Node,
+};
+
+/* Presumably the payload for NODE_CAST_STATEMENT: target type and the expression being
+   cast. Not built in this file. */
+let NODE_CAST_STATEMENT_DATA = struct {
+    typ: *Node,
+    expression: *Node,
+};
+
+/* Presumably the payload for NODE_SIZEOF_STATEMENT: the type being measured. Not built in
+   this file. */
+let NODE_SIZEOF_STATEMENT_DATA = struct {
+    typ: *Node,
+};
+
+
+/* Parser state: `tokens`/`tokens_len` describe the token array being read, `offset` is the
+   index of the next token to peek or consume (parser_accept_parse rewinds it on failure),
+   and `arena` is the allocator every node and payload in this file is taken from. */
+let parser = struct {
+	tokens: *token,
+	tokens_len: i64,
+
+	offset: i64,
+
+	arena: *arena,
+};
+
+/* Declarations for two functions that are defined further down in this file but referenced
+   before their definitions (by the function-definition and return-statement parsers). */
+extern parser_parse_statement = (*parser) => *Node;
+extern parser_parse_expression = (*parser) => *Node;
+
+/* Allocates a parser from `ar` that reads the `ts_len` tokens starting at `ts`.
+   Reading starts at offset 0 and `ar` is kept for all later allocations. */
+let parser_init = (ts: *token, ts_len: i64, ar: *arena) => *parser {
+	let state = cast(*parser, arena_alloc(ar, sizeof(parser)));
+
+	(*state).arena = ar;
+	(*state).offset = 0;
+	(*state).tokens_len = ts_len;
+	(*state).tokens = ts;
+
+	return state;
+};
+
+/* Copies `n` into a Node freshly allocated from the parser's arena and returns a pointer
+   to that copy. */
+let create_node = (p: *parser, n: Node) => *Node {
+	let slot = cast(*Node, arena_alloc((*p).arena, sizeof(Node)));
+	*slot = n;
+	return slot;
+};
+
+/* Returns a pointer to the token at the current offset without advancing, or null when
+   the offset is at or past tokens_len. */
+let parser_peek_token = (p: *parser) => *token {
+	let idx = (*p).offset;
+	if idx < (*p).tokens_len {
+		return ((*p).tokens + cast(*token, idx));
+	};
+
+	return cast(*token, null);
+};
+
+ /* Returns a pointer to the token at the current offset and advances the offset by one,
+    or returns null (leaving the offset untouched) when no tokens remain. */
+ let parser_consume_token = (p: *parser) => *token {
+	let idx = (*p).offset;
+	if idx < (*p).tokens_len {
+		(*p).offset = idx + 1;
+		return ((*p).tokens + cast(*token, idx));
+	};
+
+	return cast(*token, null);
+};
+
+/* Consumes and returns the next token only if its type equals `t`. Returns null, without
+   consuming anything, when the input is exhausted or the next token has another type. */
+let parser_accept_token = (p: *parser, t: i64) => *token {
+	let next = parser_peek_token(p);
+	if next != cast(*token, null) {
+		if (*next).type == t {
+			return parser_consume_token(p);
+		};
+	};
+
+	return cast(*token, null);
+};
+
+/* Runs `parsing_func` speculatively. If it returns null, the token offset is restored to
+   its value from before the call, so the caller can try another alternative. The result
+   of `parsing_func` is returned either way. */
+let parser_accept_parse = (p: *parser, parsing_func: (*parser) => *Node) => *Node {
+	let saved = (*p).offset;
+	let result = parsing_func(p);
+	if result != cast(*Node, null) {
+		return result;
+	};
+
+	/* Parse failed: rewind whatever tokens the attempt consumed. */
+	(*p).offset = saved;
+	return result;
+};
+
+/* ReturnStatement ::= RETURN (Expression)? */
+/* Returns null unless the next token is TOKEN_RETURN. Otherwise builds a
+   NODE_RETURN_STATEMENT whose expression is the speculatively parsed expression, or null
+   when none follows. */
+let parser_parse_return_statement = (p: *parser) => *Node {
+	let keyword = parser_accept_token(p, TOKEN_RETURN);
+	if keyword == cast(*token, null) {
+		return cast(*Node, null);
+	};
+
+	/* The expression is optional; a failed attempt rewinds and yields null. */
+	let value = parser_accept_parse(p, parser_parse_expression);
+
+	let payload = cast(*NODE_RETURN_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_RETURN_STATEMENT_DATA)));
+	(*payload).expression = value;
+
+	let node = Node{};
+	node.data = cast(*void, payload);
+	node.type = NODE_RETURN_STATEMENT;
+
+	return create_node(p, node);
+};
+
+/* Type ::= IDENTIFIER | FunctionType */
+/* Parses a simple named type. Returns a NODE_TYPE_SIMPLE_TYPE node whose name comes from
+   the identifier token and whose underlying_type is null. Returns null without consuming
+   anything when the next token is missing or is not an identifier, so callers that check
+   for null (and speculative parses wrapped in parser_accept_parse) can back off instead
+   of the process aborting on an assert. */
+let parser_parse_type = (p: *parser) => *Node {
+	/* TODO: Function type */
+	let to = parser_accept_token(p, TOKEN_IDENTIFIER);
+	if to == cast(*token, null) {
+		return cast(*Node, null);
+	};
+
+	let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA)));
+	(*d).name = cast(*i8, (*to).data);
+	(*d).underlying_type = cast(*Node, null);
+
+	let r = Node{};
+	r.type = NODE_TYPE_SIMPLE_TYPE;
+	r.data = cast(*void, d);
+
+	return create_node(p, r);
+};
+
+/* FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* */
+/* Unfinished stub. It consumes consecutive TOKEN_IDENTIFIER tokens, stores nothing, and
+   returns a slice that points at an empty 20-slot buffer with data_len 0. It never
+   returns null.
+   NOTE(review): `i` is never incremented, so the TOKEN_COMMA branch is unreachable and the
+   ":" Type part of the grammar is not parsed at all; both belong to the TODO below. */
+let parser_parse_function_parameters = (p: *parser) => *slice {
+	/* TODO: Params */
+
+	/* The buffer holds *Node elements, so size it by the element type. */
+	let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 20));
+	let i = 0;
+	while true {
+		if i != 0 {
+			parser_accept_token(p, TOKEN_COMMA);
+		};
+		let ident = parser_accept_token(p, TOKEN_IDENTIFIER);
+		if ident == cast(*token, null) {
+			break;
+		};
+		/* TODO: Rest */
+	};
+
+	let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice)));
+	(*s).data = cast(*void, node_list);
+	(*s).data_len = 0;
+	return s;
+};
+
+/* FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE */
+/* Parses "(params) => Type { statements }" into a NODE_FUNCTION_DEFINITION node.
+   Returns null as soon as any required piece is missing. Tokens consumed up to that point
+   are not restored here; callers wrap this in parser_accept_parse for that. The body is
+   collected by parsing statements until one fails, and at most 100 are supported. */
+let parser_parse_function_definition = (p: *parser) => *Node {
+	if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) {
+		return cast(*Node, null);
+	};
+	let params = parser_parse_function_parameters(p);
+	if params == cast(*slice, null) {
+		return cast(*Node, null);
+	};
+	if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) {
+		return cast(*Node, null);
+	};
+	if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) {
+		return cast(*Node, null);
+	};
+	let retur_type = parser_parse_type(p);
+	if retur_type == cast(*Node, null) {
+		return cast(*Node, null);
+	};
+	if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) {
+		return cast(*Node, null);
+	};
+
+	/* TODO: Body */
+	let max_statements = 100;
+	let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * max_statements));
+	let i = 0;
+	while true {
+		let stmt = parser_accept_parse(p, parser_parse_statement);
+		if stmt == cast(*Node, null) {
+			break;
+		};
+		/* The buffer is fixed-size; stop instead of writing past its end. */
+		assert(i < max_statements);
+		(*(statements + cast(**Node, i))) = stmt;
+		i = i + 1;
+	};
+
+	if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) {
+		return cast(*Node, null);
+	};
+
+	let d = cast(*NODE_FUNCTION_DEFINITION_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_DEFINITION_DATA)));
+	(*d).statements = statements;
+	(*d).statements_len = i;
+	/* params is a *slice, so dereference it explicitly like every other pointer access here. */
+	(*d).parameters = cast(**Node, (*params).data);
+	(*d).parameters_len = (*params).data_len;
+	/* Keep the return type that was parsed above instead of discarding it. */
+	(*d).retur_type = retur_type;
+
+	let n = Node{};
+	n.type = NODE_FUNCTION_DEFINITION;
+	n.data = cast(*void, d);
+
+	return create_node(p, n);
+};
+
+/* PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN */
+/* Only three alternatives are handled so far: a function definition (tried speculatively
+   first), an identifier and a number. Any other token is consumed, its type is printed,
+   and null is returned. Exhausted input prints "NO TOK" and returns null. */
+let parser_parse_primary_expression = (p: *parser) => *Node {
+	let func_def = parser_accept_parse(p, parser_parse_function_definition);
+	if func_def != cast(*Node, null) {
+		return func_def;
+	};
+
+	let current = parser_consume_token(p);
+	if current == cast(*token, null) {
+		printf("NO TOK\n");
+		return cast(*Node, null);
+	};
+
+	let kind = (*current).type;
+
+	if kind == TOKEN_IDENTIFIER {
+		let ident = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA)));
+		(*ident).name = cast(*i8, (*current).data);
+		(*ident).type = cast(*Node, null); /* TODO */
+		let ident_node = Node{};
+		ident_node.data = cast(*void, ident);
+		ident_node.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER;
+		return create_node(p, ident_node);
+	};
+
+	if kind == TOKEN_NUMBER {
+		let number = cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_NUMBER_DATA)));
+		/* The token's data pointer refers to the i64 value of the literal. */
+		(*number).value = *(cast(*i64, (*current).data));
+		let number_node = Node{};
+		number_node.data = cast(*void, number);
+		number_node.type = NODE_PRIMARY_EXPRESSION_NUMBER;
+		return create_node(p, number_node);
+	};
+
+	printf("DIFF TYPE: %d\n", kind);
+
+	return cast(*Node, null);
+};
+
+/* EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression */
+/* Not implemented yet: always returns null and consumes no tokens. */
+let parser_parse_equality_expression = (p: *parser) => *Node {
+	/* TODO */
+	let unimplemented = cast(*Node, null);
+	return unimplemented;
+};
+
+/* AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* */
+/* Operators are not parsed yet: this currently just forwards to the primary-expression
+   parser and returns its result. */
+let parser_parse_additive_expression = (p: *parser) => *Node {
+	/* TODO */
+	let primary = parser_parse_primary_expression(p);
+	return primary;
+};
+
+/* Expression ::= EqualityExpression | AdditiveExpression */
+/* Tries an equality expression first and falls back to an additive expression. Each
+   attempt is speculative, so a failed one leaves the token offset unchanged. Returns null
+   when neither alternative parses. */
+let parser_parse_expression = (p: *parser) => *Node {
+	let equality = parser_accept_parse(p, parser_parse_equality_expression);
+	if equality == cast(*Node, null) {
+		/* The fallback result is returned as-is; it is null when this attempt fails too. */
+		return parser_accept_parse(p, parser_parse_additive_expression);
+	};
+
+	return equality;
+};
+
+/* AssignmentStatement ::= ("let")? ("*")? Expression EQUALS Expression */
+/* Parses an optional TOKEN_LET, a left-hand expression, TOKEN_EQUALS and a right-hand
+   expression into a NODE_ASSIGNMENT_STATEMENT node. Returns null if any required part is
+   missing; consumed tokens are not restored here. The leading "*" is not handled yet, so
+   is_dereference is always false. Progress messages are printed along the way. */
+let parse_assignment_statement = (p: *parser) => *Node {
+	let declares = false;
+	let let_keyword = parser_accept_token(p, TOKEN_LET);
+	if let_keyword != cast(*token, null) {
+		printf("IS DECLARATION\n");
+		declares = true;
+	};
+
+	/* TODO: is_dereference */
+
+	let target = parser_parse_expression(p); /* TODO */
+	if target == cast(*Node, null) {
+		printf("ANOTHER BNLL\n");
+		return cast(*Node, null);
+	};
+
+	let equals = parser_accept_token(p, TOKEN_EQUALS);
+	if equals == cast(*token, null) {
+		return cast(*Node, null);
+	};
+
+	let value = parser_parse_expression(p); /* TODO */
+	if value == cast(*Node, null) {
+		printf("NUL EXP\n");
+		return cast(*Node, null);
+	};
+
+	let payload = cast(*NODE_ASSIGNMENT_STATEMENT_DATA, arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA)));
+	(*payload).lhs = target;
+	(*payload).rhs = value;
+	(*payload).is_declaration = declares;
+	(*payload).is_dereference = false;
+
+	let node = Node{};
+	node.data = cast(*void, payload);
+	node.type = NODE_ASSIGNMENT_STATEMENT;
+	printf("CONTINUE\n");
+	return create_node(p, node);
+};
+
+/* Statement    ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | ReturnStatement | "break" | "continue") SEMICOLON */
+/* Only assignment and return statements are recognised so far, tried in that order. A
+   recognised statement must be followed by TOKEN_SEMICOLON, otherwise null is returned.
+   When neither alternative parses, "None" is printed and null is returned. */
+let parser_parse_statement = (p: *parser) => *Node {
+	let assignment = parser_accept_parse(p, parse_assignment_statement);
+	if assignment != cast(*Node, null) {
+		if parser_accept_token(p, TOKEN_SEMICOLON) != cast(*token, null) {
+			return assignment;
+		};
+		return cast(*Node, null);
+	};
+
+	let return_stmt = parser_accept_parse(p, parser_parse_return_statement);
+	if return_stmt != cast(*Node, null) {
+		if parser_accept_token(p, TOKEN_SEMICOLON) != cast(*token, null) {
+			return return_stmt;
+		};
+		return cast(*Node, null);
+	};
+
+	printf("None\n");
+
+	return cast(*Node, null);
+};
+
+/* Program ::= Statement+ */
+/* Parses statements until every token has been consumed and returns a NODE_PROGRAM node
+   holding them. A statement that fails to parse trips an assert, as does a program with
+   more statements than the fixed-size buffer can hold. An empty token list produces a
+   program with zero statements. */
+let parse_program = (p: *parser) => *Node {
+	let max_statements = 1000;
+	let nodes = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * max_statements));
+
+	let i = 0;
+	while (*p).offset < (*p).tokens_len {
+		let s = parser_parse_statement(p);
+		assert(s != cast(*Node, null));
+		/* The buffer is fixed-size; fail loudly instead of writing past its end. */
+		assert(i < max_statements);
+		(*(nodes + cast(**Node, i))) = s;
+		i = i + 1;
+	};
+
+	let d = cast(*NODE_PROGRAM_DATA, arena_alloc((*p).arena, sizeof(NODE_PROGRAM_DATA)));
+	(*d).statements = nodes;
+	(*d).statements_len = i;
+	let n = Node{};
+	n.type = NODE_PROGRAM;
+	n.data = cast(*void, d);
+	return create_node(p, n);
+};
+
+/* Entry point: parses the parser's whole token stream and returns the program node. */
+let parse = (p: *parser) => *Node {
+	let program = parse_program(p);
+	return program;
+};
+
+/*
+
+For example -2:
+
+* parsing assignment statement
+* parsing ident and num literals
+
+*/