diff options
Diffstat (limited to 'src/bootstrap/parser.pry')
| -rw-r--r-- | src/bootstrap/parser.pry | 530 |
1 files changed, 530 insertions, 0 deletions
diff --git a/src/bootstrap/parser.pry b/src/bootstrap/parser.pry new file mode 100644 index 0000000..daac296 --- /dev/null +++ b/src/bootstrap/parser.pry @@ -0,0 +1,530 @@ +let Node = struct { + type: i64, + data: *void, +}; + +let NODE_PROGRAM = 1; +let NODE_STATEMENT = 2; +let NODE_ASSIGNMENT_STATEMENT = 3; +let NODE_IMPORT_DECLARATION = 4; +let NODE_FUNCTION_CALL_STATEMENT = 5; +let NODE_IF_STATEMENT = 6; +let NODE_WHILE_STATEMENT = 7; +let NODE_EQUALITY_EXPRESSION = 8; +let NODE_ADDITIVE_EXPRESSION = 9; +let NODE_MULTIPLICATIVE_EXPRESSION = 10; +let NODE_UNARY_EXPRESSION = 11; +let NODE_POSTFIX_EXPRESSION = 12; +let NODE_PRIMARY_EXPRESSION_NUMBER = 13; +let NODE_PRIMARY_EXPRESSION_BOOLEAN = 14; +let NODE_PRIMARY_EXPRESSION_NULL = 15; +let NODE_PRIMARY_EXPRESSION_CHAR = 16; +let NODE_PRIMARY_EXPRESSION_STRING = 17; +let NODE_PRIMARY_EXPRESSION_IDENTIFIER = 18; +let NODE_FUNCTION_DEFINITION = 19; +let NODE_STRUCT_INSTANCIATION = 20; +let NODE_FIELD_ACCESS = 21; +let NODE_TYPE_SIMPLE_TYPE = 22; +let NODE_TYPE_FUNCTION_TYPE = 23; +let NODE_TYPE_POINTER_TYPE = 24; +let NODE_TYPE_STRUCT_TYPE = 25; +let NODE_RETURN_STATEMENT = 26; +let NODE_CAST_STATEMENT = 27; +let NODE_SIZEOF_STATEMENT = 28; +let NODE_BREAK_STATEMENT = 29; +let NODE_CONTINUE_STATEMENT = 30; + +let EQUALITY_EXPRESSION_TYPE_EQ = 0; +let EQUALITY_EXPRESSION_TYPE_NE = 1; +let EQUALITY_EXPRESSION_TYPE_GE = 2; +let EQUALITY_EXPRESSION_TYPE_LE = 3; +let EQUALITY_EXPRESSION_TYPE_LT = 4; +let EQUALITY_EXPRESSION_TYPE_GT = 5; + +let MULTIPLICATIVE_EXPRESSION_TYPE_MUL = 0; +let MULTIPLICATIVE_EXPRESSION_TYPE_DIV = 1; +let MULTIPLICATIVE_EXPRESSION_TYPE_MOD = 2; + +let UNARY_EXPRESSION_TYPE_NOT = 0; +let UNARY_EXPRESSION_TYPE_MINUS = 1; +let UNARY_EXPRESSION_TYPE_STAR = 2; + +let NODE_PROGRAM_DATA = struct { + statements: **Node, + statements_len: i64, +}; + +let NODE_STATEMENT_DATA = struct { + statement: *Node, +}; + +let NODE_ASSIGNMENT_STATEMENT_DATA = struct { + is_declaration: bool, + is_dereference: bool, + lhs: *Node, + rhs: *Node, +}; + +let NODE_IMPORT_DECLARATION_DATA = struct { + filename: *i8, + program: *Node, +}; + +let NODE_FUNCTION_CALL_STATEMENT_DATA = struct { + expression: *Node, + arguments: *Node, + arguments_len: i64, +}; + +let NODE_IF_STATEMENT_DATA = struct { + condition: *Node, + statements: **Node, + statements_len: i64, +}; + +let NODE_WHILE_STATEMENT_DATA = struct { + condition: *Node, + statements: **Node, + statements_len: i64, +}; + +let NODE_EQUALITY_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, + typ: i64, +}; + +let NODE_ADDITIVE_EXPRESSION_DATA = struct { + addition: bool, + lhs: *Node, + rhs: *Node, +}; + +let NODE_MULTIPLICATIVE_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, + typ: i64, +}; + +let NODE_UNARY_EXPRESSION_DATA = struct { + typ: i64, + expression: *Node, +}; + +let NODE_POSTFIX_EXPRESSION_DATA = struct { + lhs: *Node, + rhs: *Node, +}; + +let NODE_PRIMARY_EXPRESSION_NUMBER_DATA = struct { + value: i64, +}; + +let NODE_PRIMARY_EXPRESSION_BOOLEAN_DATA = struct { + value: bool, +}; + +let NODE_PRIMARY_EXPRESSION_CHAR_DATA = struct { + value: i8, +}; + +let NODE_PRIMARY_EXPRESSION_STRING_DATA = struct { + value: *i8, +}; + +let NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA = struct { + name: *i8, + type: *Node, +}; + +let NODE_FUNCTION_DEFINITION_DATA = struct { + statements: **Node, + statements_len: i64, + parameters: **Node, + parameters_len: i64, + retur_type: *Node, +}; + +let NODE_STRUCT_INSTANCIATION_DATA = struct { + typ: *i8, +}; + +let NODE_FIELD_ACCESS_DATA = struct { + expression: *Node, + name: *i8, +}; + +let NODE_TYPE_SIMPLE_TYPE_DATA = struct { + name: *i8, + underlying_type: *Node, +}; + +let NODE_TYPE_FUNCTION_TYPE_DATA = struct { + parameters: *Node, + parameters_len: i64, + retur_type: *Node, +}; + +let NODE_TYPE_POINTER_TYPE_DATA = struct { + type: *Node, +}; + +let NODE_TYPE_STRUCT_TYPE_DATA = struct { + fields: *Node, + fields_len: i64, +}; + +let NODE_RETURN_STATEMENT_DATA = struct { + expression: *Node, +}; + +let NODE_CAST_STATEMENT_DATA = struct { + typ: *Node, + expression: *Node, +}; + +let NODE_SIZEOF_STATEMENT_DATA = struct { + typ: *Node, +}; + + +let parser = struct { + tokens: *token, + tokens_len: i64, + + offset: i64, + + arena: *arena, +}; + +extern parser_parse_statement = (*parser) => *Node; +extern parser_parse_expression = (*parser) => *Node; + +let parser_init = (ts: *token, ts_len: i64, ar: *arena) => *parser { + let p = cast(*parser, arena_alloc(ar, sizeof(parser))); + + (*p).tokens = ts; + (*p).tokens_len = ts_len; + (*p).offset = 0; + (*p).arena = ar; + + return p; +}; + +let create_node = (p: *parser, n: Node) => *Node { + let res = cast(*Node, arena_alloc((*p).arena, sizeof(Node))); + *res = n; + return res; +}; + +let parser_peek_token = (p: *parser) => *token { + if (*p).offset >= (*p).tokens_len { + return cast(*token, null); + }; + + return ((*p).tokens + cast(*token, (*p).offset)); +}; + + let parser_consume_token = (p: *parser) => *token { + if (*p).offset >= (*p).tokens_len { + return cast(*token, null); + }; + + let t = ((*p).tokens + cast(*token, (*p).offset)); + (*p).offset = (*p).offset + 1; + return t; +}; + +let parser_accept_token = (p: *parser, t: i64) => *token { + let curr_token = parser_peek_token(p); + if curr_token == cast(*token, null) { + return cast(*token, null); + }; + + if (*curr_token).type == t { + return parser_consume_token(p); + }; + return cast(*token, null); +}; + +let parser_accept_parse = (p: *parser, parsing_func: (*parser) => *Node) => *Node { + let prev_offset = (*p).offset; + let node = parsing_func(p); + if node == cast(*Node, null) { + (*p).offset = prev_offset; + }; + return node; +}; + +/* ReturnStatement ::= RETURN (Expression)? */ +let parser_parse_return_statement = (p: *parser) => *Node { + if parser_accept_token(p, TOKEN_RETURN) == cast(*token, null) { + return cast(*Node, null); + }; + + let maybe_expr = parser_accept_parse(p, parser_parse_expression); + + let d = cast(*NODE_RETURN_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_RETURN_STATEMENT_DATA ))); + (*d).expression = maybe_expr; + + let r = Node{}; + r.type = NODE_RETURN_STATEMENT; + r.data = cast(*void, d); + + return create_node(p, r); +}; + +/* Type ::= IDENTIFIER | FunctionType */ +let parser_parse_type = (p: *parser) => *Node { + /* TODO: Function type */ + let to = parser_consume_token(p); + assert(to != cast(*token, null)); + assert((*to).type == TOKEN_IDENTIFIER); + + let d = cast(*NODE_TYPE_SIMPLE_TYPE_DATA, arena_alloc((*p).arena, sizeof(NODE_TYPE_SIMPLE_TYPE_DATA))); + (*d).name = cast(*i8, (*to).data); + (*d).underlying_type = cast(*Node, null); + + let r = Node{}; + r.type = NODE_TYPE_SIMPLE_TYPE; + r.data = cast(*void, d); + + return create_node(p, r); +}; + +/* FunctionParameters ::= IDENTIFIER ":" Type ("," IDENTIFIER ":" Type)* */ +let parser_parse_function_parameters = (p: *parser) => *slice { + /* TODO: Params */ + + let node_list = cast(**Node, arena_alloc((*p).arena, sizeof(**Node) * 20)); + let i = 0; + while true { + if i != 0 { + parser_accept_token(p, TOKEN_COMMA); + }; + let ident = parser_accept_token(p, TOKEN_IDENTIFIER); + if ident == cast(*token, null) { + break; + }; + /* TODO: Rest */ + }; + + let s = cast(*slice, arena_alloc((*p).arena, sizeof(slice))); + (*s).data = cast(*void, node_list); + (*s).data_len = 0; + return s; +}; + +/* FunctionDefinition ::= LPAREN FunctionParameters? RPAREN ARROW IDENTIFIER LBRACE Statement* ReturnStatement SEMICOLON RBRACE */ +let parser_parse_function_definition = (p: *parser) => *Node { + if parser_accept_token(p, TOKEN_LPAREN) == cast(*token, null) { + return cast(*Node, null); + }; + let params = parser_parse_function_parameters(p); + if params == cast(*slice, null) { + return cast(*Node, null); + }; + if parser_accept_token(p, TOKEN_RPAREN) == cast(*token, null) { + return cast(*Node, null); + }; + if parser_accept_token(p, TOKEN_ARROW) == cast(*token, null) { + return cast(*Node, null); + }; + let retur_type = parser_parse_type(p); + if retur_type == cast(*Node, null) { + return cast(*Node, null); + }; + if parser_accept_token(p, TOKEN_LBRACE) == cast(*token, null) { + return cast(*Node, null); + }; + + /* TODO: Body */ + let statements = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 100)); + let i = 0; + while true { + let n = parser_accept_parse(p, parser_parse_statement); + if n == cast(*Node, null) { + break; + }; + (*(statements + cast(**Node, i))) = n; + i = i + 1; + }; + + + if parser_accept_token(p, TOKEN_RBRACE) == cast(*token, null) { + return cast(*Node, null); + }; + + + let d = cast(*NODE_FUNCTION_DEFINITION_DATA, arena_alloc((*p).arena, sizeof(NODE_FUNCTION_DEFINITION_DATA))); + (*d).statements = statements; + (*d).statements_len = i; + (*d).parameters = cast(**Node, params.data); + (*d).parameters_len = params.data_len; + (*d).retur_type = cast(*Node, null); + + let n = Node{}; + n.type = NODE_FUNCTION_DEFINITION; + n.data = cast(*void, d); + + return create_node(p, n); +}; + +/* PrimaryExpression ::= NULL | NUMBER | BOOLEAN | CHAR | STRING | IDENTIFIER | FunctionDefinition | StructDefinition | StructInstantiation | FieldAccess | LPAREN Expression RPAREN */ +let parser_parse_primary_expression = (p: *parser) => *Node { + let stmt = parser_accept_parse(p, parser_parse_function_definition); + if stmt != cast(*Node, null) { + return stmt; + }; + + let tok = parser_consume_token(p); + if tok == cast(*token, null) { + printf("NO TOK\n"); + return cast(*Node, null); + }; + + if (*tok).type == TOKEN_IDENTIFIER { + let d = cast(*NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_IDENTIFIER_DATA))); + (*d).name = cast(*i8, (*tok).data); + (*d).type = cast(*Node, null); /* TODO */ + let n = Node{}; + n.type = NODE_PRIMARY_EXPRESSION_IDENTIFIER; + n.data = cast(*void, d); + return create_node(p, n); + }; + + if (*tok).type == TOKEN_NUMBER { + let d = cast(*NODE_PRIMARY_EXPRESSION_NUMBER_DATA, arena_alloc((*p).arena, sizeof(NODE_PRIMARY_EXPRESSION_NUMBER_DATA))); + (*d).value = *(cast(*i64, (*tok).data)); + let n = Node{}; + n.type = NODE_PRIMARY_EXPRESSION_NUMBER; + n.data = cast(*void, d); + return create_node(p, n); + }; + + printf("DIFF TYPE: %d\n", (*tok).type); + + return cast(*Node, null); +}; + +/* EqualityExpression ::= AdditiveExpression ("==" | "!=" | "<=" | ">=" | "<" | ">") AdditiveExpression */ +let parser_parse_equality_expression = (p: *parser) => *Node { + /* TODO */ + return cast(*Node, null); +}; + +/* AdditiveExpression ::= MultiplicativeExpression (("+" | "-") MultiplicativeExpression)* */ +let parser_parse_additive_expression = (p: *parser) => *Node { + /* TODO */ + return parser_parse_primary_expression(p); +}; + +/* Expression ::= EqualityExpression | AdditiveExpression */ +let parser_parse_expression = (p: *parser) => *Node { + let ex = parser_accept_parse(p, parser_parse_equality_expression); + if ex != cast(*Node, null) { + return ex; + }; + let ax = parser_accept_parse(p, parser_parse_additive_expression); + if ax != cast(*Node, null) { + return ax; + }; + + return cast(*Node, null); +}; + +/* AssignmentStatement ::= ("let")? ("*")? Expression EQUALS Expression */ +let parse_assignment_statement = (p: *parser) => *Node { + let is_declaration = false; + if parser_accept_token(p, TOKEN_LET) != cast(*token, null) { + printf("IS DECLARATION\n"); + is_declaration = true; + }; + + /* TODO: is_dereference */ + + let lhs = parser_parse_expression(p); /* TODO */ + if lhs == cast(*Node, null) { + printf("ANOTHER BNLL\n"); + return cast(*Node, null); + }; + + if parser_accept_token(p, TOKEN_EQUALS) == cast(*token, null) { + return cast(*Node, null); + }; + + let rhs = parser_parse_expression(p); /* TODO */ + if rhs == cast(*Node, null) { + printf("NUL EXP\n"); + return cast(*Node, null); + }; + + let d = cast(*NODE_ASSIGNMENT_STATEMENT_DATA , arena_alloc((*p).arena, sizeof(NODE_ASSIGNMENT_STATEMENT_DATA ))); + (*d).is_declaration = is_declaration; + (*d).is_dereference = false; + (*d).lhs = lhs; + (*d).rhs = rhs; + let n = Node{}; + n.type = NODE_ASSIGNMENT_STATEMENT; + n.data = cast(*void, d); + printf("CONTINUE\n"); + return create_node(p, n); +}; + +/* Statement ::= (AssignmentStatement | ImportDeclaration | ExternDeclaration | CastStatement | SizeOfStatement | FunctionCallStatement | IfStatement | WhileStatement | ReturnStatement | "break" | "continue") SEMICOLON */ +let parser_parse_statement = (p: *parser) => *Node { + let assignment = parser_accept_parse(p, parse_assignment_statement); + if assignment != cast(*Node, null) { + if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { + return cast(*Node, null); + }; + return assignment; + }; + + let retu = parser_accept_parse(p, parser_parse_return_statement); + if retu != cast(*Node, null) { + if parser_accept_token(p, TOKEN_SEMICOLON) == cast(*token, null) { + return cast(*Node, null); + }; + return retu; + }; + + + printf("None\n"); + + return cast(*Node, null); +}; + +/* Program ::= Statement+ */ +let parse_program = (p: *parser) => *Node { + let nodes = cast(**Node, arena_alloc((*p).arena, sizeof(*Node) * 1000)); + + let i = 0; + while (*p).offset < (*p).tokens_len { + let s = parser_parse_statement(p); + assert(s != cast(*Node, null)); + (*(nodes + cast(**Node, i))) = s; + i = i + 1; + }; + + let d = cast(*NODE_PROGRAM_DATA, arena_alloc((*p).arena, sizeof(NODE_PROGRAM_DATA))); + (*d).statements = nodes; + (*d).statements_len = i; + let n = Node{}; + n.type = NODE_PROGRAM; + n.data = cast(*void, d); + return create_node(p, n); +}; + +let parse = (p: *parser) => *Node { + return parse_program(p); +}; + +/* + +For example -2: + +* parsing assignment statement +* parsing ident and num literals + +*/ |