#include "lang/parser.hpp" #include #include #define MATCHES(x) (begin() && x) #define TO_NUMERIC_EXPRESSION(node) new ASTNodeNumericExpression((node), new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, s32(0) }), Token::Operator::Plus) // Definition syntax: // [A] : Either A or no token // [A|B] : Either A, B or no token // : Either A or B // : One or more of A // A B C : Sequence of tokens A then B then C // (parseXXXX) : Parsing handled by other function namespace hex::lang { /* Mathematical expressions */ // Identifier:: ASTNode* Parser::parseScopeResolution(std::vector &path) { if (peek(IDENTIFIER, -1)) path.push_back(getValue(-1)); if (MATCHES(sequence(SEPARATOR_SCOPE_RESOLUTION))) { if (MATCHES(sequence(IDENTIFIER))) return this->parseScopeResolution(path); else throwParseError("expected member name", -1); } else return TO_NUMERIC_EXPRESSION(new ASTNodeScopeResolution(path)); } // ASTNode* Parser::parseRValue(std::vector &path) { if (peek(IDENTIFIER, -1)) path.push_back(getValue(-1)); if (MATCHES(sequence(SEPARATOR_DOT))) { if (MATCHES(sequence(IDENTIFIER))) return this->parseRValue(path); else throwParseError("expected member name", -1); } else return TO_NUMERIC_EXPRESSION(new ASTNodeRValue(path)); } // ASTNode* Parser::parseFactor() { if (MATCHES(sequence(INTEGER))) return TO_NUMERIC_EXPRESSION(new ASTNodeIntegerLiteral(getValue(-1))); else if (MATCHES(sequence(SEPARATOR_ROUNDBRACKETOPEN))) { auto node = this->parseMathematicalExpression(); if (!MATCHES(sequence(SEPARATOR_ROUNDBRACKETCLOSE))) throwParseError("expected closing parenthesis"); return node; } else if (MATCHES(sequence(IDENTIFIER) && peek(SEPARATOR_SCOPE_RESOLUTION))) { std::vector path; return this->parseScopeResolution(path); } else if (MATCHES(sequence(IDENTIFIER))) { std::vector path; return this->parseRValue(path); } else throwParseError("expected integer or parenthesis"); } // (parseFactor) <*|/> (parseFactor) ASTNode* Parser::parseMultiplicativeExpression() { auto node = this->parseFactor(); while (MATCHES(variant(OPERATOR_STAR, OPERATOR_SLASH))) { if (peek(OPERATOR_STAR, -1)) node = new ASTNodeNumericExpression(node, this->parseFactor(), Token::Operator::Star); else node = new ASTNodeNumericExpression(node, this->parseFactor(), Token::Operator::Slash); } return node; } // (parseMultiplicativeExpression) <+|-> (parseMultiplicativeExpression) ASTNode* Parser::parseAdditiveExpression() { auto node = this->parseMultiplicativeExpression(); while (MATCHES(variant(OPERATOR_PLUS, OPERATOR_MINUS))) { if (peek(OPERATOR_PLUS, -1)) node = new ASTNodeNumericExpression(node, this->parseMultiplicativeExpression(), Token::Operator::Plus); else node = new ASTNodeNumericExpression(node, this->parseMultiplicativeExpression(), Token::Operator::Minus); } return node; } // (parseAdditiveExpression) <>>|<<> (parseAdditiveExpression) ASTNode* Parser::parseShiftExpression() { auto node = this->parseAdditiveExpression(); while (MATCHES(variant(OPERATOR_SHIFTLEFT, OPERATOR_SHIFTRIGHT))) { if (peek(OPERATOR_SHIFTLEFT, -1)) node = new ASTNodeNumericExpression(node, this->parseAdditiveExpression(), Token::Operator::ShiftLeft); else node = new ASTNodeNumericExpression(node, this->parseAdditiveExpression(), Token::Operator::ShiftRight); } return node; } // (parseShiftExpression) & (parseShiftExpression) ASTNode* Parser::parseBinaryAndExpression() { auto node = this->parseShiftExpression(); while (MATCHES(sequence(OPERATOR_BITAND))) { node = new ASTNodeNumericExpression(node, this->parseShiftExpression(), Token::Operator::BitAnd); } return node; } // (parseBinaryAndExpression) ^ (parseBinaryAndExpression) ASTNode* Parser::parseBinaryXorExpression() { auto node = this->parseBinaryAndExpression(); while (MATCHES(sequence(OPERATOR_BITXOR))) { node = new ASTNodeNumericExpression(node, this->parseBinaryAndExpression(), Token::Operator::BitXor); } return node; } // (parseBinaryXorExpression) | (parseBinaryXorExpression) ASTNode* Parser::parseBinaryOrExpression() { auto node = this->parseBinaryXorExpression(); while (MATCHES(sequence(OPERATOR_BITOR))) { node = new ASTNodeNumericExpression(node, this->parseBinaryXorExpression(), Token::Operator::BitOr); } return node; } // (parseBinaryOrExpression) ASTNode* Parser::parseMathematicalExpression() { return this->parseBinaryOrExpression(); } /* Type declarations */ // [be|le] ASTNode* Parser::parseType(s32 startIndex) { std::optional endian; if (peekOptional(KEYWORD_LE, 0)) endian = std::endian::little; else if (peekOptional(KEYWORD_BE, 0)) endian = std::endian::big; if (getType(startIndex) == Token::Type::Identifier) { // Custom type if (!this->m_types.contains(getValue(startIndex))) throwParseError("failed to parse type"); return new ASTNodeTypeDecl({ }, this->m_types[getValue(startIndex)]->clone(), endian); } else { // Builtin type return new ASTNodeTypeDecl({ }, new ASTNodeBuiltinType(getValue(startIndex)), endian); } } // using Identifier = (parseType) ASTNode* Parser::parseUsingDeclaration() { auto *temporaryType = dynamic_cast(parseType(-1)); if (temporaryType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryType; ); if (peekOptional(KEYWORD_BE) || peekOptional(KEYWORD_LE)) return new ASTNodeTypeDecl(getValue(-4), temporaryType->getType()->clone(), temporaryType->getEndian()); else return new ASTNodeTypeDecl(getValue(-3), temporaryType->getType()->clone(), temporaryType->getEndian()); } // padding[(parseMathematicalExpression)] ASTNode* Parser::parsePadding() { auto size = parseMathematicalExpression(); if (!MATCHES(sequence(SEPARATOR_SQUAREBRACKETCLOSE))) { delete size; throwParseError("expected closing ']' at end of array declaration", -1); } return new ASTNodeArrayVariableDecl({ }, new ASTNodeTypeDecl({ }, new ASTNodeBuiltinType(Token::ValueType::Padding)), size);; } // (parseType) Identifier ASTNode* Parser::parseMemberVariable() { auto temporaryType = dynamic_cast(parseType(-2)); if (temporaryType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryType; ); return new ASTNodeVariableDecl(getValue(-1), temporaryType->getType()->clone()); } // (parseType) Identifier[(parseMathematicalExpression)] ASTNode* Parser::parseMemberArrayVariable() { auto temporaryType = dynamic_cast(parseType(-3)); if (temporaryType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryType; ); auto name = getValue(-2); auto size = parseMathematicalExpression(); if (!MATCHES(sequence(SEPARATOR_SQUAREBRACKETCLOSE))) throwParseError("expected closing ']' at end of array declaration", -1); return new ASTNodeArrayVariableDecl(name, temporaryType->getType()->clone(), size); } // (parseType) *Identifier : (parseType) ASTNode* Parser::parseMemberPointerVariable() { auto name = getValue(-2); auto temporaryPointerType = dynamic_cast(parseType(-4)); if (temporaryPointerType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryPointerType; ); if (!MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && sequence(VALUETYPE_UNSIGNED))) throwParseError("expected unsigned builtin type as size", -1); auto temporarySizeType = dynamic_cast(parseType(-1)); if (temporarySizeType == nullptr) throwParseError("invalid type used for pointer size", -1); SCOPE_EXIT( delete temporarySizeType; ); return new ASTNodePointerVariableDecl(name, temporaryPointerType->getType()->clone(), temporarySizeType->getType()->clone()); } // struct Identifier { <(parseMember)...> } ASTNode* Parser::parseStruct() { const auto structNode = new ASTNodeStruct(); const auto &typeName = getValue(-2); ScopeExit structGuard([&]{ delete structNode; }); while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { if (MATCHES(sequence(VALUETYPE_PADDING, SEPARATOR_SQUAREBRACKETOPEN))) structNode->addMember(parsePadding()); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN))) structNode->addMember(parseMemberArrayVariable()); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER))) structNode->addMember(parseMemberVariable()); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT))) structNode->addMember(parseMemberPointerVariable()); else if (MATCHES(sequence(SEPARATOR_ENDOFPROGRAM))) throwParseError("unexpected end of program", -2); else throwParseError("invalid struct member", 0); if (!MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) throwParseError("missing ';' at end of expression", -1); } structGuard.release(); return new ASTNodeTypeDecl(typeName, structNode); } // union Identifier { <(parseMember)...> } ASTNode* Parser::parseUnion() { const auto unionNode = new ASTNodeUnion(); const auto &typeName = getValue(-2); ScopeExit unionGuard([&]{ delete unionNode; }); while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN))) unionNode->addMember(parseMemberArrayVariable()); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER))) unionNode->addMember(parseMemberVariable()); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT))) unionNode->addMember(parseMemberPointerVariable()); else if (MATCHES(sequence(SEPARATOR_ENDOFPROGRAM))) throwParseError("unexpected end of program", -2); else throwParseError("invalid union member", 0); if (!MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) throwParseError("missing ';' at end of expression", -1); } unionGuard.release(); return new ASTNodeTypeDecl(typeName, unionNode); } // enum Identifier : (parseType) { <...> } ASTNode* Parser::parseEnum() { std::string typeName; if (peekOptional(KEYWORD_BE) || peekOptional(KEYWORD_LE)) typeName = getValue(-5); else typeName = getValue(-4); auto temporaryTypeDecl = dynamic_cast(parseType(-2)); if (temporaryTypeDecl == nullptr) throwParseError("failed to parse type", -2); auto underlyingType = dynamic_cast(temporaryTypeDecl->getType()); if (underlyingType == nullptr) throwParseError("underlying type is not a built-in type", -2); const auto enumNode = new ASTNodeEnum(underlyingType); ScopeExit enumGuard([&]{ delete enumNode; }); while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { if (MATCHES(sequence(IDENTIFIER, OPERATOR_ASSIGNMENT))) { auto name = getValue(-2); enumNode->addEntry(name, parseMathematicalExpression()); } else if (MATCHES(sequence(IDENTIFIER))) { ASTNode *valueExpr; auto name = getValue(-1); if (enumNode->getEntries().empty()) { auto type = underlyingType->getType(); switch (type) { case Token::ValueType::Signed8Bit: valueExpr = new ASTNodeIntegerLiteral({ type, s8(0) }); break; case Token::ValueType::Unsigned8Bit: valueExpr = new ASTNodeIntegerLiteral({ type, u8(0) }); break; case Token::ValueType::Signed16Bit: valueExpr = new ASTNodeIntegerLiteral({ type, s16(0) }); break; case Token::ValueType::Unsigned16Bit: valueExpr = new ASTNodeIntegerLiteral({ type, u16(0) }); break; case Token::ValueType::Signed32Bit: valueExpr = new ASTNodeIntegerLiteral({ type, s32(0) }); break; case Token::ValueType::Unsigned32Bit: valueExpr = new ASTNodeIntegerLiteral({ type, u32(0) }); break; case Token::ValueType::Signed64Bit: valueExpr = new ASTNodeIntegerLiteral({ type, s64(0) }); break; case Token::ValueType::Unsigned64Bit: valueExpr = new ASTNodeIntegerLiteral({ type, u64(0) }); break; case Token::ValueType::Signed128Bit: valueExpr = new ASTNodeIntegerLiteral({ type, s128(0) }); break; case Token::ValueType::Unsigned128Bit: valueExpr = new ASTNodeIntegerLiteral({ type, u128(0) }); break; default: throwParseError("invalid enum underlying type", -1); } } else valueExpr = new ASTNodeNumericExpression(enumNode->getEntries().back().second, new ASTNodeIntegerLiteral({ Token::ValueType::Signed32Bit, s32(1) }), Token::Operator::Plus); enumNode->addEntry(name, valueExpr); } else if (MATCHES(sequence(SEPARATOR_ENDOFPROGRAM))) throwParseError("unexpected end of program", -2); else throwParseError("invalid union member", 0); if (!MATCHES(sequence(SEPARATOR_COMMA))) { if (MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) break; else throwParseError("missing ',' between enum entries", 0); } } enumGuard.release(); return new ASTNodeTypeDecl(typeName, enumNode); } // bitfield Identifier { } ASTNode* Parser::parseBitfield() { std::string typeName = getValue(-2); const auto bitfieldNode = new ASTNodeBitfield(); ScopeExit enumGuard([&]{ delete bitfieldNode; }); while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { if (MATCHES(sequence(IDENTIFIER, OPERATOR_INHERIT))) { auto name = getValue(-2); bitfieldNode->addEntry(name, parseMathematicalExpression()); } else if (MATCHES(sequence(SEPARATOR_ENDOFPROGRAM))) throwParseError("unexpected end of program", -2); else throwParseError("invalid bitfield member", 0); if (!MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) { throwParseError("missing ';' at end of expression", -1); } } enumGuard.release(); return new ASTNodeTypeDecl(typeName, bitfieldNode); } // (parseType) Identifier @ Integer ASTNode* Parser::parseVariablePlacement() { auto temporaryType = dynamic_cast(parseType(-3)); if (temporaryType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryType; ); return new ASTNodeVariableDecl(getValue(-2), temporaryType->getType()->clone(), parseMathematicalExpression()); } // (parseType) Identifier[(parseMathematicalExpression)] @ Integer ASTNode* Parser::parseArrayVariablePlacement() { auto temporaryType = dynamic_cast(parseType(-3)); if (temporaryType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryType; ); auto name = getValue(-2); auto size = parseMathematicalExpression(); if (!MATCHES(sequence(SEPARATOR_SQUAREBRACKETCLOSE))) throwParseError("expected closing ']' at end of array declaration", -1); if (!MATCHES(sequence(OPERATOR_AT))) throwParseError("expected placement instruction", -1); return new ASTNodeArrayVariableDecl(name, temporaryType->getType()->clone(), size, parseMathematicalExpression()); } // (parseType) *Identifier : (parseType) @ Integer ASTNode* Parser::parsePointerVariablePlacement() { auto name = getValue(-2); auto temporaryPointerType = dynamic_cast(parseType(-4)); if (temporaryPointerType == nullptr) throwParseError("invalid type used in variable declaration", -1); SCOPE_EXIT( delete temporaryPointerType; ); if (!MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && sequence(VALUETYPE_UNSIGNED))) throwParseError("expected unsigned builtin type as size", -1); auto temporaryPointerSizeType = dynamic_cast(parseType(-1)); if (temporaryPointerSizeType == nullptr) throwParseError("invalid size type used in pointer declaration", -1); SCOPE_EXIT( delete temporaryPointerSizeType; ); if (!MATCHES(sequence(OPERATOR_AT))) throwParseError("expected placement instruction", -1); return new ASTNodePointerVariableDecl(name, temporaryPointerType->getType()->clone(), temporaryPointerSizeType->getType()->clone(), parseMathematicalExpression()); } /* Program */ // <(parseUsingDeclaration)|(parseVariablePlacement)|(parseStruct)> ASTNode* Parser::parseStatement() { ASTNode *statement; if (MATCHES(sequence(KEYWORD_USING, IDENTIFIER, OPERATOR_ASSIGNMENT) && (optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY))) statement = dynamic_cast(parseUsingDeclaration()); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN))) statement = parseArrayVariablePlacement(); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, OPERATOR_AT))) statement = parseVariablePlacement(); else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT))) statement = parsePointerVariablePlacement(); else if (MATCHES(sequence(KEYWORD_STRUCT, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN))) statement = parseStruct(); else if (MATCHES(sequence(KEYWORD_UNION, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN))) statement = parseUnion(); else if (MATCHES(sequence(KEYWORD_ENUM, IDENTIFIER, OPERATOR_INHERIT) && (optional(KEYWORD_BE), optional(KEYWORD_LE)) && sequence(VALUETYPE_UNSIGNED, SEPARATOR_CURLYBRACKETOPEN))) statement = parseEnum(); else if (MATCHES(sequence(KEYWORD_BITFIELD, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN))) statement = parseBitfield(); else throwParseError("invalid sequence", 0); if (!MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) throwParseError("missing ';' at end of expression", -1); if (auto typeDecl = dynamic_cast(statement); typeDecl != nullptr) this->m_types.insert({ typeDecl->getName().data(), typeDecl }); return statement; } // <(parseStatement)...> EndOfProgram std::optional> Parser::parse(const std::vector &tokens) { this->m_curr = tokens.begin(); this->m_types.clear(); try { auto program = parseTillToken(SEPARATOR_ENDOFPROGRAM); if (program.empty() || this->m_curr != tokens.end()) throwParseError("program is empty!", -1); return program; } catch (ParseError &e) { this->m_error = e; } return { }; } }