patterns: Initial namespace support

This commit is contained in:
WerWolv 2021-08-25 17:07:01 +02:00
parent 15665b03a7
commit 9289ebf4c9
6 changed files with 240 additions and 124 deletions

View File

@ -161,6 +161,7 @@ namespace hex::lang {
return new ASTNodeTypeDecl(*this); return new ASTNodeTypeDecl(*this);
} }
// Replaces the name stored in this type declaration with `name`.
void setName(const std::string &name) {
    this->m_name = name;
}
[[nodiscard]] std::string_view getName() const { return this->m_name; } [[nodiscard]] std::string_view getName() const { return this->m_name; }
[[nodiscard]] ASTNode* getType() { return this->m_type; } [[nodiscard]] ASTNode* getType() { return this->m_type; }
[[nodiscard]] std::optional<std::endian> getEndian() const { return this->m_endian; } [[nodiscard]] std::optional<std::endian> getEndian() const { return this->m_endian; }

View File

@ -32,6 +32,7 @@ namespace hex::lang {
std::unordered_map<std::string, ASTNode*> m_types; std::unordered_map<std::string, ASTNode*> m_types;
std::vector<TokenIter> m_matchedOptionals; std::vector<TokenIter> m_matchedOptionals;
std::vector<std::vector<std::string>> m_currNamespace;
u32 getLineNumber(s32 index) const { u32 getLineNumber(s32 index) const {
return this->m_curr[index].lineNumber; return this->m_curr[index].lineNumber;
@ -51,9 +52,20 @@ namespace hex::lang {
return this->m_curr[index].type; return this->m_curr[index].type;
} }
// Returns `name` qualified with the namespace scope currently on top of m_currNamespace.
// Every part of that scope is emitted in order, each followed by "::", and `name` is
// appended last (e.g. parts {"a", "b"} and name "c" give "a::b::c").
// If no scope has been pushed yet, `name` is returned unchanged.
std::string getNamespacePrefixedName(const std::string &name) {
    // Calling back() on an empty vector is undefined behaviour, so handle that case explicitly
    if (this->m_currNamespace.empty())
        return name;

    std::string result;
    for (const auto &part : this->m_currNamespace.back()) {
        // Two appends instead of `part + "::"` avoid building a temporary string per part
        result += part;
        result += "::";
    }

    result += name;

    return result;
}
ASTNode* parseFunctionCall(); ASTNode* parseFunctionCall();
ASTNode* parseStringLiteral(); ASTNode* parseStringLiteral();
ASTNode* parseScopeResolution(std::vector<std::string> &path); std::string parseScopeResolution();
ASTNode* parseRValue(ASTNodeRValue::Path &path); ASTNode* parseRValue(ASTNodeRValue::Path &path);
ASTNode* parseFactor(); ASTNode* parseFactor();
ASTNode* parseUnaryExpression(); ASTNode* parseUnaryExpression();
@ -81,21 +93,23 @@ namespace hex::lang {
void parseAttribute(Attributable *currNode); void parseAttribute(Attributable *currNode);
ASTNode* parseConditional(); ASTNode* parseConditional();
ASTNode* parseWhileStatement(); ASTNode* parseWhileStatement();
ASTNode* parseType(s32 startIndex); ASTNodeTypeDecl* parseType();
ASTNode* parseUsingDeclaration(); ASTNode* parseUsingDeclaration();
ASTNode* parsePadding(); ASTNode* parsePadding();
ASTNode* parseMemberVariable(); ASTNode* parseMemberVariable(ASTNodeTypeDecl *type);
ASTNode* parseMemberArrayVariable(); ASTNode* parseMemberArrayVariable(ASTNodeTypeDecl *type);
ASTNode* parseMemberPointerVariable(); ASTNode* parseMemberPointerVariable(ASTNodeTypeDecl *type);
ASTNode* parseMember(); ASTNode* parseMember();
ASTNode* parseStruct(); ASTNode* parseStruct();
ASTNode* parseUnion(); ASTNode* parseUnion();
ASTNode* parseEnum(); ASTNode* parseEnum();
ASTNode* parseBitfield(); ASTNode* parseBitfield();
ASTNode* parseVariablePlacement(); ASTNode* parseVariablePlacement(ASTNodeTypeDecl *type);
ASTNode* parseArrayVariablePlacement(); ASTNode* parseArrayVariablePlacement(ASTNodeTypeDecl *type);
ASTNode* parsePointerVariablePlacement(); ASTNode* parsePointerVariablePlacement(ASTNodeTypeDecl *type);
ASTNode* parseStatement(); ASTNode* parsePlacement();
std::vector<ASTNode*> parseNamespace();
std::vector<ASTNode*> parseStatements();
std::vector<ASTNode*> parseTillToken(Token::Type endTokenType, const auto value) { std::vector<ASTNode*> parseTillToken(Token::Type endTokenType, const auto value) {
std::vector<ASTNode*> program; std::vector<ASTNode*> program;
@ -105,7 +119,8 @@ namespace hex::lang {
}; };
while (this->m_curr->type != endTokenType || (*this->m_curr) != value) { while (this->m_curr->type != endTokenType || (*this->m_curr) != value) {
program.push_back(parseStatement()); for (auto statement : parseStatements())
program.push_back(statement);
} }
this->m_curr++; this->m_curr++;
@ -132,6 +147,10 @@ namespace hex::lang {
return true; return true;
} }
// Rewinds the token cursor (m_curr) to the position saved in m_originalPosition.
// The sequence()/variant() matchers call this when a match attempt fails.
void reset() { this->m_curr = this->m_originalPosition; }
template<Setting S = Normal> template<Setting S = Normal>
bool sequence() { bool sequence() {
if constexpr (S == Normal) if constexpr (S == Normal)
@ -146,14 +165,14 @@ namespace hex::lang {
bool sequence(Token::Type type, auto value, auto ... args) { bool sequence(Token::Type type, auto value, auto ... args) {
if constexpr (S == Normal) { if constexpr (S == Normal) {
if (!peek(type, value)) { if (!peek(type, value)) {
this->m_curr = this->m_originalPosition; reset();
return false; return false;
} }
this->m_curr++; this->m_curr++;
if (!sequence<Normal>(args...)) { if (!sequence<Normal>(args...)) {
this->m_curr = this->m_originalPosition; reset();
return false; return false;
} }
@ -167,7 +186,7 @@ namespace hex::lang {
if (!sequence<Normal>(args...)) if (!sequence<Normal>(args...))
return true; return true;
this->m_curr = this->m_originalPosition; reset();
return false; return false;
} else } else
__builtin_unreachable(); __builtin_unreachable();
@ -196,7 +215,7 @@ namespace hex::lang {
bool variant(Token::Type type1, auto value1, Token::Type type2, auto value2) { bool variant(Token::Type type1, auto value1, Token::Type type2, auto value2) {
if (!peek(type1, value1)) { if (!peek(type1, value1)) {
if (!peek(type2, value2)) { if (!peek(type2, value2)) {
this->m_curr = this->m_originalPosition; reset();
return false; return false;
} }
} }

View File

@ -35,7 +35,8 @@ namespace hex::lang {
Parent, Parent,
While, While,
Function, Function,
Return Return,
Namespace
}; };
enum class Operator { enum class Operator {
@ -66,7 +67,8 @@ namespace hex::lang {
TernaryConditional, TernaryConditional,
Dollar, Dollar,
AddressOf, AddressOf,
SizeOf SizeOf,
ScopeResolution
}; };
enum class ValueType { enum class ValueType {
@ -208,6 +210,7 @@ namespace hex::lang {
#define KEYWORD_WHILE COMPONENT(Keyword, While) #define KEYWORD_WHILE COMPONENT(Keyword, While)
#define KEYWORD_FUNCTION COMPONENT(Keyword, Function) #define KEYWORD_FUNCTION COMPONENT(Keyword, Function)
#define KEYWORD_RETURN COMPONENT(Keyword, Return) #define KEYWORD_RETURN COMPONENT(Keyword, Return)
#define KEYWORD_NAMESPACE COMPONENT(Keyword, Namespace)
#define INTEGER hex::lang::Token::Type::Integer, hex::lang::Token::IntegerLiteral(u64(0)) #define INTEGER hex::lang::Token::Type::Integer, hex::lang::Token::IntegerLiteral(u64(0))
#define IDENTIFIER hex::lang::Token::Type::Identifier, "" #define IDENTIFIER hex::lang::Token::Type::Identifier, ""
@ -241,6 +244,7 @@ namespace hex::lang {
#define OPERATOR_DOLLAR COMPONENT(Operator, Dollar) #define OPERATOR_DOLLAR COMPONENT(Operator, Dollar)
#define OPERATOR_ADDRESSOF COMPONENT(Operator, AddressOf) #define OPERATOR_ADDRESSOF COMPONENT(Operator, AddressOf)
#define OPERATOR_SIZEOF COMPONENT(Operator, SizeOf) #define OPERATOR_SIZEOF COMPONENT(Operator, SizeOf)
#define OPERATOR_SCOPERESOLUTION COMPONENT(Operator, ScopeResolution)
#define VALUETYPE_CUSTOMTYPE COMPONENT(ValueType, CustomType) #define VALUETYPE_CUSTOMTYPE COMPONENT(ValueType, CustomType)
#define VALUETYPE_PADDING COMPONENT(ValueType, Padding) #define VALUETYPE_PADDING COMPONENT(ValueType, Padding)

View File

@ -284,6 +284,9 @@ namespace hex::lang {
} else if (c == '.') { } else if (c == '.') {
tokens.emplace_back(TOKEN(Separator, Dot)); tokens.emplace_back(TOKEN(Separator, Dot));
offset += 1; offset += 1;
} else if (code.substr(offset, 2) == "::") {
tokens.emplace_back(TOKEN(Operator, ScopeResolution));
offset += 2;
} else if (c == '@') { } else if (c == '@') {
tokens.emplace_back(TOKEN(Operator, AtDeclaration)); tokens.emplace_back(TOKEN(Operator, AtDeclaration));
offset += 1; offset += 1;
@ -428,6 +431,8 @@ namespace hex::lang {
tokens.emplace_back(TOKEN(Keyword, Function)); tokens.emplace_back(TOKEN(Keyword, Function));
else if (identifier == "return") else if (identifier == "return")
tokens.emplace_back(TOKEN(Keyword, Return)); tokens.emplace_back(TOKEN(Keyword, Return));
else if (identifier == "namespace")
tokens.emplace_back(TOKEN(Keyword, Namespace));
// Check for built-in types // Check for built-in types
else if (identifier == "u8") else if (identifier == "u8")

View File

@ -1,7 +1,6 @@
#include <hex/lang/parser.hpp> #include <hex/lang/parser.hpp>
#include <optional> #include <optional>
#include <variant>
#define MATCHES(x) (begin() && x) #define MATCHES(x) (begin() && x)
@ -20,7 +19,11 @@ namespace hex::lang {
// Identifier([(parseMathematicalExpression)|<(parseMathematicalExpression),...>(parseMathematicalExpression)] // Identifier([(parseMathematicalExpression)|<(parseMathematicalExpression),...>(parseMathematicalExpression)]
ASTNode* Parser::parseFunctionCall() { ASTNode* Parser::parseFunctionCall() {
auto functionName = getValue<std::string>(-2); std::string functionName = parseScopeResolution();
if (!MATCHES(sequence(SEPARATOR_ROUNDBRACKETOPEN)))
throwParseError("expected '(' after function name");
std::vector<ASTNode*> params; std::vector<ASTNode*> params;
auto paramCleanup = SCOPE_GUARD { auto paramCleanup = SCOPE_GUARD {
for (auto &param : params) for (auto &param : params)
@ -51,18 +54,21 @@ namespace hex::lang {
return new ASTNodeStringLiteral(getValue<std::string>(-1)); return new ASTNodeStringLiteral(getValue<std::string>(-1));
} }
// Identifier::<Identifier[::]...> std::string Parser::parseScopeResolution() {
ASTNode* Parser::parseScopeResolution(std::vector<std::string> &path) { std::string name;
if (peek(IDENTIFIER, -1))
path.push_back(getValue<std::string>(-1));
if (MATCHES(sequence(SEPARATOR_SCOPE_RESOLUTION))) { while (true) {
if (MATCHES(sequence(IDENTIFIER))) name += getValue<std::string>(-1);
return this->parseScopeResolution(path);
if (MATCHES(sequence(OPERATOR_SCOPERESOLUTION, IDENTIFIER))) {
name += "::";
continue;
}
else else
throwParseError("expected member name", -1); break;
} else }
return TO_NUMERIC_EXPRESSION(new ASTNodeScopeResolution(path));
return name;
} }
// <Identifier[.]...> // <Identifier[.]...>
@ -98,13 +104,22 @@ namespace hex::lang {
throwParseError("expected closing parenthesis"); throwParseError("expected closing parenthesis");
} }
return node; return node;
} else if (MATCHES(sequence(IDENTIFIER, SEPARATOR_SCOPE_RESOLUTION))) { } else if (MATCHES(sequence(IDENTIFIER))) {
std::vector<std::string> path; auto originalPos = this->m_curr;
this->m_curr--; this->m_curr++;
return this->parseScopeResolution(path); parseScopeResolution();
} else if (MATCHES(sequence(IDENTIFIER, SEPARATOR_ROUNDBRACKETOPEN))) { bool isFunction = peek(SEPARATOR_ROUNDBRACKETOPEN);
return TO_NUMERIC_EXPRESSION(this->parseFunctionCall()); this->m_curr = originalPos;
} else if (MATCHES(oneOf(IDENTIFIER, KEYWORD_PARENT))) {
if (isFunction) {
this->m_curr++;
return TO_NUMERIC_EXPRESSION(parseFunctionCall());
}
else {
ASTNodeRValue::Path path;
return TO_NUMERIC_EXPRESSION(this->parseRValue(path));
}
} else if (MATCHES(oneOf(KEYWORD_PARENT))) {
ASTNodeRValue::Path path; ASTNodeRValue::Path path;
return TO_NUMERIC_EXPRESSION(this->parseRValue(path)); return TO_NUMERIC_EXPRESSION(this->parseRValue(path));
} else if (MATCHES(sequence(OPERATOR_DOLLAR))) { } else if (MATCHES(sequence(OPERATOR_DOLLAR))) {
@ -397,17 +412,29 @@ namespace hex::lang {
} }
bodyCleanup.release(); bodyCleanup.release();
return new ASTNodeFunctionDefinition(functionName, params, body); return new ASTNodeFunctionDefinition(getNamespacePrefixedName(functionName), params, body);
} }
ASTNode* Parser::parseFunctionStatement() { ASTNode* Parser::parseFunctionStatement() {
bool needsSemicolon = true; bool needsSemicolon = true;
ASTNode *statement; ASTNode *statement;
if (MATCHES(sequence(IDENTIFIER, SEPARATOR_ROUNDBRACKETOPEN))) if (peek(IDENTIFIER)) {
statement = parseFunctionCall(); auto originalPos = this->m_curr;
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER))) this->m_curr++;
statement = parseMemberVariable(); parseScopeResolution();
bool isFunction = peek(SEPARATOR_ROUNDBRACKETOPEN);
this->m_curr = originalPos;
if (isFunction) {
this->m_curr++;
statement = parseFunctionCall();
}
else
statement = parseMemberVariable(parseType());
}
else if (peek(KEYWORD_BE) || peek(KEYWORD_LE) || peek(VALUETYPE_ANY))
statement = parseMemberVariable(parseType());
else if (MATCHES(sequence(IDENTIFIER, OPERATOR_ASSIGNMENT))) else if (MATCHES(sequence(IDENTIFIER, OPERATOR_ASSIGNMENT)))
statement = parseFunctionVariableAssignment(); statement = parseFunctionVariableAssignment();
else if (MATCHES(sequence(KEYWORD_RETURN))) else if (MATCHES(sequence(KEYWORD_RETURN)))
@ -418,7 +445,8 @@ namespace hex::lang {
} else if (MATCHES(sequence(KEYWORD_WHILE, SEPARATOR_ROUNDBRACKETOPEN))) { } else if (MATCHES(sequence(KEYWORD_WHILE, SEPARATOR_ROUNDBRACKETOPEN))) {
statement = parseFunctionWhileLoop(); statement = parseFunctionWhileLoop();
needsSemicolon = false; needsSemicolon = false;
} else }
else
throwParseError("invalid sequence", 0); throwParseError("invalid sequence", 0);
if (needsSemicolon && !MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) { if (needsSemicolon && !MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) {
@ -558,34 +586,36 @@ namespace hex::lang {
/* Type declarations */ /* Type declarations */
// [be|le] <Identifier|u8|u16|u32|u64|u128|s8|s16|s32|s64|s128|float|double> // [be|le] <Identifier|u8|u16|u32|u64|u128|s8|s16|s32|s64|s128|float|double>
ASTNode* Parser::parseType(s32 startIndex) { ASTNodeTypeDecl* Parser::parseType() {
std::optional<std::endian> endian; std::optional<std::endian> endian;
if (peekOptional(KEYWORD_LE, 0)) if (MATCHES(sequence(KEYWORD_LE)))
endian = std::endian::little; endian = std::endian::little;
else if (peekOptional(KEYWORD_BE, 0)) else if (MATCHES(sequence(KEYWORD_BE)))
endian = std::endian::big; endian = std::endian::big;
if (getType(startIndex) == Token::Type::Identifier) { // Custom type if (MATCHES(sequence(IDENTIFIER))) { // Custom type
if (!this->m_types.contains(getValue<std::string>(startIndex))) std::string typeName = parseScopeResolution();
throwParseError("failed to parse type");
return new ASTNodeTypeDecl({ }, this->m_types[getValue<std::string>(startIndex)]->clone(), endian); if (this->m_types.contains(typeName))
} return new ASTNodeTypeDecl({ }, this->m_types[typeName]->clone(), endian);
else { // Builtin type else if (this->m_types.contains(getNamespacePrefixedName(typeName)))
return new ASTNodeTypeDecl({ }, new ASTNodeBuiltinType(getValue<Token::ValueType>(startIndex)), endian); return new ASTNodeTypeDecl({ }, this->m_types[getNamespacePrefixedName(typeName)]->clone(), endian);
else
throwParseError(hex::format("unknown type '{}'", typeName));
} }
else if (MATCHES(sequence(VALUETYPE_ANY))) { // Builtin type
return new ASTNodeTypeDecl({ }, new ASTNodeBuiltinType(getValue<Token::ValueType>(-1)), endian);
} else throwParseError("failed to parse type. Expected identifier or builtin type");
} }
// using Identifier = (parseType) // using Identifier = (parseType)
ASTNode* Parser::parseUsingDeclaration() { ASTNode* Parser::parseUsingDeclaration() {
auto *type = dynamic_cast<ASTNodeTypeDecl *>(parseType(-1)); auto name = getValue<std::string>(-2);
auto *type = dynamic_cast<ASTNodeTypeDecl *>(parseType());
if (type == nullptr) throwParseError("invalid type used in variable declaration", -1); if (type == nullptr) throwParseError("invalid type used in variable declaration", -1);
if (peekOptional(KEYWORD_BE) || peekOptional(KEYWORD_LE)) return new ASTNodeTypeDecl(name, type, type->getEndian());
return new ASTNodeTypeDecl(getValue<std::string>(-4), type, type->getEndian());
else
return new ASTNodeTypeDecl(getValue<std::string>(-3), type, type->getEndian());
} }
// padding[(parseMathematicalExpression)] // padding[(parseMathematicalExpression)]
@ -601,16 +631,14 @@ namespace hex::lang {
} }
// (parseType) Identifier // (parseType) Identifier
ASTNode* Parser::parseMemberVariable() { ASTNode* Parser::parseMemberVariable(ASTNodeTypeDecl *type) {
auto type = dynamic_cast<ASTNodeTypeDecl *>(parseType(-2));
if (type == nullptr) throwParseError("invalid type used in variable declaration", -1); if (type == nullptr) throwParseError("invalid type used in variable declaration", -1);
return new ASTNodeVariableDecl(getValue<std::string>(-1), type); return new ASTNodeVariableDecl(getValue<std::string>(-1), type);
} }
// (parseType) Identifier[(parseMathematicalExpression)] // (parseType) Identifier[(parseMathematicalExpression)]
ASTNode* Parser::parseMemberArrayVariable() { ASTNode* Parser::parseMemberArrayVariable(ASTNodeTypeDecl *type) {
auto type = dynamic_cast<ASTNodeTypeDecl *>(parseType(-3));
if (type == nullptr) throwParseError("invalid type used in variable declaration", -1); if (type == nullptr) throwParseError("invalid type used in variable declaration", -1);
auto name = getValue<std::string>(-2); auto name = getValue<std::string>(-2);
@ -634,33 +662,40 @@ namespace hex::lang {
} }
// (parseType) *Identifier : (parseType) // (parseType) *Identifier : (parseType)
ASTNode* Parser::parseMemberPointerVariable() { ASTNode* Parser::parseMemberPointerVariable(ASTNodeTypeDecl *type) {
auto name = getValue<std::string>(-2); auto name = getValue<std::string>(-2);
auto pointerType = dynamic_cast<ASTNodeTypeDecl *>(parseType(-4)); auto sizeType = parseType();
if (pointerType == nullptr) throwParseError("invalid type used in variable declaration", -1);
if (!MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && sequence(VALUETYPE_UNSIGNED))) {
throwParseError("expected unsigned builtin type as size", -1); auto builtinType = dynamic_cast<ASTNodeBuiltinType*>(sizeType->getType());
auto sizeType = dynamic_cast<ASTNodeTypeDecl *>(parseType(-1)); if (builtinType == nullptr || !Token::isUnsigned(builtinType->getType()))
if (sizeType == nullptr) throwParseError("invalid type used for pointer size", -1); throwParseError("invalid type used for pointer size", -1);
}
return new ASTNodePointerVariableDecl(name, pointerType, sizeType); return new ASTNodePointerVariableDecl(name, type, sizeType);
} }
// [(parsePadding)|(parseMemberVariable)|(parseMemberArrayVariable)|(parseMemberPointerVariable)] // [(parsePadding)|(parseMemberVariable)|(parseMemberArrayVariable)|(parseMemberPointerVariable)]
ASTNode* Parser::parseMember() { ASTNode* Parser::parseMember() {
ASTNode *member; ASTNode *member;
if (MATCHES(sequence(VALUETYPE_PADDING, SEPARATOR_SQUAREBRACKETOPEN)))
if (peek(KEYWORD_BE) || peek(KEYWORD_LE) || peek(VALUETYPE_ANY) || peek(IDENTIFIER)) {
// Some kind of variable definition
auto type = parseType();
if (MATCHES(sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN)) && sequence<Not>(SEPARATOR_SQUAREBRACKETOPEN))
member = parseMemberArrayVariable(type);
else if (MATCHES(sequence(IDENTIFIER)))
member = parseMemberVariable(type);
else if (MATCHES(sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT)))
member = parseMemberPointerVariable(type);
}
else if (MATCHES(sequence(VALUETYPE_PADDING, SEPARATOR_SQUAREBRACKETOPEN)))
member = parsePadding(); member = parsePadding();
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN) && sequence<Not>(SEPARATOR_SQUAREBRACKETOPEN)))
member = parseMemberArrayVariable();
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER)))
member = parseMemberVariable();
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT)))
member = parseMemberPointerVariable();
else if (MATCHES(sequence(KEYWORD_IF, SEPARATOR_ROUNDBRACKETOPEN))) else if (MATCHES(sequence(KEYWORD_IF, SEPARATOR_ROUNDBRACKETOPEN)))
return parseConditional(); return parseConditional();
else if (MATCHES(sequence(SEPARATOR_ENDOFPROGRAM))) else if (MATCHES(sequence(SEPARATOR_ENDOFPROGRAM)))
@ -686,8 +721,6 @@ namespace hex::lang {
if (this->m_types.contains(typeName)) if (this->m_types.contains(typeName))
throwParseError(hex::format("redefinition of type '{}'", typeName)); throwParseError(hex::format("redefinition of type '{}'", typeName));
this->m_types.insert({ typeName, new ASTNodeTypeDecl(typeName, nullptr) });
while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) {
structNode->addMember(parseMember()); structNode->addMember(parseMember());
} }
@ -706,8 +739,6 @@ namespace hex::lang {
if (this->m_types.contains(typeName)) if (this->m_types.contains(typeName))
throwParseError(hex::format("redefinition of type '{}'", typeName)); throwParseError(hex::format("redefinition of type '{}'", typeName));
this->m_types.insert({ typeName, new ASTNodeTypeDecl(typeName, nullptr) });
while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) {
unionNode->addMember(parseMember()); unionNode->addMember(parseMember());
} }
@ -719,14 +750,9 @@ namespace hex::lang {
// enum Identifier : (parseType) { <<Identifier|Identifier = (parseMathematicalExpression)[,]>...> } // enum Identifier : (parseType) { <<Identifier|Identifier = (parseMathematicalExpression)[,]>...> }
ASTNode* Parser::parseEnum() { ASTNode* Parser::parseEnum() {
std::string typeName; auto typeName = getNamespacePrefixedName(getValue<std::string>(-2));
if (peekOptional(KEYWORD_BE) || peekOptional(KEYWORD_LE))
typeName = getValue<std::string>(-5);
else
typeName = getValue<std::string>(-4);
auto underlyingType = dynamic_cast<ASTNodeTypeDecl*>(parseType(-2)); auto underlyingType = parseType();
if (underlyingType == nullptr) throwParseError("failed to parse type", -2);
if (underlyingType->getEndian().has_value()) throwParseError("underlying type may not have an endian specification", -2); if (underlyingType->getEndian().has_value()) throwParseError("underlying type may not have an endian specification", -2);
const auto enumNode = new ASTNodeEnum(underlyingType); const auto enumNode = new ASTNodeEnum(underlyingType);
@ -735,8 +761,6 @@ namespace hex::lang {
if (this->m_types.contains(typeName)) if (this->m_types.contains(typeName))
throwParseError(hex::format("redefinition of type '{}'", typeName)); throwParseError(hex::format("redefinition of type '{}'", typeName));
this->m_types.insert({ typeName, new ASTNodeTypeDecl(typeName, nullptr) });
ASTNode *lastEntry = nullptr; ASTNode *lastEntry = nullptr;
while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) {
if (MATCHES(sequence(IDENTIFIER, OPERATOR_ASSIGNMENT))) { if (MATCHES(sequence(IDENTIFIER, OPERATOR_ASSIGNMENT))) {
@ -784,8 +808,6 @@ namespace hex::lang {
if (this->m_types.contains(typeName)) if (this->m_types.contains(typeName))
throwParseError(hex::format("redefinition of type '{}'", typeName)); throwParseError(hex::format("redefinition of type '{}'", typeName));
this->m_types.insert({ typeName, new ASTNodeTypeDecl(typeName, nullptr) });
while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) { while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) {
if (MATCHES(sequence(IDENTIFIER, OPERATOR_INHERIT))) { if (MATCHES(sequence(IDENTIFIER, OPERATOR_INHERIT))) {
auto name = getValue<std::string>(-2); auto name = getValue<std::string>(-2);
@ -807,18 +829,19 @@ namespace hex::lang {
} }
// (parseType) Identifier @ Integer // (parseType) Identifier @ Integer
ASTNode* Parser::parseVariablePlacement() { ASTNode* Parser::parseVariablePlacement(ASTNodeTypeDecl *type) {
auto type = dynamic_cast<ASTNodeTypeDecl *>(parseType(-3)); auto name = getValue<std::string>(-1);
if (type == nullptr) throwParseError("invalid type used in variable declaration", -1);
return new ASTNodeVariableDecl(getValue<std::string>(-2), type, parseMathematicalExpression()); if (!MATCHES(sequence(OPERATOR_AT)))
throwParseError("expected placement instruction", -1);
auto placementOffset = parseMathematicalExpression();
return new ASTNodeVariableDecl(name, type, placementOffset);
} }
// (parseType) Identifier[[(parseMathematicalExpression)]] @ Integer // (parseType) Identifier[[(parseMathematicalExpression)]] @ Integer
ASTNode* Parser::parseArrayVariablePlacement() { ASTNode* Parser::parseArrayVariablePlacement(ASTNodeTypeDecl *type) {
auto type = dynamic_cast<ASTNodeTypeDecl *>(parseType(-3));
if (type == nullptr) throwParseError("invalid type used in variable declaration", -1);
auto name = getValue<std::string>(-2); auto name = getValue<std::string>(-2);
ASTNode *size = nullptr; ASTNode *size = nullptr;
@ -837,58 +860,115 @@ namespace hex::lang {
if (!MATCHES(sequence(OPERATOR_AT))) if (!MATCHES(sequence(OPERATOR_AT)))
throwParseError("expected placement instruction", -1); throwParseError("expected placement instruction", -1);
auto placementOffset = parseMathematicalExpression();
sizeCleanup.release(); sizeCleanup.release();
return new ASTNodeArrayVariableDecl(name, type, size, parseMathematicalExpression()); return new ASTNodeArrayVariableDecl(name, type, size, placementOffset);
} }
// (parseType) *Identifier : (parseType) @ Integer // (parseType) *Identifier : (parseType) @ Integer
ASTNode* Parser::parsePointerVariablePlacement() { ASTNode* Parser::parsePointerVariablePlacement(ASTNodeTypeDecl *type) {
auto name = getValue<std::string>(-2); auto name = getValue<std::string>(-2);
auto temporaryPointerType = dynamic_cast<ASTNodeTypeDecl *>(parseType(-4)); auto sizeType = parseType();
if (temporaryPointerType == nullptr) throwParseError("invalid type used in variable declaration", -1); auto sizeCleanup = SCOPE_GUARD { delete sizeType; };
if (!MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && sequence(VALUETYPE_UNSIGNED))) {
throwParseError("expected unsigned builtin type as size", -1); auto builtinType = dynamic_cast<ASTNodeBuiltinType*>(sizeType->getType());
auto temporaryPointerSizeType = dynamic_cast<ASTNodeTypeDecl *>(parseType(-1)); if (builtinType == nullptr || !Token::isUnsigned(builtinType->getType()))
if (temporaryPointerSizeType == nullptr) throwParseError("invalid size type used in pointer declaration", -1); throwParseError("invalid type used for pointer size", -1);
}
if (!MATCHES(sequence(OPERATOR_AT))) if (!MATCHES(sequence(OPERATOR_AT)))
throwParseError("expected placement instruction", -1); throwParseError("expected placement instruction", -1);
return new ASTNodePointerVariableDecl(name, temporaryPointerType, temporaryPointerSizeType, parseMathematicalExpression()); auto placementOffset = parseMathematicalExpression();
sizeCleanup.release();
return new ASTNodePointerVariableDecl(name, type, sizeType, placementOffset);
} }
std::vector<ASTNode*> Parser::parseNamespace() {
std::vector<ASTNode*> statements;
if (!MATCHES(sequence(IDENTIFIER)))
throwParseError("expected namespace identifier");
this->m_currNamespace.push_back(this->m_currNamespace.back());
while (true) {
this->m_currNamespace.back().push_back(getValue<std::string>(-1));
if (MATCHES(sequence(OPERATOR_SCOPERESOLUTION, IDENTIFIER)))
continue;
else
break;
}
if (!MATCHES(sequence(SEPARATOR_CURLYBRACKETOPEN)))
throwParseError("expected '{' at start of namespace");
while (!MATCHES(sequence(SEPARATOR_CURLYBRACKETCLOSE))) {
auto newStatements = parseStatements();
std::copy(newStatements.begin(), newStatements.end(), std::back_inserter(statements));
}
this->m_currNamespace.pop_back();
return statements;
}
// (parseType) <(parseArrayVariablePlacement)|(parseVariablePlacement)|(parsePointerVariablePlacement)>
//
// Parses the type of a placed variable and then dispatches on the tokens that follow:
//   Identifier [        -> array placement
//   Identifier          -> plain variable placement
//   * Identifier :      -> pointer placement
// Anything else is reported as a parse error.
ASTNode* Parser::parsePlacement() {
    auto type = parseType();

    // The type node is a raw allocation; free it if no alternative matches or if one of
    // the placement parsers throws before a node has been built from it
    auto typeCleanup = SCOPE_GUARD { delete type; };

    ASTNode *placement = nullptr;
    if (MATCHES(sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN)))
        placement = parseArrayVariablePlacement(type);
    else if (MATCHES(sequence(IDENTIFIER)))
        placement = parseVariablePlacement(type);
    else if (MATCHES(sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT)))
        placement = parsePointerVariablePlacement(type);
    else throwParseError("invalid sequence", 0);

    // Success: the type pointer has been handed to the resulting node, so keep it alive
    typeCleanup.release();

    return placement;
}
/* Program */ /* Program */
// <(parseUsingDeclaration)|(parseVariablePlacement)|(parseStruct)> // <(parseUsingDeclaration)|(parseVariablePlacement)|(parseStruct)>
ASTNode* Parser::parseStatement() { std::vector<ASTNode*> Parser::parseStatements() {
ASTNode *statement; ASTNode *statement;
if (MATCHES(sequence(KEYWORD_USING, IDENTIFIER, OPERATOR_ASSIGNMENT) && (optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY))) if (MATCHES(sequence(KEYWORD_USING, IDENTIFIER, OPERATOR_ASSIGNMENT)))
statement = dynamic_cast<ASTNodeTypeDecl*>(parseUsingDeclaration()); statement = parseUsingDeclaration();
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, SEPARATOR_SQUAREBRACKETOPEN))) else if (peek(IDENTIFIER)) {
statement = parseArrayVariablePlacement(); auto originalPos = this->m_curr;
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(IDENTIFIER, OPERATOR_AT))) this->m_curr++;
statement = parseVariablePlacement(); parseScopeResolution();
else if (MATCHES((optional(KEYWORD_BE), optional(KEYWORD_LE)) && variant(IDENTIFIER, VALUETYPE_ANY) && sequence(OPERATOR_STAR, IDENTIFIER, OPERATOR_INHERIT))) bool isFunction = peek(SEPARATOR_ROUNDBRACKETOPEN);
statement = parsePointerVariablePlacement(); this->m_curr = originalPos;
if (isFunction) {
this->m_curr++;
statement = parseFunctionCall();
}
else
statement = parsePlacement();
}
else if (peek(KEYWORD_BE) || peek(KEYWORD_LE) || peek(VALUETYPE_ANY))
statement = parsePlacement();
else if (MATCHES(sequence(KEYWORD_STRUCT, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN))) else if (MATCHES(sequence(KEYWORD_STRUCT, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN)))
statement = parseStruct(); statement = parseStruct();
else if (MATCHES(sequence(KEYWORD_UNION, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN))) else if (MATCHES(sequence(KEYWORD_UNION, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN)))
statement = parseUnion(); statement = parseUnion();
else if (MATCHES(sequence(KEYWORD_ENUM, IDENTIFIER, OPERATOR_INHERIT) && (optional(KEYWORD_BE), optional(KEYWORD_LE)) && sequence(VALUETYPE_UNSIGNED, SEPARATOR_CURLYBRACKETOPEN))) else if (MATCHES(sequence(KEYWORD_ENUM, IDENTIFIER, OPERATOR_INHERIT)))
statement = parseEnum(); statement = parseEnum();
else if (MATCHES(sequence(KEYWORD_BITFIELD, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN))) else if (MATCHES(sequence(KEYWORD_BITFIELD, IDENTIFIER, SEPARATOR_CURLYBRACKETOPEN)))
statement = parseBitfield(); statement = parseBitfield();
else if (MATCHES(sequence(IDENTIFIER, SEPARATOR_ROUNDBRACKETOPEN)))
statement = parseFunctionCall();
else if (MATCHES(sequence(KEYWORD_FUNCTION, IDENTIFIER, SEPARATOR_ROUNDBRACKETOPEN))) else if (MATCHES(sequence(KEYWORD_FUNCTION, IDENTIFIER, SEPARATOR_ROUNDBRACKETOPEN)))
statement = parseFunctionDefintion(); statement = parseFunctionDefintion();
else if (MATCHES(sequence(KEYWORD_NAMESPACE)))
return parseNamespace();
else throwParseError("invalid sequence", 0); else throwParseError("invalid sequence", 0);
if (MATCHES(sequence(SEPARATOR_SQUAREBRACKETOPEN, SEPARATOR_SQUAREBRACKETOPEN))) if (MATCHES(sequence(SEPARATOR_SQUAREBRACKETOPEN, SEPARATOR_SQUAREBRACKETOPEN)))
@ -897,18 +977,25 @@ namespace hex::lang {
if (!MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION))) if (!MATCHES(sequence(SEPARATOR_ENDOFEXPRESSION)))
throwParseError("missing ';' at end of expression", -1); throwParseError("missing ';' at end of expression", -1);
if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(statement); typeDecl != nullptr) if (auto typeDecl = dynamic_cast<ASTNodeTypeDecl*>(statement); typeDecl != nullptr) {
this->m_types.insert({ typeDecl->getName().data(), typeDecl }); auto typeName = getNamespacePrefixedName(typeDecl->getName().data());
return statement; typeDecl->setName(typeName);
this->m_types.insert({ typeName, typeDecl });
}
return { statement };
} }
// <(parseStatement)...> EndOfProgram // <(parseNamespace)...> EndOfProgram
std::optional<std::vector<ASTNode*>> Parser::parse(const std::vector<Token> &tokens) { std::optional<std::vector<ASTNode*>> Parser::parse(const std::vector<Token> &tokens) {
this->m_curr = tokens.begin(); this->m_curr = tokens.begin();
this->m_types.clear(); this->m_types.clear();
this->m_currNamespace.clear();
this->m_currNamespace.emplace_back();
try { try {
auto program = parseTillToken(SEPARATOR_ENDOFPROGRAM); auto program = parseTillToken(SEPARATOR_ENDOFPROGRAM);

View File

@ -15,7 +15,7 @@ namespace hex {
static TextEditor::LanguageDefinition langDef; static TextEditor::LanguageDefinition langDef;
if (!initialized) { if (!initialized) {
static const char* const keywords[] = { static const char* const keywords[] = {
"using", "struct", "union", "enum", "bitfield", "be", "le", "if", "else", "false", "true", "parent", "addressof", "sizeof", "$", "while", "fn", "return" "using", "struct", "union", "enum", "bitfield", "be", "le", "if", "else", "false", "true", "parent", "addressof", "sizeof", "$", "while", "fn", "return", "namespace"
}; };
for (auto& k : keywords) for (auto& k : keywords)
langDef.mKeywords.insert(k); langDef.mKeywords.insert(k);