From e56062715ac3ef47f99ed9e53fb7031dc7b9ad22 Mon Sep 17 00:00:00 2001 From: yhirose Date: Thu, 4 Jun 2015 11:41:14 -0400 Subject: [PATCH] Improved AST support. --- example/calc3.cc | 96 ++++++++++++---------------------------------- language/parser.cc | 51 ++++++++++-------------- peglib.h | 66 +++++++++++++------------------ test/test.cc | 43 +++++++++++++++++++++ test/test.sln | 3 -- 5 files changed, 115 insertions(+), 144 deletions(-) diff --git a/example/calc3.cc b/example/calc3.cc index a001778..6169345 100644 --- a/example/calc3.cc +++ b/example/calc3.cc @@ -12,71 +12,6 @@ using namespace peglib; using namespace std; -template -static U reduce(T i, T end, U val, F f) { - if (i == end) { - return val; - } - tie(val, i) = f(val, i); - return reduce(i, end, val, f); -}; - -struct ast_node -{ - virtual ~ast_node() = default; - virtual long eval() = 0; -}; - -struct ast_ope : public ast_node -{ - ast_ope(char ope, shared_ptr left, shared_ptr right) - : ope_(ope), left_(left), right_(right) {} - - long eval() override { - switch (ope_) { - case '+': return left_->eval() + right_->eval(); - case '-': return left_->eval() - right_->eval(); - case '*': return left_->eval() * right_->eval(); - case '/': return left_->eval() / right_->eval(); - } - assert(false); - return 0; - }; - - static shared_ptr create(const SemanticValues& sv) { - assert(!sv.empty()); - return reduce( - sv.begin() + 1, - sv.end(), - sv[0].get>(), - [](shared_ptr r, SemanticValues::const_iterator i) { - auto ope = (i++)->val.get(); - auto nd = (i++)->val.get>(); - r = make_shared(ope, r, nd); - return make_tuple(r, i); - }); - } - -private: - char ope_; - shared_ptr left_; - shared_ptr right_; -}; - -struct ast_num : public ast_node -{ - ast_num(long num) : num_(num) {} - - long eval() override { return num_; }; - - static shared_ptr create(const char* s, size_t n) { - return make_shared(atol(s)); - } - -private: - long num_; -}; - int main(int argc, const char** argv) { if (argc < 2 || string("--help") == argv[1]) { @@ -84,6 +19,26 @@ int main(int argc, const char** argv) return 1; } + function eval = [&](const Ast& ast) { + if (ast.name == "NUMBER") { + return stol(ast.token); + } else { + const auto& nodes = ast.nodes; + auto result = eval(*nodes[0]); + for (auto i = 1u; i < nodes.size(); i += 2) { + auto num = eval(*nodes[i + 1]); + auto ope = nodes[i]->token[0]; + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } + }; + peg parser( " EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* " " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " @@ -94,16 +49,13 @@ int main(int argc, const char** argv) " ~_ <- [ \t\r\n]* " ); - parser["EXPRESSION"] = ast_ope::create; - parser["TERM"] = ast_ope::create; - parser["TERM_OPERATOR"] = [](const char* s, size_t n) { return *s; }; - parser["FACTOR_OPERATOR"] = [](const char* s, size_t n) { return *s; }; - parser["NUMBER"] = ast_num::create; + parser.enable_ast(); auto expr = argv[1]; - shared_ptr ast; + shared_ptr ast; if (parser.parse(expr, ast)) { - cout << expr << " = " << ast->eval() << endl; + ast->print(); + cout << expr << " = " << eval(*ast) << endl; return 0; } diff --git a/language/parser.cc b/language/parser.cc index ce8ae4c..2e72d90 100644 --- a/language/parser.cc +++ b/language/parser.cc @@ -72,36 +72,27 @@ peg& get_parser() throw logic_error("invalid peg grammar"); } - parser.ast({ - { peg::AstNodeType::Regular, "STATEMENTS", Statements }, - { peg::AstNodeType::Regular, "WHILE", While }, - { peg::AstNodeType::Regular, "ASSIGNMENT", Assignment }, - { peg::AstNodeType::Regular, "IF", If }, - { peg::AstNodeType::Regular, "FUNCTION", Function }, - { peg::AstNodeType::Regular, "PARAMETERS", Undefined }, - { peg::AstNodeType::Regular, "FUNCTION_CALL", FunctionCall }, - { peg::AstNodeType::Regular, "ARGUMENTS", Undefined }, - { peg::AstNodeType::Optimizable, "PRIMARY", LogicalOr }, - { peg::AstNodeType::Optimizable, "LOGICAL_OR", LogicalAnd }, - { peg::AstNodeType::Optimizable, "LOGICAL_AND", Condition }, - { peg::AstNodeType::Optimizable, "CONDITION", BinExpresion }, - { peg::AstNodeType::Optimizable, "TERM", UnaryPlus }, - { peg::AstNodeType::Optimizable, "UNARY_PLUS", UnaryMinus }, - { peg::AstNodeType::Optimizable, "UNARY_MINUS", UnaryNot }, - { peg::AstNodeType::Optimizable, "UNARY_NOT", BinExpresion }, - { peg::AstNodeType::Token, "CONDITION_OPERATOR", Undefined }, - { peg::AstNodeType::Token, "TERM_OPERATOR", Undefined }, - { peg::AstNodeType::Token, "UNARY_PLUS_OPERATOR", Undefined }, - { peg::AstNodeType::Token, "UNARY_MINUS_OPERATOR", Undefined }, - { peg::AstNodeType::Token, "UNARY_NOT_OPERATOR", Undefined }, - { peg::AstNodeType::Token, "FACTOR_OPERATOR", Undefined }, - { peg::AstNodeType::Token, "NUMBER", Number }, - { peg::AstNodeType::Token, "BOOLEAN", Boolean }, - { peg::AstNodeType::Token, "STRING", Undefined }, - { peg::AstNodeType::Token, "IDENTIFIER", Identifier }, - { peg::AstNodeType::Regular, "INTERPOLATED_STRING", InterpolatedString }, - { peg::AstNodeType::Token, "INTERPOLATED_CONTENT", Undefined }, - { peg::AstNodeType::Token, "MUTABLE", Undefined }, + parser.enable_ast({ + { "STATEMENTS", Statements }, + { "WHILE", While }, + { "ASSIGNMENT", Assignment }, + { "IF", If }, + { "FUNCTION", Function }, + { "PARAMETERS", Undefined }, + { "FUNCTION_CALL", FunctionCall }, + { "ARGUMENTS", Undefined }, + { "PRIMARY", LogicalOr, true }, + { "LOGICAL_OR", LogicalAnd, true }, + { "LOGICAL_AND", Condition, true }, + { "CONDITION", BinExpresion, true }, + { "TERM", UnaryPlus, true }, + { "UNARY_PLUS", UnaryMinus, true }, + { "UNARY_MINUS", UnaryNot, true }, + { "UNARY_NOT", BinExpresion, true }, + { "NUMBER", Number }, + { "BOOLEAN", Boolean }, + { "IDENTIFIER", Identifier }, + { "INTERPOLATED_STRING", InterpolatedString }, }, Undefined); } diff --git a/peglib.h b/peglib.h index 8fba467..36e9922 100644 --- a/peglib.h +++ b/peglib.h @@ -180,8 +180,10 @@ struct SemanticValues : protected std::vector const char* s; size_t n; size_t choice; + bool has_anchor; + bool is_leaf; - SemanticValues() : s(nullptr), n(0), choice(0) {} + SemanticValues() : s(nullptr), n(0), choice(0), has_anchor(false), is_leaf(true) {} std::string str(size_t i = 0) const { if (i > 0) { @@ -190,6 +192,10 @@ struct SemanticValues : protected std::vector return std::string(s, n); } + bool is_token() const { + return has_anchor || is_leaf; + } + typedef SemanticValue T; using std::vector::iterator; using std::vector::const_iterator; @@ -515,6 +521,8 @@ struct Context } sv.s = nullptr; sv.n = 0; + sv.has_anchor = false; + sv.is_leaf = true; return sv; } @@ -618,6 +626,8 @@ public: sv.s = chldsv.s; sv.n = chldsv.n; sv.choice = id; + sv.has_anchor = chldsv.has_anchor; + sv.is_leaf = chldsv.is_leaf; c.pop(); return len; } @@ -880,6 +890,7 @@ public: if (success(len)) { sv.s = s; sv.n = len; + sv.has_anchor = true; } return len; } @@ -1286,6 +1297,7 @@ inline any Holder::reduce(const SemanticValues& sv, any& dt, const Action& actio inline size_t DefinitionReference::parse( const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { + sv.is_leaf = false; const auto& rule = *get_rule(); return rule.parse(s, n, sv, c, dt); } @@ -2013,33 +2025,15 @@ public: } } - enum AstNodeType { - Regular, - Optimizable, - Token - }; - struct AstNodeInfo { - AstNodeType type; const char* name; int tag; + bool optimize; }; - peg& ast(std::initializer_list list, int tag) { + peg& enable_ast(std::initializer_list list = {}, int tag = -1) { for (const auto& info: list) { - switch (info.type) { - case Regular: - ast_node(info.name, info.tag); - break; - case Optimizable: - ast_node_optimizable(info.name, info.tag); - break; - case Token: - ast_token(info.name, info.tag); - break; - default: - throw std::logic_error("Invalid Ast type was used..."); - } + ast_node(info); } ast_end(tag); return *this; @@ -2061,25 +2055,16 @@ private: } } - void ast_node(const char* name, int tag) { - (*this)[name] = [=](const SemanticValues& sv) { - return std::make_shared(name, tag, sv.map>()); - }; - } - - void ast_node_optimizable(const char* name, int tag) { - (*this)[name] = [=](const SemanticValues& sv) { - if (sv.size() == 1) { + void ast_node(const AstNodeInfo& info) { + (*this)[info.name] = [info](const SemanticValues& sv) { + if (sv.is_token()) { + return std::make_shared(info.name, info.tag, std::string(sv.s, sv.n)); + } + if (info.optimize && sv.size() == 1) { std::shared_ptr ast = sv[0].get>(); return ast; } - return std::make_shared(name, tag, sv.map>()); - }; - } - - void ast_token(const char* name, int tag) { - (*this)[name] = [=](const SemanticValues& sv) { - return std::make_shared(name, tag, std::string(sv.s, sv.n)); + return std::make_shared(info.name, info.tag, sv.map>()); }; } @@ -2089,7 +2074,10 @@ private: auto& def = x.second; auto& action = def.actions.front(); if (!action) { - action = [&](const SemanticValues& sv) { + action = [tag, name](const SemanticValues& sv) { + if (sv.is_token()) { + return std::make_shared(name.c_str(), tag, std::string(sv.s, sv.n)); + } if (sv.size() == 1) { std::shared_ptr ast = sv[0].get>(); return ast; diff --git a/test/test.cc b/test/test.cc index 316dd5b..f698b6e 100644 --- a/test/test.cc +++ b/test/test.cc @@ -422,6 +422,49 @@ TEST_CASE("Calculator test3", "[general]") REQUIRE(val == -3); } +TEST_CASE("Calculator test with AST", "[general]") +{ + peg parser( + " EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* " + " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " + " FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ " + " TERM_OPERATOR <- < [-+] > _ " + " FACTOR_OPERATOR <- < [/*] > _ " + " NUMBER <- < [0-9]+ > _ " + " ~_ <- [ \t\r\n]* " + ); + + const int kTagNumber = 0; + parser.enable_ast({ { "NUMBER", kTagNumber } }); + + function eval = [&](const Ast& ast) { + if (ast.tag == kTagNumber) { + return stol(ast.token); + } else { + const auto& nodes = ast.nodes; + auto result = eval(*nodes[0]); + for (auto i = 1u; i < nodes.size(); i += 2) { + auto num = eval(*nodes[i + 1]); + auto ope = nodes[i]->token[0]; + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } + }; + + shared_ptr ast; + auto ret = parser.parse("1+2*3*(4-5+6)/7-8", ast); + auto val = eval(*ast); + + REQUIRE(ret == true); + REQUIRE(val == -3); +} + TEST_CASE("Predicate test", "[general]") { peg parser("NUMBER <- [0-9]+"); diff --git a/test/test.sln b/test/test.sln index 314469f..1d496fe 100644 --- a/test/test.sln +++ b/test/test.sln @@ -25,7 +25,4 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection - GlobalSection(Performance) = preSolution - HasPerformanceSessions = true - EndGlobalSection EndGlobal