From 375794e344b8e22e6574a1812b28016047fa28ad Mon Sep 17 00:00:00 2001 From: yhirose Date: Fri, 7 Feb 2020 15:50:06 -0500 Subject: [PATCH] Support expression parsing in macro --- README.md | 74 ++++++++++++++++++------------- peglib.h | 102 ++++++++++++++++++++++++++++++------------- test/test2.cc | 118 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 232 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index cabc1cd..9f9994a 100644 --- a/README.md +++ b/README.md @@ -347,40 +347,54 @@ Parsing expressions by precedence climbing altorithm *cpp-peglib* supports [operator-precedence parsering](https://en.wikipedia.org/wiki/Operator-precedence_parser) by [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing) ```cpp - parser parser(R"( - EXPRESSION <- ATOM (OPERATOR ATOM)* { - precedence - L - + - L / * - } - ATOM <- NUMBER / '(' EXPRESSION ')' - OPERATOR <- < [-+/*] > - NUMBER <- < '-'? [0-9]+ > - %whitespace <- [ \t\r\n]* - )"); +parser parser(R"( + EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR) + PRECEDENCE_PARSING(A, O) <- A (O A)* { + precedence + L + - + L * / + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t]* +)"); - parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { - auto result = any_cast(sv[0]); - if (sv.size() > 1) { - auto ope = any_cast(sv[1]); - auto num = any_cast(sv[2]); - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - }; - parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; +parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; +}; +parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; +parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; - long val; - parser.parse(" -1 + (1 + 2) * 3 - -1", val); - assert(val == 9); +long val; +parser.parse(" -1 + (1 + 2) * 3 - -1", val); +assert(val == 9); ``` +*precedence* instruction can be applied only to the following 'list' style rule. + +``` +R <- A (B A)* { + precedence + L - + + L / * + R ^ +} +``` + +*precedence* instruction contains precedence info entries. Each entry starts with *associativity* which is 'L' (left) or 'R' (right), then operator tokens follow. The first entry has the highest order. + AST generation -------------- diff --git a/peglib.h b/peglib.h index d706b83..b433912 100644 --- a/peglib.h +++ b/peglib.h @@ -707,8 +707,7 @@ private: } template - Fty make_adaptor(F fn, - R (F::*)(const SemanticValues &sv, any &dt) const) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt) const) { return TypeAdaptor_csv_dt(fn); } @@ -1536,8 +1535,8 @@ public: PrecedenceClimbing(const std::shared_ptr &atom, const std::shared_ptr &binop, const BinOpeInfo &info, - const Action &action) - : atom_(atom), binop_(binop), info_(info), action_(action) {} + const Definition &rule) + : atom_(atom), binop_(binop), info_(info), rule_(rule) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { @@ -1549,11 +1548,13 @@ public: std::shared_ptr atom_; std::shared_ptr binop_; BinOpeInfo info_; - const Action &action_; + const Definition &rule_; private: size_t parse_expression(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt, size_t min_prec) const; + + Definition &get_reference_for_binop(Context &c) const; }; /* @@ -1660,8 +1661,8 @@ inline std::shared_ptr bkr(const std::string &name) { inline std::shared_ptr pre(const std::shared_ptr &atom, const std::shared_ptr &binop, const PrecedenceClimbing::BinOpeInfo &info, - const Action &action) { - return std::make_shared(atom, binop, info, action); + const Definition &rule) { + return std::make_shared(atom, binop, info, rule); } /* @@ -2281,6 +2282,8 @@ public: } std::string name; + const char *s = nullptr; + ; size_t id = 0; Action action; std::function enter; @@ -2584,6 +2587,18 @@ inline size_t BackReference::parse_core(const char *s, size_t n, throw std::runtime_error("Invalid back reference..."); } +inline Definition& PrecedenceClimbing::get_reference_for_binop(Context &c) const { + if (rule_.is_macro) { + // Reference parameter in macro + const auto &args = c.top_args(); + auto iarg = dynamic_cast(*binop_).iarg_; + auto arg = args[iarg]; + return *dynamic_cast(*arg).rule_; + } + + return *dynamic_cast(*binop_).rule_; +} + inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt, @@ -2592,10 +2607,11 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, if (fail(len)) { return len; } std::string tok; - auto &rule = dynamic_cast(*binop_).rule_; - auto action = rule->action; + //auto &rule = dynamic_cast(*binop_).rule_; + auto &rule = get_reference_for_binop(c); + auto action = rule.action; - rule->action = [&](SemanticValues &sv, any &dt) -> any { + rule.action = [&](SemanticValues &sv, any &dt) -> any { tok = sv.token(); if (action) { return action(sv, dt); @@ -2604,7 +2620,7 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, } return any(); }; - auto action_se = make_scope_exit([&]() { rule->action = action; }); + auto action_se = make_scope_exit([&]() { rule.action = action; }); auto save_error_pos = c.error_pos; @@ -2651,10 +2667,10 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, i += chl; any val; - if (action_) { + if (rule_.action) { sv.s_ = s; sv.n_ = i; - val = action_(sv, dt); + val = rule_.action(sv, dt); } else if (!sv.empty()) { val = sv[0]; } @@ -3018,6 +3034,7 @@ private: auto &rule = grammar[name]; rule <= ope; rule.name = name; + rule.s = sv.c_str(); rule.ignoreSemanticValue = ignore; rule.is_macro = is_macro; rule.params = params; @@ -3227,6 +3244,45 @@ private: g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); }; } + bool apply_precedence_instruction(Definition &rule, + const PrecedenceClimbing::BinOpeInfo &info, + const char *s, Log log) { + try { + auto &seq = dynamic_cast(*rule.get_core_operator()); + auto atom = seq.opes_[0]; + auto &seq1 = dynamic_cast( + *dynamic_cast(*seq.opes_[1]).ope_); + auto binop = seq1.opes_[0]; + auto atom1 = seq1.opes_[1]; + + auto atom_name = dynamic_cast(*atom).name_; + auto binop_name = dynamic_cast(*binop).name_; + auto atom1_name = dynamic_cast(*atom1).name_; + + if (atom_name != atom1_name || atom_name == binop_name) { + if (log) { + auto line = line_info(s, rule.s); + log(line.first, line.second, + "'precedence' instruction cannt be applied to '" + rule.name + + "'."); + } + return false; + } + + rule.holder_->ope_ = pre(atom, binop, info, rule); + rule.disable_action = true; + } catch (...) { + if (log) { + auto line = line_info(s, rule.s); + log(line.first, line.second, + "'precedence' instruction cannt be applied to '" + rule.name + + "'."); + } + return false; + } + return true; + } + std::shared_ptr perform_core(const char *s, size_t n, const Rules &rules, std::string &start, Log log) { @@ -3318,7 +3374,6 @@ private: log(line.first, line.second, "'" + name + "' is left recursive."); } ret = false; - ; } } @@ -3363,26 +3418,15 @@ private: for (const auto &item : data.instructions) { const auto &name = item.first; const auto &instruction = item.second; + auto &rule = grammar[name]; if (instruction.type == "precedence") { - auto &rule = grammar[name]; - - auto &seq = dynamic_cast(*rule.get_core_operator()); - auto &atom = seq.opes_[0]; - auto &seq1 = dynamic_cast( - *dynamic_cast(*seq.opes_[1]).ope_); - auto &binop = seq1.opes_[0]; - auto &atom1 = seq1.opes_[1]; - - if (atom != atom1) { - // TODO: check - } - const auto &info = any_cast(instruction.data); - rule.holder_->ope_ = pre(atom, binop, info, rule.action); - rule.disable_action = true; + if (!apply_precedence_instruction(rule, info, s, log)) { + return nullptr; + } } } diff --git a/test/test2.cc b/test/test2.cc index 0947d9f..bb342ff 100644 --- a/test/test2.cc +++ b/test/test2.cc @@ -100,9 +100,7 @@ TEST_CASE("Not infinite 3", "[infinite loop]") TEST_CASE("Precedence climbing", "[precedence]") { - // Create a PEG parser parser parser(R"( - # Grammar for simple calculator... START <- _ EXPRESSION EXPRESSION <- ATOM (OPERATOR ATOM)* { precedence @@ -156,8 +154,122 @@ TEST_CASE("Precedence climbing", "[precedence]") } } -TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") +TEST_CASE("Precedence climbing with macro", "[precedence]") { + // Create a PEG parser + parser parser(R"( + EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR) + PRECEDENCE_PARSING(A, O) <- A (O A)* { + precedence + L + - + L * / + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t]* + )"); + + bool ret = parser; + REQUIRE(ret == true); + + // Setup actions + parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + { + auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == -3); + } + + { + auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0 + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == 0); + } +} + +TEST_CASE("Precedence climbing error1", "[precedence]") +{ + parser parser(R"( + START <- _ EXPRESSION + EXPRESSION <- ATOM (OPERATOR ATOM1)* { + precedence + L + - + L * / + } + ATOM <- NUMBER / T('(') EXPRESSION T(')') + ATOM1 <- NUMBER / T('(') EXPRESSION T(')') + OPERATOR <- T([-+/*]) + NUMBER <- T('-'? [0-9]+) + ~_ <- [ \t]* + T(S) <- < S > _ + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Precedence climbing error2", "[precedence]") +{ + parser parser(R"( + START <- _ EXPRESSION + EXPRESSION <- ATOM OPERATOR ATOM { + precedence + L + - + L * / + } + ATOM <- NUMBER / T('(') EXPRESSION T(')') + OPERATOR <- T([-+/*]) + NUMBER <- T('-'? [0-9]+) + ~_ <- [ \t]* + T(S) <- < S > _ + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Precedence climbing error3", "[precedence]") { + parser parser(R"( + EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR) + PRECEDENCE_PARSING(A, O) <- A (O A)+ { + precedence + L + - + L * / + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t]* + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") { peg::parser parser(R"( ROOT <- 'a' %whitespace <- SPACE*