From 038bc063435f8b18f2dddec44dc87368a0839096 Mon Sep 17 00:00:00 2001 From: yhirose Date: Wed, 5 Feb 2020 16:03:02 -0500 Subject: [PATCH] Precedence climbing feature support --- peglib.h | 265 ++++++++++++++++++++++++++++++++++++++++++++++----- test/test.cc | 60 ++++++++++++ 2 files changed, 299 insertions(+), 26 deletions(-) diff --git a/peglib.h b/peglib.h index cfe0c71..9bd8c03 100644 --- a/peglib.h +++ b/peglib.h @@ -532,6 +532,7 @@ private: friend class Sequence; friend class PrioritizedChoice; friend class Holder; + friend class PrecedenceClimbing; const char *s_ = nullptr; size_t n_ = 0; @@ -671,63 +672,63 @@ private: typedef std::function Fty; template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv) const) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv) const) { return TypeAdaptor_sv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues &sv) const) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv) const) { return TypeAdaptor_csv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv)) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv)) { return TypeAdaptor_sv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues &sv)) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv)) { return TypeAdaptor_csv(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(SemanticValues &sv)) { + Fty make_adaptor(F fn, R (*)(SemanticValues &sv)) { return TypeAdaptor_sv(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(const SemanticValues &sv)) { + Fty make_adaptor(F fn, R (*)(const SemanticValues &sv)) { return TypeAdaptor_csv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv, any &dt) const) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv, any &dt) const) { return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, - R (F::* /*mf*/)(const SemanticValues &sv, any &dt) const) { + R (F::*)(const SemanticValues &sv, any &dt) const) { return TypeAdaptor_csv_dt(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv, any &dt)) { return TypeAdaptor_sv_dt(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt)) { return TypeAdaptor_csv_dt(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (*)(SemanticValues &sv, any &dt)) { return TypeAdaptor_sv_dt(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(const SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (*)(const SemanticValues &sv, any &dt)) { return TypeAdaptor_csv_dt(fn); } @@ -1529,6 +1530,32 @@ public: std::string name_; }; +class PrecedenceClimbing : public Ope { +public: + using BinOpeInfo = std::map>; + + PrecedenceClimbing(const std::shared_ptr &atom, + const std::shared_ptr &binop, const BinOpeInfo &info, + const Action &action) + : atom_(atom), binop_(binop), info_(info), action_(action) {} + + size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, + any &dt) const override { + return parse_expression(s, n, sv, c, dt, 0); + } + + void accept(Visitor &v) override; + + std::shared_ptr atom_; + std::shared_ptr binop_; + BinOpeInfo info_; + const Action &action_; + +private: + size_t parse_expression(const char *s, size_t n, SemanticValues &sv, + Context &c, any &dt, size_t min_prec) const; +}; + /* * Factories */ @@ -1630,6 +1657,13 @@ inline std::shared_ptr bkr(const std::string &name) { return std::make_shared(name); } +inline std::shared_ptr pre(const std::shared_ptr &atom, + const std::shared_ptr &binop, + const PrecedenceClimbing::BinOpeInfo &info, + const Action &action) { + return std::make_shared(atom, binop, info, action); +} + /* * Visitor */ @@ -1656,6 +1690,7 @@ struct Ope::Visitor { virtual void visit(Reference & /*ope*/) {} virtual void visit(Whitespace & /*ope*/) {} virtual void visit(BackReference & /*ope*/) {} + virtual void visit(PrecedenceClimbing & /*ope*/) {} }; struct IsReference : public Ope::Visitor { @@ -1685,6 +1720,7 @@ struct TraceOpeName : public Ope::Visitor { void visit(Reference &ope) override { name = "Reference"; } void visit(Whitespace &ope) override { name = "Whitespace"; } void visit(BackReference &ope) override { name = "BackReference"; } + void visit(PrecedenceClimbing &ope) override { name = "PrecedenceClimbing"; } const char *name = nullptr; }; @@ -1758,6 +1794,7 @@ struct TokenChecker : public Ope::Visitor { void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } static bool is_token(Ope &ope) { if (IsLiteralToken::check(ope)) { return true; } @@ -1829,6 +1866,7 @@ struct DetectLeftRecursion : public Ope::Visitor { void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } void visit(BackReference & /*ope*/) override { done_ = true; } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } const char *error_s = nullptr; @@ -1878,6 +1916,7 @@ struct HasEmptyElement : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } bool is_empty = false; const char *error_s = nullptr; @@ -1938,6 +1977,7 @@ struct DetectInfiniteLoop : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } bool has_error = false; const char *error_s = nullptr; @@ -1975,6 +2015,7 @@ struct ReferenceChecker : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } std::unordered_map error_s; std::unordered_map error_message; @@ -2011,6 +2052,7 @@ struct LinkReferences : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } private: Grammar &grammar_; @@ -2089,6 +2131,10 @@ struct FindReference : public Ope::Visitor { ope.ope_->accept(*this); found_ope = wsp(found_ope); } + void visit(PrecedenceClimbing &ope) override { + ope.atom_->accept(*this); + found_ope = csc(found_ope); + } std::shared_ptr found_ope; @@ -2250,9 +2296,11 @@ public: std::vector params; TracerEnter tracer_enter; TracerLeave tracer_leave; + bool disable_action = false; private: friend class Reference; + friend class ParserGenerator; Definition &operator=(const Definition &rhs); Definition &operator=(Definition &&rhs); @@ -2471,7 +2519,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &sv, } inline any Holder::reduce(SemanticValues &sv, any &dt) const { - if (outer_->action) { + if (outer_->action && !outer_->disable_action) { return outer_->action(sv, dt); } else if (sv.empty()) { return any(); @@ -2523,7 +2571,8 @@ inline std::shared_ptr Reference::get_core_operator() const { inline size_t BackReference::parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const { - for (int i = c.capture_scope_stack_size - 1; i >= 0; i--) { + auto size = static_cast(c.capture_scope_stack_size); + for (auto i = size - 1; i >= 0; i--) { const auto &cs = c.capture_scope_stack[i]; if (cs.find(name_) != cs.end()) { const auto &lit = cs.at(name_); @@ -2535,6 +2584,87 @@ inline size_t BackReference::parse_core(const char *s, size_t n, throw std::runtime_error("Invalid back reference..."); } +inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, + SemanticValues &sv, + Context &c, any &dt, + size_t min_prec) const { + auto len = atom_->parse(s, n, sv, c, dt); + if (fail(len)) { return len; } + + std::string tok; + auto &rule = dynamic_cast(*binop_).rule_; + auto action = rule->action; + + rule->action = [&](SemanticValues &sv, any &dt) -> any { + tok = sv.token(); + if (action) { + return action(sv, dt); + } else if (!sv.empty()) { + return sv[0]; + } + return any(); + }; + auto action_se = make_scope_exit([&]() { rule->action = action; }); + + auto save_error_pos = c.error_pos; + + auto i = len; + while (i < n) { + std::vector save_values(sv.begin(), sv.end()); + auto save_tokens = sv.tokens; + + auto chv = c.push(); + auto chl = binop_->parse(s + i, n - i, chv, c, dt); + c.pop(); + + if (fail(chl)) { + c.error_pos = save_error_pos; + break; + } + + auto it = info_.find(tok); + if (it == info_.end()) { break; } + + auto level = std::get<0>(it->second); + auto assoc = std::get<1>(it->second); + + if (level < min_prec) { break; } + + sv.emplace_back(std::move(chv[0])); + i += chl; + + auto next_min_prec = level; + if (assoc == 'L') { next_min_prec = level + 1; } + + chv = c.push(); + chl = parse_expression(s + i, n - i, chv, c, dt, next_min_prec); + c.pop(); + + if (fail(chl)) { + sv.assign(save_values.begin(), save_values.end()); + sv.tokens = save_tokens; + c.error_pos = save_error_pos; + break; + } + + sv.emplace_back(std::move(chv[0])); + i += chl; + + any val; + if (action_) { + sv.s_ = s; + sv.n_ = i; + val = action_(sv, dt); + } else if (!sv.empty()) { + val = sv[0]; + } + sv.clear(); + sv.emplace_back(std::move(val)); + } + + return i; +} + inline void Sequence::accept(Visitor &v) { v.visit(*this); } inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); } inline void ZeroOrMore::accept(Visitor &v) { v.visit(*this); } @@ -2556,6 +2686,7 @@ inline void Holder::accept(Visitor &v) { v.visit(*this); } inline void Reference::accept(Visitor &v) { v.visit(*this); } inline void Whitespace::accept(Visitor &v) { v.visit(*this); } inline void BackReference::accept(Visitor &v) { v.visit(*this); } +inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); } inline void AssignIDToDefinition::visit(Holder &ope) { auto p = static_cast(ope.outer_); @@ -2717,11 +2848,17 @@ private: setup_actions(); } + struct Instruction { + std::string type; + any data; + }; + struct Data { std::shared_ptr grammar; std::string start; const char *start_pos = nullptr; std::vector> duplicates; + std::map instructions; Data() : grammar(std::make_shared()) {} }; @@ -2731,9 +2868,9 @@ private: g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); g["Definition"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"], - g["Expression"]), - seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"])); - + g["Expression"], opt(g["Instruction"])), + seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"], + opt(g["Instruction"]))); g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); g["Sequence"] <= zom(g["Prefix"]); g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]); @@ -2826,6 +2963,27 @@ private: zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]); ~g["COMMA"] <= seq(chr(','), g["Spacing"]); + // Instruction grammars + g["Instruction"] <= + seq(g["BeginBlacket"], cho(g["PrecedenceClimbing"]), g["EndBlacket"]); + + ~g["SpacesZom"] <= zom(g["Space"]); + ~g["SpacesOom"] <= oom(g["Space"]); + ~g["BeginBlacket"] <= seq(chr('{'), g["Spacing"]); + ~g["EndBlacket"] <= seq(chr('}'), g["Spacing"]); + + // PrecedenceClimbing instruction + g["PrecedenceClimbing"] <= + seq(lit("precedence"), g["SpacesZom"], g["PrecedenceInfo"], + zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]); + g["PrecedenceInfo"] <= + seq(g["PrecedenceAssoc"], + oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"]))); + g["PrecedenceOpe"] <= + tok(oom( + seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))), dot()))); + g["PrecedenceAssoc"] <= cls("LR"); + // Set definition names for (auto &x : g) { x.second.name = x.first; @@ -2834,6 +2992,8 @@ private: void setup_actions() { g["Definition"] = [&](const SemanticValues &sv, any &dt) { + Data &data = *any_cast(dt); + auto is_macro = sv.choice() == 0; auto ignore = any_cast(sv[0]); auto name = any_cast(sv[1]); @@ -2843,12 +3003,16 @@ private: if (is_macro) { params = any_cast>(sv[2]); ope = any_cast>(sv[4]); + if (sv.size() == 6) { + data.instructions[name] = any_cast(sv[5]); + } } else { ope = any_cast>(sv[3]); + if (sv.size() == 5) { + data.instructions[name] = any_cast(sv[4]); + } } - Data &data = *any_cast(dt); - auto &grammar = *data.grammar; if (!grammar.count(name)) { auto &rule = grammar[name]; @@ -2928,8 +3092,7 @@ private: } }; - g["Primary"] = [&](const SemanticValues &sv, - any &dt) -> std::shared_ptr { + g["Primary"] = [&](const SemanticValues &sv, any &dt) { Data &data = *any_cast(dt); switch (sv.choice()) { @@ -2944,10 +3107,13 @@ private: args = any_cast>>(sv[2]); } + std::shared_ptr ope = + ref(*data.grammar, ident, sv.c_str(), is_macro, args); + if (ignore) { - return ign(ref(*data.grammar, ident, sv.c_str(), is_macro, args)); + return ign(ope); } else { - return ref(*data.grammar, ident, sv.c_str(), is_macro, args); + return ope; } } case 2: { // (Expression) @@ -3036,6 +3202,29 @@ private: g["Arguments"] = [](const SemanticValues &sv) { return sv.transform>(); }; + + g["PrecedenceClimbing"] = [](const SemanticValues &sv) { + PrecedenceClimbing::BinOpeInfo binOpeInfo; + size_t level = 1; + for (auto v : sv) { + auto tokens = any_cast>(v); + auto assoc = tokens[0][0]; + for (size_t i = 1; i < tokens.size(); i++) { + const auto &tok = tokens[i]; + binOpeInfo[tok] = std::make_pair(level, assoc); + } + level++; + } + Instruction instruction; + instruction.type = "precedence"; + instruction.data = binOpeInfo; + return instruction; + }; + g["PrecedenceInfo"] = [](const SemanticValues &sv) { + return sv.transform(); + }; + g["PrecedenceOpe"] = [](const SemanticValues &sv) { return sv.token(); }; + g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); }; } std::shared_ptr perform_core(const char *s, size_t n, @@ -3170,6 +3359,33 @@ private: (*data.grammar)[WORD_DEFINITION_NAME].get_core_operator(); } + // Apply instructions + for (const auto &item : data.instructions) { + const auto &name = item.first; + const auto &instruction = item.second; + + if (instruction.type == "precedence") { + auto &rule = grammar[name]; + + auto &seq = dynamic_cast(*rule.get_core_operator()); + auto &atom = seq.opes_[0]; + auto &seq1 = dynamic_cast( + *dynamic_cast(*seq.opes_[1]).ope_); + auto &binop = seq1.opes_[0]; + auto &atom1 = seq1.opes_[1]; + + if (atom != atom1) { + // TODO: check + } + + const auto &info = + any_cast(instruction.data); + + rule.holder_->ope_ = pre(atom, binop, info, rule.action); + rule.disable_action = true; + } + } + // Set root definition start = data.start; @@ -3241,7 +3457,6 @@ template struct AstBase : public Annotation { template void ast_to_s_core(const std::shared_ptr &ptr, std::string &s, int level, std::function fn) { - const auto &ast = *ptr; for (auto i = 0; i < level; i++) { s += " "; @@ -3266,7 +3481,6 @@ template std::string ast_to_s(const std::shared_ptr &ptr, std::function fn = nullptr) { - std::string s; ast_to_s_core(ptr, s, 0, fn); return s; @@ -3280,7 +3494,6 @@ struct AstOptimizer { template std::shared_ptr optimize(std::shared_ptr original, std::shared_ptr parent = nullptr) { - auto found = std::find(filters_.begin(), filters_.end(), original->name) != filters_.end(); bool opt = optimize_nodes_ ? !found : found; diff --git a/test/test.cc b/test/test.cc index 189ac42..5d4264f 100644 --- a/test/test.cc +++ b/test/test.cc @@ -201,6 +201,66 @@ TEST_CASE("String capture test", "[general]") using namespace peg; +TEST_CASE("Precedence climbing", "[precedence]") +{ + // Create a PEG parser + parser parser(R"( + # Grammar for simple calculator... + START <- _ EXPRESSION + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L + - + L * / + } + ATOM <- NUMBER / T('(') EXPRESSION T(')') + OPERATOR <- T([-+/*]) + NUMBER <- T('-'? [0-9]+) + ~_ <- [ \t]* + T(S) <- < S > _ + )"); + + // Setup actions + auto reduce = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + for (auto i = 1u; i < sv.size(); i += 2) { + auto num = any_cast(sv[i + 1]); + auto ope = any_cast(sv[i]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + + parser["EXPRESSION"] = reduce; + parser["OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + bool ret = parser; + REQUIRE(ret == true); + + { + auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == -3); + } + + { + auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0 + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == 0); + } +} + TEST_CASE("String capture test2", "[general]") { std::vector tags;