Support expression parsing in macro

2024-12-22 20:05:31 +00:00 · 2020-02-07 15:50:06 -05:00 · 2020-02-07 15:50:06 -05:00 · 375794e344
commit 375794e344
parent 4b25458012
3 changed files with 232 additions and 62 deletions
--- a/README.md
+++ b/README.md
@ -347,40 +347,54 @@ Parsing expressions by precedence climbing algorithm
 *cpp-peglib* supports [operator-precedence parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser) using the [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing).
 ```cpp
-  parser parser(R"(
+parser parser(R"(
-      EXPRESSION  <- ATOM (OPERATOR ATOM)* {
+    EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
-                       precedence
+    PRECEDENCE_PARSING(A, O) <-  A (O A)* {
-                         L - +
+                                   precedence
-                         L / *
+                                     L + - 
-                     }
+                                     L * /
-      ATOM        <- NUMBER / '(' EXPRESSION ')'
+                                 }
-      OPERATOR    <- < [-+/*] >
+    ATOM                     <-  NUMBER / '(' EXPRESSION ')'
-      NUMBER      <- < '-'? [0-9]+ >
+    OPERATOR                 <-  < [-+/*] >
-      %whitespace <- [ \t\r\n]*
+    NUMBER                   <-  < '-'? [0-9]+ >
-  )");
+    %whitespace              <-  [ \t]*
 )");
-  parser["EXPRESSION"] = [](const SemanticValues& sv) -> long {
+parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
-      auto result = any_cast<long>(sv[0]);
+    auto result = any_cast<long>(sv[0]);
-      if (sv.size() > 1) {
+    if (sv.size() > 1) {
-          auto ope = any_cast<char>(sv[1]);
+        auto ope = any_cast<char>(sv[1]);
-          auto num = any_cast<long>(sv[2]);
+        auto num = any_cast<long>(sv[2]);
-          switch (ope) {
+        switch (ope) {
-              case '+': result += num; break;
+            case '+': result += num; break;
-              case '-': result -= num; break;
+            case '-': result -= num; break;
-              case '*': result *= num; break;
+            case '*': result *= num; break;
-              case '/': result /= num; break;
+            case '/': result /= num; break;
-          }
+        }
-      }
+    }
-      return result;
+    return result;
-  };
+};
-  parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
+parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
-  parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
+parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
-  long val;
+long val;
-  parser.parse(" -1 + (1 + 2) * 3 - -1", val);
+parser.parse(" -1 + (1 + 2) * 3 - -1", val);
-  assert(val == 9);
+assert(val == 9);
 ```
 The *precedence* instruction can be applied only to a 'list'-style rule of the following form:
 ```
 R <- A (B A)* {
  precedence
    L - +
    L / *
    R ^
 }
 ```
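 The *precedence* instruction contains a list of precedence entries. Each entry starts with its *associativity*, which is 'L' (left) or 'R' (right), followed by the operator tokens that share that level. Entries are listed in increasing binding strength: the first entry binds the loosest and the last entry binds the tightest (in the rule above, `^` binds tighter than `/ *`, which in turn bind tighter than `- +`).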
 AST generation
 --------------
--- a/peglib.h
+++ b/peglib.h
@ -707,8 +707,7 @@ private:
  }
  template <typename F, typename R>
-  Fty make_adaptor(F fn,
+  Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt) const) {
                   R (F::*)(const SemanticValues &sv, any &dt) const) {
    return TypeAdaptor_csv_dt<R>(fn);
  }
@ -1536,8 +1535,8 @@ public:
  PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
                     const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
-                     const Action &action)
+                     const Definition &rule)
-      : atom_(atom), binop_(binop), info_(info), action_(action) {}
+      : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
  size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c,
                    any &dt) const override {
@ -1549,11 +1548,13 @@ public:
  std::shared_ptr<Ope> atom_;
  std::shared_ptr<Ope> binop_;
  BinOpeInfo info_;
-  const Action &action_;
+  const Definition &rule_;
 private:
  size_t parse_expression(const char *s, size_t n, SemanticValues &sv,
                          Context &c, any &dt, size_t min_prec) const;
  Definition &get_reference_for_binop(Context &c) const;
 };
 /*
@ -1660,8 +1661,8 @@ inline std::shared_ptr<Ope> bkr(const std::string &name) {
 inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
                                const std::shared_ptr<Ope> &binop,
                                const PrecedenceClimbing::BinOpeInfo &info,
-                                const Action &action) {
+                                const Definition &rule) {
-  return std::make_shared<PrecedenceClimbing>(atom, binop, info, action);
+  return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
 }
 /*
@ -2281,6 +2282,8 @@ public:
  }
  std::string name;
  const char *s = nullptr;
  ;
  size_t id = 0;
  Action action;
  std::function<void(const char *s, size_t n, any &dt)> enter;
@ -2584,6 +2587,18 @@ inline size_t BackReference::parse_core(const char *s, size_t n,
  throw std::runtime_error("Invalid back reference...");
 }
 // Returns the Definition that the binary-operator operand (binop_) refers to.
 //
 // For an ordinary rule, binop_ is a Reference and its rule_ is returned
 // directly. When the owning rule is a macro, binop_ is treated as a reference
 // to a macro parameter: its iarg_ indexes into c.top_args(), and the rule_ of
 // the Reference found there is returned instead.
 //
 // Every dynamic_cast here targets a reference type, so an operand that is not
 // a Reference raises std::bad_cast rather than yielding a null pointer.
 inline Definition& PrecedenceClimbing::get_reference_for_binop(Context &c) const {
  if (!rule_.is_macro) {
    // Plain rule: the operator operand names its definition directly.
    return *dynamic_cast<Reference &>(*binop_).rule_;
  }

  // Macro rule: resolve the parameter through the arguments held by the context.
  const auto &macro_args = c.top_args();
  const auto param_index = dynamic_cast<Reference &>(*binop_).iarg_;
  auto bound_ope = macro_args[param_index];
  return *dynamic_cast<Reference &>(*bound_ope).rule_;
 }
 inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
                                                   SemanticValues &sv,
                                                   Context &c, any &dt,
@ -2592,10 +2607,11 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
  if (fail(len)) { return len; }
  std::string tok;
-  auto &rule = dynamic_cast<Reference &>(*binop_).rule_;
+  //auto &rule = dynamic_cast<Reference &>(*binop_).rule_;
-  auto action = rule->action;
+  auto &rule = get_reference_for_binop(c);
  auto action = rule.action;
-  rule->action = [&](SemanticValues &sv, any &dt) -> any {
+  rule.action = [&](SemanticValues &sv, any &dt) -> any {
    tok = sv.token();
    if (action) {
      return action(sv, dt);
@ -2604,7 +2620,7 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
    }
    return any();
  };
-  auto action_se = make_scope_exit([&]() { rule->action = action; });
+  auto action_se = make_scope_exit([&]() { rule.action = action; });
  auto save_error_pos = c.error_pos;
@ -2651,10 +2667,10 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
    i += chl;
    any val;
-    if (action_) {
+    if (rule_.action) {
      sv.s_ = s;
      sv.n_ = i;
-      val = action_(sv, dt);
+      val = rule_.action(sv, dt);
    } else if (!sv.empty()) {
      val = sv[0];
    }
@ -3018,6 +3034,7 @@ private:
        auto &rule = grammar[name];
        rule <= ope;
        rule.name = name;
        rule.s = sv.c_str();
        rule.ignoreSemanticValue = ignore;
        rule.is_macro = is_macro;
        rule.params = params;
@ -3227,6 +3244,45 @@ private:
    g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); };
  }
  // Installs precedence-climbing parsing on `rule`.
  //
  // The rule's core operator must have the shape `A (B A)*`: a Sequence whose
  // first element is a Reference (the atom) and whose second element is a
  // ZeroOrMore wrapping a Sequence of two References (operator, atom). Both
  // atom references must carry the same name, and that name must differ from
  // the operator's name.
  //
  // On success the rule's held operator is replaced by the one built with
  // pre(atom, binop, info, rule), and rule.disable_action is set so the
  // regular action dispatch is switched off for this rule (the
  // PrecedenceClimbing operator calls rule.action itself).
  //
  // Parameters:
  //   rule - definition carrying the 'precedence' instruction; modified on
  //          success only.
  //   info - operator precedence/associativity table passed through to pre().
  //   s    - grammar text; used together with rule.s by line_info() to locate
  //          the rule for the diagnostic.
  //   log  - optional diagnostic callback; nothing is reported when it is
  //          empty.
  //
  // Returns true when the instruction was applied, false when the rule does
  // not have the required shape (a diagnostic is logged in that case).
  bool apply_precedence_instruction(Definition &rule,
                                    const PrecedenceClimbing::BinOpeInfo &info,
                                    const char *s, Log log) {
    // Single place that reports the rejection, so both failure paths emit the
    // same message. Always yields false for direct use in `return`.
    auto reject = [&]() {
      if (log) {
        auto line = line_info(s, rule.s);
        log(line.first, line.second,
            "'precedence' instruction cannot be applied to '" + rule.name +
                "'.");
      }
      return false;
    };

    try {
      // Each reference-typed dynamic_cast doubles as a shape check: a
      // mismatch throws and is handled by the catch block below.
      auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
      auto atom = seq.opes_[0];
      auto &seq1 = dynamic_cast<Sequence &>(
          *dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
      auto binop = seq1.opes_[0];
      auto atom1 = seq1.opes_[1];

      const auto &atom_name = dynamic_cast<Reference &>(*atom).name_;
      const auto &binop_name = dynamic_cast<Reference &>(*binop).name_;
      const auto &atom1_name = dynamic_cast<Reference &>(*atom1).name_;

      // Both operands must be the same rule, and the operator must be a
      // different rule from the operands.
      if (atom_name != atom1_name || atom_name == binop_name) {
        return reject();
      }

      rule.holder_->ope_ = pre(atom, binop, info, rule);
      rule.disable_action = true;
    } catch (...) {
      // Deliberately broad: any failure while inspecting the rule means the
      // instruction cannot be applied.
      return reject();
    }

    return true;
  }
  std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
                                        const Rules &rules, std::string &start,
                                        Log log) {
@ -3318,7 +3374,6 @@ private:
          log(line.first, line.second, "'" + name + "' is left recursive.");
        }
        ret = false;
        ;
      }
    }
@ -3363,26 +3418,15 @@ private:
    for (const auto &item : data.instructions) {
      const auto &name = item.first;
      const auto &instruction = item.second;
      auto &rule = grammar[name];
      if (instruction.type == "precedence") {
        auto &rule = grammar[name];
        auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
        auto &atom = seq.opes_[0];
        auto &seq1 = dynamic_cast<Sequence &>(
            *dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
        auto &binop = seq1.opes_[0];
        auto &atom1 = seq1.opes_[1];
        if (atom != atom1) {
          // TODO: check
        }
        const auto &info =
            any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
-        rule.holder_->ope_ = pre(atom, binop, info, rule.action);
+        if (!apply_precedence_instruction(rule, info, s, log)) {
-        rule.disable_action = true;
+          return nullptr;
        }
      }
    }
--- a/test/test2.cc
+++ b/test/test2.cc
@ -100,9 +100,7 @@ TEST_CASE("Not infinite 3", "[infinite loop]")
 TEST_CASE("Precedence climbing", "[precedence]")
 {
    // Create a PEG parser
    parser parser(R"(
        # Grammar for simple calculator...
        START            <-  _ EXPRESSION
        EXPRESSION       <-  ATOM (OPERATOR ATOM)* {
                               precedence
@ -156,8 +154,122 @@ TEST_CASE("Precedence climbing", "[precedence]")
    }
 }
-TEST_CASE("Packrat parser test with %whitespace%", "[packrat]")
+TEST_CASE("Precedence climbing with macro", "[precedence]")
 {
    // Verifies that a 'precedence' instruction attached to a parameterized
    // (macro) rule works when the atom and operator are passed as arguments.
    // Create a PEG parser
    parser parser(R"(
        EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
        PRECEDENCE_PARSING(A, O) <-  A (O A)* {
                                       precedence
                                         L + - 
                                         L * /
                                     }
        ATOM                     <-  NUMBER / '(' EXPRESSION ')'
        OPERATOR                 <-  < [-+/*] >
        NUMBER                   <-  < '-'? [0-9]+ >
        %whitespace              <-  [ \t]*
 	)");
    // The grammar itself must load successfully.
    bool ret = parser;
    REQUIRE(ret == true);
    // Setup actions
    // The action is registered on the macro rule. With a single value it
    // returns that operand unchanged; otherwise sv[1] is the operator
    // character and sv[2] the right-hand operand folded into sv[0].
    parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
        auto result = any_cast<long>(sv[0]);
        if (sv.size() > 1) {
            auto ope = any_cast<char>(sv[1]);
            auto num = any_cast<long>(sv[2]);
            switch (ope) {
                case '+': result += num; break;
                case '-': result -= num; break;
                case '*': result *= num; break;
                case '/': result /= num; break;
            }
        }
        return result;
    };
    // OPERATOR yields the first character of its match; NUMBER yields the
    // match converted with atol.
    parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
    parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
    {
        // 1 + ((2 * 3 * 5) / 7) - 8 = 1 + 4 - 8 = -3 (integer division on long),
        // so '*' and '/' must bind tighter than '+' and '-'.
        auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
        long val = 0;
        ret = parser.parse(expr, val);
        REQUIRE(ret == true);
        REQUIRE(val == -3);
    }
    {
      // No whitespace: a '-' directly before digits is part of NUMBER.
      auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
      long val = 0;
      ret = parser.parse(expr, val);
      REQUIRE(ret == true);
      REQUIRE(val == 0);
    }
 }
 TEST_CASE("Precedence climbing error1", "[precedence]")
 {
    // The operand after OPERATOR (ATOM1) is a different rule from the leading
    // operand (ATOM), so the 'precedence' instruction must be rejected and the
    // grammar must fail to load.
    parser parser(R"(
        START            <-  _ EXPRESSION
        EXPRESSION       <-  ATOM (OPERATOR ATOM1)* {
                               precedence
                                 L + -
                                 L * /
                             }
        ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
        ATOM1            <-  NUMBER / T('(') EXPRESSION T(')')
        OPERATOR         <-  T([-+/*])
        NUMBER           <-  T('-'? [0-9]+)
 		~_               <-  [ \t]*
 		T(S)             <-  < S > _
 	)");
    // Converting the parser to bool reports whether the grammar loaded.
    bool ret = parser;
    REQUIRE(ret == false);
 }
 TEST_CASE("Precedence climbing error2", "[precedence]")
 {
    // EXPRESSION is a flat `ATOM OPERATOR ATOM` sequence rather than the
    // required `A (B A)*` list form, so the 'precedence' instruction must be
    // rejected and the grammar must fail to load.
    parser parser(R"(
        START            <-  _ EXPRESSION
        EXPRESSION       <-  ATOM OPERATOR ATOM {
                               precedence
                                 L + -
                                 L * /
                             }
        ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
        OPERATOR         <-  T([-+/*])
        NUMBER           <-  T('-'? [0-9]+)
 		~_               <-  [ \t]*
 		T(S)             <-  < S > _
 	)");
    // Converting the parser to bool reports whether the grammar loaded.
    bool ret = parser;
    REQUIRE(ret == false);
 }
 TEST_CASE("Precedence climbing error3", "[precedence]") {
    // The macro rule repeats `(O A)` with `+` (one or more) instead of `*`
    // (zero or more), so it is not in the required `A (B A)*` form; the
    // 'precedence' instruction must be rejected and the grammar must fail to
    // load.
    parser parser(R"(
        EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
        PRECEDENCE_PARSING(A, O) <-  A (O A)+ {
                                       precedence
                                         L + - 
                                         L * /
                                     }
        ATOM                     <-  NUMBER / '(' EXPRESSION ')'
        OPERATOR                 <-  < [-+/*] >
        NUMBER                   <-  < '-'? [0-9]+ >
        %whitespace              <-  [ \t]*
 	)");
    // Converting the parser to bool reports whether the grammar loaded.
    bool ret = parser;
    REQUIRE(ret == false);
 }
 TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") {
    peg::parser parser(R"(
        ROOT         <-  'a'
        %whitespace  <-  SPACE*