From 375794e344b8e22e6574a1812b28016047fa28ad Mon Sep 17 00:00:00 2001
From: yhirose <yuji.hirose.bug@gmail.com>
Date: Fri, 7 Feb 2020 15:50:06 -0500
Subject: [PATCH] Support expression parsing in macro

---
 README.md     |  74 ++++++++++++++++++-------------
 peglib.h      | 102 ++++++++++++++++++++++++++++++-------------
 test/test2.cc | 118 ++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 232 insertions(+), 62 deletions(-)
diff --git a/README.md b/README.md
index cabc1cd..9f9994a 100644
--- a/README.md
+++ b/README.md
@@ -347,40 +347,54 @@ Parsing expressions by precedence climbing altorithm
 *cpp-peglib* supports [operator-precedence parsering](https://en.wikipedia.org/wiki/Operator-precedence_parser) by [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing)
 
 ```cpp
-  parser parser(R"(
-      EXPRESSION  <- ATOM (OPERATOR ATOM)* {
-                       precedence
-                         L - +
-                         L / *
-                     }
-      ATOM        <- NUMBER / '(' EXPRESSION ')'
-      OPERATOR    <- < [-+/*] >
-      NUMBER      <- < '-'? [0-9]+ >
-      %whitespace <- [ \t\r\n]*
-  )");
+parser parser(R"(
+    EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
+    PRECEDENCE_PARSING(A, O) <-  A (O A)* {
+                                   precedence
+                                     L + - 
+                                     L * /
+                                 }
+    ATOM                     <-  NUMBER / '(' EXPRESSION ')'
+    OPERATOR                 <-  < [-+/*] >
+    NUMBER                   <-  < '-'? [0-9]+ >
+    %whitespace              <-  [ \t]*
+)");
 
-  parser["EXPRESSION"] = [](const SemanticValues& sv) -> long {
-      auto result = any_cast<long>(sv[0]);
-      if (sv.size() > 1) {
-          auto ope = any_cast<char>(sv[1]);
-          auto num = any_cast<long>(sv[2]);
-          switch (ope) {
-              case '+': result += num; break;
-              case '-': result -= num; break;
-              case '*': result *= num; break;
-              case '/': result /= num; break;
-          }
-      }
-      return result;
-  };
-  parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
-  parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
+parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
+    auto result = any_cast<long>(sv[0]);
+    if (sv.size() > 1) {
+        auto ope = any_cast<char>(sv[1]);
+        auto num = any_cast<long>(sv[2]);
+        switch (ope) {
+            case '+': result += num; break;
+            case '-': result -= num; break;
+            case '*': result *= num; break;
+            case '/': result /= num; break;
+        }
+    }
+    return result;
+};
+parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
+parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
 
-  long val;
-  parser.parse(" -1 + (1 + 2) * 3 - -1", val);
-  assert(val == 9);
+long val;
+parser.parse(" -1 + (1 + 2) * 3 - -1", val);
+assert(val == 9);
 ```
 
+The *precedence* instruction can only be applied to a rule written in the following 'list' style.
+
+```
+R <- A (B A)* {
+  precedence
+    L - +
+    L / *
+    R ^
+}
+```
+
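+The *precedence* instruction contains precedence info entries. Each entry starts with its *associativity*, which is 'L' (left) or 'R' (right), followed by the operator tokens. Entries are listed from the lowest precedence to the highest: operators in the first entry bind least tightly (in the example above, `^` binds most tightly).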
+
 AST generation
 --------------
 
diff --git a/peglib.h b/peglib.h
index d706b83..b433912 100644
--- a/peglib.h
+++ b/peglib.h
@@ -707,8 +707,7 @@ private:
   }
 
   template <typename F, typename R>
-  Fty make_adaptor(F fn,
-                   R (F::*)(const SemanticValues &sv, any &dt) const) {
+  Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt) const) {
     return TypeAdaptor_csv_dt<R>(fn);
   }
 
@@ -1536,8 +1535,8 @@ public:
 
   PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
                      const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
-                     const Action &action)
-      : atom_(atom), binop_(binop), info_(info), action_(action) {}
+                     const Definition &rule)
+      : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
 
   size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c,
                     any &dt) const override {
@@ -1549,11 +1548,13 @@ public:
   std::shared_ptr<Ope> atom_;
   std::shared_ptr<Ope> binop_;
   BinOpeInfo info_;
-  const Action &action_;
+  const Definition &rule_;
 
 private:
   size_t parse_expression(const char *s, size_t n, SemanticValues &sv,
                           Context &c, any &dt, size_t min_prec) const;
+
+  Definition &get_reference_for_binop(Context &c) const;
 };
 
 /*
@@ -1660,8 +1661,8 @@ inline std::shared_ptr<Ope> bkr(const std::string &name) {
 inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
                                 const std::shared_ptr<Ope> &binop,
                                 const PrecedenceClimbing::BinOpeInfo &info,
-                                const Action &action) {
-  return std::make_shared<PrecedenceClimbing>(atom, binop, info, action);
+                                const Definition &rule) {
+  return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
 }
 
 /*
@@ -2281,6 +2282,8 @@ public:
   }
 
   std::string name;
+  // Position of this rule's definition in the grammar text (for error lines).
+  const char *s = nullptr;
   size_t id = 0;
   Action action;
   std::function<void(const char *s, size_t n, any &dt)> enter;
@@ -2584,6 +2587,18 @@ inline size_t BackReference::parse_core(const char *s, size_t n,
   throw std::runtime_error("Invalid back reference...");
 }
 
+inline Definition &
+PrecedenceClimbing::get_reference_for_binop(Context &c) const {
+  // Returns the rule that the operator reference `binop_` resolves to.
+  const auto &binop_ref = dynamic_cast<Reference &>(*binop_);
+  if (!rule_.is_macro) { return *binop_ref.rule_; }
+
+  // Inside a macro the reference designates a macro parameter: pick the
+  // argument bound to it in the current call and return the rule it refers to.
+  const auto bound_arg = c.top_args()[binop_ref.iarg_];
+  return *dynamic_cast<Reference &>(*bound_arg).rule_;
+}
+
 inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
                                                    SemanticValues &sv,
                                                    Context &c, any &dt,
@@ -2592,10 +2607,11 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
   if (fail(len)) { return len; }
 
   std::string tok;
-  auto &rule = dynamic_cast<Reference &>(*binop_).rule_;
-  auto action = rule->action;
+  // Operator rule; inside a macro it is resolved through the macro argument.
+  auto &rule = get_reference_for_binop(c);
+  auto action = rule.action;
 
-  rule->action = [&](SemanticValues &sv, any &dt) -> any {
+  rule.action = [&](SemanticValues &sv, any &dt) -> any {
     tok = sv.token();
     if (action) {
       return action(sv, dt);
@@ -2604,7 +2620,7 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
     }
     return any();
   };
-  auto action_se = make_scope_exit([&]() { rule->action = action; });
+  auto action_se = make_scope_exit([&]() { rule.action = action; });
 
   auto save_error_pos = c.error_pos;
 
@@ -2651,10 +2667,10 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
     i += chl;
 
     any val;
-    if (action_) {
+    if (rule_.action) {
       sv.s_ = s;
       sv.n_ = i;
-      val = action_(sv, dt);
+      val = rule_.action(sv, dt);
     } else if (!sv.empty()) {
       val = sv[0];
     }
@@ -3018,6 +3034,7 @@ private:
         auto &rule = grammar[name];
         rule <= ope;
         rule.name = name;
+        rule.s = sv.c_str();
         rule.ignoreSemanticValue = ignore;
         rule.is_macro = is_macro;
         rule.params = params;
@@ -3227,6 +3244,45 @@ private:
     g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); };
   }
 
+  // Rewrites `rule`, which must have exactly the 'list' shape `A (B A)*`, into
+  // a precedence climbing operator. On any other shape, reports the error via
+  // `log` (if set) at the rule's position in grammar text `s`; returns false.
+  bool apply_precedence_instruction(Definition &rule,
+                                    const PrecedenceClimbing::BinOpeInfo &info,
+                                    const char *s, Log log) {
+    auto report_error = [&]() {
+      if (log) {
+        auto line = line_info(s, rule.s);
+        log(line.first, line.second,
+            "'precedence' instruction cannot be applied to '" + rule.name +
+                "'.");
+      }
+      return false;
+    };
+    try {
+      auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
+      if (seq.opes_.size() != 2) { return report_error(); }
+      auto atom = seq.opes_[0];
+      auto &seq1 = dynamic_cast<Sequence &>(
+          *dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
+      if (seq1.opes_.size() != 2) { return report_error(); }
+      auto binop = seq1.opes_[0];
+      auto atom1 = seq1.opes_[1];
+      const auto &atom_name = dynamic_cast<Reference &>(*atom).name_;
+      const auto &binop_name = dynamic_cast<Reference &>(*binop).name_;
+      const auto &atom1_name = dynamic_cast<Reference &>(*atom1).name_;
+      if (atom_name != atom1_name || atom_name == binop_name) {
+        return report_error();
+      }
+
+      rule.holder_->ope_ = pre(atom, binop, info, rule);
+      rule.disable_action = true;
+    } catch (...) { // e.g. a failed dynamic_cast: the rule has another shape
+      return report_error();
+    }
+    return true;
+  }
+
   std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
                                         const Rules &rules, std::string &start,
                                         Log log) {
@@ -3318,7 +3374,6 @@ private:
           log(line.first, line.second, "'" + name + "' is left recursive.");
         }
         ret = false;
-        ;
       }
     }
 
@@ -3363,26 +3418,15 @@ private:
     for (const auto &item : data.instructions) {
       const auto &name = item.first;
       const auto &instruction = item.second;
+      auto &rule = grammar[name];
 
       if (instruction.type == "precedence") {
-        auto &rule = grammar[name];
-
-        auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
-        auto &atom = seq.opes_[0];
-        auto &seq1 = dynamic_cast<Sequence &>(
-            *dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
-        auto &binop = seq1.opes_[0];
-        auto &atom1 = seq1.opes_[1];
-
-        if (atom != atom1) {
-          // TODO: check
-        }
-
         const auto &info =
             any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
 
-        rule.holder_->ope_ = pre(atom, binop, info, rule.action);
-        rule.disable_action = true;
+        if (!apply_precedence_instruction(rule, info, s, log)) {
+          return nullptr;
+        }
       }
     }
 
diff --git a/test/test2.cc b/test/test2.cc
index 0947d9f..bb342ff 100644
--- a/test/test2.cc
+++ b/test/test2.cc
@@ -100,9 +100,7 @@ TEST_CASE("Not infinite 3", "[infinite loop]")
 
 TEST_CASE("Precedence climbing", "[precedence]")
 {
-    // Create a PEG parser
     parser parser(R"(
-        # Grammar for simple calculator...
         START            <-  _ EXPRESSION
         EXPRESSION       <-  ATOM (OPERATOR ATOM)* {
                                precedence
@@ -156,8 +154,122 @@ TEST_CASE("Precedence climbing", "[precedence]")
     }
 }
 
-TEST_CASE("Packrat parser test with %whitespace%", "[packrat]")
+TEST_CASE("Precedence climbing with macro", "[precedence]")
 {
+    // The 'precedence' instruction sits on a macro; ATOM and OPERATOR are passed as its arguments.
+    parser parser(R"(
+        EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
+        PRECEDENCE_PARSING(A, O) <-  A (O A)* {
+                                       precedence
+                                         L + - 
+                                         L * /
+                                     }
+        ATOM                     <-  NUMBER / '(' EXPRESSION ')'
+        OPERATOR                 <-  < [-+/*] >
+        NUMBER                   <-  < '-'? [0-9]+ >
+        %whitespace              <-  [ \t]*
+	)");
+
+    bool ret = parser;
+    REQUIRE(ret == true);
+
+    // Setup actions: the evaluating action is registered on the macro itself.
+    parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
+        auto result = any_cast<long>(sv[0]);
+        if (sv.size() > 1) {
+            auto ope = any_cast<char>(sv[1]);
+            auto num = any_cast<long>(sv[2]);
+            switch (ope) {
+                case '+': result += num; break;
+                case '-': result -= num; break;
+                case '*': result *= num; break;
+                case '/': result /= num; break;
+            }
+        }
+        return result;
+    };
+    parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
+    parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
+
+    {
+        auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; // 1 + (2 * 3 * 5 / 7) - 8 = 1 + 4 - 8 (integer division)
+        long val = 0;
+        ret = parser.parse(expr, val);
+
+        REQUIRE(ret == true);
+        REQUIRE(val == -3);
+    }
+
+    {
+      auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
+      long val = 0;
+      ret = parser.parse(expr, val);
+
+      REQUIRE(ret == true);
+      REQUIRE(val == 0);
+    }
+}
+
+TEST_CASE("Precedence climbing error1", "[precedence]")
+{ // The two operands differ (ATOM vs ATOM1), so the rule is not `A (B A)*`.
+    parser parser(R"(
+        START            <-  _ EXPRESSION
+        EXPRESSION       <-  ATOM (OPERATOR ATOM1)* {
+                               precedence
+                                 L + -
+                                 L * /
+                             }
+        ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
+        ATOM1            <-  NUMBER / T('(') EXPRESSION T(')')
+        OPERATOR         <-  T([-+/*])
+        NUMBER           <-  T('-'? [0-9]+)
+		~_               <-  [ \t]*
+		T(S)             <-  < S > _
+	)");
+
+    bool ret = parser;
+    REQUIRE(ret == false); // the grammar must be reported as invalid
+}
+
+TEST_CASE("Precedence climbing error2", "[precedence]")
+{ // EXPRESSION has no `(OPERATOR ATOM)*` repetition, so it is not `A (B A)*`.
+    parser parser(R"(
+        START            <-  _ EXPRESSION
+        EXPRESSION       <-  ATOM OPERATOR ATOM {
+                               precedence
+                                 L + -
+                                 L * /
+                             }
+        ATOM             <-  NUMBER / T('(') EXPRESSION T(')')
+        OPERATOR         <-  T([-+/*])
+        NUMBER           <-  T('-'? [0-9]+)
+		~_               <-  [ \t]*
+		T(S)             <-  < S > _
+	)");
+
+    bool ret = parser;
+    REQUIRE(ret == false); // the grammar must be reported as invalid
+}
+
+TEST_CASE("Precedence climbing error3", "[precedence]") { // the repetition is `(O A)+`, not `(O A)*`
+    parser parser(R"(
+        EXPRESSION               <-  PRECEDENCE_PARSING(ATOM, OPERATOR)
+        PRECEDENCE_PARSING(A, O) <-  A (O A)+ {
+                                       precedence
+                                         L + - 
+                                         L * /
+                                     }
+        ATOM                     <-  NUMBER / '(' EXPRESSION ')'
+        OPERATOR                 <-  < [-+/*] >
+        NUMBER                   <-  < '-'? [0-9]+ >
+        %whitespace              <-  [ \t]*
+	)");
+
+    bool ret = parser;
+    REQUIRE(ret == false); // the grammar must be reported as invalid
+}
+
+TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") {
     peg::parser parser(R"(
         ROOT         <-  'a'
         %whitespace  <-  SPACE*