From c9090b66615713c014476c552a93c38398ae642f Mon Sep 17 00:00:00 2001
From: yhirose <yhirose@users.noreply.github.com>
Date: Fri, 2 Sep 2022 23:47:55 -0400
Subject: [PATCH] Resolve #234 (#244)

---
 example/indent.cc |   9 +--
 lint/peglint.cc   |  14 ++--
 peglib.h          | 122 ++++++++++++++++---------------
 test/test1.cc     |  12 ++--
 test/test2.cc     | 180 ++++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 249 insertions(+), 88 deletions(-)

diff --git a/example/indent.cc b/example/indent.cc
index 74c46ab..af3b614 100644
--- a/example/indent.cc
+++ b/example/indent.cc
@@ -32,11 +32,12 @@ Block <- Statements {}
 
   size_t indent = 0;
 
-  parser["Block"].enter = [&](const char * /*s*/, size_t /*n*/,
-                              std::any & /*dt*/) { indent += 2; };
+  parser["Block"].enter = [&](const Context & /*c*/, const char * /*s*/,
+                              size_t /*n*/, std::any & /*dt*/) { indent += 2; };
 
-  parser["Block"].leave = [&](const char * /*s*/, size_t /*n*/,
-                              size_t /*matchlen*/, std::any & /*value*/,
+  parser["Block"].leave = [&](const Context & /*c*/, const char * /*s*/,
+                              size_t /*n*/, size_t /*matchlen*/,
+                              std::any & /*value*/,
                               std::any & /*dt*/) { indent -= 2; };
 
   parser["Samedent"].predicate =
diff --git a/lint/peglint.cc b/lint/peglint.cc
index c78b9b2..bea46b5 100644
--- a/lint/peglint.cc
+++ b/lint/peglint.cc
@@ -105,7 +105,8 @@ int main(int argc, const char **argv) {
 
   peg::parser parser;
 
-  parser.log = [&](size_t ln, size_t col, const string &msg) {
+  parser.log = [&](size_t ln, size_t col, const string &msg,
+                   const string & /*rule*/) {
     cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
   };
 
@@ -123,19 +124,16 @@ int main(int argc, const char **argv) {
     source_path = path_list[1];
   }
 
-  parser.log = [&](size_t ln, size_t col, const string &msg) {
+  parser.log = [&](size_t ln, size_t col, const string &msg,
+                   const string & /*rule*/) {
     cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
   };
 
   if (opt_packrat) { parser.enable_packrat_parsing(); }
 
-  if (opt_trace) {
-    enable_tracing(parser, std::cout);
-  }
+  if (opt_trace) { enable_tracing(parser, std::cout); }
 
-  if (opt_profile) {
-    enable_profiling(parser, std::cout);
-  }
+  if (opt_profile) { enable_profiling(parser, std::cout); }
 
   parser.set_verbose_trace(opt_verbose);
 
diff --git a/peglib.h b/peglib.h
index 6052c56..54cbd55 100644
--- a/peglib.h
+++ b/peglib.h
@@ -495,20 +495,7 @@ struct SemanticValues : protected std::vector<std::any> {
   std::vector<unsigned int> tags;
 
   // Line number and column at which the matched string is
-  std::pair<size_t, size_t> line_info() const {
-    if (!c_) { return std::pair(1, 1); }
-
-    auto &idx = source_line_index();
-
-    auto cur = static_cast<size_t>(std::distance(ss, sv_.data()));
-    auto it = std::lower_bound(
-        idx.begin(), idx.end(), cur,
-        [](size_t element, size_t value) { return element < value; });
-
-    auto id = static_cast<size_t>(std::distance(idx.begin(), it));
-    auto off = cur - (id == 0 ? 0 : idx[id - 1] + 1);
-    return std::pair(id + 1, off + 1);
-  }
+  std::pair<size_t, size_t> line_info() const;
 
   // Choice count
   size_t choice_count() const { return choice_count_; }
@@ -590,8 +577,6 @@ private:
   friend class Holder;
   friend class PrecedenceClimbing;
 
-  const std::vector<size_t> &source_line_index() const;
-
   Context *c_ = nullptr;
   std::string_view sv_;
   size_t choice_count_ = 0;
@@ -665,7 +650,8 @@ inline bool fail(size_t len) { return len == static_cast<size_t>(-1); }
 /*
  * Log
  */
-using Log = std::function<void(size_t, size_t, const std::string &)>;
+using Log = std::function<void(size_t line, size_t col, const std::string &msg,
+                               const std::string &rule)>; // rule: the reporting ErrorInfo's label, or "" if none
 
 /*
  * ErrorInfo
@@ -677,7 +663,8 @@ struct ErrorInfo {
   std::vector<std::pair<const char *, const Definition *>> expected_tokens;
   const char *message_pos = nullptr;
   std::string message;
-  mutable const char *last_output_pos = nullptr;
+  std::string label;
+  mutable const char *last_output_pos = nullptr; // TODO: protect...
   bool keep_previous_token = false;
 
   void clear() {
@@ -755,7 +742,6 @@ public:
   const char *path;
   const char *s;
   const size_t l;
-  std::vector<size_t> source_line_index;
 
   ErrorInfo error_info;
   bool recovered = false;
@@ -933,9 +919,30 @@ public:
                    const SemanticValues &vs, std::any &dt, size_t len);
   bool is_traceable(const Ope &ope) const;
 
-  mutable size_t next_trace_id = 0;
-  mutable std::vector<size_t> trace_ids;
+  // Line info: 1-based (line, column) of `cur`; columns count bytes. `cur` is expected to point into the input [s, s + l].
+  std::pair<size_t, size_t> line_info(const char *cur) const {
+    if (source_line_index.empty()) { // first call: build the line-end index
+      for (size_t pos = 0; pos < l; pos++) {
+        if (s[pos] == '\n') { source_line_index.push_back(pos); }
+      }
+      source_line_index.push_back(l); // end of input closes the last line
+    }
+    // Byte offset of `cur` from the start of the input.
+    auto pos = static_cast<size_t>(std::distance(s, cur));
+    // First recorded line end at or after `pos`; its index is the 0-based line.
+    auto it = std::lower_bound(
+        source_line_index.begin(), source_line_index.end(), pos,
+        [](size_t element, size_t value) { return element < value; });
+    // The column is measured from the byte that follows the previous line end.
+    auto id = static_cast<size_t>(std::distance(source_line_index.begin(), it));
+    auto off = pos - (id == 0 ? 0 : source_line_index[id - 1] + 1);
+    return std::pair(id + 1, off + 1);
+  }
+
+  size_t next_trace_id = 0;
+  std::vector<size_t> trace_ids;
   bool ignore_trace_state = false;
+  mutable std::vector<size_t> source_line_index; // TODO: protect...
 };
 
 /*
@@ -1402,7 +1409,7 @@ public:
 
   std::shared_ptr<Ope> ope_;
   Definition *outer_;
-  mutable std::string trace_name_;
+  mutable std::string trace_name_; // TODO: protect...
 
   friend class Definition;
 };
@@ -2326,9 +2333,10 @@ public:
 
   size_t id = 0;
   Action action;
-  std::function<void(const char *s, size_t n, std::any &dt)> enter;
-  std::function<void(const char *s, size_t n, size_t matchlen, std::any &value,
-                     std::any &dt)>
+  std::function<void(const Context &c, const char *s, size_t n, std::any &dt)>
+      enter;
+  std::function<void(const Context &c, const char *s, size_t n, size_t matchlen,
+                     std::any &value, std::any &dt)>
       leave;
   bool ignoreSemanticValue = false;
   std::shared_ptr<Ope> whitespaceOpe;
@@ -2489,15 +2497,9 @@ inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
   return i;
 }
 
-inline const std::vector<size_t> &SemanticValues::source_line_index() const {
+inline std::pair<size_t, size_t> SemanticValues::line_info() const {
   assert(c_);
-  if (c_->source_line_index.empty()) {
-    for (size_t pos = 0; pos < c_->l; pos++) {
-      if (c_->s[pos] == '\n') { c_->source_line_index.push_back(pos); }
-    }
-    c_->source_line_index.push_back(c_->l);
-  }
-  return c_->source_line_index;
+  return c_ ? c_->line_info(sv_.data()) : std::pair<size_t, size_t>(1, 1); // release builds: (1, 1) when c_ is null
 }
 
 inline void ErrorInfo::output_log(const Log &log, const char *s,
@@ -2519,7 +2521,7 @@ inline void ErrorInfo::output_log(const Log &log, const char *s,
       } else {
         msg = message;
       }
-      log(line.first, line.second, msg);
+      log(line.first, line.second, msg, label);
     }
   } else if (error_pos) {
     if (error_pos > last_output_pos) {
@@ -2562,8 +2564,7 @@ inline void ErrorInfo::output_log(const Log &log, const char *s,
         }
         msg += ".";
       }
-
-      log(line.first, line.second, msg);
+      log(line.first, line.second, msg, label);
     }
   }
 }
@@ -2697,11 +2698,11 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
   std::any val;
 
   c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
-    if (outer_->enter) { outer_->enter(s, n, dt); }
+    if (outer_->enter) { outer_->enter(c, s, n, dt); }
     auto &chvs = c.push_semantic_values_scope();
     auto se = scope_exit([&]() {
       c.pop_semantic_values_scope();
-      if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
+      if (outer_->leave) { outer_->leave(c, s, n, len, a_val, dt); }
     });
 
     c.rule_stack.push_back(outer_);
@@ -2723,6 +2724,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
         if (c.log && !msg.empty() && c.error_info.message_pos < s) {
           c.error_info.message_pos = s;
           c.error_info.message = msg;
+          c.error_info.label = outer_->name;
         }
         len = static_cast<size_t>(-1);
       }
@@ -2733,6 +2735,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
         if (c.log && !msg.empty() && c.error_info.message_pos < s) {
           c.error_info.message_pos = s;
           c.error_info.message = msg;
+          c.error_info.label = outer_->name;
         }
       }
     } else {
@@ -2740,6 +2743,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
           c.error_info.message_pos < s) {
         c.error_info.message_pos = s;
         c.error_info.message = outer_->error_message;
+        c.error_info.label = outer_->name;
       }
     }
   });
@@ -2935,6 +2939,7 @@ inline size_t Recovery::parse_core(const char *s, size_t n,
       if (!label->rule_->error_message.empty()) {
         c.error_info.message_pos = s;
         c.error_info.message = label->rule_->error_message;
+        c.error_info.label = label->rule_->name;
       }
     }
   }
@@ -3487,7 +3492,8 @@ private:
       }
     };
 
-    g["Definition"].enter = [](const char * /*s*/, size_t /*n*/, std::any &dt) {
+    g["Definition"].enter = [](const Context & /*c*/, const char * /*s*/,
+                               size_t /*n*/, std::any &dt) {
       auto &data = *std::any_cast<Data *>(dt);
       data.captures_in_current_definition.clear();
     };
@@ -3723,13 +3729,14 @@ private:
       return vs.token_to_number<size_t>();
     };
 
-    g["CapScope"].enter = [](const char * /*s*/, size_t /*n*/, std::any &dt) {
+    g["CapScope"].enter = [](const Context & /*c*/, const char * /*s*/,
+                             size_t /*n*/, std::any &dt) {
       auto &data = *std::any_cast<Data *>(dt);
       data.captures_stack.emplace_back();
     };
-    g["CapScope"].leave = [](const char * /*s*/, size_t /*n*/,
-                             size_t /*matchlen*/, std::any & /*value*/,
-                             std::any &dt) {
+    g["CapScope"].leave = [](const Context & /*c*/, const char * /*s*/,
+                             size_t /*n*/, size_t /*matchlen*/,
+                             std::any & /*value*/, std::any &dt) {
       auto &data = *std::any_cast<Data *>(dt);
       data.captures_stack.pop_back();
     };
@@ -3849,7 +3856,8 @@ private:
           auto line = line_info(s, rule.s_);
           log(line.first, line.second,
               "'precedence' instruction cannot be applied to '" + rule.name +
-                  "'.");
+                  "'.",
+              "");
         }
         return false;
       }
@@ -3861,7 +3869,8 @@ private:
         auto line = line_info(s, rule.s_);
         log(line.first, line.second,
             "'precedence' instruction cannot be applied to '" + rule.name +
-                "'.");
+                "'.",
+            "");
       }
       return false;
     }
@@ -3895,10 +3904,11 @@ private:
       if (log) {
         if (r.error_info.message_pos) {
           auto line = line_info(s, r.error_info.message_pos);
-          log(line.first, line.second, r.error_info.message);
+          log(line.first, line.second, r.error_info.message,
+              r.error_info.label);
         } else {
           auto line = line_info(s, r.error_info.error_pos);
-          log(line.first, line.second, "syntax error");
+          log(line.first, line.second, "syntax error", r.error_info.label);
         }
       }
       return nullptr;
@@ -3928,7 +3938,7 @@ private:
         if (log) {
           auto line = line_info(s, ptr);
           log(line.first, line.second,
-              "The definition '" + name + "' is already defined.");
+              "The definition '" + name + "' is already defined.", "");
         }
       }
       ret = false;
@@ -3940,7 +3950,7 @@ private:
         if (log) {
           auto line = line_info(s, ptr);
           log(line.first, line.second,
-              "The instruction '" + type + "' is already defined.");
+              "The instruction '" + type + "' is already defined.", "");
         }
       }
       ret = false;
@@ -3952,7 +3962,7 @@ private:
         if (log) {
           auto line = line_info(s, ptr);
           log(line.first, line.second,
-              "The back reference '" + name + "' is undefined.");
+              "The back reference '" + name + "' is undefined.", "");
         }
       }
       ret = false;
@@ -3967,8 +3977,8 @@ private:
         if (log) {
           auto line = line_info(s, start_rule.s_);
           log(line.first, line.second,
-              "Ignore operator cannot be applied to '" + start_rule.name +
-                  "'.");
+              "Ignore operator cannot be applied to '" + start_rule.name + "'.",
+              "");
         }
         ret = false;
       }
@@ -3991,7 +4001,7 @@ private:
       for (const auto &[name, ptr] : vis.error_s) {
         if (log) {
           auto line = line_info(s, ptr);
-          log(line.first, line.second, vis.error_message[name]);
+          log(line.first, line.second, vis.error_message[name], "");
         }
         ret = false;
       }
@@ -4002,7 +4012,7 @@ private:
         if (log) {
           auto line = line_info(s, rule.s_);
           auto msg = "'" + name + "' is not referenced.";
-          log(line.first, line.second, msg);
+          log(line.first, line.second, msg, "");
         }
       }
     }
@@ -4025,7 +4035,7 @@ private:
       if (vis.error_s) {
         if (log) {
           auto line = line_info(s, vis.error_s);
-          log(line.first, line.second, "'" + name + "' is left recursive.");
+          log(line.first, line.second, "'" + name + "' is left recursive.", "");
         }
         ret = false;
       }
@@ -4095,7 +4105,7 @@ private:
       if (log) {
         auto line = line_info(s, vis.error_s);
         log(line.first, line.second,
-            "infinite loop is detected in '" + vis.error_name + "'.");
+            "infinite loop is detected in '" + vis.error_name + "'.", "");
       }
       return true;
     }
diff --git a/test/test1.cc b/test/test1.cc
index 1e3d4bd..69e8e01 100644
--- a/test/test1.cc
+++ b/test/test1.cc
@@ -214,12 +214,13 @@ TEST(GeneralTest, enter_leave_handlers_test) {
         TOKEN  <- [A-Za-z]+
     )");
 
-  parser["LTOKEN"].enter = [&](const char *, size_t, std::any &dt) {
+  parser["LTOKEN"].enter = [&](const Context & /*c*/, const char *, size_t,
+                               std::any &dt) {
     auto &require_upper_case = *std::any_cast<bool *>(dt);
     require_upper_case = false;
   };
-  parser["LTOKEN"].leave = [&](const char *, size_t, size_t, std::any &,
-                               std::any &dt) {
+  parser["LTOKEN"].leave = [&](const Context & /*c*/, const char *, size_t,
+                               size_t, std::any &, std::any &dt) {
     auto &require_upper_case = *std::any_cast<bool *>(dt);
     require_upper_case = true;
   };
@@ -246,7 +247,8 @@ TEST(GeneralTest, enter_leave_handlers_test) {
   EXPECT_TRUE(parser.parse("hello=WORLD", dt));
   EXPECT_TRUE(parser.parse("HELLO=WORLD", dt));
 
-  parser.log = [&](size_t ln, size_t col, const std::string &msg) {
+  parser.log = [&](size_t ln, size_t col, const std::string &msg,
+                   const std::string & /*rule*/) {
     EXPECT_EQ(1, ln);
     EXPECT_EQ(7, col);
     EXPECT_EQ(message, msg);
@@ -1054,7 +1056,7 @@ TEST(GeneralTest, HeuristicErrorTokenTest) {
     untyped_enum <- '' { message "invalid/missing enum type, expected one of 'sequence' or 'bitmask', got '%t'"}
 	)");
 
-  parser.log = [&](size_t ln, size_t col, const std::string &msg) {
+  parser.log = [&](size_t ln, size_t col, const std::string &msg, const std::string & /*rule*/) {
     EXPECT_EQ(1, ln);
     EXPECT_EQ(6, col);
     EXPECT_EQ("invalid/missing enum type, expected one of 'sequence' or "
diff --git a/test/test2.cc b/test/test2.cc
index 33a9cc5..6c2c5bc 100644
--- a/test/test2.cc
+++ b/test/test2.cc
@@ -853,7 +853,8 @@ TEST(PredicateTest, Semantic_predicate_test) {
   EXPECT_TRUE(parser.parse("100", val));
   EXPECT_EQ(100, val);
 
-  parser.log = [](size_t line, size_t col, const std::string &msg) {
+  parser.log = [](size_t line, size_t col, const std::string &msg,
+                  const std::string & /*rule*/) {
     EXPECT_EQ(1, line);
     EXPECT_EQ(1, col);
     EXPECT_EQ("value error!!", msg);
@@ -878,7 +879,8 @@ is_symbol    <- Name { check_symbol var_table }
 ref aaa
 ref bbb
 )";
-    parser.log = [](size_t line, size_t col, const std::string &msg) {
+    parser.log = [](size_t line, size_t col, const std::string &msg,
+                    const std::string & /*rule*/) {
       EXPECT_EQ(3, line);
       EXPECT_EQ(5, col);
       EXPECT_EQ("'bbb' doesn't exist.", msg);
@@ -891,7 +893,8 @@ ref bbb
 ref aaa
 decl aaa
 )";
-    parser.log = [](size_t line, size_t col, const std::string &msg) {
+    parser.log = [](size_t line, size_t col, const std::string &msg,
+                    const std::string & /*rule*/) {
       EXPECT_EQ(3, line);
       EXPECT_EQ(6, col);
       EXPECT_EQ("'aaa' already exists.", msg);
@@ -963,7 +966,8 @@ typedef __off64_t __loff_t;
 typedef long __off64_t;
 typedef __off64_T __loff_t;
 )";
-    parser.log = [](size_t line, size_t col, const std::string &msg) {
+    parser.log = [](size_t line, size_t col, const std::string &msg,
+                    const std::string & /*rule*/) {
       EXPECT_EQ(3, line);
       EXPECT_EQ(9, col);
       EXPECT_EQ("'__off64_T' doesn't exist.", msg);
@@ -977,7 +981,8 @@ typedef long __off64_t;
 typedef __off64_t __loff_t;
 typedef __off64_t __loff_t;
 )";
-    parser.log = [](size_t line, size_t col, const std::string &msg) {
+    parser.log = [](size_t line, size_t col, const std::string &msg,
+                    const std::string & /*rule*/) {
       EXPECT_EQ(4, line);
       EXPECT_EQ(19, col);
       EXPECT_EQ("'__loff_t' already exists.", msg);
@@ -1029,7 +1034,8 @@ is_symbol    <- < Name >
 ref aaa
 ref bbb
 )";
-    parser.log = [](size_t line, size_t col, const std::string &msg) {
+    parser.log = [](size_t line, size_t col, const std::string &msg,
+                    const std::string & /*rule*/) {
       EXPECT_EQ(3, line);
       EXPECT_EQ(5, col);
       EXPECT_EQ("'bbb' doesn't exist.", msg);
@@ -1044,7 +1050,8 @@ ref bbb
 ref aaa
 decl aaa
 )";
-    parser.log = [](size_t line, size_t col, const std::string &msg) {
+    parser.log = [](size_t line, size_t col, const std::string &msg,
+                    const std::string & /*rule*/) {
       std::cerr << line << ":" << col << ": " << msg << "\n";
       EXPECT_EQ(3, line);
       EXPECT_EQ(6, col);
@@ -1445,7 +1452,8 @@ TEST(ErrorTest, Default_error_handling_1) {
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -1471,7 +1479,8 @@ TEST(ErrorTest, Default_error_handling_2) {
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -1515,7 +1524,8 @@ TEST(ErrorTest, Default_error_handling_fiblang) {
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -1572,7 +1582,8 @@ entry  <- (!(__ / HEADER) .)+ { error_message "invalid entry." }
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -1684,7 +1695,8 @@ TEST(ErrorTest, Error_recovery_2) {
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -1771,7 +1783,8 @@ skip_puncs       <- [|=]* _
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -1987,7 +2000,8 @@ SkipToRCUR ← (!RCUR (LCUR SkipToRCUR / .))* RCUR
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -2075,7 +2089,8 @@ STR         <- < [a-z0-9]+ >
   };
 
   size_t i = 0;
-  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
     std::stringstream ss;
     ss << ln << ":" << col << ": " << msg;
     EXPECT_EQ(errors[i++], ss.str());
@@ -2087,3 +2102,138 @@ STR         <- < [a-z0-9]+ >
   )"));
   EXPECT_EQ(i, errors.size());
 }
+
+TEST(StateTest, Indent) { // enter/leave handlers keep indentation state
+  parser pg(R"(Start <- Statements {}
+Statements <- Statement*
+Statement <- Samedent (S / I)
+
+S <- 'S' EOS { no_ast_opt }
+I <- 'I' EOL Block / 'I' EOS { no_ast_opt }
+
+Block <- Statements {}
+
+~Samedent <- ' '* {}
+
+~EOS <- EOL / EOF
+~EOL <- '\n'
+~EOF <- !.
+  )");
+
+  EXPECT_TRUE(!!pg);
+
+  size_t indent = 0; // indentation width the Samedent predicate compares against
+  // Entering a Block raises the expected indent by two; leaving restores it.
+  pg["Block"].enter = [&](const Context & /*c*/, const char * /*s*/,
+                          size_t /*n*/, std::any & /*dt*/) { indent += 2; };
+
+  pg["Block"].leave = [&](const Context & /*c*/, const char * /*s*/,
+                          size_t /*n*/, size_t /*matchlen*/,
+                          std::any & /*value*/,
+                          std::any & /*dt*/) { indent -= 2; };
+  // Reject Samedent unless vs.sv().size() equals the current indent.
+  pg["Samedent"].predicate = [&](const SemanticValues &vs,
+                                 const std::any & /*dt*/, std::string &msg) {
+    if (indent != vs.sv().size()) {
+      msg = "different indent...";
+      return false;
+    }
+    return true;
+  };
+
+  pg.enable_ast();
+
+  const auto source = R"(I
+  S
+  I
+    I
+      S
+      S
+    S
+  S
+)";
+
+  std::shared_ptr<Ast> ast;
+
+  EXPECT_TRUE(pg.parse(source, ast));
+
+  ast = pg.optimize_ast(ast);
+  auto s = ast_to_s(ast);
+  // The dumped AST is expected to nest one Block per indentation level.
+  EXPECT_EQ(R"(+ Start/0[I]
+  + Block/0[Statements]
+    + Statement/0[S]
+    + Statement/0[I]
+      + Block/0[Statements]
+        + Statement/0[I]
+          + Block/0[Statements]
+            + Statement/0[S]
+            + Statement/0[S]
+        + Statement/0[S]
+    + Statement/0[S]
+)", s);
+}
+
+TEST(StateTest, NestedBlocks) { // enter() resolves positions via Context::line_info
+  parser pg(R"(
+program <- (~NL / expr)*
+
+~BLOCK_COMMENT  <- '/*' ('/'+[^*/]+ / BLOCK_COMMENT / '*'+[^*/]+ / [^*/] )* ('*/'^unterminated_comment)
+~LINE_COMMENT   <- '//' [^\n]*
+~NOISE          <- [ \f\r\t] / BLOCK_COMMENT
+
+NL              <- NOISE* LINE_COMMENT? '\n'
+
+# error recovery
+unterminated_comment <- '' { error_message "unterminated block comment" }
+
+expr <- 'hello'
+  )");
+
+  EXPECT_TRUE(!!pg);
+
+  std::vector<std::pair<size_t, size_t>> locations;
+  // Push the (line, column) where each BLOCK_COMMENT starts; leave pops it.
+  pg["BLOCK_COMMENT"].enter = [&](const Context &c, const char *s, size_t /*n*/,
+                                  std::any & /*dt*/) {
+    locations.push_back(c.line_info(s));
+  };
+
+  pg["BLOCK_COMMENT"].leave = [&](const Context & /*c*/, const char * /*s*/,
+                                  size_t /*n*/, size_t /*matchlen*/,
+                                  std::any & /*value*/,
+                                  std::any & /*dt*/) { locations.pop_back(); };
+
+  std::vector<std::string> errors{
+      R"(7:1: unterminated block comment)",
+  };
+
+  size_t i = 0;
+  pg.log = [&](size_t ln, size_t col, const std::string &msg,
+               const std::string & /*rule*/) {
+    std::stringstream ss;
+    ss << ln << ":" << col << ": " << msg;
+    EXPECT_EQ(errors[i++], ss.str());
+    // When the error is logged the stack should hold the first four starts.
+    EXPECT_EQ(4, locations.size());
+    EXPECT_EQ(1, locations[0].first);
+    EXPECT_EQ(1, locations[0].second);
+    EXPECT_EQ(2, locations[1].first);
+    EXPECT_EQ(2, locations[1].second);
+    EXPECT_EQ(3, locations[2].first);
+    EXPECT_EQ(3, locations[2].second);
+    EXPECT_EQ(4, locations[3].first);
+    EXPECT_EQ(4, locations[3].second);
+  };
+
+  EXPECT_FALSE(pg.parse(R"(/* line 1:1 is the first comment open
+ /* line 2:2 is the second
+  /* line 3:3 and so on
+   /* line 4:4
+    /* line 5:5
+*/
+)"));
+
+  EXPECT_EQ(i, errors.size()); // exactly one error must have been reported
+}
+