Added unexpected token placeholder %t

2025-04-04 01:42:08 +00:00 · 2021-01-18 15:26:54 -05:00 · 2021-01-18 15:26:54 -05:00 · f560ceca97
commit f560ceca97
parent 8a853ef742
3 changed files with 38 additions and 16 deletions
--- a/README.md
+++ b/README.md
@ -27,6 +27,9 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
  * `|` (Dictionary operator)
  * `MACRO_NAME(` ... `)` (Parameterized rule or Macro)
  * `{ precedence L - + L / * }` (Parsing infix expression)
  * `%recover(` ... `)` (Error recovery operator)
  * `exp^label` (Syntax sugar for `(exp / %recover(label))`)
  * `label { message "..." }` (Error message instruction)
 This library supports linear-time parsing, known as [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
@ -515,6 +518,8 @@ cpp-peglib supports the furthest failure error position report as described in t
 For better error reporting and recovery, cpp-peglib supports a 'recovery' operator with a label, which can be associated with a recovery expression and a custom error message. This idea comes from the fantastic ["Syntax Error Recovery in Parsing Expression Grammars"](https://arxiv.org/pdf/1806.11150.pdf) paper by Sergio Medeiros and Fabio Mascarenhas.
 The custom message supports `%t`, which is a placeholder for the unexpected token.
 Here is an example of Java-like grammar:
 ```peg
--- a/docs/native.wasm
+++ b/docs/native.wasm
--- a/peglib.h
+++ b/peglib.h
@ -634,22 +634,29 @@ struct ErrorInfo {
  void output_log(const Log &log, const char *s, size_t n) const {
    if (message_pos) {
      auto line = line_info(s, message_pos);
-      log(line.first, line.second, message);
+      std::string msg;
      if (auto unexpected_token = heuristic_error_token(log, s, n, message_pos);
          !unexpected_token.empty()) {
        msg = replace_all(message, "%t", unexpected_token);
      } else {
        msg = message;
      }
      log(line.first, line.second, msg);
    } else if (error_pos) {
      auto line = line_info(s, error_pos);
-      std::string message;
+      std::string msg;
      if (expected_tokens.empty()) {
-        message = "syntax error.";
+        msg = "syntax error.";
      } else {
-        message = "syntax error";
+        msg = "syntax error";
        // unexpected token
        if (auto unexpected_token = heuristic_error_token(log, s, n, error_pos);
            !unexpected_token.empty()) {
-          message += ", unexpected '";
+          msg += ", unexpected '";
-          message += unexpected_token;
+          msg += unexpected_token;
-          message += "'";
+          msg += "'";
        }
        auto first_item = true;
@ -660,25 +667,25 @@ struct ErrorInfo {
          // Skip rules start with '_'
          if (!is_literal && token[0] != '_') {
-            message += (first_item ? ", expecting " : ", ");
+            msg += (first_item ? ", expecting " : ", ");
            if (is_literal) {
-              message += "'";
+              msg += "'";
-              message += token;
+              msg += token;
-              message += "'";
+              msg += "'";
            } else {
-              message += "<";
+              msg += "<";
-              message += token;
+              msg += token;
-              message += ">";
+              msg += ">";
            }
            first_item = false;
          }
          i++;
        }
-        message += ".";
+        msg += ".";
      }
-      log(line.first, line.second, message);
+      log(line.first, line.second, msg);
    }
  }
@ -699,6 +706,16 @@ private:
    }
    return std::string();
  }
  /// Returns a copy of `str` in which every non-overlapping occurrence of
  /// `from` is replaced with `to`, scanning left to right.
  ///
  /// Text inserted by a replacement is never rescanned, so `to` may itself
  /// contain `from` without causing repeated expansion.
  ///
  /// An empty `from` matches nothing and yields `str` unchanged: an unguarded
  /// search for the empty string succeeds at every position, so the loop would
  /// never terminate.
  ///
  /// Declared static because it reads no member state; it remains callable
  /// from const member functions.
  static std::string replace_all(std::string str, const std::string &from,
                                 const std::string &to) {
    if (from.empty()) { return str; }

    size_t pos = 0;
    while ((pos = str.find(from, pos)) != std::string::npos) {
      str.replace(pos, from.length(), to);
      // Resume after the inserted text so it is not matched again.
      pos += to.length();
    }
    return str;
  }
 };
 /*