diff --git a/README.md b/README.md index 0e6cef5..6687c18 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,9 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau * `|` (Dictionary operator) * `MACRO_NAME(` ... `)` (Parameterized rule or Macro) * `{ precedence L - + L / * }` (Parsing infix expression) + * `%recovery(` ... `)` (Error recovery operator) + * `exp^label` (Syntax sugar for `(exp / %recover(label))`) + * `label { message "..." }` (Error message instruction) This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing. @@ -515,6 +518,8 @@ cpp-peglib supports the furthest failure error posision report as descrived in t For better error report and recovery, cpp-peglib supports 'recovery' operator with label which can be assosiated with a recovery expression and a custom error message. This idea comes from the fantastic ["Syntax Error Recovery in Parsing Expression Grammars"](https://arxiv.org/pdf/1806.11150.pdf) paper by Sergio Medeiros and Fabio Mascarenhas. +The custom message supports `%t`, which is a placeholder for the unexpected token. 
+ Here is an example of Java-like grammar: ```peg diff --git a/docs/native.wasm b/docs/native.wasm index c7d4b33..da74b80 100644 Binary files a/docs/native.wasm and b/docs/native.wasm differ diff --git a/peglib.h b/peglib.h index 2b2ee05..9a4ce33 100644 --- a/peglib.h +++ b/peglib.h @@ -634,22 +634,29 @@ struct ErrorInfo { void output_log(const Log &log, const char *s, size_t n) const { if (message_pos) { auto line = line_info(s, message_pos); - log(line.first, line.second, message); + std::string msg; + if (auto unexpected_token = heuristic_error_token(log, s, n, message_pos); + !unexpected_token.empty()) { + msg = replace_all(message, "%t", unexpected_token); + } else { + msg = message; + } + log(line.first, line.second, msg); } else if (error_pos) { auto line = line_info(s, error_pos); - std::string message; + std::string msg; if (expected_tokens.empty()) { - message = "syntax error."; + msg = "syntax error."; } else { - message = "syntax error"; + msg = "syntax error"; // unexpected token if (auto unexpected_token = heuristic_error_token(log, s, n, error_pos); !unexpected_token.empty()) { - message += ", unexpected '"; - message += unexpected_token; - message += "'"; + msg += ", unexpected '"; + msg += unexpected_token; + msg += "'"; } auto first_item = true; @@ -660,25 +667,25 @@ struct ErrorInfo { // Skip rules start with '_' if (!is_literal && token[0] != '_') { - message += (first_item ? ", expecting " : ", "); + msg += (first_item ? 
", expecting " : ", "); if (is_literal) { - message += "'"; - message += token; - message += "'"; + msg += "'"; + msg += token; + msg += "'"; } else { - message += "<"; - message += token; - message += ">"; + msg += "<"; + msg += token; + msg += ">"; } first_item = false; } i++; } - message += "."; + msg += "."; } - log(line.first, line.second, message); + log(line.first, line.second, msg); } } @@ -699,6 +706,16 @@ private: } return std::string(); } + + std::string replace_all(std::string str, const std::string &from, + const std::string &to) const { + size_t pos = 0; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } + return str; + } }; /*