Added unexpected token place holder %t

This commit is contained in:
yhirose 2021-01-18 15:26:54 -05:00
parent 8a853ef742
commit f560ceca97
3 changed files with 38 additions and 16 deletions

View File

@ -27,6 +27,9 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
* `|` (Dictionary operator)
* `MACRO_NAME(` ... `)` (Parameterized rule or Macro)
* `{ precedence L - + L / * }` (Parsing infix expression)
* `%recovery(` ... `)` (Error recovery operator)
* `exp^label` (Syntax sugar for `(exp / %recover(label))`)
* `label { message "..." }` (Error message instruction)
This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
@ -515,6 +518,8 @@ cpp-peglib supports the furthest failure error posision report as descrived in t
For better error reporting and recovery, cpp-peglib supports a 'recovery' operator with a label, which can be associated with a recovery expression and a custom error message. This idea comes from the fantastic ["Syntax Error Recovery in Parsing Expression Grammars"](https://arxiv.org/pdf/1806.11150.pdf) paper by Sergio Medeiros and Fabio Mascarenhas.
The custom message supports `%t`, which is a placeholder for the unexpected token.
Here is an example of Java-like grammar:
```peg

Binary file not shown.

View File

@ -634,22 +634,29 @@ struct ErrorInfo {
void output_log(const Log &log, const char *s, size_t n) const {
if (message_pos) {
auto line = line_info(s, message_pos);
log(line.first, line.second, message);
std::string msg;
if (auto unexpected_token = heuristic_error_token(log, s, n, message_pos);
!unexpected_token.empty()) {
msg = replace_all(message, "%t", unexpected_token);
} else {
msg = message;
}
log(line.first, line.second, msg);
} else if (error_pos) {
auto line = line_info(s, error_pos);
std::string message;
std::string msg;
if (expected_tokens.empty()) {
message = "syntax error.";
msg = "syntax error.";
} else {
message = "syntax error";
msg = "syntax error";
// unexpected token
if (auto unexpected_token = heuristic_error_token(log, s, n, error_pos);
!unexpected_token.empty()) {
message += ", unexpected '";
message += unexpected_token;
message += "'";
msg += ", unexpected '";
msg += unexpected_token;
msg += "'";
}
auto first_item = true;
@ -660,25 +667,25 @@ struct ErrorInfo {
// Skip rules start with '_'
if (!is_literal && token[0] != '_') {
message += (first_item ? ", expecting " : ", ");
msg += (first_item ? ", expecting " : ", ");
if (is_literal) {
message += "'";
message += token;
message += "'";
msg += "'";
msg += token;
msg += "'";
} else {
message += "<";
message += token;
message += ">";
msg += "<";
msg += token;
msg += ">";
}
first_item = false;
}
i++;
}
message += ".";
msg += ".";
}
log(line.first, line.second, message);
log(line.first, line.second, msg);
}
}
@ -699,6 +706,16 @@ private:
}
return std::string();
}
// Returns a copy of `str` in which every non-overlapping occurrence of `from`
// has been replaced by `to`, scanning from left to right.
//
// str  - text to process; taken by value so the caller's string is untouched.
// from - pattern to search for. An empty pattern yields `str` unchanged.
// to   - replacement text. It is never rescanned, so a `to` that itself
//        contains `from` does not trigger further replacements.
//
// The helper reads no instance state, hence it is declared `static`.
static std::string replace_all(std::string str, const std::string &from,
                               const std::string &to) {
  // An empty pattern matches at every position, which would make the loop
  // below run forever; treat it as "nothing to replace".
  if (from.empty()) { return str; }

  size_t pos = 0;
  while ((pos = str.find(from, pos)) != std::string::npos) {
    str.replace(pos, from.length(), to);
    // Continue after the text just inserted so it is not matched again.
    pos += to.length();
  }
  return str;
}
};
/*