Added automatic EOI check

This commit is contained in:
yhirose 2022-06-08 11:10:59 -04:00
parent 0e8406ebeb
commit 6d92d503cc
6 changed files with 41 additions and 31 deletions

View File

@ -32,6 +32,8 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
* `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`) * `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`)
* `label { message "..." }` (Error message instruction) * `label { message "..." }` (Error message instruction)
The 'End of Input' check is performed by default. To disable it, call `disable_eoi_check`.
This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing. This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
IMPORTANT NOTE for some Linux distributions such as Ubuntu and CentOS: Need `-pthread` option when linking. See [#23](https://github.com/yhirose/cpp-peglib/issues/23#issuecomment-261126127), [#46](https://github.com/yhirose/cpp-peglib/issues/46#issuecomment-417870473) and [#62](https://github.com/yhirose/cpp-peglib/issues/62#issuecomment-492032680). IMPORTANT NOTE for some Linux distributions such as Ubuntu and CentOS: Need `-pthread` option when linking. See [#23](https://github.com/yhirose/cpp-peglib/issues/23#issuecomment-261126127), [#46](https://github.com/yhirose/cpp-peglib/issues/46#issuecomment-417870473) and [#62](https://github.com/yhirose/cpp-peglib/issues/62#issuecomment-492032680).

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -2358,6 +2358,8 @@ public:
std::string error_message; std::string error_message;
bool no_ast_opt = false; bool no_ast_opt = false;
bool eoi_check = true;
private: private:
friend class Reference; friend class Reference;
friend class ParserGenerator; friend class ParserGenerator;
@ -2409,12 +2411,14 @@ private:
auto ret = success(len); auto ret = success(len);
if (ret) { if (ret) {
i += len; i += len;
if (i < n) { if (eoi_check) {
if (c.error_info.error_pos - c.s < s + i - c.s) { if (i < n) {
c.error_info.message_pos = s + i; if (c.error_info.error_pos - c.s < s + i - c.s) {
c.error_info.message = "expected end of input"; c.error_info.message_pos = s + i;
c.error_info.message = "expected end of input";
}
ret = false;
} }
ret = false;
} }
} }
return Result{ret, c.recovered, i, c.error_info}; return Result{ret, c.recovered, i, c.error_info};
@ -4410,6 +4414,13 @@ public:
return rules; return rules;
} }
// Clears the `eoi_check` flag on the grammar's start rule.
// Does nothing when `grammar_` is null.
void disable_eoi_check() {
  if (grammar_ == nullptr) { return; }
  (*grammar_)[start_].eoi_check = false;
}
void enable_packrat_parsing() { void enable_packrat_parsing() {
if (grammar_ != nullptr) { if (grammar_ != nullptr) {
auto &rule = (*grammar_)[start_]; auto &rule = (*grammar_)[start_];

View File

@ -977,6 +977,17 @@ TEST(GeneralTest, ParentReferencesShouldNotBeExpired) {
} }
TEST(GeneralTest, EndOfInputTest) { TEST(GeneralTest, EndOfInputTest) {
auto parser = peg::parser(R"(
S <- '[[' (!']]' .)* ']]' !.
)");
parser.disable_eoi_check();
auto ret = parser.parse("[[]]]");
EXPECT_FALSE(ret);
}
TEST(GeneralTest, DefaultEndOfInputTest) {
auto parser = peg::parser(R"( auto parser = peg::parser(R"(
S <- '[[' (!']]' .)* ']]' S <- '[[' (!']]' .)* ']]'
)"); )");
@ -987,10 +998,12 @@ TEST(GeneralTest, EndOfInputTest) {
TEST(GeneralTest, DisableEndOfInputCheckTest) { TEST(GeneralTest, DisableEndOfInputCheckTest) {
auto parser = peg::parser(R"( auto parser = peg::parser(R"(
S <- '[[' (!']]' .)* ']]' !. S <- '[[' (!']]' .)* ']]'
)"); )");
parser.disable_eoi_check();
auto ret = parser.parse("[[]]]"); auto ret = parser.parse("[[]]]");
EXPECT_FALSE(ret); EXPECT_TRUE(ret);
} }

View File

@ -1228,7 +1228,7 @@ TEST(DicTest, Dictionary_invalid) {
TEST(ErrorTest, Default_error_handling_1) { TEST(ErrorTest, Default_error_handling_1) {
parser pg(R"( parser pg(R"(
S <- '@' A B !. S <- '@' A B
A <- < [a-z]+ > A <- < [a-z]+ >
B <- 'hello' | 'world' B <- 'hello' | 'world'
%whitespace <- [ ]* %whitespace <- [ ]*
@ -1253,7 +1253,7 @@ TEST(ErrorTest, Default_error_handling_1) {
TEST(ErrorTest, Default_error_handling_2) { TEST(ErrorTest, Default_error_handling_2) {
parser pg(R"( parser pg(R"(
S <- '@' A B !. S <- '@' A B
A <- < [a-z]+ > A <- < [a-z]+ >
B <- 'hello' / 'world' B <- 'hello' / 'world'
%whitespace <- ' '* %whitespace <- ' '*
@ -1279,7 +1279,7 @@ TEST(ErrorTest, Default_error_handling_2) {
TEST(ErrorTest, Default_error_handling_fiblang) { TEST(ErrorTest, Default_error_handling_fiblang) {
parser pg(R"( parser pg(R"(
# Syntax # Syntax
START STATEMENTS !. START STATEMENTS
STATEMENTS (DEFINITION / EXPRESSION)* STATEMENTS (DEFINITION / EXPRESSION)*
DEFINITION 'def' Identifier '(' Identifier ')' EXPRESSION DEFINITION 'def' Identifier '(' Identifier ')' EXPRESSION
EXPRESSION TERNARY EXPRESSION TERNARY
@ -1326,7 +1326,7 @@ for n frm 1 to 30
TEST(ErrorTest, Error_recovery_1) { TEST(ErrorTest, Error_recovery_1) {
parser pg(R"( parser pg(R"(
START <- __? SECTION* !. START <- __? SECTION*
SECTION <- HEADER __ ENTRIES __? SECTION <- HEADER __ ENTRIES __?
@ -1490,29 +1490,13 @@ TEST(ErrorTest, Error_recovery_2) {
)", )",
ast)); ast));
ast = pg.optimize_ast(ast); EXPECT_FALSE(ast);
EXPECT_EQ(R"(+ START
- ENTRY/0[NUM] (000)
- ENTRY/0[NUM] (111)
+ ENTRY
+ ITEM/2
+ ITEM/2
- ITEM/0[WORD] ("bbb")
+ ITEM/2
+ ENTRY
+ ITEM/2
- ITEM/1[NUM] (444)
- ITEM/1[NUM] (555)
+ ITEM/2
)",
ast_to_s(ast));
} }
TEST(ErrorTest, Error_recovery_3) { TEST(ErrorTest, Error_recovery_3) {
parser pg(R"~( parser pg(R"~(
# Grammar # Grammar
START <- __? SECTION* !. START <- __? SECTION*
SECTION <- HEADER __ ENTRIES __? SECTION <- HEADER __ ENTRIES __?
@ -1723,7 +1707,7 @@ sss | ttt
TEST(ErrorTest, Error_recovery_Java) { TEST(ErrorTest, Error_recovery_Java) {
parser pg(R"( parser pg(R"(
Prog PUBLIC CLASS NAME LCUR PUBLIC STATIC VOID MAIN LPAR STRING LBRA RBRA NAME RPAR BlockStmt RCUR !. Prog PUBLIC CLASS NAME LCUR PUBLIC STATIC VOID MAIN LPAR STRING LBRA RBRA NAME RPAR BlockStmt RCUR
BlockStmt LCUR (Stmt)* RCUR^rcblk BlockStmt LCUR (Stmt)* RCUR^rcblk
Stmt IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt Stmt IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt
IfStmt IF LPAR Exp RPAR Stmt (ELSE Stmt)? IfStmt IF LPAR Exp RPAR Stmt (ELSE Stmt)?