From 7ee4fccb95dc0f21ed99c5d67f9ec00d9f57a9b7 Mon Sep 17 00:00:00 2001 From: yhirose Date: Thu, 26 May 2022 20:22:15 -0400 Subject: [PATCH] Fix #202 --- peglib.h | 9 +-- test/test2.cc | 149 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 105 insertions(+), 53 deletions(-) diff --git a/peglib.h b/peglib.h index e6712cc..ad013e9 100644 --- a/peglib.h +++ b/peglib.h @@ -1,7 +1,7 @@ // // peglib.h // -// Copyright (c) 2020 Yuji Hirose. All rights reserved. +// Copyright (c) 2022 Yuji Hirose. All rights reserved. // MIT License // @@ -719,11 +719,10 @@ struct ErrorInfo { auto first_item = true; size_t i = 0; while (i < expected_tokens.size()) { - auto [token, is_literal] = - expected_tokens[expected_tokens.size() - i - 1]; + auto [token, is_literal] = expected_tokens[i]; // Skip rules start with '_' - if (!is_literal && token[0] != '_') { + if (!is_literal || token[0] != '_') { msg += (first_item ? ", expecting " : ", "); if (is_literal) { msg += "'"; @@ -1123,6 +1122,7 @@ public: auto se = scope_exit([&]() { c.pop_capture_scope(); }); auto save_sv_size = vs.size(); auto save_tok_size = vs.tokens.size(); + auto save_error_info = c.error_info; const auto &rule = *ope_; auto len = rule.parse(s + i, n - i, vs, c, dt); if (success(len)) { @@ -1137,6 +1137,7 @@ public: vs.tokens.erase(vs.tokens.begin() + static_cast(save_tok_size)); } + c.error_info = save_error_info; break; } i += len; diff --git a/test/test2.cc b/test/test2.cc index ddac8d3..bedb092 100644 --- a/test/test2.cc +++ b/test/test2.cc @@ -461,7 +461,7 @@ TEST(PackratTest, Packrat_parser_test_with_whitespace) { EXPECT_TRUE(ret); } -TEST(PackratText, Packrat_parser_test_with_macro) { +TEST(PackratTest, Packrat_parser_test_with_macro) { parser parser(R"( EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) TERM <- LIST(FACTOR, FACTOR_OPERATOR) @@ -480,7 +480,7 @@ TEST(PackratText, Packrat_parser_test_with_macro) { EXPECT_TRUE(ret); } -TEST(PackratText, Packrat_parser_test_with_precedence_expression_parser) { +TEST(PackratTest, Packrat_parser_test_with_precedence_expression_parser) { peg::parser parser(R"( Expression <- Atom (Operator Atom)* { precedence L + - L * / } Atom <- _? Number _? @@ -498,7 +498,7 @@ TEST(PackratText, Packrat_parser_test_with_precedence_expression_parser) { EXPECT_TRUE(ret); } -TEST(BackreferenceText, Backreference_test) { +TEST(BackreferenceTest, Backreference_test) { parser parser(R"( START <- _ LQUOTE < (!RQUOTE .)* > RQUOTE _ LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' @@ -550,7 +550,7 @@ TEST(BackreferenceText, Backreference_test) { } } -TEST(BackreferenceText, Invalid_backreference_test) { +TEST(BackreferenceTest, Invalid_backreference_test) { parser parser(R"( START <- _ LQUOTE (!RQUOTE .)* RQUOTE _ LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' @@ -564,7 +564,7 @@ TEST(BackreferenceText, Invalid_backreference_test) { std::runtime_error); } -TEST(BackreferenceText, Nested_capture_test) { +TEST(BackreferenceTest, Nested_capture_test) { parser parser(R"( ROOT <- CONTENT CONTENT <- (ELEMENT / TEXT)* @@ -582,7 +582,7 @@ TEST(BackreferenceText, Nested_capture_test) { EXPECT_FALSE(parser.parse("This is a test text.")); } -TEST(BackreferenceText, Backreference_with_Prioritized_Choice_test) { +TEST(BackreferenceTest, Backreference_with_Prioritized_Choice_test) { parser parser(R"( TREE <- WRONG_BRANCH / CORRECT_BRANCH WRONG_BRANCH <- BRANCH THAT IS_capture WRONG @@ -598,7 +598,7 @@ TEST(BackreferenceText, Backreference_with_Prioritized_Choice_test) { EXPECT_THROW(parser.parse("branchthatiscorrect"), std::runtime_error); } -TEST(BackreferenceText, Backreference_with_Zero_or_More_test) { +TEST(BackreferenceTest, Backreference_with_Zero_or_More_test) { parser parser(R"( TREE <- WRONG_BRANCH* CORRECT_BRANCH WRONG_BRANCH <- BRANCH THAT IS_capture WRONG @@ -622,7 +622,7 @@ TEST(BackreferenceText, Backreference_with_Zero_or_More_test) { std::runtime_error); } -TEST(BackreferenceText, Backreference_with_One_or_More_test) { +TEST(BackreferenceTest, Backreference_with_One_or_More_test) { parser parser(R"( TREE <- WRONG_BRANCH+ CORRECT_BRANCH WRONG_BRANCH <- BRANCH THAT IS_capture WRONG @@ -645,7 +645,7 @@ TEST(BackreferenceText, Backreference_with_One_or_More_test) { EXPECT_FALSE(parser.parse("branchthatiswron_branchthatiscorrect")); } -TEST(BackreferenceText, Backreference_with_Option_test) { +TEST(BackreferenceTest, Backreference_with_Option_test) { parser parser(R"( TREE <- WRONG_BRANCH? CORRECT_BRANCH WRONG_BRANCH <- BRANCH THAT IS_capture WRONG @@ -669,7 +669,7 @@ TEST(BackreferenceText, Backreference_with_Option_test) { std::runtime_error); } -TEST(RepetitionText, Repetition_0) { +TEST(RepetitionTest, Repetition_0) { parser parser(R"( START <- '(' DIGIT{3} ') ' DIGIT{3} '-' DIGIT{4} DIGIT <- [0-9] @@ -680,7 +680,7 @@ TEST(RepetitionText, Repetition_0) { EXPECT_FALSE(parser.parse("(123) 45-7a90")); } -TEST(RepetitionText, Repetition_2_4) { +TEST(RepetitionTest, Repetition_2_4) { parser parser(R"( START <- DIGIT{2,4} DIGIT <- [0-9] @@ -692,7 +692,7 @@ TEST(RepetitionText, Repetition_2_4) { EXPECT_FALSE(parser.parse("12345")); } -TEST(RepetitionText, Repetition_2_1) { +TEST(RepetitionTest, Repetition_2_1) { parser parser(R"( START <- DIGIT{2,1} # invalid range DIGIT <- [0-9] @@ -702,7 +702,7 @@ TEST(RepetitionText, Repetition_2_1) { EXPECT_FALSE(parser.parse("123")); } -TEST(RepetitionText, Repetition_2) { +TEST(RepetitionTest, Repetition_2) { parser parser(R"( START <- DIGIT{2,} DIGIT <- [0-9] @@ -713,7 +713,7 @@ TEST(RepetitionText, Repetition_2) { EXPECT_TRUE(parser.parse("1234")); } -TEST(RepetitionText, Repetition__2) { +TEST(RepetitionTest, Repetition__2) { parser parser(R"( START <- DIGIT{,2} DIGIT <- [0-9] @@ -786,7 +786,7 @@ TEST(UserRuleTest, User_defined_rule_test) { EXPECT_TRUE(g.parse(" Hello BNF! ")); } -TEST(PredicateText, Semantic_predicate_test) { +TEST(PredicateTest, Semantic_predicate_test) { parser parser("NUMBER <- [0-9]+"); parser["NUMBER"] = [](const SemanticValues &vs) { @@ -807,7 +807,7 @@ TEST(PredicateText, Semantic_predicate_test) { EXPECT_FALSE(parser.parse("200", val)); } -TEST(UnicodeText, Japanese_character) { +TEST(UnicodeTest, Japanese_character) { peg::parser parser(u8R"( 文 <- 修飾語? 主語 述語 '。' 主語 <- 名詞 助詞 @@ -825,17 +825,17 @@ TEST(UnicodeText, Japanese_character) { EXPECT_TRUE(parser.parse(u8R"(サーバーを復旧します。)")); } -TEST(UnicodeText, dot_with_a_code) { +TEST(UnicodeTest, dot_with_a_code) { peg::parser parser(" S <- 'a' . 'b' "); EXPECT_TRUE(parser.parse(u8R"(aあb)")); } -TEST(UnicodeText, dot_with_a_char) { +TEST(UnicodeTest, dot_with_a_char) { peg::parser parser(" S <- 'a' . 'b' "); EXPECT_TRUE(parser.parse(u8R"(aåb)")); } -TEST(UnicodeText, character_class) { +TEST(UnicodeTest, character_class) { peg::parser parser(R"( S <- 'a' [い-おAさC-Eた-とは] 'b' )"); @@ -862,14 +862,14 @@ TEST(UnicodeText, character_class) { } #if 0 // TODO: Unicode Grapheme support -TEST(UnicodeText, dot_with_a_grapheme) +TEST(UnicodeTest, dot_with_a_grapheme) { peg::parser parser(" S <- 'a' . 'b' "); EXPECT_TRUE(parser.parse(u8R"(aसिb)")); } #endif -TEST(MacroText, Macro_simple_test) { +TEST(MacroTest, Macro_simple_test) { parser parser(R"( S <- HELLO WORLD HELLO <- T('hello') @@ -880,7 +880,7 @@ TEST(MacroText, Macro_simple_test) { EXPECT_TRUE(parser.parse("hello \tworld ")); } -TEST(MacroText, Macro_two_parameters) { +TEST(MacroTest, Macro_two_parameters) { parser parser(R"( S <- HELLO_WORLD HELLO_WORLD <- T('hello', 'world') @@ -890,7 +890,7 @@ TEST(MacroText, Macro_two_parameters) { EXPECT_TRUE(parser.parse("hello \tworld ")); } -TEST(MacroText, Macro_syntax_error) { +TEST(MacroTest, Macro_syntax_error) { parser parser(R"( S <- T('hello') T (a) <- a [ \t]* @@ -900,7 +900,7 @@ TEST(MacroText, Macro_syntax_error) { EXPECT_FALSE(ret); } -TEST(MacroText, Macro_missing_argument) { +TEST(MacroTest, Macro_missing_argument) { parser parser(R"( S <- T ('hello') T(a, b) <- a [ \t]* b @@ -910,7 +910,7 @@ TEST(MacroText, Macro_missing_argument) { EXPECT_FALSE(ret); } -TEST(MacroText, Macro_reference_syntax_error) { +TEST(MacroTest, Macro_reference_syntax_error) { parser parser(R"( S <- T ('hello') T(a) <- a [ \t]* @@ -920,7 +920,7 @@ TEST(MacroText, Macro_reference_syntax_error) { EXPECT_FALSE(ret); } -TEST(MacroText, Macro_invalid_macro_reference_error) { +TEST(MacroTest, Macro_invalid_macro_reference_error) { parser parser(R"( S <- T('hello') T <- 'world' @@ -930,7 +930,7 @@ TEST(MacroText, Macro_invalid_macro_reference_error) { EXPECT_FALSE(ret); } -TEST(MacroText, Macro_calculator) { +TEST(MacroTest, Macro_calculator) { // Create a PEG parser parser parser(R"( # Grammar for simple calculator... @@ -984,7 +984,7 @@ TEST(MacroText, Macro_calculator) { EXPECT_EQ(-3, val); } -TEST(MacroText, Macro_expression_arguments) { +TEST(MacroTest, Macro_expression_arguments) { parser parser(R"( S <- M('hello' / 'Hello', 'world' / 'World') M(arg0, arg1) <- arg0 [ \t]+ arg1 @@ -993,7 +993,7 @@ TEST(MacroText, Macro_expression_arguments) { EXPECT_TRUE(parser.parse("Hello world")); } -TEST(MacroText, Macro_recursive) { +TEST(MacroTest, Macro_recursive) { parser parser(R"( S <- M('abc') M(s) <- !s / s ' ' M(s / '123') / s @@ -1005,7 +1005,7 @@ TEST(MacroText, Macro_recursive) { EXPECT_TRUE(parser.parse("abc 123 abc")); } -TEST(MacroText, Macro_recursive2) { +TEST(MacroTest, Macro_recursive2) { auto syntaxes = std::vector{ "S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s", "S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s", @@ -1022,7 +1022,7 @@ TEST(MacroText, Macro_recursive2) { } } -TEST(MacroText, Macro_exclusive_modifiers) { +TEST(MacroTest, Macro_exclusive_modifiers) { parser parser(R"( S <- Modifiers(!"") _ Modifiers(Appeared) <- (!Appeared) ( @@ -1042,7 +1042,7 @@ TEST(MacroText, Macro_exclusive_modifiers) { EXPECT_FALSE(parser.parse("public static public")); } -TEST(MacroText, Macro_token_check_test) { +TEST(MacroTest, Macro_token_check_test) { parser parser(R"( # Grammar for simple calculator... EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) @@ -1066,7 +1066,7 @@ TEST(MacroText, Macro_token_check_test) { EXPECT_TRUE(parser["T"].is_token()); } -TEST(MacroText, Macro_passes_an_arg_to_another_macro) { +TEST(MacroTest, Macro_passes_an_arg_to_another_macro) { parser parser(R"( A <- B(C) B(D) <- D @@ -1076,7 +1076,7 @@ TEST(MacroText, Macro_passes_an_arg_to_another_macro) { EXPECT_TRUE(parser.parse("c")); } -TEST(MacroText, Unreferenced_rule) { +TEST(MacroTest, Unreferenced_rule) { parser parser(R"( A <- B(C) B(D) <- D @@ -1088,7 +1088,7 @@ TEST(MacroText, Unreferenced_rule) { EXPECT_TRUE(ret); // This is OK, because it's a warning, not an erro... } -TEST(MacroText, Nested_macro_call) { +TEST(MacroTest, Nested_macro_call) { parser parser(R"( A <- B(T) B(X) <- C(X) @@ -1099,7 +1099,7 @@ TEST(MacroText, Nested_macro_call) { EXPECT_TRUE(parser.parse("val")); } -TEST(MacroText, Nested_macro_call2) { +TEST(MacroTest, Nested_macro_call2) { parser parser(R"( START <- A('TestVal1', 'TestVal2')+ A(Aarg1, Aarg2) <- B(Aarg1) '#End' @@ -1157,7 +1157,7 @@ TEST(LineInformationTest, Line_information_test) { } } -TEST(DicText, Dictionary) { +TEST(DicTest, Dictionary) { parser parser(R"( START <- 'This month is ' MONTH '.' MONTH <- 'Jan' | 'January' | 'Feb' | 'February' @@ -1169,7 +1169,7 @@ TEST(DicText, Dictionary) { EXPECT_FALSE(parser.parse("This month is .")); } -TEST(DicText, Dictionary_invalid) { +TEST(DicTest, Dictionary_invalid) { parser parser(R"( START <- 'This month is ' MONTH '.' MONTH <- 'Jan' | 'January' | [a-z]+ | 'Feb' | 'February' @@ -1179,7 +1179,57 @@ TEST(DicText, Dictionary_invalid) { EXPECT_FALSE(ret); } -TEST(ErrorText, Error_recovery_1) { +TEST(ErrorTest, Default_error_handling_1) { + parser pg(R"( + S <- '@' A B + A <- < [a-z]+ > + B <- 'hello' | 'world' + %whitespace <- [ ]* + %word <- [a-z] + )"); + + EXPECT_TRUE(!!pg); + + std::vector errors{ + R"(1:8: syntax error, unexpected 'typo', expecting .)", + }; + + size_t i = 0; + pg.log = [&](size_t ln, size_t col, const std::string &msg) { + std::stringstream ss; + ss << ln << ":" << col << ": " << msg; + EXPECT_EQ(errors[i++], ss.str()); + }; + + EXPECT_FALSE(pg.parse(" @ aaa typo ")); +} + +TEST(ErrorTest, Default_error_handling_2) { + parser pg(R"( + S <- '@' A B + A <- < [a-z]+ > + B <- 'hello' / 'world' + %whitespace <- ' '* + %word <- [a-z] + )"); + + EXPECT_TRUE(!!pg); + + std::vector errors{ + R"(1:8: syntax error, unexpected 'typo', expecting 'hello', 'world'.)", + }; + + size_t i = 0; + pg.log = [&](size_t ln, size_t col, const std::string &msg) { + std::stringstream ss; + ss << ln << ":" << col << ": " << msg; + EXPECT_EQ(errors[i++], ss.str()); + }; + + EXPECT_FALSE(pg.parse(" @ aaa typo ")); +} + +TEST(ErrorTest, Error_recovery_1) { parser pg(R"( START <- __? SECTION* @@ -1304,7 +1354,7 @@ rrr | sss )", ast_to_s(ast)); } -TEST(ErrorText, Error_recovery_2) { +TEST(ErrorTest, Error_recovery_2) { parser pg(R"( START <- ENTRY ((',' ENTRY) / %recover((!(',' / Space) .)+))* (_ / %recover(.*)) ENTRY <- '[' ITEM (',' ITEM)* ']' @@ -1312,20 +1362,20 @@ TEST(ErrorText, Error_recovery_2) { NUM <- [0-9]+ ![a-z] WORD <- '"' [a-z]+ '"' - ~_ <- Space+ + ~_ <- Space* Space <- [ \n] )"); EXPECT_TRUE(!!pg); std::vector errors{ - R"(1:6: syntax error, unexpected ']'.)", + R"(1:6: syntax error, unexpected ']', expecting ','.)", R"(1:18: syntax error, unexpected 'z', expecting .)", - R"(1:24: syntax error, unexpected ',', expecting .)", - R"(1:31: syntax error, unexpected 'ccc', expecting .)", - R"(1:38: syntax error, unexpected 'ddd', expecting .)", - R"(1:55: syntax error, unexpected ']', expecting .)", - R"(1:58: syntax error, unexpected '\n', expecting .)", + R"(1:24: syntax error, unexpected ',', expecting '"'.)", + R"(1:31: syntax error, unexpected 'ccc', expecting '"', .)", + R"(1:38: syntax error, unexpected 'ddd', expecting '"', .)", + R"(1:55: syntax error, unexpected ']', expecting '"'.)", + R"(1:58: syntax error, unexpected '\n', expecting '"', .)", R"(2:3: syntax error.)", }; @@ -1363,7 +1413,8 @@ TEST(ErrorText, Error_recovery_2) { ast_to_s(ast)); } -TEST(ErrorText, Error_recovery_3) { + +TEST(ErrorTest, Error_recovery_3) { parser pg(R"~( # Grammar START <- __? SECTION* @@ -1574,7 +1625,7 @@ sss | ttt )", ast_to_s(ast)); } -TEST(ErrorText, Error_recovery_Java) { +TEST(ErrorTest, Error_recovery_Java) { parser pg(R"( Prog ← PUBLIC CLASS NAME LCUR PUBLIC STATIC VOID MAIN LPAR STRING LBRA RBRA NAME RPAR BlockStmt RCUR BlockStmt ← LCUR (Stmt)* RCUR^rcblk