// cpp-peglib/test/test2.cc
// Mirror of https://github.com/yhirose/cpp-peglib.git (synced 2025-01-10 17:45:30 +00:00).

#include <gtest/gtest.h>
#include <peglib.h>
#include <sstream>

using namespace peg;

2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_1) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- 'a' 'b' 'c'
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_2) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- < 'a' 'b' 'c' >
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_3) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- < 'a' B 'c' >
B <- 'b'
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_4) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- < A 'b' 'c' >
A <- 'a'
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_5) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- A < 'b' C >
A <- 'a'
C <- 'c'
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_6) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- < A > B C
A <- 'a'
B <- 'b'
C <- 'c'
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(TokenBoundaryTest, Token_boundary_7) {
parser pg(R"(
2020-11-09 17:06:48 +00:00
ROOT <- TOP
TOP <- < A B C >
A <- 'a'
B <- 'b'
C <- 'c'
%whitespace <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(" a b c "));
2020-11-09 17:06:48 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_1) {
parser pg(R"(
2020-02-07 18:28:01 +00:00
ROOT <- WH TOKEN* WH
TOKEN <- [a-z0-9]*
WH <- [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_2) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
ROOT <- WH TOKEN+ WH
TOKEN <- [a-z0-9]*
WH <- [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_3) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
ROOT <- WH TOKEN* WH
TOKEN <- !'word1'
WH <- [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_4) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
ROOT <- WH TOKEN* WH
TOKEN <- &'word1'
WH <- [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_5) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
Numbers <- Number*
Number <- [0-9]+ / Spacing
Spacing <- ' ' / '\t' / '\n' / EOF # EOF is empty
EOF <- !.
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_6) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-06-01 23:47:32 +00:00
S <- ''*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-06-01 23:47:32 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_7) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-06-01 23:47:32 +00:00
S <- A*
A <- ''
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2020-06-01 23:47:32 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_8) {
parser pg(R"(
ROOT <- ('A' /)*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Infinite_loop_9) {
parser pg(R"(
2021-03-31 00:59:17 +00:00
ROOT <- %recover(('A' /)*)
)");
2021-01-18 23:06:54 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg);
2021-01-18 23:06:54 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Not_infinite_1) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
Numbers <- Number* EOF
Number <- [0-9]+ / Spacing
Spacing <- ' ' / '\t' / '\n'
EOF <- !.
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Not_infinite_2) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
ROOT <- _ ('[' TAG_NAME ']' _)*
# In a sequence operator, if there is at least one non-empty element, we can treat it as non-empty
TAG_NAME <- (!']' .)+
_ <- [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(InfiniteLoopTest, Not_infinite_3) {
2020-10-02 01:26:04 +00:00
parser pg(R"(
2020-02-07 18:28:01 +00:00
EXPRESSION <- _ TERM (TERM_OPERATOR TERM)*
TERM <- FACTOR (FACTOR_OPERATOR FACTOR)*
FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ # Recursive...
TERM_OPERATOR <- < [-+] > _
FACTOR_OPERATOR <- < [/*] > _
NUMBER <- < [0-9]+ > _
_ <- [ \t\r\n]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2020-02-07 18:28:01 +00:00
}
2022-05-27 01:37:08 +00:00
TEST(InfiniteLoopTest, whitespace) {
parser pg(R"(
S <- 'hello'
%whitespace <- ('')*
)");
EXPECT_FALSE(pg);
}
// Expects grammar construction to fail when %word is defined as `('')*`.
TEST(InfiniteLoopTest, word) {
  // Grammar text is kept verbatim (hence no indentation inside the literal).
  const char *grammar = R"(
S <- 'hello'
%whitespace <- ' '*
%word <- ('')*
)";

  parser pg(grammar);
  EXPECT_FALSE(pg);
}
2021-08-19 06:29:11 +00:00
TEST(PrecedenceTest, Precedence_climbing) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
START <- _ EXPRESSION
EXPRESSION <- ATOM (OPERATOR ATOM)* {
precedence
L + -
L * /
}
ATOM <- NUMBER / T('(') EXPRESSION T(')')
OPERATOR <- T([-+/*])
NUMBER <- T('-'? [0-9]+)
~_ <- [ \t]*
T(S) <- < S > _
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!parser);
2021-01-13 15:11:06 +00:00
2020-10-02 01:26:04 +00:00
parser.enable_packrat_parsing();
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
// Setup actions
parser["EXPRESSION"] = [](const SemanticValues &vs) -> long {
auto result = std::any_cast<long>(vs[0]);
if (vs.size() > 1) {
auto ope = std::any_cast<char>(vs[1]);
auto num = std::any_cast<long>(vs[2]);
switch (ope) {
case '+': result += num; break;
case '-': result -= num; break;
case '*': result *= num; break;
case '/': result /= num; break;
}
2020-02-07 18:28:01 +00:00
}
2020-10-02 01:26:04 +00:00
return result;
};
parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
2021-08-19 06:29:11 +00:00
parser["NUMBER"] = [](const SemanticValues &vs) {
return vs.token_to_number<long>();
};
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
{
auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
long val = 0;
ret = parser.parse(expr, val);
2020-02-07 18:28:01 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(-3, val);
2020-10-02 01:26:04 +00:00
}
{
auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
long val = 0;
ret = parser.parse(expr, val);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(0, val);
2020-10-02 01:26:04 +00:00
}
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(PrecedenceTest, Precedence_climbing_with_literal_operator) {
parser parser(R"(
START <- _ EXPRESSION
EXPRESSION <- ATOM (OPERATOR ATOM)* {
precedence
L '#plus#' - # weaker
L '#multiply#' / # stronger
}
ATOM <- NUMBER / T('(') EXPRESSION T(')')
OPERATOR <- T('#plus#' / '#multiply#' / [-/])
NUMBER <- T('-'? [0-9]+)
~_ <- [ \t]*
T(S) <- < S > _
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!parser);
parser.enable_packrat_parsing();
// Setup actions
parser["EXPRESSION"] = [](const SemanticValues &vs) -> long {
auto result = std::any_cast<long>(vs[0]);
if (vs.size() > 1) {
auto ope = std::any_cast<std::string>(vs[1]);
auto num = std::any_cast<long>(vs[2]);
if (ope == "#plus#") {
result += num;
} else if (ope == "-") {
result -= num;
} else if (ope == "#multiply#") {
result *= num;
} else if (ope == "/") {
result /= num;
}
}
return result;
};
2021-08-19 06:29:11 +00:00
parser["OPERATOR"] = [](const SemanticValues &vs) {
return vs.token_to_string();
};
parser["NUMBER"] = [](const SemanticValues &vs) {
return vs.token_to_number<long>();
};
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
{
2021-08-19 06:29:11 +00:00
auto expr =
" 1 #plus# 2 #multiply# 3 #multiply# (4 - 5 #plus# 6) / 7 - 8 ";
long val = 0;
ret = parser.parse(expr, val);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(-3, val);
}
{
auto expr = "-1#plus#-2--3"; // -1 + -2 - -3 = 0
long val = 0;
ret = parser.parse(expr, val);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(0, val);
}
}
2021-08-19 06:29:11 +00:00
TEST(PrecedenceTest, Precedence_climbing_with_macro) {
2020-10-02 01:26:04 +00:00
// Create a PEG parser
parser parser(R"(
2020-02-08 02:52:54 +00:00
EXPRESSION <- INFIX_EXPRESSION(ATOM, OPERATOR)
INFIX_EXPRESSION(A, O) <- A (O A)* {
precedence
L + -
L * /
}
ATOM <- NUMBER / '(' EXPRESSION ')'
OPERATOR <- < [-+/*] >
NUMBER <- < '-'? [0-9]+ >
%whitespace <- [ \t]*
2020-02-07 20:50:06 +00:00
)");
2020-10-02 01:26:04 +00:00
parser.enable_packrat_parsing();
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-02-07 20:50:06 +00:00
2020-10-02 01:26:04 +00:00
// Setup actions
parser["INFIX_EXPRESSION"] = [](const SemanticValues &vs) -> long {
auto result = std::any_cast<long>(vs[0]);
if (vs.size() > 1) {
auto ope = std::any_cast<char>(vs[1]);
auto num = std::any_cast<long>(vs[2]);
switch (ope) {
case '+': result += num; break;
case '-': result -= num; break;
case '*': result *= num; break;
case '/': result /= num; break;
}
2020-02-07 20:50:06 +00:00
}
2020-10-02 01:26:04 +00:00
return result;
};
parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
2021-08-19 06:29:11 +00:00
parser["NUMBER"] = [](const SemanticValues &vs) {
return vs.token_to_number<long>();
};
2020-02-07 20:50:06 +00:00
2020-10-02 01:26:04 +00:00
{
auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
long val = 0;
ret = parser.parse(expr, val);
2020-02-07 20:50:06 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(-3, val);
2020-10-02 01:26:04 +00:00
}
{
auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
long val = 0;
ret = parser.parse(expr, val);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(0, val);
2020-10-02 01:26:04 +00:00
}
2020-02-07 20:50:06 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(PrecedenceTest, Precedence_climbing_error1) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 20:50:06 +00:00
START <- _ EXPRESSION
EXPRESSION <- ATOM (OPERATOR ATOM1)* {
precedence
L + -
L * /
}
ATOM <- NUMBER / T('(') EXPRESSION T(')')
ATOM1 <- NUMBER / T('(') EXPRESSION T(')')
OPERATOR <- T([-+/*])
NUMBER <- T('-'? [0-9]+)
~_ <- [ \t]*
T(S) <- < S > _
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 20:50:06 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(PrecedenceTest, Precedence_climbing_error2) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 20:50:06 +00:00
START <- _ EXPRESSION
EXPRESSION <- ATOM OPERATOR ATOM {
precedence
L + -
L * /
}
ATOM <- NUMBER / T('(') EXPRESSION T(')')
OPERATOR <- T([-+/*])
NUMBER <- T('-'? [0-9]+)
~_ <- [ \t]*
T(S) <- < S > _
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 20:50:06 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(PrecedenceTest, Precedence_climbing_error3) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 20:50:06 +00:00
EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
PRECEDENCE_PARSING(A, O) <- A (O A)+ {
precedence
2020-02-08 02:52:54 +00:00
L + -
2020-02-07 20:50:06 +00:00
L * /
}
ATOM <- NUMBER / '(' EXPRESSION ')'
OPERATOR <- < [-+/*] >
NUMBER <- < '-'? [0-9]+ >
%whitespace <- [ \t]*
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 20:50:06 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(PackratTest, Packrat_parser_test_with_whitespace) {
2020-10-02 01:26:04 +00:00
peg::parser parser(R"(
2020-02-07 18:28:01 +00:00
ROOT <- 'a'
%whitespace <- SPACE*
SPACE <- ' '
)");
2020-10-02 01:26:04 +00:00
parser.enable_packrat_parsing();
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
auto ret = parser.parse("a");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(PackratTest, Packrat_parser_test_with_macro) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
EXPRESSION <- _ LIST(TERM, TERM_OPERATOR)
TERM <- LIST(FACTOR, FACTOR_OPERATOR)
FACTOR <- NUMBER / T('(') EXPRESSION T(')')
TERM_OPERATOR <- T([-+])
FACTOR_OPERATOR <- T([/*])
NUMBER <- T([0-9]+)
~_ <- [ \t]*
LIST(I, D) <- I (D I)*
T(S) <- < S > _
)");
2020-10-02 01:26:04 +00:00
parser.enable_packrat_parsing();
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
auto ret = parser.parse(" 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(PackratTest, Packrat_parser_test_with_precedence_expression_parser) {
2020-06-09 02:54:35 +00:00
peg::parser parser(R"(
Expression <- Atom (Operator Atom)* { precedence L + - L * / }
Atom <- _? Number _?
Number <- [0-9]+
Operator <- '+' / '-' / '*' / '/'
_ <- ' '+
)");
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-06-09 02:54:35 +00:00
parser.enable_packrat_parsing();
ret = parser.parse(" 1 + 2 * 3 ");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-06-09 02:54:35 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Backreference_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
START <- _ LQUOTE < (!RQUOTE .)* > RQUOTE _
LQUOTE <- 'R"' $delm< [a-zA-Z]* > '('
RQUOTE <- ')' $delm '"'
~_ <- [ \t\r\n]*
)");
2020-10-02 01:26:04 +00:00
std::string token;
parser["START"] = [&](const SemanticValues &vs) { token = vs.token(); };
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
{
token.clear();
auto ret = parser.parse(R"delm(
2020-02-07 18:28:01 +00:00
R"("hello world")"
)delm");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ("\"hello world\"", token);
2020-10-02 01:26:04 +00:00
}
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
{
token.clear();
auto ret = parser.parse(R"delm(
2020-02-07 18:28:01 +00:00
R"foo("(hello world)")foo"
)delm");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ("\"(hello world)\"", token);
2020-10-02 01:26:04 +00:00
}
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
{
token.clear();
auto ret = parser.parse(R"delm(
2020-02-07 18:28:01 +00:00
R"foo("(hello world)foo")foo"
)delm");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
EXPECT_EQ("\"(hello world", token);
2020-10-02 01:26:04 +00:00
}
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
{
token.clear();
auto ret = parser.parse(R"delm(
2020-02-07 18:28:01 +00:00
R"foo("(hello world)")bar"
)delm");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
EXPECT_TRUE(token.empty());
2020-10-02 01:26:04 +00:00
}
2020-02-07 18:28:01 +00:00
}
2022-06-02 19:42:03 +00:00
TEST(BackreferenceTest, Undefined_backreference_test) {
parser parser("S <- $bref");
EXPECT_FALSE(parser);
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Invalid_backreference_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
START <- _ LQUOTE (!RQUOTE .)* RQUOTE _
LQUOTE <- 'R"' $delm< [a-zA-Z]* > '('
RQUOTE <- ')' $delm2 '"'
~_ <- [ \t\r\n]*
)");
2022-06-02 19:42:03 +00:00
EXPECT_FALSE(parser);
EXPECT_FALSE(parser.parse(R"delm(
2020-02-07 18:28:01 +00:00
R"foo("(hello world)")foo"
2022-06-02 19:42:03 +00:00
)delm"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Nested_capture_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
ROOT <- CONTENT
CONTENT <- (ELEMENT / TEXT)*
ELEMENT <- $(STAG CONTENT ETAG)
STAG <- '<' $tag< TAG_NAME > '>'
ETAG <- '</' $tag '>'
TAG_NAME <- 'b' / 'u'
TEXT <- TEXT_DATA
TEXT_DATA <- ![<] .
)");
2022-06-02 19:42:03 +00:00
EXPECT_TRUE(!!parser);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("This is <b>a <u>test</u> text</b>."));
EXPECT_FALSE(parser.parse("This is <b>a <u>test</b> text</u>."));
EXPECT_FALSE(parser.parse("This is <b>a <u>test text</b>."));
EXPECT_FALSE(parser.parse("This is a <u>test</u> text</b>."));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Backreference_with_Prioritized_Choice_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
TREE <- WRONG_BRANCH / CORRECT_BRANCH
WRONG_BRANCH <- BRANCH THAT IS_capture WRONG
CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
BRANCH <- 'branch'
THAT <- 'that'
IS_capture <- $ref<..>
IS_backref <- $ref
WRONG <- 'wrong'
CORRECT <- 'correct'
)");
2022-06-02 19:42:03 +00:00
EXPECT_TRUE(!!parser);
EXPECT_FALSE(parser.parse("branchthatiscorrect"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Backreference_with_Zero_or_More_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
TREE <- WRONG_BRANCH* CORRECT_BRANCH
WRONG_BRANCH <- BRANCH THAT IS_capture WRONG
CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
BRANCH <- 'branch'
THAT <- 'that'
IS_capture <- $ref<..>
IS_backref <- $ref
WRONG <- 'wrong'
CORRECT <- 'correct'
)");
2022-06-02 19:42:03 +00:00
EXPECT_TRUE(!!parser);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("branchthatiswrongbranchthatiscorrect"));
EXPECT_FALSE(parser.parse("branchthatiswrongbranchthatIscorrect"));
EXPECT_FALSE(
parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect"));
EXPECT_TRUE(
2020-10-02 01:26:04 +00:00
parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect"));
2022-06-02 19:42:03 +00:00
EXPECT_FALSE(parser.parse("branchthatiscorrect"));
EXPECT_FALSE(parser.parse("branchthatiswron_branchthatiscorrect"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Backreference_with_One_or_More_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
TREE <- WRONG_BRANCH+ CORRECT_BRANCH
WRONG_BRANCH <- BRANCH THAT IS_capture WRONG
CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
BRANCH <- 'branch'
THAT <- 'that'
IS_capture <- $ref<..>
IS_backref <- $ref
WRONG <- 'wrong'
CORRECT <- 'correct'
)");
2022-06-02 19:42:03 +00:00
EXPECT_TRUE(!!parser);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("branchthatiswrongbranchthatiscorrect"));
EXPECT_FALSE(parser.parse("branchthatiswrongbranchthatIscorrect"));
EXPECT_FALSE(
parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect"));
EXPECT_TRUE(
2020-10-02 01:26:04 +00:00
parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect"));
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser.parse("branchthatiscorrect"));
EXPECT_FALSE(parser.parse("branchthatiswron_branchthatiscorrect"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(BackreferenceTest, Backreference_with_Option_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
TREE <- WRONG_BRANCH? CORRECT_BRANCH
WRONG_BRANCH <- BRANCH THAT IS_capture WRONG
CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT
BRANCH <- 'branch'
THAT <- 'that'
IS_capture <- $ref<..>
IS_backref <- $ref
WRONG <- 'wrong'
CORRECT <- 'correct'
)");
2022-06-02 19:42:03 +00:00
EXPECT_TRUE(!!parser);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("branchthatiswrongbranchthatiscorrect"));
EXPECT_FALSE(parser.parse("branchthatiswrongbranchthatIscorrect"));
EXPECT_FALSE(
parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect"));
EXPECT_FALSE(
parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect"));
2022-06-02 19:42:03 +00:00
EXPECT_FALSE(parser.parse("branchthatiscorrect"));
EXPECT_FALSE(parser.parse("branchthatiswron_branchthatiscorrect"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(RepetitionTest, Repetition_0) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-03-28 22:38:01 +00:00
START <- '(' DIGIT{3} ') ' DIGIT{3} '-' DIGIT{4}
DIGIT <- [0-9]
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("(123) 456-7890"));
EXPECT_FALSE(parser.parse("(12a) 456-7890"));
EXPECT_FALSE(parser.parse("(123) 45-7890"));
EXPECT_FALSE(parser.parse("(123) 45-7a90"));
2020-03-28 22:38:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(RepetitionTest, Repetition_2_4) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-03-28 22:38:01 +00:00
START <- DIGIT{2,4}
DIGIT <- [0-9]
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser.parse("1"));
EXPECT_TRUE(parser.parse("12"));
EXPECT_TRUE(parser.parse("123"));
EXPECT_TRUE(parser.parse("1234"));
EXPECT_FALSE(parser.parse("12345"));
2020-03-28 22:38:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(RepetitionTest, Repetition_2_1) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-03-28 22:38:01 +00:00
START <- DIGIT{2,1} # invalid range
DIGIT <- [0-9]
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser.parse("1"));
EXPECT_TRUE(parser.parse("12"));
EXPECT_FALSE(parser.parse("123"));
2020-03-28 22:38:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(RepetitionTest, Repetition_2) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-03-28 22:38:01 +00:00
START <- DIGIT{2,}
DIGIT <- [0-9]
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser.parse("1"));
EXPECT_TRUE(parser.parse("12"));
EXPECT_TRUE(parser.parse("123"));
EXPECT_TRUE(parser.parse("1234"));
2020-03-28 22:38:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(RepetitionTest, Repetition__2) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-03-28 22:38:01 +00:00
START <- DIGIT{,2}
DIGIT <- [0-9]
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("1"));
EXPECT_TRUE(parser.parse("12"));
EXPECT_FALSE(parser.parse("123"));
EXPECT_FALSE(parser.parse("1234"));
2020-03-28 22:38:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(LeftRecursiveTest, Left_recursive_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
A <- A 'a'
B <- A 'a'
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(LeftRecursiveTest, Left_recursive_with_option_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
A <- 'a' / 'b'? B 'c'
B <- A
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(LeftRecursiveTest, Left_recursive_with_zom_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
A <- 'a'* A*
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(LeftRecursiveTest, Left_recursive_with_a_ZOM_content_rule) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
A <- B
B <- _ A
_ <- ' '* # Zero or more
)");
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(LeftRecursiveTest, Left_recursive_with_empty_string_test) {
2020-10-02 01:26:04 +00:00
parser parser(" A <- '' A");
2020-02-07 18:28:01 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser);
2020-02-07 18:28:01 +00:00
}
2021-08-19 06:29:11 +00:00
TEST(UserRuleTest, User_defined_rule_test) {
2020-10-02 01:26:04 +00:00
auto g = parser(R"(
2020-02-07 18:28:01 +00:00
ROOT <- _ 'Hello' _ NAME '!' _
)",
2020-10-02 01:26:04 +00:00
{{"NAME", usr([](const char *s, size_t n, SemanticValues &,
std::any &) -> size_t {
static std::vector<std::string> names = {"PEG", "BNF"};
for (const auto &name : names) {
if (name.size() <= n &&
!name.compare(0, name.size(), s, name.size())) {
return name.size();
}
}
return static_cast<size_t>(-1);
})},
{"~_", zom(cls(" \t\r\n"))}});
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(g.parse(" Hello BNF! "));
2020-10-02 01:26:04 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(PredicateTest, Semantic_predicate_test) {
2020-10-02 01:26:04 +00:00
parser parser("NUMBER <- [0-9]+");
parser["NUMBER"] = [](const SemanticValues &vs) {
auto val = vs.token_to_number<long>();
if (val != 100) { throw parse_error("value error!!"); }
return val;
};
long val;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("100", val));
EXPECT_EQ(100, val);
2020-10-02 01:26:04 +00:00
parser.log = [](size_t line, size_t col, const std::string &msg) {
2021-08-19 06:29:11 +00:00
EXPECT_EQ(1, line);
EXPECT_EQ(1, col);
EXPECT_EQ("value error!!", msg);
2020-10-02 01:26:04 +00:00
};
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(parser.parse("200", val));
2020-10-02 01:26:04 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(UnicodeTest, Japanese_character) {
2020-10-02 01:26:04 +00:00
peg::parser parser(u8R"(
2020-02-07 18:28:01 +00:00
<- ? ''
<-
<-
<-
<- '' / ''
<- '' / ''
<- '' / ''
<- '' / '' / '' / '' / ''
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-02-07 18:28:01 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse(u8R"(サーバーを復旧します。)"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(UnicodeTest, dot_with_a_code) {
2020-10-02 01:26:04 +00:00
peg::parser parser(" S <- 'a' . 'b' ");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse(u8R"(aあb)"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(UnicodeTest, dot_with_a_char) {
2020-10-02 01:26:04 +00:00
peg::parser parser(" S <- 'a' . 'b' ");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse(u8R"(aåb)"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(UnicodeTest, character_class) {
2020-10-02 01:26:04 +00:00
peg::parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- 'a' [-AさC-Eた-] 'b'
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_FALSE(parser.parse(u8R"(aあb)"));
EXPECT_TRUE(parser.parse(u8R"(aいb)"));
EXPECT_TRUE(parser.parse(u8R"(aうb)"));
EXPECT_TRUE(parser.parse(u8R"(aおb)"));
EXPECT_FALSE(parser.parse(u8R"(aかb)"));
EXPECT_TRUE(parser.parse(u8R"(aAb)"));
EXPECT_FALSE(parser.parse(u8R"(aBb)"));
EXPECT_TRUE(parser.parse(u8R"(aEb)"));
EXPECT_FALSE(parser.parse(u8R"(aFb)"));
EXPECT_FALSE(parser.parse(u8R"(aそb)"));
EXPECT_TRUE(parser.parse(u8R"(aたb)"));
EXPECT_TRUE(parser.parse(u8R"(aちb)"));
EXPECT_TRUE(parser.parse(u8R"(aとb)"));
EXPECT_FALSE(parser.parse(u8R"(aなb)"));
EXPECT_TRUE(parser.parse(u8R"(aはb)"));
EXPECT_FALSE(parser.parse(u8R"(a?b)"));
2020-02-07 18:28:01 +00:00
}
#if 0 // TODO: Unicode Grapheme support
// Disabled: would require `.` to consume a whole grapheme cluster.
TEST(UnicodeTest, dot_with_a_grapheme) {
  peg::parser parser(" S <- 'a' . 'b' ");
  EXPECT_TRUE(parser.parse(u8R"(aसिb)"));
}
#endif
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_simple_test) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- HELLO WORLD
HELLO <- T('hello')
WORLD <- T('world')
T(a) <- a [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("hello \tworld "));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_two_parameters) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- HELLO_WORLD
HELLO_WORLD <- T('hello', 'world')
T(a, b) <- a [ \t]* b [ \t]*
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("hello \tworld "));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_syntax_error) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- T('hello')
T (a) <- a [ \t]*
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_missing_argument) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- T ('hello')
T(a, b) <- a [ \t]* b
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_reference_syntax_error) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- T ('hello')
T(a) <- a [ \t]*
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_invalid_macro_reference_error) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- T('hello')
T <- 'world'
)");
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(ret);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_calculator) {
2020-10-02 01:26:04 +00:00
// Create a PEG parser
parser parser(R"(
2020-02-07 18:28:01 +00:00
# Grammar for simple calculator...
EXPRESSION <- _ LIST(TERM, TERM_OPERATOR)
TERM <- LIST(FACTOR, FACTOR_OPERATOR)
FACTOR <- NUMBER / T('(') EXPRESSION T(')')
TERM_OPERATOR <- T([-+])
FACTOR_OPERATOR <- T([/*])
NUMBER <- T([0-9]+)
~_ <- [ \t]*
LIST(I, D) <- I (D I)*
T(S) <- < S > _
)");
2020-10-02 01:26:04 +00:00
// Setup actions
auto reduce = [](const SemanticValues &vs) {
auto result = std::any_cast<long>(vs[0]);
for (auto i = 1u; i < vs.size(); i += 2) {
auto num = std::any_cast<long>(vs[i + 1]);
auto ope = std::any_cast<char>(vs[i]);
switch (ope) {
case '+': result += num; break;
case '-': result -= num; break;
case '*': result *= num; break;
case '/': result /= num; break;
}
}
return result;
};
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const SemanticValues &vs) {
return static_cast<char>(*vs.sv().data());
};
parser["FACTOR_OPERATOR"] = [](const SemanticValues &vs) {
return static_cast<char>(*vs.sv().data());
};
2021-08-19 06:29:11 +00:00
parser["NUMBER"] = [](const SemanticValues &vs) {
return vs.token_to_number<long>();
};
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
bool ret = parser;
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
2020-02-07 18:28:01 +00:00
2020-10-02 01:26:04 +00:00
auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
long val = 0;
ret = parser.parse(expr, val);
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(ret);
EXPECT_EQ(-3, val);
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_expression_arguments) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- M('hello' / 'Hello', 'world' / 'World')
M(arg0, arg1) <- arg0 [ \t]+ arg1
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse("Hello world"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
TEST(MacroTest, Macro_recursive) {
2020-10-02 01:26:04 +00:00
parser parser(R"(
2020-02-07 18:28:01 +00:00
S <- M('abc')
M(s) <- !s / s ' ' M(s / '123') / s
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(parser.parse(""));
EXPECT_TRUE(parser.parse("abc"));
EXPECT_TRUE(parser.parse("abc abc"));
EXPECT_TRUE(parser.parse("abc 123 abc"));
2020-02-07 18:28:01 +00:00
}
2022-05-27 00:22:15 +00:00
// Recursive macro calls whose argument wraps the parameter `s` in each kind
// of operator (`*`, `+`, `?`, `&`, `!`, token boundary, ignore). Every
// variant must accept the same input.
TEST(MacroTest, Macro_recursive2) {
  const std::vector<const char *> grammars{
      "S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s",
      "S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s",
      "S <- M('abc') M(s) <- !s / s ' ' M(s? '-' '123') / s",
      "S <- M('abc') M(s) <- !s / s ' ' M(&s s+ '-' '123') / s",
      "S <- M('abc') M(s) <- !s / s ' ' M(s '-' !s '123') / s",
      "S <- M('abc') M(s) <- !s / s ' ' M(< s > '-' '123') / s",
      "S <- M('abc') M(s) <- !s / s ' ' M(~s '-' '123') / s",
  };

  for (const char *grammar : grammars) {
    parser pg(grammar);
    EXPECT_TRUE(pg.parse("abc abc-123"));
  }
}
2022-05-27 00:22:15 +00:00
// Uses a recursive macro to accumulate the modifiers seen so far in its
// `Appeared` argument; the `!Appeared` predicate then rejects a repeated
// modifier.
TEST(MacroTest, Macro_exclusive_modifiers) {
  parser pg(R"(
S <- Modifiers(!"") _
Modifiers(Appeared) <- (!Appeared) (
Token('public') Modifiers(Appeared / 'public') /
Token('static') Modifiers(Appeared / 'static') /
Token('final') Modifiers(Appeared / 'final') /
"")
Token(t) <- t _
_ <- [ \t\r\n]*
)");

  // Each modifier on its own, and all three together, are accepted.
  EXPECT_TRUE(pg.parse("public"));
  EXPECT_TRUE(pg.parse("static"));
  EXPECT_TRUE(pg.parse("final"));
  EXPECT_TRUE(pg.parse("public static final"));

  // A modifier that appears twice is rejected.
  EXPECT_FALSE(pg.parse("public public"));
  EXPECT_FALSE(pg.parse("public static public"));
}
2022-05-27 00:22:15 +00:00
// Checks is_token() for rules of the calculator grammar that are defined
// through the `T(S) <- < S > _` and `LIST(I, D)` macros.
TEST(MacroTest, Macro_token_check_test) {
  parser pg(R"(
# Grammar for simple calculator...
EXPRESSION <- _ LIST(TERM, TERM_OPERATOR)
TERM <- LIST(FACTOR, FACTOR_OPERATOR)
FACTOR <- NUMBER / T('(') EXPRESSION T(')')
TERM_OPERATOR <- T([-+])
FACTOR_OPERATOR <- T([/*])
NUMBER <- T([0-9]+)
~_ <- [ \t]*
LIST(I, D) <- I (D I)*
T(S) <- < S > _
)");

  // Rules expected NOT to be reported as tokens.
  EXPECT_FALSE(pg["EXPRESSION"].is_token());
  EXPECT_FALSE(pg["TERM"].is_token());
  EXPECT_FALSE(pg["FACTOR"].is_token());
  EXPECT_FALSE(pg["LIST"].is_token());

  // Rules expected to be reported as tokens.
  EXPECT_TRUE(pg["FACTOR_OPERATOR"].is_token());
  EXPECT_TRUE(pg["NUMBER"].is_token());
  EXPECT_TRUE(pg["_"].is_token());
  EXPECT_TRUE(pg["T"].is_token());
}
2022-05-27 00:22:15 +00:00
// A rule reference (`C`) can be passed as a macro argument.
TEST(MacroTest, Macro_passes_an_arg_to_another_macro) {
  parser pg(R"(
A <- B(C)
B(D) <- D
C <- 'c'
)");

  const bool matched = pg.parse("c");
  EXPECT_TRUE(matched);
}
2022-05-27 00:22:15 +00:00
// `D` is both a macro parameter name and a rule that nothing references.
TEST(MacroTest, Unreferenced_rule) {
  parser pg(R"(
A <- B(C)
B(D) <- D
C <- 'c'
D <- 'd'
)");

  // This is OK, because it's a warning, not an error.
  EXPECT_TRUE(!!pg);
}
2022-05-27 00:22:15 +00:00
// A macro forwards its own parameter to a second macro (B -> C).
TEST(MacroTest, Nested_macro_call) {
  parser pg(R"(
A <- B(T)
B(X) <- C(X)
C(Y) <- Y
T <- 'val'
)");

  const bool matched = pg.parse("val");
  EXPECT_TRUE(matched);
}
2022-05-27 00:22:15 +00:00
// A two-parameter macro forwards only its first argument to a nested macro;
// the second argument (`Aarg2`) is never used in the body.
TEST(MacroTest, Nested_macro_call2) {
  parser pg(R"(
START <- A('TestVal1', 'TestVal2')+
A(Aarg1, Aarg2) <- B(Aarg1) '#End'
B(Barg1) <- '#' Barg1
)");

  const bool matched = pg.parse("#TestVal1#End");
  EXPECT_TRUE(matched);
}
2021-08-19 06:29:11 +00:00
// Verifies that SemanticValues::line_info() reports the 1-based
// (line, column) position at which each WORD match starts.
TEST(LineInformationTest, Line_information_test) {
  parser pg(R"(
S <- _ (WORD _)+
WORD <- [A-Za-z]+
~_ <- [ \t\r\n]+
)");

  // Stop here if the grammar did not load; installing an action on an
  // invalid parser would not be meaningful.
  ASSERT_TRUE(!!pg);

  // Record the position of every WORD in the order it is matched.
  std::vector<std::pair<size_t, size_t>> locations;
  pg["WORD"] = [&](const peg::SemanticValues &vs) {
    locations.push_back(vs.line_info());
  };

  // On the second line the words are separated by TWO spaces, so that "Fri"
  // and "Sat" start at columns 6 and 11 as the expectations below require.
  EXPECT_TRUE(pg.parse(" Mon Tue Wed \nThu  Fri  Sat\nSun\n"));

  // Comparing whole vectors checks the number of matches as well as every
  // position, and never indexes `locations` out of range if fewer words were
  // matched than expected.
  const std::vector<std::pair<size_t, size_t>> expected{
      {1, 2}, {1, 6}, {1, 10}, {2, 1}, {2, 6}, {2, 11}, {3, 1},
  };
  EXPECT_EQ(expected, locations);
}
2022-05-27 00:22:15 +00:00
// The `|` operator builds a dictionary of literals; both a word and a longer
// word that starts with it ('Jan' / 'January') must be matched correctly.
TEST(DicTest, Dictionary) {
  parser pg(R"(
START <- 'This month is ' MONTH '.'
MONTH <- 'Jan' | 'January' | 'Feb' | 'February'
)");

  // Words contained in the dictionary.
  EXPECT_TRUE(pg.parse("This month is Jan."));
  EXPECT_TRUE(pg.parse("This month is January."));

  // A misspelled word and a missing word are both rejected.
  EXPECT_FALSE(pg.parse("This month is Jannuary."));
  EXPECT_FALSE(pg.parse("This month is ."));
}
2022-05-27 00:22:15 +00:00
// A dictionary that mixes a non-literal alternative (`[a-z]+`) with literals
// is expected to make the grammar fail to load.
TEST(DicTest, Dictionary_invalid) {
  parser pg(R"(
START <- 'This month is ' MONTH '.'
MONTH <- 'Jan' | 'January' | [a-z]+ | 'Feb' | 'February'
)");

  EXPECT_FALSE(!!pg);
}
2022-05-27 00:22:15 +00:00
// Default syntax-error message when the failing rule is a dictionary
// (`B <- 'hello' | 'world'`): the expected diagnostic names the rule (`<B>`).
TEST(ErrorTest, Default_error_handling_1) {
  parser pg(R"(
S <- '@' A B
A <- < [a-z]+ >
B <- 'hello' | 'world'
%whitespace <- [ ]*
%word <- [a-z]
)");
  EXPECT_TRUE(!!pg);

  // Diagnostics expected from the logger, in order, as "line:col: message".
  const std::vector<std::string> errors{
      R"(1:8: syntax error, unexpected 'typo', expecting <B>.)",
  };

  size_t i = 0; // index of the next expected diagnostic
  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
    std::stringstream ss;
    ss << ln << ":" << col << ": " << msg;
    // An extra diagnostic must fail the test instead of reading past the end
    // of `errors`.
    ASSERT_LT(i, errors.size()) << "unexpected diagnostic: " << ss.str();
    EXPECT_EQ(errors[i++], ss.str());
  };

  EXPECT_FALSE(pg.parse(" @ aaa typo "));

  // Every expected diagnostic must actually have been reported.
  EXPECT_EQ(errors.size(), i);
}
// Default syntax-error message when the failing rule is a prioritized choice
// (`B <- 'hello' / 'world'`): the expected diagnostic lists both literals.
TEST(ErrorTest, Default_error_handling_2) {
  parser pg(R"(
S <- '@' A B
A <- < [a-z]+ >
B <- 'hello' / 'world'
%whitespace <- ' '*
%word <- [a-z]
)");
  EXPECT_TRUE(!!pg);

  // Diagnostics expected from the logger, in order, as "line:col: message".
  const std::vector<std::string> errors{
      R"(1:8: syntax error, unexpected 'typo', expecting 'hello', 'world'.)",
  };

  size_t i = 0; // index of the next expected diagnostic
  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
    std::stringstream ss;
    ss << ln << ":" << col << ": " << msg;
    // An extra diagnostic must fail the test instead of reading past the end
    // of `errors`.
    ASSERT_LT(i, errors.size()) << "unexpected diagnostic: " << ss.str();
    EXPECT_EQ(errors[i++], ss.str());
  };

  EXPECT_FALSE(pg.parse(" @ aaa typo "));

  // Every expected diagnostic must actually have been reported.
  EXPECT_EQ(errors.size(), i);
}
2022-05-28 01:48:12 +00:00
// Default syntax-error message for a small "fib" language: the misspelled
// keyword `frm` must be reported with `'from'` as the expected token.
TEST(ErrorTest, Default_error_handling_fiblang) {
  // Every rule needs a definition operator between its name and its
  // expression; `<-` is used here, as in the other grammars of this file.
  parser pg(R"(
# Syntax
START <- STATEMENTS
STATEMENTS <- (DEFINITION / EXPRESSION)*
DEFINITION <- 'def' Identifier '(' Identifier ')' EXPRESSION
EXPRESSION <- TERNARY
TERNARY <- CONDITION ('?' EXPRESSION (':' / %recover(col)) EXPRESSION)?
CONDITION <- INFIX (ConditionOperator INFIX)?
INFIX <- CALL (InfixOperator CALL)*
CALL <- PRIMARY ('(' EXPRESSION ')')?
PRIMARY <- FOR / Identifier / '(' EXPRESSION ')' / Number
FOR <- 'for' Identifier 'from' Number 'to' Number EXPRESSION
# Token
ConditionOperator <- '<'
InfixOperator <- '+' / '-'
Identifier <- !Keyword < [a-zA-Z][a-zA-Z0-9_]* >
Number <- < [0-9]+ >
Keyword <- 'def' / 'for' / 'from' / 'to'
%whitespace <- [ \t\r\n]*
%word <- [a-zA-Z]
col <- '' { message "missing colon." }
)");
  EXPECT_TRUE(!!pg);

  // Diagnostics expected from the logger, in order, as "line:col: message".
  const std::vector<std::string> errors{
      R"(4:7: syntax error, unexpected 'frm', expecting 'from'.)",
  };

  size_t i = 0; // index of the next expected diagnostic
  pg.log = [&](size_t ln, size_t col, const std::string &msg) {
    std::stringstream ss;
    ss << ln << ":" << col << ": " << msg;
    // An extra diagnostic must fail the test instead of reading past the end
    // of `errors`.
    ASSERT_LT(i, errors.size()) << "unexpected diagnostic: " << ss.str();
    EXPECT_EQ(errors[i++], ss.str());
  };

  // The blank line after the definition puts the `for` statement on line 4,
  // so `frm` starts at 4:7 as the expected diagnostic states.
  EXPECT_FALSE(pg.parse(R"(def fib(x)
x < 2 ? 1 : fib(x - 2) + fib(x - 1)

for n frm 1 to 30
puts(fib(n))
)"));

  // Every expected diagnostic must actually have been reported.
  EXPECT_EQ(errors.size(), i);
}
2022-05-27 00:22:15 +00:00
// Error recovery on a "sections with entries" grammar. Recovery is attached
// with `']'^header` and `%recover(entry)`; the `header` and `entry` rules
// carry the `message` text that is expected in the diagnostics.
// The test expects parse() to return false, the logger to receive the four
// messages in `errors` in order, and the optimized AST to match the dump at
// the end.
// NOTE(review): `errors[i++]` is not bounds-checked and nothing asserts that
// `i == errors.size()` afterwards, so extra or missing diagnostics are not
// detected cleanly.
// NOTE(review): the expected line/column numbers and the AST dump depend on
// the exact whitespace and blank lines inside the raw strings below; confirm
// them against the upstream file.
TEST(ErrorTest, Error_recovery_1) {
2021-01-13 15:11:06 +00:00
parser pg(R"(
2021-01-18 23:06:54 +00:00
START <- __? SECTION*
SECTION <- HEADER __ ENTRIES __?
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
HEADER <- '[' _ CATEGORY (':' _ ATTRIBUTES)? ']'^header
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
CATEGORY <- < [-_a-zA-Z0-9\u0080-\uFFFF ]+ > _
ATTRIBUTES <- ATTRIBUTE (',' _ ATTRIBUTE)*
ATTRIBUTE <- < [-_a-zA-Z0-9\u0080-\uFFFF]+ > _
2021-01-13 15:11:06 +00:00
2021-01-22 01:56:05 +00:00
ENTRIES <- (ENTRY (__ ENTRY)*)? { no_ast_opt }
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
ENTRY <- ONE_WAY PHRASE ('|' _ PHRASE)* !'='
/ PHRASE ('|' _ PHRASE)+ !'='
/ %recover(entry)
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
ONE_WAY <- PHRASE '=' _
PHRASE <- WORD (' ' WORD)* _
WORD <- < (![ \t\r\n=|[\]#] .)+ >
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
~__ <- _ (comment? nl _)+
~_ <- [ \t]*
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
comment <- ('#' (!nl .)*)
nl <- '\r'? '\n'
2021-01-13 15:11:06 +00:00
2021-01-18 23:06:54 +00:00
header <- (!__ .)* { message "invalid section header, missing ']'." }
2021-02-04 15:45:18 +00:00
entry <- (!(__ / HEADER) .)+ { message "invalid entry." }
2021-01-13 15:11:06 +00:00
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2021-01-13 15:11:06 +00:00
// Diagnostics expected from the logger, in order, as "line:col: message".
std::vector<std::string> errors{
2021-08-19 06:29:11 +00:00
R"(3:1: invalid entry.)",
R"(7:1: invalid entry.)",
R"(10:11: invalid section header, missing ']'.)",
R"(18:1: invalid entry.)",
2021-01-13 15:11:06 +00:00
};
size_t i = 0; // index of the next expected diagnostic
pg.log = [&](size_t ln, size_t col, const std::string &msg) {
std::stringstream ss;
ss << ln << ":" << col << ": " << msg;
2021-08-19 06:29:11 +00:00
EXPECT_EQ(errors[i++], ss.str());
2021-01-13 15:11:06 +00:00
};
pg.enable_ast();
std::shared_ptr<Ast> ast;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(R"([Section 1]
2021-01-15 21:51:36 +00:00
111 = 222 | 333
2021-01-13 15:11:06 +00:00
aaa || bbb
ccc = ddd
2021-01-15 21:51:36 +00:00
[Section 2]
2021-01-13 15:11:06 +00:00
eee
2021-01-15 21:51:36 +00:00
fff | ggg
[Section 3
hhh | iii
2021-01-13 15:11:06 +00:00
2021-01-15 21:51:36 +00:00
[Section ]
ppp | qqq
2021-01-13 15:11:06 +00:00
2021-01-15 21:51:36 +00:00
[Section 4]
jjj | kkk
lll = mmm | nnn = ooo
2021-01-13 15:11:06 +00:00
2021-01-15 21:51:36 +00:00
[Section 5]
rrr | sss
2021-01-13 15:11:06 +00:00
2021-08-19 06:29:11 +00:00
)",
ast));
2021-01-13 15:11:06 +00:00
2021-01-22 01:56:05 +00:00
// The AST is optimized before being compared with the expected dump.
ast = pg.optimize_ast(ast);
2021-01-13 15:11:06 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_EQ(R"(+ START
2021-01-13 15:11:06 +00:00
+ SECTION
2021-01-15 21:51:36 +00:00
- HEADER/0[CATEGORY] (Section 1)
2021-01-13 15:11:06 +00:00
+ ENTRIES
2021-01-15 21:51:36 +00:00
+ ENTRY/0
- ONE_WAY/0[WORD] (111)
- PHRASE/0[WORD] (222)
- PHRASE/0[WORD] (333)
2021-01-13 15:11:06 +00:00
+ ENTRY/2
+ ENTRY/0
- ONE_WAY/0[WORD] (ccc)
- PHRASE/0[WORD] (ddd)
+ SECTION
2021-01-15 21:51:36 +00:00
- HEADER/0[CATEGORY] (Section 2)
2021-01-13 15:11:06 +00:00
+ ENTRIES
+ ENTRY/2
+ ENTRY/1
2021-01-15 21:51:36 +00:00
- PHRASE/0[WORD] (fff)
- PHRASE/0[WORD] (ggg)
2021-01-18 23:06:54 +00:00
+ SECTION
- HEADER/0[CATEGORY] (Section 3)
+ ENTRIES
2021-01-15 21:51:36 +00:00
+ ENTRY/1
- PHRASE/0[WORD] (hhh)
2021-01-13 15:11:06 +00:00
- PHRASE/0[WORD] (iii)
2021-01-15 21:51:36 +00:00
+ SECTION
- HEADER/0[CATEGORY] (Section )
+ ENTRIES
+ ENTRY/1
- PHRASE/0[WORD] (ppp)
- PHRASE/0[WORD] (qqq)
+ SECTION
- HEADER/0[CATEGORY] (Section 4)
+ ENTRIES
+ ENTRY/1
- PHRASE/0[WORD] (jjj)
- PHRASE/0[WORD] (kkk)
+ ENTRY/2
+ SECTION
- HEADER/0[CATEGORY] (Section 5)
+ ENTRIES
+ ENTRY/1
- PHRASE/0[WORD] (rrr)
- PHRASE/0[WORD] (sss)
2022-05-28 01:50:27 +00:00
)",
ast_to_s(ast));
2021-01-13 15:11:06 +00:00
}
2022-05-27 00:22:15 +00:00
// Error recovery with inline `%recover(...)` expressions (no labelled
// recovery rules) on a list-of-bracketed-entries grammar, so the diagnostics
// expected here are the default "syntax error" messages.
// The test expects parse() to return false, the logger to receive the
// messages in `errors` in order, and the optimized AST to match the dump at
// the end.
// NOTE(review): `errors[i++]` is not bounds-checked and nothing asserts that
// `i == errors.size()` afterwards, so extra or missing diagnostics are not
// detected cleanly.
// NOTE(review): the expected positions (e.g. `2:3`) and the AST dump depend
// on the exact whitespace inside the raw strings below; confirm them against
// the upstream file.
TEST(ErrorTest, Error_recovery_2) {
2021-01-13 15:11:06 +00:00
parser pg(R"(
2021-01-26 03:46:28 +00:00
START <- ENTRY ((',' ENTRY) / %recover((!(',' / Space) .)+))* (_ / %recover(.*))
2021-01-13 15:11:06 +00:00
ENTRY <- '[' ITEM (',' ITEM)* ']'
2021-01-15 21:51:36 +00:00
ITEM <- WORD / NUM / %recover((!(',' / ']') .)+)
2021-01-13 15:11:06 +00:00
NUM <- [0-9]+ ![a-z]
WORD <- '"' [a-z]+ '"'
2022-05-27 00:22:15 +00:00
~_ <- Space*
2021-01-13 15:11:06 +00:00
Space <- [ \n]
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2021-01-13 15:11:06 +00:00
// Diagnostics expected from the logger, in order, as "line:col: message".
std::vector<std::string> errors{
2022-05-27 00:22:15 +00:00
R"(1:6: syntax error, unexpected ']', expecting ','.)",
2021-08-19 06:29:11 +00:00
R"(1:18: syntax error, unexpected 'z', expecting <NUM>.)",
2022-05-27 00:22:15 +00:00
R"(1:24: syntax error, unexpected ',', expecting '"'.)",
R"(1:31: syntax error, unexpected 'ccc', expecting '"', <NUM>.)",
R"(1:38: syntax error, unexpected 'ddd', expecting '"', <NUM>.)",
R"(1:55: syntax error, unexpected ']', expecting '"'.)",
R"(1:58: syntax error, unexpected '\n', expecting '"', <NUM>.)",
2022-05-28 01:48:12 +00:00
R"(2:3: syntax error, expecting ']'.)",
2021-01-13 15:11:06 +00:00
};
size_t i = 0; // index of the next expected diagnostic
pg.log = [&](size_t ln, size_t col, const std::string &msg) {
std::stringstream ss;
ss << ln << ":" << col << ": " << msg;
2021-08-19 06:29:11 +00:00
EXPECT_EQ(errors[i++], ss.str());
2021-01-13 15:11:06 +00:00
};
pg.enable_ast();
std::shared_ptr<Ast> ast;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(
pg.parse(R"([000]],[111],[222z,"aaa,"bbb",ccc"],[ddd",444,555,"eee],[
)",
ast));
2021-01-13 15:11:06 +00:00
2021-01-22 01:56:05 +00:00
// The AST is optimized before being compared with the expected dump.
ast = pg.optimize_ast(ast);
2021-01-13 15:11:06 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_EQ(R"(+ START
2021-01-13 15:11:06 +00:00
- ENTRY/0[NUM] (000)
- ENTRY/0[NUM] (111)
+ ENTRY
+ ITEM/2
+ ITEM/2
- ITEM/0[WORD] ("bbb")
+ ITEM/2
+ ENTRY
+ ITEM/2
- ITEM/1[NUM] (444)
- ITEM/1[NUM] (555)
+ ITEM/2
2021-08-19 06:29:11 +00:00
)",
ast_to_s(ast));
2021-01-13 15:11:06 +00:00
}
2021-01-16 01:18:19 +00:00
2022-05-27 00:22:15 +00:00
// Error recovery with labelled failures (`expr^label`) and `%recover(label)`
// on the sections/entries grammar. Each label rule in the "# Recovery" part
// supplies the `message` text expected in the diagnostics; `%c` appears in
// two messages and the expected diagnostics show a character in its place.
// The grammar uses the `R"~( ... )~"` raw-string delimiter because its
// messages contain the sequence `)"`.
// The test expects parse() to return false, the logger to receive the
// messages in `errors` in order, and the optimized AST to match the dump at
// the end.
// NOTE(review): `errors[i++]` is not bounds-checked and nothing asserts that
// `i == errors.size()` afterwards, so extra or missing diagnostics are not
// detected cleanly.
// NOTE(review): the expected line/column numbers and the AST dump depend on
// the exact whitespace and blank lines inside the raw strings below; confirm
// them against the upstream file.
TEST(ErrorTest, Error_recovery_3) {
2021-02-04 15:45:18 +00:00
parser pg(R"~(
# Grammar
START <- __? SECTION*
SECTION <- HEADER __ ENTRIES __?
HEADER <- '['^missing_bracket _ CATEGORY (':' _ ATTRIBUTES)? ']'^missing_bracket ___
CATEGORY <- < (&[-_a-zA-Z0-9\u0080-\uFFFF ] (![\u0080-\uFFFF])^vernacular_char .)+ > _
ATTRIBUTES <- ATTRIBUTE (',' _ ATTRIBUTE)*
ATTRIBUTE <- < [-_a-zA-Z0-9]+ > _
ENTRIES <- (ENTRY (__ ENTRY)*)? { no_ast_opt }
ENTRY <- ONE_WAY PHRASE^expect_phrase (or _ PHRASE^expect_phrase)* ___
/ PHRASE (or^missing_or _ PHRASE^expect_phrase) (or _ PHRASE^expect_phrase)* ___ { no_ast_opt }
ONE_WAY <- PHRASE assign _
PHRASE <- WORD (' ' WORD)* _ { no_ast_opt }
WORD <- < (![ \t\r\n=|[\]#] (![*?] / %recover(wildcard)) .)+ >
~assign <- '=' ____
~or <- '|' (!'|')^duplicate_or ____
~_ <- [ \t]*
~__ <- _ (comment? nl _)+
~___ <- (!operators)^invalid_ope
~____ <- (!operators)^invalid_ope_comb
operators <- [|=]+
comment <- ('#' (!nl .)*)
nl <- '\r'? '\n'
# Recovery
duplicate_or <- skip_puncs { message "Duplicate OR operator (|)" }
missing_or <- '' { message "Missing OR operator (|)" }
missing_bracket <- skip_puncs { message "Missing opening/closing square bracket" }
expect_phrase <- skip { message "Expect phrase" }
invalid_ope_comb <- skip_puncs { message "Use of invalid operator combination" }
invalid_ope <- skip { message "Use of invalid operator" }
wildcard <- '' { message "Wildcard characters (%c) should not be used" }
vernacular_char <- '' { message "Section name %c must be in English" }
skip <- (!(__) .)*
skip_puncs <- [|=]* _
)~");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2021-02-04 15:45:18 +00:00
// Diagnostics expected from the logger, in order, as "line:col: message".
std::vector<std::string> errors{
2021-08-19 06:29:11 +00:00
R"(3:7: Wildcard characters (*) should not be used)",
R"(4:6: Wildcard characters (?) should not be used)",
R"(5:6: Duplicate OR operator (|))",
R"(9:4: Missing OR operator (|))",
R"(11:16: Expect phrase)",
R"(13:11: Missing opening/closing square bracket)",
R"(16:10: Section name 日 must be in English)",
R"(16:11: Section name 本 must be in English)",
R"(16:12: Section name 語 must be in English)",
R"(16:13: Section name で must be in English)",
R"(16:14: Section name す must be in English)",
R"(21:17: Use of invalid operator)",
R"(24:10: Use of invalid operator combination)",
R"(26:10: Missing OR operator (|))",
2021-02-04 15:45:18 +00:00
};
size_t i = 0; // index of the next expected diagnostic
pg.log = [&](size_t ln, size_t col, const std::string &msg) {
std::stringstream ss;
ss << ln << ":" << col << ": " << msg;
2021-08-19 06:29:11 +00:00
EXPECT_EQ(errors[i++], ss.str());
2021-02-04 15:45:18 +00:00
};
pg.enable_ast();
std::shared_ptr<Ast> ast;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(R"([Section 1]
2021-02-04 15:45:18 +00:00
111 = 222 | 333
AAA BB* | CCC
AAA B?B | CCC
aaa || bbb
ccc = ddd
[Section 2]
eee
fff | ggg
fff | ggg 111 |
[Section 3
hhh | iii
[Section ]
ppp | qqq
[Section 4]
jjj | kkk
lll = mmm | nnn = ooo
[Section 5]
ppp qqq |= rrr
Section 6]
sss | ttt
2021-08-19 06:29:11 +00:00
)",
ast));
2021-02-04 15:45:18 +00:00
// The AST is optimized before being compared with the expected dump.
ast = pg.optimize_ast(ast);
2021-08-19 06:29:11 +00:00
EXPECT_EQ(R"(+ START
2021-02-04 15:45:18 +00:00
+ SECTION
- HEADER/0[CATEGORY] (Section 1)
+ ENTRIES
+ ENTRY/0
+ ONE_WAY/0[PHRASE]
- WORD (111)
+ PHRASE
- WORD (222)
+ PHRASE
- WORD (333)
+ ENTRY/1
+ PHRASE
- WORD (AAA)
- WORD (BB*)
+ PHRASE
- WORD (CCC)
+ ENTRY/1
+ PHRASE
- WORD (AAA)
- WORD (B?B)
+ PHRASE
- WORD (CCC)
+ ENTRY/1
+ PHRASE
- WORD (aaa)
+ PHRASE
- WORD (bbb)
+ ENTRY/0
+ ONE_WAY/0[PHRASE]
- WORD (ccc)
+ PHRASE
- WORD (ddd)
+ SECTION
- HEADER/0[CATEGORY] (Section 2)
+ ENTRIES
+ ENTRY/1
+ PHRASE
- WORD (eee)
+ ENTRY/1
+ PHRASE
- WORD (fff)
+ PHRASE
- WORD (ggg)
+ ENTRY/1
+ PHRASE
- WORD (fff)
+ PHRASE
- WORD (ggg)
- WORD (111)
+ SECTION
- HEADER/0[CATEGORY] (Section 3)
+ ENTRIES
+ ENTRY/1
+ PHRASE
- WORD (hhh)
+ PHRASE
- WORD (iii)
+ SECTION
- HEADER/0[CATEGORY] (Section )
+ ENTRIES
+ ENTRY/1
+ PHRASE
- WORD (ppp)
+ PHRASE
- WORD (qqq)
+ SECTION
- HEADER/0[CATEGORY] (Section 4)
+ ENTRIES
+ ENTRY/1
+ PHRASE
- WORD (jjj)
+ PHRASE
- WORD (kkk)
+ ENTRY/0
+ ONE_WAY/0[PHRASE]
- WORD (lll)
+ PHRASE
- WORD (mmm)
+ PHRASE
- WORD (nnn)
+ SECTION
- HEADER/0[CATEGORY] (Section 5)
+ ENTRIES
+ ENTRY/1
+ PHRASE
- WORD (ppp)
- WORD (qqq)
+ PHRASE
- WORD (rrr)
+ ENTRY/1
+ PHRASE
- WORD (Section)
- WORD (6)
+ ENTRY/1
+ PHRASE
- WORD (sss)
+ PHRASE
- WORD (ttt)
2022-05-28 01:50:27 +00:00
)",
ast_to_s(ast));
2021-02-04 15:45:18 +00:00
}
2022-05-27 00:22:15 +00:00
// Error recovery on a small Java-like grammar using two labelled failures:
// `RCUR^rcblk` (recovers with `SkipToRCUR`) and `SEMI^semia` (recovers with
// the empty string). The input omits the `;` after `n = n - 1`.
// The test expects parse() to return false, the logger to receive the two
// messages in `errors` in order, and the optimized AST to match the dump at
// the end.
// NOTE(review): the grammar rules below show no definition operator between
// rule name and expression, unlike the `<-` used by the other grammars in
// this file; confirm against the upstream file (possibly a lost arrow
// character).
// NOTE(review): `errors[i++]` is not bounds-checked and nothing asserts that
// `i == errors.size()` afterwards, so extra or missing diagnostics are not
// detected cleanly.
// NOTE(review): the expected line/column numbers and the AST dump depend on
// the exact whitespace inside the raw strings below; confirm them against
// the upstream file.
TEST(ErrorTest, Error_recovery_Java) {
2021-01-16 01:18:19 +00:00
parser pg(R"(
Prog PUBLIC CLASS NAME LCUR PUBLIC STATIC VOID MAIN LPAR STRING LBRA RBRA NAME RPAR BlockStmt RCUR
BlockStmt LCUR (Stmt)* RCUR^rcblk
Stmt IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt
IfStmt IF LPAR Exp RPAR Stmt (ELSE Stmt)?
WhileStmt WHILE LPAR Exp RPAR Stmt
DecStmt INT NAME (ASSIGN Exp)? SEMI
AssignStmt NAME ASSIGN Exp SEMI^semia
PrintStmt PRINTLN LPAR Exp RPAR SEMI
Exp RelExp (EQ RelExp)*
RelExp AddExp (LT AddExp)*
AddExp MulExp ((PLUS / MINUS) MulExp)*
MulExp AtomExp ((TIMES / DIV) AtomExp)*
AtomExp LPAR Exp RPAR / NUMBER / NAME
NUMBER < [0-9]+ >
NAME < [a-zA-Z_][a-zA-Z_0-9]* >
~LPAR '('
~RPAR ')'
~LCUR '{'
~RCUR '}'
~LBRA '['
~RBRA ']'
~SEMI ';'
~EQ '=='
~LT '<'
~ASSIGN '='
~IF 'if'
~ELSE 'else'
~WHILE 'while'
PLUS '+'
MINUS '-'
TIMES '*'
DIV '/'
CLASS 'class'
PUBLIC 'public'
STATIC 'static'
VOID 'void'
INT 'int'
MAIN 'main'
STRING 'String'
PRINTLN 'System.out.println'
%whitespace [ \t\n]*
%word NAME
# Throw operator labels
rcblk SkipToRCUR { message "missing end of block." }
semia '' { message "missing simicolon in assignment." }
# Recovery expressions
SkipToRCUR (!RCUR (LCUR SkipToRCUR / .))* RCUR
)");
2021-08-19 06:29:11 +00:00
EXPECT_TRUE(!!pg);
2021-01-16 01:18:19 +00:00
// Diagnostics expected from the logger, in order, as "line:col: message".
std::vector<std::string> errors{
2021-08-19 06:29:11 +00:00
R"(8:5: missing simicolon in assignment.)",
R"(8:6: missing end of block.)",
2021-01-16 01:18:19 +00:00
};
size_t i = 0; // index of the next expected diagnostic
pg.log = [&](size_t ln, size_t col, const std::string &msg) {
std::stringstream ss;
ss << ln << ":" << col << ": " << msg;
2021-08-19 06:29:11 +00:00
EXPECT_EQ(errors[i++], ss.str());
2021-01-16 01:18:19 +00:00
};
pg.enable_ast();
std::shared_ptr<Ast> ast;
2021-08-19 06:29:11 +00:00
EXPECT_FALSE(pg.parse(R"(public class Example {
2021-01-16 01:18:19 +00:00
public static void main(String[] args) {
int n = 5;
int f = 1;
while(0 < n) {
f = f * n;
n = n - 1
};
System.out.println(f);
}
}
2021-08-19 06:29:11 +00:00
)",
ast));
2021-01-16 01:18:19 +00:00
2021-01-22 01:56:05 +00:00
// The AST is optimized before being compared with the expected dump.
ast = pg.optimize_ast(ast);
2021-01-16 01:18:19 +00:00
2021-08-19 06:29:11 +00:00
EXPECT_EQ(R"(+ Prog
2021-01-16 01:18:19 +00:00
- PUBLIC (public)
- CLASS (class)
- NAME (Example)
- PUBLIC (public)
- STATIC (static)
- VOID (void)
- MAIN (main)
- STRING (String)
- NAME (args)
+ BlockStmt
+ Stmt/3[DecStmt]
- INT (int)
- NAME (n)
- Exp/0[NUMBER] (5)
+ Stmt/3[DecStmt]
- INT (int)
- NAME (f)
- Exp/0[NUMBER] (1)
+ Stmt/1[WhileStmt]
+ Exp/0[RelExp]
- AddExp/0[NUMBER] (0)
- AddExp/0[NAME] (n)
+ Stmt/5[BlockStmt]
+ Stmt/4[AssignStmt]
- NAME (f)
+ Exp/0[MulExp]
- AtomExp/2[NAME] (f)
- TIMES (*)
- AtomExp/2[NAME] (n)
+ Stmt/4[AssignStmt]
- NAME (n)
+ Exp/0[AddExp]
- MulExp/0[NAME] (n)
- MINUS (-)
- MulExp/0[NUMBER] (1)
2021-08-19 06:29:11 +00:00
)",
ast_to_s(ast));
2021-01-16 01:18:19 +00:00
}