yhirose 2020-11-09 12:06:48 -05:00
parent 11ed83e46f
commit b92da07bed
4 changed files with 103 additions and 10 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.


@@ -827,7 +827,7 @@ public:
   std::vector<Definition *> rule_stack;
   std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
-  bool in_token = false;
+  size_t in_token_boundary_count = 0;
   std::shared_ptr<Ope> whitespaceOpe;
   bool in_whitespace = false;
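The bool in_token flag becomes a size_t in_token_boundary_count counter so that nested token boundaries are tracked correctly: with a single flag, leaving an inner < ... > operator would reset the flag even though an outer boundary is still open, re-enabling whitespace skipping too early. A minimal sketch of the difference, using hypothetical Ctx/TokenBoundaryGuard names rather than peglib's own types:

#include <cassert>
#include <cstddef>

// Hypothetical context holding only the nesting bookkeeping from the diff.
struct Ctx {
  std::size_t in_token_boundary_count = 0; // was: bool in_token = false;
  bool inside_token() const { return in_token_boundary_count > 0; }
};

// RAII guard playing the role of the make_scope_exit() calls in the library.
struct TokenBoundaryGuard {
  Ctx &c;
  explicit TokenBoundaryGuard(Ctx &ctx) : c(ctx) { ++c.in_token_boundary_count; }
  ~TokenBoundaryGuard() { --c.in_token_boundary_count; }
};

int main() {
  Ctx c;
  {
    TokenBoundaryGuard outer(c);   // enter an outer < ... >
    {
      TokenBoundaryGuard inner(c); // enter a nested < ... >
    }                              // inner boundary closes here
    // The counter still reports "inside a token"; a plain bool reset by the
    // inner scope-exit would already read false at this point.
    assert(c.inside_token());
  }
  assert(!c.inside_token());
}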
@@ -2392,7 +2392,7 @@ inline size_t parse_literal(const char *s, size_t n, SemanticValues &sv,
   }
   // Skip whiltespace
-  if (!c.in_token) {
+  if (!c.in_token_boundary_count) {
     if (c.whitespaceOpe) {
       auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
       if (fail(len)) { return static_cast<size_t>(-1); }
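In parse_literal the same counter gates the trailing-whitespace skip: blanks after a matched literal are consumed only while no token boundary is open. A rough sketch of that guard, with a hypothetical skip_whitespace helper standing in for c.whitespaceOpe->parse():

#include <cassert>
#include <cctype>
#include <cstddef>
#include <string_view>

// Hypothetical stand-in for the %whitespace operator: consume leading blanks.
static std::size_t skip_whitespace(std::string_view rest) {
  std::size_t i = 0;
  while (i < rest.size() && std::isspace(static_cast<unsigned char>(rest[i]))) { ++i; }
  return i;
}

// Sketch of the guarded skip after a literal match of length len.
static std::size_t after_literal(std::string_view input, std::size_t len,
                                 std::size_t in_token_boundary_count) {
  if (!in_token_boundary_count) {   // only outside < ... >
    len += skip_whitespace(input.substr(len));
  }
  return len;                       // inside a token, spaces stay significant
}

int main() {
  // Outside a boundary: "ab" plus the two following blanks are consumed.
  assert(after_literal("ab  c", 2, 0) == 4);
  // Inside a boundary: the blanks are left for the next operator to reject.
  assert(after_literal("ab  c", 2, 1) == 2);
}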
@@ -2457,16 +2457,22 @@ inline size_t LiteralString::parse_core(const char *s, size_t n,
 inline size_t TokenBoundary::parse_core(const char *s, size_t n,
                                         SemanticValues &sv, Context &c,
                                         any &dt) const {
-  c.in_token = true;
-  auto se = make_scope_exit([&]() { c.in_token = false; });
-  auto len = ope_->parse(s, n, sv, c, dt);
+  size_t len;
+  {
+    c.in_token_boundary_count++;
+    auto se = make_scope_exit([&]() { c.in_token_boundary_count--; });
+    len = ope_->parse(s, n, sv, c, dt);
+  }
   if (success(len)) {
     sv.tokens.emplace_back(std::make_pair(s, len));
-    if (c.whitespaceOpe) {
-      auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt);
-      if (fail(l)) { return static_cast<size_t>(-1); }
-      len += l;
+    if (!c.in_token_boundary_count) {
+      if (c.whitespaceOpe) {
+        auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt);
+        if (fail(l)) { return static_cast<size_t>(-1); }
+        len += l;
+      }
     }
   }
   return len;
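The restructured TokenBoundary::parse_core bumps the counter only for the duration of the inner parse, then skips trailing whitespace only when the counter has dropped back to zero, i.e. when the outermost < ... > closes. A condensed sketch of that control flow (a hypothetical free function that ignores semantic values and failure handling):

#include <cassert>
#include <cstddef>
#include <functional>

struct Ctx { std::size_t in_token_boundary_count = 0; };

// Run the inner operator with the counter bumped, then skip whitespace only
// if no enclosing boundary remains open.
std::size_t token_boundary(Ctx &c, const std::function<std::size_t(Ctx &)> &inner,
                           const std::function<std::size_t()> &skip_ws) {
  std::size_t len;
  {
    ++c.in_token_boundary_count;
    len = inner(c);
    --c.in_token_boundary_count;   // the diff does this via make_scope_exit()
  }
  if (!c.in_token_boundary_count) { // outermost boundary only
    len += skip_ws();
  }
  return len;
}

int main() {
  Ctx c;
  // Nested boundary: the inner call sees a nonzero count and skips nothing.
  auto nested = [&](Ctx &ctx) {
    return token_boundary(ctx, [](Ctx &) { return std::size_t{1}; },
                          [] { return std::size_t{7}; });
  };
  // Outer boundary: 1 char from the nested token + 2 chars of whitespace.
  assert(token_boundary(c, nested, [] { return std::size_t{2}; }) == 3);
}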


@@ -3,6 +3,93 @@
using namespace peg;
TEST_CASE("Token boundary 1", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- 'a' 'b' 'c'
%whitespace <- [ \t\r\n]*
)");
REQUIRE(pg.parse(" a b c "));
}
TEST_CASE("Token boundary 2", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- < 'a' 'b' 'c' >
%whitespace <- [ \t\r\n]*
)");
REQUIRE(!pg.parse(" a b c "));
}
TEST_CASE("Token boundary 3", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- < 'a' B 'c' >
B <- 'b'
%whitespace <- [ \t\r\n]*
)");
REQUIRE(!pg.parse(" a b c "));
}
TEST_CASE("Token boundary 4", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- < A 'b' 'c' >
A <- 'a'
%whitespace <- [ \t\r\n]*
)");
REQUIRE(!pg.parse(" a b c "));
}
TEST_CASE("Token boundary 5", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- A < 'b' C >
A <- 'a'
C <- 'c'
%whitespace <- [ \t\r\n]*
)");
REQUIRE(!pg.parse(" a b c "));
}
TEST_CASE("Token boundary 6", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- < A > B C
A <- 'a'
B <- 'b'
C <- 'c'
%whitespace <- [ \t\r\n]*
)");
REQUIRE(pg.parse(" a b c "));
}
TEST_CASE("Token boundary 7", "[token boundary]")
{
parser pg(R"(
ROOT <- TOP
TOP <- < A B C >
A <- 'a'
B <- 'b'
C <- 'c'
%whitespace <- [ \t\r\n]*
)");
REQUIRE(!pg.parse(" a b c "));
}
TEST_CASE("Infinite loop 1", "[infinite loop]") TEST_CASE("Infinite loop 1", "[infinite loop]")
{ {
parser pg(R"( parser pg(R"(