diff --git a/README.md b/README.md index 0ee227a..bd32baa 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau * `<` ... `>` (Token boundary operator) * `~` (Ignore operator) * `\x20` (Hex number char) + * `\u10FFFF` (Unicode char) * `%whitespace` (Automatic whitespace skipping) * `%word` (Word expression) * `$name(` ... `)` (Capture scope operator) diff --git a/docs/native.wasm b/docs/native.wasm index c77e041..1705f0c 100644 Binary files a/docs/native.wasm and b/docs/native.wasm differ diff --git a/peglib.h b/peglib.h index 878e3c7..806724d 100644 --- a/peglib.h +++ b/peglib.h @@ -2963,13 +2963,16 @@ private: g["Spacing"]); g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]); - g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\^")), - seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")), - seq(chr('\\'), cls("0-7"), opt(cls("0-7"))), - seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))), - seq(lit("\\u"), cls("0-9a-fA-F"), cls("0-9a-fA-F"), - cls("0-9a-fA-F"), cls("0-9a-fA-F")), - seq(npd(chr('\\')), dot())); + g["Char"] <= + cho(seq(chr('\\'), cls("nrt'\"[]\\^")), + seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")), + seq(chr('\\'), cls("0-7"), opt(cls("0-7"))), + seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))), + seq(lit("\\u"), + cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")), + rep(cls("0-9a-fA-F"), 4, 4)), + rep(cls("0-9a-fA-F"), 4, 5))), + seq(npd(chr('\\')), dot())); g["Repetition"] <= seq(g["BeginBlacket"], g["RepetitionRange"], g["EndBlacket"]); @@ -3210,9 +3213,7 @@ private: } auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args); - if (ident == RECOVER_DEFINITION_NAME) { - ope = rec(ope); - } + if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); } if (ignore) { return ign(ope); diff --git a/test/test3.cc b/test/test3.cc index c309131..f22556c 100644 --- a/test/test3.cc +++ b/test/test3.cc @@ -239,6 +239,27 @@ TEST_CASE("PEG Char", "[peg]") REQUIRE(exact(g, "Char", "\\0") == true); REQUIRE(exact(g, "Char", "\\7") == true); REQUIRE(exact(g, "Char", "\\8") == false); + REQUIRE(exact(g, "Char", "\\x0") == true); + REQUIRE(exact(g, "Char", "\\x00") == true); + REQUIRE(exact(g, "Char", "\\x000") == false); + REQUIRE(exact(g, "Char", "\\xa") == true); + REQUIRE(exact(g, "Char", "\\xab") == true); + REQUIRE(exact(g, "Char", "\\xabc") == false); + REQUIRE(exact(g, "Char", "\\xA") == true); + REQUIRE(exact(g, "Char", "\\xAb") == true); + REQUIRE(exact(g, "Char", "\\xAbc") == false); + REQUIRE(exact(g, "Char", "\\xg") == false); + REQUIRE(exact(g, "Char", "\\xga") == false); + REQUIRE(exact(g, "Char", "\\u0") == false); + REQUIRE(exact(g, "Char", "\\u00") == false); + REQUIRE(exact(g, "Char", "\\u0000") == true); + REQUIRE(exact(g, "Char", "\\u000000") == true); + REQUIRE(exact(g, "Char", "\\u0000000") == false); + REQUIRE(exact(g, "Char", "\\uFFFF") == true); + REQUIRE(exact(g, "Char", "\\u10000") == true); + REQUIRE(exact(g, "Char", "\\u10FFFF") == true); + REQUIRE(exact(g, "Char", "\\u110000") == false); + REQUIRE(exact(g, "Char", "\\uFFFFFF") == false); REQUIRE(exact(g, "Char", "a") == true); REQUIRE(exact(g, "Char", ".") == true); REQUIRE(exact(g, "Char", "0") == true);