Unicode char support

This commit is contained in:
yhirose 2021-01-15 17:26:29 -05:00
parent 8ad71c4adc
commit 1442e3e21f
4 changed files with 33 additions and 10 deletions

View File

@ -18,6 +18,7 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
* `<` ... `>` (Token boundary operator) * `<` ... `>` (Token boundary operator)
* `~` (Ignore operator) * `~` (Ignore operator)
* `\x20` (Hex number char) * `\x20` (Hex number char)
* `\u10FFFF` (Unicode char, 4 to 6 hex digits, up to `10FFFF`)
* `%whitespace` (Automatic whitespace skipping) * `%whitespace` (Automatic whitespace skipping)
* `%word` (Word expression) * `%word` (Word expression)
* `$name(` ... `)` (Capture scope operator) * `$name(` ... `)` (Capture scope operator)

Binary file not shown.

View File

@ -2963,13 +2963,16 @@ private:
g["Spacing"]); g["Spacing"]);
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]); g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\^")), g["Char"] <=
seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")), cho(seq(chr('\\'), cls("nrt'\"[]\\^")),
seq(chr('\\'), cls("0-7"), opt(cls("0-7"))), seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))), seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
seq(lit("\\u"), cls("0-9a-fA-F"), cls("0-9a-fA-F"), seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
cls("0-9a-fA-F"), cls("0-9a-fA-F")), seq(lit("\\u"),
seq(npd(chr('\\')), dot())); cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
rep(cls("0-9a-fA-F"), 4, 4)),
rep(cls("0-9a-fA-F"), 4, 5))),
seq(npd(chr('\\')), dot()));
g["Repetition"] <= g["Repetition"] <=
seq(g["BeginBlacket"], g["RepetitionRange"], g["EndBlacket"]); seq(g["BeginBlacket"], g["RepetitionRange"], g["EndBlacket"]);
@ -3210,9 +3213,7 @@ private:
} }
auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args); auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
if (ident == RECOVER_DEFINITION_NAME) { if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
ope = rec(ope);
}
if (ignore) { if (ignore) {
return ign(ope); return ign(ope);

View File

@ -239,6 +239,27 @@ TEST_CASE("PEG Char", "[peg]")
REQUIRE(exact(g, "Char", "\\0") == true); REQUIRE(exact(g, "Char", "\\0") == true);
REQUIRE(exact(g, "Char", "\\7") == true); REQUIRE(exact(g, "Char", "\\7") == true);
REQUIRE(exact(g, "Char", "\\8") == false); REQUIRE(exact(g, "Char", "\\8") == false);
REQUIRE(exact(g, "Char", "\\x0") == true);
REQUIRE(exact(g, "Char", "\\x00") == true);
REQUIRE(exact(g, "Char", "\\x000") == false);
REQUIRE(exact(g, "Char", "\\xa") == true);
REQUIRE(exact(g, "Char", "\\xab") == true);
REQUIRE(exact(g, "Char", "\\xabc") == false);
REQUIRE(exact(g, "Char", "\\xA") == true);
REQUIRE(exact(g, "Char", "\\xAb") == true);
REQUIRE(exact(g, "Char", "\\xAbc") == false);
REQUIRE(exact(g, "Char", "\\xg") == false);
REQUIRE(exact(g, "Char", "\\xga") == false);
REQUIRE(exact(g, "Char", "\\u0") == false);
REQUIRE(exact(g, "Char", "\\u00") == false);
REQUIRE(exact(g, "Char", "\\u0000") == true);
REQUIRE(exact(g, "Char", "\\u000000") == true);
REQUIRE(exact(g, "Char", "\\u0000000") == false);
REQUIRE(exact(g, "Char", "\\uFFFF") == true);
REQUIRE(exact(g, "Char", "\\u10000") == true);
REQUIRE(exact(g, "Char", "\\u10FFFF") == true);
REQUIRE(exact(g, "Char", "\\u110000") == false);
REQUIRE(exact(g, "Char", "\\uFFFFFF") == false);
REQUIRE(exact(g, "Char", "a") == true); REQUIRE(exact(g, "Char", "a") == true);
REQUIRE(exact(g, "Char", ".") == true); REQUIRE(exact(g, "Char", ".") == true);
REQUIRE(exact(g, "Char", "0") == true); REQUIRE(exact(g, "Char", "0") == true);