Treat UTF-8 bytes 0x80 and above as valid identifier characters.

This commit is contained in:
yhirose 2015-08-08 20:30:05 -04:00
parent de5cfa955d
commit a3cfd1b8ad
2 changed files with 19 additions and 1 deletion

View File

@ -1588,7 +1588,7 @@ private:
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
- g["IdentStart"] <= cls("a-zA-Z_");
+ g["IdentStart"] <= cls("a-zA-Z_\x80-\xff");
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
g["Literal"] <= cho(seq(cls("'"), anc(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),

View File

@ -653,6 +653,24 @@ TEST_CASE("Semantic predicate test", "[predicate]")
REQUIRE(ret == false);
}
// Checks that a grammar whose rule names and literals are written in Japanese
// (multi-byte UTF-8, i.e. bytes in the 0x80-0xff range) is accepted, and that
// the resulting parser recognises a Japanese sentence.
//
// NOTE(review): the rule names and quoted literals below were missing from the
// rendered diff (every non-ASCII token in the grammar had been dropped, leaving
// lines such as `<- ? ''`). They have been restored so that the rule count and
// alternative count of each line match the original layout and the grammar
// accepts the input sentence used below -- confirm against the repository.
TEST_CASE("Japanese character", "[unicode]")
{
    // Sentence := optional modifier, subject, predicate, full stop.
    peglib::peg parser(R"(
        文 <- 修飾語? 主語 述語 '。'
        主語 <- 名詞 助詞
        述語 <- 動詞 助詞
        修飾語 <- 形容詞
        名詞 <- 'サーバー' / 'クライアント'
        形容詞 <- '古い' / '新しい'
        動詞 <- '落ち' / '復旧し'
        助詞 <- 'が' / 'を' / 'た' / 'ます' / 'に'
    )");

    // No modifier; subject = noun + particle, predicate = verb + particle.
    auto ret = parser.parse(R"(サーバーを復旧します。)");

    REQUIRE(ret == true);
}
bool exact(Grammar& g, const char* d, const char* s) {
auto n = strlen(s);
auto r = g[d].parse(s, n);