mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 20:05:31 +00:00
Added [...]i
and [^...]i
(Fix #220)
This commit is contained in:
parent
0366380091
commit
924588bb69
@ -13,7 +13,9 @@ You can also try the online version, PEG Playground at https://yhirose.github.io
|
|||||||
The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf) by Bryan Ford. *cpp-peglib* also supports the following additional syntax for now:
|
The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf) by Bryan Ford. *cpp-peglib* also supports the following additional syntax for now:
|
||||||
|
|
||||||
* `'...'i` (Case-insensitive literal operator)
|
* `'...'i` (Case-insensitive literal operator)
|
||||||
|
* `[...]i` (Case-insensitive character class operator)
|
||||||
* `[^...]` (Negated character class operator)
|
* `[^...]` (Negated character class operator)
|
||||||
|
* `[^...]i` (Case-insensitive negated character class operator)
|
||||||
* `{2,5}` (Regex-like repetition operator)
|
* `{2,5}` (Regex-like repetition operator)
|
||||||
* `<` ... `>` (Token boundary operator)
|
* `<` ... `>` (Token boundary operator)
|
||||||
* `~` (Ignore operator)
|
* `~` (Ignore operator)
|
||||||
|
BIN
docs/native.wasm
BIN
docs/native.wasm
Binary file not shown.
@ -28,7 +28,9 @@ Primary <-
|
|||||||
/ LiteralI
|
/ LiteralI
|
||||||
/ Dictionary
|
/ Dictionary
|
||||||
/ Literal
|
/ Literal
|
||||||
|
/ NegatedClassI
|
||||||
/ NegatedClass
|
/ NegatedClass
|
||||||
|
/ ClassI
|
||||||
/ Class
|
/ Class
|
||||||
/ DOT
|
/ DOT
|
||||||
|
|
||||||
@ -56,7 +58,9 @@ LiteralI <-
|
|||||||
|
|
||||||
# NOTE: Bryan Ford's original paper uses 'zom' instead of 'oom'.
|
# NOTE: Bryan Ford's original paper uses 'zom' instead of 'oom'.
|
||||||
Class <- '[' !'^' <(!']' Range)+> ']' Spacing
|
Class <- '[' !'^' <(!']' Range)+> ']' Spacing
|
||||||
|
ClassI <- '[' !'^' <(!']' Range)+> ']i' Spacing
|
||||||
NegatedClass <- "[^" <(!']' Range)+> ']' Spacing
|
NegatedClass <- "[^" <(!']' Range)+> ']' Spacing
|
||||||
|
NegatedClassI <- "[^" <(!']' Range)+> ']i' Spacing
|
||||||
|
|
||||||
Range <- (Char '-' ! ']' Char) / Char
|
Range <- (Char '-' ! ']' Char) / Char
|
||||||
|
|
||||||
|
74
peglib.h
74
peglib.h
@ -1265,7 +1265,8 @@ public:
|
|||||||
class CharacterClass : public Ope,
|
class CharacterClass : public Ope,
|
||||||
public std::enable_shared_from_this<CharacterClass> {
|
public std::enable_shared_from_this<CharacterClass> {
|
||||||
public:
|
public:
|
||||||
CharacterClass(const std::string &s, bool negated) : negated_(negated) {
|
CharacterClass(const std::string &s, bool negated, bool ignore_case)
|
||||||
|
: negated_(negated), ignore_case_(ignore_case) {
|
||||||
auto chars = decode(s.data(), s.length());
|
auto chars = decode(s.data(), s.length());
|
||||||
auto i = 0u;
|
auto i = 0u;
|
||||||
while (i < chars.size()) {
|
while (i < chars.size()) {
|
||||||
@ -1284,8 +1285,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
|
CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
|
||||||
bool negated)
|
bool negated, bool ignore_case)
|
||||||
: ranges_(ranges), negated_(negated) {
|
: ranges_(ranges), negated_(negated), ignore_case_(ignore_case) {
|
||||||
assert(!ranges_.empty());
|
assert(!ranges_.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1300,7 +1301,7 @@ public:
|
|||||||
auto len = decode_codepoint(s, n, cp);
|
auto len = decode_codepoint(s, n, cp);
|
||||||
|
|
||||||
for (const auto &range : ranges_) {
|
for (const auto &range : ranges_) {
|
||||||
if (range.first <= cp && cp <= range.second) {
|
if (in_range(range, cp)) {
|
||||||
if (negated_) {
|
if (negated_) {
|
||||||
c.set_error_pos(s);
|
c.set_error_pos(s);
|
||||||
return static_cast<size_t>(-1);
|
return static_cast<size_t>(-1);
|
||||||
@ -1320,8 +1321,20 @@ public:
|
|||||||
|
|
||||||
void accept(Visitor &v) override;
|
void accept(Visitor &v) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool in_range(const std::pair<char32_t, char32_t> &range, char32_t cp) const {
|
||||||
|
if (ignore_case_) {
|
||||||
|
auto cpl = std::tolower(cp);
|
||||||
|
return std::tolower(range.first) <= cpl &&
|
||||||
|
cpl <= std::tolower(range.second);
|
||||||
|
} else {
|
||||||
|
return range.first <= cp && cp <= range.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<std::pair<char32_t, char32_t>> ranges_;
|
std::vector<std::pair<char32_t, char32_t>> ranges_;
|
||||||
bool negated_;
|
bool negated_;
|
||||||
|
bool ignore_case_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Character : public Ope, public std::enable_shared_from_this<Character> {
|
class Character : public Ope, public std::enable_shared_from_this<Character> {
|
||||||
@ -1646,21 +1659,23 @@ inline std::shared_ptr<Ope> liti(std::string &&s) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> cls(const std::string &s) {
|
inline std::shared_ptr<Ope> cls(const std::string &s) {
|
||||||
return std::make_shared<CharacterClass>(s, false);
|
return std::make_shared<CharacterClass>(s, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope>
|
inline std::shared_ptr<Ope>
|
||||||
cls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
|
cls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
|
||||||
return std::make_shared<CharacterClass>(ranges, false);
|
bool ignore_case = false) {
|
||||||
|
return std::make_shared<CharacterClass>(ranges, false, ignore_case);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> ncls(const std::string &s) {
|
inline std::shared_ptr<Ope> ncls(const std::string &s) {
|
||||||
return std::make_shared<CharacterClass>(s, true);
|
return std::make_shared<CharacterClass>(s, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope>
|
inline std::shared_ptr<Ope>
|
||||||
ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges) {
|
ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
|
||||||
return std::make_shared<CharacterClass>(ranges, true);
|
bool ignore_case = false) {
|
||||||
|
return std::make_shared<CharacterClass>(ranges, true, ignore_case);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> chr(char dt) {
|
inline std::shared_ptr<Ope> chr(char dt) {
|
||||||
@ -2934,9 +2949,7 @@ inline size_t Recovery::parse_core(const char *s, size_t n,
|
|||||||
len = rule.parse(s, n, dummy_vs, c, dummy_dt);
|
len = rule.parse(s, n, dummy_vs, c, dummy_dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (success(len)) {
|
if (success(len)) { c.recovered = true; }
|
||||||
c.recovered = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cut
|
// Cut
|
||||||
if (!c.cut_stack.empty()) {
|
if (!c.cut_stack.empty()) {
|
||||||
@ -3241,16 +3254,16 @@ private:
|
|||||||
seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
|
seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
|
||||||
g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
|
g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
|
||||||
g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
|
g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
|
||||||
g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
|
g["Primary"] <=
|
||||||
npd(g["LEFTARROW"])),
|
cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
|
||||||
seq(g["Ignore"], g["Identifier"],
|
npd(g["LEFTARROW"])),
|
||||||
npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
|
seq(g["Ignore"], g["Identifier"],
|
||||||
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
|
||||||
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
|
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
||||||
g["CapScope"],
|
seq(g["BeginTok"], g["Expression"], g["EndTok"]), g["CapScope"],
|
||||||
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
|
seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["BackRef"],
|
||||||
g["BackRef"], g["LiteralI"], g["Dictionary"],
|
g["LiteralI"], g["Dictionary"], g["Literal"], g["NegatedClassI"],
|
||||||
g["Literal"], g["NegatedClass"], g["Class"], g["DOT"]);
|
g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
|
||||||
|
|
||||||
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
||||||
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
|
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
|
||||||
@ -3281,9 +3294,16 @@ private:
|
|||||||
g["Class"] <= seq(chr('['), npd(chr('^')),
|
g["Class"] <= seq(chr('['), npd(chr('^')),
|
||||||
tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
|
tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
|
||||||
g["Spacing"]);
|
g["Spacing"]);
|
||||||
|
g["ClassI"] <= seq(chr('['), npd(chr('^')),
|
||||||
|
tok(oom(seq(npd(chr(']')), g["Range"]))), lit("]i"),
|
||||||
|
g["Spacing"]);
|
||||||
|
|
||||||
g["NegatedClass"] <= seq(lit("[^"),
|
g["NegatedClass"] <= seq(lit("[^"),
|
||||||
tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
|
tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
|
||||||
g["Spacing"]);
|
g["Spacing"]);
|
||||||
|
g["NegatedClassI"] <= seq(lit("[^"),
|
||||||
|
tok(oom(seq(npd(chr(']')), g["Range"]))),
|
||||||
|
lit("]i"), g["Spacing"]);
|
||||||
|
|
||||||
// NOTE: This is different from Bryan Ford's original paper, and this
|
// NOTE: This is different from Bryan Ford's original paper, and this
|
||||||
// modification allows us to specify `[+-]` as a valid char class.
|
// modification allows us to specify `[+-]` as a valid char class.
|
||||||
@ -3634,10 +3654,18 @@ private:
|
|||||||
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
||||||
return cls(ranges);
|
return cls(ranges);
|
||||||
};
|
};
|
||||||
|
g["ClassI"] = [](const SemanticValues &vs) {
|
||||||
|
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
||||||
|
return cls(ranges, true);
|
||||||
|
};
|
||||||
g["NegatedClass"] = [](const SemanticValues &vs) {
|
g["NegatedClass"] = [](const SemanticValues &vs) {
|
||||||
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
||||||
return ncls(ranges);
|
return ncls(ranges);
|
||||||
};
|
};
|
||||||
|
g["NegatedClassI"] = [](const SemanticValues &vs) {
|
||||||
|
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
||||||
|
return ncls(ranges, true);
|
||||||
|
};
|
||||||
g["Range"] = [](const SemanticValues &vs) {
|
g["Range"] = [](const SemanticValues &vs) {
|
||||||
switch (vs.choice()) {
|
switch (vs.choice()) {
|
||||||
case 0: {
|
case 0: {
|
||||||
|
@ -431,13 +431,13 @@ TEST(GeneralTest, Octal_Hex_Unicode_value_test) {
|
|||||||
EXPECT_TRUE(ret);
|
EXPECT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(GeneralTest, Ignore_case_test) {
|
TEST(GeneralTest, Ignore_case_literal_test) {
|
||||||
parser parser(R"(
|
parser parser(R"(
|
||||||
ROOT <- HELLO WORLD
|
ROOT <- HELLO WORLD
|
||||||
HELLO <- 'hello'i
|
HELLO <- 'hello'i
|
||||||
WORLD <- 'world'i
|
WORLD <- 'world'i
|
||||||
%whitespace <- [ \t\r\n]*
|
%whitespace <- [ \t\r\n]*
|
||||||
)");
|
)");
|
||||||
|
|
||||||
parser["HELLO"] = [](const SemanticValues &vs) {
|
parser["HELLO"] = [](const SemanticValues &vs) {
|
||||||
EXPECT_EQ("Hello", vs.token());
|
EXPECT_EQ("Hello", vs.token());
|
||||||
@ -451,6 +451,23 @@ TEST(GeneralTest, Ignore_case_test) {
|
|||||||
EXPECT_TRUE(ret);
|
EXPECT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that the case-insensitive character class `[a-z]i` accepts letters
// in lower, upper and mixed case, and still rejects input made only of
// characters outside the class.
TEST(GeneralTest, Ignore_case_character_class_test) {
  parser parser(R"(ROOT <- [a-z]i+)");

  EXPECT_TRUE(parser.parse("abc"));
  EXPECT_TRUE(parser.parse("ABC"));
  // Mixed case, with the upper-case letter leading and then trailing.
  EXPECT_TRUE(parser.parse("Abc"));
  EXPECT_TRUE(parser.parse("aBC"));
  EXPECT_FALSE(parser.parse("123"));
}
|
||||||
|
|
||||||
|
// Checks that the case-insensitive negated class `[^a-z]i` accepts input
// made of non-letters and rejects letters in either case.
TEST(GeneralTest, Ignore_case_negate_character_class_test) {
  parser parser(R"(ROOT <- [^a-z]i+)");

  EXPECT_TRUE(parser.parse("123"));
  // Letters exactly as written in the class must be rejected...
  EXPECT_FALSE(parser.parse("abc"));
  // ...and so must their upper-case counterparts.
  EXPECT_FALSE(parser.parse("ABC"));
}
|
||||||
|
|
||||||
TEST(GeneralTest, mutable_lambda_test) {
|
TEST(GeneralTest, mutable_lambda_test) {
|
||||||
std::vector<std::string_view> vec;
|
std::vector<std::string_view> vec;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user