diff --git a/README.md b/README.md index 504a1fd..72adbdf 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ You can also try the online version, PEG Playground at https://yhirose.github.io The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf). *cpp-peglib* also supports the following additional syntax for now: + * `'...'i` (Case-insensitive literal operator) * `<` ... `>` (Token boundary operator) * `~` (Ignore operator) * `\x20` (Hex number char) @@ -169,11 +170,11 @@ auto ret = pg.parse(" token1, token2 "); We can ignore unnecessary semantic values from the list by using `~` operator. ```cpp -peg::pegparser parser( - " ROOT <- _ ITEM (',' _ ITEM _)* " - " ITEM <- ([a-z])+ " - " ~_ <- [ \t]* " -); +peg::pegparser parser(R"( + ROOT <- _ ITEM (',' _ ITEM _)* + ITEM <- ([a-z])+ + ~_ <- [ \t]* +)"); parser["ROOT"] = [&](const SemanticValues& sv) { assert(sv.size() == 2); // should be 2 instead of 5. @@ -185,11 +186,11 @@ auto ret = parser.parse(" item1, item2 "); The following grammar is same as the above. ```cpp -peg::parser parser( - " ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* " - " ITEM <- ([a-z])+ " - " _ <- [ \t]* " -); +peg::pegparser parser(R"( + ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* + ITEM <- ([a-z])+ + _ <- [ \t]* +)"); ``` *Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action. @@ -244,9 +245,10 @@ As you can see in the first example, we can ignore whitespaces between tokens au These are valid tokens: ``` -KEYWORD <- 'keyword' -WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used. -IDNET <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used. +KEYWORD <- 'keyword' +KEYWORDI <- 'case_insensitive_keyword'i +WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used. +IDENT <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used. ``` The following grammar accepts ` one, "two three", four `. 
@@ -372,6 +374,7 @@ The following are available operators: | apd | And predicate | | npd | Not predicate | | lit | Literal string | +| liti | Case-insensitive Literal string | | cls | Character class | | chr | Character | | dot | Any character | diff --git a/peglib.h b/peglib.h index f2817df..d11d9b2 100644 --- a/peglib.h +++ b/peglib.h @@ -1233,8 +1233,9 @@ class LiteralString : public Ope , public std::enable_shared_from_this { public: - LiteralString(const std::string& s) + LiteralString(const std::string& s, bool ignore_case) : lit_(s) + , ignore_case_(ignore_case) , init_is_word_(false) , is_word_(false) {} @@ -1244,6 +1245,7 @@ public: void accept(Visitor& v) override; std::string lit_; + bool ignore_case_; mutable bool init_is_word_; mutable bool is_word_; }; @@ -1564,8 +1566,12 @@ inline std::shared_ptr npd(const std::shared_ptr& ope) { return std::make_shared(ope); } -inline std::shared_ptr lit(const std::string& lit) { - return std::make_shared(lit); +inline std::shared_ptr lit(const std::string& s) { + return std::make_shared(s, false); +} + +inline std::shared_ptr liti(const std::string& s) { + return std::make_shared(s, true); } inline std::shared_ptr cls(const std::string& s) { @@ -2136,11 +2142,11 @@ private: */ inline size_t parse_literal(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt, - const std::string& lit, bool& init_is_word, bool& is_word) + const std::string& lit, bool& init_is_word, bool& is_word, bool ignore_case) { size_t i = 0; for (; i < lit.size(); i++) { - if (i >= n || s[i] != lit[i]) { + if (i >= n || (ignore_case ? 
(std::tolower(s[i]) != std::tolower(lit[i])) : (s[i] != lit[i]))) { c.set_error_pos(s); return static_cast(-1); } @@ -2184,7 +2190,7 @@ inline size_t parse_literal(const char* s, size_t n, SemanticValues& sv, Context inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { c.trace("LiteralString", s, n, sv, dt); - return parse_literal(s, n, sv, c, dt, lit_, init_is_word_, is_word_); + return parse_literal(s, n, sv, c, dt, lit_, init_is_word_, is_word_, ignore_case_); } inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { @@ -2338,7 +2344,7 @@ inline size_t BackReference::parse(const char* s, size_t n, SemanticValues& sv, const auto& lit = captures.at(name_); auto init_is_word = false; auto is_word = false; - return parse_literal(s, n, sv, c, dt, lit, init_is_word, is_word); + return parse_literal(s, n, sv, c, dt, lit, init_is_word, is_word, false); } ++it; } @@ -2535,7 +2541,7 @@ private: seq(g["BeginTok"], g["Expression"], g["EndTok"]), seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]), seq(g["BeginCap"], g["Expression"], g["EndCap"]), - g["BackRef"], g["Literal"], g["Class"], g["DOT"]); + g["BackRef"], g["LiteralI"], g["Literal"], g["Class"], g["DOT"]); g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]); g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"])); @@ -2545,6 +2551,9 @@ private: g["IdentRest"] <= cho(g["IdentStart"], cls("0-9")); + g["LiteralI"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"), g["Spacing"]), + seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"), g["Spacing"])); + g["Literal"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]), seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"])); @@ -2744,19 +2753,22 @@ private: g["IdentCont"] = [](const SemanticValues& sv) { return std::string(sv.c_str(), sv.length()); }; - 
g["IdentStart"] = [](const SemanticValues& /*sv*/) { return std::string(); }; - g["IdentRest"] = [](const SemanticValues& /*sv*/) { return std::string(); }; + g["LiteralI"] = [](const SemanticValues& sv) { + const auto& tok = sv.tokens.front(); + return liti(resolve_escape_sequence(tok.first, tok.second)); + }; g["Literal"] = [](const SemanticValues& sv) { const auto& tok = sv.tokens.front(); return lit(resolve_escape_sequence(tok.first, tok.second)); }; + g["Class"] = [](const SemanticValues& sv) { auto ranges = sv.transform>(); return cls(ranges); diff --git a/test/test.cc b/test/test.cc index 13120d0..f30323e 100644 --- a/test/test.cc +++ b/test/test.cc @@ -416,6 +416,26 @@ TEST_CASE("Octal/Hex/Unicode value test", "[general]") REQUIRE(ret == true); } +TEST_CASE("Ignore case test", "[general]") { + peg::parser parser(R"( + ROOT <- HELLO WORLD + HELLO <- 'hello'i + WORLD <- 'world'i + %whitespace <- [ \t\r\n]* + )"); + + parser["HELLO"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "Hello"); + }; + + parser["WORLD"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "World"); + }; + + auto ret = parser.parse(" Hello World "); + REQUIRE(ret == true); +} + TEST_CASE("mutable lambda test", "[general]") { vector vec;