mirror of https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 13:25:30 +00:00
Fix #66
This commit is contained in:
parent b7da359bac
commit 4d0b2b3591
README.md (29 lines changed)
@@ -10,6 +10,7 @@ You can also try the online version, PEG Playground at https://yhirose.github.io
 The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf). *cpp-peglib* also supports the following additional syntax for now:
 
+* `'...'i` (Case-insensitive literal operator)
 * `<` ... `>` (Token boundary operator)
 * `~` (Ignore operator)
 * `\x20` (Hex number char)
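For context, the new `'...'i` operator matches a literal string while ignoring case, whereas a plain `'...'` literal is case-sensitive. Below is a minimal sketch of a grammar that uses it; the rule names and input are illustrative and not taken from the commit:

```cpp
#include <cassert>
#include <peglib.h>

int main() {
    // 'select'i accepts "select", "SELECT", "Select", ...
    peg::parser parser(R"(
        ROOT    <- KEYWORD NAME
        KEYWORD <- 'select'i
        NAME    <- < [a-zA-Z_]+ >
        %whitespace <- [ \t]*
    )");

    assert(parser.parse("SELECT users"));
    assert(parser.parse("select users"));
    return 0;
}
```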
@@ -169,11 +170,11 @@ auto ret = pg.parse(" token1, token2 ");
 We can ignore unnecessary semantic values from the list by using `~` operator.
 
 ```cpp
-peg::pegparser parser(
-    " ROOT <- _ ITEM (',' _ ITEM _)* "
-    " ITEM <- ([a-z])+ "
-    " ~_ <- [ \t]* "
-);
+peg::pegparser parser(R"(
+    ROOT <- _ ITEM (',' _ ITEM _)*
+    ITEM <- ([a-z])+
+    ~_ <- [ \t]*
+)");
 
 parser["ROOT"] = [&](const SemanticValues& sv) {
     assert(sv.size() == 2); // should be 2 instead of 5.
@@ -185,11 +186,11 @@ auto ret = parser.parse(" item1, item2 ");
 The following grammar is same as the above.
 
 ```cpp
-peg::parser parser(
-    " ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* "
-    " ITEM <- ([a-z])+ "
-    " _ <- [ \t]* "
-);
+peg::pegparser parser(R"(
+    ROOT <- ~_ ITEM (',' ~_ ITEM ~_)*
+    ITEM <- ([a-z])+
+    _ <- [ \t]*
+)");
 ```
 
 *Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action.
@@ -244,9 +245,10 @@ As you can see in the first example, we can ignore whitespaces between tokens au
 These are valid tokens:
 
 ```
 KEYWORD <- 'keyword'
+KEYWORDI <- 'case_insensitive_keyword'
 WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used.
 IDNET <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used.
 ```
 
 The following grammar accepts ` one, "two three", four `.
@@ -372,6 +374,7 @@ The following are available operators:
 | apd | And predicate |
 | npd | Not predicate |
 | lit | Literal string |
+| liti | Case-insensitive Literal string |
 | cls | Character class |
 | chr | Character |
 | dot | Any character |
peglib.h (32 lines changed)
@@ -1233,8 +1233,9 @@ class LiteralString : public Ope
     , public std::enable_shared_from_this<LiteralString>
 {
 public:
-    LiteralString(const std::string& s)
+    LiteralString(const std::string& s, bool ignore_case)
         : lit_(s)
+        , ignore_case_(ignore_case)
         , init_is_word_(false)
         , is_word_(false)
     {}
@@ -1244,6 +1245,7 @@ public:
     void accept(Visitor& v) override;
 
     std::string lit_;
+    bool ignore_case_;
     mutable bool init_is_word_;
     mutable bool is_word_;
 };
@@ -1564,8 +1566,12 @@ inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope>& ope) {
     return std::make_shared<NotPredicate>(ope);
 }
 
-inline std::shared_ptr<Ope> lit(const std::string& lit) {
-    return std::make_shared<LiteralString>(lit);
+inline std::shared_ptr<Ope> lit(const std::string& s) {
+    return std::make_shared<LiteralString>(s, false);
+}
+
+inline std::shared_ptr<Ope> liti(const std::string& s) {
+    return std::make_shared<LiteralString>(s, true);
 }
 
 inline std::shared_ptr<Ope> cls(const std::string& s) {
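The new `liti` factory mirrors `lit` but constructs a `LiteralString` with case-insensitive matching enabled. A minimal sketch of the difference at the combinator level, assuming the factories are reachable as `peg::lit` / `peg::liti` (the variable names are illustrative):

```cpp
#include <peglib.h>

int main() {
    // Both factories return a std::shared_ptr<peg::Ope>.
    auto exact_match = peg::lit("select");   // matches only "select"
    auto any_case    = peg::liti("select");  // also matches "SELECT", "Select", ...
    (void)exact_match;
    (void)any_case;
    return 0;
}
```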
@@ -2136,11 +2142,11 @@ private:
 */
 
 inline size_t parse_literal(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt,
-                            const std::string& lit, bool& init_is_word, bool& is_word)
+                            const std::string& lit, bool& init_is_word, bool& is_word, bool ignore_case)
 {
     size_t i = 0;
     for (; i < lit.size(); i++) {
-        if (i >= n || s[i] != lit[i]) {
+        if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i])) : (s[i] != lit[i]))) {
             c.set_error_pos(s);
             return static_cast<size_t>(-1);
         }
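The case-insensitive branch above folds both sides with `std::tolower` (declared in `<cctype>`). As a standalone illustration only, not code from the commit, the comparison it performs is roughly equivalent to the helper below; note the `unsigned char` cast, which avoids undefined behavior when `char` is signed and holds a negative value:

```cpp
#include <cctype>
#include <cstddef>
#include <string>

// Illustrative helper: does the input s (of length n) start with lit,
// ignoring case? Mirrors the ignore_case branch added to parse_literal.
inline bool starts_with_icase(const char* s, size_t n, const std::string& lit) {
    if (n < lit.size()) return false;
    for (size_t i = 0; i < lit.size(); i++) {
        if (std::tolower(static_cast<unsigned char>(s[i])) !=
            std::tolower(static_cast<unsigned char>(lit[i]))) {
            return false;
        }
    }
    return true;
}
```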
@@ -2184,7 +2190,7 @@ inline size_t parse_literal(const char* s, size_t n, SemanticValues& sv, Context
 
 inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
     c.trace("LiteralString", s, n, sv, dt);
-    return parse_literal(s, n, sv, c, dt, lit_, init_is_word_, is_word_);
+    return parse_literal(s, n, sv, c, dt, lit_, init_is_word_, is_word_, ignore_case_);
 }
 
 inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
@@ -2338,7 +2344,7 @@ inline size_t BackReference::parse(const char* s, size_t n, SemanticValues& sv,
             const auto& lit = captures.at(name_);
             auto init_is_word = false;
             auto is_word = false;
-            return parse_literal(s, n, sv, c, dt, lit, init_is_word, is_word);
+            return parse_literal(s, n, sv, c, dt, lit, init_is_word, is_word, false);
         }
         ++it;
     }
@@ -2535,7 +2541,7 @@ private:
                             seq(g["BeginTok"], g["Expression"], g["EndTok"]),
                             seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]),
                             seq(g["BeginCap"], g["Expression"], g["EndCap"]),
-                            g["BackRef"], g["Literal"], g["Class"], g["DOT"]);
+                            g["BackRef"], g["LiteralI"], g["Literal"], g["Class"], g["DOT"]);
 
         g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
         g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
@@ -2545,6 +2551,9 @@ private:
 
         g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
 
+        g["LiteralI"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"), g["Spacing"]),
+                             seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"), g["Spacing"]));
+
         g["Literal"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
                             seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
 
@@ -2744,19 +2753,22 @@ private:
         g["IdentCont"] = [](const SemanticValues& sv) {
             return std::string(sv.c_str(), sv.length());
         };
 
         g["IdentStart"] = [](const SemanticValues& /*sv*/) {
             return std::string();
         };
 
         g["IdentRest"] = [](const SemanticValues& /*sv*/) {
             return std::string();
         };
 
+        g["LiteralI"] = [](const SemanticValues& sv) {
+            const auto& tok = sv.tokens.front();
+            return liti(resolve_escape_sequence(tok.first, tok.second));
+        };
         g["Literal"] = [](const SemanticValues& sv) {
             const auto& tok = sv.tokens.front();
             return lit(resolve_escape_sequence(tok.first, tok.second));
         };
 
         g["Class"] = [](const SemanticValues& sv) {
             auto ranges = sv.transform<std::pair<char32_t, char32_t>>();
             return cls(ranges);
test/test.cc (20 lines changed)
@@ -416,6 +416,26 @@ TEST_CASE("Octal/Hex/Unicode value test", "[general]")
     REQUIRE(ret == true);
 }
 
+TEST_CASE("Ignore case test", "[general]") {
+    peg::parser parser(R"(
+        ROOT <- HELLO WORLD
+        HELLO <- 'hello'i
+        WORLD <- 'world'i
+        %whitespace <- [ \t\r\n]*
+    )");
+
+    parser["HELLO"] = [](const SemanticValues& sv) {
+        REQUIRE(sv.token() == "Hello");
+    };
+
+    parser["WORLD"] = [](const SemanticValues& sv) {
+        REQUIRE(sv.token() == "World");
+    };
+
+    auto ret = parser.parse(" Hello World ");
+    REQUIRE(ret == true);
+}
+
 TEST_CASE("mutable lambda test", "[general]")
 {
     vector<string> vec;
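Note that matching is case-insensitive while the captured token keeps the input's original spelling: the grammar literals are `'hello'i` and `'world'i`, yet the semantic actions see the tokens `"Hello"` and `"World"` exactly as they appear in the parsed text.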