Added 'ignore' operator.

This commit is contained in:
yhirose 2015-02-18 18:00:11 -05:00
parent 56daf08d5b
commit 9c872e2d5d
3 changed files with 73 additions and 20 deletions

View File

@ -7,8 +7,9 @@ C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar)
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now: The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now:
* `<` ... `>` (Anchor operators) * `<` ... `>` (Anchor operator)
* `$<` ... `>` (Capture operators) * `$<` ... `>` (Capture operator)
* `~` (Ignore operator)
How to use How to use
---------- ----------
@ -108,6 +109,22 @@ pg["TOKEN"] = [](const char* s, size_t l, const vector<any>& v) {
auto ret = pg.parse(" token1, token2 "); auto ret = pg.parse(" token1, token2 ");
``` ```
We can ignore unnecessary semantic values from the list by using the `~` operator.
```c++
// Example: the `~` prefix on the `_` rule marks that whitespace rule as ignored,
// so its matches do not show up in the semantic value list passed to ROOT's action.
peglib::peg parser(
" ROOT <- _ ITEM (',' _ ITEM _)* "
" ITEM <- ([a-z])+ "
" ~_ <- [ \t]* "
);
parser["ROOT"] = [&](const vector<any>& v) {
assert(v.size() == 2); // only the two ITEM values remain; without `~` on `_` there would be 5.
};
auto ret = parser.parse(" item1, item2 ");
```
Simple interface Simple interface
---------------- ----------------

View File

@ -724,11 +724,13 @@ class Definition
public: public:
Definition() Definition()
: actions(1) : actions(1)
, ignore(false)
, holder_(std::make_shared<Holder>(this)) {} , holder_(std::make_shared<Holder>(this)) {}
Definition(const Definition& rhs) Definition(const Definition& rhs)
: name(rhs.name) : name(rhs.name)
, actions(1) , actions(1)
, ignore(false)
, holder_(rhs.holder_) , holder_(rhs.holder_)
{ {
holder_->outer_ = this; holder_->outer_ = this;
@ -737,6 +739,7 @@ public:
Definition(Definition&& rhs) Definition(Definition&& rhs)
: name(std::move(rhs.name)) : name(std::move(rhs.name))
, actions(1) , actions(1)
, ignore(rhs.ignore)
, holder_(std::move(rhs.holder_)) , holder_(std::move(rhs.holder_))
{ {
holder_->outer_ = this; holder_->outer_ = this;
@ -744,6 +747,7 @@ public:
Definition(const std::shared_ptr<Ope>& ope) Definition(const std::shared_ptr<Ope>& ope)
: actions(1) : actions(1)
, ignore(false)
, holder_(std::make_shared<Holder>(this)) , holder_(std::make_shared<Holder>(this))
{ {
holder_->ope_ = ope; holder_->ope_ = ope;
@ -803,8 +807,14 @@ public:
return *this; return *this;
} }
// Marks this definition as ignored and returns it, so the call can be
// chained with a rule assignment (e.g. `~g["SLASH"] <= ...`).
// The holder's parse step checks this flag and skips its semantic-action
// handling for a successful match when it is set.
Definition& operator~() {
    this->ignore = true;
    return *this;
}
std::string name; std::string name;
std::vector<Action> actions; std::vector<Action> actions;
bool ignore;
private: private:
friend class DefinitionReference; friend class DefinitionReference;
@ -823,7 +833,7 @@ private:
const auto& rule = *ope_; const auto& rule = *ope_;
SemanticValues chldsv; SemanticValues chldsv;
auto r = rule.parse(s, l, chldsv, c); auto r = rule.parse(s, l, chldsv, c);
if (r.ret) { if (r.ret && !outer_->ignore) {
assert(!outer_->actions.empty()); assert(!outer_->actions.empty());
auto id = r.choice + 1; auto id = r.choice + 1;
@ -1011,7 +1021,7 @@ private:
void make_grammar() { void make_grammar() {
// Setup PEG syntax parser // Setup PEG syntax parser
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]); g["Definition"] <= seq(opt(g["IGNORE"]), g["Identifier"], g["LEFTARROW"], g["Expression"]);
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
g["Sequence"] <= zom(g["Prefix"]); g["Sequence"] <= zom(g["Prefix"]);
@ -1040,7 +1050,7 @@ private:
seq(npd(chr('\\')), dot())); seq(npd(chr('\\')), dot()));
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]); g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
g["SLASH"] <= seq(chr('/'), g["Spacing"]); ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
g["AND"] <= seq(chr('&'), g["Spacing"]); g["AND"] <= seq(chr('&'), g["Spacing"]);
g["NOT"] <= seq(chr('!'), g["Spacing"]); g["NOT"] <= seq(chr('!'), g["Spacing"]);
g["QUESTION"] <= seq(chr('?'), g["Spacing"]); g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
@ -1062,6 +1072,10 @@ private:
g["BeginCap"] <= seq(lit("$<"), g["Spacing"]); g["BeginCap"] <= seq(lit("$<"), g["Spacing"]);
g["EndCap"] <= seq(lit(">"), g["Spacing"]); g["EndCap"] <= seq(lit(">"), g["Spacing"]);
g["IGNORE"] <= chr('~');
g["Action"] <= seq(chr('{'), anc(zom(npd(chr('}')))), chr('}'), g["Spacing"]);
// Set definition names // Set definition names
for (auto& x: g) { for (auto& x: g) {
x.second.name = x.first; x.second.name = x.first;
@ -1072,9 +1086,16 @@ private:
g["Definition"] = [&](const std::vector<any>& v, any& c) { g["Definition"] = [&](const std::vector<any>& v, any& c) {
Context& cxt = *c.get<Context*>(); Context& cxt = *c.get<Context*>();
const auto& name = v[0].get<std::string>(); auto ignore = (v.size() == 4);
(*cxt.grammar)[name] <= v[2].get<std::shared_ptr<Ope>>(); auto baseId = ignore ? 1 : 0;
(*cxt.grammar)[name].name = name;
const auto& name = v[baseId].get<std::string>();
auto ope = v[baseId + 2].get<std::shared_ptr<Ope>>();
auto& def = (*cxt.grammar)[name];
def <= ope;
def.name = name;
def.ignore = ignore;
if (cxt.start.empty()) { if (cxt.start.empty()) {
cxt.start = name; cxt.start = name;
@ -1087,10 +1108,8 @@ private:
} else { } else {
std::vector<std::shared_ptr<Ope>> opes; std::vector<std::shared_ptr<Ope>> opes;
for (auto i = 0u; i < v.size(); i++) { for (auto i = 0u; i < v.size(); i++) {
if (!(i % 2)) {
opes.push_back(v[i].get<std::shared_ptr<Ope>>()); opes.push_back(v[i].get<std::shared_ptr<Ope>>());
} }
}
const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes); const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes);
return ope; return ope;
} }

View File

@ -86,7 +86,7 @@ TEST_CASE("String capture test3", "[general]")
std::vector<std::string> tags; std::vector<std::string> tags;
pg["TOKEN"] = [&](const char* s, size_t l, const vector<any>& v) { pg["TOKEN"] = [&](const char* s, size_t l) {
tags.push_back(std::string(s, l)); tags.push_back(std::string(s, l));
}; };
@ -147,6 +147,23 @@ TEST_CASE("Lambda action test", "[general]")
REQUIRE(ss == "hello"); REQUIRE(ss == "hello");
} }
// Checks the `~` (ignore) operator: the whitespace rule `_` is declared with
// `~`, so ROOT's action should receive only the two ITEM values.
TEST_CASE("Skip token test", "[general]")
{
    peglib::peg parser(
        " ROOT <- _ ITEM (',' _ ITEM _)* "
        " ITEM <- ([a-z])+ "
        " ~_ <- [ \t]* "
    );

    // Matches of the ignored `_` rule must not appear in the value list.
    parser["ROOT"] = [&](const vector<any>& values) {
        REQUIRE(values.size() == 2);
    };

    auto parsed = parser.parse(" item1, item2 ");

    REQUIRE(parsed == true);
}
TEST_CASE("Backtracking test", "[general]") TEST_CASE("Backtracking test", "[general]")
{ {
peg parser( peg parser(