Added 'ignore' operator.

This commit is contained in:
yhirose 2015-02-18 18:00:11 -05:00
parent 56daf08d5b
commit 9c872e2d5d
3 changed files with 73 additions and 20 deletions

View File

@ -7,8 +7,9 @@ C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar)
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now:
* `<` ... `>` (Anchor operators)
* `$<` ... `>` (Capture operators)
* `<` ... `>` (Anchor operator)
* `$<` ... `>` (Capture operator)
* `~` (Ignore operator)
How to use
----------
@ -108,6 +109,22 @@ pg["TOKEN"] = [](const char* s, size_t l, const vector<any>& v) {
auto ret = pg.parse(" token1, token2 ");
```
We can ignore unnecessary semantic values from the list by using `~` operator.
```c++
peglib::peg parser(
" ROOT <- _ ITEM (',' _ ITEM _)* "
" ITEM <- ([a-z])+ "
" ~_ <- [ \t]* "
);
parser["ROOT"] = [&](const vector<any>& v) {
assert(v.size() == 2); // should be 2 instead of 5.
};
auto ret = parser.parse(" item1, item2 ");
```
Simple interface
----------------

View File

@ -153,12 +153,12 @@ private:
*/
struct SemanticValues
{
std::vector<any> values;
//std::vector<std::string> names;
const char* s;
size_t l;
std::vector<any> values;
//std::vector<std::string> names;
const char* s;
size_t l;
SemanticValues() : s(nullptr), l(0) {}
SemanticValues() : s(nullptr), l(0) {}
};
/*
@ -724,11 +724,13 @@ class Definition
public:
Definition()
: actions(1)
, ignore(false)
, holder_(std::make_shared<Holder>(this)) {}
Definition(const Definition& rhs)
: name(rhs.name)
, actions(1)
, ignore(false)
, holder_(rhs.holder_)
{
holder_->outer_ = this;
@ -737,6 +739,7 @@ public:
Definition(Definition&& rhs)
: name(std::move(rhs.name))
, actions(1)
, ignore(rhs.ignore)
, holder_(std::move(rhs.holder_))
{
holder_->outer_ = this;
@ -744,6 +747,7 @@ public:
Definition(const std::shared_ptr<Ope>& ope)
: actions(1)
, ignore(false)
, holder_(std::make_shared<Holder>(this))
{
holder_->ope_ = ope;
@ -803,8 +807,14 @@ public:
return *this;
}
Definition& operator~() {
ignore = true;
return *this;
}
std::string name;
std::vector<Action> actions;
bool ignore;
private:
friend class DefinitionReference;
@ -823,7 +833,7 @@ private:
const auto& rule = *ope_;
SemanticValues chldsv;
auto r = rule.parse(s, l, chldsv, c);
if (r.ret) {
if (r.ret && !outer_->ignore) {
assert(!outer_->actions.empty());
auto id = r.choice + 1;
@ -1011,7 +1021,7 @@ private:
void make_grammar() {
// Setup PEG syntax parser
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]);
g["Definition"] <= seq(opt(g["IGNORE"]), g["Identifier"], g["LEFTARROW"], g["Expression"]);
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
g["Sequence"] <= zom(g["Prefix"]);
@ -1031,7 +1041,7 @@ private:
g["Literal"] <= cho(seq(cls("'"), anc(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
seq(cls("\""), anc(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]);
g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]);
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
@ -1040,7 +1050,7 @@ private:
seq(npd(chr('\\')), dot()));
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
g["SLASH"] <= seq(chr('/'), g["Spacing"]);
~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
g["AND"] <= seq(chr('&'), g["Spacing"]);
g["NOT"] <= seq(chr('!'), g["Spacing"]);
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
@ -1062,6 +1072,10 @@ private:
g["BeginCap"] <= seq(lit("$<"), g["Spacing"]);
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
g["IGNORE"] <= chr('~');
g["Action"] <= seq(chr('{'), anc(zom(npd(chr('}')))), chr('}'), g["Spacing"]);
// Set definition names
for (auto& x: g) {
x.second.name = x.first;
@ -1072,9 +1086,16 @@ private:
g["Definition"] = [&](const std::vector<any>& v, any& c) {
Context& cxt = *c.get<Context*>();
const auto& name = v[0].get<std::string>();
(*cxt.grammar)[name] <= v[2].get<std::shared_ptr<Ope>>();
(*cxt.grammar)[name].name = name;
auto ignore = (v.size() == 4);
auto baseId = ignore ? 1 : 0;
const auto& name = v[baseId].get<std::string>();
auto ope = v[baseId + 2].get<std::shared_ptr<Ope>>();
auto& def = (*cxt.grammar)[name];
def <= ope;
def.name = name;
def.ignore = ignore;
if (cxt.start.empty()) {
cxt.start = name;
@ -1087,9 +1108,7 @@ private:
} else {
std::vector<std::shared_ptr<Ope>> opes;
for (auto i = 0u; i < v.size(); i++) {
if (!(i % 2)) {
opes.push_back(v[i].get<std::shared_ptr<Ope>>());
}
opes.push_back(v[i].get<std::shared_ptr<Ope>>());
}
const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes);
return ope;

View File

@ -76,7 +76,7 @@ TEST_CASE("String capture test2", "[general]")
TEST_CASE("String capture test3", "[general]")
{
auto syntax =
auto syntax =
" ROOT <- _ TOKEN* "
" TOKEN <- '[' < (!']' .)+ > ']' _ "
" _ <- [ \t\r\n]* "
@ -86,8 +86,8 @@ TEST_CASE("String capture test3", "[general]")
std::vector<std::string> tags;
pg["TOKEN"] = [&](const char* s, size_t l, const vector<any>& v) {
tags.push_back(std::string(s, l));
pg["TOKEN"] = [&](const char* s, size_t l) {
tags.push_back(std::string(s, l));
};
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
@ -147,6 +147,23 @@ TEST_CASE("Lambda action test", "[general]")
REQUIRE(ss == "hello");
}
TEST_CASE("Skip token test", "[general]")
{
peglib::peg parser(
" ROOT <- _ ITEM (',' _ ITEM _)* "
" ITEM <- ([a-z])+ "
" ~_ <- [ \t]* "
);
parser["ROOT"] = [&](const vector<any>& v) {
REQUIRE(v.size() == 2);
};
auto ret = parser.parse(" item1, item2 ");
REQUIRE(ret == true);
}
TEST_CASE("Backtracking test", "[general]")
{
peg parser(