mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 11:55:30 +00:00
Added 'ignore' operator.
This commit is contained in:
parent
56daf08d5b
commit
9c872e2d5d
21
README.md
21
README.md
@ -7,8 +7,9 @@ C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar)
|
|||||||
|
|
||||||
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now:
|
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now:
|
||||||
|
|
||||||
* `<` ... `>` (Anchor operators)
|
* `<` ... `>` (Anchor operator)
|
||||||
* `$<` ... `>` (Capture operators)
|
* `$<` ... `>` (Capture operator)
|
||||||
|
* `~` (Ignore operator)
|
||||||
|
|
||||||
How to use
|
How to use
|
||||||
----------
|
----------
|
||||||
@ -108,6 +109,22 @@ pg["TOKEN"] = [](const char* s, size_t l, const vector<any>& v) {
|
|||||||
auto ret = pg.parse(" token1, token2 ");
|
auto ret = pg.parse(" token1, token2 ");
|
||||||
```
|
```
|
||||||
|
|
||||||
|
We can ignore unnecessary semantic values from the list by using the `~` operator.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
peglib::peg parser(
|
||||||
|
" ROOT <- _ ITEM (',' _ ITEM _)* "
|
||||||
|
" ITEM <- ([a-z])+ "
|
||||||
|
" ~_ <- [ \t]* "
|
||||||
|
);
|
||||||
|
|
||||||
|
parser["ROOT"] = [&](const vector<any>& v) {
|
||||||
|
assert(v.size() == 2); // should be 2 instead of 5.
|
||||||
|
};
|
||||||
|
|
||||||
|
auto ret = parser.parse(" item1, item2 ");
|
||||||
|
```
|
||||||
|
|
||||||
Simple interface
|
Simple interface
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
|
49
peglib.h
49
peglib.h
@ -153,12 +153,12 @@ private:
|
|||||||
*/
|
*/
|
||||||
struct SemanticValues
|
struct SemanticValues
|
||||||
{
|
{
|
||||||
std::vector<any> values;
|
std::vector<any> values;
|
||||||
//std::vector<std::string> names;
|
//std::vector<std::string> names;
|
||||||
const char* s;
|
const char* s;
|
||||||
size_t l;
|
size_t l;
|
||||||
|
|
||||||
SemanticValues() : s(nullptr), l(0) {}
|
SemanticValues() : s(nullptr), l(0) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -724,11 +724,13 @@ class Definition
|
|||||||
public:
|
public:
|
||||||
Definition()
|
Definition()
|
||||||
: actions(1)
|
: actions(1)
|
||||||
|
, ignore(false)
|
||||||
, holder_(std::make_shared<Holder>(this)) {}
|
, holder_(std::make_shared<Holder>(this)) {}
|
||||||
|
|
||||||
Definition(const Definition& rhs)
|
Definition(const Definition& rhs)
|
||||||
: name(rhs.name)
|
: name(rhs.name)
|
||||||
, actions(1)
|
, actions(1)
|
||||||
|
, ignore(false)
|
||||||
, holder_(rhs.holder_)
|
, holder_(rhs.holder_)
|
||||||
{
|
{
|
||||||
holder_->outer_ = this;
|
holder_->outer_ = this;
|
||||||
@ -737,6 +739,7 @@ public:
|
|||||||
Definition(Definition&& rhs)
|
Definition(Definition&& rhs)
|
||||||
: name(std::move(rhs.name))
|
: name(std::move(rhs.name))
|
||||||
, actions(1)
|
, actions(1)
|
||||||
|
, ignore(rhs.ignore)
|
||||||
, holder_(std::move(rhs.holder_))
|
, holder_(std::move(rhs.holder_))
|
||||||
{
|
{
|
||||||
holder_->outer_ = this;
|
holder_->outer_ = this;
|
||||||
@ -744,6 +747,7 @@ public:
|
|||||||
|
|
||||||
Definition(const std::shared_ptr<Ope>& ope)
|
Definition(const std::shared_ptr<Ope>& ope)
|
||||||
: actions(1)
|
: actions(1)
|
||||||
|
, ignore(false)
|
||||||
, holder_(std::make_shared<Holder>(this))
|
, holder_(std::make_shared<Holder>(this))
|
||||||
{
|
{
|
||||||
holder_->ope_ = ope;
|
holder_->ope_ = ope;
|
||||||
@ -803,8 +807,14 @@ public:
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Definition& operator~() {
|
||||||
|
ignore = true;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<Action> actions;
|
std::vector<Action> actions;
|
||||||
|
bool ignore;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class DefinitionReference;
|
friend class DefinitionReference;
|
||||||
@ -823,7 +833,7 @@ private:
|
|||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
SemanticValues chldsv;
|
SemanticValues chldsv;
|
||||||
auto r = rule.parse(s, l, chldsv, c);
|
auto r = rule.parse(s, l, chldsv, c);
|
||||||
if (r.ret) {
|
if (r.ret && !outer_->ignore) {
|
||||||
assert(!outer_->actions.empty());
|
assert(!outer_->actions.empty());
|
||||||
|
|
||||||
auto id = r.choice + 1;
|
auto id = r.choice + 1;
|
||||||
@ -1011,7 +1021,7 @@ private:
|
|||||||
void make_grammar() {
|
void make_grammar() {
|
||||||
// Setup PEG syntax parser
|
// Setup PEG syntax parser
|
||||||
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
|
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
|
||||||
g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]);
|
g["Definition"] <= seq(opt(g["IGNORE"]), g["Identifier"], g["LEFTARROW"], g["Expression"]);
|
||||||
|
|
||||||
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
|
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
|
||||||
g["Sequence"] <= zom(g["Prefix"]);
|
g["Sequence"] <= zom(g["Prefix"]);
|
||||||
@ -1031,7 +1041,7 @@ private:
|
|||||||
g["Literal"] <= cho(seq(cls("'"), anc(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
|
g["Literal"] <= cho(seq(cls("'"), anc(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
|
||||||
seq(cls("\""), anc(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
|
seq(cls("\""), anc(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
|
||||||
|
|
||||||
g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]);
|
g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]);
|
||||||
|
|
||||||
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
|
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
|
||||||
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
|
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
|
||||||
@ -1040,7 +1050,7 @@ private:
|
|||||||
seq(npd(chr('\\')), dot()));
|
seq(npd(chr('\\')), dot()));
|
||||||
|
|
||||||
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
|
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
|
||||||
g["SLASH"] <= seq(chr('/'), g["Spacing"]);
|
~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
|
||||||
g["AND"] <= seq(chr('&'), g["Spacing"]);
|
g["AND"] <= seq(chr('&'), g["Spacing"]);
|
||||||
g["NOT"] <= seq(chr('!'), g["Spacing"]);
|
g["NOT"] <= seq(chr('!'), g["Spacing"]);
|
||||||
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
|
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
|
||||||
@ -1062,6 +1072,10 @@ private:
|
|||||||
g["BeginCap"] <= seq(lit("$<"), g["Spacing"]);
|
g["BeginCap"] <= seq(lit("$<"), g["Spacing"]);
|
||||||
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
|
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
|
||||||
|
|
||||||
|
g["IGNORE"] <= chr('~');
|
||||||
|
|
||||||
|
g["Action"] <= seq(chr('{'), anc(zom(npd(chr('}')))), chr('}'), g["Spacing"]);
|
||||||
|
|
||||||
// Set definition names
|
// Set definition names
|
||||||
for (auto& x: g) {
|
for (auto& x: g) {
|
||||||
x.second.name = x.first;
|
x.second.name = x.first;
|
||||||
@ -1072,9 +1086,16 @@ private:
|
|||||||
g["Definition"] = [&](const std::vector<any>& v, any& c) {
|
g["Definition"] = [&](const std::vector<any>& v, any& c) {
|
||||||
Context& cxt = *c.get<Context*>();
|
Context& cxt = *c.get<Context*>();
|
||||||
|
|
||||||
const auto& name = v[0].get<std::string>();
|
auto ignore = (v.size() == 4);
|
||||||
(*cxt.grammar)[name] <= v[2].get<std::shared_ptr<Ope>>();
|
auto baseId = ignore ? 1 : 0;
|
||||||
(*cxt.grammar)[name].name = name;
|
|
||||||
|
const auto& name = v[baseId].get<std::string>();
|
||||||
|
auto ope = v[baseId + 2].get<std::shared_ptr<Ope>>();
|
||||||
|
|
||||||
|
auto& def = (*cxt.grammar)[name];
|
||||||
|
def <= ope;
|
||||||
|
def.name = name;
|
||||||
|
def.ignore = ignore;
|
||||||
|
|
||||||
if (cxt.start.empty()) {
|
if (cxt.start.empty()) {
|
||||||
cxt.start = name;
|
cxt.start = name;
|
||||||
@ -1087,9 +1108,7 @@ private:
|
|||||||
} else {
|
} else {
|
||||||
std::vector<std::shared_ptr<Ope>> opes;
|
std::vector<std::shared_ptr<Ope>> opes;
|
||||||
for (auto i = 0u; i < v.size(); i++) {
|
for (auto i = 0u; i < v.size(); i++) {
|
||||||
if (!(i % 2)) {
|
opes.push_back(v[i].get<std::shared_ptr<Ope>>());
|
||||||
opes.push_back(v[i].get<std::shared_ptr<Ope>>());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes);
|
const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes);
|
||||||
return ope;
|
return ope;
|
||||||
|
23
test/test.cc
23
test/test.cc
@ -76,7 +76,7 @@ TEST_CASE("String capture test2", "[general]")
|
|||||||
|
|
||||||
TEST_CASE("String capture test3", "[general]")
|
TEST_CASE("String capture test3", "[general]")
|
||||||
{
|
{
|
||||||
auto syntax =
|
auto syntax =
|
||||||
" ROOT <- _ TOKEN* "
|
" ROOT <- _ TOKEN* "
|
||||||
" TOKEN <- '[' < (!']' .)+ > ']' _ "
|
" TOKEN <- '[' < (!']' .)+ > ']' _ "
|
||||||
" _ <- [ \t\r\n]* "
|
" _ <- [ \t\r\n]* "
|
||||||
@ -86,8 +86,8 @@ TEST_CASE("String capture test3", "[general]")
|
|||||||
|
|
||||||
std::vector<std::string> tags;
|
std::vector<std::string> tags;
|
||||||
|
|
||||||
pg["TOKEN"] = [&](const char* s, size_t l, const vector<any>& v) {
|
pg["TOKEN"] = [&](const char* s, size_t l) {
|
||||||
tags.push_back(std::string(s, l));
|
tags.push_back(std::string(s, l));
|
||||||
};
|
};
|
||||||
|
|
||||||
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
|
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
|
||||||
@ -147,6 +147,23 @@ TEST_CASE("Lambda action test", "[general]")
|
|||||||
REQUIRE(ss == "hello");
|
REQUIRE(ss == "hello");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE("Skip token test", "[general]")
|
||||||
|
{
|
||||||
|
peglib::peg parser(
|
||||||
|
" ROOT <- _ ITEM (',' _ ITEM _)* "
|
||||||
|
" ITEM <- ([a-z])+ "
|
||||||
|
" ~_ <- [ \t]* "
|
||||||
|
);
|
||||||
|
|
||||||
|
parser["ROOT"] = [&](const vector<any>& v) {
|
||||||
|
REQUIRE(v.size() == 2);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto ret = parser.parse(" item1, item2 ");
|
||||||
|
|
||||||
|
REQUIRE(ret == true);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE("Backtracking test", "[general]")
|
TEST_CASE("Backtracking test", "[general]")
|
||||||
{
|
{
|
||||||
peg parser(
|
peg parser(
|
||||||
|
Loading…
Reference in New Issue
Block a user