From 9c872e2d5dfc99a581ce88080eaf59ce5d4470b7 Mon Sep 17 00:00:00 2001 From: yhirose Date: Wed, 18 Feb 2015 18:00:11 -0500 Subject: [PATCH] Added 'ignore' operator. --- README.md | 21 +++++++++++++++++++-- peglib.h | 49 ++++++++++++++++++++++++++++++++++--------------- test/test.cc | 23 ++++++++++++++++++++--- 3 files changed, 73 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index d61b538..7c69d3b 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now: - * `<` ... `>` (Anchor operators) - * `$<` ... `>` (Capture operators) + * `<` ... `>` (Anchor operator) + * `$<` ... `>` (Capture operator) + * `~` (Ignore operator) How to use ---------- @@ -108,6 +109,22 @@ pg["TOKEN"] = [](const char* s, size_t l, const vector& v) { auto ret = pg.parse(" token1, token2 "); ``` +We can ignore unnecessary semantic values from the list by using `~` operator. + +```c++ +peglib::peg parser( + " ROOT <- _ ITEM (',' _ ITEM _)* " + " ITEM <- ([a-z])+ " + " ~_ <- [ \t]* " +); + +parser["ROOT"] = [&](const vector& v) { + assert(v.size() == 2); // should be 2 instead of 5. +}; + +auto ret = parser.parse(" item1, item2 "); +``` + Simple interface ---------------- diff --git a/peglib.h b/peglib.h index e6e45f5..c37e35f 100644 --- a/peglib.h +++ b/peglib.h @@ -153,12 +153,12 @@ private: */ struct SemanticValues { - std::vector values; - //std::vector names; - const char* s; - size_t l; + std::vector values; + //std::vector names; + const char* s; + size_t l; - SemanticValues() : s(nullptr), l(0) {} + SemanticValues() : s(nullptr), l(0) {} }; /* @@ -724,11 +724,13 @@ class Definition public: Definition() : actions(1) + , ignore(false) , holder_(std::make_shared(this)) {} Definition(const Definition& rhs) : name(rhs.name) , actions(1) + , ignore(false) , holder_(rhs.holder_) { holder_->outer_ = this; @@ -737,6 +739,7 @@ public: Definition(Definition&& rhs) : name(std::move(rhs.name)) , actions(1) + , ignore(rhs.ignore) , holder_(std::move(rhs.holder_)) { holder_->outer_ = this; @@ -744,6 +747,7 @@ public: Definition(const std::shared_ptr& ope) : actions(1) + , ignore(false) , holder_(std::make_shared(this)) { holder_->ope_ = ope; @@ -803,8 +807,14 @@ public: return *this; } + Definition& operator~() { + ignore = true; + return *this; + } + std::string name; std::vector actions; + bool ignore; private: friend class DefinitionReference; @@ -823,7 +833,7 @@ private: const auto& rule = *ope_; SemanticValues chldsv; auto r = rule.parse(s, l, chldsv, c); - if (r.ret) { + if (r.ret && !outer_->ignore) { assert(!outer_->actions.empty()); auto id = r.choice + 1; @@ -1011,7 +1021,7 @@ private: void make_grammar() { // Setup PEG syntax parser g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); - g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]); + g["Definition"] <= seq(opt(g["IGNORE"]), g["Identifier"], g["LEFTARROW"], g["Expression"]); g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); g["Sequence"] <= zom(g["Prefix"]); @@ -1031,7 +1041,7 @@ private: g["Literal"] <= cho(seq(cls("'"), anc(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]), seq(cls("\""), anc(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"])); - g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]); + g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]); g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]); g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")), @@ -1040,7 +1050,7 @@ private: seq(npd(chr('\\')), dot())); g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]); - g["SLASH"] <= seq(chr('/'), g["Spacing"]); + ~g["SLASH"] <= seq(chr('/'), g["Spacing"]); g["AND"] <= seq(chr('&'), g["Spacing"]); g["NOT"] <= seq(chr('!'), g["Spacing"]); g["QUESTION"] <= seq(chr('?'), g["Spacing"]); @@ -1062,6 +1072,10 @@ private: g["BeginCap"] <= seq(lit("$<"), g["Spacing"]); g["EndCap"] <= seq(lit(">"), g["Spacing"]); + g["IGNORE"] <= chr('~'); + + g["Action"] <= seq(chr('{'), anc(zom(npd(chr('}')))), chr('}'), g["Spacing"]); + // Set definition names for (auto& x: g) { x.second.name = x.first; @@ -1072,9 +1086,16 @@ private: g["Definition"] = [&](const std::vector& v, any& c) { Context& cxt = *c.get(); - const auto& name = v[0].get(); - (*cxt.grammar)[name] <= v[2].get>(); - (*cxt.grammar)[name].name = name; + auto ignore = (v.size() == 4); + auto baseId = ignore ? 1 : 0; + + const auto& name = v[baseId].get(); + auto ope = v[baseId + 2].get>(); + + auto& def = (*cxt.grammar)[name]; + def <= ope; + def.name = name; + def.ignore = ignore; if (cxt.start.empty()) { cxt.start = name; @@ -1087,9 +1108,7 @@ private: } else { std::vector> opes; for (auto i = 0u; i < v.size(); i++) { - if (!(i % 2)) { - opes.push_back(v[i].get>()); - } + opes.push_back(v[i].get>()); } const std::shared_ptr ope = std::make_shared(opes); return ope; diff --git a/test/test.cc b/test/test.cc index 5a6fd29..ada8c7c 100644 --- a/test/test.cc +++ b/test/test.cc @@ -76,7 +76,7 @@ TEST_CASE("String capture test2", "[general]") TEST_CASE("String capture test3", "[general]") { - auto syntax = + auto syntax = " ROOT <- _ TOKEN* " " TOKEN <- '[' < (!']' .)+ > ']' _ " " _ <- [ \t\r\n]* " @@ -86,8 +86,8 @@ TEST_CASE("String capture test3", "[general]") std::vector tags; - pg["TOKEN"] = [&](const char* s, size_t l, const vector& v) { - tags.push_back(std::string(s, l)); + pg["TOKEN"] = [&](const char* s, size_t l) { + tags.push_back(std::string(s, l)); }; auto ret = pg.parse(" [tag1] [tag:2] [tag-3] "); @@ -147,6 +147,23 @@ TEST_CASE("Lambda action test", "[general]") REQUIRE(ss == "hello"); } +TEST_CASE("Skip token test", "[general]") +{ + peglib::peg parser( + " ROOT <- _ ITEM (',' _ ITEM _)* " + " ITEM <- ([a-z])+ " + " ~_ <- [ \t]* " + ); + + parser["ROOT"] = [&](const vector& v) { + REQUIRE(v.size() == 2); + }; + + auto ret = parser.parse(" item1, item2 "); + + REQUIRE(ret == true); +} + TEST_CASE("Backtracking test", "[general]") { peg parser(