diff --git a/README.md b/README.md index 99aca81..4abe26f 100644 --- a/README.md +++ b/README.md @@ -269,91 +269,6 @@ if (parser.parse("...", ast)) { See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc). -Simple interface ----------------- - -*cpp-peglib* provides std::regex-like simple interface for trivial tasks. - -`peg::peg_match` tries to capture strings in the `$< ... >` operator and store them into `peg::match` object. - -```cpp -peg::match m; - -auto ret = peg::peg_match( - R"( - ROOT <- _ ('[' $< TAG_NAME > ']' _)* - TAG_NAME <- (!']' .)+ - _ <- [ \t]* - )", - " [tag1] [tag:2] [tag-3] ", - m); - -assert(ret == true); -assert(m.size() == 4); -assert(m.str(1) == "tag1"); -assert(m.str(2) == "tag:2"); -assert(m.str(3) == "tag-3"); -``` - -It also supports named capture with the `$name<` ... `>` operator. - -```cpp -peg::match m; - -auto ret = peg::peg_match( - R"( - ROOT <- _ ('[' $test< TAG_NAME > ']' _)* - TAG_NAME <- (!']' .)+ - _ <- [ \t]* - )", - " [tag1] [tag:2] [tag-3] ", - m); - -auto cap = m.named_capture("test"); - -REQUIRE(ret == true); -REQUIRE(m.size() == 4); -REQUIRE(cap.size() == 3); -REQUIRE(m.str(cap[2]) == "tag-3"); -``` - -There are some ways to *search* a peg pattern in a document. - -```cpp -using namespace peg; - -auto syntax = R"( - ROOT <- '[' $< [a-z0-9]+ > ']' -)"; - -auto s = " [tag1] [tag2] [tag3] "; - -// peg::peg_search -parser pg(syntax); -size_t pos = 0; -auto n = strlen(s); -match m; -while (peg_search(pg, s + pos, n - pos, m)) { - cout << m.str() << endl; // entire match - cout << m.str(1) << endl; // submatch #1 - pos += m.length(); -} - -// peg::peg_token_iterator -peg_token_iterator it(syntax, s); -while (it != peg_token_iterator()) { - cout << it->str() << endl; // entire match - cout << it->str(1) << endl; // submatch #1 - ++it; -} - -// peg::peg_token_range -for (auto& m: peg_token_range(syntax, s)) { - cout << m.str() << endl; // entire match - cout << m.str(1) << endl; // submatch #1 -} -``` - Make a parser with parser combinators ------------------------------------- diff --git a/peglib.h b/peglib.h index 717b142..45650fc 100644 --- a/peglib.h +++ b/peglib.h @@ -1,7 +1,7 @@ // // peglib.h // -// Copyright (c) 2015-17 Yuji Hirose. All rights reserved. +// Copyright (c) 2015-18 Yuji Hirose. All rights reserved. // MIT License // @@ -1608,10 +1608,6 @@ inline std::shared_ptr cap(const std::shared_ptr& ope, MatchAction ma, return std::make_shared(ope, ma, n, s); } -inline std::shared_ptr cap(const std::shared_ptr& ope, MatchAction ma) { - return std::make_shared(ope, ma, static_cast(-1), std::string()); -} - inline std::shared_ptr tok(const std::shared_ptr& ope) { return std::make_shared(ope); } @@ -2336,14 +2332,7 @@ public: } bool load_grammar(const char* s, size_t n) { - grammar_ = ParserGenerator::parse( - s, n, - start_, - [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) { - if (match_action) match_action(a_s, a_n, a_id, a_name); - }, - log); - + grammar_ = ParserGenerator::parse(s, n, start_, match_action, log); return grammar_ != nullptr; } @@ -2511,256 +2500,6 @@ private: std::string start_; }; -/*----------------------------------------------------------------------------- - * Simple interface - *---------------------------------------------------------------------------*/ - -struct match -{ - struct Item { - const char* s; - size_t n; - size_t id; - std::string name; - - size_t length() const { return n; } - std::string str() const { return std::string(s, n); } - }; - - std::vector matches; - - typedef std::vector::iterator iterator; - typedef std::vector::const_iterator const_iterator; - - bool empty() const { - return matches.empty(); - } - - size_t size() const { - return matches.size(); - } - - size_t length(size_t n = 0) { - return matches[n].length(); - } - - std::string str(size_t n = 0) const { - return matches[n].str(); - } - - const Item& operator[](size_t n) const { - return matches[n]; - } - - iterator begin() { - return matches.begin(); - } - - iterator end() { - return matches.end(); - } - - const_iterator begin() const { - return matches.cbegin(); - } - - const_iterator end() const { - return matches.cend(); - } - - std::vector named_capture(const std::string& name) const { - std::vector ret; - for (auto i = 0u; i < matches.size(); i++) { - if (matches[i].name == name) { - ret.push_back(i); - } - } - return ret; - } - - std::map> named_captures() const { - std::map> ret; - for (auto i = 0u; i < matches.size(); i++) { - ret[matches[i].name].push_back(i); - } - return ret; - } - - std::vector indexed_capture(size_t id) const { - std::vector ret; - for (auto i = 0u; i < matches.size(); i++) { - if (matches[i].id == id) { - ret.push_back(i); - } - } - return ret; - } - - std::map> indexed_captures() const { - std::map> ret; - for (auto i = 0u; i < matches.size(); i++) { - ret[matches[i].id].push_back(i); - } - return ret; - } -}; - -inline bool peg_match(const char* syntax, const char* s, match& m) { - m.matches.clear(); - - parser pg(syntax); - pg.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) { - m.matches.push_back(match::Item{ a_s, a_n, a_id, a_name }); - }; - - auto ret = pg.parse(s); - if (ret) { - auto n = strlen(s); - m.matches.insert(m.matches.begin(), match::Item{ s, n, 0, std::string() }); - } - - return ret; -} - -inline bool peg_match(const char* syntax, const char* s) { - parser parser(syntax); - return parser.parse(s); -} - -inline bool peg_search(parser& pg, const char* s, size_t n, match& m) { - m.matches.clear(); - - pg.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) { - m.matches.push_back(match::Item{ a_s, a_n, a_id, a_name }); - }; - - size_t mpos, mlen; - auto ret = pg.search(s, n, mpos, mlen); - if (ret) { - m.matches.insert(m.matches.begin(), match::Item{ s + mpos, mlen, 0, std::string() }); - return true; - } - - return false; -} - -inline bool peg_search(parser& pg, const char* s, match& m) { - auto n = strlen(s); - return peg_search(pg, s, n, m); -} - -inline bool peg_search(const char* syntax, const char* s, size_t n, match& m) { - parser pg(syntax); - return peg_search(pg, s, n, m); -} - -inline bool peg_search(const char* syntax, const char* s, match& m) { - parser pg(syntax); - auto n = strlen(s); - return peg_search(pg, s, n, m); -} - -class peg_token_iterator : public std::iterator -{ -public: - peg_token_iterator() - : s_(nullptr) - , l_(0) - , pos_((std::numeric_limits::max)()) {} - - peg_token_iterator(const char* syntax, const char* s) - : peg_(syntax) - , s_(s) - , l_(strlen(s)) - , pos_(0) { - peg_.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) { - m_.matches.push_back(match::Item{ a_s, a_n, a_id, a_name }); - }; - search(); - } - - peg_token_iterator(const peg_token_iterator& rhs) - : peg_(rhs.peg_) - , s_(rhs.s_) - , l_(rhs.l_) - , pos_(rhs.pos_) - , m_(rhs.m_) {} - - peg_token_iterator& operator++() { - search(); - return *this; - } - - peg_token_iterator operator++(int) { - auto it = *this; - search(); - return it; - } - - match& operator*() { - return m_; - } - - match* operator->() { - return &m_; - } - - bool operator==(const peg_token_iterator& rhs) { - return pos_ == rhs.pos_; - } - - bool operator!=(const peg_token_iterator& rhs) { - return pos_ != rhs.pos_; - } - -private: - void search() { - m_.matches.clear(); - size_t mpos, mlen; - if (peg_.search(s_ + pos_, l_ - pos_, mpos, mlen)) { - m_.matches.insert(m_.matches.begin(), match::Item{ s_ + mpos, mlen, 0, std::string() }); - pos_ += mpos + mlen; - } else { - pos_ = (std::numeric_limits::max)(); - } - } - - parser peg_; - const char* s_; - size_t l_; - size_t pos_; - match m_; -}; - -struct peg_token_range { - typedef peg_token_iterator iterator; - typedef const peg_token_iterator const_iterator; - - peg_token_range(const char* syntax, const char* s) - : beg_iter(peg_token_iterator(syntax, s)) - , end_iter() {} - - iterator begin() { - return beg_iter; - } - - iterator end() { - return end_iter; - } - - const_iterator cbegin() const { - return beg_iter; - } - - const_iterator cend() const { - return end_iter; - } - -private: - peg_token_iterator beg_iter; - peg_token_iterator end_iter; -}; - } // namespace peg #endif diff --git a/test/test.cc b/test/test.cc index 288a3ec..399d1fc 100644 --- a/test/test.cc +++ b/test/test.cc @@ -59,23 +59,6 @@ TEST_CASE("String capture test", "[general]") REQUIRE(tags[2] == "tag-3"); } -TEST_CASE("String capture test with match", "[general]") -{ - peg::match m; - auto ret = peg::peg_match( - " ROOT <- _ ('[' $< TAG_NAME > ']' _)* " - " TAG_NAME <- (!']' .)+ " - " _ <- [ \t]* ", - " [tag1] [tag:2] [tag-3] ", - m); - - REQUIRE(ret == true); - REQUIRE(m.size() == 4); - REQUIRE(m.str(1) == "tag1"); - REQUIRE(m.str(2) == "tag:2"); - REQUIRE(m.str(3) == "tag-3"); -} - using namespace peg; using namespace std; @@ -123,50 +106,6 @@ TEST_CASE("String capture test3", "[general]") REQUIRE(tags[2] == "tag-3"); } -TEST_CASE("Named capture test", "[general]") -{ - peg::match m; - - auto ret = peg::peg_match( - " ROOT <- _ ('[' $test< TAG_NAME > ']' _)* " - " TAG_NAME <- (!']' .)+ " - " _ <- [ \t]* ", - " [tag1] [tag:2] [tag-3] ", - m); - - auto cap = m.named_capture("test"); - - REQUIRE(ret == true); - REQUIRE(m.size() == 4); - REQUIRE(cap.size() == 3); - REQUIRE(m.str(cap[2]) == "tag-3"); -} - -TEST_CASE("String capture test with embedded match action", "[general]") -{ - Definition ROOT, TAG, TAG_NAME, WS; - - vector tags; - - ROOT <= seq(WS, zom(TAG)); - TAG <= seq(chr('['), - cap(TAG_NAME, [&](const char* s, size_t n, size_t /*id*/, const std::string& /*name*/) { - tags.push_back(string(s, n)); - }), - chr(']'), - WS); - TAG_NAME <= oom(seq(npd(chr(']')), dot())); - WS <= zom(cls(" \t")); - - auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); - - REQUIRE(r.ret == true); - REQUIRE(tags.size() == 3); - REQUIRE(tags[0] == "tag1"); - REQUIRE(tags[1] == "tag:2"); - REQUIRE(tags[2] == "tag-3"); -} - TEST_CASE("Cyclic grammer test", "[general]") { Definition PARENT;