mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 13:25:30 +00:00
Removed the simple interface
This commit is contained in:
parent
485a647d2b
commit
ca950520ef
85
README.md
85
README.md
@ -269,91 +269,6 @@ if (parser.parse("...", ast)) {
|
||||
|
||||
See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc).
|
||||
|
||||
Simple interface
|
||||
----------------
|
||||
|
||||
*cpp-peglib* provides a simple, std::regex-like interface for trivial tasks.
|
||||
|
||||
`peg::peg_match` captures the strings matched inside the `$< ... >` operator and stores them in a `peg::match` object.
|
||||
|
||||
```cpp
|
||||
peg::match m;
|
||||
|
||||
auto ret = peg::peg_match(
|
||||
R"(
|
||||
ROOT <- _ ('[' $< TAG_NAME > ']' _)*
|
||||
TAG_NAME <- (!']' .)+
|
||||
_ <- [ \t]*
|
||||
)",
|
||||
" [tag1] [tag:2] [tag-3] ",
|
||||
m);
|
||||
|
||||
assert(ret == true);
|
||||
assert(m.size() == 4);
|
||||
assert(m.str(1) == "tag1");
|
||||
assert(m.str(2) == "tag:2");
|
||||
assert(m.str(3) == "tag-3");
|
||||
```
|
||||
|
||||
It also supports named capture with the `$name<` ... `>` operator.
|
||||
|
||||
```cpp
|
||||
peg::match m;
|
||||
|
||||
auto ret = peg::peg_match(
|
||||
R"(
|
||||
ROOT <- _ ('[' $test< TAG_NAME > ']' _)*
|
||||
TAG_NAME <- (!']' .)+
|
||||
_ <- [ \t]*
|
||||
)",
|
||||
" [tag1] [tag:2] [tag-3] ",
|
||||
m);
|
||||
|
||||
auto cap = m.named_capture("test");
|
||||
|
||||
REQUIRE(ret == true);
|
||||
REQUIRE(m.size() == 4);
|
||||
REQUIRE(cap.size() == 3);
|
||||
REQUIRE(m.str(cap[2]) == "tag-3");
|
||||
```
|
||||
|
||||
There are several ways to *search* for a PEG pattern in a document.
|
||||
|
||||
```cpp
|
||||
using namespace peg;
|
||||
|
||||
auto syntax = R"(
|
||||
ROOT <- '[' $< [a-z0-9]+ > ']'
|
||||
)";
|
||||
|
||||
auto s = " [tag1] [tag2] [tag3] ";
|
||||
|
||||
// peg::peg_search
|
||||
parser pg(syntax);
|
||||
size_t pos = 0;
|
||||
auto n = strlen(s);
|
||||
match m;
|
||||
while (peg_search(pg, s + pos, n - pos, m)) {
|
||||
cout << m.str() << endl; // entire match
|
||||
cout << m.str(1) << endl; // submatch #1
|
||||
pos += m.length();
|
||||
}
|
||||
|
||||
// peg::peg_token_iterator
|
||||
peg_token_iterator it(syntax, s);
|
||||
while (it != peg_token_iterator()) {
|
||||
cout << it->str() << endl; // entire match
|
||||
cout << it->str(1) << endl; // submatch #1
|
||||
++it;
|
||||
}
|
||||
|
||||
// peg::peg_token_range
|
||||
for (auto& m: peg_token_range(syntax, s)) {
|
||||
cout << m.str() << endl; // entire match
|
||||
cout << m.str(1) << endl; // submatch #1
|
||||
}
|
||||
```
|
||||
|
||||
Make a parser with parser combinators
|
||||
-------------------------------------
|
||||
|
||||
|
265
peglib.h
265
peglib.h
@ -1,7 +1,7 @@
|
||||
//
|
||||
// peglib.h
|
||||
//
|
||||
// Copyright (c) 2015-17 Yuji Hirose. All rights reserved.
|
||||
// Copyright (c) 2015-18 Yuji Hirose. All rights reserved.
|
||||
// MIT License
|
||||
//
|
||||
|
||||
@ -1608,10 +1608,6 @@ inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma,
|
||||
return std::make_shared<Capture>(ope, ma, n, s);
|
||||
}
|
||||
|
||||
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma) {
|
||||
return std::make_shared<Capture>(ope, ma, static_cast<size_t>(-1), std::string());
|
||||
}
|
||||
|
||||
inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope>& ope) {
|
||||
return std::make_shared<TokenBoundary>(ope);
|
||||
}
|
||||
@ -2336,14 +2332,7 @@ public:
|
||||
}
|
||||
|
||||
bool load_grammar(const char* s, size_t n) {
|
||||
grammar_ = ParserGenerator::parse(
|
||||
s, n,
|
||||
start_,
|
||||
[&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
|
||||
if (match_action) match_action(a_s, a_n, a_id, a_name);
|
||||
},
|
||||
log);
|
||||
|
||||
grammar_ = ParserGenerator::parse(s, n, start_, match_action, log);
|
||||
return grammar_ != nullptr;
|
||||
}
|
||||
|
||||
@ -2511,256 +2500,6 @@ private:
|
||||
std::string start_;
|
||||
};
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
* Simple interface
|
||||
*---------------------------------------------------------------------------*/
|
||||
|
||||
// Ordered list of captured spans produced by the simple interface.
//
// peg_match and peg_search insert the entire matched range at index 0; the
// spans reported through the parser's match action follow in the order they
// were reported. Items only point into the subject text, so a `match` must not
// outlive the buffer that was parsed.
struct match
{
    // One captured span: a non-owning view plus the id/name the match action
    // was called with.
    struct Item {
        const char* s;      // first character of the span (not owned)
        size_t      n;      // number of characters in the span
        size_t      id;     // id value passed to the match action
        std::string name;   // name value passed to the match action

        size_t length() const { return n; }
        std::string str() const { return std::string(s, n); }
    };

    std::vector<Item> matches;

    using iterator = std::vector<Item>::iterator;
    using const_iterator = std::vector<Item>::const_iterator;

    bool empty() const {
        return matches.empty();
    }

    size_t size() const {
        return matches.size();
    }

    // Length of the n-th span. `n` must be less than size().
    // Const-qualified so it is usable on a `const match`, like str() and size().
    size_t length(size_t n = 0) const {
        return matches[n].length();
    }

    // Copy of the n-th span's text. `n` must be less than size().
    std::string str(size_t n = 0) const {
        return matches[n].str();
    }

    // Unchecked access to the n-th span. `n` must be less than size().
    const Item& operator[](size_t n) const {
        return matches[n];
    }

    iterator begin() {
        return matches.begin();
    }

    iterator end() {
        return matches.end();
    }

    const_iterator begin() const {
        return matches.cbegin();
    }

    const_iterator end() const {
        return matches.cend();
    }

    // Indices (into this match) of every span whose name equals `name`,
    // in ascending order.
    std::vector<size_t> named_capture(const std::string& name) const {
        std::vector<size_t> ret;
        for (size_t i = 0; i < matches.size(); ++i) {
            if (matches[i].name == name) {
                ret.push_back(i);
            }
        }
        return ret;
    }

    // Groups the indices of all spans by their name. Every span is included,
    // so spans with an empty name are grouped under the empty-string key.
    std::map<std::string, std::vector<size_t>> named_captures() const {
        std::map<std::string, std::vector<size_t>> ret;
        for (size_t i = 0; i < matches.size(); ++i) {
            ret[matches[i].name].push_back(i);
        }
        return ret;
    }

    // Indices (into this match) of every span whose id equals `id`,
    // in ascending order.
    std::vector<size_t> indexed_capture(size_t id) const {
        std::vector<size_t> ret;
        for (size_t i = 0; i < matches.size(); ++i) {
            if (matches[i].id == id) {
                ret.push_back(i);
            }
        }
        return ret;
    }

    // Groups the indices of all spans by their id.
    std::map<size_t, std::vector<size_t>> indexed_captures() const {
        std::map<size_t, std::vector<size_t>> ret;
        for (size_t i = 0; i < matches.size(); ++i) {
            ret[matches[i].id].push_back(i);
        }
        return ret;
    }
};
|
||||
|
||||
inline bool peg_match(const char* syntax, const char* s, match& m) {
|
||||
m.matches.clear();
|
||||
|
||||
parser pg(syntax);
|
||||
pg.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
|
||||
m.matches.push_back(match::Item{ a_s, a_n, a_id, a_name });
|
||||
};
|
||||
|
||||
auto ret = pg.parse(s);
|
||||
if (ret) {
|
||||
auto n = strlen(s);
|
||||
m.matches.insert(m.matches.begin(), match::Item{ s, n, 0, std::string() });
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline bool peg_match(const char* syntax, const char* s) {
|
||||
parser parser(syntax);
|
||||
return parser.parse(s);
|
||||
}
|
||||
|
||||
// Searches the first `n` characters of `s` for the grammar loaded in `pg`.
//
// On success, `m` holds the entire matched range at index 0 followed by the
// spans reported through the parser's match action, and true is returned.
// On failure false is returned; spans reported before the failure stay in `m`.
//
// The capture callback is installed only for the duration of the call:
// whatever `pg.match_action` held on entry is put back on every exit path, so
// `pg` never keeps a callback that refers to `m` after this function returns.
inline bool peg_search(parser& pg, const char* s, size_t n, match& m) {
    m.matches.clear();

    // Puts the caller's match action back when the scope ends, including when
    // the search throws.
    struct ActionGuard {
        parser& owner;
        decltype(parser::match_action) previous;
        ~ActionGuard() { owner.match_action = previous; }
    } guard{ pg, pg.match_action };

    pg.match_action = [&m](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
        m.matches.push_back(match::Item{ a_s, a_n, a_id, a_name });
    };

    size_t mpos = 0;
    size_t mlen = 0;
    if (!pg.search(s, n, mpos, mlen)) {
        return false;
    }

    // `mpos` is an offset from `s`, the start of the searched range.
    m.matches.insert(m.matches.begin(), match::Item{ s + mpos, mlen, 0, std::string() });
    return true;
}
|
||||
|
||||
// Convenience overload for NUL-terminated input: measures `s` with strlen and
// forwards to the length-taking overload.
inline bool peg_search(parser& pg, const char* s, match& m) {
    return peg_search(pg, s, strlen(s), m);
}
|
||||
|
||||
inline bool peg_search(const char* syntax, const char* s, size_t n, match& m) {
|
||||
parser pg(syntax);
|
||||
return peg_search(pg, s, n, m);
|
||||
}
|
||||
|
||||
inline bool peg_search(const char* syntax, const char* s, match& m) {
|
||||
parser pg(syntax);
|
||||
auto n = strlen(s);
|
||||
return peg_search(pg, s, n, m);
|
||||
}
|
||||
|
||||
class peg_token_iterator : public std::iterator<std::forward_iterator_tag, match>
|
||||
{
|
||||
public:
|
||||
peg_token_iterator()
|
||||
: s_(nullptr)
|
||||
, l_(0)
|
||||
, pos_((std::numeric_limits<size_t>::max)()) {}
|
||||
|
||||
peg_token_iterator(const char* syntax, const char* s)
|
||||
: peg_(syntax)
|
||||
, s_(s)
|
||||
, l_(strlen(s))
|
||||
, pos_(0) {
|
||||
peg_.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
|
||||
m_.matches.push_back(match::Item{ a_s, a_n, a_id, a_name });
|
||||
};
|
||||
search();
|
||||
}
|
||||
|
||||
peg_token_iterator(const peg_token_iterator& rhs)
|
||||
: peg_(rhs.peg_)
|
||||
, s_(rhs.s_)
|
||||
, l_(rhs.l_)
|
||||
, pos_(rhs.pos_)
|
||||
, m_(rhs.m_) {}
|
||||
|
||||
peg_token_iterator& operator++() {
|
||||
search();
|
||||
return *this;
|
||||
}
|
||||
|
||||
peg_token_iterator operator++(int) {
|
||||
auto it = *this;
|
||||
search();
|
||||
return it;
|
||||
}
|
||||
|
||||
match& operator*() {
|
||||
return m_;
|
||||
}
|
||||
|
||||
match* operator->() {
|
||||
return &m_;
|
||||
}
|
||||
|
||||
bool operator==(const peg_token_iterator& rhs) {
|
||||
return pos_ == rhs.pos_;
|
||||
}
|
||||
|
||||
bool operator!=(const peg_token_iterator& rhs) {
|
||||
return pos_ != rhs.pos_;
|
||||
}
|
||||
|
||||
private:
|
||||
void search() {
|
||||
m_.matches.clear();
|
||||
size_t mpos, mlen;
|
||||
if (peg_.search(s_ + pos_, l_ - pos_, mpos, mlen)) {
|
||||
m_.matches.insert(m_.matches.begin(), match::Item{ s_ + mpos, mlen, 0, std::string() });
|
||||
pos_ += mpos + mlen;
|
||||
} else {
|
||||
pos_ = (std::numeric_limits<size_t>::max)();
|
||||
}
|
||||
}
|
||||
|
||||
parser peg_;
|
||||
const char* s_;
|
||||
size_t l_;
|
||||
size_t pos_;
|
||||
match m_;
|
||||
};
|
||||
|
||||
struct peg_token_range {
|
||||
typedef peg_token_iterator iterator;
|
||||
typedef const peg_token_iterator const_iterator;
|
||||
|
||||
peg_token_range(const char* syntax, const char* s)
|
||||
: beg_iter(peg_token_iterator(syntax, s))
|
||||
, end_iter() {}
|
||||
|
||||
iterator begin() {
|
||||
return beg_iter;
|
||||
}
|
||||
|
||||
iterator end() {
|
||||
return end_iter;
|
||||
}
|
||||
|
||||
const_iterator cbegin() const {
|
||||
return beg_iter;
|
||||
}
|
||||
|
||||
const_iterator cend() const {
|
||||
return end_iter;
|
||||
}
|
||||
|
||||
private:
|
||||
peg_token_iterator beg_iter;
|
||||
peg_token_iterator end_iter;
|
||||
};
|
||||
|
||||
} // namespace peg
|
||||
|
||||
#endif
|
||||
|
61
test/test.cc
61
test/test.cc
@ -59,23 +59,6 @@ TEST_CASE("String capture test", "[general]")
|
||||
REQUIRE(tags[2] == "tag-3");
|
||||
}
|
||||
|
||||
// peg_match should report the whole input at index 0 and one capture per
// `$< ... >` occurrence after it.
TEST_CASE("String capture test with match", "[general]")
{
    const char* grammar =
        " ROOT <- _ ('[' $< TAG_NAME > ']' _)* "
        " TAG_NAME <- (!']' .)+ "
        " _ <- [ \t]* ";
    const char* input = " [tag1] [tag:2] [tag-3] ";

    peg::match captured;
    auto matched = peg::peg_match(grammar, input, captured);

    REQUIRE(matched == true);
    REQUIRE(captured.size() == 4);
    REQUIRE(captured.str(1) == "tag1");
    REQUIRE(captured.str(2) == "tag:2");
    REQUIRE(captured.str(3) == "tag-3");
}
|
||||
|
||||
using namespace peg;
|
||||
using namespace std;
|
||||
|
||||
@ -123,50 +106,6 @@ TEST_CASE("String capture test3", "[general]")
|
||||
REQUIRE(tags[2] == "tag-3");
|
||||
}
|
||||
|
||||
// Captures made with `$test< ... >` should be retrievable by name through
// match::named_capture.
TEST_CASE("Named capture test", "[general]")
{
    const char* grammar =
        " ROOT <- _ ('[' $test< TAG_NAME > ']' _)* "
        " TAG_NAME <- (!']' .)+ "
        " _ <- [ \t]* ";
    const char* input = " [tag1] [tag:2] [tag-3] ";

    peg::match captured;
    auto matched = peg::peg_match(grammar, input, captured);

    auto test_indices = captured.named_capture("test");

    REQUIRE(matched == true);
    REQUIRE(captured.size() == 4);
    REQUIRE(test_indices.size() == 3);
    REQUIRE(captured.str(test_indices[2]) == "tag-3");
}
|
||||
|
||||
// A match action attached directly to a `cap` combinator should be invoked
// once per captured tag name, in input order.
TEST_CASE("String capture test with embedded match action", "[general]")
{
    Definition ROOT, TAG, TAG_NAME, WS;

    vector<string> captured;

    // Records each captured span as a string.
    auto remember = [&](const char* s, size_t n, size_t /*id*/, const std::string& /*name*/) {
        captured.push_back(string(s, n));
    };

    ROOT     <= seq(WS, zom(TAG));
    TAG      <= seq(chr('['), cap(TAG_NAME, remember), chr(']'), WS);
    TAG_NAME <= oom(seq(npd(chr(']')), dot()));
    WS       <= zom(cls(" \t"));

    auto outcome = ROOT.parse(" [tag1] [tag:2] [tag-3] ");

    REQUIRE(outcome.ret == true);
    REQUIRE(captured.size() == 3);
    REQUIRE(captured[0] == "tag1");
    REQUIRE(captured[1] == "tag:2");
    REQUIRE(captured[2] == "tag-3");
}
|
||||
|
||||
TEST_CASE("Cyclic grammer test", "[general]")
|
||||
{
|
||||
Definition PARENT;
|
||||
|
Loading…
Reference in New Issue
Block a user