mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 11:55:30 +00:00
Changed the capture operator and made the anchor operator.
This commit is contained in:
parent
f0351a9909
commit
56daf08d5b
18
README.md
18
README.md
@ -7,7 +7,8 @@ C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar)
|
|||||||
|
|
||||||
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now:
|
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). *cpp-peglib* also supports the following additional syntax for now:
|
||||||
|
|
||||||
* `<` and `>` (Capture operators)
|
* `<` ... `>` (Anchor operators)
|
||||||
|
* `$<` ... `>` (Capture operators)
|
||||||
|
|
||||||
How to use
|
How to use
|
||||||
----------
|
----------
|
||||||
@ -88,7 +89,7 @@ Here is a complete list of available actions:
|
|||||||
|
|
||||||
`any& c` is context data that the user can use for any purpose.
|
`any& c` is context data that the user can use for any purpose.
|
||||||
|
|
||||||
The following example uses `<` and ` >` operators. They are the *capture* operators. Each capture operator creates a semantic value that contains `const char*` of the position. It could be useful to eliminate unnecessary characters.
|
The following example uses the `<` and `>` operators. They are the *anchor* operators. The text enclosed between them becomes the matched string (`const char* s`, `size_t l`) that is passed to the semantic action, which is useful for eliminating unnecessary characters such as trailing whitespace.
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
auto syntax = R"(
|
auto syntax = R"(
|
||||||
@ -100,9 +101,8 @@ auto syntax = R"(
|
|||||||
peg pg(syntax);
|
peg pg(syntax);
|
||||||
|
|
||||||
pg["TOKEN"] = [](const char* s, size_t l, const vector<any>& v) {
|
pg["TOKEN"] = [](const char* s, size_t l, const vector<any>& v) {
|
||||||
auto b = v[0].get<const char*>(); // '<'
|
// 'token' doesn't include trailing whitespaces
|
||||||
auto e = v[1].get<const char*>(); // '>'
|
auto token = string(s, l);
|
||||||
auto token = string(b, e - b); // 'token' doesn't include trailing whitespaces
|
|
||||||
};
|
};
|
||||||
|
|
||||||
auto ret = pg.parse(" token1, token2 ");
|
auto ret = pg.parse(" token1, token2 ");
|
||||||
@ -113,13 +113,13 @@ Simple interface
|
|||||||
|
|
||||||
*cpp-peglib* provides a simple, std::regex-like interface for trivial tasks.
|
*cpp-peglib* provides a simple, std::regex-like interface for trivial tasks.
|
||||||
|
|
||||||
`peglib::peg_match` tries to capture strings in the `< ... >` operator and store them into `peglib::match` object.
|
`peglib::peg_match` captures the strings matched inside `$< ... >` operators and stores them in a `peglib::match` object.
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
peglib::match m;
|
peglib::match m;
|
||||||
auto ret = peglib::peg_match(
|
auto ret = peglib::peg_match(
|
||||||
R"(
|
R"(
|
||||||
ROOT <- _ ('[' < TAG_NAME > ']' _)*
|
ROOT <- _ ('[' $< TAG_NAME > ']' _)*
|
||||||
TAG_NAME <- (!']' .)+
|
TAG_NAME <- (!']' .)+
|
||||||
_ <- [ \t]*
|
_ <- [ \t]*
|
||||||
)",
|
)",
|
||||||
@ -139,7 +139,7 @@ There are some ways to *search* a peg pattern in a document.
|
|||||||
using namespace peglib;
|
using namespace peglib;
|
||||||
|
|
||||||
auto syntax = R"(
|
auto syntax = R"(
|
||||||
ROOT <- '[' < [a-z0-9]+ > ']'
|
ROOT <- '[' $< [a-z0-9]+ > ']'
|
||||||
)";
|
)";
|
||||||
|
|
||||||
auto s = " [tag1] [tag2] [tag3] ";
|
auto s = " [tag1] [tag2] [tag3] ";
|
||||||
@ -206,6 +206,8 @@ The following are available operators:
|
|||||||
| cls | Character class |
|
| cls | Character class |
|
||||||
| chr | Character |
|
| chr | Character |
|
||||||
| dot | Any character |
|
| dot | Any character |
|
||||||
|
| anc | Anchor operator |
|
||||||
|
| cap | Capture operator |
|
||||||
|
|
||||||
Sample codes
|
Sample codes
|
||||||
------------
|
------------
|
||||||
|
@ -27,11 +27,11 @@ int main(int argc, const char** argv)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
peglib::peg parser(syntax.data(), syntax.size(), [&](size_t ln, size_t col, const string& msg) {
|
peglib::peg peg(syntax.data(), syntax.size(), [&](size_t ln, size_t col, const string& msg) {
|
||||||
cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
|
cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!parser) {
|
if (!peg) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,10 +48,14 @@ int main(int argc, const char** argv)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ret = parser.lint(source.data(), source.size(), true, [&](size_t ln, size_t col, const string& msg) {
|
auto ret = peg.lint(source.data(), source.size(), true, [&](size_t ln, size_t col, const string& msg) {
|
||||||
cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
|
cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
peg.parse(source.data(), source.size());
|
||||||
|
}
|
||||||
|
|
||||||
return ret ? 0 : -1;
|
return ret ? 0 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
162
peglib.h
162
peglib.h
@ -151,7 +151,15 @@ private:
|
|||||||
/*
|
/*
|
||||||
* Semantic values
|
* Semantic values
|
||||||
*/
|
*/
|
||||||
typedef std::vector<any> Values;
|
struct SemanticValues
|
||||||
|
{
|
||||||
|
std::vector<any> values;
|
||||||
|
//std::vector<std::string> names;
|
||||||
|
const char* s;
|
||||||
|
size_t l;
|
||||||
|
|
||||||
|
SemanticValues() : s(nullptr), l(0) {}
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Semantic action
|
* Semantic action
|
||||||
@ -366,7 +374,7 @@ class Ope
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual ~Ope() {};
|
virtual ~Ope() {};
|
||||||
virtual Result parse(const char* s, size_t l, Values& v, any& c) const = 0;
|
virtual Result parse(const char* s, size_t l, SemanticValues& v, any& c) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Sequence : public Ope
|
class Sequence : public Ope
|
||||||
@ -393,7 +401,7 @@ public:
|
|||||||
Sequence(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
|
Sequence(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
|
||||||
Sequence(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(std::move(opes)) {}
|
Sequence(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(std::move(opes)) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (const auto& ope : opes_) {
|
for (const auto& ope : opes_) {
|
||||||
const auto& rule = *ope;
|
const auto& rule = *ope;
|
||||||
@ -436,18 +444,20 @@ public:
|
|||||||
PrioritizedChoice(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
|
PrioritizedChoice(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
|
||||||
PrioritizedChoice(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(std::move(opes)) {}
|
PrioritizedChoice(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(std::move(opes)) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
size_t id = 0;
|
size_t id = 0;
|
||||||
for (const auto& ope : opes_) {
|
for (const auto& ope : opes_) {
|
||||||
const auto& rule = *ope;
|
const auto& rule = *ope;
|
||||||
Values chldsv;
|
SemanticValues chldsv;
|
||||||
auto r = rule.parse(s, l, chldsv, c);
|
auto r = rule.parse(s, l, chldsv, c);
|
||||||
if (r.ret) {
|
if (r.ret) {
|
||||||
if (!chldsv.empty()) {
|
//assert(chldsv.values.size() == chldsv.names.size());
|
||||||
for (const auto& x: chldsv) {
|
if (!chldsv.values.empty()) {
|
||||||
v.push_back(x);
|
v.values.insert(v.values.end(), chldsv.values.begin(), chldsv.values.end());
|
||||||
}
|
//v.names.insert(v.names.end(), chldsv.names.begin(), chldsv.names.end());
|
||||||
}
|
}
|
||||||
|
v.s = chldsv.s;
|
||||||
|
v.l = chldsv.l;
|
||||||
return success(r.len, id);
|
return success(r.len, id);
|
||||||
}
|
}
|
||||||
id++;
|
id++;
|
||||||
@ -466,7 +476,7 @@ class ZeroOrMore : public Ope
|
|||||||
public:
|
public:
|
||||||
ZeroOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
ZeroOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
auto i = 0;
|
auto i = 0;
|
||||||
while (l - i > 0) {
|
while (l - i > 0) {
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
@ -488,7 +498,7 @@ class OneOrMore : public Ope
|
|||||||
public:
|
public:
|
||||||
OneOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
OneOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto r = rule.parse(s, l, v, c);
|
auto r = rule.parse(s, l, v, c);
|
||||||
if (!r.ret) {
|
if (!r.ret) {
|
||||||
@ -519,7 +529,7 @@ class Option : public Ope
|
|||||||
public:
|
public:
|
||||||
Option(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
Option(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto r = rule.parse(s, l, v, c);
|
auto r = rule.parse(s, l, v, c);
|
||||||
return success(r.ret ? r.len : 0);
|
return success(r.ret ? r.len : 0);
|
||||||
@ -534,7 +544,7 @@ class AndPredicate : public Ope
|
|||||||
public:
|
public:
|
||||||
AndPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
AndPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto r = rule.parse(s, l, v, c);
|
auto r = rule.parse(s, l, v, c);
|
||||||
if (r.ret) {
|
if (r.ret) {
|
||||||
@ -553,7 +563,7 @@ class NotPredicate : public Ope
|
|||||||
public:
|
public:
|
||||||
NotPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
NotPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto r = rule.parse(s, l, v, c);
|
auto r = rule.parse(s, l, v, c);
|
||||||
if (r.ret) {
|
if (r.ret) {
|
||||||
@ -572,7 +582,7 @@ class LiteralString : public Ope
|
|||||||
public:
|
public:
|
||||||
LiteralString(const std::string& s) : lit_(s) {}
|
LiteralString(const std::string& s) : lit_(s) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
auto i = 0u;
|
auto i = 0u;
|
||||||
for (; i < lit_.size(); i++) {
|
for (; i < lit_.size(); i++) {
|
||||||
if (i >= l || s[i] != lit_[i]) {
|
if (i >= l || s[i] != lit_[i]) {
|
||||||
@ -591,7 +601,7 @@ class CharacterClass : public Ope
|
|||||||
public:
|
public:
|
||||||
CharacterClass(const std::string& chars) : chars_(chars) {}
|
CharacterClass(const std::string& chars) : chars_(chars) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
// TODO: UTF8 support
|
// TODO: UTF8 support
|
||||||
if (l < 1) {
|
if (l < 1) {
|
||||||
return fail(s);
|
return fail(s);
|
||||||
@ -623,7 +633,7 @@ class Character : public Ope
|
|||||||
public:
|
public:
|
||||||
Character(char ch) : ch_(ch) {}
|
Character(char ch) : ch_(ch) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
// TODO: UTF8 support
|
// TODO: UTF8 support
|
||||||
if (l < 1 || s[0] != ch_) {
|
if (l < 1 || s[0] != ch_) {
|
||||||
return fail(s);
|
return fail(s);
|
||||||
@ -638,7 +648,7 @@ private:
|
|||||||
class AnyCharacter : public Ope
|
class AnyCharacter : public Ope
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
// TODO: UTF8 support
|
// TODO: UTF8 support
|
||||||
if (l < 1) {
|
if (l < 1) {
|
||||||
return fail(s);
|
return fail(s);
|
||||||
@ -651,11 +661,10 @@ public:
|
|||||||
class Capture : public Ope
|
class Capture : public Ope
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Capture(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
|
||||||
Capture(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t ci)
|
Capture(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t ci)
|
||||||
: ope_(ope), match_action_(ma), capture_id(ci) {}
|
: ope_(ope), match_action_(ma), capture_id(ci) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
assert(ope_);
|
assert(ope_);
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto r = rule.parse(s, l, v, c);
|
auto r = rule.parse(s, l, v, c);
|
||||||
@ -674,10 +683,21 @@ private:
|
|||||||
class Anchor : public Ope
|
class Anchor : public Ope
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Anchor(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
return success(0);
|
|
||||||
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
|
assert(ope_);
|
||||||
|
const auto& rule = *ope_;
|
||||||
|
auto r = rule.parse(s, l, v, c);
|
||||||
|
if (r.ret) {
|
||||||
|
v.s = s;
|
||||||
|
v.l = r.len;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<Ope> ope_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class WeakHolder : public Ope
|
class WeakHolder : public Ope
|
||||||
@ -685,7 +705,7 @@ class WeakHolder : public Ope
|
|||||||
public:
|
public:
|
||||||
WeakHolder(const std::shared_ptr<Ope>& ope) : weak_(ope) {}
|
WeakHolder(const std::shared_ptr<Ope>& ope) : weak_(ope) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
auto ope = weak_.lock();
|
auto ope = weak_.lock();
|
||||||
assert(ope);
|
assert(ope);
|
||||||
const auto& rule = *ope;
|
const auto& rule = *ope;
|
||||||
@ -738,17 +758,17 @@ public:
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
return holder_->parse(s, l, v, c);
|
return holder_->parse(s, l, v, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
Result parse(const char* s, size_t l, T& val) const {
|
Result parse(const char* s, size_t l, T& val) const {
|
||||||
Values v;
|
SemanticValues v;
|
||||||
any c;
|
any c;
|
||||||
auto r = holder_->parse(s, l, v, c);
|
auto r = holder_->parse(s, l, v, c);
|
||||||
if (r.ret && !v.empty() && !v.front().is_undefined()) {
|
if (r.ret && !v.values.empty() && !v.values.front().is_undefined()) {
|
||||||
val = v[0].get<T>();
|
val = v.values[0].get<T>();
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -761,7 +781,7 @@ public:
|
|||||||
|
|
||||||
Result parse(const char* s) const {
|
Result parse(const char* s) const {
|
||||||
auto l = strlen(s);
|
auto l = strlen(s);
|
||||||
Values v;
|
SemanticValues v;
|
||||||
any c;
|
any c;
|
||||||
return holder_->parse(s, l, v, c);
|
return holder_->parse(s, l, v, c);
|
||||||
}
|
}
|
||||||
@ -795,13 +815,13 @@ private:
|
|||||||
Holder(Definition* outer)
|
Holder(Definition* outer)
|
||||||
: outer_(outer) {}
|
: outer_(outer) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
if (!ope_) {
|
if (!ope_) {
|
||||||
throw std::logic_error("Uninitialized definition ope was used...");
|
throw std::logic_error("Uninitialized definition ope was used...");
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
Values chldsv;
|
SemanticValues chldsv;
|
||||||
auto r = rule.parse(s, l, chldsv, c);
|
auto r = rule.parse(s, l, chldsv, c);
|
||||||
if (r.ret) {
|
if (r.ret) {
|
||||||
assert(!outer_->actions.empty());
|
assert(!outer_->actions.empty());
|
||||||
@ -811,7 +831,12 @@ private:
|
|||||||
? outer_->actions[id]
|
? outer_->actions[id]
|
||||||
: outer_->actions[0];
|
: outer_->actions[0];
|
||||||
|
|
||||||
v.push_back(reduce(s, r.len, chldsv, c, ac));
|
auto ts = chldsv.s ? chldsv.s : s;
|
||||||
|
auto tl = chldsv.s ? chldsv.l : r.len;
|
||||||
|
auto sv = reduce(ts, tl, chldsv, c, ac);
|
||||||
|
|
||||||
|
v.values.push_back(sv);
|
||||||
|
//v.names.push_back(outer_->name);
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -819,13 +844,13 @@ private:
|
|||||||
private:
|
private:
|
||||||
friend class Definition;
|
friend class Definition;
|
||||||
|
|
||||||
any reduce(const char* s, size_t l, const Values& v, any& c, const Action& action) const {
|
any reduce(const char* s, size_t l, const SemanticValues& v, any& c, const Action& action) const {
|
||||||
if (action) {
|
if (action) {
|
||||||
return action(s, l, v, c);
|
return action(s, l, v.values, c);
|
||||||
} else if (v.empty()) {
|
} else if (v.values.empty()) {
|
||||||
return any();
|
return any();
|
||||||
} else {
|
} else {
|
||||||
return v.front();
|
return v.values.front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -847,7 +872,7 @@ public:
|
|||||||
: grammar_(grammar)
|
: grammar_(grammar)
|
||||||
, name_(name) {}
|
, name_(name) {}
|
||||||
|
|
||||||
Result parse(const char* s, size_t l, Values& v, any& c) const {
|
Result parse(const char* s, size_t l, SemanticValues& v, any& c) const {
|
||||||
const auto& rule = *grammar_.at(name_).holder_;
|
const auto& rule = *grammar_.at(name_).holder_;
|
||||||
return rule.parse(s, l, v, c);
|
return rule.parse(s, l, v, c);
|
||||||
}
|
}
|
||||||
@ -916,8 +941,8 @@ inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma)
|
|||||||
return std::make_shared<Capture>(ope, ma, (size_t)-1);
|
return std::make_shared<Capture>(ope, ma, (size_t)-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> anc() {
|
inline std::shared_ptr<Ope> anc(const std::shared_ptr<Ope>& ope) {
|
||||||
return std::make_shared<Anchor>();
|
return std::make_shared<Anchor>(ope);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> ref(const std::map<std::string, Definition>& grammar, const std::string& name) {
|
inline std::shared_ptr<Ope> ref(const std::map<std::string, Definition>& grammar, const std::string& name) {
|
||||||
@ -994,7 +1019,8 @@ private:
|
|||||||
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
|
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
|
||||||
g["Primary"] <= cho(seq(g["Identifier"], npd(g["LEFTARROW"])),
|
g["Primary"] <= cho(seq(g["Identifier"], npd(g["LEFTARROW"])),
|
||||||
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
||||||
seq(g["CAPTUREOPEN"], g["Expression"], g["CAPTURECLOSE"]),
|
seq(g["Begin"], g["Expression"], g["End"]),
|
||||||
|
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
|
||||||
g["Literal"], g["Class"], g["DOT"]);
|
g["Literal"], g["Class"], g["DOT"]);
|
||||||
|
|
||||||
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
||||||
@ -1002,13 +1028,10 @@ private:
|
|||||||
g["IdentStart"] <= cls("a-zA-Z_");
|
g["IdentStart"] <= cls("a-zA-Z_");
|
||||||
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
|
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
|
||||||
|
|
||||||
g["Literal"] <= cho(seq(cls("'"), g["SQCont"], cls("'"), g["Spacing"]),
|
g["Literal"] <= cho(seq(cls("'"), anc(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
|
||||||
seq(cls("\""), g["DQCont"], cls("\""), g["Spacing"]));
|
seq(cls("\""), anc(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
|
||||||
g["SQCont"] <= zom(seq(npd(cls("'")), g["Char"]));
|
|
||||||
g["DQCont"] <= zom(seq(npd(cls("\"")), g["Char"]));
|
|
||||||
|
|
||||||
g["Class"] <= seq(chr('['), g["ClassCont"], chr(']'), g["Spacing"]);
|
g["Class"] <= seq(chr('['), anc(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]);
|
||||||
g["ClassCont"] <= zom(seq(npd(chr(']')), g["Range"]));
|
|
||||||
|
|
||||||
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
|
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
|
||||||
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
|
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
|
||||||
@ -1033,8 +1056,11 @@ private:
|
|||||||
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
|
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
|
||||||
g["EndOfFile"] <= npd(dot());
|
g["EndOfFile"] <= npd(dot());
|
||||||
|
|
||||||
g["CAPTUREOPEN"] <= seq(chr('<'), g["Spacing"]);
|
g["Begin"] <= seq(chr('<'), g["Spacing"]);
|
||||||
g["CAPTURECLOSE"] <= seq(chr('>'), g["Spacing"]);
|
g["End"] <= seq(chr('>'), g["Spacing"]);
|
||||||
|
|
||||||
|
g["BeginCap"] <= seq(lit("$<"), g["Spacing"]);
|
||||||
|
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
|
||||||
|
|
||||||
// Set definition names
|
// Set definition names
|
||||||
for (auto& x: g) {
|
for (auto& x: g) {
|
||||||
@ -1130,36 +1156,27 @@ private:
|
|||||||
[&](const std::vector<any>& v) {
|
[&](const std::vector<any>& v) {
|
||||||
return v[1];
|
return v[1];
|
||||||
},
|
},
|
||||||
|
// Anchor
|
||||||
|
[&](const std::vector<any>& v) {
|
||||||
|
auto ope = v[1].get<std::shared_ptr<Ope>>();
|
||||||
|
return anc(ope);
|
||||||
|
},
|
||||||
// Capture
|
// Capture
|
||||||
[&](const char* s, size_t l, const std::vector<any>& v, any& c) {
|
[&](const std::vector<any>& v, any& c) {
|
||||||
Context& cxt = *c.get<Context*>();
|
Context& cxt = *c.get<Context*>();
|
||||||
auto ope = v[1].get<std::shared_ptr<Ope>>();
|
auto ope = v[1].get<std::shared_ptr<Ope>>();
|
||||||
return seq(
|
return cap(ope, cxt.match_action, ++cxt.capture_count);
|
||||||
ref(*cxt.grammar, "%ANCHOR%"),
|
|
||||||
cap(ope, cxt.match_action, ++cxt.capture_count),
|
|
||||||
ref(*cxt.grammar, "%ANCHOR%"));
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
g["IdentCont"] = [](const char* s, size_t l) {
|
g["IdentCont"] = [](const char* s, size_t l) {
|
||||||
return std::string(s, l);
|
return std::string(s, l);
|
||||||
};
|
};
|
||||||
|
g["Literal"] = [this](const char* s, size_t l) {
|
||||||
g["Literal"] = [](const std::vector<any>& v) {
|
return lit(resolve_escape_sequence(s, l));
|
||||||
return lit(v[0].get<std::string>());
|
|
||||||
};
|
};
|
||||||
g["SQCont"] = [this](const char* s, size_t l) {
|
g["Class"] = [this](const char* s, size_t l) {
|
||||||
return resolve_escape_sequence(s, l);
|
return cls(resolve_escape_sequence(s, l));
|
||||||
};
|
|
||||||
g["DQCont"] = [this](const char* s, size_t l) {
|
|
||||||
return resolve_escape_sequence(s, l);
|
|
||||||
};
|
|
||||||
|
|
||||||
g["Class"] = [](const std::vector<any>& v) {
|
|
||||||
return cls(v[0].get<std::string>());
|
|
||||||
};
|
|
||||||
g["ClassCont"] = [this](const char* s, size_t l) {
|
|
||||||
return resolve_escape_sequence(s, l);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
g["AND"] = [](const char* s, size_t l) { return *s; };
|
g["AND"] = [](const char* s, size_t l) { return *s; };
|
||||||
@ -1168,16 +1185,14 @@ private:
|
|||||||
g["STAR"] = [](const char* s, size_t l) { return *s; };
|
g["STAR"] = [](const char* s, size_t l) { return *s; };
|
||||||
g["PLUS"] = [](const char* s, size_t l) { return *s; };
|
g["PLUS"] = [](const char* s, size_t l) { return *s; };
|
||||||
|
|
||||||
g["DOT"] = []() {
|
g["DOT"] = []() { return dot(); };
|
||||||
return dot();
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Grammar> perform_core(const char* s, size_t l, std::string& start, MatchAction ma, Log log) {
|
std::shared_ptr<Grammar> perform_core(const char* s, size_t l, std::string& start, MatchAction ma, Log log) {
|
||||||
Context cxt;
|
Context cxt;
|
||||||
cxt.match_action = ma;
|
cxt.match_action = ma;
|
||||||
|
|
||||||
Values v;
|
SemanticValues v;
|
||||||
any c = &cxt;
|
any c = &cxt;
|
||||||
auto r = g["Grammar"].parse(s, l, v, c);
|
auto r = g["Grammar"].parse(s, l, v, c);
|
||||||
|
|
||||||
@ -1205,9 +1220,6 @@ private:
|
|||||||
|
|
||||||
start = cxt.start;
|
start = cxt.start;
|
||||||
|
|
||||||
grammar["%ANCHOR%"] <= anc();
|
|
||||||
grammar["%ANCHOR%"] = [](const char* s, size_t l) { return s; };
|
|
||||||
|
|
||||||
return cxt.grammar;
|
return cxt.grammar;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1345,7 +1357,7 @@ public:
|
|||||||
}
|
}
|
||||||
} else if (exact && r.len != l) {
|
} else if (exact && r.len != l) {
|
||||||
auto line = line_info(s, s + r.len);
|
auto line = line_info(s, s + r.len);
|
||||||
log(line.first, line.second, "garbage string at the end");
|
log(line.first, line.second, "syntax error");
|
||||||
}
|
}
|
||||||
return r.ret && (!exact || r.len == l);
|
return r.ret && (!exact || r.len == l);
|
||||||
}
|
}
|
||||||
|
27
test/test.cc
27
test/test.cc
@ -39,7 +39,7 @@ TEST_CASE("String capture test with match", "[general]")
|
|||||||
{
|
{
|
||||||
peglib::match m;
|
peglib::match m;
|
||||||
auto ret = peglib::peg_match(
|
auto ret = peglib::peg_match(
|
||||||
" ROOT <- _ ('[' < TAG_NAME > ']' _)* "
|
" ROOT <- _ ('[' $< TAG_NAME > ']' _)* "
|
||||||
" TAG_NAME <- (!']' .)+ "
|
" TAG_NAME <- (!']' .)+ "
|
||||||
" _ <- [ \t]* ",
|
" _ <- [ \t]* ",
|
||||||
" [tag1] [tag:2] [tag-3] ",
|
" [tag1] [tag:2] [tag-3] ",
|
||||||
@ -74,6 +74,31 @@ TEST_CASE("String capture test2", "[general]")
|
|||||||
REQUIRE(tags[2] == "tag-3");
|
REQUIRE(tags[2] == "tag-3");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE("String capture test3", "[general]")
|
||||||
|
{
|
||||||
|
auto syntax =
|
||||||
|
" ROOT <- _ TOKEN* "
|
||||||
|
" TOKEN <- '[' < (!']' .)+ > ']' _ "
|
||||||
|
" _ <- [ \t\r\n]* "
|
||||||
|
;
|
||||||
|
|
||||||
|
peg pg(syntax);
|
||||||
|
|
||||||
|
std::vector<std::string> tags;
|
||||||
|
|
||||||
|
pg["TOKEN"] = [&](const char* s, size_t l, const vector<any>& v) {
|
||||||
|
tags.push_back(std::string(s, l));
|
||||||
|
};
|
||||||
|
|
||||||
|
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
|
||||||
|
|
||||||
|
REQUIRE(ret == true);
|
||||||
|
REQUIRE(tags.size() == 3);
|
||||||
|
REQUIRE(tags[0] == "tag1");
|
||||||
|
REQUIRE(tags[1] == "tag:2");
|
||||||
|
REQUIRE(tags[2] == "tag-3");
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE("String capture test with embedded match action", "[general]")
|
TEST_CASE("String capture test with embedded match action", "[general]")
|
||||||
{
|
{
|
||||||
rule ROOT, TAG, TAG_NAME, WS;
|
rule ROOT, TAG, TAG_NAME, WS;
|
||||||
|
Loading…
Reference in New Issue
Block a user