mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 20:05:31 +00:00
Changed to be able to associate actions to the prioritized choice.
This commit is contained in:
parent
5a16f53a71
commit
8acb0e2822
17
README.md
17
README.md
@ -3,14 +3,14 @@ cpp-peglib
|
|||||||
|
|
||||||
C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library.
|
C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library.
|
||||||
|
|
||||||
*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. It also keeps it in mind that users can easily start using it.
|
*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. This library depends on only one header file. So, you can start using it right away just by including `peglib.h` in your project.
|
||||||
|
|
||||||
The PEG syntax that *cpp-peglib* understands is described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
|
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
|
||||||
|
|
||||||
How to use
|
How to use
|
||||||
----------
|
----------
|
||||||
|
|
||||||
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with `std::regex`. We have to write a loop logic, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can handle it pretty easily.
|
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with *std::regex*, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can, however, handle the repetition pretty easily.
|
||||||
|
|
||||||
PEG grammar for this task could be like this:
|
PEG grammar for this task could be like this:
|
||||||
|
|
||||||
@ -20,7 +20,7 @@ TAG_NAME <- (!']' .)+
|
|||||||
_ <- [ \t]*
|
_ <- [ \t]*
|
||||||
```
|
```
|
||||||
|
|
||||||
Here is how to parse text with the PEG syntax and retreive tag names:
|
Here is how to parse text with the PEG syntax and retrieve tag names:
|
||||||
|
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
@ -131,14 +131,6 @@ _ = zom(cls(" \t"));
|
|||||||
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
|
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
|
||||||
```
|
```
|
||||||
|
|
||||||
It is also possible to specify a *string match action* with a *grp* operator. The string match action doesn't affect the regular semantic action behavior.
|
|
||||||
|
|
||||||
```c++
|
|
||||||
ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _)));
|
|
||||||
TAG_NAME = oom(seq(npd(chr(']')), any()));
|
|
||||||
_ = zom(cls(" \t"));
|
|
||||||
```
|
|
||||||
|
|
||||||
In fact, the PEG parser generator is made with the parser operators. You can see the code at `make_peg_grammar` function in `peglib.h`.
|
In fact, the PEG parser generator is made with the parser operators. You can see the code at `make_peg_grammar` function in `peglib.h`.
|
||||||
|
|
||||||
The following are available operators:
|
The following are available operators:
|
||||||
@ -182,6 +174,7 @@ Other C++ PEG parser libraries
|
|||||||
|
|
||||||
Thanks to the authors of the libraries that inspired *cpp-peglib*.
|
Thanks to the authors of the libraries that inspired *cpp-peglib*.
|
||||||
|
|
||||||
|
* [Boost Spirit X3](https://github.com/djowel/spirit_x3) - A set of C++ libraries for parsing and output generation implemented as Domain Specific Embedded Languages (DSEL) using Expression templates and Template Meta-Programming
|
||||||
* [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library
|
* [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library
|
||||||
* [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity
|
* [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity
|
||||||
|
|
||||||
|
61
test/test.cc
61
test/test.cc
@ -40,7 +40,7 @@ TEST_CASE("String capture test2", "[general]")
|
|||||||
{
|
{
|
||||||
vector<string> tags;
|
vector<string> tags;
|
||||||
|
|
||||||
Definition ROOT, TAG, TAG_NAME, WS;
|
rule ROOT, TAG, TAG_NAME, WS;
|
||||||
ROOT <= seq(WS, zom(TAG));
|
ROOT <= seq(WS, zom(TAG));
|
||||||
TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
|
TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
|
||||||
TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
|
TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
|
||||||
@ -57,7 +57,7 @@ TEST_CASE("String capture test2", "[general]")
|
|||||||
|
|
||||||
TEST_CASE("String capture test with embedded match action", "[general]")
|
TEST_CASE("String capture test with embedded match action", "[general]")
|
||||||
{
|
{
|
||||||
Definition ROOT, TAG, TAG_NAME, WS;
|
rule ROOT, TAG, TAG_NAME, WS;
|
||||||
|
|
||||||
vector<string> tags;
|
vector<string> tags;
|
||||||
|
|
||||||
@ -77,8 +77,8 @@ TEST_CASE("String capture test with embedded match action", "[general]")
|
|||||||
|
|
||||||
TEST_CASE("Cyclic grammer test", "[general]")
|
TEST_CASE("Cyclic grammer test", "[general]")
|
||||||
{
|
{
|
||||||
Definition PARENT;
|
rule PARENT;
|
||||||
Definition CHILD;
|
rule CHILD;
|
||||||
|
|
||||||
PARENT <= seq(CHILD);
|
PARENT <= seq(CHILD);
|
||||||
CHILD <= seq(PARENT);
|
CHILD <= seq(PARENT);
|
||||||
@ -100,6 +100,25 @@ TEST_CASE("Lambda action test", "[general]")
|
|||||||
REQUIRE(ss == "hello");
|
REQUIRE(ss == "hello");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE("Backtracking test", "[general]")
|
||||||
|
{
|
||||||
|
auto parser = make_parser(
|
||||||
|
" START <- PAT1 / PAT2 "
|
||||||
|
" PAT1 <- HELLO ' One' "
|
||||||
|
" PAT2 <- HELLO ' Two' "
|
||||||
|
" HELLO <- 'Hello' "
|
||||||
|
);
|
||||||
|
|
||||||
|
size_t count = 0;
|
||||||
|
parser["HELLO"] = [&](const char* s, size_t l) {
|
||||||
|
count++;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool ret = parser.parse("Hello Two");
|
||||||
|
REQUIRE(ret == true);
|
||||||
|
REQUIRE(count == 2);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE("Simple calculator test", "[general]")
|
TEST_CASE("Simple calculator test", "[general]")
|
||||||
{
|
{
|
||||||
auto syntax =
|
auto syntax =
|
||||||
@ -110,15 +129,23 @@ TEST_CASE("Simple calculator test", "[general]")
|
|||||||
|
|
||||||
auto parser = make_parser(syntax);
|
auto parser = make_parser(syntax);
|
||||||
|
|
||||||
parser["Additive"] = [](const vector<Any>& v) {
|
parser["Additive"] = {
|
||||||
return v.size() == 1 ? v[0] : v[0].get<int>() + v[1].get<int>();
|
// Default action
|
||||||
|
[]() {},
|
||||||
|
// Action for the first choice
|
||||||
|
[](const vector<Any>& v) { return v[0].get<int>() + v[1].get<int>(); },
|
||||||
|
// Action for the second choice
|
||||||
|
[](const vector<Any>& v) { return v[0]; }
|
||||||
};
|
};
|
||||||
|
|
||||||
parser["Multitive"] = [](const vector<Any>& v) {
|
parser["Multitive"] = [](const vector<Any>& v) {
|
||||||
return v.size() == 1 ? v[0] : v[0].get<int>() * v[1].get<int>();
|
return v.size() == 1 ? v[0] : v[0].get<int>() * v[1].get<int>();
|
||||||
};
|
};
|
||||||
|
|
||||||
parser["Primary"] = [](const vector<Any>& v) {
|
parser["Primary"] = [](const vector<Any>& v) {
|
||||||
return v.size() == 1 ? v[0] : v[1];
|
return v.size() == 1 ? v[0] : v[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
parser["Number"] = [](const char* s, size_t l) {
|
parser["Number"] = [](const char* s, size_t l) {
|
||||||
return atoi(s);
|
return atoi(s);
|
||||||
};
|
};
|
||||||
@ -132,7 +159,7 @@ TEST_CASE("Simple calculator test", "[general]")
|
|||||||
TEST_CASE("Calculator test", "[general]")
|
TEST_CASE("Calculator test", "[general]")
|
||||||
{
|
{
|
||||||
// Construct grammar
|
// Construct grammar
|
||||||
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
|
rule EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
|
||||||
|
|
||||||
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
|
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
|
||||||
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
|
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
|
||||||
@ -156,11 +183,11 @@ TEST_CASE("Calculator test", "[general]")
|
|||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
EXPRESSION.action = reduce;
|
EXPRESSION = reduce;
|
||||||
TERM.action = reduce;
|
TERM = reduce;
|
||||||
TERM_OPERATOR.action = [](const char* s, size_t l) { return *s; };
|
TERM_OPERATOR = [](const char* s, size_t l) { return *s; };
|
||||||
FACTOR_OPERATOR.action = [](const char* s, size_t l) { return *s; };
|
FACTOR_OPERATOR = [](const char* s, size_t l) { return *s; };
|
||||||
NUMBER.action = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
NUMBER = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||||
|
|
||||||
// Parse
|
// Parse
|
||||||
Any val;
|
Any val;
|
||||||
@ -202,11 +229,11 @@ TEST_CASE("Calculator test2", "[general]")
|
|||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
g["EXPRESSION"].action = reduce;
|
g["EXPRESSION"] = reduce;
|
||||||
g["TERM"].action = reduce;
|
g["TERM"] = reduce;
|
||||||
g["TERM_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
|
g["TERM_OPERATOR"] = [](const char* s, size_t l) { return *s; };
|
||||||
g["FACTOR_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
|
g["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return *s; };
|
||||||
g["NUMBER"].action = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
g["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||||
|
|
||||||
// Parse
|
// Parse
|
||||||
Any val;
|
Any val;
|
||||||
|
@ -151,7 +151,6 @@
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="..\peglib.h" />
|
<ClInclude Include="..\peglib.h" />
|
||||||
<ClInclude Include="..\variant.h" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
Loading…
Reference in New Issue
Block a user