Changed to allow actions to be associated with the individual alternatives of a prioritized choice.

This commit is contained in:
yhirose 2015-02-11 19:48:55 -05:00
parent 5a16f53a71
commit 8acb0e2822
4 changed files with 538 additions and 484 deletions

View File

@ -3,14 +3,14 @@ cpp-peglib
C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library. C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library.
*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. It also keeps it in mind that users can easily start using it. *cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. This library depends on only one header file. So, you can start using it right away just by including `peglib.h` in your project.
The PEG syntax that *cpp-peglib* understands is described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
How to use How to use
---------- ----------
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with `std::regex`. We have to write a loop logic, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can handle it pretty easily. What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with *std::regex*, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can, however, handle the repetition pretty easily.
PEG grammar for this task could be like this: PEG grammar for this task could be like this:
@ -20,7 +20,7 @@ TAG_NAME <- (!']' .)+
_ <- [ \t]* _ <- [ \t]*
``` ```
Here is how to parse text with the PEG syntax and retreive tag names: Here is how to parse text with the PEG syntax and retrieve tag names:
```c++ ```c++
@ -131,14 +131,6 @@ _ = zom(cls(" \t"));
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
``` ```
It is also possible to specify a *string match action* with a *grp* operator. The string match action doesn't affect the resular semantic action behavior.
```c++
ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _)));
TAG_NAME = oom(seq(npd(chr(']')), any()));
_ = zom(cls(" \t"));
```
In fact, the PEG parser generator is made with the parser operators. You can see the code at `make_peg_grammar` function in `peglib.h`. In fact, the PEG parser generator is made with the parser operators. You can see the code at `make_peg_grammar` function in `peglib.h`.
The following are available operators: The following are available operators:
@ -182,6 +174,7 @@ Other C++ PEG parser libraries
Thanks to the authors of the libraries that inspired *cpp-peglib*. Thanks to the authors of the libraries that inspired *cpp-peglib*.
* [Boost Spirit X3](https://github.com/djowel/spirit_x3) - A set of C++ libraries for parsing and output generation implemented as Domain Specific Embedded Languages (DSEL) using Expression templates and Template Meta-Programming
* [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library * [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library
* [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity * [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity

941
peglib.h

File diff suppressed because it is too large Load Diff

View File

@ -40,7 +40,7 @@ TEST_CASE("String capture test2", "[general]")
{ {
vector<string> tags; vector<string> tags;
Definition ROOT, TAG, TAG_NAME, WS; rule ROOT, TAG, TAG_NAME, WS;
ROOT <= seq(WS, zom(TAG)); ROOT <= seq(WS, zom(TAG));
TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); }; TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
@ -57,7 +57,7 @@ TEST_CASE("String capture test2", "[general]")
TEST_CASE("String capture test with embedded match action", "[general]") TEST_CASE("String capture test with embedded match action", "[general]")
{ {
Definition ROOT, TAG, TAG_NAME, WS; rule ROOT, TAG, TAG_NAME, WS;
vector<string> tags; vector<string> tags;
@ -77,8 +77,8 @@ TEST_CASE("String capture test with embedded match action", "[general]")
TEST_CASE("Cyclic grammer test", "[general]") TEST_CASE("Cyclic grammer test", "[general]")
{ {
Definition PARENT; rule PARENT;
Definition CHILD; rule CHILD;
PARENT <= seq(CHILD); PARENT <= seq(CHILD);
CHILD <= seq(PARENT); CHILD <= seq(PARENT);
@ -100,6 +100,25 @@ TEST_CASE("Lambda action test", "[general]")
REQUIRE(ss == "hello"); REQUIRE(ss == "hello");
} }
// Checks how many times a rule's action runs when the first alternative of a
// prioritized choice fails partway through and the second alternative matches.
TEST_CASE("Backtracking test", "[general]")
{
// Both alternatives of START begin with HELLO; they differ only in the suffix
// (' One' vs ' Two').
auto parser = make_parser(
" START <- PAT1 / PAT2 "
" PAT1 <- HELLO ' One' "
" PAT2 <- HELLO ' Two' "
" HELLO <- 'Hello' "
);
// Counts invocations of the HELLO action; the matched text (s, l) is unused.
size_t count = 0;
parser["HELLO"] = [&](const char* s, size_t l) {
count++;
};
// "Hello Two" cannot satisfy PAT1's ' One' suffix, so only PAT2 can match it.
bool ret = parser.parse("Hello Two");
REQUIRE(ret == true);
// The test expects two invocations — presumably one per attempted alternative,
// i.e. the action also fired during the abandoned PAT1 attempt.
// NOTE(review): confirm against the action-dispatch code in peglib.h.
REQUIRE(count == 2);
}
TEST_CASE("Simple calculator test", "[general]") TEST_CASE("Simple calculator test", "[general]")
{ {
auto syntax = auto syntax =
@ -110,15 +129,23 @@ TEST_CASE("Simple calculator test", "[general]")
auto parser = make_parser(syntax); auto parser = make_parser(syntax);
parser["Additive"] = [](const vector<Any>& v) { parser["Additive"] = {
return v.size() == 1 ? v[0] : v[0].get<int>() + v[1].get<int>(); // Default action
[]() {},
// Action for the first choice
[](const vector<Any>& v) { return v[0].get<int>() + v[1].get<int>(); },
// Action for the second choice
[](const vector<Any>& v) { return v[0]; }
}; };
parser["Multitive"] = [](const vector<Any>& v) { parser["Multitive"] = [](const vector<Any>& v) {
return v.size() == 1 ? v[0] : v[0].get<int>() * v[1].get<int>(); return v.size() == 1 ? v[0] : v[0].get<int>() * v[1].get<int>();
}; };
parser["Primary"] = [](const vector<Any>& v) { parser["Primary"] = [](const vector<Any>& v) {
return v.size() == 1 ? v[0] : v[1]; return v.size() == 1 ? v[0] : v[1];
}; };
parser["Number"] = [](const char* s, size_t l) { parser["Number"] = [](const char* s, size_t l) {
return atoi(s); return atoi(s);
}; };
@ -132,7 +159,7 @@ TEST_CASE("Simple calculator test", "[general]")
TEST_CASE("Calculator test", "[general]") TEST_CASE("Calculator test", "[general]")
{ {
// Construct grammar // Construct grammar
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; rule EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))); EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
@ -156,11 +183,11 @@ TEST_CASE("Calculator test", "[general]")
return ret; return ret;
}; };
EXPRESSION.action = reduce; EXPRESSION = reduce;
TERM.action = reduce; TERM = reduce;
TERM_OPERATOR.action = [](const char* s, size_t l) { return *s; }; TERM_OPERATOR = [](const char* s, size_t l) { return *s; };
FACTOR_OPERATOR.action = [](const char* s, size_t l) { return *s; }; FACTOR_OPERATOR = [](const char* s, size_t l) { return *s; };
NUMBER.action = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; NUMBER = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse // Parse
Any val; Any val;
@ -202,11 +229,11 @@ TEST_CASE("Calculator test2", "[general]")
return ret; return ret;
}; };
g["EXPRESSION"].action = reduce; g["EXPRESSION"] = reduce;
g["TERM"].action = reduce; g["TERM"] = reduce;
g["TERM_OPERATOR"].action = [](const char* s, size_t l) { return *s; }; g["TERM_OPERATOR"] = [](const char* s, size_t l) { return *s; };
g["FACTOR_OPERATOR"].action = [](const char* s, size_t l) { return *s; }; g["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return *s; };
g["NUMBER"].action = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; g["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse // Parse
Any val; Any val;

View File

@ -151,7 +151,6 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\peglib.h" /> <ClInclude Include="..\peglib.h" />
<ClInclude Include="..\variant.h" />
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">