Changed to be able to associate actions to the prioritized choice.

pull/3/head
yhirose 9 years ago
parent 5a16f53a71
commit 8acb0e2822
  1. 17
      README.md
  2. 769
      peglib.h
  3. 63
      test/test.cc
  4. 1
      test/test.vcxproj

@ -3,14 +3,14 @@ cpp-peglib
C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library.
*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. It also keeps it in mind that users can easily start using it.
*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. This library depends on only one header file. So, you can start using it right away just by including `peglib.h` in your project.
The PEG syntax that *cpp-peglib* understands is described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
The PEG syntax is well described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
How to use
----------
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with `std::regex`. We have to write a loop logic, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can handle it pretty easily.
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with *std::regex*, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can, however, handle the repetition pretty easily.
PEG grammar for this task could be like this:
@ -20,7 +20,7 @@ TAG_NAME <- (!']' .)+
_ <- [ \t]*
```
Here is how to parse text with the PEG syntax and retreive tag names:
Here is how to parse text with the PEG syntax and retrieve tag names:
```c++
@ -131,14 +131,6 @@ _ = zom(cls(" \t"));
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
```
It is also possible to specify a *string match action* with a *grp* operator. The string match action doesn't affect the regular semantic action behavior.
```c++
ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _)));
TAG_NAME = oom(seq(npd(chr(']')), any()));
_ = zom(cls(" \t"));
```
In fact, the PEG parser generator is made with the parser operators. You can see the code at `make_peg_grammar` function in `peglib.h`.
The following are available operators:
@ -182,6 +174,7 @@ Other C++ PEG parser libraries
Thanks to the authors of the libraries that inspired *cpp-peglib*.
* [Boost Spirit X3](https://github.com/djowel/spirit_x3) - A set of C++ libraries for parsing and output generation implemented as Domain Specific Embedded Languages (DSEL) using Expression templates and Template Meta-Programming
* [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library
* [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity

File diff suppressed because it is too large Load Diff

@ -40,7 +40,7 @@ TEST_CASE("String capture test2", "[general]")
{
vector<string> tags;
Definition ROOT, TAG, TAG_NAME, WS;
rule ROOT, TAG, TAG_NAME, WS;
ROOT <= seq(WS, zom(TAG));
TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
@ -57,7 +57,7 @@ TEST_CASE("String capture test2", "[general]")
TEST_CASE("String capture test with embedded match action", "[general]")
{
Definition ROOT, TAG, TAG_NAME, WS;
rule ROOT, TAG, TAG_NAME, WS;
vector<string> tags;
@ -77,8 +77,8 @@ TEST_CASE("String capture test with embedded match action", "[general]")
TEST_CASE("Cyclic grammer test", "[general]")
{
Definition PARENT;
Definition CHILD;
rule PARENT;
rule CHILD;
PARENT <= seq(CHILD);
CHILD <= seq(PARENT);
@ -100,6 +100,25 @@ TEST_CASE("Lambda action test", "[general]")
REQUIRE(ss == "hello");
}
// Exercises a prioritized choice whose first alternative fails only after a
// shared sub-rule (HELLO) has already matched, and checks how many times the
// action attached to that sub-rule has been invoked by the end of the parse.
TEST_CASE("Backtracking test", "[general]")
{
// START tries PAT1 first and PAT2 second; both alternatives begin with HELLO.
auto parser = make_parser(
" START <- PAT1 / PAT2 "
" PAT1 <- HELLO ' One' "
" PAT2 <- HELLO ' Two' "
" HELLO <- 'Hello' "
);
// Incremented every time the HELLO action is called.
size_t count = 0;
parser["HELLO"] = [&](const char* s, size_t l) {
count++;
};
// The input cannot satisfy PAT1 (it expects ' One' after HELLO), so a
// successful parse has to come from the PAT2 alternative.
bool ret = parser.parse("Hello Two");
REQUIRE(ret == true);
// Two invocations are expected -- presumably one from the abandoned PAT1
// attempt and one from the successful PAT2 attempt (NOTE(review): confirm
// against the action-dispatch logic in peglib.h).
REQUIRE(count == 2);
}
TEST_CASE("Simple calculator test", "[general]")
{
auto syntax =
@ -110,15 +129,23 @@ TEST_CASE("Simple calculator test", "[general]")
auto parser = make_parser(syntax);
parser["Additive"] = [](const vector<Any>& v) {
return v.size() == 1 ? v[0] : v[0].get<int>() + v[1].get<int>();
parser["Additive"] = {
// Default action
[]() {},
// Action for the first choice
[](const vector<Any>& v) { return v[0].get<int>() + v[1].get<int>(); },
// Action for the second choice
[](const vector<Any>& v) { return v[0]; }
};
parser["Multitive"] = [](const vector<Any>& v) {
return v.size() == 1 ? v[0] : v[0].get<int>() * v[1].get<int>();
};
parser["Primary"] = [](const vector<Any>& v) {
return v.size() == 1 ? v[0] : v[1];
return v.size() == 1 ? v[0] : v[1];
};
parser["Number"] = [](const char* s, size_t l) {
return atoi(s);
};
@ -132,7 +159,7 @@ TEST_CASE("Simple calculator test", "[general]")
TEST_CASE("Calculator test", "[general]")
{
// Construct grammar
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
rule EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
@ -156,11 +183,11 @@ TEST_CASE("Calculator test", "[general]")
return ret;
};
EXPRESSION.action = reduce;
TERM.action = reduce;
TERM_OPERATOR.action = [](const char* s, size_t l) { return *s; };
FACTOR_OPERATOR.action = [](const char* s, size_t l) { return *s; };
NUMBER.action = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
EXPRESSION = reduce;
TERM = reduce;
TERM_OPERATOR = [](const char* s, size_t l) { return *s; };
FACTOR_OPERATOR = [](const char* s, size_t l) { return *s; };
NUMBER = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse
Any val;
@ -202,11 +229,11 @@ TEST_CASE("Calculator test2", "[general]")
return ret;
};
g["EXPRESSION"].action = reduce;
g["TERM"].action = reduce;
g["TERM_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
g["FACTOR_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
g["NUMBER"].action = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
g["EXPRESSION"] = reduce;
g["TERM"] = reduce;
g["TERM_OPERATOR"] = [](const char* s, size_t l) { return *s; };
g["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return *s; };
g["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse
Any val;

@ -151,7 +151,6 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\peglib.h" />
<ClInclude Include="..\variant.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

Loading…
Cancel
Save