mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 13:25:30 +00:00
Changed to be able to associate actions to the prioritized choice.
This commit is contained in:
parent
5a16f53a71
commit
8acb0e2822
17
README.md
17
README.md
@ -3,14 +3,14 @@ cpp-peglib
|
||||
|
||||
C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library.
|
||||
|
||||
*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. It also keeps it in mind that users can easily start using it.
|
||||
*cpp-peglib* tries to provide a more expressive parsing experience than common regular expression libraries such as std::regex. The library consists of a single header file, so you can start using it right away just by including `peglib.h` in your project.
|
||||
|
||||
The PEG syntax that *cpp-peglib* understands is described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
|
||||
The PEG syntax is well described on page 2 of this [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf).
|
||||
|
||||
How to use
|
||||
----------
|
||||
|
||||
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with `std::regex`. We have to write a loop logic, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can handle it pretty easily.
|
||||
What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? It's a bit hard to do it with *std::regex*, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can, however, handle the repetition pretty easily.
|
||||
|
||||
A PEG grammar for this task could look like this:
|
||||
|
||||
@ -20,7 +20,7 @@ TAG_NAME <- (!']' .)+
|
||||
_ <- [ \t]*
|
||||
```
|
||||
|
||||
Here is how to parse text with the PEG syntax and retreive tag names:
|
||||
Here is how to parse text with the PEG syntax and retrieve tag names:
|
||||
|
||||
|
||||
```c++
|
||||
@ -131,14 +131,6 @@ _ = zom(cls(" \t"));
|
||||
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
|
||||
```
|
||||
|
||||
It is also possible to specify a *string match action* with a *grp* operator. The string match action doesn't affect the regular semantic action behavior.
|
||||
|
||||
```c++
|
||||
ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _)));
|
||||
TAG_NAME = oom(seq(npd(chr(']')), any()));
|
||||
_ = zom(cls(" \t"));
|
||||
```
|
||||
|
||||
In fact, the PEG parser generator is made with the parser operators. You can see the code in the `make_peg_grammar` function in `peglib.h`.
|
||||
|
||||
The following are available operators:
|
||||
@ -182,6 +174,7 @@ Other C++ PEG parser libraries
|
||||
|
||||
Thanks to the authors of the libraries that inspired *cpp-peglib*.
|
||||
|
||||
* [Boost Spirit X3](https://github.com/djowel/spirit_x3) - A set of C++ libraries for parsing and output generation implemented as Domain Specific Embedded Languages (DSEL) using Expression templates and Template Meta-Programming
|
||||
* [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library
|
||||
* [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity
|
||||
|
||||
|
63
test/test.cc
63
test/test.cc
@ -40,7 +40,7 @@ TEST_CASE("String capture test2", "[general]")
|
||||
{
|
||||
vector<string> tags;
|
||||
|
||||
Definition ROOT, TAG, TAG_NAME, WS;
|
||||
rule ROOT, TAG, TAG_NAME, WS;
|
||||
ROOT <= seq(WS, zom(TAG));
|
||||
TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
|
||||
TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
|
||||
@ -57,7 +57,7 @@ TEST_CASE("String capture test2", "[general]")
|
||||
|
||||
TEST_CASE("String capture test with embedded match action", "[general]")
|
||||
{
|
||||
Definition ROOT, TAG, TAG_NAME, WS;
|
||||
rule ROOT, TAG, TAG_NAME, WS;
|
||||
|
||||
vector<string> tags;
|
||||
|
||||
@ -77,8 +77,8 @@ TEST_CASE("String capture test with embedded match action", "[general]")
|
||||
|
||||
TEST_CASE("Cyclic grammer test", "[general]")
|
||||
{
|
||||
Definition PARENT;
|
||||
Definition CHILD;
|
||||
rule PARENT;
|
||||
rule CHILD;
|
||||
|
||||
PARENT <= seq(CHILD);
|
||||
CHILD <= seq(PARENT);
|
||||
@ -100,6 +100,25 @@ TEST_CASE("Lambda action test", "[general]")
|
||||
REQUIRE(ss == "hello");
|
||||
}
|
||||
|
||||
TEST_CASE("Backtracking test", "[general]")
|
||||
{
|
||||
auto parser = make_parser(
|
||||
" START <- PAT1 / PAT2 "
|
||||
" PAT1 <- HELLO ' One' "
|
||||
" PAT2 <- HELLO ' Two' "
|
||||
" HELLO <- 'Hello' "
|
||||
);
|
||||
|
||||
size_t count = 0;
|
||||
parser["HELLO"] = [&](const char* s, size_t l) {
|
||||
count++;
|
||||
};
|
||||
|
||||
bool ret = parser.parse("Hello Two");
|
||||
REQUIRE(ret == true);
|
||||
REQUIRE(count == 2);
|
||||
}
|
||||
|
||||
TEST_CASE("Simple calculator test", "[general]")
|
||||
{
|
||||
auto syntax =
|
||||
@ -110,15 +129,23 @@ TEST_CASE("Simple calculator test", "[general]")
|
||||
|
||||
auto parser = make_parser(syntax);
|
||||
|
||||
parser["Additive"] = [](const vector<Any>& v) {
|
||||
return v.size() == 1 ? v[0] : v[0].get<int>() + v[1].get<int>();
|
||||
parser["Additive"] = {
|
||||
// Default action
|
||||
[]() {},
|
||||
// Action for the first choice
|
||||
[](const vector<Any>& v) { return v[0].get<int>() + v[1].get<int>(); },
|
||||
// Action for the second choice
|
||||
[](const vector<Any>& v) { return v[0]; }
|
||||
};
|
||||
|
||||
parser["Multitive"] = [](const vector<Any>& v) {
|
||||
return v.size() == 1 ? v[0] : v[0].get<int>() * v[1].get<int>();
|
||||
};
|
||||
|
||||
parser["Primary"] = [](const vector<Any>& v) {
|
||||
return v.size() == 1 ? v[0] : v[1];
|
||||
return v.size() == 1 ? v[0] : v[1];
|
||||
};
|
||||
|
||||
parser["Number"] = [](const char* s, size_t l) {
|
||||
return atoi(s);
|
||||
};
|
||||
@ -132,7 +159,7 @@ TEST_CASE("Simple calculator test", "[general]")
|
||||
TEST_CASE("Calculator test", "[general]")
|
||||
{
|
||||
// Construct grammar
|
||||
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
|
||||
rule EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
|
||||
|
||||
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
|
||||
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
|
||||
@ -156,11 +183,11 @@ TEST_CASE("Calculator test", "[general]")
|
||||
return ret;
|
||||
};
|
||||
|
||||
EXPRESSION.action = reduce;
|
||||
TERM.action = reduce;
|
||||
TERM_OPERATOR.action = [](const char* s, size_t l) { return *s; };
|
||||
FACTOR_OPERATOR.action = [](const char* s, size_t l) { return *s; };
|
||||
NUMBER.action = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
EXPRESSION = reduce;
|
||||
TERM = reduce;
|
||||
TERM_OPERATOR = [](const char* s, size_t l) { return *s; };
|
||||
FACTOR_OPERATOR = [](const char* s, size_t l) { return *s; };
|
||||
NUMBER = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
|
||||
// Parse
|
||||
Any val;
|
||||
@ -202,11 +229,11 @@ TEST_CASE("Calculator test2", "[general]")
|
||||
return ret;
|
||||
};
|
||||
|
||||
g["EXPRESSION"].action = reduce;
|
||||
g["TERM"].action = reduce;
|
||||
g["TERM_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
|
||||
g["FACTOR_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
|
||||
g["NUMBER"].action = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
g["EXPRESSION"] = reduce;
|
||||
g["TERM"] = reduce;
|
||||
g["TERM_OPERATOR"] = [](const char* s, size_t l) { return *s; };
|
||||
g["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return *s; };
|
||||
g["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
|
||||
// Parse
|
||||
Any val;
|
||||
|
@ -151,7 +151,6 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\peglib.h" />
|
||||
<ClInclude Include="..\variant.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
|
Loading…
Reference in New Issue
Block a user