Major refactoring.

This commit is contained in:
yhirose 2015-02-09 17:12:59 -05:00
parent 9cb926f7a0
commit d386f2f3b9
7 changed files with 446 additions and 844 deletions

View File

@ -112,46 +112,51 @@ Here are available user actions:
`const std::vector<std::string>& n` holds the names of the child definitions, which is helpful when we want to check what the actual child definitions are.
Make a parser with parser operators and simple actions
------------------------------------------------------
Make a parser with parser operators
-----------------------------------
Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators* and use the *simple action* method rather than the semantic action method. Here is an example:
Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators*. Here is an example:
```c++
using namespace peglib;
using namespace std;
vector<string> tags;
Definition ROOT, TAG_NAME, _;
ROOT = seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _)));
TAG_NAME = oom(seq(npd(chr(']')), any()));
TAG_NAME = oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
_ = zom(cls(" \t"));
vector<string> tags;
TAG_NAME.match = [&](const char* s, size_t l) {
tags.push_back(string(s, l));
};
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
```
In fact, the PEG parser generator is made with operators. You can see the code in the `make_peg_grammar` function in `peglib.h`.
It is also possible to specify a *string match action* with a *grp* operator. The string match action doesn't affect the regular semantic action behavior.
```c++
ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _)));
TAG_NAME = oom(seq(npd(chr(']')), any()));
_ = zom(cls(" \t"));
```
In fact, the PEG parser generator is made with the parser operators. You can see the code in the `make_peg_grammar` function in `peglib.h`.
The following are available operators:
| Description | Operator |
|:-------------------|:---------|
| Sequence | seq |
| Prioritized Choice | cho |
| Grouping | grp |
| Zero or More | zom |
| One or More | oom |
| Optional | opt |
| And predicate | apd |
| Not predicate | npd |
| Literal string | lit |
| Character class | cls |
| Character | chr |
| Any character | any |
| Operator | Description |
|:---------|:-------------------|
| seq | Sequence |
| cho | Prioritized Choice |
| grp | Grouping |
| zom | Zero or More |
| oom | One or More |
| opt | Optional |
| apd | And predicate |
| npd | Not predicate |
| lit | Literal string |
| cls | Character class |
| chr | Character |
| any | Any character |
Sample codes
------------

View File

@ -5,7 +5,7 @@ ifdef USE_CLANG
CC = clang++
CFLAGS = -std=c++1y -stdlib=libc++ -g
else
CC = g++-4.9
CC = g++
CFLAGS = -std=c++1y -g
endif

View File

@ -7,7 +7,7 @@
#include <peglib.h>
#include <iostream>
#include <map>
#include <cstdlib>
using namespace peglib;
using namespace std;
@ -22,36 +22,16 @@ using namespace std;
// FACTOR_OPERATOR <- [/*]
// NUMBER <- [0-9]+
//
class Calculator
int main(int argc, const char** argv)
{
public:
Calculator() {
const char* syntax =
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
;
parser.load_syntax(syntax);
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
if (argc < 2 || string("--help") == argv[1]) {
cout << "usage: calc [formula]" << endl;
return 1;
}
bool execute(const char* s, long& v) const {
return parser.parse(s, v);
}
const char* s = argv[1];
private:
Parser parser;
static long reduce(const vector<Any>& v) {
auto reduce = [](const vector<Any>& v) -> long {
auto result = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>();
@ -64,22 +44,27 @@ private:
}
}
return result;
}
};
};
int main(int argc, const char** argv)
{
if (argc < 2 || string("--help") == argv[1]) {
cout << "usage: calc [formula]" << endl;
return 1;
}
const char* syntax =
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
;
const char* s = argv[1];
Parser parser = make_parser(syntax);
Calculator calc;
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return atol(s); };
long val = 0;
if (calc.execute(s, val)) {
if (parser.parse(s, val)) {
cout << s << " = " << val << endl;
return 0;
}

View File

@ -7,7 +7,7 @@
#include <peglib.h>
#include <iostream>
#include <map>
#include <cstdlib>
using namespace peglib;
using namespace std;
@ -22,32 +22,16 @@ using namespace std;
// FACTOR_OPERATOR <- [/*]
// NUMBER <- [0-9]+
//
class Calculator
int main(int argc, const char** argv)
{
public:
Calculator() {
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; };
FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; };
NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
if (argc < 2 || string("--help") == argv[1]) {
cout << "usage: calc [formula]" << endl;
return 1;
}
bool execute(const char* s, long& v) const {
Any val;
auto ret = EXPRESSION.parse(s, actions, val);
if (ret) {
v = val.get<long>();
}
return ret;
}
const char* s = argv[1];
private:
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
SemanticActions<Any> actions;
static long reduce(const vector<Any>& v) {
auto reduce = [](const vector<Any>& v) -> long {
auto result = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>();
@ -60,28 +44,23 @@ private:
}
}
return result;
}
};
};
int main(int argc, const char** argv)
{
if (argc < 2 || string("--help") == argv[1]) {
cout << "usage: calc [formula]" << endl;
return 1;
}
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
const char* s = argv[1];
Calculator calc;
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; };
FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; };
NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return atol(s); };
long val = 0;
if (calc.execute(s, val)) {
if (EXPRESSION.parse(s, val)) {
cout << s << " = " << val << endl;
return 0;
}
cout << "syntax error..." << endl;
return -1;
}

931
peglib.h

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,13 @@
USE_CLANG = 1
ifdef USE_CLANG
CC = clang++
CCFLAGS = -std=c++11 -stdlib=libc++ -g
CCFLAGS = -std=c++1y -stdlib=libc++ -g
else
CC = g++
CCFLAGS = -std=c++1y -g
endif
all : test
./test

View File

@ -7,7 +7,6 @@
TEST_CASE("String capture test", "[general]")
{
{
auto parser = peglib::make_parser(
" ROOT <- _ ('[' TAG_NAME ']' _)* "
" TAG_NAME <- (!']' .)+ "
@ -27,9 +26,6 @@ TEST_CASE("String capture test", "[general]")
REQUIRE(tags[0] == "tag1");
REQUIRE(tags[1] == "tag:2");
REQUIRE(tags[2] == "tag-3");
}
REQUIRE(peglib::VARINT_COUNT == 0);
}
using namespace peglib;
@ -37,7 +33,6 @@ using namespace std;
TEST_CASE("String capture test2", "[general]")
{
{
vector<string> tags;
Definition ROOT, TAG, TAG_NAME, WS;
@ -53,14 +48,10 @@ TEST_CASE("String capture test2", "[general]")
REQUIRE(tags[0] == "tag1");
REQUIRE(tags[1] == "tag:2");
REQUIRE(tags[2] == "tag-3");
}
REQUIRE(VARINT_COUNT == 0);
}
TEST_CASE("String capture test with embedded match action", "[general]")
{
{
Definition ROOT, TAG, TAG_NAME, WS;
vector<string> tags;
@ -77,22 +68,15 @@ TEST_CASE("String capture test with embedded match action", "[general]")
REQUIRE(tags[0] == "tag1");
REQUIRE(tags[1] == "tag:2");
REQUIRE(tags[2] == "tag-3");
}
REQUIRE(VARINT_COUNT == 0);
}
TEST_CASE("Cyclic grammer test", "[general]")
{
{
Definition PARENT;
Definition CHILD;
PARENT <= seq(CHILD);
CHILD <= seq(PARENT);
}
REQUIRE(VARINT_COUNT == 0);
}
TEST_CASE("Lambda action test", "[general]")
@ -113,7 +97,6 @@ TEST_CASE("Lambda action test", "[general]")
TEST_CASE("Calculator test", "[general]")
{
{
// Construct grammar
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
@ -139,120 +122,106 @@ TEST_CASE("Calculator test", "[general]")
return ret;
};
SemanticActions<Any> actions;
actions[EXPRESSION] = reduce;
actions[TERM] = reduce;
actions[TERM_OPERATOR] = [](const char* s, size_t l) { return *s; };
actions[FACTOR_OPERATOR] = [](const char* s, size_t l) { return *s; };
actions[NUMBER] = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
EXPRESSION.action = reduce;
TERM.action = reduce;
TERM_OPERATOR.action = [](const char* s, size_t l) { return *s; };
FACTOR_OPERATOR.action = [](const char* s, size_t l) { return *s; };
NUMBER.action = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse
Any val;
auto ret = EXPRESSION.parse("1+2*3*(4-5+6)/7-8", actions, val);
auto ret = EXPRESSION.parse("1+2*3*(4-5+6)/7-8", val);
REQUIRE(ret == true);
REQUIRE(val.get<long>() == -3);
}
REQUIRE(VARINT_COUNT == 0);
}
TEST_CASE("Calculator test2", "[general]")
{
{
// Parse syntax
auto syntax =
" # Grammar for Calculator...\n "
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
;
// Parse syntax
auto syntax =
" # Grammar for Calculator...\n "
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
;
string start;
auto grammar = make_grammar(syntax, start);
auto& g = *grammar;
string start;
auto grammar = make_grammar(syntax, start);
auto& g = *grammar;
// Setup actions
SemanticActions<Any> a;
auto reduce = [](const vector<Any>& v) -> long {
long ret = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>();
switch (v[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
case '/': ret /= num; break;
}
// Setup actions
auto reduce = [](const vector<Any>& v) -> long {
long ret = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>();
switch (v[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
case '/': ret /= num; break;
}
return ret;
};
}
return ret;
};
a[g["EXPRESSION"]] = reduce;
a[g["TERM"]] = reduce;
a[g["TERM_OPERATOR"]] = [](const char* s, size_t l) { return *s; };
a[g["FACTOR_OPERATOR"]] = [](const char* s, size_t l) { return *s; };
a[g["NUMBER"]] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
g["EXPRESSION"].action = reduce;
g["TERM"].action = reduce;
g["TERM_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
g["FACTOR_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
g["NUMBER"].action = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse
Any val;
auto ret = g[start].parse("1+2*3*(4-5+6)/7-8", a, val);
// Parse
Any val;
auto ret = g[start].parse("1+2*3*(4-5+6)/7-8", val);
REQUIRE(ret == true);
REQUIRE(val.get<long>() == -3);
}
REQUIRE(VARINT_COUNT == 0);
REQUIRE(ret == true);
REQUIRE(val.get<long>() == -3);
}
TEST_CASE("Calculator test3", "[general]")
{
{
// Parse syntax
auto parser = make_parser(
" # Grammar for Calculator...\n "
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
);
// Parse syntax
auto parser = make_parser(
" # Grammar for Calculator...\n "
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
);
auto reduce = [](const vector<Any>& v) -> long {
long ret = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>();
switch (v[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
case '/': ret /= num; break;
}
auto reduce = [](const vector<Any>& v) -> long {
long ret = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>();
switch (v[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
case '/': ret /= num; break;
}
return ret;
};
}
return ret;
};
// Setup actions
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Setup actions
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse
long val;
auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val);
// Parse
long val;
auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val);
REQUIRE(ret == true);
REQUIRE(val == -3);
}
REQUIRE(VARINT_COUNT == 0);
REQUIRE(ret == true);
REQUIRE(val == -3);
}
TEST_CASE("PEG Grammar", "[peg]")