Major refactoring.

This commit is contained in:
yhirose 2015-02-09 17:12:59 -05:00
parent 9cb926f7a0
commit d386f2f3b9
7 changed files with 446 additions and 844 deletions

View File

@ -112,46 +112,51 @@ Here are available user actions:
`const std::vector<std::string>& n` holds names of child definitions that could be helpful when we want to check what are the actual child definitions. `const std::vector<std::string>& n` holds names of child definitions that could be helpful when we want to check what are the actual child definitions.
Make a parser with parser operators and simple actions Make a parser with parser operators
------------------------------------------------------ -----------------------------------
Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators* and use the *simple action* method rather than the semantic action method. Here is an example: Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators*. Here is an example:
```c++ ```c++
using namespace peglib; using namespace peglib;
using namespace std; using namespace std;
vector<string> tags;
Definition ROOT, TAG_NAME, _; Definition ROOT, TAG_NAME, _;
ROOT = seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _))); ROOT = seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _)));
TAG_NAME = oom(seq(npd(chr(']')), any())); TAG_NAME = oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
_ = zom(cls(" \t")); _ = zom(cls(" \t"));
vector<string> tags;
TAG_NAME.match = [&](const char* s, size_t l) {
tags.push_back(string(s, l));
};
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
``` ```
In fact, the PEG parser generator is made with operators. You can see the code in the `make_peg_grammar` function in `peglib.h`. It is also possible to specify a *string match action* with a *grp* operator. The string match action doesn't affect the regular semantic action behavior.
```c++
ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _)));
TAG_NAME = oom(seq(npd(chr(']')), any()));
_ = zom(cls(" \t"));
```
In fact, the PEG parser generator is made with the parser operators. You can see the code in the `make_peg_grammar` function in `peglib.h`.
The following are available operators: The following are available operators:
| Description | Operator | | Operator | Description |
|:-------------------|:---------| |:---------|:-------------------|
| Sequence | seq | | seq | Sequence |
| Prioritized Choice | cho | | cho | Prioritized Choice |
| Grouping | grp | | grp | Grouping |
| Zero or More | zom | | zom | Zero or More |
| One or More | oom | | oom | One or More |
| Optional | opt | | opt | Optional |
| And predicate | apd | | apd | And predicate |
| Not predicate | npd | | npd | Not predicate |
| Literal string | lit | | lit | Literal string |
| Character class | cls | | cls | Character class |
| Character | chr | | chr | Character |
| Any character | any | | any | Any character |
Sample codes Sample codes
------------ ------------

View File

@ -5,7 +5,7 @@ ifdef USE_CLANG
CC = clang++ CC = clang++
CFLAGS = -std=c++1y -stdlib=libc++ -g CFLAGS = -std=c++1y -stdlib=libc++ -g
else else
CC = g++-4.9 CC = g++
CFLAGS = -std=c++1y -g CFLAGS = -std=c++1y -g
endif endif

View File

@ -7,7 +7,7 @@
#include <peglib.h> #include <peglib.h>
#include <iostream> #include <iostream>
#include <map> #include <cstdlib>
using namespace peglib; using namespace peglib;
using namespace std; using namespace std;
@ -22,36 +22,16 @@ using namespace std;
// FACTOR_OPERATOR <- [/*] // FACTOR_OPERATOR <- [/*]
// NUMBER <- [0-9]+ // NUMBER <- [0-9]+
// //
class Calculator int main(int argc, const char** argv)
{ {
public: if (argc < 2 || string("--help") == argv[1]) {
Calculator() { cout << "usage: calc [formula]" << endl;
const char* syntax = return 1;
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" FACTOR <- NUMBER / '(' EXPRESSION ')' "
" TERM_OPERATOR <- [-+] "
" FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
;
parser.load_syntax(syntax);
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
} }
bool execute(const char* s, long& v) const { const char* s = argv[1];
return parser.parse(s, v);
}
private: auto reduce = [](const vector<Any>& v) -> long {
Parser parser;
static long reduce(const vector<Any>& v) {
auto result = v[0].get<long>(); auto result = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) { for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>(); auto num = v[i + 1].get<long>();
@ -64,22 +44,27 @@ private:
} }
} }
return result; return result;
} };
};
int main(int argc, const char** argv) const char* syntax =
{ " EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
if (argc < 2 || string("--help") == argv[1]) { " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
cout << "usage: calc [formula]" << endl; " FACTOR <- NUMBER / '(' EXPRESSION ')' "
return 1; " TERM_OPERATOR <- [-+] "
} " FACTOR_OPERATOR <- [/*] "
" NUMBER <- [0-9]+ "
;
const char* s = argv[1]; Parser parser = make_parser(syntax);
Calculator calc; parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return atol(s); };
long val = 0; long val = 0;
if (calc.execute(s, val)) { if (parser.parse(s, val)) {
cout << s << " = " << val << endl; cout << s << " = " << val << endl;
return 0; return 0;
} }

View File

@ -7,7 +7,7 @@
#include <peglib.h> #include <peglib.h>
#include <iostream> #include <iostream>
#include <map> #include <cstdlib>
using namespace peglib; using namespace peglib;
using namespace std; using namespace std;
@ -22,32 +22,16 @@ using namespace std;
// FACTOR_OPERATOR <- [/*] // FACTOR_OPERATOR <- [/*]
// NUMBER <- [0-9]+ // NUMBER <- [0-9]+
// //
class Calculator int main(int argc, const char** argv)
{ {
public: if (argc < 2 || string("--help") == argv[1]) {
Calculator() { cout << "usage: calc [formula]" << endl;
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; return 1;
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; };
FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; };
NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
} }
bool execute(const char* s, long& v) const { const char* s = argv[1];
Any val;
auto ret = EXPRESSION.parse(s, actions, val);
if (ret) {
v = val.get<long>();
}
return ret;
}
private: auto reduce = [](const vector<Any>& v) -> long {
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
SemanticActions<Any> actions;
static long reduce(const vector<Any>& v) {
auto result = v[0].get<long>(); auto result = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) { for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>(); auto num = v[i + 1].get<long>();
@ -60,28 +44,23 @@ private:
} }
} }
return result; return result;
} };
};
int main(int argc, const char** argv) Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
{
if (argc < 2 || string("--help") == argv[1]) {
cout << "usage: calc [formula]" << endl;
return 1;
}
const char* s = argv[1]; EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
Calculator calc; FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; };
FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; };
NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return atol(s); };
long val = 0; long val = 0;
if (calc.execute(s, val)) { if (EXPRESSION.parse(s, val)) {
cout << s << " = " << val << endl; cout << s << " = " << val << endl;
return 0; return 0;
} }
cout << "syntax error..." << endl;
return -1; return -1;
} }

931
peglib.h

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,13 @@
USE_CLANG = 1
ifdef USE_CLANG
CC = clang++ CC = clang++
CCFLAGS = -std=c++11 -stdlib=libc++ -g CCFLAGS = -std=c++1y -stdlib=libc++ -g
else
CC = g++
CCFLAGS = -std=c++1y -g
endif
all : test all : test
./test ./test

View File

@ -7,7 +7,6 @@
TEST_CASE("String capture test", "[general]") TEST_CASE("String capture test", "[general]")
{ {
{
auto parser = peglib::make_parser( auto parser = peglib::make_parser(
" ROOT <- _ ('[' TAG_NAME ']' _)* " " ROOT <- _ ('[' TAG_NAME ']' _)* "
" TAG_NAME <- (!']' .)+ " " TAG_NAME <- (!']' .)+ "
@ -27,9 +26,6 @@ TEST_CASE("String capture test", "[general]")
REQUIRE(tags[0] == "tag1"); REQUIRE(tags[0] == "tag1");
REQUIRE(tags[1] == "tag:2"); REQUIRE(tags[1] == "tag:2");
REQUIRE(tags[2] == "tag-3"); REQUIRE(tags[2] == "tag-3");
}
REQUIRE(peglib::VARINT_COUNT == 0);
} }
using namespace peglib; using namespace peglib;
@ -37,7 +33,6 @@ using namespace std;
TEST_CASE("String capture test2", "[general]") TEST_CASE("String capture test2", "[general]")
{ {
{
vector<string> tags; vector<string> tags;
Definition ROOT, TAG, TAG_NAME, WS; Definition ROOT, TAG, TAG_NAME, WS;
@ -53,14 +48,10 @@ TEST_CASE("String capture test2", "[general]")
REQUIRE(tags[0] == "tag1"); REQUIRE(tags[0] == "tag1");
REQUIRE(tags[1] == "tag:2"); REQUIRE(tags[1] == "tag:2");
REQUIRE(tags[2] == "tag-3"); REQUIRE(tags[2] == "tag-3");
}
REQUIRE(VARINT_COUNT == 0);
} }
TEST_CASE("String capture test with embedded match action", "[general]") TEST_CASE("String capture test with embedded match action", "[general]")
{ {
{
Definition ROOT, TAG, TAG_NAME, WS; Definition ROOT, TAG, TAG_NAME, WS;
vector<string> tags; vector<string> tags;
@ -77,22 +68,15 @@ TEST_CASE("String capture test with embedded match action", "[general]")
REQUIRE(tags[0] == "tag1"); REQUIRE(tags[0] == "tag1");
REQUIRE(tags[1] == "tag:2"); REQUIRE(tags[1] == "tag:2");
REQUIRE(tags[2] == "tag-3"); REQUIRE(tags[2] == "tag-3");
}
REQUIRE(VARINT_COUNT == 0);
} }
TEST_CASE("Cyclic grammer test", "[general]") TEST_CASE("Cyclic grammer test", "[general]")
{ {
{
Definition PARENT; Definition PARENT;
Definition CHILD; Definition CHILD;
PARENT <= seq(CHILD); PARENT <= seq(CHILD);
CHILD <= seq(PARENT); CHILD <= seq(PARENT);
}
REQUIRE(VARINT_COUNT == 0);
} }
TEST_CASE("Lambda action test", "[general]") TEST_CASE("Lambda action test", "[general]")
@ -113,7 +97,6 @@ TEST_CASE("Lambda action test", "[general]")
TEST_CASE("Calculator test", "[general]") TEST_CASE("Calculator test", "[general]")
{ {
{
// Construct grammar // Construct grammar
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
@ -139,120 +122,106 @@ TEST_CASE("Calculator test", "[general]")
return ret; return ret;
}; };
SemanticActions<Any> actions; EXPRESSION.action = reduce;
actions[EXPRESSION] = reduce; TERM.action = reduce;
actions[TERM] = reduce; TERM_OPERATOR.action = [](const char* s, size_t l) { return *s; };
actions[TERM_OPERATOR] = [](const char* s, size_t l) { return *s; }; FACTOR_OPERATOR.action = [](const char* s, size_t l) { return *s; };
actions[FACTOR_OPERATOR] = [](const char* s, size_t l) { return *s; }; NUMBER.action = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
actions[NUMBER] = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse // Parse
Any val; Any val;
auto ret = EXPRESSION.parse("1+2*3*(4-5+6)/7-8", actions, val); auto ret = EXPRESSION.parse("1+2*3*(4-5+6)/7-8", val);
REQUIRE(ret == true); REQUIRE(ret == true);
REQUIRE(val.get<long>() == -3); REQUIRE(val.get<long>() == -3);
}
REQUIRE(VARINT_COUNT == 0);
} }
TEST_CASE("Calculator test2", "[general]") TEST_CASE("Calculator test2", "[general]")
{ {
{ // Parse syntax
// Parse syntax auto syntax =
auto syntax = " # Grammar for Calculator...\n "
" # Grammar for Calculator...\n " " EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* " " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " " FACTOR <- NUMBER / '(' EXPRESSION ')' "
" FACTOR <- NUMBER / '(' EXPRESSION ')' " " TERM_OPERATOR <- [-+] "
" TERM_OPERATOR <- [-+] " " FACTOR_OPERATOR <- [/*] "
" FACTOR_OPERATOR <- [/*] " " NUMBER <- [0-9]+ "
" NUMBER <- [0-9]+ " ;
;
string start; string start;
auto grammar = make_grammar(syntax, start); auto grammar = make_grammar(syntax, start);
auto& g = *grammar; auto& g = *grammar;
// Setup actions // Setup actions
SemanticActions<Any> a; auto reduce = [](const vector<Any>& v) -> long {
long ret = v[0].get<long>();
auto reduce = [](const vector<Any>& v) -> long { for (auto i = 1u; i < v.size(); i += 2) {
long ret = v[0].get<long>(); auto num = v[i + 1].get<long>();
for (auto i = 1u; i < v.size(); i += 2) { switch (v[i].get<char>()) {
auto num = v[i + 1].get<long>(); case '+': ret += num; break;
switch (v[i].get<char>()) { case '-': ret -= num; break;
case '+': ret += num; break; case '*': ret *= num; break;
case '-': ret -= num; break; case '/': ret /= num; break;
case '*': ret *= num; break;
case '/': ret /= num; break;
}
} }
return ret; }
}; return ret;
};
a[g["EXPRESSION"]] = reduce; g["EXPRESSION"].action = reduce;
a[g["TERM"]] = reduce; g["TERM"].action = reduce;
a[g["TERM_OPERATOR"]] = [](const char* s, size_t l) { return *s; }; g["TERM_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
a[g["FACTOR_OPERATOR"]] = [](const char* s, size_t l) { return *s; }; g["FACTOR_OPERATOR"].action = [](const char* s, size_t l) { return *s; };
a[g["NUMBER"]] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; g["NUMBER"].action = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse // Parse
Any val; Any val;
auto ret = g[start].parse("1+2*3*(4-5+6)/7-8", a, val); auto ret = g[start].parse("1+2*3*(4-5+6)/7-8", val);
REQUIRE(ret == true); REQUIRE(ret == true);
REQUIRE(val.get<long>() == -3); REQUIRE(val.get<long>() == -3);
}
REQUIRE(VARINT_COUNT == 0);
} }
TEST_CASE("Calculator test3", "[general]") TEST_CASE("Calculator test3", "[general]")
{ {
{ // Parse syntax
// Parse syntax auto parser = make_parser(
auto parser = make_parser( " # Grammar for Calculator...\n "
" # Grammar for Calculator...\n " " EXPRESSION <- TERM (TERM_OPERATOR TERM)* "
" EXPRESSION <- TERM (TERM_OPERATOR TERM)* " " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* "
" TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " " FACTOR <- NUMBER / '(' EXPRESSION ')' "
" FACTOR <- NUMBER / '(' EXPRESSION ')' " " TERM_OPERATOR <- [-+] "
" TERM_OPERATOR <- [-+] " " FACTOR_OPERATOR <- [/*] "
" FACTOR_OPERATOR <- [/*] " " NUMBER <- [0-9]+ "
" NUMBER <- [0-9]+ " );
);
auto reduce = [](const vector<Any>& v) -> long { auto reduce = [](const vector<Any>& v) -> long {
long ret = v[0].get<long>(); long ret = v[0].get<long>();
for (auto i = 1u; i < v.size(); i += 2) { for (auto i = 1u; i < v.size(); i += 2) {
auto num = v[i + 1].get<long>(); auto num = v[i + 1].get<long>();
switch (v[i].get<char>()) { switch (v[i].get<char>()) {
case '+': ret += num; break; case '+': ret += num; break;
case '-': ret -= num; break; case '-': ret -= num; break;
case '*': ret *= num; break; case '*': ret *= num; break;
case '/': ret /= num; break; case '/': ret /= num; break;
}
} }
return ret; }
}; return ret;
};
// Setup actions // Setup actions
parser["EXPRESSION"] = reduce; parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce; parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; };
parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
// Parse // Parse
long val; long val;
auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val); auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val);
REQUIRE(ret == true); REQUIRE(ret == true);
REQUIRE(val == -3); REQUIRE(val == -3);
}
REQUIRE(VARINT_COUNT == 0);
} }
TEST_CASE("PEG Grammar", "[peg]") TEST_CASE("PEG Grammar", "[peg]")