mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 13:25:30 +00:00
Work in progress.
This commit is contained in:
parent
56304269d0
commit
9cb926f7a0
@ -26,18 +26,12 @@ class Calculator
|
||||
{
|
||||
public:
|
||||
Calculator() {
|
||||
EXPRESSION = seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
|
||||
TERM = seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
|
||||
FACTOR = cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
|
||||
TERM_OPERATOR = cls("+-");
|
||||
FACTOR_OPERATOR = cls("*/");
|
||||
NUMBER = oom(cls("0-9"));
|
||||
|
||||
actions[EXPRESSION] = reduce;
|
||||
actions[TERM] = reduce;
|
||||
actions[TERM_OPERATOR] = [](const char* s, size_t l) { return (char)*s; };
|
||||
actions[FACTOR_OPERATOR] = [](const char* s, size_t l) { return (char)*s; };
|
||||
actions[NUMBER] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
|
||||
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
|
||||
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
|
||||
TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; };
|
||||
FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; };
|
||||
NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
}
|
||||
|
||||
bool execute(const char* s, long& v) const {
|
||||
|
148
peglib.h
148
peglib.h
@ -553,12 +553,14 @@ class Grouping
|
||||
{
|
||||
public:
|
||||
Grouping(const std::shared_ptr<Rule>& rule) : rule_(rule) {}
|
||||
Grouping(const std::shared_ptr<Rule>& rule, std::function<void(const char* s, size_t l)> match) : rule_(rule), match_(match) {}
|
||||
|
||||
template <typename T>
|
||||
Match parse(const char* s, size_t l, const SemanticActions<T>* sa, SemanticValues<T>* sv) const;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Rule> rule_;
|
||||
std::shared_ptr<Rule> rule_;
|
||||
std::function<void(const char* s, size_t l)> match_;
|
||||
};
|
||||
|
||||
class NonTerminal
|
||||
@ -892,8 +894,9 @@ public:
|
||||
return static_cast<void*>(this);
|
||||
}
|
||||
|
||||
void operator=(const std::shared_ptr<Rule>& rule) {
|
||||
Definition& operator<=(const std::shared_ptr<Rule>& rule) {
|
||||
set_rule(rule);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -915,8 +918,21 @@ public:
|
||||
return parse(s, strlen(s), sa, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool parse(const char* s, size_t l, T& val) const {
|
||||
SemanticValues<Any> sv;
|
||||
auto m = rule_->parse<Any>(s, l, nullptr, &sv);
|
||||
return m.ret && m.len == l;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool parse(const char* s, T& val) const {
|
||||
return parse(s, strlen(s), val);
|
||||
}
|
||||
|
||||
bool parse(const char* s, size_t l) const {
|
||||
auto m = rule_->parse<int>(s, l, nullptr, nullptr);
|
||||
SemanticValues<Any> sv;
|
||||
auto m = rule_->parse<Any>(s, l, nullptr, &sv);
|
||||
return m.ret && m.len == l;
|
||||
}
|
||||
|
||||
@ -928,6 +944,12 @@ public:
|
||||
|
||||
std::function<void (const char* s, size_t l)> match;
|
||||
|
||||
template <typename F>
|
||||
void operator,(F fn) {
|
||||
action = fn;
|
||||
}
|
||||
SemanticActionAdaptor<Any> action;
|
||||
|
||||
private:
|
||||
friend class DefinitionReference;
|
||||
|
||||
@ -1027,7 +1049,11 @@ Match NotPredicate::parse(const char* s, size_t l, const SemanticActions<T>* sa,
|
||||
template <typename T>
|
||||
Match Grouping::parse(const char* s, size_t l, const SemanticActions<T>* sa, SemanticValues<T>* sv) const {
|
||||
assert(rule_);
|
||||
return rule_->parse<T>(s, l, sa, sv);
|
||||
auto m = rule_->parse<T>(s, l, sa, sv);
|
||||
if (m.ret && match_) {
|
||||
match_(s, m.len);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -1047,17 +1073,19 @@ Match NonTerminal::parse(const char* s, size_t l, const SemanticActions<T>* sa,
|
||||
outer_->match(s, m.len);
|
||||
}
|
||||
|
||||
typedef std::function<T (const char* s, size_t l, const std::vector<T>& v, const std::vector<std::string>& n)> Action;
|
||||
Action action;
|
||||
|
||||
if (sa) {
|
||||
auto it = sa->find(outer_);
|
||||
if (it != sa->end()) {
|
||||
action = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
if (sv) {
|
||||
typedef std::function<T (const char* s, size_t l, const std::vector<T>& v, const std::vector<std::string>& n)> Action;
|
||||
Action action;
|
||||
|
||||
if (outer_->action) {
|
||||
action = outer_->action;
|
||||
} else if (sa) {
|
||||
auto it = sa->find(outer_);
|
||||
if (it != sa->end()) {
|
||||
action = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
sv->names.push_back(outer_->name);
|
||||
auto val = reduce<T>(s, m.len, chldsv->values, chldsv->names, action);
|
||||
sv->values.push_back(val);
|
||||
@ -1160,6 +1188,10 @@ inline std::shared_ptr<Rule> grp(const std::shared_ptr<Rule>& rule) {
|
||||
return std::make_shared<Rule>(Grouping(rule));
|
||||
}
|
||||
|
||||
inline std::shared_ptr<Rule> grp(const std::shared_ptr<Rule>& rule, std::function<void (const char* s, size_t l)> match) {
|
||||
return std::make_shared<Rule>(Grouping(rule, match));
|
||||
}
|
||||
|
||||
inline std::shared_ptr<Rule> ref(const std::map<std::string, Definition>& grammar, const std::string& name) {
|
||||
return std::make_shared<Rule>(DefinitionReference(grammar, name));
|
||||
}
|
||||
@ -1175,52 +1207,52 @@ Grammar make_peg_grammar()
|
||||
Grammar g;
|
||||
|
||||
// Setup PEG syntax parser
|
||||
g["Grammar"] = seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
|
||||
g["Definition"] = seq(g["Identifier"], g["LEFTARROW"], g["Expression"]);
|
||||
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
|
||||
g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]);
|
||||
|
||||
g["Expression"] = seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
|
||||
g["Sequence"] = zom(g["Prefix"]);
|
||||
g["Prefix"] = seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]);
|
||||
g["Suffix"] = seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
|
||||
g["Primary"] = cho(seq(g["Identifier"], npd(g["LEFTARROW"])),
|
||||
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
||||
g["Literal"], g["Class"], g["DOT"]);
|
||||
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
|
||||
g["Sequence"] <= zom(g["Prefix"]);
|
||||
g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]);
|
||||
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
|
||||
g["Primary"] <= cho(seq(g["Identifier"], npd(g["LEFTARROW"])),
|
||||
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
||||
g["Literal"], g["Class"], g["DOT"]);
|
||||
|
||||
g["Identifier"] = seq(g["IdentifierContent"], g["Spacing"]);
|
||||
g["IdentifierContent"] = seq(g["IdentStart"], zom(g["IdentCont"]));
|
||||
g["IdentStart"] = cls("a-zA-Z_");
|
||||
g["IdentCont"] = cho(g["IdentStart"], cls("0-9"));
|
||||
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
||||
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
|
||||
g["IdentStart"] <= cls("a-zA-Z_");
|
||||
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
|
||||
|
||||
g["Literal"] = cho(seq(cls("'"), g["SingleQuotesContent"], cls("'"), g["Spacing"]),
|
||||
seq(cls("\""), g["DoubleQuotesContent"], cls("\""), g["Spacing"]));
|
||||
g["SingleQuotesContent"] = zom(seq(npd(cls("'")), g["Char"]));
|
||||
g["DoubleQuotesContent"] = zom(seq(npd(cls("\"")), g["Char"]));
|
||||
g["Literal"] <= cho(seq(cls("'"), g["SQCont"], cls("'"), g["Spacing"]),
|
||||
seq(cls("\""), g["DQCont"], cls("\""), g["Spacing"]));
|
||||
g["SQCont"] <= zom(seq(npd(cls("'")), g["Char"]));
|
||||
g["DQCont"] <= zom(seq(npd(cls("\"")), g["Char"]));
|
||||
|
||||
g["Class"] = seq(chr('['), g["ClassContent"], chr(']'), g["Spacing"]);
|
||||
g["ClassContent"] = zom(seq(npd(chr(']')), g["Range"]));
|
||||
g["Class"] <= seq(chr('['), g["ClassCont"], chr(']'), g["Spacing"]);
|
||||
g["ClassCont"] <= zom(seq(npd(chr(']')), g["Range"]));
|
||||
|
||||
g["Range"] = cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
|
||||
g["Char"] = cho(seq(chr('\\'), cls("nrt'\"[]\\")),
|
||||
seq(chr('\\'), cls("0-2"), cls("0-7"), cls("0-7")),
|
||||
seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
|
||||
seq(npd(chr('\\')), any()));
|
||||
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
|
||||
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
|
||||
seq(chr('\\'), cls("0-2"), cls("0-7"), cls("0-7")),
|
||||
seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
|
||||
seq(npd(chr('\\')), any()));
|
||||
|
||||
g["LEFTARROW"] = seq(lit("<-"), g["Spacing"]);
|
||||
g["SLASH"] = seq(chr('/'), g["Spacing"]);
|
||||
g["AND"] = seq(chr('&'), g["Spacing"]);
|
||||
g["NOT"] = seq(chr('!'), g["Spacing"]);
|
||||
g["QUESTION"] = seq(chr('?'), g["Spacing"]);
|
||||
g["STAR"] = seq(chr('*'), g["Spacing"]);
|
||||
g["PLUS"] = seq(chr('+'), g["Spacing"]);
|
||||
g["OPEN"] = seq(chr('('), g["Spacing"]);
|
||||
g["CLOSE"] = seq(chr(')'), g["Spacing"]);
|
||||
g["DOT"] = seq(chr('.'), g["Spacing"]);
|
||||
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
|
||||
g["SLASH"] <= seq(chr('/'), g["Spacing"]);
|
||||
g["AND"] <= seq(chr('&'), g["Spacing"]);
|
||||
g["NOT"] <= seq(chr('!'), g["Spacing"]);
|
||||
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
|
||||
g["STAR"] <= seq(chr('*'), g["Spacing"]);
|
||||
g["PLUS"] <= seq(chr('+'), g["Spacing"]);
|
||||
g["OPEN"] <= seq(chr('('), g["Spacing"]);
|
||||
g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
|
||||
g["DOT"] <= seq(chr('.'), g["Spacing"]);
|
||||
|
||||
g["Spacing"] = zom(cho(g["Space"], g["Comment"]));
|
||||
g["Comment"] = seq(chr('#'), zom(seq(npd(g["EndOfLine"]), any())), g["EndOfLine"]);
|
||||
g["Space"] = cho(chr(' '), chr('\t'), g["EndOfLine"]);
|
||||
g["EndOfLine"] = cho(lit("\r\n"), chr('\n'), chr('\r'));
|
||||
g["EndOfFile"] = npd(any());
|
||||
g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
|
||||
g["Comment"] <= seq(chr('#'), zom(seq(npd(g["EndOfLine"]), any())), g["EndOfLine"]);
|
||||
g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
|
||||
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
|
||||
g["EndOfFile"] <= npd(any());
|
||||
|
||||
// Set definition names
|
||||
for (auto& x: g) {
|
||||
@ -1244,7 +1276,7 @@ std::shared_ptr<Grammar> make_grammar(const char* syntax, std::string& start)
|
||||
|
||||
sa[peg["Definition"]] = [&](const std::vector<Any>& v) {
|
||||
const auto& name = v[0].get<std::string>();
|
||||
(*grammar)[name] = v[2].get<std::shared_ptr<Rule>>();
|
||||
(*grammar)[name] <= v[2].get<std::shared_ptr<Rule>>();
|
||||
(*grammar)[name].name = name;
|
||||
|
||||
if (start.empty()) {
|
||||
@ -1315,24 +1347,24 @@ std::shared_ptr<Grammar> make_grammar(const char* syntax, std::string& start)
|
||||
}
|
||||
};
|
||||
|
||||
sa[peg["IdentifierContent"]] = [](const char*s, size_t l) {
|
||||
sa[peg["IdentCont"]] = [](const char*s, size_t l) {
|
||||
return std::string(s, l);
|
||||
};
|
||||
|
||||
sa[peg["Literal"]] = [](const std::vector<Any>& v) {
|
||||
return lit(v[0].get<std::string>().c_str());
|
||||
};
|
||||
sa[peg["SingleQuotesContent"]] = [](const char*s, size_t l) {
|
||||
sa[peg["SQCont"]] = [](const char*s, size_t l) {
|
||||
return std::string(s, l);
|
||||
};
|
||||
sa[peg["DoubleQuotesContent"]] = [](const char*s, size_t l) {
|
||||
sa[peg["DQCont"]] = [](const char*s, size_t l) {
|
||||
return std::string(s, l);
|
||||
};
|
||||
|
||||
sa[peg["Class"]] = [](const std::vector<Any>& v) {
|
||||
return cls(v[0].get<std::string>().c_str());
|
||||
};
|
||||
sa[peg["ClassContent"]] = [](const char*s, size_t l) {
|
||||
sa[peg["ClassCont"]] = [](const char*s, size_t l) {
|
||||
return std::string(s, l);
|
||||
};
|
||||
|
||||
|
78
test/test.cc
78
test/test.cc
@ -35,21 +35,40 @@ TEST_CASE("String capture test", "[general]")
|
||||
using namespace peglib;
|
||||
using namespace std;
|
||||
|
||||
TEST_CASE("String capture test with match", "[general]")
|
||||
TEST_CASE("String capture test2", "[general]")
|
||||
{
|
||||
{
|
||||
vector<string> tags;
|
||||
|
||||
Definition ROOT, TAG, TAG_NAME, WS;
|
||||
ROOT <= seq(WS, zom(TAG));
|
||||
TAG <= seq(chr('['), TAG_NAME, chr(']'), WS);
|
||||
TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); };
|
||||
WS <= zom(cls(" \t"));
|
||||
|
||||
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
|
||||
|
||||
REQUIRE(ret == true);
|
||||
REQUIRE(tags.size() == 3);
|
||||
REQUIRE(tags[0] == "tag1");
|
||||
REQUIRE(tags[1] == "tag:2");
|
||||
REQUIRE(tags[2] == "tag-3");
|
||||
}
|
||||
|
||||
REQUIRE(VARINT_COUNT == 0);
|
||||
}
|
||||
|
||||
TEST_CASE("String capture test with embedded match action", "[general]")
|
||||
{
|
||||
{
|
||||
Definition ROOT, TAG, TAG_NAME, WS;
|
||||
|
||||
ROOT = seq(WS, zom(TAG));
|
||||
TAG = seq(chr('['), TAG_NAME, chr(']'), WS);
|
||||
TAG_NAME = oom(seq(npd(chr(']')), any()));
|
||||
WS = zom(cls(" \t"));
|
||||
|
||||
vector<string> tags;
|
||||
|
||||
TAG_NAME.match = [&](const char* s, size_t l) {
|
||||
tags.push_back(string(s, l));
|
||||
};
|
||||
ROOT <= seq(WS, zom(TAG));
|
||||
TAG <= seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), WS);
|
||||
TAG_NAME <= oom(seq(npd(chr(']')), any()));
|
||||
WS <= zom(cls(" \t"));
|
||||
|
||||
auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] ");
|
||||
|
||||
@ -69,8 +88,8 @@ TEST_CASE("Cyclic grammer test", "[general]")
|
||||
Definition PARENT;
|
||||
Definition CHILD;
|
||||
|
||||
PARENT = seq(CHILD);
|
||||
CHILD = seq(PARENT);
|
||||
PARENT <= seq(CHILD);
|
||||
CHILD <= seq(PARENT);
|
||||
}
|
||||
|
||||
REQUIRE(VARINT_COUNT == 0);
|
||||
@ -98,16 +117,14 @@ TEST_CASE("Calculator test", "[general]")
|
||||
// Construct grammar
|
||||
Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER;
|
||||
|
||||
EXPRESSION = seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
|
||||
TERM = seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
|
||||
FACTOR = cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
|
||||
TERM_OPERATOR = cls("+-");
|
||||
FACTOR_OPERATOR = cls("*/");
|
||||
NUMBER = oom(cls("0-9"));
|
||||
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM)));
|
||||
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR)));
|
||||
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
|
||||
TERM_OPERATOR <= cls("+-");
|
||||
FACTOR_OPERATOR <= cls("*/");
|
||||
NUMBER <= oom(cls("0-9"));
|
||||
|
||||
// Setup actions
|
||||
SemanticActions<Any> actions;
|
||||
|
||||
auto reduce = [](const vector<Any>& v) -> long {
|
||||
long ret = v[0].get<long>();
|
||||
for (auto i = 1u; i < v.size(); i += 2) {
|
||||
@ -122,6 +139,7 @@ TEST_CASE("Calculator test", "[general]")
|
||||
return ret;
|
||||
};
|
||||
|
||||
SemanticActions<Any> actions;
|
||||
actions[EXPRESSION] = reduce;
|
||||
actions[TERM] = reduce;
|
||||
actions[TERM_OPERATOR] = [](const char* s, size_t l) { return *s; };
|
||||
@ -174,11 +192,11 @@ TEST_CASE("Calculator test2", "[general]")
|
||||
return ret;
|
||||
};
|
||||
|
||||
a[g["EXPRESSION"]] = reduce;
|
||||
a[g["TERM"]] = reduce;
|
||||
a[g["TERM_OPERATOR"]] = [](const char* s, size_t l) { return *s; };
|
||||
a[g["EXPRESSION"]] = reduce;
|
||||
a[g["TERM"]] = reduce;
|
||||
a[g["TERM_OPERATOR"]] = [](const char* s, size_t l) { return *s; };
|
||||
a[g["FACTOR_OPERATOR"]] = [](const char* s, size_t l) { return *s; };
|
||||
a[g["NUMBER"]] = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
a[g["NUMBER"]] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); };
|
||||
|
||||
// Parse
|
||||
Any val;
|
||||
@ -336,15 +354,15 @@ TEST_CASE("PEG IdentStart", "[peg]")
|
||||
REQUIRE(g["IdentStart"].parse("0") == false);
|
||||
}
|
||||
|
||||
TEST_CASE("PEG IdentCont", "[peg]")
|
||||
TEST_CASE("PEG IdentRest", "[peg]")
|
||||
{
|
||||
Grammar g = make_peg_grammar();
|
||||
REQUIRE(g["IdentCont"].parse("_") == true);
|
||||
REQUIRE(g["IdentCont"].parse("a") == true);
|
||||
REQUIRE(g["IdentCont"].parse("Z") == true);
|
||||
REQUIRE(g["IdentCont"].parse("") == false);
|
||||
REQUIRE(g["IdentCont"].parse(" ") == false);
|
||||
REQUIRE(g["IdentCont"].parse("0") == true);
|
||||
REQUIRE(g["IdentRest"].parse("_") == true);
|
||||
REQUIRE(g["IdentRest"].parse("a") == true);
|
||||
REQUIRE(g["IdentRest"].parse("Z") == true);
|
||||
REQUIRE(g["IdentRest"].parse("") == false);
|
||||
REQUIRE(g["IdentRest"].parse(" ") == false);
|
||||
REQUIRE(g["IdentRest"].parse("0") == true);
|
||||
}
|
||||
|
||||
TEST_CASE("PEG Literal", "[peg]")
|
||||
|
Loading…
Reference in New Issue
Block a user