From 9cb926f7a03dc4f49b5a6fedba79f5adb8a45255 Mon Sep 17 00:00:00 2001 From: yhirose Date: Mon, 9 Feb 2015 12:01:59 -0500 Subject: [PATCH] Work in progress. --- example/calc2.cc | 18 ++---- peglib.h | 148 ++++++++++++++++++++++++++++------------------- test/test.cc | 78 +++++++++++++++---------- 3 files changed, 144 insertions(+), 100 deletions(-) diff --git a/example/calc2.cc b/example/calc2.cc index 5fcd4a1..f844564 100644 --- a/example/calc2.cc +++ b/example/calc2.cc @@ -26,18 +26,12 @@ class Calculator { public: Calculator() { - EXPRESSION = seq(TERM, zom(seq(TERM_OPERATOR, TERM))); - TERM = seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); - FACTOR = cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); - TERM_OPERATOR = cls("+-"); - FACTOR_OPERATOR = cls("*/"); - NUMBER = oom(cls("0-9")); - - actions[EXPRESSION] = reduce; - actions[TERM] = reduce; - actions[TERM_OPERATOR] = [](const char* s, size_t l) { return (char)*s; }; - actions[FACTOR_OPERATOR] = [](const char* s, size_t l) { return (char)*s; }; - actions[NUMBER] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; + TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; + FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); + TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; }; + FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; }; + NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; } bool execute(const char* s, long& v) const { diff --git a/peglib.h b/peglib.h index 8dc3e04..5bec074 100644 --- a/peglib.h +++ b/peglib.h @@ -553,12 +553,14 @@ class Grouping { public: Grouping(const std::shared_ptr& rule) : rule_(rule) {} + Grouping(const std::shared_ptr& rule, std::function match) : rule_(rule), match_(match) {} template Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; private: - std::shared_ptr rule_; + std::shared_ptr rule_; + std::function match_; }; class NonTerminal @@ -892,8 +894,9 @@ public: return static_cast(this); } - void operator=(const std::shared_ptr& rule) { + Definition& operator<=(const std::shared_ptr& rule) { set_rule(rule); + return *this; } template @@ -915,8 +918,21 @@ public: return parse(s, strlen(s), sa, val); } + template + bool parse(const char* s, size_t l, T& val) const { + SemanticValues sv; + auto m = rule_->parse(s, l, nullptr, &sv); + return m.ret && m.len == l; + } + + template + bool parse(const char* s, T& val) const { + return parse(s, strlen(s), val); + } + bool parse(const char* s, size_t l) const { - auto m = rule_->parse(s, l, nullptr, nullptr); + SemanticValues sv; + auto m = rule_->parse(s, l, nullptr, &sv); return m.ret && m.len == l; } @@ -928,6 +944,12 @@ public: std::function match; + template + void operator,(F fn) { + action = fn; + } + SemanticActionAdaptor action; + private: friend class DefinitionReference; @@ -1027,7 +1049,11 @@ Match NotPredicate::parse(const char* s, size_t l, const SemanticActions* sa, template Match Grouping::parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const { assert(rule_); - return rule_->parse(s, l, sa, sv); + auto m = rule_->parse(s, l, sa, sv); + if (m.ret && match_) { + match_(s, m.len); + } + return m; } template @@ -1047,17 +1073,19 @@ Match NonTerminal::parse(const char* s, size_t l, const SemanticActions* sa, outer_->match(s, m.len); } - typedef std::function& v, const std::vector& n)> Action; - Action action; - - if (sa) { - auto it = sa->find(outer_); - if (it != sa->end()) { - action = it->second; - } - } - if (sv) { + typedef std::function& v, const std::vector& n)> Action; + Action action; + + if (outer_->action) { + action = outer_->action; + } else if (sa) { + auto it = sa->find(outer_); + if (it != sa->end()) { + action = it->second; + } + } + sv->names.push_back(outer_->name); auto val = reduce(s, m.len, chldsv->values, chldsv->names, action); sv->values.push_back(val); @@ -1160,6 +1188,10 @@ inline std::shared_ptr grp(const std::shared_ptr& rule) { return std::make_shared(Grouping(rule)); } +inline std::shared_ptr grp(const std::shared_ptr& rule, std::function match) { + return std::make_shared(Grouping(rule, match)); +} + inline std::shared_ptr ref(const std::map& grammar, const std::string& name) { return std::make_shared(DefinitionReference(grammar, name)); } @@ -1175,52 +1207,52 @@ Grammar make_peg_grammar() Grammar g; // Setup PEG syntax parser - g["Grammar"] = seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); - g["Definition"] = seq(g["Identifier"], g["LEFTARROW"], g["Expression"]); + g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); + g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]); - g["Expression"] = seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); - g["Sequence"] = zom(g["Prefix"]); - g["Prefix"] = seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]); - g["Suffix"] = seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"]))); - g["Primary"] = cho(seq(g["Identifier"], npd(g["LEFTARROW"])), - seq(g["OPEN"], g["Expression"], g["CLOSE"]), - g["Literal"], g["Class"], g["DOT"]); + g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); + g["Sequence"] <= zom(g["Prefix"]); + g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]); + g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"]))); + g["Primary"] <= cho(seq(g["Identifier"], npd(g["LEFTARROW"])), + seq(g["OPEN"], g["Expression"], g["CLOSE"]), + g["Literal"], g["Class"], g["DOT"]); - g["Identifier"] = seq(g["IdentifierContent"], g["Spacing"]); - g["IdentifierContent"] = seq(g["IdentStart"], zom(g["IdentCont"])); - g["IdentStart"] = cls("a-zA-Z_"); - g["IdentCont"] = cho(g["IdentStart"], cls("0-9")); + g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]); + g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"])); + g["IdentStart"] <= cls("a-zA-Z_"); + g["IdentRest"] <= cho(g["IdentStart"], cls("0-9")); - g["Literal"] = cho(seq(cls("'"), g["SingleQuotesContent"], cls("'"), g["Spacing"]), - seq(cls("\""), g["DoubleQuotesContent"], cls("\""), g["Spacing"])); - g["SingleQuotesContent"] = zom(seq(npd(cls("'")), g["Char"])); - g["DoubleQuotesContent"] = zom(seq(npd(cls("\"")), g["Char"])); + g["Literal"] <= cho(seq(cls("'"), g["SQCont"], cls("'"), g["Spacing"]), + seq(cls("\""), g["DQCont"], cls("\""), g["Spacing"])); + g["SQCont"] <= zom(seq(npd(cls("'")), g["Char"])); + g["DQCont"] <= zom(seq(npd(cls("\"")), g["Char"])); - g["Class"] = seq(chr('['), g["ClassContent"], chr(']'), g["Spacing"]); - g["ClassContent"] = zom(seq(npd(chr(']')), g["Range"])); + g["Class"] <= seq(chr('['), g["ClassCont"], chr(']'), g["Spacing"]); + g["ClassCont"] <= zom(seq(npd(chr(']')), g["Range"])); - g["Range"] = cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]); - g["Char"] = cho(seq(chr('\\'), cls("nrt'\"[]\\")), - seq(chr('\\'), cls("0-2"), cls("0-7"), cls("0-7")), - seq(chr('\\'), cls("0-7"), opt(cls("0-7"))), - seq(npd(chr('\\')), any())); + g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]); + g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")), + seq(chr('\\'), cls("0-2"), cls("0-7"), cls("0-7")), + seq(chr('\\'), cls("0-7"), opt(cls("0-7"))), + seq(npd(chr('\\')), any())); - g["LEFTARROW"] = seq(lit("<-"), g["Spacing"]); - g["SLASH"] = seq(chr('/'), g["Spacing"]); - g["AND"] = seq(chr('&'), g["Spacing"]); - g["NOT"] = seq(chr('!'), g["Spacing"]); - g["QUESTION"] = seq(chr('?'), g["Spacing"]); - g["STAR"] = seq(chr('*'), g["Spacing"]); - g["PLUS"] = seq(chr('+'), g["Spacing"]); - g["OPEN"] = seq(chr('('), g["Spacing"]); - g["CLOSE"] = seq(chr(')'), g["Spacing"]); - g["DOT"] = seq(chr('.'), g["Spacing"]); + g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]); + g["SLASH"] <= seq(chr('/'), g["Spacing"]); + g["AND"] <= seq(chr('&'), g["Spacing"]); + g["NOT"] <= seq(chr('!'), g["Spacing"]); + g["QUESTION"] <= seq(chr('?'), g["Spacing"]); + g["STAR"] <= seq(chr('*'), g["Spacing"]); + g["PLUS"] <= seq(chr('+'), g["Spacing"]); + g["OPEN"] <= seq(chr('('), g["Spacing"]); + g["CLOSE"] <= seq(chr(')'), g["Spacing"]); + g["DOT"] <= seq(chr('.'), g["Spacing"]); - g["Spacing"] = zom(cho(g["Space"], g["Comment"])); - g["Comment"] = seq(chr('#'), zom(seq(npd(g["EndOfLine"]), any())), g["EndOfLine"]); - g["Space"] = cho(chr(' '), chr('\t'), g["EndOfLine"]); - g["EndOfLine"] = cho(lit("\r\n"), chr('\n'), chr('\r')); - g["EndOfFile"] = npd(any()); + g["Spacing"] <= zom(cho(g["Space"], g["Comment"])); + g["Comment"] <= seq(chr('#'), zom(seq(npd(g["EndOfLine"]), any())), g["EndOfLine"]); + g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]); + g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r')); + g["EndOfFile"] <= npd(any()); // Set definition names for (auto& x: g) { @@ -1244,7 +1276,7 @@ std::shared_ptr make_grammar(const char* syntax, std::string& start) sa[peg["Definition"]] = [&](const std::vector& v) { const auto& name = v[0].get(); - (*grammar)[name] = v[2].get>(); + (*grammar)[name] <= v[2].get>(); (*grammar)[name].name = name; if (start.empty()) { @@ -1315,24 +1347,24 @@ std::shared_ptr make_grammar(const char* syntax, std::string& start) } }; - sa[peg["IdentifierContent"]] = [](const char*s, size_t l) { + sa[peg["IdentCont"]] = [](const char*s, size_t l) { return std::string(s, l); }; sa[peg["Literal"]] = [](const std::vector& v) { return lit(v[0].get().c_str()); }; - sa[peg["SingleQuotesContent"]] = [](const char*s, size_t l) { + sa[peg["SQCont"]] = [](const char*s, size_t l) { return std::string(s, l); }; - sa[peg["DoubleQuotesContent"]] = [](const char*s, size_t l) { + sa[peg["DQCont"]] = [](const char*s, size_t l) { return std::string(s, l); }; sa[peg["Class"]] = [](const std::vector& v) { return cls(v[0].get().c_str()); }; - sa[peg["ClassContent"]] = [](const char*s, size_t l) { + sa[peg["ClassCont"]] = [](const char*s, size_t l) { return std::string(s, l); }; diff --git a/test/test.cc b/test/test.cc index 7e78cac..0d3a728 100644 --- a/test/test.cc +++ b/test/test.cc @@ -35,21 +35,40 @@ TEST_CASE("String capture test", "[general]") using namespace peglib; using namespace std; -TEST_CASE("String capture test with match", "[general]") +TEST_CASE("String capture test2", "[general]") +{ + { + vector tags; + + Definition ROOT, TAG, TAG_NAME, WS; + ROOT <= seq(WS, zom(TAG)); + TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); + TAG_NAME <= oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); }; + WS <= zom(cls(" \t")); + + auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); + + REQUIRE(ret == true); + REQUIRE(tags.size() == 3); + REQUIRE(tags[0] == "tag1"); + REQUIRE(tags[1] == "tag:2"); + REQUIRE(tags[2] == "tag-3"); + } + + REQUIRE(VARINT_COUNT == 0); +} + +TEST_CASE("String capture test with embedded match action", "[general]") { { Definition ROOT, TAG, TAG_NAME, WS; - ROOT = seq(WS, zom(TAG)); - TAG = seq(chr('['), TAG_NAME, chr(']'), WS); - TAG_NAME = oom(seq(npd(chr(']')), any())); - WS = zom(cls(" \t")); - vector tags; - TAG_NAME.match = [&](const char* s, size_t l) { - tags.push_back(string(s, l)); - }; + ROOT <= seq(WS, zom(TAG)); + TAG <= seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), WS); + TAG_NAME <= oom(seq(npd(chr(']')), any())); + WS <= zom(cls(" \t")); auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); @@ -69,8 +88,8 @@ TEST_CASE("Cyclic grammer test", "[general]") Definition PARENT; Definition CHILD; - PARENT = seq(CHILD); - CHILD = seq(PARENT); + PARENT <= seq(CHILD); + CHILD <= seq(PARENT); } REQUIRE(VARINT_COUNT == 0); @@ -98,16 +117,14 @@ TEST_CASE("Calculator test", "[general]") // Construct grammer Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; - EXPRESSION = seq(TERM, zom(seq(TERM_OPERATOR, TERM))); - TERM = seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); - FACTOR = cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); - TERM_OPERATOR = cls("+-"); - FACTOR_OPERATOR = cls("*/"); - NUMBER = oom(cls("0-9")); + EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))); + TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); + FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); + TERM_OPERATOR <= cls("+-"); + FACTOR_OPERATOR <= cls("*/"); + NUMBER <= oom(cls("0-9")); // Setup actions - SemanticActions actions; - auto reduce = [](const vector& v) -> long { long ret = v[0].get(); for (auto i = 1u; i < v.size(); i += 2) { @@ -122,6 +139,7 @@ TEST_CASE("Calculator test", "[general]") return ret; }; + SemanticActions actions; actions[EXPRESSION] = reduce; actions[TERM] = reduce; actions[TERM_OPERATOR] = [](const char* s, size_t l) { return *s; }; @@ -174,11 +192,11 @@ TEST_CASE("Calculator test2", "[general]") return ret; }; - a[g["EXPRESSION"]] = reduce; - a[g["TERM"]] = reduce; - a[g["TERM_OPERATOR"]] = [](const char* s, size_t l) { return *s; }; + a[g["EXPRESSION"]] = reduce; + a[g["TERM"]] = reduce; + a[g["TERM_OPERATOR"]] = [](const char* s, size_t l) { return *s; }; a[g["FACTOR_OPERATOR"]] = [](const char* s, size_t l) { return *s; }; - a[g["NUMBER"]] = [&](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + a[g["NUMBER"]] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; // Parse Any val; @@ -336,15 +354,15 @@ TEST_CASE("PEG IdentStart", "[peg]") REQUIRE(g["IdentStart"].parse("0") == false); } -TEST_CASE("PEG IdentCont", "[peg]") +TEST_CASE("PEG IdentRest", "[peg]") { Grammar g = make_peg_grammar(); - REQUIRE(g["IdentCont"].parse("_") == true); - REQUIRE(g["IdentCont"].parse("a") == true); - REQUIRE(g["IdentCont"].parse("Z") == true); - REQUIRE(g["IdentCont"].parse("") == false); - REQUIRE(g["IdentCont"].parse(" ") == false); - REQUIRE(g["IdentCont"].parse("0") == true); + REQUIRE(g["IdentRest"].parse("_") == true); + REQUIRE(g["IdentRest"].parse("a") == true); + REQUIRE(g["IdentRest"].parse("Z") == true); + REQUIRE(g["IdentRest"].parse("") == false); + REQUIRE(g["IdentRest"].parse(" ") == false); + REQUIRE(g["IdentRest"].parse("0") == true); } TEST_CASE("PEG Literal", "[peg]")