From 4c5fd705039ffbf8dfec6018b48f6c12a5f06602 Mon Sep 17 00:00:00 2001 From: yhirose Date: Sat, 14 Feb 2015 19:02:54 -0500 Subject: [PATCH] Removed the name vector and added the context data in semantic action. --- README.md | 6 +- example/calc2.cc | 2 +- peglib.h | 228 +++++++++++++++++++++++++---------------------- 3 files changed, 126 insertions(+), 110 deletions(-) diff --git a/README.md b/README.md index 08968ed..582466c 100644 --- a/README.md +++ b/README.md @@ -54,17 +54,17 @@ This action `[&](const char* s, size_t l)` gives a pointer and length of the mat There are more actions available. Here is a complete list: ```c++ -[](const char* s, size_t l, const std::vector& v, const std::vector& n) +[](const char* s, size_t l, const std::vector& v, any& c) [](const char* s, size_t l, const std::vector& v) [](const char* s, size_t l) -[](const std::vector& v, const std::vector& n) +[](const std::vector& v, any& c) [](const std::vector& v) []() ``` `const std::vector& v` contains semantic values. `peglib::any` class is very similar to [boost::any](http://www.boost.org/doc/libs/1_57_0/doc/html/any.html). You can obtain a value by casting it to the actual type. In order to determine the actual type, you have to check the return value type of the child action for the semantic value. -`const std::vector& n` contains definition names of semantic values. +`any& c` is context data that the user can use for any purpose. This is a complete code of a simple calculator. It shows how to associate actions to definitions and set/get semantic values. 
diff --git a/example/calc2.cc b/example/calc2.cc index aa27dfc..67a1f7b 100644 --- a/example/calc2.cc +++ b/example/calc2.cc @@ -46,7 +46,7 @@ int main(int argc, const char** argv) return result; }; - Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; + rule EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; diff --git a/peglib.h b/peglib.h index 2296063..166dd97 100644 --- a/peglib.h +++ b/peglib.h @@ -173,11 +173,7 @@ private: /* * Semantic values */ -struct Values -{ - std::vector names; - std::vector values; -}; +typedef std::vector Values; /* * Semantic action @@ -234,26 +230,26 @@ public: return (bool)fn_; } - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) const { - return fn_(s, l, v, n); + any operator()(const char* s, size_t l, const std::vector& v, any& c) const { + return fn_(s, l, v, c); } private: template struct TypeAdaptor { - TypeAdaptor(std::function& v, const std::vector& n)> fn) + TypeAdaptor(std::function& v, any& c)> fn) : fn_(fn) {} - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { - return call(fn_, s, l, v, n); + any operator()(const char* s, size_t l, const std::vector& v, any& c) { + return call(fn_, s, l, v, c); } - std::function& v, const std::vector& n)> fn_; + std::function& v, any& c)> fn_; }; template struct TypeAdaptor_s_l_v { TypeAdaptor_s_l_v(std::function& v)> fn) : fn_(fn) {} - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { + any operator()(const char* s, size_t l, const std::vector& v, any& c) { return call(fn_, s, l, v); } std::function& v)> fn_; @@ -262,7 +258,7 @@ private: template struct TypeAdaptor_s_l { TypeAdaptor_s_l(std::function fn) : fn_(fn) {} - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { + any 
operator()(const char* s, size_t l, const std::vector& v, any& c) { return call(fn_, s, l); } std::function fn_; @@ -270,17 +266,17 @@ private: template struct TypeAdaptor_v_n { - TypeAdaptor_v_n(std::function& v, const std::vector& n)> fn) : fn_(fn) {} - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { - return call(fn_, v, n); + TypeAdaptor_v_n(std::function& v, any& c)> fn) : fn_(fn) {} + any operator()(const char* s, size_t l, const std::vector& v, any& c) { + return call(fn_, v, c); } - std::function& v, const std::vector& n)> fn_; + std::function& v, any& c)> fn_; }; template struct TypeAdaptor_v { TypeAdaptor_v(std::function& v)> fn) : fn_(fn) {} - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { + any operator()(const char* s, size_t l, const std::vector& v, any& c) { return call(fn_, v); } std::function& v)> fn_; @@ -289,21 +285,21 @@ private: template struct TypeAdaptor_empty { TypeAdaptor_empty(std::function fn) : fn_(fn) {} - any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { + any operator()(const char* s, size_t l, const std::vector& v, any& c) { return call(fn_); } std::function fn_; }; - typedef std::function& v, const std::vector& n)> Fty; + typedef std::function& v, any& c)> Fty; template - Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t, const std::vector& v, const std::vector& n) const) { + Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t, const std::vector& v, any& c) const) { return TypeAdaptor(fn); } template - Fty make_adaptor(F fn, R(*mf)(const char*, size_t, const std::vector& v, const std::vector& n)) { + Fty make_adaptor(F fn, R(*mf)(const char*, size_t, const std::vector& v, any& c)) { return TypeAdaptor(fn); } @@ -328,12 +324,12 @@ private: } template - Fty make_adaptor(F fn, R (F::*mf)(const std::vector& v, const std::vector& n) const) { + Fty make_adaptor(F fn, R (F::*mf)(const std::vector& v, any& c) const) { 
return TypeAdaptor_v_n(fn); } template - Fty make_adaptor(F fn, R (*mf)(const std::vector& v, const std::vector& n)) { + Fty make_adaptor(F fn, R (*mf)(const std::vector& v, any& c)) { return TypeAdaptor_v_n(fn); } @@ -387,7 +383,7 @@ class Ope { public: virtual ~Ope() {}; - virtual Result parse(const char* s, size_t l, Values& v) const = 0; + virtual Result parse(const char* s, size_t l, Values& v, any& c) const = 0; }; class Sequence : public Ope @@ -414,11 +410,11 @@ public: Sequence(const std::vector>& opes) : opes_(opes) {} Sequence(std::vector>&& opes) : opes_(std::move(opes)) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { size_t i = 0; for (const auto& ope : opes_) { const auto& rule = *ope; - auto r = rule.parse(s + i, l - i, v); + auto r = rule.parse(s + i, l - i, v, c); if (!r.ret) { auto err = r.err; if (err.empty()) { @@ -457,19 +453,16 @@ public: PrioritizedChoice(const std::vector>& opes) : opes_(opes) {} PrioritizedChoice(std::vector>&& opes) : opes_(std::move(opes)) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { size_t id = 0; for (const auto& ope : opes_) { const auto& rule = *ope; Values chldsv; - auto r = rule.parse(s, l, chldsv); + auto r = rule.parse(s, l, chldsv, c); if (r.ret) { - if (!chldsv.values.empty()) { - for (const auto& x: chldsv.values) { - v.values.push_back(x); - } - for (const auto& x: chldsv.names) { - v.names.push_back(x); + if (!chldsv.empty()) { + for (const auto& x: chldsv) { + v.push_back(x); } } return success(r.len, id); @@ -490,11 +483,11 @@ class ZeroOrMore : public Ope public: ZeroOrMore(const std::shared_ptr& ope) : ope_(ope) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { auto i = 0; while (l - i > 0) { const auto& rule = *ope_; - auto r = rule.parse(s + i, l - i, v); + auto r 
= rule.parse(s + i, l - i, v, c); if (!r.ret) { break; } @@ -512,9 +505,9 @@ class OneOrMore : public Ope public: OneOrMore(const std::shared_ptr& ope) : ope_(ope) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { const auto& rule = *ope_; - auto r = rule.parse(s, l, v); + auto r = rule.parse(s, l, v, c); if (!r.ret) { auto err = r.err; if (err.empty()) { @@ -525,7 +518,7 @@ public: auto i = r.len; while (l - i > 0) { const auto& rule = *ope_; - auto r = rule.parse(s + i, l - i, v); + auto r = rule.parse(s + i, l - i, v, c); if (!r.ret) { break; } @@ -543,9 +536,9 @@ class Option : public Ope public: Option(const std::shared_ptr& ope) : ope_(ope) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { const auto& rule = *ope_; - auto r = rule.parse(s, l, v); + auto r = rule.parse(s, l, v, c); return success(r.ret ? r.len : 0); } @@ -558,9 +551,9 @@ class AndPredicate : public Ope public: AndPredicate(const std::shared_ptr& ope) : ope_(ope) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { const auto& rule = *ope_; - auto r = rule.parse(s, l, v); + auto r = rule.parse(s, l, v, c); if (r.ret) { return success(0); } else { @@ -577,9 +570,9 @@ class NotPredicate : public Ope public: NotPredicate(const std::shared_ptr& ope) : ope_(ope) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { const auto& rule = *ope_; - auto r = rule.parse(s, l, v); + auto r = rule.parse(s, l, v, c); if (r.ret) { return fail(s); } else { @@ -596,7 +589,7 @@ class LiteralString : public Ope public: LiteralString(const std::string& s) : lit_(s) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { auto i = 0u; for (; i 
< lit_.size(); i++) { if (i >= l || s[i] != lit_[i]) { @@ -615,7 +608,7 @@ class CharacterClass : public Ope public: CharacterClass(const std::string& chars) : chars_(chars) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { // TODO: UTF8 support if (l < 1) { return fail(s); @@ -647,7 +640,7 @@ class Character : public Ope public: Character(char ch) : ch_(ch) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { // TODO: UTF8 support if (l < 1 || s[0] != ch_) { return fail(s); @@ -662,7 +655,7 @@ private: class AnyCharacter : public Ope { public: - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { // TODO: UTF8 support if (l < 1) { return fail(s); @@ -678,10 +671,10 @@ public: Grouping(const std::shared_ptr& ope) : ope_(ope) {} Grouping(const std::shared_ptr& ope, std::function match) : ope_(ope), match_(match) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { assert(ope_); const auto& rule = *ope_; - auto r = rule.parse(s, l, v); + auto r = rule.parse(s, l, v, c); if (r.ret && match_) { match_(s, r.len); } @@ -698,11 +691,11 @@ class WeakHolder : public Ope public: WeakHolder(const std::shared_ptr& ope) : weak_(ope) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { auto ope = weak_.lock(); assert(ope); const auto& rule = *ope; - return rule.parse(s, l, v); + return rule.parse(s, l, v, c); } private: @@ -751,12 +744,17 @@ public: return *this; } + Result parse(const char* s, size_t l, Values& v, any& c) const { + return holder_->parse(s, l, v, c); + } + template Result parse(const char* s, size_t l, T& val) const { Values v; - auto r = holder_->parse(s, l, v); - if (r.ret && 
!v.values.empty() && !v.values.front().is_undefined()) { - val = v.values[0].get(); + any c; + auto r = holder_->parse(s, l, v, c); + if (r.ret && !v.empty() && !v.front().is_undefined()) { + val = v[0].get(); } return r; } @@ -770,7 +768,8 @@ public: Result parse(const char* s) const { auto l = strlen(s); Values v; - return holder_->parse(s, l, v); + any c; + return holder_->parse(s, l, v, c); } Definition& operator=(Action ac) { @@ -802,17 +801,15 @@ private: Holder(Definition* outer) : outer_(outer) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { if (!ope_) { throw std::logic_error("Uninitialized definition ope was used..."); } const auto& rule = *ope_; Values chldsv; - auto r = rule.parse(s, l, chldsv); + auto r = rule.parse(s, l, chldsv, c); if (r.ret) { - v.names.push_back(outer_->name); - assert(!outer_->actions.empty()); auto id = r.choice + 1; @@ -820,7 +817,7 @@ private: ? outer_->actions[id] : outer_->actions[0]; - v.values.push_back(reduce(s, r.len, chldsv, ac)); + v.push_back(reduce(s, r.len, chldsv, c, ac)); } return r; } @@ -828,13 +825,13 @@ private: private: friend class Definition; - any reduce(const char* s, size_t l, const Values& v, const Action& action) const { + any reduce(const char* s, size_t l, const Values& v, any& c, const Action& action) const { if (action) { - return action(s, l, v.values, v.names); - } else if (v.values.empty()) { + return action(s, l, v, c); + } else if (v.empty()) { return any(); } else { - return v.values.front(); + return v.front(); } } @@ -856,9 +853,9 @@ public: : grammar_(grammar) , name_(name) {} - Result parse(const char* s, size_t l, Values& v) const { + Result parse(const char* s, size_t l, Values& v, any& c) const { const auto& rule = *grammar_.at(name_).holder_; - return rule.parse(s, l, v); + return rule.parse(s, l, v, c); } private: @@ -962,6 +959,7 @@ public: return get().perform_core(s, l, start, log); } + // For debugging
purpose static Grammar& grammar() { return get().g; } @@ -977,6 +975,12 @@ private: setup_actions(); } + struct Context { + std::shared_ptr grammar = std::make_shared(); + std::string start; + std::map refs; + }; + void make_grammar() { // Setup PEG syntax parser g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); @@ -1033,6 +1037,17 @@ private: } void setup_actions() { + g["Definition"] = [&](const std::vector& v, any& c) { + Context& cxt = *c.get(); + + const auto& name = v[0].get(); + (*cxt.grammar)[name] <= v[2].get>(); + (*cxt.grammar)[name].name = name; + + if (cxt.start.empty()) { + cxt.start = name; + } + }; g["Expression"] = [&](const std::vector& v) { if (v.size() == 1) { @@ -1062,38 +1077,54 @@ private: } }; - g["Prefix"] = [&](const std::vector& v, const std::vector& n) { + g["Prefix"] = [&](const std::vector& v, any& c) { std::shared_ptr ope; if (v.size() == 1) { ope = v[0].get>(); } else { assert(v.size() == 2); + auto tok = v[0].get(); ope = v[1].get>(); - if (n[0] == "AND") { + if (tok == '&') { ope = apd(ope); - } else { // "NOT" + } else { // '!' 
ope = npd(ope); } } return ope; }; - g["Suffix"] = [&](const std::vector& v, const std::vector& n) { + g["Suffix"] = [&](const std::vector& v, any& c) { auto ope = v[0].get>(); if (v.size() == 1) { return ope; } else { assert(v.size() == 2); - if (n[1] == "QUESTION") { + auto tok = v[1].get(); + if (tok == '?') { return opt(ope); - } else if (n[1] == "STAR") { + } else if (tok == '*') { return zom(ope); - } else { // "PLUS" + } else { // '+' return oom(ope); } } }; + g["Primary"].actions = { + [&](const std::vector& v) { + return v[0]; + }, + [&](const char* s, size_t l, const std::vector& v, any& c) { + Context& cxt = *c.get(); + cxt.refs[v[0]] = s; + return ref(*cxt.grammar, v[0]); + }, + [&](const std::vector& v) { + return v[1]; + } + }; + g["IdentCont"] = [](const char*s, size_t l) { return std::string(s, l); }; @@ -1115,40 +1146,23 @@ private: return resolve_escape_sequence(s, l); }; + g["AND"] = [](const char*s, size_t l) { return *s; }; + g["NOT"] = [](const char*s, size_t l) { return *s; }; + g["QUESTION"] = [](const char*s, size_t l) { return *s; }; + g["STAR"] = [](const char*s, size_t l) { return *s; }; + g["PLUS"] = [](const char*s, size_t l) { return *s; }; + g["DOT"] = []() { return dot(); }; } std::shared_ptr perform_core(const char* s, size_t l, std::string& start, Log log) { - auto grammar = std::make_shared(); - start.clear(); - std::map refs; + Values v; + Context cxt; + any c = &cxt; + auto r = g["Grammar"].parse(s, l, v, c); - g["Definition"] = [&](const std::vector& v) { - const auto& name = v[0].get(); - (*grammar)[name] <= v[2].get>(); - (*grammar)[name].name = name; - - if (start.empty()) { - start = name; - } - }; - - g["Primary"].actions = { - [&](const std::vector& v) { - return v[0]; - }, - [&](const char* s, size_t l, const std::vector& v) { - refs[v[0]] = s; - return ref(*grammar, v[0]); - }, - [&](const std::vector& v) { - return v[1]; - } - }; - - auto r = g["Grammar"].parse(s, l); if (!r.ret) { if (log) { auto line = line_info(s, 
r.ptr); @@ -1157,10 +1171,10 @@ private: return nullptr; } - for (const auto& x : refs) { + for (const auto& x : cxt.refs) { const auto& name = x.first; auto ptr = x.second; - if (grammar->find(name) == grammar->end()) { + if (cxt.grammar->find(name) == cxt.grammar->end()) { if (log) { auto line = line_info(s, ptr); log(line.first, line.second, "'" + name + "' is not defined."); @@ -1169,7 +1183,9 @@ private: } } - return grammar; + start = cxt.start; + + return cxt.grammar; } std::string resolve_escape_sequence(const char*s, size_t l) {