// // peglib.h // // Copyright (c) 2015 Yuji Hirose. All rights reserved. // MIT License // #ifndef _CPPPEGLIB_PEGLIB_H_ #define _CPPPEGLIB_PEGLIB_H_ #include #include #include #include #include #include #include #include #include #include namespace peglib { void* enabler; /*----------------------------------------------------------------------------- * any *---------------------------------------------------------------------------*/ class any { public: any() : content_(nullptr) {} any(const any& rhs) : content_(rhs.clone()) {} any(any&& rhs) : content_(rhs.content_) { rhs.content_ = nullptr; } template any(const T& value) : content_(new holder(value)) {} any& operator=(const any& rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.clone(); } return *this; } any& operator=(any&& rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.content_; rhs.content_ = nullptr; } return *this; } template any& operator=(const T& value) { if (content_) { delete content_; } content_ = new holder(value); return *this; } ~any() { delete content_; } bool is_undefined() const { return content_ == nullptr; } template < typename T, typename std::enable_if::value>::type*& = enabler > T& get() { assert(content_); auto p = dynamic_cast*>(content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template < typename T, typename std::enable_if::value>::type*& = enabler > T& get() { return *this; } template < typename T, typename std::enable_if::value>::type*& = enabler > const T& get() const { assert(content_); auto p = dynamic_cast*>(content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template < typename T, typename std::enable_if::value>::type*& = enabler > const any& get() const { return *this; } private: struct placeholder { virtual ~placeholder() {}; virtual placeholder* clone() const = 0; }; template struct holder : placeholder { holder(const T& value) : value_(value) {} placeholder* clone() const override { return new holder(value_); } T value_; }; placeholder* clone() const { return content_ ? content_->clone() : nullptr; } placeholder* content_; }; /*----------------------------------------------------------------------------- * PEG *---------------------------------------------------------------------------*/ /* * Semantic values */ struct SemanticValue { any val; std::string name; const char* s; size_t l; }; struct SemanticValues : protected std::vector { const char* s; size_t l; SemanticValues() : s(nullptr), l(0) {} typedef SemanticValue T; using std::vector::iterator; using std::vector::const_iterator; using std::vector::size; using std::vector::empty; using std::vector::assign; using std::vector::begin; using std::vector::end; using std::vector::rbegin; using std::vector::rend; using std::vector::operator[]; using std::vector::at; using std::vector::resize; using std::vector::front; using std::vector::back; using std::vector::push_back; using std::vector::pop_back; using std::vector::insert; using std::vector::erase; using std::vector::clear; using std::vector::swap; template static U reduce(T i, T end, U val, V f){ if (i == end) { return val; } std::tie(val, i) = f(val, i); return reduce(i, end, val, f); }; }; /* * Semantic action */ template < typename R, typename F, typename std::enable_if::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { fn(std::forward(args)...); return any(); } template < typename R, typename F, typename std::enable_if::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return fn(std::forward(args)...); } template < typename R, typename F, typename std::enable_if::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return fn(std::forward(args)...).val; } template < typename R, typename F, typename std::enable_if::value && !std::is_same::value && !std::is_same::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return any(fn(std::forward(args)...)); } class Action { public: Action() = default; Action(const Action& rhs) : fn_(rhs.fn_) {} //Action(Action&& rhs) : fn_(std::move(rhs.fn_)) {} template ::value && !std::is_same::value>::type*& = enabler> Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {} template ::value>::type*& = enabler> Action(F fn) : fn_(make_adaptor(fn, fn)) {} template ::value>::type*& = enabler> Action(F fn) {} template ::value && !std::is_same::value>::type*& = enabler> void operator=(F fn) { fn_ = make_adaptor(fn, &F::operator()); } template ::value>::type*& = enabler> void operator=(F fn) { fn_ = make_adaptor(fn, fn); } template ::value>::type*& = enabler> void operator=(F fn) {} operator bool() const { return (bool)fn_; } any operator()(const SemanticValues& sv, any& dt) const { return fn_(sv, dt); } private: template struct TypeAdaptor { TypeAdaptor(std::function fn) : fn_(fn) {} any operator()(const SemanticValues& sv, any& dt) { return call(fn_, sv); } std::function fn_; }; template struct TypeAdaptor_c { TypeAdaptor_c(std::function fn) : fn_(fn) {} any operator()(const SemanticValues& sv, any& dt) { return call(fn_, sv, dt); } std::function fn_; }; template struct TypeAdaptor_s_l { TypeAdaptor_s_l(std::function fn) : fn_(fn) {} any operator()(const SemanticValues& sv, any& dt) { return call(fn_, sv.s, sv.l); } std::function fn_; }; template struct TypeAdaptor_empty { TypeAdaptor_empty(std::function fn) : fn_(fn) {} any operator()(const SemanticValues& sv, any& dt) { return call(fn_); } std::function fn_; }; typedef std::function Fty; template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv) const) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv)) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R(*mf)(const SemanticValues& sv)) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv, any& dt) const) { return TypeAdaptor_c(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv, any& dt)) { return TypeAdaptor_c(fn); } template Fty make_adaptor(F fn, R(*mf)(const SemanticValues& sv, any& dt)) { return TypeAdaptor_c(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t) const) { return TypeAdaptor_s_l(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t)) { return TypeAdaptor_s_l(fn); } template Fty make_adaptor(F fn, R (*mf)(const char*, size_t)) { return TypeAdaptor_s_l(fn); } template Fty make_adaptor(F fn, R (F::*mf)() const) { return TypeAdaptor_empty(fn); } template Fty make_adaptor(F fn, R (F::*mf)()) { return TypeAdaptor_empty(fn); } template Fty make_adaptor(F fn, R (*mf)()) { return TypeAdaptor_empty(fn); } Fty fn_; }; /* * Match action */ typedef std::function MatchAction; /* * Result */ struct Result { bool ret; size_t len; size_t choice; const char* ptr; const std::string err; // TODO: should be `int`. }; Result success(size_t len, size_t choice = 0) { return Result{ true, len, choice, nullptr, std::string() }; } Result fail(const char* ptr, std::string err = std::string(), std::string name = std::string()) { return Result{ false, 0, (size_t)-1, ptr, err }; } /* * Parser operators */ class Ope { public: virtual ~Ope() {}; virtual Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const = 0; }; class Sequence : public Ope { public: Sequence(const Sequence& rhs) : opes_(rhs.opes_) {} #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 // NOTE: Compiler Error C2797 on Visual Studio 2013 // "The C++ compiler in Visual Studio does not implement list // initialization inside either a member initializer list or a non-static // data member initializer. Before Visual Studio 2013 Update 3, this was // silently converted to a function call, which could lead to bad code // generation. Visual Studio 2013 Update 3 reports this as an error." template Sequence(const Args& ...args) { opes_ = std::vector>{ static_cast>(args)... }; } #else template Sequence(const Args& ...args) : opes_{ static_cast>(args)... } {} #endif Sequence(const std::vector>& opes) : opes_(opes) {} Sequence(std::vector>&& opes) : opes_(std::move(opes)) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { size_t i = 0; for (const auto& ope : opes_) { const auto& rule = *ope; auto r = rule.parse(s + i, l - i, sv, dt); if (!r.ret) { auto err = r.err; if (err.empty()) { err = "missing an element in the 'sequence'"; } return fail(r.ptr, err); } i += r.len; } return success(i); } private: std::vector> opes_; }; class PrioritizedChoice : public Ope { public: #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 // NOTE: Compiler Error C2797 on Visual Studio 2013 // "The C++ compiler in Visual Studio does not implement list // initialization inside either a member initializer list or a non-static // data member initializer. Before Visual Studio 2013 Update 3, this was // silently converted to a function call, which could lead to bad code // generation. Visual Studio 2013 Update 3 reports this as an error." template PrioritizedChoice(const Args& ...args) { opes_ = std::vector>{ static_cast>(args)... }; } #else template PrioritizedChoice(const Args& ...args) : opes_{ static_cast>(args)... } {} #endif PrioritizedChoice(const std::vector>& opes) : opes_(opes) {} PrioritizedChoice(std::vector>&& opes) : opes_(std::move(opes)) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { size_t id = 0; for (const auto& ope : opes_) { const auto& rule = *ope; SemanticValues chldsv; auto r = rule.parse(s, l, chldsv, dt); if (r.ret) { if (!chldsv.empty()) { sv.insert(sv.end(), chldsv.begin(), chldsv.end()); } sv.s = chldsv.s; sv.l = chldsv.l; return success(r.len, id); } id++; } return fail(s, "nothing was matched in the 'prioritized choice'"); } size_t size() const { return opes_.size(); } private: std::vector> opes_; }; class ZeroOrMore : public Ope { public: ZeroOrMore(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { auto i = 0; while (l - i > 0) { const auto& rule = *ope_; auto r = rule.parse(s + i, l - i, sv, dt); if (!r.ret) { break; } i += r.len; } return success(i); } private: std::shared_ptr ope_; }; class OneOrMore : public Ope { public: OneOrMore(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { const auto& rule = *ope_; auto r = rule.parse(s, l, sv, dt); if (!r.ret) { auto err = r.err; if (err.empty()) { err = "nothing occurred in the 'one-or-more'"; } return fail(r.ptr, r.err); } auto i = r.len; while (l - i > 0) { const auto& rule = *ope_; auto r = rule.parse(s + i, l - i, sv, dt); if (!r.ret) { break; } i += r.len; } return success(i); } private: std::shared_ptr ope_; }; class Option : public Ope { public: Option(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { const auto& rule = *ope_; auto r = rule.parse(s, l, sv, dt); return success(r.ret ? r.len : 0); } private: std::shared_ptr ope_; }; class AndPredicate : public Ope { public: AndPredicate(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { const auto& rule = *ope_; auto r = rule.parse(s, l, sv, dt); if (r.ret) { return success(0); } else { return fail(r.ptr, r.err); } } private: std::shared_ptr ope_; }; class NotPredicate : public Ope { public: NotPredicate(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { const auto& rule = *ope_; auto r = rule.parse(s, l, sv, dt); if (r.ret) { return fail(s); } else { return success(0); } } private: std::shared_ptr ope_; }; class LiteralString : public Ope { public: LiteralString(const std::string& s) : lit_(s) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { auto i = 0u; for (; i < lit_.size(); i++) { if (i >= l || s[i] != lit_[i]) { return fail(s); } } return success(i); } private: std::string lit_; }; class CharacterClass : public Ope { public: CharacterClass(const std::string& chars) : chars_(chars) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { // TODO: UTF8 support if (l < 1) { return fail(s); } auto ch = s[0]; auto i = 0u; while (i < chars_.size()) { if (i + 2 < chars_.size() && chars_[i + 1] == '-') { if (chars_[i] <= ch && ch <= chars_[i + 2]) { return success(1); } i += 3; } else { if (chars_[i] == ch) { return success(1); } i += 1; } } return fail(s); } private: std::string chars_; }; class Character : public Ope { public: Character(char ch) : ch_(ch) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { // TODO: UTF8 support if (l < 1 || s[0] != ch_) { return fail(s); } return success(1); } private: char ch_; }; class AnyCharacter : public Ope { public: Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { // TODO: UTF8 support if (l < 1) { return fail(s); } return success(1); } }; class Capture : public Ope { public: Capture(const std::shared_ptr& ope, MatchAction ma, size_t ci) : ope_(ope), match_action_(ma), capture_id(ci) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { assert(ope_); const auto& rule = *ope_; auto r = rule.parse(s, l, sv, dt); if (r.ret && match_action_) { match_action_(s, r.len, capture_id); } return r; } private: std::shared_ptr ope_; MatchAction match_action_; size_t capture_id; }; class Anchor : public Ope { public: Anchor(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { assert(ope_); const auto& rule = *ope_; auto r = rule.parse(s, l, sv, dt); if (r.ret) { sv.s = s; sv.l = r.len; } return r; } private: std::shared_ptr ope_; }; typedef std::function Parser; class User : public Ope { public: User(Parser fn) : fn_(fn) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { assert(fn_); return fn_(s, l, sv, dt); } private: std::function fn_; }; class WeakHolder : public Ope { public: WeakHolder(const std::shared_ptr& ope) : weak_(ope) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { auto ope = weak_.lock(); assert(ope); const auto& rule = *ope; return rule.parse(s, l, sv, dt); } private: std::weak_ptr weak_; }; /* * Definition */ class Definition { public: Definition() : actions(1) , ignore(false) , holder_(std::make_shared(this)) {} Definition(const Definition& rhs) : name(rhs.name) , actions(1) , ignore(false) , holder_(rhs.holder_) { holder_->outer_ = this; } Definition(Definition&& rhs) : name(std::move(rhs.name)) , actions(1) , ignore(rhs.ignore) , holder_(std::move(rhs.holder_)) { holder_->outer_ = this; } Definition(const std::shared_ptr& ope) : actions(1) , ignore(false) , holder_(std::make_shared(this)) { holder_->ope_ = ope; } operator std::shared_ptr() { return std::make_shared(holder_); } Definition& operator<=(const std::shared_ptr& ope) { holder_->ope_ = ope; return *this; } Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { return holder_->parse(s, l, sv, dt); } template Result parse(const char* s, size_t l, T& val) const { SemanticValues sv; any dt; auto r = holder_->parse(s, l, sv, dt); if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) { val = sv[0].val.get(); } return r; } template Result parse(const char* s, T& val) const { auto l = strlen(s); return parse(s, l, val); } Result parse(const char* s) const { auto l = strlen(s); SemanticValues sv; any dt; return holder_->parse(s, l, sv, dt); } Definition& operator=(Action ac) { assert(!actions.empty()); actions[0] = ac; return *this; } Definition& operator=(std::initializer_list acs) { actions = acs; return *this; } template Definition& operator,(T fn) { operator=(fn); return *this; } Definition& operator~() { ignore = true; return *this; } std::string name; std::vector actions; bool ignore; private: friend class DefinitionReference; class Holder : public Ope { public: Holder(Definition* outer) : outer_(outer) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { if (!ope_) { throw std::logic_error("Uninitialized definition ope was used..."); } const auto& rule = *ope_; SemanticValues chldsv; auto r = rule.parse(s, l, chldsv, dt); if (r.ret && !outer_->ignore) { assert(!outer_->actions.empty()); auto id = r.choice + 1; const auto& action = (id < outer_->actions.size() && outer_->actions[id]) ? outer_->actions[id] : outer_->actions[0]; if (!chldsv.s) { chldsv.s = s; chldsv.l = r.len; } auto val = reduce(chldsv, dt, action); sv.push_back(SemanticValue{ val, outer_->name, nullptr, 0 }); } return r; } private: friend class Definition; any reduce(const SemanticValues& sv, any& dt, const Action& action) const { if (action) { return action(sv, dt); } else if (sv.empty()) { return any(); } else { return sv.front().val; } } std::shared_ptr ope_; Definition* outer_; }; Definition& operator=(const Definition& rhs); Definition& operator=(Definition&& rhs); std::shared_ptr holder_; }; class DefinitionReference : public Ope { public: DefinitionReference( const std::map& grammar, const std::string& name) : grammar_(grammar) , name_(name) {} Result parse(const char* s, size_t l, SemanticValues& sv, any& dt) const { const auto& rule = *grammar_.at(name_).holder_; return rule.parse(s, l, sv, dt); } private: const std::map& grammar_; const std::string name_; }; typedef Definition rule; /* * Factories */ template std::shared_ptr seq(Args&& ...args) { return std::make_shared(static_cast>(args)...); } template std::shared_ptr cho(Args&& ...args) { return std::make_shared(static_cast>(args)...); } inline std::shared_ptr zom(const std::shared_ptr& ope) { return std::make_shared(ope); } inline std::shared_ptr oom(const std::shared_ptr& ope) { return std::make_shared(ope); } inline std::shared_ptr opt(const std::shared_ptr& ope) { return std::make_shared