// // peglib.h // // Copyright (c) 2015 Yuji Hirose. All rights reserved. // MIT License // #ifndef _CPPEXPATLIB_PEGLIB_H_ #define _CPPEXPATLIB_PEGLIB_H_ #include #include #include #include #include #include #include #include #include namespace peglib { void* enabler; /*----------------------------------------------------------------------------- * any *---------------------------------------------------------------------------*/ class any { public: any() : content_(nullptr) {} any(const any& rhs) : content_(rhs.clone()) {} any(any&& rhs) : content_(rhs.content_) { rhs.content_ = nullptr; } template any(const T& value) : content_(new holder(value)) {} any& operator=(const any& rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.clone(); } return *this; } any& operator=(any&& rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.content_; rhs.content_ = nullptr; } return *this; } template any& operator=(const T& value) { if (content_) { delete content_; } content_ = new holder(value); return *this; } ~any() { delete content_; } bool is_undefined() const { return content_ == nullptr; } template < typename T, typename std::enable_if::value>::type*& = enabler > T& get() { assert(content_); auto p = dynamic_cast*>(content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template < typename T, typename std::enable_if::value>::type*& = enabler > T& get() { return *this; } template < typename T, typename std::enable_if::value>::type*& = enabler > const T& get() const { assert(content_); auto p = dynamic_cast*>(content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template < typename T, typename std::enable_if::value>::type*& = enabler > const any& get() const { return *this; } operator bool() const { return get(); } operator char() const { return get(); } operator wchar_t() const { return get(); } operator unsigned char() const { return get(); } operator int() const { return get(); } operator unsigned int() const { return get(); } operator short() const { return get(); } operator unsigned short() const { return get(); } operator long() const { return get(); } operator unsigned long() const { return get(); } operator long long() const { return get(); } operator unsigned long long() const { return get(); } operator float() const { return get(); } operator double() const { return get(); } operator const std::string&() const { return get(); } #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 #else operator char16_t() const { return get(); } operator char32_t() const { return get(); } #endif private: struct placeholder { virtual ~placeholder() {}; virtual placeholder* clone() const = 0; }; template struct holder : placeholder { holder(const T& value) : value_(value) {} placeholder* clone() const override { return new holder(value_); } T value_; }; placeholder* clone() const { return content_ ? content_->clone() : nullptr; } placeholder* content_; }; /*----------------------------------------------------------------------------- * PEG *---------------------------------------------------------------------------*/ /* * Semantic values */ struct Values { std::vector names; std::vector values; }; /* * Semantic action */ template < typename R, typename F, typename std::enable_if::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return any(fn(std::forward(args)...)); } template < typename R, typename F, typename std::enable_if::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { fn(std::forward(args)...); return any(); } class Action { public: Action() = default; Action(const Action& rhs) : fn_(rhs.fn_) {} //Action(Action&& rhs) : fn_(std::move(rhs.fn_)) {} template ::value && !std::is_same::value>::type*& = enabler> Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {} template ::value>::type*& = enabler> Action(F fn) : fn_(make_adaptor(fn, fn)) {} template ::value>::type*& = enabler> Action(F fn) {} template ::value && !std::is_same::value>::type*& = enabler> void operator=(F fn) { fn_ = make_adaptor(fn, &F::operator()); } template ::value>::type*& = enabler> void operator=(F fn) { fn_ = make_adaptor(fn, fn); } template ::value>::type*& = enabler> void operator=(F fn) {} operator bool() const { return (bool)fn_; } any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) const { return fn_(s, l, v, n); } private: template struct TypeAdaptor { TypeAdaptor(std::function& v, const std::vector& n)> fn) : fn_(fn) {} any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { return call(fn_, s, l, v, n); } std::function& v, const std::vector& n)> fn_; }; template struct TypeAdaptor_s_l_v { TypeAdaptor_s_l_v(std::function& v)> fn) : fn_(fn) {} any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { return call(fn_, s, l, v); } std::function& v)> fn_; }; template struct TypeAdaptor_s_l { TypeAdaptor_s_l(std::function fn) : fn_(fn) {} any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { return call(fn_, s, l); } std::function fn_; }; template struct TypeAdaptor_v_n { TypeAdaptor_v_n(std::function& v, const std::vector& n)> fn) : fn_(fn) {} any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { return call(fn_, v, n); } std::function& v, const std::vector& n)> fn_; }; template struct TypeAdaptor_v { TypeAdaptor_v(std::function& v)> fn) : fn_(fn) {} any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { return call(fn_, v); } std::function& v)> fn_; }; template struct TypeAdaptor_empty { TypeAdaptor_empty(std::function fn) : fn_(fn) {} any operator()(const char* s, size_t l, const std::vector& v, const std::vector& n) { return call(fn_); } std::function fn_; }; typedef std::function& v, const std::vector& n)> Fty; template Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t, const std::vector& v, const std::vector& n) const) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R(*mf)(const char*, size_t, const std::vector& v, const std::vector& n)) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t, const std::vector& v) const) { return TypeAdaptor_s_l_v(fn); } template Fty make_adaptor(F fn, R(*mf)(const char*, size_t, const std::vector& v)) { return TypeAdaptor_s_l_v(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t) const) { return TypeAdaptor_s_l(fn); } template Fty make_adaptor(F fn, R (*mf)(const char*, size_t)) { return TypeAdaptor_s_l(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const std::vector& v, const std::vector& n) const) { return TypeAdaptor_v_n(fn); } template Fty make_adaptor(F fn, R (*mf)(const std::vector& v, const std::vector& n)) { return TypeAdaptor_v_n(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const std::vector& v) const) { return TypeAdaptor_v(fn); } template Fty make_adaptor(F fn, R (*mf)(const std::vector& v)) { return TypeAdaptor_v(fn); } template Fty make_adaptor(F fn, R (F::*mf)() const) { return TypeAdaptor_empty(fn); } template Fty make_adaptor(F fn, R (*mf)()) { return TypeAdaptor_empty(fn); } Fty fn_; }; /* * Result */ struct Result { bool ret; size_t len; size_t choice; const char* ptr; const std::string err; // TODO: should be `int`. }; Result success(size_t len, size_t choice = 0) { return Result{ true, len, choice, nullptr, std::string() }; } Result fail(const char* ptr, std::string err = std::string(), std::string name = std::string()) { return Result{ false, 0, (size_t)-1, ptr, err }; } /* * Parser operators */ class Ope { public: virtual ~Ope() {}; virtual Result parse(const char* s, size_t l, Values& v) const = 0; }; class Sequence : public Ope { public: Sequence(const Sequence& rhs) : opes_(rhs.opes_) {} #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 // NOTE: Compiler Error C2797 on Visual Studio 2013 // "The C++ compiler in Visual Studio does not implement list // initialization inside either a member initializer list or a non-static // data member initializer. Before Visual Studio 2013 Update 3, this was // silently converted to a function call, which could lead to bad code // generation. Visual Studio 2013 Update 3 reports this as an error." template Sequence(const Args& ...args) { opes_ = std::vector>{ static_cast>(args)... }; } #else template Sequence(const Args& ...args) : opes_{ static_cast>(args)... } {} #endif Sequence(const std::vector>& opes) : opes_(opes) {} Sequence(std::vector>&& opes) : opes_(std::move(opes)) {} Result parse(const char* s, size_t l, Values& v) const { size_t i = 0; for (const auto& ope : opes_) { const auto& rule = *ope; auto r = rule.parse(s + i, l - i, v); if (!r.ret) { auto err = r.err; if (err.empty()) { err = "missing an element in the 'sequence'"; } return fail(r.ptr, err); } i += r.len; } return success(i); } private: std::vector> opes_; }; class PrioritizedChoice : public Ope { public: #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 // NOTE: Compiler Error C2797 on Visual Studio 2013 // "The C++ compiler in Visual Studio does not implement list // initialization inside either a member initializer list or a non-static // data member initializer. Before Visual Studio 2013 Update 3, this was // silently converted to a function call, which could lead to bad code // generation. Visual Studio 2013 Update 3 reports this as an error." template PrioritizedChoice(const Args& ...args) { opes_ = std::vector>{ static_cast>(args)... }; } #else template PrioritizedChoice(const Args& ...args) : opes_{ static_cast>(args)... } {} #endif PrioritizedChoice(const std::vector>& opes) : opes_(opes) {} PrioritizedChoice(std::vector>&& opes) : opes_(std::move(opes)) {} Result parse(const char* s, size_t l, Values& v) const { size_t id = 0; for (const auto& ope : opes_) { const auto& rule = *ope; Values chldsv; auto r = rule.parse(s, l, chldsv); if (r.ret) { if (!chldsv.values.empty()) { for (const auto& x: chldsv.values) { v.values.push_back(x); } for (const auto& x: chldsv.names) { v.names.push_back(x); } } return success(r.len, id); } id++; } return fail(s, "nothing was matched in the 'prioritized choice'"); } size_t size() const { return opes_.size(); } private: std::vector> opes_; }; class ZeroOrMore : public Ope { public: ZeroOrMore(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, Values& v) const { auto i = 0; while (l - i > 0) { const auto& rule = *ope_; auto r = rule.parse(s + i, l - i, v); if (!r.ret) { break; } i += r.len; } return success(i); } private: std::shared_ptr ope_; }; class OneOrMore : public Ope { public: OneOrMore(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, Values& v) const { const auto& rule = *ope_; auto r = rule.parse(s, l, v); if (!r.ret) { auto err = r.err; if (err.empty()) { err = "nothing occurred in the 'one-or-more'"; } return fail(r.ptr, r.err); } auto i = r.len; while (l - i > 0) { const auto& rule = *ope_; auto r = rule.parse(s + i, l - i, v); if (!r.ret) { break; } i += r.len; } return success(i); } private: std::shared_ptr ope_; }; class Option : public Ope { public: Option(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, Values& v) const { const auto& rule = *ope_; auto r = rule.parse(s, l, v); return success(r.ret ? r.len : 0); } private: std::shared_ptr ope_; }; class AndPredicate : public Ope { public: AndPredicate(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, Values& v) const { const auto& rule = *ope_; auto r = rule.parse(s, l, v); if (r.ret) { return success(0); } else { return fail(r.ptr, r.err); } } private: std::shared_ptr ope_; }; class NotPredicate : public Ope { public: NotPredicate(const std::shared_ptr& ope) : ope_(ope) {} Result parse(const char* s, size_t l, Values& v) const { const auto& rule = *ope_; auto r = rule.parse(s, l, v); if (r.ret) { return fail(s); } else { return success(0); } } private: std::shared_ptr ope_; }; class LiteralString : public Ope { public: LiteralString(const std::string& s) : lit_(s) {} Result parse(const char* s, size_t l, Values& v) const { auto i = 0u; for (; i < lit_.size(); i++) { if (i >= l || s[i] != lit_[i]) { return fail(s); } } return success(i); } private: std::string lit_; }; class CharacterClass : public Ope { public: CharacterClass(const std::string& chars) : chars_(chars) {} Result parse(const char* s, size_t l, Values& v) const { // TODO: UTF8 support if (l < 1) { return fail(s); } auto ch = s[0]; auto i = 0u; while (i < chars_.size()) { if (i + 2 < chars_.size() && chars_[i + 1] == '-') { if (chars_[i] <= ch && ch <= chars_[i + 2]) { return success(1); } i += 3; } else { if (chars_[i] == ch) { return success(1); } i += 1; } } return fail(s); } private: std::string chars_; }; class Character : public Ope { public: Character(char ch) : ch_(ch) {} Result parse(const char* s, size_t l, Values& v) const { // TODO: UTF8 support if (l < 1 || s[0] != ch_) { return fail(s); } return success(1); } private: char ch_; }; class AnyCharacter : public Ope { public: Result parse(const char* s, size_t l, Values& v) const { // TODO: UTF8 support if (l < 1) { return fail(s); } return success(1); } }; class Grouping : public Ope { public: Grouping(const std::shared_ptr& ope) : ope_(ope) {} Grouping(const std::shared_ptr& ope, std::function match) : ope_(ope), match_(match) {} Result parse(const char* s, size_t l, Values& v) const { assert(ope_); const auto& rule = *ope_; auto r = rule.parse(s, l, v); if (r.ret && match_) { match_(s, r.len); } return r; } private: std::shared_ptr ope_; std::function match_; }; class WeakHolder : public Ope { public: WeakHolder(const std::shared_ptr& ope) : weak_(ope) {} Result parse(const char* s, size_t l, Values& v) const { auto ope = weak_.lock(); assert(ope); const auto& rule = *ope; return rule.parse(s, l, v); } private: std::weak_ptr weak_; }; /* * Definition */ class Definition { public: Definition() : actions(1) , holder_(std::make_shared(this)) {} Definition(const Definition& rhs) : name(rhs.name) , actions(1) , holder_(rhs.holder_) { holder_->outer_ = this; } Definition(Definition&& rhs) : name(std::move(rhs.name)) , actions(1) , holder_(std::move(rhs.holder_)) { holder_->outer_ = this; } Definition(const std::shared_ptr& ope) : actions(1) , holder_(std::make_shared(this)) { holder_->ope_ = ope; } operator std::shared_ptr() { return std::make_shared(holder_); } Definition& operator<=(const std::shared_ptr& ope) { holder_->ope_ = ope; return *this; } template Result parse(const char* s, size_t l, T& val) const { Values v; auto r = holder_->parse(s, l, v); if (r.ret && !v.values.empty() && !v.values.front().is_undefined()) { val = v.values[0].get(); } return r; } template Result parse(const char* s, T& val) const { auto l = strlen(s); return parse(s, l, val); } Result parse(const char* s) const { auto l = strlen(s); Values v; return holder_->parse(s, l, v); } Definition& operator=(Action ac) { assert(!actions.empty()); actions[0] = ac; return *this; } Definition& operator=(std::initializer_list acs) { actions = acs; return *this; } template Definition& operator,(T fn) { operator=(fn); return *this; } std::string name; std::vector actions; private: friend class DefinitionReference; class Holder : public Ope { public: Holder(Definition* outer) : outer_(outer) {} Result parse(const char* s, size_t l, Values& v) const { if (!ope_) { throw std::logic_error("Uninitialized definition ope was used..."); } const auto& rule = *ope_; Values chldsv; auto r = rule.parse(s, l, chldsv); if (r.ret) { v.names.push_back(outer_->name); assert(!outer_->actions.empty()); auto id = r.choice + 1; const auto& ac = (id < outer_->actions.size() && outer_->actions[id]) ? outer_->actions[id] : outer_->actions[0]; v.values.push_back(reduce(s, r.len, chldsv, ac)); } return r; } private: friend class Definition; any reduce(const char* s, size_t l, const Values& v, const Action& action) const { if (action) { return action(s, l, v.values, v.names); } else if (v.values.empty()) { return any(); } else { return v.values.front(); } } std::shared_ptr ope_; Definition* outer_; }; Definition& operator=(const Definition& rhs); Definition& operator=(Definition&& rhs); std::shared_ptr holder_; }; class DefinitionReference : public Ope { public: DefinitionReference( const std::map& grammar, const std::string& name) : grammar_(grammar) , name_(name) {} Result parse(const char* s, size_t l, Values& v) const { const auto& rule = *grammar_.at(name_).holder_; return rule.parse(s, l, v); } private: const std::map& grammar_; const std::string name_; }; typedef Definition rule; /* * Factories */ template std::shared_ptr seq(Args&& ...args) { return std::make_shared(static_cast>(args)...); } template std::shared_ptr cho(Args&& ...args) { return std::make_shared(static_cast>(args)...); } inline std::shared_ptr zom(const std::shared_ptr& ope) { return std::make_shared(ope); } inline std::shared_ptr oom(const std::shared_ptr& ope) { return std::make_shared(ope); } inline std::shared_ptr opt(const std::shared_ptr& ope) { return std::make_shared