// // peglib.h // // Copyright (c) 2015 Yuji Hirose. All rights reserved. // MIT License // #ifndef _CPPPEGLIB_PEGLIB_H_ #define _CPPPEGLIB_PEGLIB_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include // guard for older versions of VC++ #ifdef _MSC_VER // VS2013 has no constexpr #if (_MSC_VER == 1800) #define PEGLIB_NO_CONSTEXPR_SUPPORT #elif (_MSC_VER >= 1800) // good to go #else (_MSC_VER < 1800) #error "Requires C+11 support" #endif #endif namespace peg { extern void* enabler; /*----------------------------------------------------------------------------- * any *---------------------------------------------------------------------------*/ class any { public: any() : content_(nullptr) {} any(const any& rhs) : content_(rhs.clone()) {} any(any&& rhs) : content_(rhs.content_) { rhs.content_ = nullptr; } template any(const T& value) : content_(new holder(value)) {} any& operator=(const any& rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.clone(); } return *this; } any& operator=(any&& rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.content_; rhs.content_ = nullptr; } return *this; } ~any() { delete content_; } bool is_undefined() const { return content_ == nullptr; } template < typename T, typename std::enable_if::value>::type*& = enabler > T& get() { if (!content_) { throw std::bad_cast(); } auto p = dynamic_cast*>(content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template < typename T, typename std::enable_if::value>::type*& = enabler > T& get() { return *this; } template < typename T, typename std::enable_if::value>::type*& = enabler > const T& get() const { assert(content_); auto p = dynamic_cast*>(content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template < typename T, typename std::enable_if::value>::type*& = enabler > const any& get() const { return *this; } private: struct placeholder { virtual ~placeholder() {}; virtual placeholder* clone() const = 0; }; template struct holder : placeholder { holder(const T& value) : value_(value) {} placeholder* clone() const override { return new holder(value_); } T value_; }; placeholder* clone() const { return content_ ? content_->clone() : nullptr; } placeholder* content_; }; /*----------------------------------------------------------------------------- * scope_exit *---------------------------------------------------------------------------*/ // This is based on "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189". template struct scope_exit { explicit scope_exit(EF&& f) : exit_function(std::move(f)) , execute_on_destruction{true} {} scope_exit(scope_exit&& rhs) : exit_function(std::move(rhs.exit_function)) , execute_on_destruction{rhs.execute_on_destruction} { rhs.release(); } ~scope_exit() { if (execute_on_destruction) { this->exit_function(); } } void release() { this->execute_on_destruction = false; } private: scope_exit(const scope_exit&) = delete; void operator=(const scope_exit&) = delete; scope_exit& operator=(scope_exit&&) = delete; EF exit_function; bool execute_on_destruction; }; template auto make_scope_exit(EF&& exit_function) -> scope_exit { return scope_exit>(std::forward(exit_function)); } /*----------------------------------------------------------------------------- * PEG *---------------------------------------------------------------------------*/ /* * Semantic values */ struct SemanticValue { any val; const char* s; size_t n; SemanticValue() : s(nullptr), n(0) {} SemanticValue(const any& val, const char* s, size_t n) : val(val), s(s), n(n) {} template T& get() { return val.get(); } template const T& get() const { return val.get(); } std::string str() const { return std::string(s, n); } }; struct SemanticValues : protected std::vector { const char* path; const char* ss; const char* s; size_t n; size_t choice; SemanticValues() : s(nullptr), n(0), choice(0) {} typedef SemanticValue T; using std::vector::iterator; using std::vector::const_iterator; using std::vector::size; using std::vector::empty; using std::vector::assign; using std::vector::begin; using std::vector::end; using std::vector::rbegin; using std::vector::rend; using std::vector::operator[]; using std::vector::at; using std::vector::resize; using std::vector::front; using std::vector::back; using std::vector::push_back; using std::vector::pop_back; using std::vector::insert; using std::vector::erase; using std::vector::clear; using std::vector::swap; using std::vector::emplace; using std::vector::emplace_back; std::string str() const { return std::string(s, n); } template auto transform(size_t beg = 0, size_t end = -1) const -> vector { return this->transform(beg, end, [](const SemanticValue& v) { return v.get(); }); } private: template auto transform(F f) const -> vector::type> { vector::type> r; for (const auto& v: *this) { r.emplace_back(f(v)); } return r; } template auto transform(size_t beg, size_t end, F f) const -> vector::type> { vector::type> r; end = (std::min)(end, size()); for (size_t i = beg; i < end; i++) { r.emplace_back(f((*this)[i])); } return r; } }; /* * Semantic action */ template < typename R, typename F, typename std::enable_if::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { fn(std::forward(args)...); return any(); } template < typename R, typename F, typename std::enable_if::type, any>::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return fn(std::forward(args)...); } template < typename R, typename F, typename std::enable_if::type, SemanticValue>::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return fn(std::forward(args)...).val; } template < typename R, typename F, typename std::enable_if< !std::is_void::value && !std::is_same::type, any>::value && !std::is_same::type, SemanticValue>::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return any(fn(std::forward(args)...)); } class Action { public: Action() = default; Action(const Action& rhs) : fn_(rhs.fn_) {} template ::value && !std::is_same::value>::type*& = enabler> Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {} template ::value>::type*& = enabler> Action(F fn) : fn_(make_adaptor(fn, fn)) {} template ::value>::type*& = enabler> Action(F fn) {} template ::value && !std::is_same::value>::type*& = enabler> void operator=(F fn) { fn_ = make_adaptor(fn, &F::operator()); } template ::value>::type*& = enabler> void operator=(F fn) { fn_ = make_adaptor(fn, fn); } template ::value>::type*& = enabler> void operator=(F fn) {} operator bool() const { return (bool)fn_; } any operator()(const SemanticValues& sv, any& dt) const { return fn_(sv, dt); } private: template struct TypeAdaptor { TypeAdaptor(std::function fn) : fn_(fn) {} any operator()(const SemanticValues& sv, any& dt) { return call(fn_, sv); } std::function fn_; }; template struct TypeAdaptor_c { TypeAdaptor_c(std::function fn) : fn_(fn) {} any operator()(const SemanticValues& sv, any& dt) { return call(fn_, sv, dt); } std::function fn_; }; typedef std::function Fty; template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv) const) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv)) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R (*mf)(const SemanticValues& sv)) { return TypeAdaptor(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv, any& dt) const) { return TypeAdaptor_c(fn); } template Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv, any& dt)) { return TypeAdaptor_c(fn); } template Fty make_adaptor(F fn, R(*mf)(const SemanticValues& sv, any& dt)) { return TypeAdaptor_c(fn); } Fty fn_; }; /* * Semantic predicate */ // Note: 'parse_error' exception class should be be used in sematic action handlers to reject the rule. struct parse_error { parse_error() = default; parse_error(const char* s) : s_(s) {} const char* what() const { return s_.empty() ? nullptr : s_.c_str(); } private: std::string s_; }; /* * Match action */ typedef std::function MatchAction; /* * Result */ inline bool success(size_t len) { return len != -1; } inline bool fail(size_t len) { return len == -1; } /* * Context */ class Ope; class Context; class Definition; typedef std::function Tracer; class Context { public: const char* path; const char* s; const size_t l; const char* error_pos; const char* message_pos; std::string message; // TODO: should be `int`. std::vector> value_stack; size_t value_stack_size; size_t nest_level; std::vector definition_stack; std::shared_ptr whitespaceOpe; bool in_whiltespace; bool in_token; const size_t def_count; const bool enablePackratParsing; std::vector cache_register; std::vector cache_success; std::map, std::tuple> cache_result; std::function tracer; Context( const char* path, const char* s, size_t l, size_t def_count, std::shared_ptr whitespaceOpe, bool enablePackratParsing, Tracer tracer) : path(path) , s(s) , l(l) , error_pos(nullptr) , message_pos(nullptr) , value_stack_size(0) , nest_level(0) , whitespaceOpe(whitespaceOpe) , in_whiltespace(false) , in_token(false) , def_count(def_count) , enablePackratParsing(enablePackratParsing) , cache_register(enablePackratParsing ? def_count * (l + 1) : 0) , cache_success(enablePackratParsing ? def_count * (l + 1) : 0) , tracer(tracer) { } template void packrat(const char* s, size_t def_id, size_t& len, any& val, T fn) { if (!enablePackratParsing) { fn(val); return; } auto col = s - this->s; auto has_cache = cache_register[def_count * col + def_id]; if (has_cache) { if (cache_success[def_count * col + def_id]) { const auto& key = std::make_pair(s - this->s, def_id); std::tie(len, val) = cache_result[key]; return; } else { len = -1; return; } } else { fn(val); cache_register[def_count * col + def_id] = true; cache_success[def_count * col + def_id] = success(len); if (success(len)) { const auto& key = std::make_pair(s - this->s, def_id); cache_result[key] = std::make_pair(len, val); } return; } } SemanticValues& push() { assert(value_stack_size <= value_stack.size()); if (value_stack_size == value_stack.size()) { value_stack.emplace_back(std::make_shared()); } auto& sv = *value_stack[value_stack_size++]; if (!sv.empty()) { sv.clear(); } sv.path = path; sv.ss = s; sv.s = nullptr; sv.n = 0; return sv; } void pop() { value_stack_size--; } void set_error_pos(const char* s) { if (error_pos < s) error_pos = s; } void trace(const char* name, const char* s, size_t n, SemanticValues& sv, any& dt) const { if (tracer) tracer(name, s, n, sv, *this, dt); } }; /* * Parser operators */ class Ope { public: struct Visitor; virtual ~Ope() {}; virtual size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const = 0; virtual void accept(Visitor& v) = 0; }; class Sequence : public Ope { public: Sequence(const Sequence& rhs) : opes_(rhs.opes_) {} #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 // NOTE: Compiler Error C2797 on Visual Studio 2013 // "The C++ compiler in Visual Studio does not implement list // initialization inside either a member initializer list or a non-static // data member initializer. Before Visual Studio 2013 Update 3, this was // silently converted to a function call, which could lead to bad code // generation. Visual Studio 2013 Update 3 reports this as an error." template Sequence(const Args& ...args) { opes_ = std::vector>{ static_cast>(args)... }; } #else template Sequence(const Args& ...args) : opes_{ static_cast>(args)... } {} #endif Sequence(const std::vector>& opes) : opes_(opes) {} Sequence(std::vector>&& opes) : opes_(opes) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("Sequence", s, n, sv, dt); size_t i = 0; for (const auto& ope : opes_) { c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); const auto& rule = *ope; auto len = rule.parse(s + i, n - i, sv, c, dt); if (fail(len)) { return -1; } i += len; } return i; } void accept(Visitor& v) override; std::vector> opes_; }; class PrioritizedChoice : public Ope { public: #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 // NOTE: Compiler Error C2797 on Visual Studio 2013 // "The C++ compiler in Visual Studio does not implement list // initialization inside either a member initializer list or a non-static // data member initializer. Before Visual Studio 2013 Update 3, this was // silently converted to a function call, which could lead to bad code // generation. Visual Studio 2013 Update 3 reports this as an error." template PrioritizedChoice(const Args& ...args) { opes_ = std::vector>{ static_cast>(args)... }; } #else template PrioritizedChoice(const Args& ...args) : opes_{ static_cast>(args)... } {} #endif PrioritizedChoice(const std::vector>& opes) : opes_(opes) {} PrioritizedChoice(std::vector>&& opes) : opes_(opes) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("PrioritizedChoice", s, n, sv, dt); size_t id = 0; for (const auto& ope : opes_) { c.nest_level++; auto& chldsv = c.push(); auto se = make_scope_exit([&]() { c.nest_level--; c.pop(); }); const auto& rule = *ope; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { if (!chldsv.empty()) { sv.insert(sv.end(), chldsv.begin(), chldsv.end()); } sv.s = chldsv.s; sv.n = chldsv.n; sv.choice = id; return len; } id++; } return -1; } void accept(Visitor& v) override; size_t size() const { return opes_.size(); } std::vector> opes_; }; class ZeroOrMore : public Ope { public: ZeroOrMore(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("ZeroOrMore", s, n, sv, dt); auto save_error_pos = c.error_pos; size_t i = 0; while (n - i > 0) { c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); auto save_sv_size = sv.size(); const auto& rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); if (fail(len)) { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + save_sv_size); } c.error_pos = save_error_pos; break; } i += len; } return i; } void accept(Visitor& v) override; std::shared_ptr ope_; }; class OneOrMore : public Ope { public: OneOrMore(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("OneOrMore", s, n, sv, dt); auto len = 0; { c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); const auto& rule = *ope_; len = rule.parse(s, n, sv, c, dt); if (fail(len)) { return -1; } } auto save_error_pos = c.error_pos; auto i = len; while (n - i > 0) { c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); auto save_sv_size = sv.size(); const auto& rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); if (fail(len)) { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + save_sv_size); } c.error_pos = save_error_pos; break; } i += len; } return i; } void accept(Visitor& v) override; std::shared_ptr ope_; }; class Option : public Ope { public: Option(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("Option", s, n, sv, dt); auto save_error_pos = c.error_pos; c.nest_level++; auto save_sv_size = sv.size(); auto se = make_scope_exit([&]() { c.nest_level--; }); const auto& rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); if (success(len)) { return len; } else { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + save_sv_size); } c.error_pos = save_error_pos; return 0; } } void accept(Visitor& v) override; std::shared_ptr ope_; }; class AndPredicate : public Ope { public: AndPredicate(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("AndPredicate", s, n, sv, dt); c.nest_level++; auto& chldsv = c.push(); auto se = make_scope_exit([&]() { c.nest_level--; c.pop(); }); const auto& rule = *ope_; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { return 0; } else { return -1; } } void accept(Visitor& v) override; std::shared_ptr ope_; }; class NotPredicate : public Ope { public: NotPredicate(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("NotPredicate", s, n, sv, dt); auto save_error_pos = c.error_pos; c.nest_level++; auto& chldsv = c.push(); auto se = make_scope_exit([&]() { c.nest_level--; c.pop(); }); const auto& rule = *ope_; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { c.set_error_pos(s); return -1; } else { c.error_pos = save_error_pos; return 0; } } void accept(Visitor& v) override; std::shared_ptr ope_; }; class LiteralString : public Ope { public: LiteralString(const std::string& s) : lit_(s) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; void accept(Visitor& v) override; std::string lit_; }; class CharacterClass : public Ope { public: CharacterClass(const std::string& chars) : chars_(chars) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("CharacterClass", s, n, sv, dt); // TODO: UTF8 support if (n < 1) { c.set_error_pos(s); return -1; } auto ch = s[0]; auto i = 0u; while (i < chars_.size()) { if (i + 2 < chars_.size() && chars_[i + 1] == '-') { if (chars_[i] <= ch && ch <= chars_[i + 2]) { return 1; } i += 3; } else { if (chars_[i] == ch) { return 1; } i += 1; } } c.set_error_pos(s); return -1; } void accept(Visitor& v) override; std::string chars_; }; class Character : public Ope { public: Character(char ch) : ch_(ch) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("Character", s, n, sv, dt); // TODO: UTF8 support if (n < 1 || s[0] != ch_) { c.set_error_pos(s); return -1; } return 1; } void accept(Visitor& v) override; char ch_; }; class AnyCharacter : public Ope { public: size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("AnyCharacter", s, n, sv, dt); // TODO: UTF8 support if (n < 1) { c.set_error_pos(s); return -1; } return 1; } void accept(Visitor& v) override; }; class Capture : public Ope { public: Capture(const std::shared_ptr& ope, MatchAction ma, size_t id, const std::string& name) : ope_(ope), match_action_(ma), id_(id), name_(name) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { const auto& rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); if (success(len) && match_action_) { match_action_(s, len, id_, name_); } return len; } void accept(Visitor& v) override; std::shared_ptr ope_; private: MatchAction match_action_; size_t id_; std::string name_; }; class TokenBoundary : public Ope { public: TokenBoundary(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { const auto& rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); if (success(len)) { sv.s = s; sv.n = len; } return len; } void accept(Visitor& v) override; std::shared_ptr ope_; }; class Ignore : public Ope { public: Ignore(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { const auto& rule = *ope_; auto& chldsv = c.push(); auto se = make_scope_exit([&]() { c.pop(); }); return rule.parse(s, n, chldsv, c, dt); } void accept(Visitor& v) override; std::shared_ptr ope_; }; typedef std::function Parser; class User : public Ope { public: User(Parser fn) : fn_(fn) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("User", s, n, sv, dt); assert(fn_); return fn_(s, n, sv, dt); } void accept(Visitor& v) override; std::function fn_; }; class WeakHolder : public Ope { public: WeakHolder(const std::shared_ptr& ope) : weak_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { auto ope = weak_.lock(); assert(ope); const auto& rule = *ope; return rule.parse(s, n, sv, c, dt); } void accept(Visitor& v) override; std::weak_ptr weak_; }; class Holder : public Ope { public: Holder(Definition* outer) : outer_(outer) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; void accept(Visitor& v) override; any reduce(const SemanticValues& sv, any& dt) const; std::shared_ptr ope_; Definition* outer_; friend class Definition; }; class DefinitionReference : public Ope { public: DefinitionReference( const std::unordered_map& grammar, const std::string& name, const char* s) : grammar_(grammar) , name_(name) , s_(s) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; void accept(Visitor& v) override; std::shared_ptr get_rule() const; const std::unordered_map& grammar_; const std::string name_; const char* s_; private: mutable std::once_flag init_; mutable std::shared_ptr rule_; }; class Whitespace : public Ope { public: Whitespace(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { if (c.in_whiltespace) { return 0; } c.in_whiltespace = true; auto se = make_scope_exit([&]() { c.in_whiltespace = false; }); const auto& rule = *ope_; return rule.parse(s, n, sv, c, dt); } void accept(Visitor& v) override; std::shared_ptr ope_; }; /* * Visitor */ struct Ope::Visitor { virtual void visit(Sequence& ope) {} virtual void visit(PrioritizedChoice& ope) {} virtual void visit(ZeroOrMore& ope) {} virtual void visit(OneOrMore& ope) {} virtual void visit(Option& ope) {} virtual void visit(AndPredicate& ope) {} virtual void visit(NotPredicate& ope) {} virtual void visit(LiteralString& ope) {} virtual void visit(CharacterClass& ope) {} virtual void visit(Character& ope) {} virtual void visit(AnyCharacter& ope) {} virtual void visit(Capture& ope) {} virtual void visit(TokenBoundary& ope) {} virtual void visit(Ignore& ope) {} virtual void visit(User& ope) {} virtual void visit(WeakHolder& ope) {} virtual void visit(Holder& ope) {} virtual void visit(DefinitionReference& ope) {} virtual void visit(Whitespace& ope) {} }; struct AssignIDToDefinition : public Ope::Visitor { void visit(Sequence& ope) override { for (auto op: ope.opes_) { op->accept(*this); } } void visit(PrioritizedChoice& ope) override { for (auto op: ope.opes_) { op->accept(*this); } } void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } void visit(OneOrMore& ope) override { ope.ope_->accept(*this); } void visit(Option& ope) override { ope.ope_->accept(*this); } void visit(AndPredicate& ope) override { ope.ope_->accept(*this); } void visit(NotPredicate& ope) override { ope.ope_->accept(*this); } void visit(Capture& ope) override { ope.ope_->accept(*this); } void visit(TokenBoundary& ope) override { ope.ope_->accept(*this); } void visit(Ignore& ope) override { ope.ope_->accept(*this); } void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); } void visit(Holder& ope) override; void visit(DefinitionReference& ope) override { ope.get_rule()->accept(*this); } std::unordered_map ids; }; struct IsToken : public Ope::Visitor { IsToken() : has_token_boundary(false), has_rule(false) {} void visit(Sequence& ope) override { for (auto op: ope.opes_) { op->accept(*this); } } void visit(PrioritizedChoice& ope) override { for (auto op: ope.opes_) { op->accept(*this); } } void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } void visit(OneOrMore& ope) override { ope.ope_->accept(*this); } void visit(Option& ope) override { ope.ope_->accept(*this); } void visit(Capture& ope) override { ope.ope_->accept(*this); } void visit(TokenBoundary& ope) override { has_token_boundary = true; } void visit(Ignore& ope) override { ope.ope_->accept(*this); } void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); } void visit(DefinitionReference& ope) override { has_rule = true; } bool is_token() const { return has_token_boundary || !has_rule; } bool has_token_boundary; bool has_rule; }; static const char* WHITESPACE_DEFINITION_NAME = "%whitespace"; /* * Definition */ class Definition { public: struct Result { bool ret; size_t len; const char* error_pos; const char* message_pos; const std::string message; }; Definition() : ignoreSemanticValue(false) , enablePackratParsing(false) , is_token(false) , has_token_boundary(false) , holder_(std::make_shared(this)) {} Definition(const Definition& rhs) : name(rhs.name) , ignoreSemanticValue(false) , enablePackratParsing(false) , is_token(false) , has_token_boundary(false) , holder_(rhs.holder_) { holder_->outer_ = this; } Definition(Definition&& rhs) : name(std::move(rhs.name)) , ignoreSemanticValue(rhs.ignoreSemanticValue) , whitespaceOpe(rhs.whitespaceOpe) , enablePackratParsing(rhs.enablePackratParsing) , is_token(rhs.is_token) , has_token_boundary(rhs.has_token_boundary) , holder_(std::move(rhs.holder_)) { holder_->outer_ = this; } Definition(const std::shared_ptr& ope) : ignoreSemanticValue(false) , enablePackratParsing(false) , is_token(false) , has_token_boundary(false) , holder_(std::make_shared(this)) { *this <= ope; } operator std::shared_ptr() { return std::make_shared(holder_); } Definition& operator<=(const std::shared_ptr& ope) { IsToken isToken; ope->accept(isToken); is_token = isToken.is_token(); has_token_boundary = isToken.has_token_boundary; holder_->ope_ = ope; return *this; } Result parse(const char* s, size_t n, const char* path = nullptr) const { SemanticValues sv; any dt; return parse_core(s, n, sv, dt, path); } Result parse(const char* s, const char* path = nullptr) const { auto n = strlen(s); return parse(s, n, path); } Result parse(const char* s, size_t n, any& dt, const char* path = nullptr) const { SemanticValues sv; return parse_core(s, n, sv, dt, path); } Result parse(const char* s, any& dt, const char* path = nullptr) const { auto n = strlen(s); return parse(s, n, dt, path); } template Result parse_and_get_value(const char* s, size_t n, T& val, const char* path = nullptr) const { SemanticValues sv; any dt; auto r = parse_core(s, n, sv, dt, path); if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) { val = sv[0].val.get(); } return r; } template Result parse_and_get_value(const char* s, T& val, const char* path = nullptr) const { auto n = strlen(s); return parse_and_get_value(s, n, val, path); } template Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const { SemanticValues sv; auto r = parse_core(s, n, sv, dt, path); if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) { val = sv[0].val.get(); } return r; } template Result parse_and_get_value(const char* s, any& dt, T& val, const char* path = nullptr) const { auto n = strlen(s); return parse_and_get_value(s, n, dt, val, path); } Definition& operator=(Action a) { action = a; return *this; } template Definition& operator,(T fn) { operator=(fn); return *this; } Definition& operator~() { ignoreSemanticValue = true; return *this; } void accept(Ope::Visitor& v) { holder_->accept(v); } std::shared_ptr get_core_operator() { return holder_->ope_; } std::string name; size_t id; Action action; std::function enter; std::function exit; std::function error_message; bool ignoreSemanticValue; std::shared_ptr whitespaceOpe; bool enablePackratParsing; bool is_token; bool has_token_boundary; Tracer tracer; private: friend class DefinitionReference; Definition& operator=(const Definition& rhs); Definition& operator=(Definition&& rhs); Result parse_core(const char* s, size_t n, SemanticValues& sv, any& dt, const char* path) const { AssignIDToDefinition assignId; holder_->accept(assignId); Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer); auto len = holder_->parse(s, n, sv, cxt, dt); return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message }; } std::shared_ptr holder_; }; /* * Implementations */ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { c.trace("LiteralString", s, n, sv, dt); auto i = 0u; for (; i < lit_.size(); i++) { if (i >= n || s[i] != lit_[i]) { c.set_error_pos(s); return -1; } } // Skip whiltespace const auto d = c.definition_stack.back(); if (!d->is_token && c.whitespaceOpe) { auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt); if (fail(len)) { return -1; } i += len; } return i; } inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { if (!ope_) { throw std::logic_error("Uninitialized definition ope was used..."); } c.trace(outer_->name.c_str(), s, n, sv, dt); c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); size_t len; any val; const char* token_boundary_s = s; size_t token_boundary_n = n; c.packrat(s, outer_->id, len, val, [&](any& val) { c.definition_stack.push_back(outer_); auto& chldsv = c.push(); if (outer_->enter) { outer_->enter(dt); } auto se = make_scope_exit([&]() { c.definition_stack.pop_back(); c.pop(); if (outer_->exit) { outer_->exit(dt); } }); auto ope = ope_; if (!c.in_token && c.whitespaceOpe) { if (c.definition_stack.size() == 1) { if (outer_->is_token && !outer_->has_token_boundary) { ope = std::make_shared(c.whitespaceOpe, std::make_shared(ope_)); } else { ope = std::make_shared(c.whitespaceOpe, ope_); } } else if (outer_->is_token) { if (!outer_->has_token_boundary) { ope = std::make_shared(std::make_shared(ope_), c.whitespaceOpe); } else { ope = std::make_shared(ope_, c.whitespaceOpe); } } } const auto& rule = *ope; if (!c.in_token && outer_->is_token) { c.in_token = true; auto se = make_scope_exit([&]() { c.in_token = false; }); len = rule.parse(s, n, chldsv, c, dt); } else { len = rule.parse(s, n, chldsv, c, dt); } token_boundary_n = len; // Invoke action if (success(len)) { if (chldsv.s) { token_boundary_s = chldsv.s; token_boundary_n = chldsv.n; } else { chldsv.s = s; chldsv.n = len; } try { val = reduce(chldsv, dt); } catch (const parse_error& e) { if (e.what()) { if (c.message_pos < s) { c.message_pos = s; c.message = e.what(); } } len = -1; } } }); if (success(len)) { if (!outer_->ignoreSemanticValue) { sv.emplace_back(val, token_boundary_s, token_boundary_n); } } else { if (outer_->error_message) { if (c.message_pos < s) { c.message_pos = s; c.message = outer_->error_message(); } } } return len; } inline any Holder::reduce(const SemanticValues& sv, any& dt) const { if (outer_->action) { return outer_->action(sv, dt); } else if (sv.empty()) { return any(); } else { return sv.front().val; } } inline size_t DefinitionReference::parse( const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { const auto& rule = *get_rule(); return rule.parse(s, n, sv, c, dt); } inline std::shared_ptr DefinitionReference::get_rule() const { if (!rule_) { std::call_once(init_, [this]() { rule_ = grammar_.at(name_).holder_; }); } assert(rule_); return rule_; }; inline void Sequence::accept(Visitor& v) { v.visit(*this); } inline void PrioritizedChoice::accept(Visitor& v) { v.visit(*this); } inline void ZeroOrMore::accept(Visitor& v) { v.visit(*this); } inline void OneOrMore::accept(Visitor& v) { v.visit(*this); } inline void Option::accept(Visitor& v) { v.visit(*this); } inline void AndPredicate::accept(Visitor& v) { v.visit(*this); } inline void NotPredicate::accept(Visitor& v) { v.visit(*this); } inline void LiteralString::accept(Visitor& v) { v.visit(*this); } inline void CharacterClass::accept(Visitor& v) { v.visit(*this); } inline void Character::accept(Visitor& v) { v.visit(*this); } inline void AnyCharacter::accept(Visitor& v) { v.visit(*this); } inline void Capture::accept(Visitor& v) { v.visit(*this); } inline void TokenBoundary::accept(Visitor& v) { v.visit(*this); } inline void Ignore::accept(Visitor& v) { v.visit(*this); } inline void User::accept(Visitor& v) { v.visit(*this); } inline void WeakHolder::accept(Visitor& v) { v.visit(*this); } inline void Holder::accept(Visitor& v) { v.visit(*this); } inline void DefinitionReference::accept(Visitor& v) { v.visit(*this); } inline void Whitespace::accept(Visitor& v) { v.visit(*this); } inline void AssignIDToDefinition::visit(Holder& ope) { auto p = (void*)ope.outer_; if (ids.count(p)) { return; } auto id = ids.size(); ids[p] = id; ope.outer_->id = id; ope.ope_->accept(*this); } /* * Factories */ template std::shared_ptr seq(Args&& ...args) { return std::make_shared(static_cast>(args)...); } template std::shared_ptr cho(Args&& ...args) { return std::make_shared(static_cast>(args)...); } inline std::shared_ptr zom(const std::shared_ptr& ope) { return std::make_shared(ope); } inline std::shared_ptr oom(const std::shared_ptr& ope) { return std::make_shared(ope); } inline std::shared_ptr opt(const std::shared_ptr& ope) { return std::make_shared