// // peglib.h // // Copyright (c) 2020 Yuji Hirose. All rights reserved. // MIT License // #ifndef CPPPEGLIB_PEGLIB_H #define CPPPEGLIB_PEGLIB_H #ifndef PEGLIB_USE_STD_ANY #ifdef _MSVC_LANG #define PEGLIB_USE_STD_ANY _MSVC_LANG >= 201703L #elif defined(__cplusplus) #define PEGLIB_USE_STD_ANY __cplusplus >= 201703L #endif #endif // PEGLIB_USE_STD_ANY #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if PEGLIB_USE_STD_ANY #include #endif // guard for older versions of VC++ #ifdef _MSC_VER #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 #error "Requires complete C+11 support" #endif #endif namespace peg { /*----------------------------------------------------------------------------- * any *---------------------------------------------------------------------------*/ #if PEGLIB_USE_STD_ANY using any = std::any; // Define a function alias to std::any_cast using perfect forwarding template auto any_cast(Args &&... 
args) -> decltype(std::any_cast(std::forward(args)...)) { return std::any_cast(std::forward(args)...); } #else class any { public: any() = default; any(const any &rhs) : content_(rhs.clone()) {} any(any &&rhs) : content_(rhs.content_) { rhs.content_ = nullptr; } template any(const T &value) : content_(new holder(value)) {} any &operator=(const any &rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.clone(); } return *this; } any &operator=(any &&rhs) { if (this != &rhs) { if (content_) { delete content_; } content_ = rhs.content_; rhs.content_ = nullptr; } return *this; } ~any() { delete content_; } bool has_value() const { return content_ != nullptr; } template friend T &any_cast(any &val); template friend const T &any_cast(const any &val); private: struct placeholder { virtual ~placeholder() {} virtual placeholder *clone() const = 0; }; template struct holder : placeholder { holder(const T &value) : value_(value) {} placeholder *clone() const override { return new holder(value_); } T value_; }; placeholder *clone() const { return content_ ? content_->clone() : nullptr; } placeholder *content_ = nullptr; }; template T &any_cast(any &val) { if (!val.content_) { throw std::bad_cast(); } auto p = dynamic_cast *>(val.content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template <> inline any &any_cast(any &val) { return val; } template const T &any_cast(const any &val) { assert(val.content_); auto p = dynamic_cast *>(val.content_); assert(p); if (!p) { throw std::bad_cast(); } return p->value_; } template <> inline const any &any_cast(const any &val) { return val; } #endif /*----------------------------------------------------------------------------- * scope_exit *---------------------------------------------------------------------------*/ // This is based on // "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189". 
// Scope guard: invokes the stored callable when the guard is destroyed,
// unless release() has been called first.
template <typename EF> struct scope_exit {
  explicit scope_exit(EF &&f)
      : exit_function(std::move(f)), execute_on_destruction{true} {}

  // Transfers the pending action; the moved-from guard is disarmed.
  scope_exit(scope_exit &&rhs)
      : exit_function(std::move(rhs.exit_function)),
        execute_on_destruction{rhs.execute_on_destruction} {
    rhs.release();
  }

  ~scope_exit() {
    if (execute_on_destruction) { this->exit_function(); }
  }

  // Disarms the guard: the callable will not run on destruction.
  void release() { this->execute_on_destruction = false; }

private:
  scope_exit(const scope_exit &) = delete;
  void operator=(const scope_exit &) = delete;
  scope_exit &operator=(scope_exit &&) = delete;

  EF exit_function;
  bool execute_on_destruction;
};

// Creates a scope_exit for `exit_function`.
// The constructor takes `EF &&`, so this is meant to be called with an rvalue
// callable (e.g. a lambda expression written at the call site).
template <typename EF>
auto make_scope_exit(EF &&exit_function) -> scope_exit<EF> {
  return scope_exit<typename std::remove_reference<EF>::type>(
      std::forward<EF>(exit_function));
}

/*-----------------------------------------------------------------------------
 *  UTF8 functions
 *---------------------------------------------------------------------------*/

// Returns the byte length (1-4) announced by the lead byte s8[0], or 0 when
// `l` is 0 or the byte is not a valid lead byte. Continuation bytes are not
// inspected.
inline size_t codepoint_length(const char *s8, size_t l) {
  if (l) {
    auto b = static_cast<unsigned char>(s8[0]);
    if ((b & 0x80) == 0) {
      return 1;
    } else if ((b & 0xE0) == 0xC0) {
      return 2;
    } else if ((b & 0xF0) == 0xE0) {
      return 3;
    } else if ((b & 0xF8) == 0xF0) {
      return 4;
    }
  }
  return 0;
}

// Encodes `cp` as UTF-8 into `buff` (which must have room for 4 bytes).
// Returns the number of bytes written, or 0 for a surrogate (D800-DFFF) or a
// value of 0x110000 and above, in which case nothing is written.
inline size_t encode_codepoint(char32_t cp, char *buff) {
  if (cp < 0x0080) {
    buff[0] = static_cast<char>(cp & 0x7F);
    return 1;
  } else if (cp < 0x0800) {
    buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
    buff[1] = static_cast<char>(0x80 | (cp & 0x3F));
    return 2;
  } else if (cp < 0xD800) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
    return 3;
  } else if (cp < 0xE000) {
    // D800 - DFFF is invalid...
    return 0;
  } else if (cp < 0x10000) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    buff[2] = static_cast<char>(0x80 | (cp & 0x3F));
    return 3;
  } else if (cp < 0x110000) {
    buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
    buff[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
    buff[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    buff[3] = static_cast<char>(0x80 | (cp & 0x3F));
    return 4;
  }
  return 0;
}

// Convenience overload: returns the UTF-8 bytes of `cp` as a string (empty
// when `cp` cannot be encoded).
inline std::string encode_codepoint(char32_t cp) {
  char buff[4];
  auto l = encode_codepoint(cp, buff);
  return std::string(buff, l);
}

// Decodes one code point from the first bytes of [s8, s8 + l).
// On success sets `bytes` to the sequence length and `cp` to the decoded
// value and returns true. Returns false (leaving `bytes`/`cp` untouched) when
// `l` is 0, the lead byte is invalid, or fewer than the announced number of
// bytes are available. Continuation bytes are masked, not validated.
inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
                             char32_t &cp) {
  if (l) {
    auto b = static_cast<unsigned char>(s8[0]);
    if ((b & 0x80) == 0) {
      bytes = 1;
      cp = b;
      return true;
    } else if ((b & 0xE0) == 0xC0) {
      if (l >= 2) {
        bytes = 2;
        cp = ((static_cast<char32_t>(s8[0] & 0x1F)) << 6) |
             (static_cast<char32_t>(s8[1] & 0x3F));
        return true;
      }
    } else if ((b & 0xF0) == 0xE0) {
      if (l >= 3) {
        bytes = 3;
        cp = ((static_cast<char32_t>(s8[0] & 0x0F)) << 12) |
             ((static_cast<char32_t>(s8[1] & 0x3F)) << 6) |
             (static_cast<char32_t>(s8[2] & 0x3F));
        return true;
      }
    } else if ((b & 0xF8) == 0xF0) {
      if (l >= 4) {
        bytes = 4;
        cp = ((static_cast<char32_t>(s8[0] & 0x07)) << 18) |
             ((static_cast<char32_t>(s8[1] & 0x3F)) << 12) |
             ((static_cast<char32_t>(s8[2] & 0x3F)) << 6) |
             (static_cast<char32_t>(s8[3] & 0x3F));
        return true;
      }
    }
  }
  return false;
}

// Decodes one code point into `out`; returns the number of bytes consumed,
// or 0 on failure (`out` is then left unchanged).
inline size_t decode_codepoint(const char *s8, size_t l, char32_t &out) {
  size_t bytes = 0;
  if (decode_codepoint(s8, l, bytes, out)) { return bytes; }
  return 0;
}

// Decodes one code point and returns it; returns 0 when decoding fails.
inline char32_t decode_codepoint(const char *s8, size_t l) {
  char32_t out = 0;
  decode_codepoint(s8, l, out);
  return out;
}

// Decodes a UTF-8 byte range into UTF-32. The input is split at every byte
// that is not a continuation byte (10xxxxxx); each piece is decoded on its
// own, and a piece that fails to decode contributes U+0000.
inline std::u32string decode(const char *s8, size_t l) {
  std::u32string out;
  size_t i = 0;
  while (i < l) {
    auto beg = i++;
    while (i < l && (s8[i] & 0xc0) == 0x80) {
      i++;
    }
    out += decode_codepoint(&s8[beg], (i - beg));
  }
  return out;
}

/*-----------------------------------------------------------------------------
 *  resolve_escape_sequence
*---------------------------------------------------------------------------*/ inline bool is_hex(char c, int &v) { if ('0' <= c && c <= '9') { v = c - '0'; return true; } else if ('a' <= c && c <= 'f') { v = c - 'a' + 10; return true; } else if ('A' <= c && c <= 'F') { v = c - 'A' + 10; return true; } return false; } inline bool is_digit(char c, int &v) { if ('0' <= c && c <= '9') { v = c - '0'; return true; } return false; } inline std::pair parse_hex_number(const char *s, size_t n, size_t i) { int ret = 0; int val; while (i < n && is_hex(s[i], val)) { ret = static_cast(ret * 16 + val); i++; } return std::make_pair(ret, i); } inline std::pair parse_octal_number(const char *s, size_t n, size_t i) { int ret = 0; int val; while (i < n && is_digit(s[i], val)) { ret = static_cast(ret * 8 + val); i++; } return std::make_pair(ret, i); } inline std::string resolve_escape_sequence(const char *s, size_t n) { std::string r; r.reserve(n); size_t i = 0; while (i < n) { auto ch = s[i]; if (ch == '\\') { i++; if (i == n) { throw std::runtime_error("Invalid escape sequence..."); } switch (s[i]) { case 'n': r += '\n'; i++; break; case 'r': r += '\r'; i++; break; case 't': r += '\t'; i++; break; case '\'': r += '\''; i++; break; case '"': r += '"'; i++; break; case '[': r += '['; i++; break; case ']': r += ']'; i++; break; case '\\': r += '\\'; i++; break; case 'x': case 'u': { char32_t cp; std::tie(cp, i) = parse_hex_number(s, n, i + 1); r += encode_codepoint(cp); break; } default: { char32_t cp; std::tie(cp, i) = parse_octal_number(s, n, i); r += encode_codepoint(cp); break; } } } else { r += ch; i++; } } return r; } /*----------------------------------------------------------------------------- * Trie *---------------------------------------------------------------------------*/ class Trie { public: Trie() = default; Trie(const Trie &) = default; Trie(const std::vector &items) { for (const auto &item : items) { for (size_t len = 1; len <= item.size(); len++) { auto last = len 
== item.size(); std::string s(item.c_str(), len); auto it = dic_.find(s); if (it == dic_.end()) { dic_.emplace(s, Info{last, last}); } else if (last) { it->second.match = true; } else { it->second.done = false; } } } } size_t match(const char *text, size_t text_len) const { size_t match_len = 0; { auto done = false; size_t len = 1; while (!done && len <= text_len) { std::string s(text, len); auto it = dic_.find(s); if (it == dic_.end()) { done = true; } else { if (it->second.match) { match_len = len; } if (it->second.done) { done = true; } } len += 1; } } return match_len; } private: struct Info { bool done; bool match; }; std::unordered_map dic_; }; /*----------------------------------------------------------------------------- * PEG *---------------------------------------------------------------------------*/ /* * Line information utility function */ inline std::pair line_info(const char *start, const char *cur) { auto p = start; auto col_ptr = p; auto no = 1; while (p < cur) { if (*p == '\n') { no++; col_ptr = p + 1; } p++; } auto col = p - col_ptr + 1; return std::make_pair(no, col); } /* * String tag */ inline constexpr unsigned int str2tag(const char *str, unsigned int h = 0) { return (*str == '\0') ? 
h : str2tag(str + 1, (h * 33) ^ static_cast(*str)); } namespace udl { inline constexpr unsigned int operator"" _(const char *s, size_t) { return str2tag(s); } } // namespace udl /* * Semantic values */ struct SemanticValues : protected std::vector { // Input text const char *path = nullptr; const char *ss = nullptr; const std::vector *source_line_index = nullptr; // Matched string const char *c_str() const { return s_; } size_t length() const { return n_; } std::string str() const { return std::string(s_, n_); } // Definition name const std::string &name() const { return name_; } std::vector tags; // Line number and column at which the matched string is std::pair line_info() const { const auto &idx = *source_line_index; auto cur = static_cast(std::distance(ss, s_)); auto it = std::lower_bound( idx.begin(), idx.end(), cur, [](size_t element, size_t value) { return element < value; }); auto id = static_cast(std::distance(idx.begin(), it)); auto off = cur - (id == 0 ? 0 : idx[id - 1] + 1); return std::make_pair(id + 1, off + 1); } // Choice count size_t choice_count() const { return choice_count_; } // Choice number (0 based index) size_t choice() const { return choice_; } // Tokens std::vector> tokens; std::string token(size_t id = 0) const { if (!tokens.empty()) { assert(id < tokens.size()); const auto &tok = tokens[id]; return std::string(tok.first, tok.second); } return std::string(s_, n_); } // Transform the semantic value vector to another vector template auto transform(size_t beg = 0, size_t end = static_cast(-1)) const -> vector { return this->transform(beg, end, [](const any &v) { return any_cast(v); }); } using std::vector::iterator; using std::vector::const_iterator; using std::vector::size; using std::vector::empty; using std::vector::assign; using std::vector::begin; using std::vector::end; using std::vector::rbegin; using std::vector::rend; using std::vector::operator[]; using std::vector::at; using std::vector::resize; using std::vector::front; using 
std::vector::back; using std::vector::push_back; using std::vector::pop_back; using std::vector::insert; using std::vector::erase; using std::vector::clear; using std::vector::swap; using std::vector::emplace; using std::vector::emplace_back; private: friend class Context; friend class Sequence; friend class PrioritizedChoice; friend class Holder; friend class PrecedenceClimbing; const char *s_ = nullptr; size_t n_ = 0; size_t choice_count_ = 0; size_t choice_ = 0; std::string name_; template auto transform(F f) const -> vector::type> { vector::type> r; for (const auto &v : *this) { r.emplace_back(f(v)); } return r; } template auto transform(size_t beg, size_t end, F f) const -> vector::type> { vector::type> r; end = (std::min)(end, size()); for (size_t i = beg; i < end; i++) { r.emplace_back(f((*this)[i])); } return r; } }; /* * Semantic action */ template ::value, std::nullptr_t>::type = nullptr, typename... Args> any call(F fn, Args &&... args) { fn(std::forward(args)...); return any(); } template ::type, any>::value, std::nullptr_t>::type = nullptr, typename... Args> any call(F fn, Args &&... args) { return fn(std::forward(args)...); } template ::value && !std::is_same::type, any>::value, std::nullptr_t>::type = nullptr, typename... Args> any call(F fn, Args &&... 
args) { return any(fn(std::forward(args)...)); } class Action { public: Action() = default; Action(const Action &rhs) = default; template ::value && !std::is_same::value, std::nullptr_t>::type = nullptr> Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {} template ::value, std::nullptr_t>::type = nullptr> Action(F fn) : fn_(make_adaptor(fn, fn)) {} template ::value, std::nullptr_t>::type = nullptr> Action(F /*fn*/) {} template ::value && !std::is_same::value, std::nullptr_t>::type = nullptr> void operator=(F fn) { fn_ = make_adaptor(fn, &F::operator()); } template ::value, std::nullptr_t>::type = nullptr> void operator=(F fn) { fn_ = make_adaptor(fn, fn); } template ::value, std::nullptr_t>::type = nullptr> void operator=(F /*fn*/) {} Action &operator=(const Action &rhs) = default; operator bool() const { return bool(fn_); } any operator()(SemanticValues &sv, any &dt) const { return fn_(sv, dt); } private: template struct TypeAdaptor_sv { TypeAdaptor_sv(std::function fn) : fn_(fn) {} any operator()(SemanticValues &sv, any & /*dt*/) { return call(fn_, sv); } std::function fn_; }; template struct TypeAdaptor_csv { TypeAdaptor_csv(std::function fn) : fn_(fn) {} any operator()(SemanticValues &sv, any & /*dt*/) { return call(fn_, sv); } std::function fn_; }; template struct TypeAdaptor_sv_dt { TypeAdaptor_sv_dt(std::function fn) : fn_(fn) {} any operator()(SemanticValues &sv, any &dt) { return call(fn_, sv, dt); } std::function fn_; }; template struct TypeAdaptor_csv_dt { TypeAdaptor_csv_dt(std::function fn) : fn_(fn) {} any operator()(SemanticValues &sv, any &dt) { return call(fn_, sv, dt); } std::function fn_; }; typedef std::function Fty; template Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv) const) { return TypeAdaptor_sv(fn); } template Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv) const) { return TypeAdaptor_csv(fn); } template Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv)) { return TypeAdaptor_sv(fn); } template Fty make_adaptor(F fn, 
R (F::*)(const SemanticValues &sv)) { return TypeAdaptor_csv(fn); } template Fty make_adaptor(F fn, R (*)(SemanticValues &sv)) { return TypeAdaptor_sv(fn); } template Fty make_adaptor(F fn, R (*)(const SemanticValues &sv)) { return TypeAdaptor_csv(fn); } template Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv, any &dt) const) { return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt) const) { return TypeAdaptor_csv_dt(fn); } template Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv, any &dt)) { return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt)) { return TypeAdaptor_csv_dt(fn); } template Fty make_adaptor(F fn, R (*)(SemanticValues &sv, any &dt)) { return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, R (*)(const SemanticValues &sv, any &dt)) { return TypeAdaptor_csv_dt(fn); } Fty fn_; }; /* * Semantic predicate */ // Note: 'parse_error' exception class should be be used in sematic action // handlers to reject the rule. struct parse_error { parse_error() = default; parse_error(const char *s) : s_(s) {} const char *what() const { return s_.empty() ? nullptr : s_.c_str(); } private: std::string s_; }; /* * Result */ inline bool success(size_t len) { return len != static_cast(-1); } inline bool fail(size_t len) { return len == static_cast(-1); } /* * Context */ class Context; class Ope; class Definition; typedef std::function TracerEnter; typedef std::function TracerLeave; class Context { public: const char *path; const char *s; const size_t l; std::vector source_line_index; const char *error_pos = nullptr; const char *message_pos = nullptr; std::string message; // TODO: should be `int`. 
std::vector> value_stack; size_t value_stack_size = 0; std::vector>> args_stack; bool in_token = false; std::shared_ptr whitespaceOpe; bool in_whitespace = false; std::shared_ptr wordOpe; std::vector> capture_scope_stack; size_t capture_scope_stack_size = 0; const size_t def_count; const bool enablePackratParsing; std::vector cache_registered; std::vector cache_success; std::map, std::tuple> cache_values; TracerEnter tracer_enter; TracerLeave tracer_leave; Context(const char *a_path, const char *a_s, size_t a_l, size_t a_def_count, std::shared_ptr a_whitespaceOpe, std::shared_ptr a_wordOpe, bool a_enablePackratParsing, TracerEnter a_tracer_enter, TracerLeave a_tracer_leave) : path(a_path), s(a_s), l(a_l), whitespaceOpe(a_whitespaceOpe), wordOpe(a_wordOpe), def_count(a_def_count), enablePackratParsing(a_enablePackratParsing), cache_registered(enablePackratParsing ? def_count * (l + 1) : 0), cache_success(enablePackratParsing ? def_count * (l + 1) : 0), tracer_enter(a_tracer_enter), tracer_leave(a_tracer_leave) { for (size_t pos = 0; pos < l; pos++) { if (s[pos] == '\n') { source_line_index.push_back(pos); } } source_line_index.push_back(l); args_stack.resize(1); push_capture_scope(); } ~Context() { assert(!value_stack_size); } Context(const Context &) = delete; Context(Context &&) = delete; Context operator=(const Context &) = delete; template void packrat(const char *a_s, size_t def_id, size_t &len, any &val, T fn) { if (!enablePackratParsing) { fn(val); return; } auto col = a_s - s; auto idx = def_count * static_cast(col) + def_id; if (cache_registered[idx]) { if (cache_success[idx]) { auto key = std::make_pair(col, def_id); std::tie(len, val) = cache_values[key]; return; } else { len = static_cast(-1); return; } } else { fn(val); cache_registered[idx] = true; cache_success[idx] = success(len); if (success(len)) { auto key = std::make_pair(col, def_id); cache_values[key] = std::make_pair(len, val); } return; } } SemanticValues &push() { assert(value_stack_size <= 
value_stack.size()); if (value_stack_size == value_stack.size()) { value_stack.emplace_back(std::make_shared()); } else { auto &sv = *value_stack[value_stack_size]; if (!sv.empty()) { sv.clear(); sv.tags.clear(); } sv.s_ = nullptr; sv.n_ = 0; sv.choice_count_ = 0; sv.choice_ = 0; sv.tokens.clear(); } auto &sv = *value_stack[value_stack_size++]; sv.path = path; sv.ss = s; sv.source_line_index = &source_line_index; return sv; } void pop() { value_stack_size--; } void push_args(std::vector> &&args) { args_stack.emplace_back(args); } void pop_args() { args_stack.pop_back(); } const std::vector> &top_args() const { return args_stack[args_stack.size() - 1]; } void push_capture_scope() { assert(capture_scope_stack_size <= capture_scope_stack.size()); if (capture_scope_stack_size == capture_scope_stack.size()) { capture_scope_stack.emplace_back(std::map()); } else { auto &cs = capture_scope_stack[capture_scope_stack_size]; cs.clear(); } capture_scope_stack_size++; } void pop_capture_scope() { capture_scope_stack_size--; } void shift_capture_values() { assert(capture_scope_stack.size() >= 2); auto curr = &capture_scope_stack[capture_scope_stack_size - 1]; auto prev = curr - 1; for (const auto &kv : *curr) { (*prev)[kv.first] = kv.second; } } void set_error_pos(const char *a_s) { if (error_pos < a_s) error_pos = a_s; } void trace_enter(const char *name, const char *a_s, size_t n, SemanticValues &sv, any &dt) const; void trace_leave(const char *name, const char *a_s, size_t n, SemanticValues &sv, any &dt, size_t len) const; bool is_traceable(const Ope &ope) const; mutable size_t next_trace_id = 0; mutable std::list trace_ids; }; /* * Parser operators */ class Ope { public: struct Visitor; virtual ~Ope() {} size_t parse(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const; virtual size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const = 0; virtual void accept(Visitor &v) = 0; }; class Sequence : public Ope { public: 
template Sequence(const Args &... args) : opes_{static_cast>(args)...} {} Sequence(const std::vector> &opes) : opes_(opes) {} Sequence(std::vector> &&opes) : opes_(opes) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { auto &chldsv = c.push(); auto pop_se = make_scope_exit([&]() { c.pop(); }); size_t i = 0; for (const auto &ope : opes_) { const auto &rule = *ope; auto len = rule.parse(s + i, n - i, chldsv, c, dt); if (fail(len)) { return static_cast(-1); } i += len; } if (!chldsv.empty()) { for (size_t j = 0; j < chldsv.size(); j++) { sv.emplace_back(std::move(chldsv[j])); } } if (!chldsv.tags.empty()) { for (size_t j = 0; j < chldsv.tags.size(); j++) { sv.tags.emplace_back(std::move(chldsv.tags[j])); } } sv.s_ = chldsv.c_str(); sv.n_ = chldsv.length(); if (!chldsv.tokens.empty()) { for (size_t j = 0; j < chldsv.tokens.size(); j++) { sv.tokens.emplace_back(std::move(chldsv.tokens[j])); } } return i; } void accept(Visitor &v) override; std::vector> opes_; }; class PrioritizedChoice : public Ope { public: template PrioritizedChoice(const Args &... 
args) : opes_{static_cast>(args)...} {} PrioritizedChoice(const std::vector> &opes) : opes_(opes) {} PrioritizedChoice(std::vector> &&opes) : opes_(opes) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { size_t id = 0; for (const auto &ope : opes_) { auto &chldsv = c.push(); c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop(); c.pop_capture_scope(); }); const auto &rule = *ope; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { if (!chldsv.empty()) { for (size_t i = 0; i < chldsv.size(); i++) { sv.emplace_back(std::move(chldsv[i])); } } if (!chldsv.tags.empty()) { for (size_t i = 0; i < chldsv.tags.size(); i++) { sv.tags.emplace_back(std::move(chldsv.tags[i])); } } sv.s_ = chldsv.c_str(); sv.n_ = chldsv.length(); sv.choice_count_ = opes_.size(); sv.choice_ = id; if (!chldsv.tokens.empty()) { for (size_t i = 0; i < chldsv.tokens.size(); i++) { sv.tokens.emplace_back(std::move(chldsv.tokens[i])); } } c.shift_capture_values(); return len; } id++; } return static_cast(-1); } void accept(Visitor &v) override; size_t size() const { return opes_.size(); } std::vector> opes_; }; class ZeroOrMore : public Ope { public: ZeroOrMore(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { auto save_error_pos = c.error_pos; size_t i = 0; while (n - i > 0) { c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); const auto &rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); if (success(len)) { c.shift_capture_values(); } else { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + static_cast(save_sv_size)); sv.tags.erase(sv.tags.begin() + static_cast(save_sv_size)); } if (sv.tokens.size() != save_tok_size) { sv.tokens.erase(sv.tokens.begin() + static_cast(save_tok_size)); } c.error_pos = save_error_pos; 
break; } i += len; } return i; } void accept(Visitor &v) override; std::shared_ptr ope_; }; class OneOrMore : public Ope { public: OneOrMore(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { size_t len = 0; { c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); const auto &rule = *ope_; len = rule.parse(s, n, sv, c, dt); if (success(len)) { c.shift_capture_values(); } else { return static_cast(-1); } } auto save_error_pos = c.error_pos; auto i = len; while (n - i > 0) { c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); const auto &rule = *ope_; len = rule.parse(s + i, n - i, sv, c, dt); if (success(len)) { c.shift_capture_values(); } else { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + static_cast(save_sv_size)); sv.tags.erase(sv.tags.begin() + static_cast(save_sv_size)); } if (sv.tokens.size() != save_tok_size) { sv.tokens.erase(sv.tokens.begin() + static_cast(save_tok_size)); } c.error_pos = save_error_pos; break; } i += len; } return i; } void accept(Visitor &v) override; std::shared_ptr ope_; }; class Option : public Ope { public: Option(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { auto save_error_pos = c.error_pos; auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); const auto &rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); if (success(len)) { c.shift_capture_values(); return len; } else { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + static_cast(save_sv_size)); sv.tags.erase(sv.tags.begin() + static_cast(save_sv_size)); } if (sv.tokens.size() != save_tok_size) { sv.tokens.erase(sv.tokens.begin() + 
static_cast(save_tok_size)); } c.error_pos = save_error_pos; return 0; } } void accept(Visitor &v) override; std::shared_ptr ope_; }; class AndPredicate : public Ope { public: AndPredicate(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues & /*sv*/, Context &c, any &dt) const override { auto &chldsv = c.push(); c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop(); c.pop_capture_scope(); }); const auto &rule = *ope_; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { return 0; } else { return static_cast(-1); } } void accept(Visitor &v) override; std::shared_ptr ope_; }; class NotPredicate : public Ope { public: NotPredicate(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues & /*sv*/, Context &c, any &dt) const override { auto save_error_pos = c.error_pos; auto &chldsv = c.push(); c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop(); c.pop_capture_scope(); }); const auto &rule = *ope_; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { c.set_error_pos(s); return static_cast(-1); } else { c.error_pos = save_error_pos; return 0; } } void accept(Visitor &v) override; std::shared_ptr ope_; }; class Dictionary : public Ope, public std::enable_shared_from_this { public: Dictionary(const std::vector &v) : trie_(v) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override; void accept(Visitor &v) override; Trie trie_; }; class LiteralString : public Ope, public std::enable_shared_from_this { public: LiteralString(const std::string &s, bool ignore_case) : lit_(s), ignore_case_(ignore_case), init_is_word_(false), is_word_(false) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override; void accept(Visitor &v) override; std::string lit_; bool ignore_case_; mutable bool init_is_word_; mutable bool is_word_; }; class CharacterClass : public 
Ope, public std::enable_shared_from_this { public: CharacterClass(const std::string &s, bool negated) : negated_(negated) { auto chars = decode(s.c_str(), s.length()); auto i = 0u; while (i < chars.size()) { if (i + 2 < chars.size() && chars[i + 1] == '-') { auto cp1 = chars[i]; auto cp2 = chars[i + 2]; ranges_.emplace_back(std::make_pair(cp1, cp2)); i += 3; } else { auto cp = chars[i]; ranges_.emplace_back(std::make_pair(cp, cp)); i += 1; } } assert(!ranges_.empty()); } CharacterClass(const std::vector> &ranges, bool negated) : ranges_(ranges), negated_(negated) { assert(!ranges_.empty()); } size_t parse_core(const char *s, size_t n, SemanticValues & /*sv*/, Context &c, any & /*dt*/) const override { if (n < 1) { c.set_error_pos(s); return static_cast(-1); } char32_t cp = 0; auto len = decode_codepoint(s, n, cp); for (const auto &range : ranges_) { if (range.first <= cp && cp <= range.second) { if (negated_) { c.set_error_pos(s); return static_cast(-1); } else { return len; } } } if (negated_) { return len; } else { c.set_error_pos(s); return static_cast(-1); } } void accept(Visitor &v) override; std::vector> ranges_; bool negated_; }; class Character : public Ope, public std::enable_shared_from_this { public: Character(char ch) : ch_(ch) {} size_t parse_core(const char *s, size_t n, SemanticValues & /*sv*/, Context &c, any & /*dt*/) const override { if (n < 1 || s[0] != ch_) { c.set_error_pos(s); return static_cast(-1); } return 1; } void accept(Visitor &v) override; char ch_; }; class AnyCharacter : public Ope, public std::enable_shared_from_this { public: size_t parse_core(const char *s, size_t n, SemanticValues & /*sv*/, Context &c, any & /*dt*/) const override { auto len = codepoint_length(s, n); if (len < 1) { c.set_error_pos(s); return static_cast(-1); } return len; } void accept(Visitor &v) override; }; class CaptureScope : public Ope { public: CaptureScope(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues 
&sv, Context &c, any &dt) const override { c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); const auto &rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); return len; } void accept(Visitor &v) override; std::shared_ptr ope_; }; class Capture : public Ope { public: typedef std::function MatchAction; Capture(const std::shared_ptr &ope, MatchAction ma) : ope_(ope), match_action_(ma) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { const auto &rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); if (success(len) && match_action_) { match_action_(s, len, c); } return len; } void accept(Visitor &v) override; std::shared_ptr ope_; MatchAction match_action_; }; class TokenBoundary : public Ope { public: TokenBoundary(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override; void accept(Visitor &v) override; std::shared_ptr ope_; }; class Ignore : public Ope { public: Ignore(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues & /*sv*/, Context &c, any &dt) const override { const auto &rule = *ope_; auto &chldsv = c.push(); auto se = make_scope_exit([&]() { c.pop(); }); return rule.parse(s, n, chldsv, c, dt); } void accept(Visitor &v) override; std::shared_ptr ope_; }; typedef std::function Parser; class User : public Ope { public: User(Parser fn) : fn_(fn) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context & /*c*/, any &dt) const override { assert(fn_); return fn_(s, n, sv, dt); } void accept(Visitor &v) override; std::function fn_; }; class WeakHolder : public Ope { public: WeakHolder(const std::shared_ptr &ope) : weak_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { auto ope = weak_.lock(); assert(ope); const auto &rule = *ope; return rule.parse(s, n, sv, c, dt); } 
void accept(Visitor &v) override; std::weak_ptr weak_; }; class Holder : public Ope { public: Holder(Definition *outer) : outer_(outer) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override; void accept(Visitor &v) override; any reduce(SemanticValues &sv, any &dt) const; const char *trace_name() const; std::shared_ptr ope_; Definition *outer_; mutable std::string trace_name_; friend class Definition; }; typedef std::unordered_map Grammar; class Reference : public Ope, public std::enable_shared_from_this { public: Reference(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector> &args) : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args), rule_(nullptr), iarg_(0) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override; void accept(Visitor &v) override; std::shared_ptr get_core_operator() const; const Grammar &grammar_; const std::string name_; const char *s_; const bool is_macro_; const std::vector> args_; Definition *rule_; size_t iarg_; }; class Whitespace : public Ope { public: Whitespace(const std::shared_ptr &ope) : ope_(ope) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { if (c.in_whitespace) { return 0; } c.in_whitespace = true; auto se = make_scope_exit([&]() { c.in_whitespace = false; }); const auto &rule = *ope_; return rule.parse(s, n, sv, c, dt); } void accept(Visitor &v) override; std::shared_ptr ope_; }; class BackReference : public Ope { public: BackReference(const std::string &name) : name_(name) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override; void accept(Visitor &v) override; std::string name_; }; class PrecedenceClimbing : public Ope { public: using BinOpeInfo = std::map>; PrecedenceClimbing(const std::shared_ptr &atom, const std::shared_ptr &binop, const BinOpeInfo &info, const 
Definition &rule) : atom_(atom), binop_(binop), info_(info), rule_(rule) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { return parse_expression(s, n, sv, c, dt, 0); } void accept(Visitor &v) override; std::shared_ptr atom_; std::shared_ptr binop_; BinOpeInfo info_; const Definition &rule_; private: size_t parse_expression(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt, size_t min_prec) const; Definition &get_reference_for_binop(Context &c) const; }; /* * Factories */ template std::shared_ptr seq(Args &&... args) { return std::make_shared(static_cast>(args)...); } template std::shared_ptr cho(Args &&... args) { return std::make_shared( static_cast>(args)...); } inline std::shared_ptr zom(const std::shared_ptr &ope) { return std::make_shared(ope); } inline std::shared_ptr oom(const std::shared_ptr &ope) { return std::make_shared(ope); } inline std::shared_ptr opt(const std::shared_ptr &ope) { return std::make_shared