From c416d84fea3bb88801dd140e749b12e9aa6fe443 Mon Sep 17 00:00:00 2001 From: yhirose Date: Sat, 28 Mar 2020 18:38:01 -0400 Subject: [PATCH] Regex-like repetition support. Fix #95 --- README.md | 1 + peglib.h | 285 +++++++++++++++++++++++--------------------------- test/test2.cc | 60 +++++++++++ test/test3.cc | 8 ++ 4 files changed, 201 insertions(+), 153 deletions(-) diff --git a/README.md b/README.md index 90a3268..cb76c15 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau * `'...'i` (Case-insensitive literal operator) * `[^...]` (Negated character class operator) + * `{2,5}` (Regex-like repetition operator) * `<` ... `>` (Token boundary operator) * `~` (Ignore operator) * `\x20` (Hex number char) diff --git a/peglib.h b/peglib.h index 4b08916..22fef39 100644 --- a/peglib.h +++ b/peglib.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1107,73 +1108,37 @@ public: std::vector> opes_; }; -class ZeroOrMore : public Ope { +class Repetition : public Ope { public: - ZeroOrMore(const std::shared_ptr &ope) : ope_(ope) {} + Repetition(const std::shared_ptr &ope, size_t min, size_t max) + : ope_(ope), min_(min), max_(max) {} size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const override { - auto save_error_pos = c.error_pos; + size_t count = 0; size_t i = 0; - while (n - i > 0) { + while (count < min_) { c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); - auto save_sv_size = sv.size(); - auto save_tok_size = sv.tokens.size(); const auto &rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); if (success(len)) { c.shift_capture_values(); } else { - if (sv.size() != save_sv_size) { - sv.erase(sv.begin() + static_cast(save_sv_size)); - sv.tags.erase(sv.tags.begin() + - static_cast(save_sv_size)); - } - if (sv.tokens.size() != save_tok_size) { - sv.tokens.erase(sv.tokens.begin() + - static_cast(save_tok_size)); - } - c.error_pos = save_error_pos; - break; + return static_cast(-1); } i += len; + count++; } - return i; - } - - void accept(Visitor &v) override; - - std::shared_ptr ope_; -}; - -class OneOrMore : public Ope { -public: - OneOrMore(const std::shared_ptr &ope) : ope_(ope) {} - size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, - any &dt) const override { - size_t len = 0; - { - c.push_capture_scope(); - auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); - const auto &rule = *ope_; - len = rule.parse(s, n, sv, c, dt); - if (success(len)) { - c.shift_capture_values(); - } else { - return static_cast(-1); - } - } auto save_error_pos = c.error_pos; - auto i = len; - while (n - i > 0) { + while (n - i > 0 && count < max_) { c.push_capture_scope(); auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); const auto &rule = *ope_; - len = rule.parse(s + i, n - i, sv, c, dt); + auto len = rule.parse(s + i, n - i, sv, c, dt); if (success(len)) { c.shift_capture_values(); } else { @@ -1190,49 +1155,34 @@ public: break; } i += len; + count++; } return i; } void accept(Visitor &v) override; - std::shared_ptr ope_; -}; + bool is_zom() const { + return min_ == 0 && max_ == std::numeric_limits::max(); + } -class Option : public Ope { -public: - Option(const std::shared_ptr &ope) : ope_(ope) {} + static std::shared_ptr zom(const std::shared_ptr &ope) { + return std::make_shared(ope, 0, + std::numeric_limits::max()); + } - size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, - any &dt) const override { - auto save_error_pos = c.error_pos; - auto save_sv_size = sv.size(); - auto save_tok_size = sv.tokens.size(); - c.push_capture_scope(); - auto se = make_scope_exit([&]() { c.pop_capture_scope(); }); - const auto &rule = *ope_; - auto len = rule.parse(s, n, sv, c, dt); - if (success(len)) { - c.shift_capture_values(); - return len; - } else { - if (sv.size() != save_sv_size) { - sv.erase(sv.begin() + static_cast(save_sv_size)); - sv.tags.erase(sv.tags.begin() + - static_cast(save_sv_size)); - } - if (sv.tokens.size() != save_tok_size) { - sv.tokens.erase(sv.tokens.begin() + - static_cast(save_tok_size)); - } - c.error_pos = save_error_pos; - return 0; - } + static std::shared_ptr oom(const std::shared_ptr &ope) { + return std::make_shared(ope, 1, + std::numeric_limits::max()); } - void accept(Visitor &v) override; + static std::shared_ptr opt(const std::shared_ptr &ope) { + return std::make_shared(ope, 0, 1); + } std::shared_ptr ope_; + size_t min_; + size_t max_; }; class AndPredicate : public Ope { @@ -1636,15 +1586,20 @@ template std::shared_ptr cho(Args &&... args) { } inline std::shared_ptr zom(const std::shared_ptr &ope) { - return std::make_shared(ope); + return Repetition::zom(ope); } inline std::shared_ptr oom(const std::shared_ptr &ope) { - return std::make_shared(ope); + return Repetition::oom(ope); } inline std::shared_ptr opt(const std::shared_ptr &ope) { - return std::make_shared