diff --git a/README.md b/README.md index 95342f1..dfb8a60 100644 --- a/README.md +++ b/README.md @@ -380,6 +380,39 @@ The following are available operators: | csc | Capture scope | | cap | Capture | | bkr | Back reference | +| usr | User defined parser | + +Adjust definitions +------------------ + +It's possible to add/override definitions. + +```cpp +auto syntax = R"( + ROOT <- _ 'Hello' _ NAME '!' _ +)"; + +Rules additional_rules = { + { + "NAME", usr([](const char* s, size_t n, SemanticValues& sv, any& dt) -> size_t { + static vector names = { "PEG", "BNF" }; + for (const auto& name: names) { + if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { + return name.size(); // processed length + } + } + return -1; // parse error + }) + }, + { + "~_", zom(cls(" \t\r\n")) + } +}; + +auto g = parser(syntax, additional_rules); + +assert(g.parse(" Hello BNF! ")); +``` Unicode support --------------- diff --git a/peglib.h b/peglib.h index 80f6ddb..d446ab3 100644 --- a/peglib.h +++ b/peglib.h @@ -1127,6 +1127,19 @@ public: typedef std::function Parser; +class User : public Ope +{ +public: + User(Parser fn) : fn_(fn) {} + size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { + c.trace("User", s, n, sv, dt); + assert(fn_); + return fn_(s, n, sv, dt); + } + void accept(Visitor& v) override; + std::function fn_; +}; + class WeakHolder : public Ope { public: @@ -1297,6 +1310,10 @@ inline std::shared_ptr ign(const std::shared_ptr& ope) { return std::make_shared(ope); } +inline std::shared_ptr usr(std::function fn) { + return std::make_shared(fn); +} + inline std::shared_ptr ref(const Grammar& grammar, const std::string& name, const char* s, bool is_macro, const std::vector>& args) { return std::make_shared(grammar, name, s, is_macro, args); } @@ -1330,6 +1347,7 @@ struct Ope::Visitor virtual void visit(Capture& /*ope*/) {} virtual void visit(TokenBoundary& /*ope*/) {} virtual void visit(Ignore& /*ope*/) {} + virtual void visit(User& /*ope*/) {} virtual void visit(WeakHolder& /*ope*/) {} virtual void visit(Holder& /*ope*/) {} virtual void visit(Reference& /*ope*/) {} @@ -1443,6 +1461,7 @@ struct DetectLeftRecursion : public Ope::Visitor { void visit(Capture& ope) override { ope.ope_->accept(*this); } void visit(TokenBoundary& ope) override { ope.ope_->accept(*this); } void visit(Ignore& ope) override { ope.ope_->accept(*this); } + void visit(User& /*ope*/) override { done_ = true; } void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); } void visit(Holder& ope) override { ope.ope_->accept(*this); } void visit(Reference& ope) override; @@ -2010,6 +2029,7 @@ inline void CaptureScope::accept(Visitor& v) { v.visit(*this); } inline void Capture::accept(Visitor& v) { v.visit(*this); } inline void TokenBoundary::accept(Visitor& v) { v.visit(*this); } inline void Ignore::accept(Visitor& v) { v.visit(*this); } +inline void User::accept(Visitor& v) { v.visit(*this); } inline void WeakHolder::accept(Visitor& v) { v.visit(*this); } inline void Holder::accept(Visitor& v) { v.visit(*this); } inline void Reference::accept(Visitor& v) { v.visit(*this); } @@ -2115,6 +2135,7 @@ inline void FindReference::visit(Reference& ope) { * PEG parser generator *---------------------------------------------------------------------------*/ +typedef std::unordered_map> Rules; typedef std::function Log; class ParserGenerator @@ -2123,10 +2144,21 @@ public: static std::shared_ptr parse( const char* s, size_t n, + const Rules& rules, std::string& start, Log log) { - return get_instance().perform_core(s, n, start, log); + return get_instance().perform_core(s, n, rules, start, log); + } + + static std::shared_ptr parse( + const char* s, + size_t n, + std::string& start, + Log log) + { + Rules dummy; + return parse(s, n, dummy, start, log); } // For debuging purpose @@ -2412,6 +2444,7 @@ private: std::shared_ptr perform_core( const char* s, size_t n, + const Rules& rules, std::string& start, Log log) { @@ -2434,6 +2467,22 @@ private: auto& grammar = *data.grammar; + // User provided rules + for (const auto& x: rules) { + auto name = x.first; + bool ignore = false; + if (!name.empty() && name[0] == '~') { + ignore = true; + name.erase(0, 1); + } + if (!name.empty()) { + auto& rule = grammar[name]; + rule <= x.second; + rule.name = name; + rule.ignoreSemanticValue = ignore; + } + } + // Check duplicated definitions bool ret = data.duplicates.empty(); @@ -2763,22 +2812,37 @@ class parser public: parser() = default; - parser(const char* s, size_t n) { - load_grammar(s, n); + parser(const char* s, size_t n, const Rules& rules) { + load_grammar(s, n, rules); } + parser(const char* s, const Rules& rules) + : parser(s, strlen(s), rules) {} + + parser(const char* s, size_t n) + : parser(s, n, Rules()) {} + parser(const char* s) - : parser(s, strlen(s)) {} + : parser(s, strlen(s), Rules()) {} operator bool() { return grammar_ != nullptr; } - bool load_grammar(const char* s, size_t n) { - grammar_ = ParserGenerator::parse(s, n, start_, log); + bool load_grammar(const char* s, size_t n, const Rules& rules) { + grammar_ = ParserGenerator::parse(s, n, rules, start_, log); return grammar_ != nullptr; } + bool load_grammar(const char* s, size_t n) { + return load_grammar(s, n, Rules()); + } + + bool load_grammar(const char* s, const Rules& rules) { + auto n = strlen(s); + return load_grammar(s, n, rules); + } + bool load_grammar(const char* s) { auto n = strlen(s); return load_grammar(s, n); diff --git a/test/test.cc b/test/test.cc index 9fb0080..227905f 100644 --- a/test/test.cc +++ b/test/test.cc @@ -964,6 +964,31 @@ TEST_CASE("Left recursive with empty string test", "[left recursive]") REQUIRE(!parser); } +TEST_CASE("User defined rule test", "[user rule]") +{ + auto g = parser(R"( + ROOT <- _ 'Hello' _ NAME '!' _ + )", + { + { + "NAME", usr([](const char* s, size_t n, SemanticValues& /*sv*/, any& /*dt*/) -> size_t { + static vector names = { "PEG", "BNF" }; + for (const auto& name: names) { + if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { + return name.size(); + } + } + return static_cast(-1); + }) + }, + { + "~_", zom(cls(" \t\r\n")) + } + }); + + REQUIRE(g.parse(" Hello BNF! ") == true); +} + TEST_CASE("Semantic predicate test", "[predicate]") { parser parser("NUMBER <- [0-9]+");