From 681c7543021e49928f00e027718e3666532edc41 Mon Sep 17 00:00:00 2001 From: yhirose Date: Thu, 19 Feb 2015 22:27:47 -0500 Subject: [PATCH] Added 'usr' operator. --- README.md | 63 ++++++++++++++++++++++++++++--------- peglib.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 127 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index eac751c..d49d14a 100644 --- a/README.md +++ b/README.md @@ -224,21 +224,54 @@ auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); The following are available operators: -| Operator | Description | -|:---------|:-------------------| -| seq | Sequence | -| cho | Prioritized Choice | -| zom | Zero or More | -| oom | One or More | -| opt | Optional | -| apd | And predicate | -| npd | Not predicate | -| lit | Literal string | -| cls | Character class | -| chr | Character | -| dot | Any character | -| anc | Anchor character | -| cap | Capture character | +| Operator | Description | +| :------- | :------------------ | +| seq | Sequence | +| cho | Prioritized Choice | +| zom | Zero or More | +| oom | One or More | +| opt | Optional | +| apd | And predicate | +| npd | Not predicate | +| lit | Literal string | +| cls | Character class | +| chr | Character | +| dot | Any character | +| anc | Anchor character | +| cap | Capture character | +| usr | User-defined parser | + +Hybrid Parser +------------- + +It's even possible to mix parser grammar with parser operators. + +```c++ +auto syntax = R"( + ROOT <- _ 'Hello' _ NAME '!' _ +)"; + +Rules rules = { + { + "NAME", usr([](const char* s, size_t l, SemanticValues& v, any& c) { + static vector names = { "PEG", "BNF" }; + for (const auto& n: names) { + if (n.size() <= l && !n.compare(0, n.size(), s, n.size())) { + return success(n.size()); + } + } + return fail(s); + }) + }, + { + "~_", zom(cls(" \t\r\n")) + } +}; + +peg g = peg(syntax, rules); + +assert(g.parse(" Hello BNF! 
")); +``` Sample codes ------------ diff --git a/peglib.h b/peglib.h index fad2c0d..f1d605c 100644 --- a/peglib.h +++ b/peglib.h @@ -781,6 +781,22 @@ private: std::shared_ptr ope_; }; +typedef std::function Parser; + +class User : public Ope +{ +public: + User(Parser fn) : fn_(fn) {} + + Result parse(const char* s, size_t l, SemanticValues& v, any& c) const { + assert(fn_); + return fn_(s, l, v, c); + } + +private: + std::function fn_; +}; + class WeakHolder : public Ope { public: @@ -1038,6 +1054,10 @@ inline std::shared_ptr anc(const std::shared_ptr& ope) { return std::make_shared(ope); } +inline std::shared_ptr usr(std::function fn) { + return std::make_shared(fn); +} + inline std::shared_ptr ref(const std::map& grammar, const std::string& name) { return std::make_shared(grammar, name); } @@ -1067,12 +1087,32 @@ inline std::pair line_info(const char* s, const char* ptr) { typedef std::map Grammar; typedef std::function Log; +typedef std::map> Rules; + class PEGParser { public: - static std::shared_ptr parse(const char* s, size_t l, std::string& start, MatchAction ma, Log log) { + static std::shared_ptr parse( + const char* s, + size_t l, + const Rules& rules, + std::string& start, + MatchAction ma, + Log log) + { static PEGParser instance; - return get().perform_core(s, l, start, ma, log); + return get().perform_core(s, l, rules, start, ma, log); + } + + static std::shared_ptr parse( + const char* s, + size_t l, + std::string& start, + MatchAction ma, + Log log) + { + Rules dummy; + return parse(s, l, dummy, start, ma, log); } // For debuging purpose @@ -1290,7 +1330,14 @@ private: g["DOT"] = []() { return dot(); }; } - std::shared_ptr perform_core(const char* s, size_t l, std::string& start, MatchAction ma, Log log) { + std::shared_ptr perform_core( + const char* s, + size_t l, + const Rules& rules, + std::string& start, + MatchAction ma, + Log log) + { Context cxt; cxt.match_action = ma; @@ -1308,6 +1355,25 @@ private: auto& grammar = *cxt.grammar; + // User 
provided rules + for (const auto& x: rules) { + auto name = x.first; + + bool ignore = false; + if (!name.empty() && name[0] == '~') { + ignore = true; + name.erase(0, 1); + } + + if (!name.empty()) { + auto& def = grammar[name]; + def <= x.second; + def.name = name; + def.ignore = ignore; + } + } + + // Check missing definitions for (const auto& x : cxt.references) { const auto& name = x.first; auto ptr = x.second; @@ -1367,9 +1433,9 @@ class peg public: peg() = default; - peg(const char* s, size_t l, Log log = nullptr) { + peg(const char* s, size_t l, const Rules& rules, Log log = nullptr) { grammar_ = PEGParser::parse( - s, l, + s, l, rules, start_, [&](const char* s, size_t l, size_t i) { if (match_action) match_action(s, l, i); @@ -1377,16 +1443,14 @@ public: log); } - peg(const char* s, Log log = nullptr) { - auto l = strlen(s); - grammar_ = PEGParser::parse( - s, l, - start_, - [&](const char* s, size_t l, size_t i) { - if (match_action) match_action(s, l, i); - }, - log); - } + peg(const char* s, const Rules& rules, Log log = nullptr) + : peg(s, strlen(s), rules, log) {} + + peg(const char* s, size_t l, Log log = nullptr) + : peg(s, l, Rules(), log) {} + + peg(const char* s, Log log = nullptr) + : peg(s, strlen(s), Rules(), log) {} operator bool() { return grammar_ != nullptr;