diff --git a/README.md b/README.md index 324b7e5..ecd1fdb 100644 --- a/README.md +++ b/README.md @@ -84,12 +84,32 @@ Here is a complete list of available actions: `any& c` is a context data which can be used by the user for whatever purposes. +In the following example, `<` and `>` are the *capture* operators. Each capture operator creates a semantic value that holds a `const char*` pointing to its position in the input. This is useful for excluding unnecessary characters, such as trailing whitespace, from a token. + +```c++ +auto syntax = R"( + ROOT <- _ TOKEN (',' _ TOKEN)* + TOKEN <- < [a-z0-9]+ > _ + _ <- [ \t\r\n]* +)"; + +peg pg(syntax); + +pg["TOKEN"] = [](const char* s, size_t l, const vector& v) { + auto b = v[0].get(); // '<' + auto e = v[1].get(); // '>' + auto token = string(b, e - b); // 'token' doesn't include trailing whitespaces +}; + +auto ret = pg.parse(" token1, token2 "); +``` + Simple interface ---------------- *cpp-peglib* provides std::regex-like simple interface for trivial tasks. -In the following example, `< ... >` means the *capture* operator. `peglib::peg_match` tries to capture strings in the `< ... >` operator and store them into `peglib::match` object. +`peglib::peg_match` tries to capture the strings matched by the `< ... >` operator and stores them in a `peglib::match` object. 
```c++ peglib::match m; diff --git a/peglib.h b/peglib.h index a571312..551832f 100644 --- a/peglib.h +++ b/peglib.h @@ -122,28 +122,6 @@ public: return *this; } - operator bool() const { return get(); } - operator char() const { return get(); } - operator wchar_t() const { return get(); } - operator unsigned char() const { return get(); } - operator int() const { return get(); } - operator unsigned int() const { return get(); } - operator short() const { return get(); } - operator unsigned short() const { return get(); } - operator long() const { return get(); } - operator unsigned long() const { return get(); } - operator long long() const { return get(); } - operator unsigned long long() const { return get(); } - operator float() const { return get(); } - operator double() const { return get(); } - operator const std::string&() const { return get(); } - -#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 -#else - operator char16_t() const { return get(); } - operator char32_t() const { return get(); } -#endif - private: struct placeholder { virtual ~placeholder() {}; @@ -693,6 +671,15 @@ private: size_t capture_id; }; +class Anchor : public Ope +{ +public: + Result parse(const char* s, size_t l, Values& v, any& c) const { + return success(0); + } + +}; + class WeakHolder : public Ope { public: @@ -929,6 +916,10 @@ inline std::shared_ptr cap(const std::shared_ptr& ope, MatchAction ma) return std::make_shared(ope, ma, (size_t)-1); } +inline std::shared_ptr anc() { + return std::make_shared(); +} + inline std::shared_ptr ref(const std::map& grammar, const std::string& name) { return std::make_shared(grammar, name); } @@ -1132,46 +1123,50 @@ private: }, [&](const char* s, size_t l, const std::vector& v, any& c) { Context& cxt = *c.get(); - cxt.references[v[0]] = s; - return ref(*cxt.grammar, v[0]); + const auto& ident = v[0].get(); + cxt.references[ident] = s; // for error handling + return ref(*cxt.grammar, ident); }, [&](const std::vector& v) 
{ return v[1]; }, // Capture - [&](const std::vector& v, any& c) { + [&](const char* s, size_t l, const std::vector& v, any& c) { Context& cxt = *c.get(); auto ope = v[1].get>(); - return cap(ope, cxt.match_action, ++cxt.capture_count); + return seq( + ref(*cxt.grammar, "%ANCHOR%"), + cap(ope, cxt.match_action, ++cxt.capture_count), + ref(*cxt.grammar, "%ANCHOR%")); } }; - g["IdentCont"] = [](const char*s, size_t l) { + g["IdentCont"] = [](const char* s, size_t l) { return std::string(s, l); }; g["Literal"] = [](const std::vector& v) { - return lit(v[0]); + return lit(v[0].get()); }; - g["SQCont"] = [this](const char*s, size_t l) { + g["SQCont"] = [this](const char* s, size_t l) { return resolve_escape_sequence(s, l); }; - g["DQCont"] = [this](const char*s, size_t l) { + g["DQCont"] = [this](const char* s, size_t l) { return resolve_escape_sequence(s, l); }; g["Class"] = [](const std::vector& v) { - return cls(v[0]); + return cls(v[0].get()); }; - g["ClassCont"] = [this](const char*s, size_t l) { + g["ClassCont"] = [this](const char* s, size_t l) { return resolve_escape_sequence(s, l); }; - g["AND"] = [](const char*s, size_t l) { return *s; }; - g["NOT"] = [](const char*s, size_t l) { return *s; }; - g["QUESTION"] = [](const char*s, size_t l) { return *s; }; - g["STAR"] = [](const char*s, size_t l) { return *s; }; - g["PLUS"] = [](const char*s, size_t l) { return *s; }; + g["AND"] = [](const char* s, size_t l) { return *s; }; + g["NOT"] = [](const char* s, size_t l) { return *s; }; + g["QUESTION"] = [](const char* s, size_t l) { return *s; }; + g["STAR"] = [](const char* s, size_t l) { return *s; }; + g["PLUS"] = [](const char* s, size_t l) { return *s; }; g["DOT"] = []() { return dot(); @@ -1194,10 +1189,12 @@ private: return nullptr; } + auto& grammar = *cxt.grammar; + for (const auto& x : cxt.references) { const auto& name = x.first; auto ptr = x.second; - if (cxt.grammar->find(name) == cxt.grammar->end()) { + if (grammar.find(name) == grammar.end()) { if (log) 
{ auto line = line_info(s, ptr); log(line.first, line.second, "'" + name + "' is not defined."); @@ -1208,10 +1205,13 @@ private: start = cxt.start; + grammar["%ANCHOR%"] <= anc(); + grammar["%ANCHOR%"] = [](const char* s, size_t l) { return s; }; + return cxt.grammar; } - std::string resolve_escape_sequence(const char*s, size_t l) { + std::string resolve_escape_sequence(const char* s, size_t l) { std::string r; r.reserve(l); @@ -1364,7 +1364,7 @@ private: }; /*----------------------------------------------------------------------------- - * Utilities + * Simple interface *---------------------------------------------------------------------------*/ struct match diff --git a/test/test.cc b/test/test.cc index 9ab8989..440d6cc 100644 --- a/test/test.cc +++ b/test/test.cc @@ -161,7 +161,7 @@ TEST_CASE("Simple calculator test", "[general]") }; parser["Multitive"] = [](const vector& v) { - return v.size() == 1 ? int(v[0]) : v[0].get() * v[1].get(); + return v.size() == 1 ? v[0].get() : v[0].get() * v[1].get(); }; parser["Primary"] = [](const vector& v) {