From 1ad9e73d670d39be466d363f4fdbe2f55cb41fe9 Mon Sep 17 00:00:00 2001 From: yhirose Date: Sat, 7 Feb 2015 20:52:26 -0500 Subject: [PATCH] Uploaded files. --- .gitignore | 4 + README.md | 182 +- example/Makefile | 18 + example/calc.cc | 92 + example/calc.vcxproj | 92 + example/calc2.cc | 94 + example/calc2.vcxproj | 92 + example/example.sln | 28 + peglib.h | 1430 +++++++ test/Makefile | 9 + test/catch.hpp | 8974 +++++++++++++++++++++++++++++++++++++++++ test/test.cc | 505 +++ test/test.sln | 28 + test/test.vcxproj | 159 + 14 files changed, 11705 insertions(+), 2 deletions(-) create mode 100644 example/Makefile create mode 100644 example/calc.cc create mode 100644 example/calc.vcxproj create mode 100644 example/calc2.cc create mode 100644 example/calc2.vcxproj create mode 100644 example/example.sln create mode 100644 peglib.h create mode 100644 test/Makefile create mode 100644 test/catch.hpp create mode 100644 test/test.cc create mode 100644 test/test.sln create mode 100644 test/test.vcxproj diff --git a/.gitignore b/.gitignore index b8bd026..f9c9c57 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,7 @@ *.exe *.out *.app + +# Others +*.dSYM +*.swp diff --git a/README.md b/README.md index 8572067..2f8d71e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,180 @@ -# cpp-peglib -A C++11 header-only PEG (Parsing Expression Grammars) library +cpp-peglib +========== + +C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library. + +*cpp-peglib* tries to provide more expressive parsing experience than common regular expression libraries such as std::regex. It also keeps it in mind that users can easily start using it. + +The PEG syntax that *cpp-peglib* understands is described on page 2 in the [document](http://pdos.csail.mit.edu/papers/parsing:popl04.pdf). + +How to use +---------- + +What if we want to extract only tag names in brackets from ` [tag1] [tag2] [tag3] [tag4]... `? 
It's a bit hard to do it with `std::regex`. We have to write loop logic, since it doesn't support [Repeated Captures](http://www.boost.org/doc/libs/1_57_0/libs/regex/doc/html/boost_regex/captures.html#boost_regex.captures.repeated_captures). PEG can handle it pretty easily. + +PEG grammar for this task could be like this: + +``` +ROOT <- _ ('[' TAG_NAME ']' _)* +TAG_NAME <- (!']' .)+ +_ <- [ \t]* +``` + +Here is how to parse text with the PEG syntax and retrieve tag names: + + +```c++ +// (1) Include the header file +#include "peglib.h" + +// (2) Make a parser +auto parser = peglib::make_parser(R"( + ROOT <- _ ('[' TAG_NAME ']' _)* + TAG_NAME <- (!']' .)+ + _ <- [ \t]* +)"); + +// (3) Setup an action +std::vector<std::string> tags; +parser["TAG_NAME"] = [&](const char* s, size_t l) { + tags.push_back(std::string(s, l)); +}; + +// (4) Parse +auto ret = parser.parse(" [tag1] [tag:2] [tag-3] "); + +assert(ret == true); +assert(tags[0] == "tag1"); +assert(tags[1] == "tag:2"); +assert(tags[2] == "tag-3"); +``` + +You may have a question regarding '(3) Setup an action'. When the parser recognizes the definition 'TAG_NAME', it calls back the action `[&](const char* s, size_t l)` where `const char* s, size_t l` refer to the matched string, so that the user could use the string for something else. + +We can do more with actions. A more complex example is here: + +```c++ +// Calculator example +using namespace peglib; +using namespace std; + +auto parser = make_parser(R"( + # Grammar for Calculator... 
+ EXPRESSION <- TERM (TERM_OPERATOR TERM)* + TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* + FACTOR <- NUMBER / '(' EXPRESSION ')' + TERM_OPERATOR <- [-+] + FACTOR_OPERATOR <- [/*] + NUMBER <- [0-9]+ +)"); + +auto reduce = [](const vector<Any>& v) -> long { + long ret = v[0].get<long>(); + for (auto i = 1u; i < v.size(); i += 2) { + auto num = v[i + 1].get<long>(); + switch (v[i].get<char>()) { + case '+': ret += num; break; + case '-': ret -= num; break; + case '*': ret *= num; break; + case '/': ret /= num; break; + } + } + return ret; +}; + +parser["EXPRESSION"] = reduce; +parser["TERM"] = reduce; +parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; +parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; +parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + +long val; +auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val); + +assert(ret == true); +assert(val == -3); +``` + +It may be helpful to keep in mind that the action behavior is similar to the YACC semantic action model ($$ = $1, $2, ...). + +In this example, the actions return values. These semantic values will be pushed up to the parent definition which can be referred to in the parent action `[](const vector<Any>& v)`. In other words, when a certain definition has been accepted, we can find all semantic values which are associated with the child definitions in `const vector<Any>& v`. The values are wrapped by the `peglib::Any` class which is like `boost::any`. We can retrieve the value by using the `get<T>` method where `T` is the actual type of the value. If no value is returned in an action, an undefined `Any` will be pushed up to the parent. Finally, the resulting value of the root definition is received in the out parameter of `parse` method in the parser. `long val` is the resulting value in this case. 
+ +Here are available user actions: + +```c++ +[](const char* s, size_t l, const std::vector<peglib::Any>& v, const std::vector<std::string>& n) +[](const char* s, size_t l, const std::vector<peglib::Any>& v) +[](const char* s, size_t l) +[](const std::vector<peglib::Any>& v, const std::vector<std::string>& n) +[](const std::vector<peglib::Any>& v) +[]() +``` + +`const std::vector<std::string>& n` holds names of child definitions, which can be helpful when we want to check what the actual child definitions are. + +Make a parser with parser operators and simple actions +------------------------------------------------------ + +Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators* and use the *simple action* method rather than the semantic action method. Here is an example: + +```c++ +using namespace peglib; +using namespace std; + +Definition ROOT, TAG, TAG_NAME, _; +ROOT = seq(_, zom(TAG)); +TAG = seq(chr('['), TAG_NAME, chr(']'), _); +TAG_NAME = oom(seq(npd(chr(']')), any())); +_ = zom(cls(" \t")); + +vector<string> tags; +TAG_NAME.match = [&](const char* s, size_t l) { + tags.push_back(string(s, l)); +}; + +auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); +``` + +In fact, the PEG parser generator is made with operators. You can see the code at `make_peg_grammar` function in `peglib.h`. 
+ +The following are available operators: + +| Description | Operator | +|--------------------|----------| +| Sequence | seq | +| Prioritized Choice | cho | +| Grouping | grp | +| Zero or More | zom | +| One or More | oom | +| Optional | opt | +| And predicate | apd | +| Not predicate | npd | +| Literal string | lit | +| Character class | cls | +| Character | chr | +| Any character | any | + +Tested Compilers +---------------- + + * Visual Studio 2013 + * Clang 3.5 + +TODO +---- + + * Linear-time parsing (Packrat parsing) + * Optimization of grammars + * Unicode support + +Other C++ PEG parser libraries that inspired cpp-peglib +------------------------------------------------------- + + * [PEGTL](https://github.com/ColinH/PEGTL) - Parsing Expression Grammar Template Library + * [lars::Parser](https://github.com/TheLartians/Parser) - A header-only linear-time c++ parsing expression grammar (PEG) parser generator supporting left-recursion and grammar ambiguity + + +License +------- + +MIT license (© 2015 Yuji Hirose) diff --git a/example/Makefile b/example/Makefile new file mode 100644 index 0000000..9b576f9 --- /dev/null +++ b/example/Makefile @@ -0,0 +1,18 @@ + +USE_CLANG = 1 + +ifdef USE_CLANG +CC = clang++ +CFLAGS = -std=c++1y -stdlib=libc++ -g +else +CC = g++-4.9 +CFLAGS = -std=c++1y -g +endif + +all: calc calc2 + +calc : calc.cc ../peglib.h + $(CC) -o calc $(CFLAGS) -I.. calc.cc + +calc2 : calc2.cc ../peglib.h + $(CC) -o calc2 $(CFLAGS) -I.. calc2.cc diff --git a/example/calc.cc b/example/calc.cc new file mode 100644 index 0000000..f214a43 --- /dev/null +++ b/example/calc.cc @@ -0,0 +1,92 @@ +// +// calc.cc +// +// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
+// MIT License +// + +#include +#include +#include + +using namespace peglib; +using namespace std; + +// +// PEG syntax: +// +// EXPRESSION <- TERM (TERM_OPERATOR TERM)* +// TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* +// FACTOR <- NUMBER / '(' EXPRESSION ')' +// TERM_OPERATOR <- [-+] +// FACTOR_OPERATOR <- [/*] +// NUMBER <- [0-9]+ +// +class Calculator +{ +public: + Calculator() { + const char* syntax = + " EXPRESSION <- TERM (TERM_OPERATOR TERM)* " + " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " + " FACTOR <- NUMBER / '(' EXPRESSION ')' " + " TERM_OPERATOR <- [-+] " + " FACTOR_OPERATOR <- [/*] " + " NUMBER <- [0-9]+ " + ; + + parser.load_syntax(syntax); + + parser["EXPRESSION"] = reduce; + parser["TERM"] = reduce; + parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; + parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; + parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + } + + bool execute(const char* s, long& v) const { + return parser.parse(s, v); + } + +private: + Parser parser; + + static long reduce(const vector& v) { + auto result = v[0].get(); + for (auto i = 1u; i < v.size(); i += 2) { + auto num = v[i + 1].get(); + auto ope = v[i].get(); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } +}; + +int main(int argc, const char** argv) +{ + if (argc < 2 || string("--help") == argv[1]) { + cout << "usage: calc [formula]" << endl; + return 1; + } + + const char* s = argv[1]; + + Calculator calc; + + long val = 0; + if (calc.execute(s, val)) { + cout << s << " = " << val << endl; + return 0; + } + + cout << "syntax error..." 
<< endl; + + return -1; +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/calc.vcxproj b/example/calc.vcxproj new file mode 100644 index 0000000..63df6de --- /dev/null +++ b/example/calc.vcxproj @@ -0,0 +1,92 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + + + + + + + {F85B641A-7538-4809-8175-C528FF632CF6} + Win32Proj + sample + calc + + + + Application + true + Unicode + v120 + + + Application + false + true + Unicode + v120 + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + .. + + + Console + true + Ws2_32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + .. + + + Console + true + true + true + Ws2_32.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file diff --git a/example/calc2.cc b/example/calc2.cc new file mode 100644 index 0000000..5fcd4a1 --- /dev/null +++ b/example/calc2.cc @@ -0,0 +1,94 @@ +// +// calc2.cc +// +// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
+// MIT License +// + +#include +#include +#include + +using namespace peglib; +using namespace std; + +// +// PEG syntax: +// +// EXPRESSION <- TERM (TERM_OPERATOR TERM)* +// TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* +// FACTOR <- NUMBER / '(' EXPRESSION ')' +// TERM_OPERATOR <- [-+] +// FACTOR_OPERATOR <- [/*] +// NUMBER <- [0-9]+ +// +class Calculator +{ +public: + Calculator() { + EXPRESSION = seq(TERM, zom(seq(TERM_OPERATOR, TERM))); + TERM = seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); + FACTOR = cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); + TERM_OPERATOR = cls("+-"); + FACTOR_OPERATOR = cls("*/"); + NUMBER = oom(cls("0-9")); + + actions[EXPRESSION] = reduce; + actions[TERM] = reduce; + actions[TERM_OPERATOR] = [](const char* s, size_t l) { return (char)*s; }; + actions[FACTOR_OPERATOR] = [](const char* s, size_t l) { return (char)*s; }; + actions[NUMBER] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + } + + bool execute(const char* s, long& v) const { + Any val; + auto ret = EXPRESSION.parse(s, actions, val); + if (ret) { + v = val.get(); + } + return ret; + } + +private: + Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; + SemanticActions actions; + + static long reduce(const vector& v) { + auto result = v[0].get(); + for (auto i = 1u; i < v.size(); i += 2) { + auto num = v[i + 1].get(); + auto ope = v[i].get(); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } +}; + +int main(int argc, const char** argv) +{ + if (argc < 2 || string("--help") == argv[1]) { + cout << "usage: calc [formula]" << endl; + return 1; + } + + const char* s = argv[1]; + + Calculator calc; + + long val = 0; + if (calc.execute(s, val)) { + cout << s << " = " << val << endl; + return 0; + } + + cout << "syntax error..." 
<< endl; + + return -1; +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/calc2.vcxproj b/example/calc2.vcxproj new file mode 100644 index 0000000..a6aadc2 --- /dev/null +++ b/example/calc2.vcxproj @@ -0,0 +1,92 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + + + + + + + {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4} + Win32Proj + sample + calc2 + + + + Application + true + Unicode + v120 + + + Application + false + true + Unicode + v120 + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + .. + + + Console + true + Ws2_32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + .. + + + Console + true + true + true + Ws2_32.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file diff --git a/example/example.sln b/example/example.sln new file mode 100644 index 0000000..d0fb016 --- /dev/null +++ b/example/example.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "calc.vcxproj", "{F85B641A-7538-4809-8175-C528FF632CF6}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "calc2.vcxproj", "{1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.ActiveCfg = Debug|Win32 + {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.Build.0 = Debug|Win32 + {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.ActiveCfg = Release|Win32 + {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.Build.0 = Release|Win32 + 
{1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.ActiveCfg = Debug|Win32 + {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.Build.0 = Debug|Win32 + {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.ActiveCfg = Release|Win32 + {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/peglib.h b/peglib.h new file mode 100644 index 0000000..cb2e24f --- /dev/null +++ b/peglib.h @@ -0,0 +1,1430 @@ +// +// peglib.h +// +// Copyright (c) 2015 Yuji Hirose. All rights reserved. +// MIT License +// + +#ifndef _CPPEXPATLIB_PEGLIB_H_ +#define _CPPEXPATLIB_PEGLIB_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace peglib { + +void* enabler; + +/*----------------------------------------------------------------------------- + * Any + *---------------------------------------------------------------------------*/ + +class Any +{ +public: + Any() : content_(nullptr) { + } + + Any(const Any& rhs) : content_(rhs.clone()) { + } + + Any(Any&& rhs) : content_(rhs.content_) { + rhs.content_ = nullptr; + } + + template + Any(const T& value) : content_(new holder(value)) { + } + + Any& operator=(const Any& rhs) { + if (this != &rhs) { + if (content_) { + delete content_; + } + content_ = rhs.clone(); + } + return *this; + } + + Any& operator=(Any&& rhs) { + if (this != &rhs) { + if (content_) { + delete content_; + } + content_ = rhs.content_; + rhs.content_ = nullptr; + } + return *this; + } + + ~Any() { + delete content_; + } + + template + T& get() { + assert(content_); + return dynamic_cast*>(content_)->value_; + } + + template + const T& get() const { + assert(content_); + return dynamic_cast*>(content_)->value_; + } + +private: + struct placeholder { + virtual ~placeholder() {}; + virtual placeholder* clone() const = 0; + }; + + template + struct holder : placeholder { + 
holder(const T& value) : value_(value) {} + placeholder* clone() const override { + return new holder(value_); + } + T value_; + }; + + placeholder* clone() const { + return content_ ? content_->clone() : nullptr; + } + + placeholder* content_; +}; + +/*----------------------------------------------------------------------------- + * Variant + *---------------------------------------------------------------------------*/ + +#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 +#define static_max(a, b) (a > b ? a : b) +#define alignof _alignof +#else +template +constexpr T static_max(T a, T b) { return a > b ? a : b; } +#endif + +/* + * For debug + */ +static int VARINT_COUNT = 0; + +template void log_copy_construct() { + VARINT_COUNT++; +} + +template void log_move_construct() { + VARINT_COUNT++; +} + +template void log_destruct() { + VARINT_COUNT--; +} + +void log_variant_count() { + std::cout << "VARIANT COUNT (" << VARINT_COUNT << ")" << std::endl; +}; + +/* + * Type list + */ +template +struct typelist; + +template +struct typelist +{ + static const size_t max_elem_size = static_max(sizeof(T), typelist::max_elem_size); + static const size_t max_elem_align = static_max(alignof(T), typelist::max_elem_align); +}; + +template <> +struct typelist<> +{ + static const size_t max_elem_size = 0; + static const size_t max_elem_align = 0; +}; + +template +struct typelist_index; + +template +struct typelist_index +{ + static const size_t value = 1 + typelist_index::value; +}; + +template +struct typelist_index +{ + static const size_t value = 0; +}; + +template +struct typelist_index +{ + static const size_t value = 0; +}; + +/* + * Variant helper + */ +template +struct variant_helper; + +template +struct variant_helper +{ + template + static void copy_construct(size_t type_index, void* data, const VT& vt) { + if (N == type_index) { + log_copy_construct(); + new (data) T(vt.template get()); + return; + } + variant_helper::copy_construct(type_index, 
data, vt); + } + + template + static void move_construct(size_t type_index, void* data, VT&& vt) { + if (N == type_index) { + log_move_construct(); + new (data) T(std::move(vt.template get())); + return; + } + variant_helper::move_construct(type_index, data, vt); + } + + static void destruct(size_t type_index, void* data) { + if (N == type_index) { + log_destruct(); + reinterpret_cast(data)->~T(); + return; + } + variant_helper::destruct(type_index, data); + } +}; + +template +struct variant_helper +{ + template + static void copy_construct(size_t type_index, void* data, const VT& vt) {} + + template + static void move_construct(size_t type_index, void* data, VT&& vt) {} + + static void destruct(size_t type_index, void* data) {} +}; + + +/* + * Variant + */ +template +struct Variant +{ + typedef typelist tlist; + typedef typename std::aligned_storage::type data_type; + + data_type data; + size_t type_index; + + template + explicit Variant(const T& val) : type_index(typelist_index::value) { + static_assert(typelist_index::value < sizeof...(Ts), "Invalid variant type."); + log_copy_construct(); + new (&data) T(val); + } + + template + explicit Variant(T&& val) : type_index(typelist_index::value) { + static_assert(typelist_index::value < sizeof...(Ts), "Invalid variant type."); + log_move_construct(); + new (&data) T(std::move(val)); + } + + Variant() : type_index(sizeof...(Ts)) {} + + Variant(const Variant& rhs) : type_index(rhs.type_index) { + variant_helper<0, Ts...>::copy_construct(type_index, &data, rhs); + } + + Variant(Variant&& rhs) : type_index(rhs.type_index) { + variant_helper<0, Ts...>::move_construct(type_index, &data, rhs); + } + + Variant& operator=(const Variant& rhs) { + if (this != &rhs) { + variant_helper<0, Ts...>::destruct(type_index, &data); + type_index = rhs.type_index; + variant_helper<0, Ts...>::copy_construct(type_index, &data, rhs); + } + return *this; + } + + Variant& operator=(Variant&& rhs) { + if (this != &rhs) { + variant_helper<0, 
Ts...>::destruct(type_index, &data); + type_index = rhs.type_index; + variant_helper<0, Ts...>::move_construct(type_index, &data, rhs); + } + return *this; + } + + ~Variant() { + variant_helper<0, Ts...>::destruct(type_index, &data); + } + + template + T& get() { + if (type_index != typelist_index::value) { + throw std::invalid_argument("Invalid template argument."); + } + return *reinterpret_cast(&data); + } + + template + const T& get() const { + if (type_index != typelist_index::value) { + throw std::invalid_argument("Invalid template argument."); + } + return *reinterpret_cast(&data); + } +}; + +#if _MSC_VER < 1900 // Less than Visual Studio 2015 +#undef static_max +#undef alignof +#endif + +/*----------------------------------------------------------------------------- + * PEG + *---------------------------------------------------------------------------*/ + +/* + * Forward declalations + */ +class Rule; +class Definition; + +template +struct SemanticActions; + +template +struct SemanticStack; + +/* + * Match + */ +struct Match +{ + Match(bool _ret, size_t _len) : ret(_ret), len(_len) {} + bool ret; + size_t len; +}; + +Match success(size_t len) { + return Match(true, len); +} + +Match fail() { + return Match(false, 0); +} + +/* + * Rules + */ +class Sequence +{ +public: + Sequence(const Sequence& rhs) : rules_(rhs.rules_) {} + +#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 + // NOTE: Compiler Error C2797 on Visual Studio 2013 + // "The C++ compiler in Visual Studio does not implement list + // initialization inside either a member initializer list or a non-static + // data member initializer. Before Visual Studio 2013 Update 3, this was + // silently converted to a function call, which could lead to bad code + // generation. Visual Studio 2013 Update 3 reports this as an error." + template + Sequence(const Args& ...args) { + rules_ = std::vector>{ static_cast>(args)... 
}; + } +#else + template + Sequence(const Args& ...args) : rules_{ static_cast>(args)... } {} +#endif + + Sequence(const std::vector>& rules) : rules_(rules) {} + Sequence(std::vector>&& rules) : rules_(std::move(rules)) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::vector> rules_; +}; + +class PrioritizedChoice +{ +public: +#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 + // NOTE: Compiler Error C2797 on Visual Studio 2013 + // "The C++ compiler in Visual Studio does not implement list + // initialization inside either a member initializer list or a non-static + // data member initializer. Before Visual Studio 2013 Update 3, this was + // silently converted to a function call, which could lead to bad code + // generation. Visual Studio 2013 Update 3 reports this as an error." + template + PrioritizedChoice(const Args& ...args) { + rules_ = std::vector>{ static_cast>(args)... }; + } +#else + template + PrioritizedChoice(const Args& ...args) : rules_{ static_cast>(args)... 
} {} +#endif + + PrioritizedChoice(const std::vector>& rules) : rules_(rules) {} + PrioritizedChoice(std::vector>&& rules) : rules_(std::move(rules)) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::vector> rules_; +}; + +class ZeroOrMore +{ +public: + ZeroOrMore(const std::shared_ptr& rule) : rule_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::shared_ptr rule_; +}; + +class OneOrMore +{ +public: + OneOrMore(const std::shared_ptr& rule) : rule_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::shared_ptr rule_; +}; + +class Option +{ +public: + Option(const std::shared_ptr& rule) : rule_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::shared_ptr rule_; +}; + +class AndPredicate +{ +public: + AndPredicate(const std::shared_ptr& rule) : rule_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::shared_ptr rule_; +}; + +class NotPredicate +{ +public: + NotPredicate(const std::shared_ptr& rule) : rule_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::shared_ptr rule_; +}; + +class LiteralString +{ +public: + LiteralString(const char* s) : lit_(s) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const { + auto i = 0u; + for (; i < lit_.size(); i++) { + if (i >= l || s[i] != lit_[i]) { + return fail(); + } + } + return success(i); + } + +private: + std::string lit_; +}; + +class CharacterClass +{ +public: + CharacterClass(const char* chars) : chars_(chars) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* 
sa, SemanticStack* ss) const { + if (l < 1) { + return fail(); + } + auto ch = s[0]; + auto i = 0u; + while (i < chars_.size()) { + if (i + 2 < chars_.size() && chars_[i + 1] == '-') { + if (chars_[i] <= ch && ch <= chars_[i + 2]) { + return success(1); + } + i += 3; + } else { + if (chars_[i] == ch) { + return success(1); + } + i += 1; + } + } + return fail(); + } + +private: + std::string chars_; +}; + +class Character +{ +public: + Character(char ch) : ch_(ch) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const { + if (l < 1 || s[0] != ch_) { + return fail(); + } + return success(1); + } + +private: + char ch_; +}; + +class AnyCharacter +{ +public: + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const { + if (l < 1) { + return fail(); + } + return success(1); + } +}; + +class Grouping +{ +public: + Grouping(const std::shared_ptr& rule) : rule_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::shared_ptr rule_; +}; + +class NonTerminal +{ +public: + NonTerminal(Definition* outer) : outer_(outer) {}; + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + friend class Definition; + + template + void reduce(SemanticStack& ss, const char* s, size_t l, Action action) const; + + template + T reduce(const char* s, size_t l, const std::vector& v, const std::vector& n, Action action) const; + + std::shared_ptr rule_; + Definition* outer_; +}; + +class DefinitionReference +{ +public: + DefinitionReference( + const std::map& grammar, const std::string& name) + : grammar_(grammar) + , name_(name) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + const std::map& grammar_; + std::string name_; +}; + +class WeakHolder +{ +public: + WeakHolder(const 
std::shared_ptr& rule) : weak_(rule) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const; + +private: + std::weak_ptr weak_; +}; + +/* + * Rule + */ +template +class TRule +{ +public: + template + TRule(const T& val) : vt(val) {} + + template + TRule(T&& val) : vt(std::move(val)) {} + + template + Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticStack* ss) const { + switch (vt.type_index) { + case 0: return vt.template get().template parse(s, l, sa, ss); + case 1: return vt.template get().template parse(s, l, sa, ss); + case 2: return vt.template get().template parse(s, l, sa, ss); + case 3: return vt.template get().template parse(s, l, sa, ss); + case 4: return vt.template get