From d386f2f3b9b8dbc18817c25f9d6a9db781e09933 Mon Sep 17 00:00:00 2001 From: yhirose Date: Mon, 9 Feb 2015 17:12:59 -0500 Subject: [PATCH] Major refactoring. --- README.md | 53 +-- example/Makefile | 2 +- example/calc.cc | 61 ++-- example/calc2.cc | 53 +-- peglib.h | 931 +++++++++++++++-------------------------------- test/Makefile | 9 +- test/test.cc | 181 ++++----- 7 files changed, 446 insertions(+), 844 deletions(-) diff --git a/README.md b/README.md index 2ca9b78..a41f737 100644 --- a/README.md +++ b/README.md @@ -112,46 +112,51 @@ Here are available user actions: `const std::vector& n` holds names of child definitions that could be helpful when we want to check what are the actual child definitions. -Make a parser with parser operators and simple actions ------------------------------------------------------- +Make a parser with parser operators +----------------------------------- -Instead of makeing a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators* and use the *simple action* method rather than the semantic action method. Here is an example: +Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser operators*. Here is an example: ```c++ using namespace peglib; using namespace std; +vector tags; + Definition ROOT, TAG_NAME, _; ROOT = seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _))); -TAG_NAME = oom(seq(npd(chr(']')), any())); +TAG_NAME = oom(seq(npd(chr(']')), any())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); }; _ = zom(cls(" \t")); -vector tags; -TAG_NAME.match = [&](const char* s, size_t l) { - tags.push_back(string(s, l)); -}; - auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); ``` -In fact, the PEG parser generator is made with operators. You can see the code at `make_peg_grammar` function in `peglib.h`. +It is also possible to specify a *string match action* with a *grp* operator. 
The string match action doesn't affect the regular semantic action behavior. + +```c++ +ROOT = seq(_, zom(seq(chr('['), grp(TAG_NAME, [&](const char* s, size_t l) { tags.push_back(string(s, l)); }), chr(']'), _))); +TAG_NAME = oom(seq(npd(chr(']')), any())); +_ = zom(cls(" \t")); +``` + +In fact, the PEG parser generator is made with the parser operators. You can see the code at `make_peg_grammar` function in `peglib.h`. The following are available operators: -| Description | Operator | -|:-------------------|:---------| -| Sequence | seq | -| Prioritized Choice | cho | -| Grouping | grp | -| Zero or More | zom | -| One or More | oom | -| Optional | opt | -| And predicate | apd | -| Not predicate | npd | -| Literal string | lit | -| Character class | cls | -| Character | chr | -| Any character | any | +| Operator | Description | +|:---------|:-------------------| +| seq | Sequence | +| cho | Prioritized Choice | +| grp | Grouping | +| zom | Zero or More | +| oom | One or More | +| opt | Optional | +| apd | And predicate | +| npd | Not predicate | +| lit | Literal string | +| cls | Character class | +| chr | Character | +| any | Any character | Sample codes ------------ diff --git a/example/Makefile b/example/Makefile index 9b576f9..3c0fc71 100644 --- a/example/Makefile +++ b/example/Makefile @@ -5,7 +5,7 @@ ifdef USE_CLANG CC = clang++ CFLAGS = -std=c++1y -stdlib=libc++ -g else -CC = g++-4.9 +CC = g++ CFLAGS = -std=c++1y -g endif diff --git a/example/calc.cc b/example/calc.cc index f214a43..7d5ce9e 100644 --- a/example/calc.cc +++ b/example/calc.cc @@ -7,7 +7,7 @@ #include #include -#include +#include using namespace peglib; using namespace std; @@ -22,36 +22,16 @@ using namespace std; // FACTOR_OPERATOR <- [/*] // NUMBER <- [0-9]+ // -class Calculator +int main(int argc, const char** argv) { -public: - Calculator() { - const char* syntax = - " EXPRESSION <- TERM (TERM_OPERATOR TERM)* " - " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " - " FACTOR <- NUMBER / '(' 
EXPRESSION ')' " - " TERM_OPERATOR <- [-+] " - " FACTOR_OPERATOR <- [/*] " - " NUMBER <- [0-9]+ " - ; - - parser.load_syntax(syntax); - - parser["EXPRESSION"] = reduce; - parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; - parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; - parser["NUMBER"] = [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + if (argc < 2 || string("--help") == argv[1]) { + cout << "usage: calc [formula]" << endl; + return 1; } - bool execute(const char* s, long& v) const { - return parser.parse(s, v); - } + const char* s = argv[1]; -private: - Parser parser; - - static long reduce(const vector& v) { + auto reduce = [](const vector& v) -> long { auto result = v[0].get(); for (auto i = 1u; i < v.size(); i += 2) { auto num = v[i + 1].get(); @@ -64,22 +44,27 @@ private: } } return result; - } -}; + }; -int main(int argc, const char** argv) -{ - if (argc < 2 || string("--help") == argv[1]) { - cout << "usage: calc [formula]" << endl; - return 1; - } + const char* syntax = + " EXPRESSION <- TERM (TERM_OPERATOR TERM)* " + " TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* " + " FACTOR <- NUMBER / '(' EXPRESSION ')' " + " TERM_OPERATOR <- [-+] " + " FACTOR_OPERATOR <- [/*] " + " NUMBER <- [0-9]+ " + ; - const char* s = argv[1]; + Parser parser = make_parser(syntax); - Calculator calc; + parser["EXPRESSION"] = reduce; + parser["TERM"] = reduce; + parser["TERM_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; + parser["FACTOR_OPERATOR"] = [](const char* s, size_t l) { return (char)*s; }; + parser["NUMBER"] = [](const char* s, size_t l) { return atol(s); }; long val = 0; - if (calc.execute(s, val)) { + if (parser.parse(s, val)) { cout << s << " = " << val << endl; return 0; } diff --git a/example/calc2.cc b/example/calc2.cc index f844564..0f8485c 100644 --- a/example/calc2.cc +++ b/example/calc2.cc @@ -7,7 +7,7 @@ #include #include -#include 
+#include using namespace peglib; using namespace std; @@ -22,32 +22,16 @@ using namespace std; // FACTOR_OPERATOR <- [/*] // NUMBER <- [0-9]+ // -class Calculator +int main(int argc, const char** argv) { -public: - Calculator() { - EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; - TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; - FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); - TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; }; - FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; }; - NUMBER <= oom(cls("0-9")), [](const char* s, size_t l) { return stol(string(s, l), nullptr, 10); }; + if (argc < 2 || string("--help") == argv[1]) { + cout << "usage: calc [formula]" << endl; + return 1; } - bool execute(const char* s, long& v) const { - Any val; - auto ret = EXPRESSION.parse(s, actions, val); - if (ret) { - v = val.get(); - } - return ret; - } + const char* s = argv[1]; -private: - Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; - SemanticActions actions; - - static long reduce(const vector& v) { + auto reduce = [](const vector& v) -> long { auto result = v[0].get(); for (auto i = 1u; i < v.size(); i += 2) { auto num = v[i + 1].get(); @@ -60,28 +44,23 @@ private: } } return result; - } -}; + }; -int main(int argc, const char** argv) -{ - if (argc < 2 || string("--help") == argv[1]) { - cout << "usage: calc [formula]" << endl; - return 1; - } + Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; - const char* s = argv[1]; - - Calculator calc; + EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; + TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; + FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); + TERM_OPERATOR <= cls("+-"), [](const char* s, size_t l) { return (char)*s; }; + FACTOR_OPERATOR <= cls("*/"), [](const char* s, size_t l) { return (char)*s; }; + NUMBER <= 
oom(cls("0-9")), [](const char* s, size_t l) { return atol(s); }; long val = 0; - if (calc.execute(s, val)) { + if (EXPRESSION.parse(s, val)) { cout << s << " = " << val << endl; return 0; } - cout << "syntax error..." << endl; - return -1; } diff --git a/peglib.h b/peglib.h index 5bec074..7295355 100644 --- a/peglib.h +++ b/peglib.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace peglib { @@ -63,6 +63,15 @@ public: return *this; } + template + Any& operator=(const T& value) { + if (content_) { + delete content_; + } + content_ = new holder(value); + return *this; + } + ~Any() { delete content_; } @@ -71,18 +80,40 @@ public: return content_ == nullptr; } - template + template < + typename T, + typename std::enable_if::value>::type*& = enabler + > T& get() { assert(content_); return dynamic_cast*>(content_)->value_; } - template + template < + typename T, + typename std::enable_if::value>::type*& = enabler + > + T& get() { + return *this; + } + + template < + typename T, + typename std::enable_if::value>::type*& = enabler + > const T& get() const { assert(content_); return dynamic_cast*>(content_)->value_; } + template < + typename T, + typename std::enable_if::value>::type*& = enabler + > + const Any& get() const { + return *this; + } + private: struct placeholder { virtual ~placeholder() {}; @@ -105,212 +136,6 @@ private: placeholder* content_; }; -/*----------------------------------------------------------------------------- - * Variant - *---------------------------------------------------------------------------*/ - -#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 -#define static_max(a, b) (a > b ? a : b) -#define alignof _alignof -#else -template -constexpr T static_max(T a, T b) { return a > b ? 
a : b; } -#endif - -/* - * For debug - */ -static int VARINT_COUNT = 0; - -template void log_copy_construct() { - VARINT_COUNT++; -} - -template void log_move_construct() { - VARINT_COUNT++; -} - -template void log_destruct() { - VARINT_COUNT--; -} - -void log_variant_count() { - std::cout << "VARIANT COUNT (" << VARINT_COUNT << ")" << std::endl; -}; - -/* - * Type list - */ -template -struct typelist; - -template -struct typelist -{ - static const size_t max_elem_size = static_max(sizeof(T), typelist::max_elem_size); - static const size_t max_elem_align = static_max(alignof(T), typelist::max_elem_align); -}; - -template <> -struct typelist<> -{ - static const size_t max_elem_size = 0; - static const size_t max_elem_align = 0; -}; - -template -struct typelist_index; - -template -struct typelist_index -{ - static const size_t value = 1 + typelist_index::value; -}; - -template -struct typelist_index -{ - static const size_t value = 0; -}; - -template -struct typelist_index -{ - static const size_t value = 0; -}; - -/* - * Variant helper - */ -template -struct variant_helper; - -template -struct variant_helper -{ - template - static void copy_construct(size_t type_index, void* data, const VT& vt) { - if (N == type_index) { - log_copy_construct(); - new (data) T(vt.template get()); - return; - } - variant_helper::copy_construct(type_index, data, vt); - } - - template - static void move_construct(size_t type_index, void* data, VT&& vt) { - if (N == type_index) { - log_move_construct(); - new (data) T(std::move(vt.template get())); - return; - } - variant_helper::move_construct(type_index, data, vt); - } - - static void destruct(size_t type_index, void* data) { - if (N == type_index) { - log_destruct(); - reinterpret_cast(data)->~T(); - return; - } - variant_helper::destruct(type_index, data); - } -}; - -template -struct variant_helper -{ - template - static void copy_construct(size_t type_index, void* data, const VT& vt) {} - - template - static void 
move_construct(size_t type_index, void* data, VT&& vt) {} - - static void destruct(size_t type_index, void* data) {} -}; - - -/* - * Variant - */ -template -struct Variant -{ - typedef typelist tlist; - typedef typename std::aligned_storage::type data_type; - - data_type data; - size_t type_index; - - template - explicit Variant(const T& val) : type_index(typelist_index::value) { - static_assert(typelist_index::value < sizeof...(Ts), "Invalid variant type."); - log_copy_construct(); - new (&data) T(val); - } - - template - explicit Variant(T&& val) : type_index(typelist_index::value) { - static_assert(typelist_index::value < sizeof...(Ts), "Invalid variant type."); - log_move_construct(); - new (&data) T(std::move(val)); - } - - Variant() : type_index(sizeof...(Ts)) {} - - Variant(const Variant& rhs) : type_index(rhs.type_index) { - variant_helper<0, Ts...>::copy_construct(type_index, &data, rhs); - } - - Variant(Variant&& rhs) : type_index(rhs.type_index) { - variant_helper<0, Ts...>::move_construct(type_index, &data, rhs); - } - - Variant& operator=(const Variant& rhs) { - if (this != &rhs) { - variant_helper<0, Ts...>::destruct(type_index, &data); - type_index = rhs.type_index; - variant_helper<0, Ts...>::copy_construct(type_index, &data, rhs); - } - return *this; - } - - Variant& operator=(Variant&& rhs) { - if (this != &rhs) { - variant_helper<0, Ts...>::destruct(type_index, &data); - type_index = rhs.type_index; - variant_helper<0, Ts...>::move_construct(type_index, &data, rhs); - } - return *this; - } - - ~Variant() { - variant_helper<0, Ts...>::destruct(type_index, &data); - } - - template - T& get() { - if (type_index != typelist_index::value) { - throw std::invalid_argument("Invalid template argument."); - } - return *reinterpret_cast(&data); - } - - template - const T& get() const { - if (type_index != typelist_index::value) { - throw std::invalid_argument("Invalid template argument."); - } - return *reinterpret_cast(&data); - } -}; - -#if _MSC_VER < 
1900 // Less than Visual Studio 2015 -#undef static_max -#undef alignof -#endif - /*----------------------------------------------------------------------------- * PEG *---------------------------------------------------------------------------*/ @@ -318,14 +143,16 @@ struct Variant /* * Forward declalations */ -class Rule; class Definition; -template -struct SemanticActions; - -template -struct SemanticValues; +/* +* Semantic values +*/ +struct SemanticValues +{ + std::vector names; + std::vector values; +}; /* * Match @@ -348,7 +175,14 @@ Match fail() { /* * Rules */ -class Sequence +class Rule +{ + public: + virtual ~Rule() {}; + virtual Match parse(const char* s, size_t l, SemanticValues& sv) const = 0; +}; + +class Sequence : public Rule { public: Sequence(const Sequence& rhs) : rules_(rhs.rules_) {} @@ -372,14 +206,23 @@ public: Sequence(const std::vector>& rules) : rules_(rules) {} Sequence(std::vector>&& rules) : rules_(std::move(rules)) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + size_t i = 0; + for (const auto& rule : rules_) { + auto m = rule->parse(s + i, l - i, sv); + if (!m.ret) { + return fail(); + } + i += m.len; + } + return success(i); + } private: std::vector> rules_; }; -class PrioritizedChoice +class PrioritizedChoice : public Rule { public: #if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 @@ -401,80 +244,123 @@ public: PrioritizedChoice(const std::vector>& rules) : rules_(rules) {} PrioritizedChoice(std::vector>&& rules) : rules_(std::move(rules)) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + for (const auto& rule : rules_) { + auto m = rule->parse(s, l, sv); + if (m.ret) { + return success(m.len); + } + } + return fail(); + } + private: std::vector> rules_; }; 
-class ZeroOrMore +class ZeroOrMore : public Rule { public: ZeroOrMore(const std::shared_ptr& rule) : rule_(rule) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + auto i = 0; + while (l - i > 0) { + auto m = rule_->parse(s + i, l - i, sv); + if (!m.ret) { + break; + } + i += m.len; + } + return success(i); + } private: std::shared_ptr rule_; }; -class OneOrMore +class OneOrMore : public Rule { public: OneOrMore(const std::shared_ptr& rule) : rule_(rule) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + auto m = rule_->parse(s, l, sv); + if (!m.ret) { + return fail(); + } + auto i = m.len; + while (l - i > 0) { + auto m = rule_->parse(s + i, l - i, sv); + if (!m.ret) { + break; + } + i += m.len; + } + return success(i); + } private: std::shared_ptr rule_; }; -class Option +class Option : public Rule { public: Option(const std::shared_ptr& rule) : rule_(rule) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + auto m = rule_->parse(s, l, sv); + return success(m.ret ? 
m.len : 0); + } private: std::shared_ptr rule_; }; -class AndPredicate +class AndPredicate : public Rule { public: AndPredicate(const std::shared_ptr& rule) : rule_(rule) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + auto m = rule_->parse(s, l, sv); + if (m.ret) { + return success(0); + } else { + return fail(); + } + } private: std::shared_ptr rule_; }; -class NotPredicate +class NotPredicate : public Rule { public: NotPredicate(const std::shared_ptr& rule) : rule_(rule) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + auto m = rule_->parse(s, l, sv); + if (m.ret) { + return fail(); + } else { + return success(0); + } + } private: std::shared_ptr rule_; }; -class LiteralString +class LiteralString : public Rule { public: LiteralString(const char* s) : lit_(s) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const { + Match parse(const char* s, size_t l, SemanticValues& sv) const { auto i = 0u; for (; i < lit_.size(); i++) { if (i >= l || s[i] != lit_[i]) { @@ -488,13 +374,12 @@ private: std::string lit_; }; -class CharacterClass +class CharacterClass : public Rule { public: CharacterClass(const char* chars) : chars_(chars) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const { + Match parse(const char* s, size_t l, SemanticValues& sv) const { if (l < 1) { return fail(); } @@ -520,13 +405,12 @@ private: std::string chars_; }; -class Character +class Character : public Rule { public: Character(char ch) : ch_(ch) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const { + Match parse(const char* s, size_t l, SemanticValues& sv) const { if (l < 1 || s[0] != ch_) { 
return fail(); } @@ -537,11 +421,10 @@ private: char ch_; }; -class AnyCharacter +class AnyCharacter : public Rule { public: - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const { + Match parse(const char* s, size_t l, SemanticValues& sv) const { if (l < 1) { return fail(); } @@ -549,160 +432,69 @@ public: } }; -class Grouping +class Grouping : public Rule { public: Grouping(const std::shared_ptr& rule) : rule_(rule) {} Grouping(const std::shared_ptr& rule, std::function match) : rule_(rule), match_(match) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + assert(rule_); + auto m = rule_->parse(s, l, sv); + if (m.ret && match_) { + match_(s, m.len); + } + return m; + } private: std::shared_ptr rule_; std::function match_; }; -class NonTerminal -{ -public: - NonTerminal(Definition* outer) : outer_(outer) {}; - - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; - -private: - friend class Definition; - - template - T reduce(const char* s, size_t l, const std::vector& v, const std::vector& n, Action action) const; - - std::shared_ptr rule_; - Definition* outer_; -}; - -class DefinitionReference -{ -public: - DefinitionReference( - const std::map& grammar, const std::string& name) - : grammar_(grammar) - , name_(name) {} - - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; - -private: - const std::map& grammar_; - std::string name_; -}; - -class WeakHolder +class WeakHolder : public Rule { public: WeakHolder(const std::shared_ptr& rule) : weak_(rule) {} - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const; + Match parse(const char* s, size_t l, SemanticValues& sv) const { + auto rule = weak_.lock(); + assert(rule); + return rule->parse(s, l, 
sv); + } private: std::weak_ptr weak_; }; -/* - * Rule - */ -template -class TRule -{ -public: - template - TRule(const T& val) : vt(val) {} - - template - TRule(T&& val) : vt(std::move(val)) {} - - template - Match parse(const char* s, size_t l, const SemanticActions* sa, SemanticValues* sv) const { - switch (vt.type_index) { - case 0: return vt.template get().template parse(s, l, sa, sv); - case 1: return vt.template get().template parse(s, l, sa, sv); - case 2: return vt.template get().template parse(s, l, sa, sv); - case 3: return vt.template get().template parse(s, l, sa, sv); - case 4: return vt.template get