diff --git a/README.md b/README.md index 27647be..cabc1cd 100644 --- a/README.md +++ b/README.md @@ -42,23 +42,16 @@ using namespace std; int main(void) { // (2) Make a parser - auto grammar = R"( + parser parser(R"( # Grammar for Calculator... Additive <- Multitive '+' Additive / Multitive Multitive <- Primary '*' Multitive / Primary Primary <- '(' Additive ')' / Number Number <- < [0-9]+ > %whitespace <- [ \t]* - )"; + )"); - parser parser; - - parser.log = [](size_t line, size_t col, const string& msg) { - cerr << line << ":" << col << ": " << msg << "\n"; - }; - - auto ok = parser.load_grammar(grammar); - assert(ok); + assert((bool)parser == true); // (3) Setup actions parser["Additive"] = [](const SemanticValues& sv) { @@ -93,6 +86,28 @@ int main(void) { } ``` +To show syntax errors in grammar text: + +```cpp +auto grammar = R"( + # Grammar for Calculator... + Additive <- Multitive '+' Additive / Multitive + Multitive <- Primary '*' Multitive / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* +)"; + +parser parser; + +parser.log = [](size_t line, size_t col, const string& msg) { + cerr << line << ":" << col << ": " << msg << "\n"; +}; + +auto ok = parser.load_grammar(grammar); +assert(ok); +``` + There are four semantic actions available: ```cpp @@ -326,6 +341,46 @@ List(I, D) ← I (D I)* T(x) ← < x > _ ``` +Parsing expressions by precedence climbing algorithm +---------------------------------------------------- + +*cpp-peglib* supports [operator-precedence parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser) by the [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing) + +```cpp + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? 
[0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + long val; + parser.parse(" -1 + (1 + 2) * 3 - -1", val); + assert(val == 9); +``` + AST generation -------------- diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 3703aae..7669594 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -17,3 +17,9 @@ target_link_libraries(calc2 ${add_link_deps}) add_executable(calc3 calc3.cc) target_link_libraries(calc3 ${add_link_deps}) + +add_executable(calc4 calc4.cc) +target_link_libraries(calc4 ${add_link_deps}) + +add_executable(calc5 calc5.cc) +target_link_libraries(calc5 ${add_link_deps}) diff --git a/example/calc.cc b/example/calc.cc index 2b0c4db..a8d9a23 100644 --- a/example/calc.cc +++ b/example/calc.cc @@ -1,64 +1,51 @@ -// -// calc.cc -// -// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
-// MIT License -// - #include +#include #include -#include using namespace peg; +using namespace std; -int main(int argc, const char** argv) -{ - if (argc < 2 || std::string("--help") == argv[1]) { - std::cout << "usage: calc [formula]" << std::endl; - return 1; - } - - auto reduce = [](const SemanticValues& sv) -> long { - auto result = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - auto ope = any_cast(sv[i]); - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - }; - +int main(void) { + // (2) Make a parser parser parser(R"( - EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ - TERM_OPERATOR <- < [-+] > _ - FACTOR_OPERATOR <- < [/*] > _ - NUMBER <- < [0-9]+ > _ - ~_ <- [ \t\r\n]* + # Grammar for Calculator... + Additive <- Multitive '+' Additive / Multitive + Multitive <- Primary '*' Multitive / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* )"); - parser["EXPRESSION"] = reduce; - parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + assert((bool)parser == true); - auto expr = argv[1]; - long val = 0; - if (parser.parse(expr, val)) { - std::cout << expr << " = " << val << std::endl; - return 0; - } + // (3) Setup actions + parser["Additive"] = [](const SemanticValues& sv) { + switch (sv.choice()) { + case 0: // "Multitive '+' Additive" + return any_cast(sv[0]) + any_cast(sv[1]); + default: // "Multitive" + return any_cast(sv[0]); + } + }; - std::cout << "syntax error..." 
<< std::endl; + parser["Multitive"] = [](const SemanticValues& sv) { + switch (sv.choice()) { + case 0: // "Primary '*' Multitive" + return any_cast(sv[0]) * any_cast(sv[1]); + default: // "Primary" + return any_cast(sv[0]); + } + }; - return -1; + parser["Number"] = [](const SemanticValues& sv) { + return stoi(sv.token(), nullptr, 10); + }; + + // (4) Parse + parser.enable_packrat_parsing(); // Enable packrat parsing. + + int val; + parser.parse(" (1 + 2) * 3 ", val); + + assert(val == 9); } - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/calc4.cc b/example/calc4.cc new file mode 100644 index 0000000..1404dfc --- /dev/null +++ b/example/calc4.cc @@ -0,0 +1,42 @@ +#include +#include +#include + +using namespace peg; +using namespace std; + +int main(void) { + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + long val; + parser.parse(" -1 + (1 + 2) * 3 - -1", val); + + assert(val == 9); +} diff --git a/example/calc5.cc b/example/calc5.cc new file mode 100644 index 0000000..2b9a750 --- /dev/null +++ b/example/calc5.cc @@ -0,0 +1,69 @@ +// +// calc5.cc +// +// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
+// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(int argc, const char** argv) +{ + if (argc < 2 || std::string("--help") == argv[1]) { + std::cout << "usage: calc5 [formula]" << std::endl; + return 1; + } + + std::function eval = [&](const Ast& ast) { + if (ast.name == "NUMBER") { + return stol(ast.token); + } else { + const auto& nodes = ast.nodes; + auto result = eval(*nodes[0]); + if (nodes.size() > 1) { + auto ope = nodes[1]->token[0]; + auto num = eval(*nodes[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } + }; + + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + auto expr = argv[1]; + std::shared_ptr ast; + if (parser.parse(expr, ast)) { + ast = AstOptimizer(true).optimize(ast); + std::cout << ast_to_s(ast); + std::cout << expr << " = " << eval(*ast) << std::endl; + return 0; + } + + std::cout << "syntax error..." 
<< std::endl; + + return -1; +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/peglib.h b/peglib.h index cfe0c71..d706b83 100644 --- a/peglib.h +++ b/peglib.h @@ -119,7 +119,7 @@ template T &any_cast(any &val) { return p->value_; } -template <> any &any_cast(any &val) { return val; } +template <> inline any &any_cast(any &val) { return val; } template const T &any_cast(const any &val) { assert(val.content_); @@ -129,7 +129,7 @@ template const T &any_cast(const any &val) { return p->value_; } -template <> const any &any_cast(const any &val) { return val; } +template <> inline const any &any_cast(const any &val) { return val; } #endif /*----------------------------------------------------------------------------- @@ -532,6 +532,7 @@ private: friend class Sequence; friend class PrioritizedChoice; friend class Holder; + friend class PrecedenceClimbing; const char *s_ = nullptr; size_t n_ = 0; @@ -671,63 +672,63 @@ private: typedef std::function Fty; template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv) const) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv) const) { return TypeAdaptor_sv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues &sv) const) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv) const) { return TypeAdaptor_csv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv)) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv)) { return TypeAdaptor_sv(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues &sv)) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv)) { return TypeAdaptor_csv(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(SemanticValues &sv)) { + Fty make_adaptor(F fn, R (*)(SemanticValues &sv)) { return TypeAdaptor_sv(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(const SemanticValues &sv)) { + Fty make_adaptor(F fn, R (*)(const SemanticValues &sv)) { return TypeAdaptor_csv(fn); } template - Fty 
make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv, any &dt) const) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv, any &dt) const) { return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, - R (F::* /*mf*/)(const SemanticValues &sv, any &dt) const) { + R (F::*)(const SemanticValues &sv, any &dt) const) { return TypeAdaptor_csv_dt(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (F::*)(SemanticValues &sv, any &dt)) { return TypeAdaptor_sv_dt(fn); } template - Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt)) { return TypeAdaptor_csv_dt(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (*)(SemanticValues &sv, any &dt)) { return TypeAdaptor_sv_dt(fn); } template - Fty make_adaptor(F fn, R (*/*mf*/)(const SemanticValues &sv, any &dt)) { + Fty make_adaptor(F fn, R (*)(const SemanticValues &sv, any &dt)) { return TypeAdaptor_csv_dt(fn); } @@ -1529,6 +1530,32 @@ public: std::string name_; }; +class PrecedenceClimbing : public Ope { +public: + using BinOpeInfo = std::map>; + + PrecedenceClimbing(const std::shared_ptr &atom, + const std::shared_ptr &binop, const BinOpeInfo &info, + const Action &action) + : atom_(atom), binop_(binop), info_(info), action_(action) {} + + size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, + any &dt) const override { + return parse_expression(s, n, sv, c, dt, 0); + } + + void accept(Visitor &v) override; + + std::shared_ptr atom_; + std::shared_ptr binop_; + BinOpeInfo info_; + const Action &action_; + +private: + size_t parse_expression(const char *s, size_t n, SemanticValues &sv, + Context &c, any &dt, size_t min_prec) const; +}; + /* * Factories */ @@ -1630,6 +1657,13 @@ inline std::shared_ptr bkr(const std::string &name) { return std::make_shared(name); } +inline 
std::shared_ptr pre(const std::shared_ptr &atom, + const std::shared_ptr &binop, + const PrecedenceClimbing::BinOpeInfo &info, + const Action &action) { + return std::make_shared(atom, binop, info, action); +} + /* * Visitor */ @@ -1656,6 +1690,7 @@ struct Ope::Visitor { virtual void visit(Reference & /*ope*/) {} virtual void visit(Whitespace & /*ope*/) {} virtual void visit(BackReference & /*ope*/) {} + virtual void visit(PrecedenceClimbing & /*ope*/) {} }; struct IsReference : public Ope::Visitor { @@ -1685,6 +1720,7 @@ struct TraceOpeName : public Ope::Visitor { void visit(Reference &ope) override { name = "Reference"; } void visit(Whitespace &ope) override { name = "Whitespace"; } void visit(BackReference &ope) override { name = "BackReference"; } + void visit(PrecedenceClimbing &ope) override { name = "PrecedenceClimbing"; } const char *name = nullptr; }; @@ -1758,6 +1794,7 @@ struct TokenChecker : public Ope::Visitor { void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } static bool is_token(Ope &ope) { if (IsLiteralToken::check(ope)) { return true; } @@ -1829,6 +1866,7 @@ struct DetectLeftRecursion : public Ope::Visitor { void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } void visit(BackReference & /*ope*/) override { done_ = true; } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } const char *error_s = nullptr; @@ -1878,6 +1916,7 @@ struct HasEmptyElement : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } bool is_empty = false; const char *error_s = nullptr; @@ -1938,6 
+1977,7 @@ struct DetectInfiniteLoop : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } bool has_error = false; const char *error_s = nullptr; @@ -1975,6 +2015,7 @@ struct ReferenceChecker : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } std::unordered_map error_s; std::unordered_map error_message; @@ -2011,6 +2052,7 @@ struct LinkReferences : public Ope::Visitor { void visit(Holder &ope) override { ope.ope_->accept(*this); } void visit(Reference &ope) override; void visit(Whitespace &ope) override { ope.ope_->accept(*this); } + void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); } private: Grammar &grammar_; @@ -2089,6 +2131,10 @@ struct FindReference : public Ope::Visitor { ope.ope_->accept(*this); found_ope = wsp(found_ope); } + void visit(PrecedenceClimbing &ope) override { + ope.atom_->accept(*this); + found_ope = csc(found_ope); + } std::shared_ptr found_ope; @@ -2250,9 +2296,11 @@ public: std::vector params; TracerEnter tracer_enter; TracerLeave tracer_leave; + bool disable_action = false; private: friend class Reference; + friend class ParserGenerator; Definition &operator=(const Definition &rhs); Definition &operator=(Definition &&rhs); @@ -2471,7 +2519,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &sv, } inline any Holder::reduce(SemanticValues &sv, any &dt) const { - if (outer_->action) { + if (outer_->action && !outer_->disable_action) { return outer_->action(sv, dt); } else if (sv.empty()) { return any(); @@ -2523,7 +2571,8 @@ inline std::shared_ptr 
Reference::get_core_operator() const { inline size_t BackReference::parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, any &dt) const { - for (int i = c.capture_scope_stack_size - 1; i >= 0; i--) { + auto size = static_cast(c.capture_scope_stack_size); + for (auto i = size - 1; i >= 0; i--) { const auto &cs = c.capture_scope_stack[i]; if (cs.find(name_) != cs.end()) { const auto &lit = cs.at(name_); @@ -2535,6 +2584,87 @@ inline size_t BackReference::parse_core(const char *s, size_t n, throw std::runtime_error("Invalid back reference..."); } +inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, + SemanticValues &sv, + Context &c, any &dt, + size_t min_prec) const { + auto len = atom_->parse(s, n, sv, c, dt); + if (fail(len)) { return len; } + + std::string tok; + auto &rule = dynamic_cast(*binop_).rule_; + auto action = rule->action; + + rule->action = [&](SemanticValues &sv, any &dt) -> any { + tok = sv.token(); + if (action) { + return action(sv, dt); + } else if (!sv.empty()) { + return sv[0]; + } + return any(); + }; + auto action_se = make_scope_exit([&]() { rule->action = action; }); + + auto save_error_pos = c.error_pos; + + auto i = len; + while (i < n) { + std::vector save_values(sv.begin(), sv.end()); + auto save_tokens = sv.tokens; + + auto chv = c.push(); + auto chl = binop_->parse(s + i, n - i, chv, c, dt); + c.pop(); + + if (fail(chl)) { + c.error_pos = save_error_pos; + break; + } + + auto it = info_.find(tok); + if (it == info_.end()) { break; } + + auto level = std::get<0>(it->second); + auto assoc = std::get<1>(it->second); + + if (level < min_prec) { break; } + + sv.emplace_back(std::move(chv[0])); + i += chl; + + auto next_min_prec = level; + if (assoc == 'L') { next_min_prec = level + 1; } + + chv = c.push(); + chl = parse_expression(s + i, n - i, chv, c, dt, next_min_prec); + c.pop(); + + if (fail(chl)) { + sv.assign(save_values.begin(), save_values.end()); + sv.tokens = save_tokens; + c.error_pos = 
save_error_pos; + break; + } + + sv.emplace_back(std::move(chv[0])); + i += chl; + + any val; + if (action_) { + sv.s_ = s; + sv.n_ = i; + val = action_(sv, dt); + } else if (!sv.empty()) { + val = sv[0]; + } + sv.clear(); + sv.emplace_back(std::move(val)); + } + + return i; +} + inline void Sequence::accept(Visitor &v) { v.visit(*this); } inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); } inline void ZeroOrMore::accept(Visitor &v) { v.visit(*this); } @@ -2556,6 +2686,7 @@ inline void Holder::accept(Visitor &v) { v.visit(*this); } inline void Reference::accept(Visitor &v) { v.visit(*this); } inline void Whitespace::accept(Visitor &v) { v.visit(*this); } inline void BackReference::accept(Visitor &v) { v.visit(*this); } +inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); } inline void AssignIDToDefinition::visit(Holder &ope) { auto p = static_cast(ope.outer_); @@ -2717,11 +2848,17 @@ private: setup_actions(); } + struct Instruction { + std::string type; + any data; + }; + struct Data { std::shared_ptr grammar; std::string start; const char *start_pos = nullptr; std::vector> duplicates; + std::map instructions; Data() : grammar(std::make_shared()) {} }; @@ -2731,9 +2868,9 @@ private: g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); g["Definition"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"], - g["Expression"]), - seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"])); - + g["Expression"], opt(g["Instruction"])), + seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"], + opt(g["Instruction"]))); g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"]))); g["Sequence"] <= zom(g["Prefix"]); g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]); @@ -2826,6 +2963,27 @@ private: zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]); ~g["COMMA"] <= seq(chr(','), g["Spacing"]); + // Instruction grammars + g["Instruction"] <= + 
seq(g["BeginBlacket"], cho(g["PrecedenceClimbing"]), g["EndBlacket"]); + + ~g["SpacesZom"] <= zom(g["Space"]); + ~g["SpacesOom"] <= oom(g["Space"]); + ~g["BeginBlacket"] <= seq(chr('{'), g["Spacing"]); + ~g["EndBlacket"] <= seq(chr('}'), g["Spacing"]); + + // PrecedenceClimbing instruction + g["PrecedenceClimbing"] <= + seq(lit("precedence"), g["SpacesZom"], g["PrecedenceInfo"], + zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]); + g["PrecedenceInfo"] <= + seq(g["PrecedenceAssoc"], + oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"]))); + g["PrecedenceOpe"] <= + tok(oom( + seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))), dot()))); + g["PrecedenceAssoc"] <= cls("LR"); + // Set definition names for (auto &x : g) { x.second.name = x.first; @@ -2834,6 +2992,8 @@ private: void setup_actions() { g["Definition"] = [&](const SemanticValues &sv, any &dt) { + Data &data = *any_cast(dt); + auto is_macro = sv.choice() == 0; auto ignore = any_cast(sv[0]); auto name = any_cast(sv[1]); @@ -2843,12 +3003,16 @@ private: if (is_macro) { params = any_cast>(sv[2]); ope = any_cast>(sv[4]); + if (sv.size() == 6) { + data.instructions[name] = any_cast(sv[5]); + } } else { ope = any_cast>(sv[3]); + if (sv.size() == 5) { + data.instructions[name] = any_cast(sv[4]); + } } - Data &data = *any_cast(dt); - auto &grammar = *data.grammar; if (!grammar.count(name)) { auto &rule = grammar[name]; @@ -2928,8 +3092,7 @@ private: } }; - g["Primary"] = [&](const SemanticValues &sv, - any &dt) -> std::shared_ptr { + g["Primary"] = [&](const SemanticValues &sv, any &dt) { Data &data = *any_cast(dt); switch (sv.choice()) { @@ -2944,10 +3107,13 @@ private: args = any_cast>>(sv[2]); } + std::shared_ptr ope = + ref(*data.grammar, ident, sv.c_str(), is_macro, args); + if (ignore) { - return ign(ref(*data.grammar, ident, sv.c_str(), is_macro, args)); + return ign(ope); } else { - return ref(*data.grammar, ident, sv.c_str(), is_macro, args); + return ope; } } case 2: { // (Expression) @@ 
-3036,6 +3202,29 @@ private: g["Arguments"] = [](const SemanticValues &sv) { return sv.transform>(); }; + + g["PrecedenceClimbing"] = [](const SemanticValues &sv) { + PrecedenceClimbing::BinOpeInfo binOpeInfo; + size_t level = 1; + for (auto v : sv) { + auto tokens = any_cast>(v); + auto assoc = tokens[0][0]; + for (size_t i = 1; i < tokens.size(); i++) { + const auto &tok = tokens[i]; + binOpeInfo[tok] = std::make_pair(level, assoc); + } + level++; + } + Instruction instruction; + instruction.type = "precedence"; + instruction.data = binOpeInfo; + return instruction; + }; + g["PrecedenceInfo"] = [](const SemanticValues &sv) { + return sv.transform(); + }; + g["PrecedenceOpe"] = [](const SemanticValues &sv) { return sv.token(); }; + g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); }; } std::shared_ptr perform_core(const char *s, size_t n, @@ -3170,6 +3359,33 @@ private: (*data.grammar)[WORD_DEFINITION_NAME].get_core_operator(); } + // Apply instructions + for (const auto &item : data.instructions) { + const auto &name = item.first; + const auto &instruction = item.second; + + if (instruction.type == "precedence") { + auto &rule = grammar[name]; + + auto &seq = dynamic_cast(*rule.get_core_operator()); + auto &atom = seq.opes_[0]; + auto &seq1 = dynamic_cast( + *dynamic_cast(*seq.opes_[1]).ope_); + auto &binop = seq1.opes_[0]; + auto &atom1 = seq1.opes_[1]; + + if (atom != atom1) { + // TODO: check + } + + const auto &info = + any_cast(instruction.data); + + rule.holder_->ope_ = pre(atom, binop, info, rule.action); + rule.disable_action = true; + } + } + // Set root definition start = data.start; @@ -3241,7 +3457,6 @@ template struct AstBase : public Annotation { template void ast_to_s_core(const std::shared_ptr &ptr, std::string &s, int level, std::function fn) { - const auto &ast = *ptr; for (auto i = 0; i < level; i++) { s += " "; @@ -3266,7 +3481,6 @@ template std::string ast_to_s(const std::shared_ptr &ptr, std::function fn = nullptr) { - 
std::string s; ast_to_s_core(ptr, s, 0, fn); return s; @@ -3280,7 +3494,6 @@ struct AstOptimizer { template std::shared_ptr optimize(std::shared_ptr original, std::shared_ptr parent = nullptr) { - auto found = std::find(filters_.begin(), filters_.end(), original->name) != filters_.end(); bool opt = optimize_nodes_ ? !found : found; @@ -3504,4 +3717,4 @@ private: #endif -// vim: et ts=4 sw=4 cin cino={1s ff=unix +// vim: et ts=2 sw=2 cin cino={1s ff=unix diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d364d71..9944772 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,7 +7,7 @@ set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_EXTENSIONS OFF) include_directories(..) -add_executable(test-main test.cc) +add_executable(test-main test-main.cc test1.cc test2.cc test3.cc) target_link_libraries(test-main ${add_link_deps}) add_test(TestMain test-main) diff --git a/test/test-main.cc b/test/test-main.cc new file mode 100644 index 0000000..e916cd5 --- /dev/null +++ b/test/test-main.cc @@ -0,0 +1,2 @@ +#define CATCH_CONFIG_MAIN +#include "catch.hh" diff --git a/test/test.cc b/test/test.cc deleted file mode 100644 index 189ac42..0000000 --- a/test/test.cc +++ /dev/null @@ -1,1913 +0,0 @@ - -#define CATCH_CONFIG_MAIN -#include "catch.hh" - -#include -#include - -#if !defined(PEGLIB_NO_UNICODE_CHARS) -TEST_CASE("Simple syntax test (with unicode)", "[general]") -{ - peg::parser parser( - u8" ROOT ← _ " - " _ <- ' ' " - ); - - bool ret = parser; - REQUIRE(ret == true); -} -#endif - -TEST_CASE("Simple syntax test", "[general]") -{ - peg::parser parser(R"( - ROOT <- _ - _ <- ' ' - )"); - - bool ret = parser; - REQUIRE(ret == true); -} - -TEST_CASE("Empty syntax test", "[general]") -{ - peg::parser parser(""); - bool ret = parser; - REQUIRE(ret == false); -} - -TEST_CASE("Backslash escape sequence test", "[general]") -{ - peg::parser parser(R"( - ROOT <- _ - _ <- '\\' - )"); - - bool ret = parser; - REQUIRE(ret == true); -} - -TEST_CASE("Invalid escape sequence test", 
"[general]") -{ - peg::parser parser(R"( - ROOT <- _ - _ <- '\' - )"); - - bool ret = parser; - REQUIRE(ret == false); -} - -TEST_CASE("Infinite loop 1", "[infinite loop]") -{ - peg::parser pg(R"( - ROOT <- WH TOKEN* WH - TOKEN <- [a-z0-9]* - WH <- [ \t]* - )"); - - REQUIRE(!pg); -} - -TEST_CASE("Infinite loop 2", "[infinite loop]") -{ - peg::parser pg(R"( - ROOT <- WH TOKEN+ WH - TOKEN <- [a-z0-9]* - WH <- [ \t]* - )"); - - REQUIRE(!pg); -} - -TEST_CASE("Infinite loop 3", "[infinite loop]") -{ - peg::parser pg(R"( - ROOT <- WH TOKEN* WH - TOKEN <- !'word1' - WH <- [ \t]* - )"); - - REQUIRE(!pg); -} - -TEST_CASE("Infinite loop 4", "[infinite loop]") -{ - peg::parser pg(R"( - ROOT <- WH TOKEN* WH - TOKEN <- &'word1' - WH <- [ \t]* - )"); - - REQUIRE(!pg); -} - -TEST_CASE("Infinite loop 5", "[infinite loop]") -{ - peg::parser pg(R"( - Numbers <- Number* - Number <- [0-9]+ / Spacing - Spacing <- ' ' / '\t' / '\n' / EOF # EOF is empty - EOF <- !. - )"); - - REQUIRE(!pg); -} - -TEST_CASE("Not infinite 1", "[infinite loop]") -{ - peg::parser pg(R"( - Numbers <- Number* EOF - Number <- [0-9]+ / Spacing - Spacing <- ' ' / '\t' / '\n' - EOF <- !. - )"); - - REQUIRE(!!pg); // OK -} - -TEST_CASE("Not infinite 2", "[infinite loop]") -{ - peg::parser pg(R"( - ROOT <- _ ('[' TAG_NAME ']' _)* - # In a sequence operator, if there is at least one non-empty element, we can treat it as non-empty - TAG_NAME <- (!']' .)+ - _ <- [ \t]* - )"); - - REQUIRE(!!pg); // OK -} - -TEST_CASE("Not infinite 3", "[infinite loop]") -{ - peg::parser pg(R"( - EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ # Recursive... 
- TERM_OPERATOR <- < [-+] > _ - FACTOR_OPERATOR <- < [/*] > _ - NUMBER <- < [0-9]+ > _ - _ <- [ \t\r\n]* - )"); - - REQUIRE(!!pg); // OK -} - -TEST_CASE("Action taking non const Semantic Values parameter", "[general]") -{ - peg::parser parser(R"( - ROOT <- TEXT - TEXT <- [a-zA-Z]+ - )"); - - parser["ROOT"] = [&](peg::SemanticValues& sv) { - auto s = peg::any_cast(sv[0]); - s[0] = 'H'; // mutate - return std::string(std::move(s)); // move - }; - - parser["TEXT"] = [&](peg::SemanticValues& sv) { - return sv.token(); - }; - - std::string val; - auto ret = parser.parse("hello", val); - REQUIRE(ret == true); - REQUIRE(val == "Hello"); -} - -TEST_CASE("String capture test", "[general]") -{ - peg::parser parser(R"( - ROOT <- _ ('[' TAG_NAME ']' _)* - TAG_NAME <- (!']' .)+ - _ <- [ \t]* - )"); - - std::vector tags; - - parser["TAG_NAME"] = [&](const peg::SemanticValues& sv) { - tags.push_back(sv.str()); - }; - - auto ret = parser.parse(" [tag1] [tag:2] [tag-3] "); - - REQUIRE(ret == true); - REQUIRE(tags.size() == 3); - REQUIRE(tags[0] == "tag1"); - REQUIRE(tags[1] == "tag:2"); - REQUIRE(tags[2] == "tag-3"); -} - -using namespace peg; - -TEST_CASE("String capture test2", "[general]") -{ - std::vector tags; - - Definition ROOT, TAG, TAG_NAME, WS; - ROOT <= seq(WS, zom(TAG)); - TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); - TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const SemanticValues& sv) { tags.push_back(sv.str()); }; - WS <= zom(cls(" \t")); - - auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); - - REQUIRE(r.ret == true); - REQUIRE(tags.size() == 3); - REQUIRE(tags[0] == "tag1"); - REQUIRE(tags[1] == "tag:2"); - REQUIRE(tags[2] == "tag-3"); -} - -TEST_CASE("String capture test3", "[general]") -{ - parser pg(R"( - ROOT <- _ TOKEN* - TOKEN <- '[' < (!']' .)+ > ']' _ - _ <- [ \t\r\n]* - )"); - - - std::vector tags; - - pg["TOKEN"] = [&](const SemanticValues& sv) { - tags.push_back(sv.token()); - }; - - auto ret = pg.parse(" [tag1] [tag:2] [tag-3] "); - - REQUIRE(ret 
== true); - REQUIRE(tags.size() == 3); - REQUIRE(tags[0] == "tag1"); - REQUIRE(tags[1] == "tag:2"); - REQUIRE(tags[2] == "tag-3"); -} - -TEST_CASE("Cyclic grammer test", "[general]") -{ - Definition PARENT; - Definition CHILD; - - PARENT <= seq(CHILD); - CHILD <= seq(PARENT); -} - -TEST_CASE("Visit test", "[general]") -{ - Definition ROOT, TAG, TAG_NAME, WS; - - ROOT <= seq(WS, zom(TAG)); - TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); - TAG_NAME <= oom(seq(npd(chr(']')), dot())); - WS <= zom(cls(" \t")); - - AssignIDToDefinition defIds; - ROOT.accept(defIds); - - REQUIRE(defIds.ids.size() == 4); -} - -TEST_CASE("Token check test", "[general]") -{ - parser parser(R"( - EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ - TERM_OPERATOR <- < [-+] > _ - FACTOR_OPERATOR <- < [/*] > _ - NUMBER <- < [0-9]+ > _ - _ <- [ \t\r\n]* - )"); - - REQUIRE(parser["EXPRESSION"].is_token() == false); - REQUIRE(parser["FACTOR"].is_token() == false); - REQUIRE(parser["FACTOR_OPERATOR"].is_token() == true); - REQUIRE(parser["NUMBER"].is_token() == true); - REQUIRE(parser["_"].is_token() == true); -} - -TEST_CASE("Lambda action test", "[general]") -{ - parser parser(R"( - START <- (CHAR)* - CHAR <- . 
- )"); - - std::string ss; - parser["CHAR"] = [&](const SemanticValues& sv) { - ss += *sv.c_str(); - }; - - bool ret = parser.parse("hello"); - REQUIRE(ret == true); - REQUIRE(ss == "hello"); -} - -TEST_CASE("enter/leave handlers test", "[general]") -{ - parser parser(R"( - START <- LTOKEN '=' RTOKEN - LTOKEN <- TOKEN - RTOKEN <- TOKEN - TOKEN <- [A-Za-z]+ - )"); - - parser["LTOKEN"].enter = [&](const char*, size_t, any& dt) { - auto& require_upper_case = *any_cast(dt); - require_upper_case = false; - }; - parser["LTOKEN"].leave = [&](const char*, size_t, size_t, any&, any& dt) { - auto& require_upper_case = *any_cast(dt); - require_upper_case = true; - }; - - auto message = "should be upper case string..."; - - parser["TOKEN"] = [&](const SemanticValues& sv, any& dt) { - auto& require_upper_case = *any_cast(dt); - if (require_upper_case) { - const auto& s = sv.str(); - if (!std::all_of(s.begin(), s.end(), ::isupper)) { - throw parse_error(message); - } - } - }; - - bool require_upper_case = false; - any dt = &require_upper_case; - REQUIRE(parser.parse("hello=world", dt) == false); - REQUIRE(parser.parse("HELLO=world", dt) == false); - REQUIRE(parser.parse("hello=WORLD", dt) == true); - REQUIRE(parser.parse("HELLO=WORLD", dt) == true); - - parser.log = [&](size_t ln, size_t col, const std::string& msg) { - REQUIRE(ln == 1); - REQUIRE(col == 7); - REQUIRE(msg == message); - }; - parser.parse("hello=world", dt); -} - -TEST_CASE("WHITESPACE test", "[general]") -{ - peg::parser parser(R"( - # Rules - ROOT <- ITEM (',' ITEM)* - ITEM <- WORD / PHRASE - - # Tokens - WORD <- < [a-zA-Z0-9_]+ > - PHRASE <- < '"' (!'"' .)* '"' > - - %whitespace <- [ \t\r\n]* - )"); - - auto ret = parser.parse(R"( one, "two, three", four )"); - - REQUIRE(ret == true); -} - -TEST_CASE("WHITESPACE test2", "[general]") -{ - peg::parser parser(R"( - # Rules - ROOT <- ITEM (',' ITEM)* - ITEM <- '[' < [a-zA-Z0-9_]+ > ']' - - %whitespace <- (SPACE / TAB)* - SPACE <- ' ' - TAB <- '\t' - )"); - - 
std::vector items; - parser["ITEM"] = [&](const SemanticValues& sv) { - items.push_back(sv.token()); - }; - - auto ret = parser.parse(R"([one], [two] ,[three] )"); - - REQUIRE(ret == true); - REQUIRE(items.size() == 3); - REQUIRE(items[0] == "one"); - REQUIRE(items[1] == "two"); - REQUIRE(items[2] == "three"); -} - -TEST_CASE("WHITESPACE test3", "[general]") { - peg::parser parser(R"( - StrQuot <- < '"' < (StrEscape / StrChars)* > '"' > - StrEscape <- '\\' any - StrChars <- (!'"' !'\\' any)+ - any <- . - %whitespace <- [ \t]* - )"); - - parser["StrQuot"] = [](const SemanticValues& sv) { - REQUIRE(sv.token() == R"( aaa \" bbb )"); - }; - - auto ret = parser.parse(R"( " aaa \" bbb " )"); - REQUIRE(ret == true); -} - -TEST_CASE("WHITESPACE test4", "[general]") { - peg::parser parser(R"( - ROOT <- HELLO OPE WORLD - HELLO <- 'hello' - OPE <- < [-+] > - WORLD <- 'world' / 'WORLD' - %whitespace <- [ \t\r\n]* - )"); - - parser["HELLO"] = [](const SemanticValues& sv) { - REQUIRE(sv.token() == "hello"); - }; - - parser["OPE"] = [](const SemanticValues& sv) { - REQUIRE(sv.token() == "+"); - }; - - parser["WORLD"] = [](const SemanticValues& sv) { - REQUIRE(sv.token() == "world"); - }; - - auto ret = parser.parse(" hello + world "); - REQUIRE(ret == true); -} - -TEST_CASE("Word expression test", "[general]") { - peg::parser parser(R"( - ROOT <- 'hello' ','? 
'world' - %whitespace <- [ \t\r\n]* - %word <- [a-z]+ - )"); - - REQUIRE(parser.parse("helloworld") == false); - REQUIRE(parser.parse("hello world") == true); - REQUIRE(parser.parse("hello,world") == true); - REQUIRE(parser.parse("hello, world") == true); - REQUIRE(parser.parse("hello , world") == true); -} - -TEST_CASE("Skip token test", "[general]") -{ - peg::parser parser( - " ROOT <- _ ITEM (',' _ ITEM _)* " - " ITEM <- ([a-z0-9])+ " - " ~_ <- [ \t]* " - ); - - parser["ROOT"] = [&](const SemanticValues& sv) { - REQUIRE(sv.size() == 2); - }; - - auto ret = parser.parse(" item1, item2 "); - - REQUIRE(ret == true); -} - -TEST_CASE("Skip token test2", "[general]") -{ - peg::parser parser(R"( - ROOT <- ITEM (',' ITEM)* - ITEM <- < ([a-z0-9])+ > - %whitespace <- [ \t]* - )"); - - parser["ROOT"] = [&](const SemanticValues& sv) { - REQUIRE(sv.size() == 2); - }; - - auto ret = parser.parse(" item1, item2 "); - - REQUIRE(ret == true); -} - -TEST_CASE("Backtracking test", "[general]") -{ - peg::parser parser(R"( - START <- PAT1 / PAT2 - PAT1 <- HELLO ' One' - PAT2 <- HELLO ' Two' - HELLO <- 'Hello' - )"); - - size_t count = 0; - parser["HELLO"] = [&](const SemanticValues& /*sv*/) { - count++; - }; - - parser.enable_packrat_parsing(); - - bool ret = parser.parse("Hello Two"); - REQUIRE(ret == true); - REQUIRE(count == 1); // Skip second time -} - -TEST_CASE("Backtracking with AST", "[general]") -{ - parser parser(R"( - S <- A? 
B (A B)* A - A <- 'a' - B <- 'b' - )"); - - parser.enable_ast(); - std::shared_ptr ast; - bool ret = parser.parse("ba", ast); - REQUIRE(ret == true); - REQUIRE(ast->nodes.size() == 2); -} - -TEST_CASE("Octal/Hex/Unicode value test", "[general]") -{ - peg::parser parser( - R"( ROOT <- '\132\x7a\u30f3' )" - ); - - auto ret = parser.parse("Zzン"); - - REQUIRE(ret == true); -} - -TEST_CASE("Ignore case test", "[general]") { - peg::parser parser(R"( - ROOT <- HELLO WORLD - HELLO <- 'hello'i - WORLD <- 'world'i - %whitespace <- [ \t\r\n]* - )"); - - parser["HELLO"] = [](const SemanticValues& sv) { - REQUIRE(sv.token() == "Hello"); - }; - - parser["WORLD"] = [](const SemanticValues& sv) { - REQUIRE(sv.token() == "World"); - }; - - auto ret = parser.parse(" Hello World "); - REQUIRE(ret == true); -} - -TEST_CASE("mutable lambda test", "[general]") -{ - std::vector vec; - - parser pg("ROOT <- 'mutable lambda test'"); - - // This test makes sure if the following code can be compiled. - pg["TOKEN"] = [=](const SemanticValues& sv) mutable { - vec.push_back(sv.str()); - }; -} - -TEST_CASE("Simple calculator test", "[general]") -{ - parser parser(R"( - Additive <- Multitive '+' Additive / Multitive - Multitive <- Primary '*' Multitive / Primary - Primary <- '(' Additive ')' / Number - Number <- [0-9]+ - )"); - - parser["Additive"] = [](const SemanticValues& sv) { - switch (sv.choice()) { - case 0: - return any_cast(sv[0]) + any_cast(sv[1]); - default: - return any_cast(sv[0]); - } - }; - - parser["Multitive"] = [](const SemanticValues& sv) { - switch (sv.choice()) { - case 0: - return any_cast(sv[0]) * any_cast(sv[1]); - default: - return any_cast(sv[0]); - } - }; - - parser["Number"] = [](const SemanticValues& sv) { - return atoi(sv.c_str()); - }; - - int val; - parser.parse("(1+2)*3", val); - - REQUIRE(val == 9); -} - -TEST_CASE("Calculator test", "[general]") -{ - // Construct grammer - Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; - - EXPRESSION 
<= seq(TERM, zom(seq(TERM_OPERATOR, TERM))); - TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); - FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); - TERM_OPERATOR <= cls("+-"); - FACTOR_OPERATOR <= cls("*/"); - NUMBER <= oom(cls("0-9")); - - // Setup actions - auto reduce = [](const SemanticValues& sv) -> long { - long ret = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - switch (any_cast(sv[i])) { - case '+': ret += num; break; - case '-': ret -= num; break; - case '*': ret *= num; break; - case '/': ret /= num; break; - } - } - return ret; - }; - - EXPRESSION = reduce; - TERM = reduce; - TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); }; - FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); }; - NUMBER = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; - - // Parse - long val; - auto r = EXPRESSION.parse_and_get_value("1+2*3*(4-5+6)/7-8", val); - - REQUIRE(r.ret == true); - REQUIRE(val == -3); -} - -TEST_CASE("Calculator test2", "[general]") -{ - // Parse syntax - auto syntax = R"( - # Grammar for Calculator... 
- EXPRESSION <- TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' EXPRESSION ')' - TERM_OPERATOR <- [-+] - FACTOR_OPERATOR <- [/*] - NUMBER <- [0-9]+ - )"; - - std::string start; - auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start, nullptr); - auto& g = *grammar; - - // Setup actions - auto reduce = [](const SemanticValues& sv) -> long { - long ret = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - switch (any_cast(sv[i])) { - case '+': ret += num; break; - case '-': ret -= num; break; - case '*': ret *= num; break; - case '/': ret /= num; break; - } - } - return ret; - }; - - g["EXPRESSION"] = reduce; - g["TERM"] = reduce; - g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; - g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; - g["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; - - // Parse - long val; - auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val); - - REQUIRE(r.ret == true); - REQUIRE(val == -3); -} - -TEST_CASE("Calculator test3", "[general]") -{ - // Parse syntax - parser parser(R"( - # Grammar for Calculator... 
- EXPRESSION <- TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' EXPRESSION ')' - TERM_OPERATOR <- [-+] - FACTOR_OPERATOR <- [/*] - NUMBER <- [0-9]+ - )"); - - auto reduce = [](const SemanticValues& sv) -> long { - long ret = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - switch (any_cast(sv[i])) { - case '+': ret += num; break; - case '-': ret -= num; break; - case '*': ret *= num; break; - case '/': ret /= num; break; - } - } - return ret; - }; - - // Setup actions - parser["EXPRESSION"] = reduce; - parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; - - // Parse - long val; - auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val); - - REQUIRE(ret == true); - REQUIRE(val == -3); -} - -TEST_CASE("Calculator test with AST", "[general]") -{ - parser parser(R"( - EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ - TERM_OPERATOR <- < [-+] > _ - FACTOR_OPERATOR <- < [/*] > _ - NUMBER <- < [0-9]+ > _ - ~_ <- [ \t\r\n]* - )"); - - parser.enable_ast(); - - std::function eval = [&](const Ast& ast) { - if (ast.name == "NUMBER") { - return stol(ast.token); - } else { - const auto& nodes = ast.nodes; - auto result = eval(*nodes[0]); - for (auto i = 1u; i < nodes.size(); i += 2) { - auto num = eval(*nodes[i + 1]); - auto ope = nodes[i]->token[0]; - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - } - }; - - std::shared_ptr ast; - auto ret = parser.parse("1+2*3*(4-5+6)/7-8", ast); - ast = peg::AstOptimizer(true).optimize(ast); - 
auto val = eval(*ast); - - REQUIRE(ret == true); - REQUIRE(val == -3); -} - -TEST_CASE("Ignore semantic value test", "[general]") -{ - parser parser(R"( - START <- ~HELLO WORLD - HELLO <- 'Hello' _ - WORLD <- 'World' _ - _ <- [ \t\r\n]* - )"); - - parser.enable_ast(); - - std::shared_ptr ast; - auto ret = parser.parse("Hello World", ast); - - REQUIRE(ret == true); - REQUIRE(ast->nodes.size() == 1); - REQUIRE(ast->nodes[0]->name == "WORLD"); -} - -TEST_CASE("Ignore semantic value of 'or' predicate test", "[general]") -{ - parser parser(R"( - START <- _ !DUMMY HELLO_WORLD '.' - HELLO_WORLD <- HELLO 'World' _ - HELLO <- 'Hello' _ - DUMMY <- 'dummy' _ - ~_ <- [ \t\r\n]* - )"); - - parser.enable_ast(); - - std::shared_ptr ast; - auto ret = parser.parse("Hello World.", ast); - - REQUIRE(ret == true); - REQUIRE(ast->nodes.size() == 1); - REQUIRE(ast->nodes[0]->name == "HELLO_WORLD"); -} - -TEST_CASE("Ignore semantic value of 'and' predicate test", "[general]") -{ - parser parser(R"( - START <- _ &HELLO HELLO_WORLD '.' - HELLO_WORLD <- HELLO 'World' _ - HELLO <- 'Hello' _ - ~_ <- [ \t\r\n]* - )"); - - parser.enable_ast(); - - std::shared_ptr ast; - auto ret = parser.parse("Hello World.", ast); - - REQUIRE(ret == true); - REQUIRE(ast->nodes.size() == 1); - REQUIRE(ast->nodes[0]->name == "HELLO_WORLD"); -} - -TEST_CASE("Literal token on AST test1", "[general]") -{ - parser parser(R"( - STRING_LITERAL <- '"' (('\\"' / '\\t' / '\\n') / (!["] .))* '"' - )"); - parser.enable_ast(); - - std::shared_ptr ast; - auto ret = parser.parse(R"("a\tb")", ast); - - REQUIRE(ret == true); - REQUIRE(ast->is_token == true); - REQUIRE(ast->token == R"("a\tb")"); - REQUIRE(ast->nodes.empty()); -} - -TEST_CASE("Literal token on AST test2", "[general]") -{ - parser parser(R"( - STRING_LITERAL <- '"' (ESC / CHAR)* '"' - ESC <- ('\\"' / '\\t' / '\\n') - CHAR <- (!["] .) 
- )"); - parser.enable_ast(); - - std::shared_ptr ast; - auto ret = parser.parse(R"("a\tb")", ast); - - REQUIRE(ret == true); - REQUIRE(ast->is_token == false); - REQUIRE(ast->token.empty()); - REQUIRE(ast->nodes.size() == 3); -} - -TEST_CASE("Literal token on AST test3", "[general]") -{ - parser parser(R"( - STRING_LITERAL <- < '"' (ESC / CHAR)* '"' > - ESC <- ('\\"' / '\\t' / '\\n') - CHAR <- (!["] .) - )"); - parser.enable_ast(); - - std::shared_ptr ast; - auto ret = parser.parse(R"("a\tb")", ast); - - REQUIRE(ret == true); - REQUIRE(ast->is_token == true); - REQUIRE(ast->token == R"("a\tb")"); - REQUIRE(ast->nodes.empty()); -} - -TEST_CASE("Missing missing definitions test", "[general]") -{ - parser parser(R"( - A <- B C - )"); - - REQUIRE(!parser); -} - -TEST_CASE("Definition duplicates test", "[general]") -{ - parser parser(R"( - A <- '' - A <- '' - )"); - - REQUIRE(!parser); -} - -TEST_CASE("Semantic values test", "[general]") -{ - parser parser(R"( - term <- ( a b c x )? a b c - a <- 'a' - b <- 'b' - c <- 'c' - x <- 'x' - )"); - - for (const auto& rule: parser.get_rule_names()){ - parser[rule.c_str()] = [rule](const SemanticValues& sv, any&) { - if (rule == "term") { - REQUIRE(any_cast(sv[0]) == "a at 0"); - REQUIRE(any_cast(sv[1]) == "b at 1"); - REQUIRE(any_cast(sv[2]) == "c at 2"); - return std::string(); - } else { - return rule + " at " + std::to_string(sv.c_str() - sv.ss); - } - }; - } - - REQUIRE(parser.parse("abc")); -} - -TEST_CASE("Ordered choice count", "[general]") -{ - parser parser(R"( - S <- 'a' / 'b' - )"); - - parser["S"] = [](const SemanticValues& sv) { - REQUIRE(sv.choice() == 1); - REQUIRE(sv.choice_count() == 2); - }; - - parser.parse("b"); -} - -TEST_CASE("Ordered choice count 2", "[general]") -{ - parser parser(R"( - S <- ('a' / 'b')* - )"); - - parser["S"] = [](const SemanticValues& sv) { - REQUIRE(sv.choice() == 0); - REQUIRE(sv.choice_count() == 0); - }; - - parser.parse("b"); -} - -TEST_CASE("Semantic value tag", "[general]") -{ - 
parser parser(R"( - S <- A? B* C? - A <- 'a' - B <- 'b' - C <- 'c' - )"); - - { - using namespace udl; - parser["S"] = [](const SemanticValues& sv) { - REQUIRE(sv.size() == 1); - REQUIRE(sv.tags.size() == 1); - REQUIRE(sv.tags[0] == "C"_); - }; - auto ret = parser.parse("c"); - REQUIRE(ret == true); - } - - { - using namespace udl; - parser["S"] = [](const SemanticValues& sv) { - REQUIRE(sv.size() == 2); - REQUIRE(sv.tags.size() == 2); - REQUIRE(sv.tags[0] == "B"_); - REQUIRE(sv.tags[1] == "B"_); - }; - auto ret = parser.parse("bb"); - REQUIRE(ret == true); - } - - { - using namespace udl; - parser["S"] = [](const SemanticValues& sv) { - REQUIRE(sv.size() == 2); - REQUIRE(sv.tags.size() == 2); - REQUIRE(sv.tags[0] == "A"_); - REQUIRE(sv.tags[1] == "C"_); - }; - auto ret = parser.parse("ac"); - REQUIRE(ret == true); - } -} - -TEST_CASE("Negated Class test", "[general]") -{ - peg::parser parser(R"( - ROOT <- [^a-z_]+ - )"); - - bool ret = parser; - REQUIRE(ret == true); - - REQUIRE(parser.parse("ABC123")); - REQUIRE_FALSE(parser.parse("ABcZ")); - REQUIRE_FALSE(parser.parse("ABCZ_")); - REQUIRE_FALSE(parser.parse("")); -} - -TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") -{ - peg::parser parser(R"( - ROOT <- 'a' - %whitespace <- SPACE* - SPACE <- ' ' - )"); - - parser.enable_packrat_parsing(); - - auto ret = parser.parse("a"); - REQUIRE(ret == true); -} - -TEST_CASE("Packrat parser test with macro", "[packrat]") -{ - parser parser(R"( - EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) - TERM <- LIST(FACTOR, FACTOR_OPERATOR) - FACTOR <- NUMBER / T('(') EXPRESSION T(')') - TERM_OPERATOR <- T([-+]) - FACTOR_OPERATOR <- T([/*]) - NUMBER <- T([0-9]+) - ~_ <- [ \t]* - LIST(I, D) <- I (D I)* - T(S) <- < S > _ - )"); - - parser.enable_packrat_parsing(); - - auto ret = parser.parse(" 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "); - REQUIRE(ret == true); -} - -TEST_CASE("Backreference test", "[backreference]") -{ - parser parser(R"( - START <- _ LQUOTE < (!RQUOTE .)* > RQUOTE _ 
- LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' - RQUOTE <- ')' $delm '"' - ~_ <- [ \t\r\n]* - )"); - - std::string token; - parser["START"] = [&](const SemanticValues& sv) { - token = sv.token(); - }; - - { - token.clear(); - auto ret = parser.parse(R"delm( - R"("hello world")" - )delm"); - - REQUIRE(ret == true); - REQUIRE(token == "\"hello world\""); - } - - { - token.clear(); - auto ret = parser.parse(R"delm( - R"foo("(hello world)")foo" - )delm"); - - REQUIRE(ret == true); - REQUIRE(token == "\"(hello world)\""); - } - - { - token.clear(); - auto ret = parser.parse(R"delm( - R"foo("(hello world)foo")foo" - )delm"); - - REQUIRE(ret == false); - REQUIRE(token == "\"(hello world"); - } - - { - token.clear(); - auto ret = parser.parse(R"delm( - R"foo("(hello world)")bar" - )delm"); - - REQUIRE(ret == false); - REQUIRE(token.empty()); - } -} - -TEST_CASE("Invalid backreference test", "[backreference]") -{ - parser parser(R"( - START <- _ LQUOTE (!RQUOTE .)* RQUOTE _ - LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' - RQUOTE <- ')' $delm2 '"' - ~_ <- [ \t\r\n]* - )"); - - REQUIRE_THROWS_AS( - parser.parse(R"delm( - R"foo("(hello world)")foo" - )delm"), - std::runtime_error); -} - - -TEST_CASE("Nested capture test", "[backreference]") -{ - parser parser(R"( - ROOT <- CONTENT - CONTENT <- (ELEMENT / TEXT)* - ELEMENT <- $(STAG CONTENT ETAG) - STAG <- '<' $tag< TAG_NAME > '>' - ETAG <- '' - TAG_NAME <- 'b' / 'u' - TEXT <- TEXT_DATA - TEXT_DATA <- ![<] . 
- )"); - - REQUIRE(parser.parse("This is a test text.")); - REQUIRE(!parser.parse("This is a test text.")); - REQUIRE(!parser.parse("This is a test text.")); - REQUIRE(!parser.parse("This is a test text.")); -} - -TEST_CASE("Backreference with Prioritized Choice test", "[backreference]") -{ - parser parser(R"( - TREE <- WRONG_BRANCH / CORRECT_BRANCH - WRONG_BRANCH <- BRANCH THAT IS_capture WRONG - CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT - BRANCH <- 'branch' - THAT <- 'that' - IS_capture <- $ref<..> - IS_backref <- $ref - WRONG <- 'wrong' - CORRECT <- 'correct' - )"); - - REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); -} - -TEST_CASE("Backreference with Zero or More test", "[backreference]") -{ - parser parser(R"( - TREE <- WRONG_BRANCH* CORRECT_BRANCH - WRONG_BRANCH <- BRANCH THAT IS_capture WRONG - CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT - BRANCH <- 'branch' - THAT <- 'that' - IS_capture <- $ref<..> - IS_backref <- $ref - WRONG <- 'wrong' - CORRECT <- 'correct' - )"); - - REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); - REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); - REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); - REQUIRE(parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); - REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); - REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"), std::runtime_error); -} - -TEST_CASE("Backreference with One or More test", "[backreference]") -{ - parser parser(R"( - TREE <- WRONG_BRANCH+ CORRECT_BRANCH - WRONG_BRANCH <- BRANCH THAT IS_capture WRONG - CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT - BRANCH <- 'branch' - THAT <- 'that' - IS_capture <- $ref<..> - IS_backref <- $ref - WRONG <- 'wrong' - CORRECT <- 'correct' - )"); - - REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); - 
REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); - REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); - REQUIRE(parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); - REQUIRE(!parser.parse("branchthatiscorrect")); - REQUIRE(!parser.parse("branchthatiswron_branchthatiscorrect")); -} - -TEST_CASE("Backreference with Option test", "[backreference]") -{ - parser parser(R"( - TREE <- WRONG_BRANCH? CORRECT_BRANCH - WRONG_BRANCH <- BRANCH THAT IS_capture WRONG - CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT - BRANCH <- 'branch' - THAT <- 'that' - IS_capture <- $ref<..> - IS_backref <- $ref - WRONG <- 'wrong' - CORRECT <- 'correct' - )"); - - REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); - REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); - REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); - REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); - REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); - REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"), std::runtime_error); -} - -TEST_CASE("Left recursive test", "[left recursive]") -{ - parser parser(R"( - A <- A 'a' - B <- A 'a' - )"); - - REQUIRE(!parser); -} - -TEST_CASE("Left recursive with option test", "[left recursive]") -{ - parser parser(R"( - A <- 'a' / 'b'? 
B 'c' - B <- A - )"); - - REQUIRE(!parser); -} - -TEST_CASE("Left recursive with zom test", "[left recursive]") -{ - parser parser(R"( - A <- 'a'* A* - )"); - - REQUIRE(!parser); -} - -TEST_CASE("Left recursive with a ZOM content rule", "[left recursive]") -{ - parser parser(R"( - A <- B - B <- _ A - _ <- ' '* # Zero or more - )"); - - REQUIRE(!parser); -} - -TEST_CASE("Left recursive with empty string test", "[left recursive]") -{ - parser parser( - " A <- '' A" - ); - - REQUIRE(!parser); -} - -TEST_CASE("User defined rule test", "[user rule]") -{ - auto g = parser(R"( - ROOT <- _ 'Hello' _ NAME '!' _ - )", - { - { - "NAME", usr([](const char* s, size_t n, SemanticValues& /*sv*/, any& /*dt*/) -> size_t { - static std::vector names = { "PEG", "BNF" }; - for (const auto& name: names) { - if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { - return name.size(); - } - } - return static_cast(-1); - }) - }, - { - "~_", zom(cls(" \t\r\n")) - } - }); - - REQUIRE(g.parse(" Hello BNF! ") == true); -} - -TEST_CASE("Semantic predicate test", "[predicate]") -{ - parser parser("NUMBER <- [0-9]+"); - - parser["NUMBER"] = [](const SemanticValues& sv) { - auto val = stol(sv.token(), nullptr, 10); - if (val != 100) { - throw parse_error("value error!!"); - } - return val; - }; - - long val; - REQUIRE(parser.parse("100", val)); - REQUIRE(val == 100); - - REQUIRE(!parser.parse("200", val)); -} - -TEST_CASE("Japanese character", "[unicode]") -{ - peg::parser parser(u8R"( - 文 <- 修飾語? 主語 述語 '。' - 主語 <- 名詞 助詞 - 述語 <- 動詞 助詞 - 修飾語 <- 形容詞 - 名詞 <- 'サーバー' / 'クライアント' - 形容詞 <- '古い' / '新しい' - 動詞 <- '落ち' / '復旧し' - 助詞 <- 'が' / 'を' / 'た' / 'ます' / 'に' - )"); - - bool ret = parser; - REQUIRE(ret == true); - - REQUIRE(parser.parse(u8R"(サーバーを復旧します。)")); -} - -TEST_CASE("dot with a code", "[unicode]") -{ - peg::parser parser(" S <- 'a' . 'b' "); - REQUIRE(parser.parse(u8R"(aあb)")); -} - -TEST_CASE("dot with a char", "[unicode]") -{ - peg::parser parser(" S <- 'a' . 
'b' "); - REQUIRE(parser.parse(u8R"(aåb)")); -} - -TEST_CASE("character class", "[unicode]") -{ - peg::parser parser(R"( - S <- 'a' [い-おAさC-Eた-とは] 'b' - )"); - - bool ret = parser; - REQUIRE(ret == true); - - REQUIRE(!parser.parse(u8R"(aあb)")); - REQUIRE(parser.parse(u8R"(aいb)")); - REQUIRE(parser.parse(u8R"(aうb)")); - REQUIRE(parser.parse(u8R"(aおb)")); - REQUIRE(!parser.parse(u8R"(aかb)")); - REQUIRE(parser.parse(u8R"(aAb)")); - REQUIRE(!parser.parse(u8R"(aBb)")); - REQUIRE(parser.parse(u8R"(aEb)")); - REQUIRE(!parser.parse(u8R"(aFb)")); - REQUIRE(!parser.parse(u8R"(aそb)")); - REQUIRE(parser.parse(u8R"(aたb)")); - REQUIRE(parser.parse(u8R"(aちb)")); - REQUIRE(parser.parse(u8R"(aとb)")); - REQUIRE(!parser.parse(u8R"(aなb)")); - REQUIRE(parser.parse(u8R"(aはb)")); - REQUIRE(!parser.parse(u8R"(a?b)")); -} - -#if 0 // TODO: Unicode Grapheme support -TEST_CASE("dot with a grapheme", "[unicode]") -{ - peg::parser parser(" S <- 'a' . 'b' "); - REQUIRE(parser.parse(u8R"(aसिb)")); -} -#endif - -TEST_CASE("Macro simple test", "[macro]") -{ - parser parser(R"( - S <- HELLO WORLD - HELLO <- T('hello') - WORLD <- T('world') - T(a) <- a [ \t]* - )"); - - REQUIRE(parser.parse("hello \tworld ")); -} - -TEST_CASE("Macro two parameters", "[macro]") -{ - parser parser(R"( - S <- HELLO_WORLD - HELLO_WORLD <- T('hello', 'world') - T(a, b) <- a [ \t]* b [ \t]* - )"); - - REQUIRE(parser.parse("hello \tworld ")); -} - -TEST_CASE("Macro syntax error", "[macro]") -{ - parser parser(R"( - S <- T('hello') - T (a) <- a [ \t]* - )"); - - bool ret = parser; - REQUIRE(ret == false); -} - -TEST_CASE("Macro missing argument", "[macro]") -{ - parser parser(R"( - S <- T ('hello') - T(a, b) <- a [ \t]* b - )"); - - bool ret = parser; - REQUIRE(ret == false); -} - -TEST_CASE("Macro reference syntax error", "[macro]") -{ - parser parser(R"( - S <- T ('hello') - T(a) <- a [ \t]* - )"); - - bool ret = parser; - REQUIRE(ret == false); -} - -TEST_CASE("Macro invalid macro reference error", "[macro]") -{ - parser 
parser(R"( - S <- T('hello') - T <- 'world' - )"); - - bool ret = parser; - REQUIRE(ret == false); -} - -TEST_CASE("Macro calculator", "[macro]") -{ - // Create a PEG parser - parser parser(R"( - # Grammar for simple calculator... - EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) - TERM <- LIST(FACTOR, FACTOR_OPERATOR) - FACTOR <- NUMBER / T('(') EXPRESSION T(')') - TERM_OPERATOR <- T([-+]) - FACTOR_OPERATOR <- T([/*]) - NUMBER <- T([0-9]+) - ~_ <- [ \t]* - LIST(I, D) <- I (D I)* - T(S) <- < S > _ - )"); - - // Setup actions - auto reduce = [](const SemanticValues& sv) -> long { - auto result = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - auto ope = any_cast(sv[i]); - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - }; - - parser["EXPRESSION"] = reduce; - parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; - - bool ret = parser; - REQUIRE(ret == true); - - auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; - long val = 0; - ret = parser.parse(expr, val); - - REQUIRE(ret == true); - REQUIRE(val == -3); -} - -TEST_CASE("Macro expression arguments", "[macro]") -{ - parser parser(R"( - S <- M('hello' / 'Hello', 'world' / 'World') - M(arg0, arg1) <- arg0 [ \t]+ arg1 - )"); - - REQUIRE(parser.parse("Hello world")); -} - -TEST_CASE("Macro recursive", "[macro]") -{ - parser parser(R"( - S <- M('abc') - M(s) <- !s / s ' ' M(s / '123') / s - )"); - - REQUIRE(parser.parse("")); - REQUIRE(parser.parse("abc")); - REQUIRE(parser.parse("abc abc")); - REQUIRE(parser.parse("abc 123 abc")); -} - -TEST_CASE("Macro recursive2", "[macro]") -{ - auto syntaxes = std::vector{ - 
"S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s", - "S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s", - "S <- M('abc') M(s) <- !s / s ' ' M(s? '-' '123') / s", - "S <- M('abc') M(s) <- !s / s ' ' M(&s s+ '-' '123') / s", - "S <- M('abc') M(s) <- !s / s ' ' M(s '-' !s '123') / s", - "S <- M('abc') M(s) <- !s / s ' ' M(< s > '-' '123') / s", - "S <- M('abc') M(s) <- !s / s ' ' M(~s '-' '123') / s", - }; - - for (const auto& syntax: syntaxes) { - parser parser(syntax); - REQUIRE(parser.parse("abc abc-123")); - } -} - -TEST_CASE("Macro exclusive modifiers", "[macro]") -{ - parser parser(R"( - S <- Modifiers(!"") _ - Modifiers(Appeared) <- (!Appeared) ( - Token('public') Modifiers(Appeared / 'public') / - Token('static') Modifiers(Appeared / 'static') / - Token('final') Modifiers(Appeared / 'final') / - "") - Token(t) <- t _ - _ <- [ \t\r\n]* - )"); - - REQUIRE(parser.parse("public")); - REQUIRE(parser.parse("static")); - REQUIRE(parser.parse("final")); - REQUIRE(parser.parse("public static final")); - REQUIRE(!parser.parse("public public")); - REQUIRE(!parser.parse("public static public")); -} - -TEST_CASE("Macro token check test", "[macro]") -{ - parser parser(R"( - # Grammar for simple calculator... 
- EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) - TERM <- LIST(FACTOR, FACTOR_OPERATOR) - FACTOR <- NUMBER / T('(') EXPRESSION T(')') - TERM_OPERATOR <- T([-+]) - FACTOR_OPERATOR <- T([/*]) - NUMBER <- T([0-9]+) - ~_ <- [ \t]* - LIST(I, D) <- I (D I)* - T(S) <- < S > _ - )"); - - REQUIRE(parser["EXPRESSION"].is_token() == false); - REQUIRE(parser["TERM"].is_token() == false); - REQUIRE(parser["FACTOR"].is_token() == false); - REQUIRE(parser["FACTOR_OPERATOR"].is_token() == true); - REQUIRE(parser["NUMBER"].is_token() == true); - REQUIRE(parser["_"].is_token() == true); - REQUIRE(parser["LIST"].is_token() == false); - REQUIRE(parser["T"].is_token() == true); -} - -TEST_CASE("Macro rule-parameter collision", "[macro]") -{ - parser parser(R"( - A <- B(C) - B(D) <- D - C <- 'c' - D <- 'd' - )"); - - REQUIRE(parser.parse("c")); -} - -TEST_CASE("Line information test", "[line information]") -{ - parser parser(R"( - S <- _ (WORD _)+ - WORD <- [A-Za-z]+ - ~_ <- [ \t\r\n]+ - )"); - - std::vector> locations; - parser["WORD"] = [&](const peg::SemanticValues& sv) { - locations.push_back(sv.line_info()); - }; - - bool ret = parser; - REQUIRE(ret == true); - - ret = parser.parse(" Mon Tue Wed \nThu Fri Sat\nSun\n"); - REQUIRE(ret == true); - - REQUIRE(locations[0] == std::make_pair(1, 2)); - REQUIRE(locations[1] == std::make_pair(1, 6)); - REQUIRE(locations[2] == std::make_pair(1, 10)); - REQUIRE(locations[3] == std::make_pair(2, 1)); - REQUIRE(locations[4] == std::make_pair(2, 6)); - REQUIRE(locations[5] == std::make_pair(2, 11)); - REQUIRE(locations[6] == std::make_pair(3, 1)); -} - -bool exact(Grammar& g, const char* d, const char* s) { - auto n = strlen(s); - auto r = g[d].parse(s, n); - return r.ret && r.len == n; -} - -Grammar& make_peg_grammar() { - return ParserGenerator::grammar(); -} - -TEST_CASE("PEG Grammar", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Grammar", " Definition <- a / ( b c ) / d \n rule2 <- [a-zA-Z][a-z0-9-]+ ") == true); -} - 
-TEST_CASE("PEG Definition", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Definition", "Definition <- a / (b c) / d ") == true); - REQUIRE(exact(g, "Definition", "Definition <- a / b c / d ") == true); - REQUIRE(exact(g, "Definition", u8"Definitiond ← a ") == true); - REQUIRE(exact(g, "Definition", "Definition ") == false); - REQUIRE(exact(g, "Definition", " ") == false); - REQUIRE(exact(g, "Definition", "") == false); - REQUIRE(exact(g, "Definition", "Definition = a / (b c) / d ") == false); - REQUIRE(exact(g, "Definition", "Macro(param) <- a ") == true); - REQUIRE(exact(g, "Definition", "Macro (param) <- a ") == false); -} - -TEST_CASE("PEG Expression", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Expression", "a / (b c) / d ") == true); - REQUIRE(exact(g, "Expression", "a / b c / d ") == true); - REQUIRE(exact(g, "Expression", "a b ") == true); - REQUIRE(exact(g, "Expression", "") == true); - REQUIRE(exact(g, "Expression", " ") == false); - REQUIRE(exact(g, "Expression", " a b ") == false); -} - -TEST_CASE("PEG Sequence", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Sequence", "a b c d ") == true); - REQUIRE(exact(g, "Sequence", "") == true); - REQUIRE(exact(g, "Sequence", "!") == false); - REQUIRE(exact(g, "Sequence", "<-") == false); - REQUIRE(exact(g, "Sequence", " a") == false); -} - -TEST_CASE("PEG Prefix", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Prefix", "&[a]") == true); - REQUIRE(exact(g, "Prefix", "![']") == true); - REQUIRE(exact(g, "Prefix", "-[']") == false); - REQUIRE(exact(g, "Prefix", "") == false); - REQUIRE(exact(g, "Prefix", " a") == false); -} - -TEST_CASE("PEG Suffix", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Suffix", "aaa ") == true); - REQUIRE(exact(g, "Suffix", "aaa? 
") == true); - REQUIRE(exact(g, "Suffix", "aaa* ") == true); - REQUIRE(exact(g, "Suffix", "aaa+ ") == true); - REQUIRE(exact(g, "Suffix", ". + ") == true); - REQUIRE(exact(g, "Suffix", "?") == false); - REQUIRE(exact(g, "Suffix", "") == false); - REQUIRE(exact(g, "Suffix", " a") == false); -} - -TEST_CASE("PEG Primary", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Primary", "_Identifier0_ ") == true); - REQUIRE(exact(g, "Primary", "_Identifier0_<-") == false); - REQUIRE(exact(g, "Primary", "( _Identifier0_ _Identifier1_ )") == true); - REQUIRE(exact(g, "Primary", "'Literal String'") == true); - REQUIRE(exact(g, "Primary", "\"Literal String\"") == true); - REQUIRE(exact(g, "Primary", "[a-zA-Z]") == true); - REQUIRE(exact(g, "Primary", ".") == true); - REQUIRE(exact(g, "Primary", "") == false); - REQUIRE(exact(g, "Primary", " ") == false); - REQUIRE(exact(g, "Primary", " a") == false); - REQUIRE(exact(g, "Primary", "") == false); -} - -TEST_CASE("PEG Identifier", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Identifier", "_Identifier0_ ") == true); - REQUIRE(exact(g, "Identifier", "0Identifier_ ") == false); - REQUIRE(exact(g, "Identifier", "Iden|t ") == false); - REQUIRE(exact(g, "Identifier", " ") == false); - REQUIRE(exact(g, "Identifier", " a") == false); - REQUIRE(exact(g, "Identifier", "") == false); -} - -TEST_CASE("PEG IdentStart", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "IdentStart", "_") == true); - REQUIRE(exact(g, "IdentStart", "a") == true); - REQUIRE(exact(g, "IdentStart", "Z") == true); - REQUIRE(exact(g, "IdentStart", "") == false); - REQUIRE(exact(g, "IdentStart", " ") == false); - REQUIRE(exact(g, "IdentStart", "0") == false); -} - -TEST_CASE("PEG IdentRest", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "IdentRest", "_") == true); - REQUIRE(exact(g, "IdentRest", "a") == true); - REQUIRE(exact(g, "IdentRest", "Z") == true); - REQUIRE(exact(g, 
"IdentRest", "") == false); - REQUIRE(exact(g, "IdentRest", " ") == false); - REQUIRE(exact(g, "IdentRest", "0") == true); -} - -TEST_CASE("PEG Literal", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Literal", "'abc' ") == true); - REQUIRE(exact(g, "Literal", "'a\\nb\\tc' ") == true); - REQUIRE(exact(g, "Literal", "'a\\277\tc' ") == true); - REQUIRE(exact(g, "Literal", "'a\\77\tc' ") == true); - REQUIRE(exact(g, "Literal", "'a\\80\tc' ") == false); - REQUIRE(exact(g, "Literal", "'\n' ") == true); - REQUIRE(exact(g, "Literal", "'a\\'b' ") == true); - REQUIRE(exact(g, "Literal", "'a'b' ") == false); - REQUIRE(exact(g, "Literal", "'a\"'b' ") == false); - REQUIRE(exact(g, "Literal", "\"'\\\"abc\\\"'\" ") == true); - REQUIRE(exact(g, "Literal", "\"'\"abc\"'\" ") == false); - REQUIRE(exact(g, "Literal", "abc") == false); - REQUIRE(exact(g, "Literal", "") == false); - REQUIRE(exact(g, "Literal", "\\") == false); - REQUIRE(exact(g, "Literal", u8"'日本語'") == true); - REQUIRE(exact(g, "Literal", u8"\"日本語\"") == true); - REQUIRE(exact(g, "Literal", u8"日本語") == false); -} - -TEST_CASE("PEG Class", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Class", "[]") == false); // NOTE: This is different from the Brian Ford's paper, but same as RegExp - REQUIRE(exact(g, "Class", "[a]") == true); - REQUIRE(exact(g, "Class", "[a-z]") == true); - REQUIRE(exact(g, "Class", "[az]") == true); - REQUIRE(exact(g, "Class", "[a-zA-Z-]") == true); - REQUIRE(exact(g, "Class", "[a-zA-Z-0-9]") == true); - REQUIRE(exact(g, "Class", "[a-]") == false); - REQUIRE(exact(g, "Class", "[-a]") == true); - REQUIRE(exact(g, "Class", "[") == false); - REQUIRE(exact(g, "Class", "[a") == false); - REQUIRE(exact(g, "Class", "]") == false); - REQUIRE(exact(g, "Class", "a]") == false); - REQUIRE(exact(g, "Class", u8"[あ-ん]") == true); - REQUIRE(exact(g, "Class", u8"あ-ん") == false); - REQUIRE(exact(g, "Class", "[-+]") == true); - REQUIRE(exact(g, "Class", "[+-]") == 
false); - REQUIRE(exact(g, "Class", "[\\^]") == true); -} - -TEST_CASE("PEG Negated Class", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "NegatedClass", "[^]") == false); - REQUIRE(exact(g, "NegatedClass", "[^a]") == true); - REQUIRE(exact(g, "NegatedClass", "[^a-z]") == true); - REQUIRE(exact(g, "NegatedClass", "[^az]") == true); - REQUIRE(exact(g, "NegatedClass", "[^a-zA-Z-]") == true); - REQUIRE(exact(g, "NegatedClass", "[^a-zA-Z-0-9]") == true); - REQUIRE(exact(g, "NegatedClass", "[^a-]") == false); - REQUIRE(exact(g, "NegatedClass", "[^-a]") == true); - REQUIRE(exact(g, "NegatedClass", "[^") == false); - REQUIRE(exact(g, "NegatedClass", "[^a") == false); - REQUIRE(exact(g, "NegatedClass", "^]") == false); - REQUIRE(exact(g, "NegatedClass", "^a]") == false); - REQUIRE(exact(g, "NegatedClass", u8"[^あ-ん]") == true); - REQUIRE(exact(g, "NegatedClass", u8"^あ-ん") == false); - REQUIRE(exact(g, "NegatedClass", "[^-+]") == true); - REQUIRE(exact(g, "NegatedClass", "[^+-]") == false); - REQUIRE(exact(g, "NegatedClass", "[^^]") == true); -} - -TEST_CASE("PEG Range", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Range", "a") == true); - REQUIRE(exact(g, "Range", "a-z") == true); - REQUIRE(exact(g, "Range", "az") == false); - REQUIRE(exact(g, "Range", "") == false); - REQUIRE(exact(g, "Range", "a-") == false); - REQUIRE(exact(g, "Range", "-a") == false); -} - -TEST_CASE("PEG Char", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Char", "\\n") == true); - REQUIRE(exact(g, "Char", "\\r") == true); - REQUIRE(exact(g, "Char", "\\t") == true); - REQUIRE(exact(g, "Char", "\\'") == true); - REQUIRE(exact(g, "Char", "\\\"") == true); - REQUIRE(exact(g, "Char", "\\[") == true); - REQUIRE(exact(g, "Char", "\\]") == true); - REQUIRE(exact(g, "Char", "\\\\") == true); - REQUIRE(exact(g, "Char", "\\000") == true); - REQUIRE(exact(g, "Char", "\\377") == true); - REQUIRE(exact(g, "Char", "\\477") == false); - 
REQUIRE(exact(g, "Char", "\\087") == false); - REQUIRE(exact(g, "Char", "\\079") == false); - REQUIRE(exact(g, "Char", "\\00") == true); - REQUIRE(exact(g, "Char", "\\77") == true); - REQUIRE(exact(g, "Char", "\\80") == false); - REQUIRE(exact(g, "Char", "\\08") == false); - REQUIRE(exact(g, "Char", "\\0") == true); - REQUIRE(exact(g, "Char", "\\7") == true); - REQUIRE(exact(g, "Char", "\\8") == false); - REQUIRE(exact(g, "Char", "a") == true); - REQUIRE(exact(g, "Char", ".") == true); - REQUIRE(exact(g, "Char", "0") == true); - REQUIRE(exact(g, "Char", "\\") == false); - REQUIRE(exact(g, "Char", " ") == true); - REQUIRE(exact(g, "Char", " ") == false); - REQUIRE(exact(g, "Char", "") == false); - REQUIRE(exact(g, "Char", u8"あ") == true); -} - -TEST_CASE("PEG Operators", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "LEFTARROW", "<-") == true); - REQUIRE(exact(g, "SLASH", "/ ") == true); - REQUIRE(exact(g, "AND", "& ") == true); - REQUIRE(exact(g, "NOT", "! ") == true); - REQUIRE(exact(g, "QUESTION", "? ") == true); - REQUIRE(exact(g, "STAR", "* ") == true); - REQUIRE(exact(g, "PLUS", "+ ") == true); - REQUIRE(exact(g, "OPEN", "( ") == true); - REQUIRE(exact(g, "CLOSE", ") ") == true); - REQUIRE(exact(g, "DOT", ". 
") == true); -} - -TEST_CASE("PEG Comment", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Comment", "# Comment.\n") == true); - REQUIRE(exact(g, "Comment", "# Comment.") == false); - REQUIRE(exact(g, "Comment", " ") == false); - REQUIRE(exact(g, "Comment", "a") == false); -} - -TEST_CASE("PEG Space", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "Space", " ") == true); - REQUIRE(exact(g, "Space", "\t") == true); - REQUIRE(exact(g, "Space", "\n") == true); - REQUIRE(exact(g, "Space", "") == false); - REQUIRE(exact(g, "Space", "a") == false); -} - -TEST_CASE("PEG EndOfLine", "[peg]") -{ - auto g = ParserGenerator::grammar(); - REQUIRE(exact(g, "EndOfLine", "\r\n") == true); - REQUIRE(exact(g, "EndOfLine", "\n") == true); - REQUIRE(exact(g, "EndOfLine", "\r") == true); - REQUIRE(exact(g, "EndOfLine", " ") == false); - REQUIRE(exact(g, "EndOfLine", "") == false); - REQUIRE(exact(g, "EndOfLine", "a") == false); -} - -TEST_CASE("PEG EndOfFile", "[peg]") -{ - Grammar g = make_peg_grammar(); - REQUIRE(exact(g, "EndOfFile", "") == true); - REQUIRE(exact(g, "EndOfFile", " ") == false); -} - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/test/test1.cc b/test/test1.cc new file mode 100644 index 0000000..de48f06 --- /dev/null +++ b/test/test1.cc @@ -0,0 +1,921 @@ +#include "catch.hh" +#include + +using namespace peg; + +#if !defined(PEGLIB_NO_UNICODE_CHARS) +TEST_CASE("Simple syntax test (with unicode)", "[general]") +{ + parser parser( + u8" ROOT ← _ " + " _ <- ' ' " + ); + + bool ret = parser; + REQUIRE(ret == true); +} +#endif + +TEST_CASE("Simple syntax test", "[general]") +{ + parser parser(R"( + ROOT <- _ + _ <- ' ' + )"); + + bool ret = parser; + REQUIRE(ret == true); +} + +TEST_CASE("Empty syntax test", "[general]") +{ + parser parser(""); + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Backslash escape sequence test", "[general]") +{ + parser parser(R"( + ROOT <- _ + _ <- '\\' + )"); + + bool ret 
= parser; + REQUIRE(ret == true); +} + +TEST_CASE("Invalid escape sequence test", "[general]") +{ + parser parser(R"( + ROOT <- _ + _ <- '\' + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Action taking non const Semantic Values parameter", "[general]") +{ + parser parser(R"( + ROOT <- TEXT + TEXT <- [a-zA-Z]+ + )"); + + parser["ROOT"] = [&](SemanticValues& sv) { + auto s = any_cast(sv[0]); + s[0] = 'H'; // mutate + return std::string(std::move(s)); // move + }; + + parser["TEXT"] = [&](SemanticValues& sv) { + return sv.token(); + }; + + std::string val; + auto ret = parser.parse("hello", val); + REQUIRE(ret == true); + REQUIRE(val == "Hello"); +} + +TEST_CASE("String capture test", "[general]") +{ + parser parser(R"( + ROOT <- _ ('[' TAG_NAME ']' _)* + TAG_NAME <- (!']' .)+ + _ <- [ \t]* + )"); + + std::vector tags; + + parser["TAG_NAME"] = [&](const SemanticValues& sv) { + tags.push_back(sv.str()); + }; + + auto ret = parser.parse(" [tag1] [tag:2] [tag-3] "); + + REQUIRE(ret == true); + REQUIRE(tags.size() == 3); + REQUIRE(tags[0] == "tag1"); + REQUIRE(tags[1] == "tag:2"); + REQUIRE(tags[2] == "tag-3"); +} + +using namespace peg; + +TEST_CASE("String capture test2", "[general]") +{ + std::vector tags; + + Definition ROOT, TAG, TAG_NAME, WS; + ROOT <= seq(WS, zom(TAG)); + TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); + TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const SemanticValues& sv) { tags.push_back(sv.str()); }; + WS <= zom(cls(" \t")); + + auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); + + REQUIRE(r.ret == true); + REQUIRE(tags.size() == 3); + REQUIRE(tags[0] == "tag1"); + REQUIRE(tags[1] == "tag:2"); + REQUIRE(tags[2] == "tag-3"); +} + +TEST_CASE("String capture test3", "[general]") +{ + parser pg(R"( + ROOT <- _ TOKEN* + TOKEN <- '[' < (!']' .)+ > ']' _ + _ <- [ \t\r\n]* + )"); + + + std::vector tags; + + pg["TOKEN"] = [&](const SemanticValues& sv) { + tags.push_back(sv.token()); + }; + + auto ret = pg.parse(" [tag1] [tag:2] 
[tag-3] "); + + REQUIRE(ret == true); + REQUIRE(tags.size() == 3); + REQUIRE(tags[0] == "tag1"); + REQUIRE(tags[1] == "tag:2"); + REQUIRE(tags[2] == "tag-3"); +} + +TEST_CASE("Cyclic grammer test", "[general]") +{ + Definition PARENT; + Definition CHILD; + + PARENT <= seq(CHILD); + CHILD <= seq(PARENT); +} + +TEST_CASE("Visit test", "[general]") +{ + Definition ROOT, TAG, TAG_NAME, WS; + + ROOT <= seq(WS, zom(TAG)); + TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); + TAG_NAME <= oom(seq(npd(chr(']')), dot())); + WS <= zom(cls(" \t")); + + AssignIDToDefinition defIds; + ROOT.accept(defIds); + + REQUIRE(defIds.ids.size() == 4); +} + +TEST_CASE("Token check test", "[general]") +{ + parser parser(R"( + EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* + TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* + FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ + TERM_OPERATOR <- < [-+] > _ + FACTOR_OPERATOR <- < [/*] > _ + NUMBER <- < [0-9]+ > _ + _ <- [ \t\r\n]* + )"); + + REQUIRE(parser["EXPRESSION"].is_token() == false); + REQUIRE(parser["FACTOR"].is_token() == false); + REQUIRE(parser["FACTOR_OPERATOR"].is_token() == true); + REQUIRE(parser["NUMBER"].is_token() == true); + REQUIRE(parser["_"].is_token() == true); +} + +TEST_CASE("Lambda action test", "[general]") +{ + parser parser(R"( + START <- (CHAR)* + CHAR <- . 
+ )"); + + std::string ss; + parser["CHAR"] = [&](const SemanticValues& sv) { + ss += *sv.c_str(); + }; + + bool ret = parser.parse("hello"); + REQUIRE(ret == true); + REQUIRE(ss == "hello"); +} + +TEST_CASE("enter/leave handlers test", "[general]") +{ + parser parser(R"( + START <- LTOKEN '=' RTOKEN + LTOKEN <- TOKEN + RTOKEN <- TOKEN + TOKEN <- [A-Za-z]+ + )"); + + parser["LTOKEN"].enter = [&](const char*, size_t, any& dt) { + auto& require_upper_case = *any_cast(dt); + require_upper_case = false; + }; + parser["LTOKEN"].leave = [&](const char*, size_t, size_t, any&, any& dt) { + auto& require_upper_case = *any_cast(dt); + require_upper_case = true; + }; + + auto message = "should be upper case string..."; + + parser["TOKEN"] = [&](const SemanticValues& sv, any& dt) { + auto& require_upper_case = *any_cast(dt); + if (require_upper_case) { + const auto& s = sv.str(); + if (!std::all_of(s.begin(), s.end(), ::isupper)) { + throw parse_error(message); + } + } + }; + + bool require_upper_case = false; + any dt = &require_upper_case; + REQUIRE(parser.parse("hello=world", dt) == false); + REQUIRE(parser.parse("HELLO=world", dt) == false); + REQUIRE(parser.parse("hello=WORLD", dt) == true); + REQUIRE(parser.parse("HELLO=WORLD", dt) == true); + + parser.log = [&](size_t ln, size_t col, const std::string& msg) { + REQUIRE(ln == 1); + REQUIRE(col == 7); + REQUIRE(msg == message); + }; + parser.parse("hello=world", dt); +} + +TEST_CASE("WHITESPACE test", "[general]") +{ + parser parser(R"( + # Rules + ROOT <- ITEM (',' ITEM)* + ITEM <- WORD / PHRASE + + # Tokens + WORD <- < [a-zA-Z0-9_]+ > + PHRASE <- < '"' (!'"' .)* '"' > + + %whitespace <- [ \t\r\n]* + )"); + + auto ret = parser.parse(R"( one, "two, three", four )"); + + REQUIRE(ret == true); +} + +TEST_CASE("WHITESPACE test2", "[general]") +{ + parser parser(R"( + # Rules + ROOT <- ITEM (',' ITEM)* + ITEM <- '[' < [a-zA-Z0-9_]+ > ']' + + %whitespace <- (SPACE / TAB)* + SPACE <- ' ' + TAB <- '\t' + )"); + + std::vector 
items; + parser["ITEM"] = [&](const SemanticValues& sv) { + items.push_back(sv.token()); + }; + + auto ret = parser.parse(R"([one], [two] ,[three] )"); + + REQUIRE(ret == true); + REQUIRE(items.size() == 3); + REQUIRE(items[0] == "one"); + REQUIRE(items[1] == "two"); + REQUIRE(items[2] == "three"); +} + +TEST_CASE("WHITESPACE test3", "[general]") { + parser parser(R"( + StrQuot <- < '"' < (StrEscape / StrChars)* > '"' > + StrEscape <- '\\' any + StrChars <- (!'"' !'\\' any)+ + any <- . + %whitespace <- [ \t]* + )"); + + parser["StrQuot"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == R"( aaa \" bbb )"); + }; + + auto ret = parser.parse(R"( " aaa \" bbb " )"); + REQUIRE(ret == true); +} + +TEST_CASE("WHITESPACE test4", "[general]") { + parser parser(R"( + ROOT <- HELLO OPE WORLD + HELLO <- 'hello' + OPE <- < [-+] > + WORLD <- 'world' / 'WORLD' + %whitespace <- [ \t\r\n]* + )"); + + parser["HELLO"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "hello"); + }; + + parser["OPE"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "+"); + }; + + parser["WORLD"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "world"); + }; + + auto ret = parser.parse(" hello + world "); + REQUIRE(ret == true); +} + +TEST_CASE("Word expression test", "[general]") { + parser parser(R"( + ROOT <- 'hello' ','? 
'world' + %whitespace <- [ \t\r\n]* + %word <- [a-z]+ + )"); + + REQUIRE(parser.parse("helloworld") == false); + REQUIRE(parser.parse("hello world") == true); + REQUIRE(parser.parse("hello,world") == true); + REQUIRE(parser.parse("hello, world") == true); + REQUIRE(parser.parse("hello , world") == true); +} + +TEST_CASE("Skip token test", "[general]") +{ + parser parser( + " ROOT <- _ ITEM (',' _ ITEM _)* " + " ITEM <- ([a-z0-9])+ " + " ~_ <- [ \t]* " + ); + + parser["ROOT"] = [&](const SemanticValues& sv) { + REQUIRE(sv.size() == 2); + }; + + auto ret = parser.parse(" item1, item2 "); + + REQUIRE(ret == true); +} + +TEST_CASE("Skip token test2", "[general]") +{ + parser parser(R"( + ROOT <- ITEM (',' ITEM)* + ITEM <- < ([a-z0-9])+ > + %whitespace <- [ \t]* + )"); + + parser["ROOT"] = [&](const SemanticValues& sv) { + REQUIRE(sv.size() == 2); + }; + + auto ret = parser.parse(" item1, item2 "); + + REQUIRE(ret == true); +} + +TEST_CASE("Backtracking test", "[general]") +{ + parser parser(R"( + START <- PAT1 / PAT2 + PAT1 <- HELLO ' One' + PAT2 <- HELLO ' Two' + HELLO <- 'Hello' + )"); + + size_t count = 0; + parser["HELLO"] = [&](const SemanticValues& /*sv*/) { + count++; + }; + + parser.enable_packrat_parsing(); + + bool ret = parser.parse("Hello Two"); + REQUIRE(ret == true); + REQUIRE(count == 1); // Skip second time +} + +TEST_CASE("Backtracking with AST", "[general]") +{ + parser parser(R"( + S <- A? 
B (A B)* A + A <- 'a' + B <- 'b' + )"); + + parser.enable_ast(); + std::shared_ptr ast; + bool ret = parser.parse("ba", ast); + REQUIRE(ret == true); + REQUIRE(ast->nodes.size() == 2); +} + +TEST_CASE("Octal/Hex/Unicode value test", "[general]") +{ + parser parser( + R"( ROOT <- '\132\x7a\u30f3' )" + ); + + auto ret = parser.parse("Zzン"); + + REQUIRE(ret == true); +} + +TEST_CASE("Ignore case test", "[general]") { + parser parser(R"( + ROOT <- HELLO WORLD + HELLO <- 'hello'i + WORLD <- 'world'i + %whitespace <- [ \t\r\n]* + )"); + + parser["HELLO"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "Hello"); + }; + + parser["WORLD"] = [](const SemanticValues& sv) { + REQUIRE(sv.token() == "World"); + }; + + auto ret = parser.parse(" Hello World "); + REQUIRE(ret == true); +} + +TEST_CASE("mutable lambda test", "[general]") +{ + std::vector vec; + + parser pg("ROOT <- 'mutable lambda test'"); + + // This test makes sure if the following code can be compiled. + pg["TOKEN"] = [=](const SemanticValues& sv) mutable { + vec.push_back(sv.str()); + }; +} + +TEST_CASE("Simple calculator test", "[general]") +{ + parser parser(R"( + Additive <- Multitive '+' Additive / Multitive + Multitive <- Primary '*' Multitive / Primary + Primary <- '(' Additive ')' / Number + Number <- [0-9]+ + )"); + + parser["Additive"] = [](const SemanticValues& sv) { + switch (sv.choice()) { + case 0: + return any_cast(sv[0]) + any_cast(sv[1]); + default: + return any_cast(sv[0]); + } + }; + + parser["Multitive"] = [](const SemanticValues& sv) { + switch (sv.choice()) { + case 0: + return any_cast(sv[0]) * any_cast(sv[1]); + default: + return any_cast(sv[0]); + } + }; + + parser["Number"] = [](const SemanticValues& sv) { + return atoi(sv.c_str()); + }; + + int val; + parser.parse("(1+2)*3", val); + + REQUIRE(val == 9); +} + +TEST_CASE("Calculator test", "[general]") +{ + // Construct grammer + Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; + + EXPRESSION <= 
seq(TERM, zom(seq(TERM_OPERATOR, TERM))); + TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))); + FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); + TERM_OPERATOR <= cls("+-"); + FACTOR_OPERATOR <= cls("*/"); + NUMBER <= oom(cls("0-9")); + + // Setup actions + auto reduce = [](const SemanticValues& sv) -> long { + long ret = any_cast(sv[0]); + for (auto i = 1u; i < sv.size(); i += 2) { + auto num = any_cast(sv[i + 1]); + switch (any_cast(sv[i])) { + case '+': ret += num; break; + case '-': ret -= num; break; + case '*': ret *= num; break; + case '/': ret /= num; break; + } + } + return ret; + }; + + EXPRESSION = reduce; + TERM = reduce; + TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); }; + FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); }; + NUMBER = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; + + // Parse + long val; + auto r = EXPRESSION.parse_and_get_value("1+2*3*(4-5+6)/7-8", val); + + REQUIRE(r.ret == true); + REQUIRE(val == -3); +} + +TEST_CASE("Calculator test2", "[general]") +{ + // Parse syntax + auto syntax = R"( + # Grammar for Calculator... 
+ EXPRESSION <- TERM (TERM_OPERATOR TERM)* + TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* + FACTOR <- NUMBER / '(' EXPRESSION ')' + TERM_OPERATOR <- [-+] + FACTOR_OPERATOR <- [/*] + NUMBER <- [0-9]+ + )"; + + std::string start; + auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start, nullptr); + auto& g = *grammar; + + // Setup actions + auto reduce = [](const SemanticValues& sv) -> long { + long ret = any_cast(sv[0]); + for (auto i = 1u; i < sv.size(); i += 2) { + auto num = any_cast(sv[i + 1]); + switch (any_cast(sv[i])) { + case '+': ret += num; break; + case '-': ret -= num; break; + case '*': ret *= num; break; + case '/': ret /= num; break; + } + } + return ret; + }; + + g["EXPRESSION"] = reduce; + g["TERM"] = reduce; + g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; + + // Parse + long val; + auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val); + + REQUIRE(r.ret == true); + REQUIRE(val == -3); +} + +TEST_CASE("Calculator test3", "[general]") +{ + // Parse syntax + parser parser(R"( + # Grammar for Calculator... 
+ EXPRESSION <- TERM (TERM_OPERATOR TERM)* + TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* + FACTOR <- NUMBER / '(' EXPRESSION ')' + TERM_OPERATOR <- [-+] + FACTOR_OPERATOR <- [/*] + NUMBER <- [0-9]+ + )"); + + auto reduce = [](const SemanticValues& sv) -> long { + long ret = any_cast(sv[0]); + for (auto i = 1u; i < sv.size(); i += 2) { + auto num = any_cast(sv[i + 1]); + switch (any_cast(sv[i])) { + case '+': ret += num; break; + case '-': ret -= num; break; + case '*': ret *= num; break; + case '/': ret /= num; break; + } + } + return ret; + }; + + // Setup actions + parser["EXPRESSION"] = reduce; + parser["TERM"] = reduce; + parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; + parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; + + // Parse + long val; + auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val); + + REQUIRE(ret == true); + REQUIRE(val == -3); +} + +TEST_CASE("Calculator test with AST", "[general]") +{ + parser parser(R"( + EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* + TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* + FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ + TERM_OPERATOR <- < [-+] > _ + FACTOR_OPERATOR <- < [/*] > _ + NUMBER <- < [0-9]+ > _ + ~_ <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + std::function eval = [&](const Ast& ast) { + if (ast.name == "NUMBER") { + return stol(ast.token); + } else { + const auto& nodes = ast.nodes; + auto result = eval(*nodes[0]); + for (auto i = 1u; i < nodes.size(); i += 2) { + auto num = eval(*nodes[i + 1]); + auto ope = nodes[i]->token[0]; + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } + }; + + std::shared_ptr ast; + auto ret = parser.parse("1+2*3*(4-5+6)/7-8", ast); + ast = AstOptimizer(true).optimize(ast); + auto 
val = eval(*ast); + + REQUIRE(ret == true); + REQUIRE(val == -3); +} + +TEST_CASE("Ignore semantic value test", "[general]") +{ + parser parser(R"( + START <- ~HELLO WORLD + HELLO <- 'Hello' _ + WORLD <- 'World' _ + _ <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + std::shared_ptr ast; + auto ret = parser.parse("Hello World", ast); + + REQUIRE(ret == true); + REQUIRE(ast->nodes.size() == 1); + REQUIRE(ast->nodes[0]->name == "WORLD"); +} + +TEST_CASE("Ignore semantic value of 'or' predicate test", "[general]") +{ + parser parser(R"( + START <- _ !DUMMY HELLO_WORLD '.' + HELLO_WORLD <- HELLO 'World' _ + HELLO <- 'Hello' _ + DUMMY <- 'dummy' _ + ~_ <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + std::shared_ptr ast; + auto ret = parser.parse("Hello World.", ast); + + REQUIRE(ret == true); + REQUIRE(ast->nodes.size() == 1); + REQUIRE(ast->nodes[0]->name == "HELLO_WORLD"); +} + +TEST_CASE("Ignore semantic value of 'and' predicate test", "[general]") +{ + parser parser(R"( + START <- _ &HELLO HELLO_WORLD '.' + HELLO_WORLD <- HELLO 'World' _ + HELLO <- 'Hello' _ + ~_ <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + std::shared_ptr ast; + auto ret = parser.parse("Hello World.", ast); + + REQUIRE(ret == true); + REQUIRE(ast->nodes.size() == 1); + REQUIRE(ast->nodes[0]->name == "HELLO_WORLD"); +} + +TEST_CASE("Literal token on AST test1", "[general]") +{ + parser parser(R"( + STRING_LITERAL <- '"' (('\\"' / '\\t' / '\\n') / (!["] .))* '"' + )"); + parser.enable_ast(); + + std::shared_ptr ast; + auto ret = parser.parse(R"("a\tb")", ast); + + REQUIRE(ret == true); + REQUIRE(ast->is_token == true); + REQUIRE(ast->token == R"("a\tb")"); + REQUIRE(ast->nodes.empty()); +} + +TEST_CASE("Literal token on AST test2", "[general]") +{ + parser parser(R"( + STRING_LITERAL <- '"' (ESC / CHAR)* '"' + ESC <- ('\\"' / '\\t' / '\\n') + CHAR <- (!["] .) 
+ )"); + parser.enable_ast(); + + std::shared_ptr ast; + auto ret = parser.parse(R"("a\tb")", ast); + + REQUIRE(ret == true); + REQUIRE(ast->is_token == false); + REQUIRE(ast->token.empty()); + REQUIRE(ast->nodes.size() == 3); +} + +TEST_CASE("Literal token on AST test3", "[general]") +{ + parser parser(R"( + STRING_LITERAL <- < '"' (ESC / CHAR)* '"' > + ESC <- ('\\"' / '\\t' / '\\n') + CHAR <- (!["] .) + )"); + parser.enable_ast(); + + std::shared_ptr ast; + auto ret = parser.parse(R"("a\tb")", ast); + + REQUIRE(ret == true); + REQUIRE(ast->is_token == true); + REQUIRE(ast->token == R"("a\tb")"); + REQUIRE(ast->nodes.empty()); +} + +TEST_CASE("Missing missing definitions test", "[general]") +{ + parser parser(R"( + A <- B C + )"); + + REQUIRE(!parser); +} + +TEST_CASE("Definition duplicates test", "[general]") +{ + parser parser(R"( + A <- '' + A <- '' + )"); + + REQUIRE(!parser); +} + +TEST_CASE("Semantic values test", "[general]") +{ + parser parser(R"( + term <- ( a b c x )? a b c + a <- 'a' + b <- 'b' + c <- 'c' + x <- 'x' + )"); + + for (const auto& rule: parser.get_rule_names()){ + parser[rule.c_str()] = [rule](const SemanticValues& sv, any&) { + if (rule == "term") { + REQUIRE(any_cast(sv[0]) == "a at 0"); + REQUIRE(any_cast(sv[1]) == "b at 1"); + REQUIRE(any_cast(sv[2]) == "c at 2"); + return std::string(); + } else { + return rule + " at " + std::to_string(sv.c_str() - sv.ss); + } + }; + } + + REQUIRE(parser.parse("abc")); +} + +TEST_CASE("Ordered choice count", "[general]") +{ + parser parser(R"( + S <- 'a' / 'b' + )"); + + parser["S"] = [](const SemanticValues& sv) { + REQUIRE(sv.choice() == 1); + REQUIRE(sv.choice_count() == 2); + }; + + parser.parse("b"); +} + +TEST_CASE("Ordered choice count 2", "[general]") +{ + parser parser(R"( + S <- ('a' / 'b')* + )"); + + parser["S"] = [](const SemanticValues& sv) { + REQUIRE(sv.choice() == 0); + REQUIRE(sv.choice_count() == 0); + }; + + parser.parse("b"); +} + +TEST_CASE("Semantic value tag", "[general]") +{ + 
parser parser(R"( + S <- A? B* C? + A <- 'a' + B <- 'b' + C <- 'c' + )"); + + { + using namespace udl; + parser["S"] = [](const SemanticValues& sv) { + REQUIRE(sv.size() == 1); + REQUIRE(sv.tags.size() == 1); + REQUIRE(sv.tags[0] == "C"_); + }; + auto ret = parser.parse("c"); + REQUIRE(ret == true); + } + + { + using namespace udl; + parser["S"] = [](const SemanticValues& sv) { + REQUIRE(sv.size() == 2); + REQUIRE(sv.tags.size() == 2); + REQUIRE(sv.tags[0] == "B"_); + REQUIRE(sv.tags[1] == "B"_); + }; + auto ret = parser.parse("bb"); + REQUIRE(ret == true); + } + + { + using namespace udl; + parser["S"] = [](const SemanticValues& sv) { + REQUIRE(sv.size() == 2); + REQUIRE(sv.tags.size() == 2); + REQUIRE(sv.tags[0] == "A"_); + REQUIRE(sv.tags[1] == "C"_); + }; + auto ret = parser.parse("ac"); + REQUIRE(ret == true); + } +} + +TEST_CASE("Negated Class test", "[general]") +{ + parser parser(R"( + ROOT <- [^a-z_]+ + )"); + + bool ret = parser; + REQUIRE(ret == true); + + REQUIRE(parser.parse("ABC123")); + REQUIRE_FALSE(parser.parse("ABcZ")); + REQUIRE_FALSE(parser.parse("ABCZ_")); + REQUIRE_FALSE(parser.parse("")); +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/test/test2.cc b/test/test2.cc new file mode 100644 index 0000000..0947d9f --- /dev/null +++ b/test/test2.cc @@ -0,0 +1,769 @@ +#include "catch.hh" +#include + +using namespace peg; + +TEST_CASE("Infinite loop 1", "[infinite loop]") +{ + parser pg(R"( + ROOT <- WH TOKEN* WH + TOKEN <- [a-z0-9]* + WH <- [ \t]* + )"); + + REQUIRE(!pg); +} + +TEST_CASE("Infinite loop 2", "[infinite loop]") +{ + parser pg(R"( + ROOT <- WH TOKEN+ WH + TOKEN <- [a-z0-9]* + WH <- [ \t]* + )"); + + REQUIRE(!pg); +} + +TEST_CASE("Infinite loop 3", "[infinite loop]") +{ + parser pg(R"( + ROOT <- WH TOKEN* WH + TOKEN <- !'word1' + WH <- [ \t]* + )"); + + REQUIRE(!pg); +} + +TEST_CASE("Infinite loop 4", "[infinite loop]") +{ + parser pg(R"( + ROOT <- WH TOKEN* WH + TOKEN <- &'word1' + WH <- [ \t]* + )"); + + REQUIRE(!pg); +} + 
+TEST_CASE("Infinite loop 5", "[infinite loop]") +{ + parser pg(R"( + Numbers <- Number* + Number <- [0-9]+ / Spacing + Spacing <- ' ' / '\t' / '\n' / EOF # EOF is empty + EOF <- !. + )"); + + REQUIRE(!pg); +} + +TEST_CASE("Not infinite 1", "[infinite loop]") +{ + parser pg(R"( + Numbers <- Number* EOF + Number <- [0-9]+ / Spacing + Spacing <- ' ' / '\t' / '\n' + EOF <- !. + )"); + + REQUIRE(!!pg); // OK +} + +TEST_CASE("Not infinite 2", "[infinite loop]") +{ + parser pg(R"( + ROOT <- _ ('[' TAG_NAME ']' _)* + # In a sequence operator, if there is at least one non-empty element, we can treat it as non-empty + TAG_NAME <- (!']' .)+ + _ <- [ \t]* + )"); + + REQUIRE(!!pg); // OK +} + +TEST_CASE("Not infinite 3", "[infinite loop]") +{ + parser pg(R"( + EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* + TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* + FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ # Recursive... + TERM_OPERATOR <- < [-+] > _ + FACTOR_OPERATOR <- < [/*] > _ + NUMBER <- < [0-9]+ > _ + _ <- [ \t\r\n]* + )"); + + REQUIRE(!!pg); // OK +} + +TEST_CASE("Precedence climbing", "[precedence]") +{ + // Create a PEG parser + parser parser(R"( + # Grammar for simple calculator... + START <- _ EXPRESSION + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L + - + L * / + } + ATOM <- NUMBER / T('(') EXPRESSION T(')') + OPERATOR <- T([-+/*]) + NUMBER <- T('-'? 
[0-9]+) + ~_ <- [ \t]* + T(S) <- < S > _ + )"); + + // Setup actions + parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + bool ret = parser; + REQUIRE(ret == true); + + { + auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == -3); + } + + { + auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0 + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == 0); + } +} + +TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") +{ + peg::parser parser(R"( + ROOT <- 'a' + %whitespace <- SPACE* + SPACE <- ' ' + )"); + + parser.enable_packrat_parsing(); + + auto ret = parser.parse("a"); + REQUIRE(ret == true); +} + +TEST_CASE("Packrat parser test with macro", "[packrat]") +{ + parser parser(R"( + EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) + TERM <- LIST(FACTOR, FACTOR_OPERATOR) + FACTOR <- NUMBER / T('(') EXPRESSION T(')') + TERM_OPERATOR <- T([-+]) + FACTOR_OPERATOR <- T([/*]) + NUMBER <- T([0-9]+) + ~_ <- [ \t]* + LIST(I, D) <- I (D I)* + T(S) <- < S > _ + )"); + + parser.enable_packrat_parsing(); + + auto ret = parser.parse(" 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "); + REQUIRE(ret == true); +} + +TEST_CASE("Backreference test", "[backreference]") +{ + parser parser(R"( + START <- _ LQUOTE < (!RQUOTE .)* > RQUOTE _ + LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' + RQUOTE <- ')' $delm '"' + ~_ <- [ \t\r\n]* + )"); + + std::string token; + parser["START"] = [&](const SemanticValues& sv) { + token = sv.token(); 
+ }; + + { + token.clear(); + auto ret = parser.parse(R"delm( + R"("hello world")" + )delm"); + + REQUIRE(ret == true); + REQUIRE(token == "\"hello world\""); + } + + { + token.clear(); + auto ret = parser.parse(R"delm( + R"foo("(hello world)")foo" + )delm"); + + REQUIRE(ret == true); + REQUIRE(token == "\"(hello world)\""); + } + + { + token.clear(); + auto ret = parser.parse(R"delm( + R"foo("(hello world)foo")foo" + )delm"); + + REQUIRE(ret == false); + REQUIRE(token == "\"(hello world"); + } + + { + token.clear(); + auto ret = parser.parse(R"delm( + R"foo("(hello world)")bar" + )delm"); + + REQUIRE(ret == false); + REQUIRE(token.empty()); + } +} + +TEST_CASE("Invalid backreference test", "[backreference]") +{ + parser parser(R"( + START <- _ LQUOTE (!RQUOTE .)* RQUOTE _ + LQUOTE <- 'R"' $delm< [a-zA-Z]* > '(' + RQUOTE <- ')' $delm2 '"' + ~_ <- [ \t\r\n]* + )"); + + REQUIRE_THROWS_AS( + parser.parse(R"delm( + R"foo("(hello world)")foo" + )delm"), + std::runtime_error); +} + + +TEST_CASE("Nested capture test", "[backreference]") +{ + parser parser(R"( + ROOT <- CONTENT + CONTENT <- (ELEMENT / TEXT)* + ELEMENT <- $(STAG CONTENT ETAG) + STAG <- '<' $tag< TAG_NAME > '>' + ETAG <- '' + TAG_NAME <- 'b' / 'u' + TEXT <- TEXT_DATA + TEXT_DATA <- ![<] . 
+ )"); + + REQUIRE(parser.parse("This is a test text.")); + REQUIRE(!parser.parse("This is a test text.")); + REQUIRE(!parser.parse("This is a test text.")); + REQUIRE(!parser.parse("This is a test text.")); +} + +TEST_CASE("Backreference with Prioritized Choice test", "[backreference]") +{ + parser parser(R"( + TREE <- WRONG_BRANCH / CORRECT_BRANCH + WRONG_BRANCH <- BRANCH THAT IS_capture WRONG + CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT + BRANCH <- 'branch' + THAT <- 'that' + IS_capture <- $ref<..> + IS_backref <- $ref + WRONG <- 'wrong' + CORRECT <- 'correct' + )"); + + REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); +} + +TEST_CASE("Backreference with Zero or More test", "[backreference]") +{ + parser parser(R"( + TREE <- WRONG_BRANCH* CORRECT_BRANCH + WRONG_BRANCH <- BRANCH THAT IS_capture WRONG + CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT + BRANCH <- 'branch' + THAT <- 'that' + IS_capture <- $ref<..> + IS_backref <- $ref + WRONG <- 'wrong' + CORRECT <- 'correct' + )"); + + REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); + REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); + REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); + REQUIRE(parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); + REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); + REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"), std::runtime_error); +} + +TEST_CASE("Backreference with One or More test", "[backreference]") +{ + parser parser(R"( + TREE <- WRONG_BRANCH+ CORRECT_BRANCH + WRONG_BRANCH <- BRANCH THAT IS_capture WRONG + CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT + BRANCH <- 'branch' + THAT <- 'that' + IS_capture <- $ref<..> + IS_backref <- $ref + WRONG <- 'wrong' + CORRECT <- 'correct' + )"); + + REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); + 
REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); + REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); + REQUIRE(parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); + REQUIRE(!parser.parse("branchthatiscorrect")); + REQUIRE(!parser.parse("branchthatiswron_branchthatiscorrect")); +} + +TEST_CASE("Backreference with Option test", "[backreference]") +{ + parser parser(R"( + TREE <- WRONG_BRANCH? CORRECT_BRANCH + WRONG_BRANCH <- BRANCH THAT IS_capture WRONG + CORRECT_BRANCH <- BRANCH THAT IS_backref CORRECT + BRANCH <- 'branch' + THAT <- 'that' + IS_capture <- $ref<..> + IS_backref <- $ref + WRONG <- 'wrong' + CORRECT <- 'correct' + )"); + + REQUIRE(parser.parse("branchthatiswrongbranchthatiscorrect")); + REQUIRE(!parser.parse("branchthatiswrongbranchthatIscorrect")); + REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatiscorrect")); + REQUIRE(!parser.parse("branchthatiswrongbranchthatIswrongbranchthatIscorrect")); + REQUIRE_THROWS_AS(parser.parse("branchthatiscorrect"), std::runtime_error); + REQUIRE_THROWS_AS(parser.parse("branchthatiswron_branchthatiscorrect"), std::runtime_error); +} + +TEST_CASE("Left recursive test", "[left recursive]") +{ + parser parser(R"( + A <- A 'a' + B <- A 'a' + )"); + + REQUIRE(!parser); +} + +TEST_CASE("Left recursive with option test", "[left recursive]") +{ + parser parser(R"( + A <- 'a' / 'b'? 
B 'c' + B <- A + )"); + + REQUIRE(!parser); +} + +TEST_CASE("Left recursive with zom test", "[left recursive]") +{ + parser parser(R"( + A <- 'a'* A* + )"); + + REQUIRE(!parser); +} + +TEST_CASE("Left recursive with a ZOM content rule", "[left recursive]") +{ + parser parser(R"( + A <- B + B <- _ A + _ <- ' '* # Zero or more + )"); + + REQUIRE(!parser); +} + +TEST_CASE("Left recursive with empty string test", "[left recursive]") +{ + parser parser( + " A <- '' A" + ); + + REQUIRE(!parser); +} + +TEST_CASE("User defined rule test", "[user rule]") +{ + auto g = parser(R"( + ROOT <- _ 'Hello' _ NAME '!' _ + )", + { + { + "NAME", usr([](const char* s, size_t n, SemanticValues& /*sv*/, any& /*dt*/) -> size_t { + static std::vector names = { "PEG", "BNF" }; + for (const auto& name: names) { + if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { + return name.size(); + } + } + return static_cast(-1); + }) + }, + { + "~_", zom(cls(" \t\r\n")) + } + }); + + REQUIRE(g.parse(" Hello BNF! ") == true); +} + +TEST_CASE("Semantic predicate test", "[predicate]") +{ + parser parser("NUMBER <- [0-9]+"); + + parser["NUMBER"] = [](const SemanticValues& sv) { + auto val = stol(sv.token(), nullptr, 10); + if (val != 100) { + throw parse_error("value error!!"); + } + return val; + }; + + long val; + REQUIRE(parser.parse("100", val)); + REQUIRE(val == 100); + + REQUIRE(!parser.parse("200", val)); +} + +TEST_CASE("Japanese character", "[unicode]") +{ + peg::parser parser(u8R"( + 文 <- 修飾語? 主語 述語 '。' + 主語 <- 名詞 助詞 + 述語 <- 動詞 助詞 + 修飾語 <- 形容詞 + 名詞 <- 'サーバー' / 'クライアント' + 形容詞 <- '古い' / '新しい' + 動詞 <- '落ち' / '復旧し' + 助詞 <- 'が' / 'を' / 'た' / 'ます' / 'に' + )"); + + bool ret = parser; + REQUIRE(ret == true); + + REQUIRE(parser.parse(u8R"(サーバーを復旧します。)")); +} + +TEST_CASE("dot with a code", "[unicode]") +{ + peg::parser parser(" S <- 'a' . 'b' "); + REQUIRE(parser.parse(u8R"(aあb)")); +} + +TEST_CASE("dot with a char", "[unicode]") +{ + peg::parser parser(" S <- 'a' . 
'b' "); + REQUIRE(parser.parse(u8R"(aåb)")); +} + +TEST_CASE("character class", "[unicode]") +{ + peg::parser parser(R"( + S <- 'a' [い-おAさC-Eた-とは] 'b' + )"); + + bool ret = parser; + REQUIRE(ret == true); + + REQUIRE(!parser.parse(u8R"(aあb)")); + REQUIRE(parser.parse(u8R"(aいb)")); + REQUIRE(parser.parse(u8R"(aうb)")); + REQUIRE(parser.parse(u8R"(aおb)")); + REQUIRE(!parser.parse(u8R"(aかb)")); + REQUIRE(parser.parse(u8R"(aAb)")); + REQUIRE(!parser.parse(u8R"(aBb)")); + REQUIRE(parser.parse(u8R"(aEb)")); + REQUIRE(!parser.parse(u8R"(aFb)")); + REQUIRE(!parser.parse(u8R"(aそb)")); + REQUIRE(parser.parse(u8R"(aたb)")); + REQUIRE(parser.parse(u8R"(aちb)")); + REQUIRE(parser.parse(u8R"(aとb)")); + REQUIRE(!parser.parse(u8R"(aなb)")); + REQUIRE(parser.parse(u8R"(aはb)")); + REQUIRE(!parser.parse(u8R"(a?b)")); +} + +#if 0 // TODO: Unicode Grapheme support +TEST_CASE("dot with a grapheme", "[unicode]") +{ + peg::parser parser(" S <- 'a' . 'b' "); + REQUIRE(parser.parse(u8R"(aसिb)")); +} +#endif + +TEST_CASE("Macro simple test", "[macro]") +{ + parser parser(R"( + S <- HELLO WORLD + HELLO <- T('hello') + WORLD <- T('world') + T(a) <- a [ \t]* + )"); + + REQUIRE(parser.parse("hello \tworld ")); +} + +TEST_CASE("Macro two parameters", "[macro]") +{ + parser parser(R"( + S <- HELLO_WORLD + HELLO_WORLD <- T('hello', 'world') + T(a, b) <- a [ \t]* b [ \t]* + )"); + + REQUIRE(parser.parse("hello \tworld ")); +} + +TEST_CASE("Macro syntax error", "[macro]") +{ + parser parser(R"( + S <- T('hello') + T (a) <- a [ \t]* + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Macro missing argument", "[macro]") +{ + parser parser(R"( + S <- T ('hello') + T(a, b) <- a [ \t]* b + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Macro reference syntax error", "[macro]") +{ + parser parser(R"( + S <- T ('hello') + T(a) <- a [ \t]* + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Macro invalid macro reference error", "[macro]") +{ + parser 
parser(R"( + S <- T('hello') + T <- 'world' + )"); + + bool ret = parser; + REQUIRE(ret == false); +} + +TEST_CASE("Macro calculator", "[macro]") +{ + // Create a PEG parser + parser parser(R"( + # Grammar for simple calculator... + EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) + TERM <- LIST(FACTOR, FACTOR_OPERATOR) + FACTOR <- NUMBER / T('(') EXPRESSION T(')') + TERM_OPERATOR <- T([-+]) + FACTOR_OPERATOR <- T([/*]) + NUMBER <- T([0-9]+) + ~_ <- [ \t]* + LIST(I, D) <- I (D I)* + T(S) <- < S > _ + )"); + + // Setup actions + auto reduce = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + for (auto i = 1u; i < sv.size(); i += 2) { + auto num = any_cast(sv[i + 1]); + auto ope = any_cast(sv[i]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + + parser["EXPRESSION"] = reduce; + parser["TERM"] = reduce; + parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; + parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + bool ret = parser; + REQUIRE(ret == true); + + auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "; + long val = 0; + ret = parser.parse(expr, val); + + REQUIRE(ret == true); + REQUIRE(val == -3); +} + +TEST_CASE("Macro expression arguments", "[macro]") +{ + parser parser(R"( + S <- M('hello' / 'Hello', 'world' / 'World') + M(arg0, arg1) <- arg0 [ \t]+ arg1 + )"); + + REQUIRE(parser.parse("Hello world")); +} + +TEST_CASE("Macro recursive", "[macro]") +{ + parser parser(R"( + S <- M('abc') + M(s) <- !s / s ' ' M(s / '123') / s + )"); + + REQUIRE(parser.parse("")); + REQUIRE(parser.parse("abc")); + REQUIRE(parser.parse("abc abc")); + REQUIRE(parser.parse("abc 123 abc")); +} + +TEST_CASE("Macro recursive2", "[macro]") +{ + auto syntaxes = std::vector{ + 
"S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s", + "S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s", + "S <- M('abc') M(s) <- !s / s ' ' M(s? '-' '123') / s", + "S <- M('abc') M(s) <- !s / s ' ' M(&s s+ '-' '123') / s", + "S <- M('abc') M(s) <- !s / s ' ' M(s '-' !s '123') / s", + "S <- M('abc') M(s) <- !s / s ' ' M(< s > '-' '123') / s", + "S <- M('abc') M(s) <- !s / s ' ' M(~s '-' '123') / s", + }; + + for (const auto& syntax: syntaxes) { + parser parser(syntax); + REQUIRE(parser.parse("abc abc-123")); + } +} + +TEST_CASE("Macro exclusive modifiers", "[macro]") +{ + parser parser(R"( + S <- Modifiers(!"") _ + Modifiers(Appeared) <- (!Appeared) ( + Token('public') Modifiers(Appeared / 'public') / + Token('static') Modifiers(Appeared / 'static') / + Token('final') Modifiers(Appeared / 'final') / + "") + Token(t) <- t _ + _ <- [ \t\r\n]* + )"); + + REQUIRE(parser.parse("public")); + REQUIRE(parser.parse("static")); + REQUIRE(parser.parse("final")); + REQUIRE(parser.parse("public static final")); + REQUIRE(!parser.parse("public public")); + REQUIRE(!parser.parse("public static public")); +} + +TEST_CASE("Macro token check test", "[macro]") +{ + parser parser(R"( + # Grammar for simple calculator... 
+ EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) + TERM <- LIST(FACTOR, FACTOR_OPERATOR) + FACTOR <- NUMBER / T('(') EXPRESSION T(')') + TERM_OPERATOR <- T([-+]) + FACTOR_OPERATOR <- T([/*]) + NUMBER <- T([0-9]+) + ~_ <- [ \t]* + LIST(I, D) <- I (D I)* + T(S) <- < S > _ + )"); + + REQUIRE(parser["EXPRESSION"].is_token() == false); + REQUIRE(parser["TERM"].is_token() == false); + REQUIRE(parser["FACTOR"].is_token() == false); + REQUIRE(parser["FACTOR_OPERATOR"].is_token() == true); + REQUIRE(parser["NUMBER"].is_token() == true); + REQUIRE(parser["_"].is_token() == true); + REQUIRE(parser["LIST"].is_token() == false); + REQUIRE(parser["T"].is_token() == true); +} + +TEST_CASE("Macro rule-parameter collision", "[macro]") +{ + parser parser(R"( + A <- B(C) + B(D) <- D + C <- 'c' + D <- 'd' + )"); + + REQUIRE(parser.parse("c")); +} + +TEST_CASE("Line information test", "[line information]") +{ + parser parser(R"( + S <- _ (WORD _)+ + WORD <- [A-Za-z]+ + ~_ <- [ \t\r\n]+ + )"); + + std::vector> locations; + parser["WORD"] = [&](const peg::SemanticValues& sv) { + locations.push_back(sv.line_info()); + }; + + bool ret = parser; + REQUIRE(ret == true); + + ret = parser.parse(" Mon Tue Wed \nThu Fri Sat\nSun\n"); + REQUIRE(ret == true); + + REQUIRE(locations[0] == std::make_pair(1, 2)); + REQUIRE(locations[1] == std::make_pair(1, 6)); + REQUIRE(locations[2] == std::make_pair(1, 10)); + REQUIRE(locations[3] == std::make_pair(2, 1)); + REQUIRE(locations[4] == std::make_pair(2, 6)); + REQUIRE(locations[5] == std::make_pair(2, 11)); + REQUIRE(locations[6] == std::make_pair(3, 1)); +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/test/test3.cc b/test/test3.cc new file mode 100644 index 0000000..a253071 --- /dev/null +++ b/test/test3.cc @@ -0,0 +1,291 @@ +#include "catch.hh" +#include + +using namespace peg; + +bool exact(Grammar& g, const char* d, const char* s) { + auto n = strlen(s); + auto r = g[d].parse(s, n); + return r.ret && r.len == n; +} + +Grammar& 
make_peg_grammar() { + return ParserGenerator::grammar(); +} + +TEST_CASE("PEG Grammar", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Grammar", " Definition <- a / ( b c ) / d \n rule2 <- [a-zA-Z][a-z0-9-]+ ") == true); +} + +TEST_CASE("PEG Definition", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Definition", "Definition <- a / (b c) / d ") == true); + REQUIRE(exact(g, "Definition", "Definition <- a / b c / d ") == true); + REQUIRE(exact(g, "Definition", u8"Definitiond ← a ") == true); + REQUIRE(exact(g, "Definition", "Definition ") == false); + REQUIRE(exact(g, "Definition", " ") == false); + REQUIRE(exact(g, "Definition", "") == false); + REQUIRE(exact(g, "Definition", "Definition = a / (b c) / d ") == false); + REQUIRE(exact(g, "Definition", "Macro(param) <- a ") == true); + REQUIRE(exact(g, "Definition", "Macro (param) <- a ") == false); +} + +TEST_CASE("PEG Expression", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Expression", "a / (b c) / d ") == true); + REQUIRE(exact(g, "Expression", "a / b c / d ") == true); + REQUIRE(exact(g, "Expression", "a b ") == true); + REQUIRE(exact(g, "Expression", "") == true); + REQUIRE(exact(g, "Expression", " ") == false); + REQUIRE(exact(g, "Expression", " a b ") == false); +} + +TEST_CASE("PEG Sequence", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Sequence", "a b c d ") == true); + REQUIRE(exact(g, "Sequence", "") == true); + REQUIRE(exact(g, "Sequence", "!") == false); + REQUIRE(exact(g, "Sequence", "<-") == false); + REQUIRE(exact(g, "Sequence", " a") == false); +} + +TEST_CASE("PEG Prefix", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Prefix", "&[a]") == true); + REQUIRE(exact(g, "Prefix", "![']") == true); + REQUIRE(exact(g, "Prefix", "-[']") == false); + REQUIRE(exact(g, "Prefix", "") == false); + REQUIRE(exact(g, "Prefix", " a") == false); +} + +TEST_CASE("PEG Suffix", "[peg]") +{ + auto g 
= ParserGenerator::grammar(); + REQUIRE(exact(g, "Suffix", "aaa ") == true); + REQUIRE(exact(g, "Suffix", "aaa? ") == true); + REQUIRE(exact(g, "Suffix", "aaa* ") == true); + REQUIRE(exact(g, "Suffix", "aaa+ ") == true); + REQUIRE(exact(g, "Suffix", ". + ") == true); + REQUIRE(exact(g, "Suffix", "?") == false); + REQUIRE(exact(g, "Suffix", "") == false); + REQUIRE(exact(g, "Suffix", " a") == false); +} + +TEST_CASE("PEG Primary", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Primary", "_Identifier0_ ") == true); + REQUIRE(exact(g, "Primary", "_Identifier0_<-") == false); + REQUIRE(exact(g, "Primary", "( _Identifier0_ _Identifier1_ )") == true); + REQUIRE(exact(g, "Primary", "'Literal String'") == true); + REQUIRE(exact(g, "Primary", "\"Literal String\"") == true); + REQUIRE(exact(g, "Primary", "[a-zA-Z]") == true); + REQUIRE(exact(g, "Primary", ".") == true); + REQUIRE(exact(g, "Primary", "") == false); + REQUIRE(exact(g, "Primary", " ") == false); + REQUIRE(exact(g, "Primary", " a") == false); + REQUIRE(exact(g, "Primary", "") == false); +} + +TEST_CASE("PEG Identifier", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Identifier", "_Identifier0_ ") == true); + REQUIRE(exact(g, "Identifier", "0Identifier_ ") == false); + REQUIRE(exact(g, "Identifier", "Iden|t ") == false); + REQUIRE(exact(g, "Identifier", " ") == false); + REQUIRE(exact(g, "Identifier", " a") == false); + REQUIRE(exact(g, "Identifier", "") == false); +} + +TEST_CASE("PEG IdentStart", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "IdentStart", "_") == true); + REQUIRE(exact(g, "IdentStart", "a") == true); + REQUIRE(exact(g, "IdentStart", "Z") == true); + REQUIRE(exact(g, "IdentStart", "") == false); + REQUIRE(exact(g, "IdentStart", " ") == false); + REQUIRE(exact(g, "IdentStart", "0") == false); +} + +TEST_CASE("PEG IdentRest", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "IdentRest", "_") == true); + 
REQUIRE(exact(g, "IdentRest", "a") == true); + REQUIRE(exact(g, "IdentRest", "Z") == true); + REQUIRE(exact(g, "IdentRest", "") == false); + REQUIRE(exact(g, "IdentRest", " ") == false); + REQUIRE(exact(g, "IdentRest", "0") == true); +} + +TEST_CASE("PEG Literal", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Literal", "'abc' ") == true); + REQUIRE(exact(g, "Literal", "'a\\nb\\tc' ") == true); + REQUIRE(exact(g, "Literal", "'a\\277\tc' ") == true); + REQUIRE(exact(g, "Literal", "'a\\77\tc' ") == true); + REQUIRE(exact(g, "Literal", "'a\\80\tc' ") == false); + REQUIRE(exact(g, "Literal", "'\n' ") == true); + REQUIRE(exact(g, "Literal", "'a\\'b' ") == true); + REQUIRE(exact(g, "Literal", "'a'b' ") == false); + REQUIRE(exact(g, "Literal", "'a\"'b' ") == false); + REQUIRE(exact(g, "Literal", "\"'\\\"abc\\\"'\" ") == true); + REQUIRE(exact(g, "Literal", "\"'\"abc\"'\" ") == false); + REQUIRE(exact(g, "Literal", "abc") == false); + REQUIRE(exact(g, "Literal", "") == false); + REQUIRE(exact(g, "Literal", "\\") == false); + REQUIRE(exact(g, "Literal", u8"'日本語'") == true); + REQUIRE(exact(g, "Literal", u8"\"日本語\"") == true); + REQUIRE(exact(g, "Literal", u8"日本語") == false); +} + +TEST_CASE("PEG Class", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Class", "[]") == false); // NOTE: This is different from the Brian Ford's paper, but same as RegExp + REQUIRE(exact(g, "Class", "[a]") == true); + REQUIRE(exact(g, "Class", "[a-z]") == true); + REQUIRE(exact(g, "Class", "[az]") == true); + REQUIRE(exact(g, "Class", "[a-zA-Z-]") == true); + REQUIRE(exact(g, "Class", "[a-zA-Z-0-9]") == true); + REQUIRE(exact(g, "Class", "[a-]") == false); + REQUIRE(exact(g, "Class", "[-a]") == true); + REQUIRE(exact(g, "Class", "[") == false); + REQUIRE(exact(g, "Class", "[a") == false); + REQUIRE(exact(g, "Class", "]") == false); + REQUIRE(exact(g, "Class", "a]") == false); + REQUIRE(exact(g, "Class", u8"[あ-ん]") == true); + REQUIRE(exact(g, "Class", 
u8"あ-ん") == false); + REQUIRE(exact(g, "Class", "[-+]") == true); + REQUIRE(exact(g, "Class", "[+-]") == false); + REQUIRE(exact(g, "Class", "[\\^]") == true); +} + +TEST_CASE("PEG Negated Class", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "NegatedClass", "[^]") == false); + REQUIRE(exact(g, "NegatedClass", "[^a]") == true); + REQUIRE(exact(g, "NegatedClass", "[^a-z]") == true); + REQUIRE(exact(g, "NegatedClass", "[^az]") == true); + REQUIRE(exact(g, "NegatedClass", "[^a-zA-Z-]") == true); + REQUIRE(exact(g, "NegatedClass", "[^a-zA-Z-0-9]") == true); + REQUIRE(exact(g, "NegatedClass", "[^a-]") == false); + REQUIRE(exact(g, "NegatedClass", "[^-a]") == true); + REQUIRE(exact(g, "NegatedClass", "[^") == false); + REQUIRE(exact(g, "NegatedClass", "[^a") == false); + REQUIRE(exact(g, "NegatedClass", "^]") == false); + REQUIRE(exact(g, "NegatedClass", "^a]") == false); + REQUIRE(exact(g, "NegatedClass", u8"[^あ-ん]") == true); + REQUIRE(exact(g, "NegatedClass", u8"^あ-ん") == false); + REQUIRE(exact(g, "NegatedClass", "[^-+]") == true); + REQUIRE(exact(g, "NegatedClass", "[^+-]") == false); + REQUIRE(exact(g, "NegatedClass", "[^^]") == true); +} + +TEST_CASE("PEG Range", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Range", "a") == true); + REQUIRE(exact(g, "Range", "a-z") == true); + REQUIRE(exact(g, "Range", "az") == false); + REQUIRE(exact(g, "Range", "") == false); + REQUIRE(exact(g, "Range", "a-") == false); + REQUIRE(exact(g, "Range", "-a") == false); +} + +TEST_CASE("PEG Char", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Char", "\\n") == true); + REQUIRE(exact(g, "Char", "\\r") == true); + REQUIRE(exact(g, "Char", "\\t") == true); + REQUIRE(exact(g, "Char", "\\'") == true); + REQUIRE(exact(g, "Char", "\\\"") == true); + REQUIRE(exact(g, "Char", "\\[") == true); + REQUIRE(exact(g, "Char", "\\]") == true); + REQUIRE(exact(g, "Char", "\\\\") == true); + REQUIRE(exact(g, "Char", "\\000") == 
true); + REQUIRE(exact(g, "Char", "\\377") == true); + REQUIRE(exact(g, "Char", "\\477") == false); + REQUIRE(exact(g, "Char", "\\087") == false); + REQUIRE(exact(g, "Char", "\\079") == false); + REQUIRE(exact(g, "Char", "\\00") == true); + REQUIRE(exact(g, "Char", "\\77") == true); + REQUIRE(exact(g, "Char", "\\80") == false); + REQUIRE(exact(g, "Char", "\\08") == false); + REQUIRE(exact(g, "Char", "\\0") == true); + REQUIRE(exact(g, "Char", "\\7") == true); + REQUIRE(exact(g, "Char", "\\8") == false); + REQUIRE(exact(g, "Char", "a") == true); + REQUIRE(exact(g, "Char", ".") == true); + REQUIRE(exact(g, "Char", "0") == true); + REQUIRE(exact(g, "Char", "\\") == false); + REQUIRE(exact(g, "Char", " ") == true); + REQUIRE(exact(g, "Char", " ") == false); + REQUIRE(exact(g, "Char", "") == false); + REQUIRE(exact(g, "Char", u8"あ") == true); +} + +TEST_CASE("PEG Operators", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "LEFTARROW", "<-") == true); + REQUIRE(exact(g, "SLASH", "/ ") == true); + REQUIRE(exact(g, "AND", "& ") == true); + REQUIRE(exact(g, "NOT", "! ") == true); + REQUIRE(exact(g, "QUESTION", "? ") == true); + REQUIRE(exact(g, "STAR", "* ") == true); + REQUIRE(exact(g, "PLUS", "+ ") == true); + REQUIRE(exact(g, "OPEN", "( ") == true); + REQUIRE(exact(g, "CLOSE", ") ") == true); + REQUIRE(exact(g, "DOT", ". 
") == true); +} + +TEST_CASE("PEG Comment", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Comment", "# Comment.\n") == true); + REQUIRE(exact(g, "Comment", "# Comment.") == false); + REQUIRE(exact(g, "Comment", " ") == false); + REQUIRE(exact(g, "Comment", "a") == false); +} + +TEST_CASE("PEG Space", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "Space", " ") == true); + REQUIRE(exact(g, "Space", "\t") == true); + REQUIRE(exact(g, "Space", "\n") == true); + REQUIRE(exact(g, "Space", "") == false); + REQUIRE(exact(g, "Space", "a") == false); +} + +TEST_CASE("PEG EndOfLine", "[peg]") +{ + auto g = ParserGenerator::grammar(); + REQUIRE(exact(g, "EndOfLine", "\r\n") == true); + REQUIRE(exact(g, "EndOfLine", "\n") == true); + REQUIRE(exact(g, "EndOfLine", "\r") == true); + REQUIRE(exact(g, "EndOfLine", " ") == false); + REQUIRE(exact(g, "EndOfLine", "") == false); + REQUIRE(exact(g, "EndOfLine", "a") == false); +} + +TEST_CASE("PEG EndOfFile", "[peg]") +{ + Grammar g = make_peg_grammar(); + REQUIRE(exact(g, "EndOfFile", "") == true); + REQUIRE(exact(g, "EndOfFile", " ") == false); +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix