diff --git a/README.md b/README.md index 27647be..cabc1cd 100644 --- a/README.md +++ b/README.md @@ -42,23 +42,16 @@ using namespace std; int main(void) { // (2) Make a parser - auto grammar = R"( + parser parser(R"( # Grammar for Calculator... Additive <- Multitive '+' Additive / Multitive Multitive <- Primary '*' Multitive / Primary Primary <- '(' Additive ')' / Number Number <- < [0-9]+ > %whitespace <- [ \t]* - )"; + )"); - parser parser; - - parser.log = [](size_t line, size_t col, const string& msg) { - cerr << line << ":" << col << ": " << msg << "\n"; - }; - - auto ok = parser.load_grammar(grammar); - assert(ok); + assert((bool)parser == true); // (3) Setup actions parser["Additive"] = [](const SemanticValues& sv) { @@ -93,6 +86,28 @@ int main(void) { } ``` +To show syntax errors in grammar text: + +```cpp +auto grammar = R"( + # Grammar for Calculator... + Additive <- Multitive '+' Additive / Multitive + Multitive <- Primary '*' Multitive / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* +)"; + +parser parser; + +parser.log = [](size_t line, size_t col, const string& msg) { + cerr << line << ":" << col << ": " << msg << "\n"; +}; + +auto ok = parser.load_grammar(grammar); +assert(ok); +``` + There are four semantic actions available: ```cpp @@ -326,6 +341,46 @@ List(I, D) ← I (D I)* T(x) ← < x > _ ``` +Parsing expressions by precedence climbing algorithm +---------------------------------------------------- + +*cpp-peglib* supports [operator-precedence parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser) by the [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing) + +```cpp + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? 
[0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + long val; + parser.parse(" -1 + (1 + 2) * 3 - -1", val); + assert(val == 9); +``` + AST generation -------------- diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 3703aae..7669594 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -17,3 +17,9 @@ target_link_libraries(calc2 ${add_link_deps}) add_executable(calc3 calc3.cc) target_link_libraries(calc3 ${add_link_deps}) + +add_executable(calc4 calc4.cc) +target_link_libraries(calc4 ${add_link_deps}) + +add_executable(calc5 calc5.cc) +target_link_libraries(calc5 ${add_link_deps}) diff --git a/example/calc.cc b/example/calc.cc index 2b0c4db..a8d9a23 100644 --- a/example/calc.cc +++ b/example/calc.cc @@ -1,64 +1,51 @@ -// -// calc.cc -// -// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
-// MIT License -// - #include +#include #include -#include using namespace peg; +using namespace std; -int main(int argc, const char** argv) -{ - if (argc < 2 || std::string("--help") == argv[1]) { - std::cout << "usage: calc [formula]" << std::endl; - return 1; - } - - auto reduce = [](const SemanticValues& sv) -> long { - auto result = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - auto ope = any_cast(sv[i]); - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - }; - +int main(void) { + // (2) Make a parser parser parser(R"( - EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ - TERM_OPERATOR <- < [-+] > _ - FACTOR_OPERATOR <- < [/*] > _ - NUMBER <- < [0-9]+ > _ - ~_ <- [ \t\r\n]* + # Grammar for Calculator... + Additive <- Multitive '+' Additive / Multitive + Multitive <- Primary '*' Multitive / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* )"); - parser["EXPRESSION"] = reduce; - parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + assert((bool)parser == true); - auto expr = argv[1]; - long val = 0; - if (parser.parse(expr, val)) { - std::cout << expr << " = " << val << std::endl; - return 0; - } + // (3) Setup actions + parser["Additive"] = [](const SemanticValues& sv) { + switch (sv.choice()) { + case 0: // "Multitive '+' Additive" + return any_cast(sv[0]) + any_cast(sv[1]); + default: // "Multitive" + return any_cast(sv[0]); + } + }; - std::cout << "syntax error..." 
<< std::endl; + parser["Multitive"] = [](const SemanticValues& sv) { + switch (sv.choice()) { + case 0: // "Primary '*' Multitive" + return any_cast(sv[0]) * any_cast(sv[1]); + default: // "Primary" + return any_cast(sv[0]); + } + }; - return -1; + parser["Number"] = [](const SemanticValues& sv) { + return stoi(sv.token(), nullptr, 10); + }; + + // (4) Parse + parser.enable_packrat_parsing(); // Enable packrat parsing. + + int val; + parser.parse(" (1 + 2) * 3 ", val); + + assert(val == 9); } - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/calc4.cc b/example/calc4.cc new file mode 100644 index 0000000..1404dfc --- /dev/null +++ b/example/calc4.cc @@ -0,0 +1,42 @@ +#include +#include +#include + +using namespace peg; +using namespace std; + +int main(void) { + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { + auto result = any_cast(sv[0]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + + long val; + parser.parse(" -1 + (1 + 2) * 3 - -1", val); + + assert(val == 9); +} diff --git a/example/calc5.cc b/example/calc5.cc new file mode 100644 index 0000000..2b9a750 --- /dev/null +++ b/example/calc5.cc @@ -0,0 +1,69 @@ +// +// calc5.cc +// +// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
+// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(int argc, const char** argv) +{ + if (argc < 2 || std::string("--help") == argv[1]) { + std::cout << "usage: calc5 [formula]" << std::endl; + return 1; + } + + std::function eval = [&](const Ast& ast) { + if (ast.name == "NUMBER") { + return stol(ast.token); + } else { + const auto& nodes = ast.nodes; + auto result = eval(*nodes[0]); + if (nodes.size() > 1) { + auto ope = nodes[1]->token[0]; + auto num = eval(*nodes[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } + }; + + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + auto expr = argv[1]; + std::shared_ptr ast; + if (parser.parse(expr, ast)) { + ast = AstOptimizer(true).optimize(ast); + std::cout << ast_to_s(ast); + std::cout << expr << " = " << eval(*ast) << std::endl; + return 0; + } + + std::cout << "syntax error..." 
<< std::endl; + + return -1; +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/test/test.cc b/test/test.cc index 5d4264f..85d052a 100644 --- a/test/test.cc +++ b/test/test.cc @@ -220,11 +220,11 @@ TEST_CASE("Precedence climbing", "[precedence]") )"); // Setup actions - auto reduce = [](const SemanticValues& sv) -> long { + parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { auto result = any_cast(sv[0]); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = any_cast(sv[i + 1]); - auto ope = any_cast(sv[i]); + if (sv.size() > 1) { + auto ope = any_cast(sv[1]); + auto num = any_cast(sv[2]); switch (ope) { case '+': result += num; break; case '-': result -= num; break; @@ -234,10 +234,8 @@ TEST_CASE("Precedence climbing", "[precedence]") } return result; }; - - parser["EXPRESSION"] = reduce; - parser["OPERATOR"] = [](const SemanticValues& sv) { return static_cast(*sv.c_str()); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; bool ret = parser; REQUIRE(ret == true);