From 23c080af49f975d3d20f4e6d847f9dbb79b4761a Mon Sep 17 00:00:00 2001 From: yhirose Date: Thu, 30 Jun 2022 19:58:23 -0400 Subject: [PATCH] Removed `peg::parse_error` and introduced `predicate` action --- README.md | 17 +++--- example/CMakeLists.txt | 3 ++ example/indent.cc | 11 ++-- example/symbol_check.cc | 69 +++++++++++++++++++++++++ peglib.h | 112 ++++++++++++++++++++-------------------- test/test1.cc | 7 ++- test/test2.cc | 13 +++-- 7 files changed, 162 insertions(+), 70 deletions(-) create mode 100644 example/symbol_check.cc diff --git a/README.md b/README.md index 9b9c320..f3e33de 100644 --- a/README.md +++ b/README.md @@ -216,17 +216,22 @@ peg::parser parser(R"( )"); ``` -*Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action. +*Semantic predicate* support is available with a *predicate* action. ```cpp peg::parser parser("NUMBER <- [0-9]+"); -parser["NUMBER"] = [](const SemanticValues& vs) { - auto val = vs.token_to_number(); - if (val != 100) { - throw peg::parse_error("value error!!"); +parser["NUMBER"] = [](const SemanticValues &vs) { + return vs.token_to_number(); +}; + +parser["NUMBER"].predicate = [](const SemanticValues &vs, + const std::any & /*dt*/, std::string &msg) { + if (vs.token_to_number() != 100) { + msg = "value error!!"; + return false; } - return val; + return true; }; long val; diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 0443aba..f888901 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -20,3 +20,6 @@ target_link_libraries(calc5 ${add_link_deps}) add_executable(indent indent.cc) target_link_libraries(indent ${add_link_deps}) + +add_executable(symbol_check symbol_check.cc) +target_link_libraries(symbol_check ${add_link_deps}) diff --git a/example/indent.cc b/example/indent.cc index aec502f..74c46ab 100644 --- a/example/indent.cc +++ b/example/indent.cc @@ -39,9 +39,14 @@ Block <- Statements {} size_t /*matchlen*/, std::any & /*value*/, std::any & /*dt*/) { indent -= 2; }; - parser["Samedent"] = [&](const SemanticValues &vs, std::any & /*dt*/) { - if (indent != vs.sv().size()) { throw parse_error("different indent..."); } - }; + parser["Samedent"].predicate = + [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) { + if (indent != vs.sv().size()) { + msg = "different indent..."; + return false; + } + return true; + }; parser.enable_ast(); diff --git a/example/symbol_check.cc b/example/symbol_check.cc new file mode 100644 index 0000000..da22174 --- /dev/null +++ b/example/symbol_check.cc @@ -0,0 +1,69 @@ +// +// symbol_check.cc +// +// Copyright (c) 2022 Yuji Hirose. All rights reserved. +// MIT License +// + +#include +#include +#include +#include + +using namespace peg; + +int main(void) { + parser parser(R"( +S <- (Decl / Ref)* +Decl <- 'decl' symbol(Name) +Ref <- 'ref' symbol_reference(Name) +Name <- < [a-zA-Z]+ > +%whitespace <- [ \t\r\n]* + +symbol(s) <- < s > +symbol_reference(s) <- < s > +)"); + + std::set dic; + + parser[R"(symbol)"].predicate = + [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) { + auto tok = vs.token_to_string(); + if (dic.find(tok) != dic.end()) { + msg = "'" + tok + "' already exists..."; + return false; + } + dic.insert(tok); + return true; + }; + + parser[R"(symbol_reference)"].predicate = + [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) { + auto tok = vs.token_to_string(); + if (dic.find(tok) == dic.end()) { + msg = "'" + tok + "' doesn't exists..."; + return false; + } + return true; + }; + + parser.enable_ast(); + + parser.log = [](size_t line, size_t col, const std::string &msg) { + std::cerr << line << ":" << col << ": " << msg << "\n"; + }; + + const auto source = R"(decl aaa +ref aaa +ref bbb +)"; + + std::shared_ptr ast; + if (parser.parse(source, ast)) { + ast = parser.optimize_ast(ast); + std::cout << ast_to_s(ast); + return 0; + } + + return -1; +} diff --git a/peglib.h b/peglib.h index c61dc17..4503d55 100644 --- a/peglib.h +++ b/peglib.h @@ -631,16 +631,6 @@ private: Fty fn_; }; -/* - * Semantic predicate - */ -// Note: 'parse_error' exception class should be be used in sematic action -// handlers to reject the rule. -class parse_error : public std::runtime_error { -public: - parse_error(const char *what_arg) : std::runtime_error(what_arg) {} -}; - /* * Parse result helper */ @@ -996,29 +986,29 @@ public: size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override { - auto &chldsv = c.push(); + auto &chldvs = c.push(); auto pop_se = scope_exit([&]() { c.pop(); }); size_t i = 0; for (const auto &ope : opes_) { const auto &rule = *ope; - auto len = rule.parse(s + i, n - i, chldsv, c, dt); + auto len = rule.parse(s + i, n - i, chldvs, c, dt); if (fail(len)) { return len; } i += len; } - if (!chldsv.empty()) { - for (size_t j = 0; j < chldsv.size(); j++) { - vs.emplace_back(std::move(chldsv[j])); + if (!chldvs.empty()) { + for (size_t j = 0; j < chldvs.size(); j++) { + vs.emplace_back(std::move(chldvs[j])); } } - if (!chldsv.tags.empty()) { - for (size_t j = 0; j < chldsv.tags.size(); j++) { - vs.tags.emplace_back(std::move(chldsv.tags[j])); + if (!chldvs.tags.empty()) { + for (size_t j = 0; j < chldvs.tags.size(); j++) { + vs.tags.emplace_back(std::move(chldvs.tags[j])); } } - vs.sv_ = chldsv.sv_; - if (!chldsv.tokens.empty()) { - for (size_t j = 0; j < chldsv.tokens.size(); j++) { - vs.tokens.emplace_back(std::move(chldsv.tokens[j])); + vs.sv_ = chldvs.sv_; + if (!chldvs.tokens.empty()) { + for (size_t j = 0; j < chldvs.tokens.size(); j++) { + vs.tokens.emplace_back(std::move(chldvs.tokens[j])); } } return i; @@ -1049,7 +1039,7 @@ public: for (const auto &ope : opes_) { if (!c.cut_stack.empty()) { c.cut_stack.back() = false; } - auto &chldsv = c.push(); + auto &chldvs = c.push(); c.push_capture_scope(); c.error_info.keep_previous_token = id > 0; @@ -1059,25 +1049,25 @@ public: c.error_info.keep_previous_token = false; }); - len = ope->parse(s, n, chldsv, c, dt); + len = ope->parse(s, n, chldvs, c, dt); if (success(len)) { - if (!chldsv.empty()) { - for (size_t i = 0; i < chldsv.size(); i++) { - vs.emplace_back(std::move(chldsv[i])); + if (!chldvs.empty()) { + for (size_t i = 0; i < chldvs.size(); i++) { + vs.emplace_back(std::move(chldvs[i])); } } - if (!chldsv.tags.empty()) { - for (size_t i = 0; i < chldsv.tags.size(); i++) { - vs.tags.emplace_back(std::move(chldsv.tags[i])); + if (!chldvs.tags.empty()) { + for (size_t i = 0; i < chldvs.tags.size(); i++) { + vs.tags.emplace_back(std::move(chldvs.tags[i])); } } - vs.sv_ = chldsv.sv_; + vs.sv_ = chldvs.sv_; vs.choice_count_ = opes_.size(); vs.choice_ = id; - if (!chldsv.tokens.empty()) { - for (size_t i = 0; i < chldsv.tokens.size(); i++) { - vs.tokens.emplace_back(std::move(chldsv.tokens[i])); + if (!chldvs.tokens.empty()) { + for (size_t i = 0; i < chldvs.tokens.size(); i++) { + vs.tokens.emplace_back(std::move(chldvs.tokens[i])); } } c.shift_capture_values(); @@ -1184,14 +1174,14 @@ public: size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, Context &c, std::any &dt) const override { - auto &chldsv = c.push(); + auto &chldvs = c.push(); c.push_capture_scope(); auto se = scope_exit([&]() { c.pop(); c.pop_capture_scope(); }); const auto &rule = *ope_; - auto len = rule.parse(s, n, chldsv, c, dt); + auto len = rule.parse(s, n, chldvs, c, dt); if (success(len)) { return 0; } else { @@ -1210,13 +1200,13 @@ public: size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, Context &c, std::any &dt) const override { - auto &chldsv = c.push(); + auto &chldvs = c.push(); c.push_capture_scope(); auto se = scope_exit([&]() { c.pop(); c.pop_capture_scope(); }); - auto len = ope_->parse(s, n, chldsv, c, dt); + auto len = ope_->parse(s, n, chldvs, c, dt); if (success(len)) { c.set_error_pos(s); return static_cast(-1); @@ -1429,9 +1419,9 @@ public: size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, Context &c, std::any &dt) const override { const auto &rule = *ope_; - auto &chldsv = c.push(); + auto &chldvs = c.push(); auto se = scope_exit([&]() { c.pop(); }); - return rule.parse(s, n, chldsv, c, dt); + return rule.parse(s, n, chldvs, c, dt); } void accept(Visitor &v) override; @@ -2408,6 +2398,10 @@ public: std::string name; const char *s_ = nullptr; + std::function + predicate; + size_t id = 0; Action action; std::function enter; @@ -2695,6 +2689,15 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, c.rule_stack.push_back(outer_); auto len = ope_->parse(s, n, vs, c, dt); c.rule_stack.pop_back(); + + std::string msg; + if (outer_->predicate && !outer_->predicate(vs, dt, msg)) { + if (c.log && !msg.empty() && c.error_info.message_pos < s) { + c.error_info.message_pos = s; + c.error_info.message = msg; + } + len = static_cast(-1); + } return len; } @@ -2709,34 +2712,31 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); } }); - auto &chldsv = c.push(); + auto &chldvs = c.push(); c.rule_stack.push_back(outer_); - len = ope_->parse(s, n, chldsv, c, dt); + len = ope_->parse(s, n, chldvs, c, dt); c.rule_stack.pop_back(); // Invoke action if (success(len)) { - chldsv.sv_ = std::string_view(s, len); - chldsv.name_ = outer_->name; + chldvs.sv_ = std::string_view(s, len); + chldvs.name_ = outer_->name; if (!dynamic_cast(ope_.get())) { - chldsv.choice_count_ = 0; - chldsv.choice_ = 0; + chldvs.choice_count_ = 0; + chldvs.choice_ = 0; } - try { - a_val = reduce(chldsv, dt); - } catch (const parse_error &e) { - if (c.log) { - if (e.what()) { - if (c.error_info.message_pos < s) { - c.error_info.message_pos = s; - c.error_info.message = e.what(); - } - } + std::string msg; + if (outer_->predicate && !outer_->predicate(chldvs, dt, msg)) { + if (c.log && !msg.empty() && c.error_info.message_pos < s) { + c.error_info.message_pos = s; + c.error_info.message = msg; } len = static_cast(-1); + } else { + a_val = reduce(chldvs, dt); } } }); diff --git a/test/test1.cc b/test/test1.cc index f3f1ef9..9a761c8 100644 --- a/test/test1.cc +++ b/test/test1.cc @@ -226,14 +226,17 @@ TEST(GeneralTest, enter_leave_handlers_test) { auto message = "should be upper case string..."; - parser["TOKEN"] = [&](const SemanticValues &vs, std::any &dt) { + parser["TOKEN"].predicate = [&](const SemanticValues &vs, const std::any &dt, + std::string &msg) { auto &require_upper_case = *std::any_cast(dt); if (require_upper_case) { const auto &s = vs.sv(); if (!std::all_of(s.begin(), s.end(), ::isupper)) { - throw parse_error(message); + msg = message; + return false; } } + return true; }; bool require_upper_case = false; diff --git a/test/test2.cc b/test/test2.cc index 0dfaf4f..71db796 100644 --- a/test/test2.cc +++ b/test/test2.cc @@ -837,9 +837,16 @@ TEST(PredicateTest, Semantic_predicate_test) { parser parser("NUMBER <- [0-9]+"); parser["NUMBER"] = [](const SemanticValues &vs) { - auto val = vs.token_to_number(); - if (val != 100) { throw parse_error("value error!!"); } - return val; + return vs.token_to_number(); + }; + + parser["NUMBER"].predicate = [](const SemanticValues &vs, + const std::any & /*dt*/, std::string &msg) { + if (vs.token_to_number() != 100) { + msg = "value error!!"; + return false; + } + return true; }; long val;