Removed peg::parse_error and introduced predicate action

This commit is contained in:
yhirose 2022-06-30 19:58:23 -04:00
parent 97e24e667e
commit 23c080af49
7 changed files with 162 additions and 70 deletions

View File

@ -216,17 +216,22 @@ peg::parser parser(R"(
)"); )");
``` ```
*Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action. *Semantic predicate* support is available with a *predicate* action.
```cpp ```cpp
peg::parser parser("NUMBER <- [0-9]+"); peg::parser parser("NUMBER <- [0-9]+");
parser["NUMBER"] = [](const SemanticValues &vs) { parser["NUMBER"] = [](const SemanticValues &vs) {
auto val = vs.token_to_number<long>(); return vs.token_to_number<long>();
if (val != 100) { };
throw peg::parse_error("value error!!");
parser["NUMBER"].predicate = [](const SemanticValues &vs,
const std::any & /*dt*/, std::string &msg) {
if (vs.token_to_number<long>() != 100) {
msg = "value error!!";
return false;
} }
return val; return true;
}; };
long val; long val;

View File

@ -20,3 +20,6 @@ target_link_libraries(calc5 ${add_link_deps})
add_executable(indent indent.cc) add_executable(indent indent.cc)
target_link_libraries(indent ${add_link_deps}) target_link_libraries(indent ${add_link_deps})
add_executable(symbol_check symbol_check.cc)
target_link_libraries(symbol_check ${add_link_deps})

View File

@ -39,8 +39,13 @@ Block <- Statements {}
size_t /*matchlen*/, std::any & /*value*/, size_t /*matchlen*/, std::any & /*value*/,
std::any & /*dt*/) { indent -= 2; }; std::any & /*dt*/) { indent -= 2; };
parser["Samedent"] = [&](const SemanticValues &vs, std::any & /*dt*/) { parser["Samedent"].predicate =
if (indent != vs.sv().size()) { throw parse_error("different indent..."); } [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
if (indent != vs.sv().size()) {
msg = "different indent...";
return false;
}
return true;
}; };
parser.enable_ast(); parser.enable_ast();

69
example/symbol_check.cc Normal file
View File

@ -0,0 +1,69 @@
//
// symbol_check.cc
//
// Copyright (c) 2022 Yuji Hirose. All rights reserved.
// MIT License
//
#include <cstdlib>
#include <iostream>
#include <peglib.h>
#include <set>
using namespace peg;
// Example: checking symbol declarations and references with predicate actions.
//
// The grammar accepts a sequence of `decl <Name>` and `ref <Name>` statements.
// Two predicates are attached to the parameterized rules `symbol` and
// `symbol_reference`:
//   - `symbol` rejects a name that has already been declared and records
//     every newly declared name;
//   - `symbol_reference` rejects a name that has not been declared yet.
// A predicate that returns false makes the rule fail, and the text stored in
// `msg` is what the `log` callback receives.
//
// Returns EXIT_SUCCESS after printing the optimized AST when the sample input
// parses, and EXIT_FAILURE otherwise. The bundled sample references the
// undeclared symbol `bbb`.
int main() {
  parser parser(R"(
S <- (Decl / Ref)*
Decl <- 'decl' symbol(Name)
Ref <- 'ref' symbol_reference(Name)
Name <- < [a-zA-Z]+ >
%whitespace <- [ \t\r\n]*
symbol(s) <- < s >
symbol_reference(s) <- < s >
)");

  // Names declared so far; shared by both predicates below.
  std::set<std::string> declared;

  parser["symbol"].predicate =
      [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
        const auto name = vs.token_to_string();
        // insert() reports whether the name was new, so a single lookup
        // both detects a duplicate and records a fresh declaration.
        if (!declared.insert(name).second) {
          msg = "'" + name + "' already exists...";
          return false;
        }
        return true;
      };

  parser["symbol_reference"].predicate =
      [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
        const auto name = vs.token_to_string();
        if (declared.count(name) == 0) {
          msg = "'" + name + "' doesn't exist...";
          return false;
        }
        return true;
      };

  parser.enable_ast();

  // Print diagnostics as "line:col: message" on stderr.
  parser.log = [](size_t line, size_t col, const std::string &msg) {
    std::cerr << line << ":" << col << ": " << msg << "\n";
  };

  const auto source = R"(decl aaa
ref aaa
ref bbb
)";

  std::shared_ptr<Ast> ast;
  if (!parser.parse(source, ast)) {
    // Use the portable failure status rather than -1.
    return EXIT_FAILURE;
  }

  ast = parser.optimize_ast(ast);
  std::cout << ast_to_s(ast);
  return EXIT_SUCCESS;
}

110
peglib.h
View File

@ -631,16 +631,6 @@ private:
Fty fn_; Fty fn_;
}; };
/*
* Semantic predicate
*/
// Note: the 'parse_error' exception class should be used in semantic action
// handlers to reject the rule.
class parse_error : public std::runtime_error {
public:
parse_error(const char *what_arg) : std::runtime_error(what_arg) {}
};
/* /*
* Parse result helper * Parse result helper
*/ */
@ -996,29 +986,29 @@ public:
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
std::any &dt) const override { std::any &dt) const override {
auto &chldsv = c.push(); auto &chldvs = c.push();
auto pop_se = scope_exit([&]() { c.pop(); }); auto pop_se = scope_exit([&]() { c.pop(); });
size_t i = 0; size_t i = 0;
for (const auto &ope : opes_) { for (const auto &ope : opes_) {
const auto &rule = *ope; const auto &rule = *ope;
auto len = rule.parse(s + i, n - i, chldsv, c, dt); auto len = rule.parse(s + i, n - i, chldvs, c, dt);
if (fail(len)) { return len; } if (fail(len)) { return len; }
i += len; i += len;
} }
if (!chldsv.empty()) { if (!chldvs.empty()) {
for (size_t j = 0; j < chldsv.size(); j++) { for (size_t j = 0; j < chldvs.size(); j++) {
vs.emplace_back(std::move(chldsv[j])); vs.emplace_back(std::move(chldvs[j]));
} }
} }
if (!chldsv.tags.empty()) { if (!chldvs.tags.empty()) {
for (size_t j = 0; j < chldsv.tags.size(); j++) { for (size_t j = 0; j < chldvs.tags.size(); j++) {
vs.tags.emplace_back(std::move(chldsv.tags[j])); vs.tags.emplace_back(std::move(chldvs.tags[j]));
} }
} }
vs.sv_ = chldsv.sv_; vs.sv_ = chldvs.sv_;
if (!chldsv.tokens.empty()) { if (!chldvs.tokens.empty()) {
for (size_t j = 0; j < chldsv.tokens.size(); j++) { for (size_t j = 0; j < chldvs.tokens.size(); j++) {
vs.tokens.emplace_back(std::move(chldsv.tokens[j])); vs.tokens.emplace_back(std::move(chldvs.tokens[j]));
} }
} }
return i; return i;
@ -1049,7 +1039,7 @@ public:
for (const auto &ope : opes_) { for (const auto &ope : opes_) {
if (!c.cut_stack.empty()) { c.cut_stack.back() = false; } if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
auto &chldsv = c.push(); auto &chldvs = c.push();
c.push_capture_scope(); c.push_capture_scope();
c.error_info.keep_previous_token = id > 0; c.error_info.keep_previous_token = id > 0;
@ -1059,25 +1049,25 @@ public:
c.error_info.keep_previous_token = false; c.error_info.keep_previous_token = false;
}); });
len = ope->parse(s, n, chldsv, c, dt); len = ope->parse(s, n, chldvs, c, dt);
if (success(len)) { if (success(len)) {
if (!chldsv.empty()) { if (!chldvs.empty()) {
for (size_t i = 0; i < chldsv.size(); i++) { for (size_t i = 0; i < chldvs.size(); i++) {
vs.emplace_back(std::move(chldsv[i])); vs.emplace_back(std::move(chldvs[i]));
} }
} }
if (!chldsv.tags.empty()) { if (!chldvs.tags.empty()) {
for (size_t i = 0; i < chldsv.tags.size(); i++) { for (size_t i = 0; i < chldvs.tags.size(); i++) {
vs.tags.emplace_back(std::move(chldsv.tags[i])); vs.tags.emplace_back(std::move(chldvs.tags[i]));
} }
} }
vs.sv_ = chldsv.sv_; vs.sv_ = chldvs.sv_;
vs.choice_count_ = opes_.size(); vs.choice_count_ = opes_.size();
vs.choice_ = id; vs.choice_ = id;
if (!chldsv.tokens.empty()) { if (!chldvs.tokens.empty()) {
for (size_t i = 0; i < chldsv.tokens.size(); i++) { for (size_t i = 0; i < chldvs.tokens.size(); i++) {
vs.tokens.emplace_back(std::move(chldsv.tokens[i])); vs.tokens.emplace_back(std::move(chldvs.tokens[i]));
} }
} }
c.shift_capture_values(); c.shift_capture_values();
@ -1184,14 +1174,14 @@ public:
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
Context &c, std::any &dt) const override { Context &c, std::any &dt) const override {
auto &chldsv = c.push(); auto &chldvs = c.push();
c.push_capture_scope(); c.push_capture_scope();
auto se = scope_exit([&]() { auto se = scope_exit([&]() {
c.pop(); c.pop();
c.pop_capture_scope(); c.pop_capture_scope();
}); });
const auto &rule = *ope_; const auto &rule = *ope_;
auto len = rule.parse(s, n, chldsv, c, dt); auto len = rule.parse(s, n, chldvs, c, dt);
if (success(len)) { if (success(len)) {
return 0; return 0;
} else { } else {
@ -1210,13 +1200,13 @@ public:
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
Context &c, std::any &dt) const override { Context &c, std::any &dt) const override {
auto &chldsv = c.push(); auto &chldvs = c.push();
c.push_capture_scope(); c.push_capture_scope();
auto se = scope_exit([&]() { auto se = scope_exit([&]() {
c.pop(); c.pop();
c.pop_capture_scope(); c.pop_capture_scope();
}); });
auto len = ope_->parse(s, n, chldsv, c, dt); auto len = ope_->parse(s, n, chldvs, c, dt);
if (success(len)) { if (success(len)) {
c.set_error_pos(s); c.set_error_pos(s);
return static_cast<size_t>(-1); return static_cast<size_t>(-1);
@ -1429,9 +1419,9 @@ public:
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
Context &c, std::any &dt) const override { Context &c, std::any &dt) const override {
const auto &rule = *ope_; const auto &rule = *ope_;
auto &chldsv = c.push(); auto &chldvs = c.push();
auto se = scope_exit([&]() { c.pop(); }); auto se = scope_exit([&]() { c.pop(); });
return rule.parse(s, n, chldsv, c, dt); return rule.parse(s, n, chldvs, c, dt);
} }
void accept(Visitor &v) override; void accept(Visitor &v) override;
@ -2408,6 +2398,10 @@ public:
std::string name; std::string name;
const char *s_ = nullptr; const char *s_ = nullptr;
std::function<bool(const SemanticValues &vs, const std::any &dt,
std::string &msg)>
predicate;
size_t id = 0; size_t id = 0;
Action action; Action action;
std::function<void(const char *s, size_t n, std::any &dt)> enter; std::function<void(const char *s, size_t n, std::any &dt)> enter;
@ -2695,6 +2689,15 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
c.rule_stack.push_back(outer_); c.rule_stack.push_back(outer_);
auto len = ope_->parse(s, n, vs, c, dt); auto len = ope_->parse(s, n, vs, c, dt);
c.rule_stack.pop_back(); c.rule_stack.pop_back();
std::string msg;
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
if (c.log && !msg.empty() && c.error_info.message_pos < s) {
c.error_info.message_pos = s;
c.error_info.message = msg;
}
len = static_cast<size_t>(-1);
}
return len; return len;
} }
@ -2709,34 +2712,31 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); } if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
}); });
auto &chldsv = c.push(); auto &chldvs = c.push();
c.rule_stack.push_back(outer_); c.rule_stack.push_back(outer_);
len = ope_->parse(s, n, chldsv, c, dt); len = ope_->parse(s, n, chldvs, c, dt);
c.rule_stack.pop_back(); c.rule_stack.pop_back();
// Invoke action // Invoke action
if (success(len)) { if (success(len)) {
chldsv.sv_ = std::string_view(s, len); chldvs.sv_ = std::string_view(s, len);
chldsv.name_ = outer_->name; chldvs.name_ = outer_->name;
if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_.get())) { if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_.get())) {
chldsv.choice_count_ = 0; chldvs.choice_count_ = 0;
chldsv.choice_ = 0; chldvs.choice_ = 0;
} }
try { std::string msg;
a_val = reduce(chldsv, dt); if (outer_->predicate && !outer_->predicate(chldvs, dt, msg)) {
} catch (const parse_error &e) { if (c.log && !msg.empty() && c.error_info.message_pos < s) {
if (c.log) {
if (e.what()) {
if (c.error_info.message_pos < s) {
c.error_info.message_pos = s; c.error_info.message_pos = s;
c.error_info.message = e.what(); c.error_info.message = msg;
}
}
} }
len = static_cast<size_t>(-1); len = static_cast<size_t>(-1);
} else {
a_val = reduce(chldvs, dt);
} }
} }
}); });

View File

@ -226,14 +226,17 @@ TEST(GeneralTest, enter_leave_handlers_test) {
auto message = "should be upper case string..."; auto message = "should be upper case string...";
parser["TOKEN"] = [&](const SemanticValues &vs, std::any &dt) { parser["TOKEN"].predicate = [&](const SemanticValues &vs, const std::any &dt,
std::string &msg) {
auto &require_upper_case = *std::any_cast<bool *>(dt); auto &require_upper_case = *std::any_cast<bool *>(dt);
if (require_upper_case) { if (require_upper_case) {
const auto &s = vs.sv(); const auto &s = vs.sv();
if (!std::all_of(s.begin(), s.end(), ::isupper)) { if (!std::all_of(s.begin(), s.end(), ::isupper)) {
throw parse_error(message); msg = message;
return false;
} }
} }
return true;
}; };
bool require_upper_case = false; bool require_upper_case = false;

View File

@ -837,9 +837,16 @@ TEST(PredicateTest, Semantic_predicate_test) {
parser parser("NUMBER <- [0-9]+"); parser parser("NUMBER <- [0-9]+");
parser["NUMBER"] = [](const SemanticValues &vs) { parser["NUMBER"] = [](const SemanticValues &vs) {
auto val = vs.token_to_number<long>(); return vs.token_to_number<long>();
if (val != 100) { throw parse_error("value error!!"); } };
return val;
parser["NUMBER"].predicate = [](const SemanticValues &vs,
const std::any & /*dt*/, std::string &msg) {
if (vs.token_to_number<long>() != 100) {
msg = "value error!!";
return false;
}
return true;
}; };
long val; long val;