mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 20:05:31 +00:00
Removed peg::parse_error and introduced predicate action
This commit is contained in:
parent
97e24e667e
commit
23c080af49
17
README.md
17
README.md
@ -216,17 +216,22 @@ peg::parser parser(R"(
|
|||||||
)");
|
)");
|
||||||
```
|
```
|
||||||
|
|
||||||
*Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action.
|
*Semantic predicate* support is available with a *predicate* action.
|
||||||
|
|
||||||
```cpp
|
```cpp
|
||||||
peg::parser parser("NUMBER <- [0-9]+");
|
peg::parser parser("NUMBER <- [0-9]+");
|
||||||
|
|
||||||
parser["NUMBER"] = [](const SemanticValues& vs) {
|
parser["NUMBER"] = [](const SemanticValues &vs) {
|
||||||
auto val = vs.token_to_number<long>();
|
return vs.token_to_number<long>();
|
||||||
if (val != 100) {
|
};
|
||||||
throw peg::parse_error("value error!!");
|
|
||||||
|
parser["NUMBER"].predicate = [](const SemanticValues &vs,
|
||||||
|
const std::any & /*dt*/, std::string &msg) {
|
||||||
|
if (vs.token_to_number<long>() != 100) {
|
||||||
|
msg = "value error!!";
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return val;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
long val;
|
long val;
|
||||||
|
@ -20,3 +20,6 @@ target_link_libraries(calc5 ${add_link_deps})
|
|||||||
|
|
||||||
add_executable(indent indent.cc)
|
add_executable(indent indent.cc)
|
||||||
target_link_libraries(indent ${add_link_deps})
|
target_link_libraries(indent ${add_link_deps})
|
||||||
|
|
||||||
|
add_executable(symbol_check symbol_check.cc)
|
||||||
|
target_link_libraries(symbol_check ${add_link_deps})
|
||||||
|
@ -39,8 +39,13 @@ Block <- Statements {}
|
|||||||
size_t /*matchlen*/, std::any & /*value*/,
|
size_t /*matchlen*/, std::any & /*value*/,
|
||||||
std::any & /*dt*/) { indent -= 2; };
|
std::any & /*dt*/) { indent -= 2; };
|
||||||
|
|
||||||
parser["Samedent"] = [&](const SemanticValues &vs, std::any & /*dt*/) {
|
parser["Samedent"].predicate =
|
||||||
if (indent != vs.sv().size()) { throw parse_error("different indent..."); }
|
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||||
|
if (indent != vs.sv().size()) {
|
||||||
|
msg = "different indent...";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
parser.enable_ast();
|
parser.enable_ast();
|
||||||
|
69
example/symbol_check.cc
Normal file
69
example/symbol_check.cc
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
//
|
||||||
|
// symbol_check.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2022 Yuji Hirose. All rights reserved.
|
||||||
|
// MIT License
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <iostream>
|
||||||
|
#include <peglib.h>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
using namespace peg;
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
parser parser(R"(
|
||||||
|
S <- (Decl / Ref)*
|
||||||
|
Decl <- 'decl' symbol(Name)
|
||||||
|
Ref <- 'ref' symbol_reference(Name)
|
||||||
|
Name <- < [a-zA-Z]+ >
|
||||||
|
%whitespace <- [ \t\r\n]*
|
||||||
|
|
||||||
|
symbol(s) <- < s >
|
||||||
|
symbol_reference(s) <- < s >
|
||||||
|
)");
|
||||||
|
|
||||||
|
std::set<std::string> dic;
|
||||||
|
|
||||||
|
parser[R"(symbol)"].predicate =
|
||||||
|
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||||
|
auto tok = vs.token_to_string();
|
||||||
|
if (dic.find(tok) != dic.end()) {
|
||||||
|
msg = "'" + tok + "' already exists...";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
dic.insert(tok);
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
parser[R"(symbol_reference)"].predicate =
|
||||||
|
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||||
|
auto tok = vs.token_to_string();
|
||||||
|
if (dic.find(tok) == dic.end()) {
|
||||||
|
msg = "'" + tok + "' doesn't exists...";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
parser.enable_ast();
|
||||||
|
|
||||||
|
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||||
|
std::cerr << line << ":" << col << ": " << msg << "\n";
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto source = R"(decl aaa
|
||||||
|
ref aaa
|
||||||
|
ref bbb
|
||||||
|
)";
|
||||||
|
|
||||||
|
std::shared_ptr<Ast> ast;
|
||||||
|
if (parser.parse(source, ast)) {
|
||||||
|
ast = parser.optimize_ast(ast);
|
||||||
|
std::cout << ast_to_s(ast);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
110
peglib.h
110
peglib.h
@ -631,16 +631,6 @@ private:
|
|||||||
Fty fn_;
|
Fty fn_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* Semantic predicate
|
|
||||||
*/
|
|
||||||
// Note: 'parse_error' exception class should be used in semantic action
|
|
||||||
// handlers to reject the rule.
|
|
||||||
class parse_error : public std::runtime_error {
|
|
||||||
public:
|
|
||||||
parse_error(const char *what_arg) : std::runtime_error(what_arg) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parse result helper
|
* Parse result helper
|
||||||
*/
|
*/
|
||||||
@ -996,29 +986,29 @@ public:
|
|||||||
|
|
||||||
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
|
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
|
||||||
std::any &dt) const override {
|
std::any &dt) const override {
|
||||||
auto &chldsv = c.push();
|
auto &chldvs = c.push();
|
||||||
auto pop_se = scope_exit([&]() { c.pop(); });
|
auto pop_se = scope_exit([&]() { c.pop(); });
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (const auto &ope : opes_) {
|
for (const auto &ope : opes_) {
|
||||||
const auto &rule = *ope;
|
const auto &rule = *ope;
|
||||||
auto len = rule.parse(s + i, n - i, chldsv, c, dt);
|
auto len = rule.parse(s + i, n - i, chldvs, c, dt);
|
||||||
if (fail(len)) { return len; }
|
if (fail(len)) { return len; }
|
||||||
i += len;
|
i += len;
|
||||||
}
|
}
|
||||||
if (!chldsv.empty()) {
|
if (!chldvs.empty()) {
|
||||||
for (size_t j = 0; j < chldsv.size(); j++) {
|
for (size_t j = 0; j < chldvs.size(); j++) {
|
||||||
vs.emplace_back(std::move(chldsv[j]));
|
vs.emplace_back(std::move(chldvs[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!chldsv.tags.empty()) {
|
if (!chldvs.tags.empty()) {
|
||||||
for (size_t j = 0; j < chldsv.tags.size(); j++) {
|
for (size_t j = 0; j < chldvs.tags.size(); j++) {
|
||||||
vs.tags.emplace_back(std::move(chldsv.tags[j]));
|
vs.tags.emplace_back(std::move(chldvs.tags[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vs.sv_ = chldsv.sv_;
|
vs.sv_ = chldvs.sv_;
|
||||||
if (!chldsv.tokens.empty()) {
|
if (!chldvs.tokens.empty()) {
|
||||||
for (size_t j = 0; j < chldsv.tokens.size(); j++) {
|
for (size_t j = 0; j < chldvs.tokens.size(); j++) {
|
||||||
vs.tokens.emplace_back(std::move(chldsv.tokens[j]));
|
vs.tokens.emplace_back(std::move(chldvs.tokens[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return i;
|
return i;
|
||||||
@ -1049,7 +1039,7 @@ public:
|
|||||||
for (const auto &ope : opes_) {
|
for (const auto &ope : opes_) {
|
||||||
if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
|
if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
|
||||||
|
|
||||||
auto &chldsv = c.push();
|
auto &chldvs = c.push();
|
||||||
c.push_capture_scope();
|
c.push_capture_scope();
|
||||||
|
|
||||||
c.error_info.keep_previous_token = id > 0;
|
c.error_info.keep_previous_token = id > 0;
|
||||||
@ -1059,25 +1049,25 @@ public:
|
|||||||
c.error_info.keep_previous_token = false;
|
c.error_info.keep_previous_token = false;
|
||||||
});
|
});
|
||||||
|
|
||||||
len = ope->parse(s, n, chldsv, c, dt);
|
len = ope->parse(s, n, chldvs, c, dt);
|
||||||
|
|
||||||
if (success(len)) {
|
if (success(len)) {
|
||||||
if (!chldsv.empty()) {
|
if (!chldvs.empty()) {
|
||||||
for (size_t i = 0; i < chldsv.size(); i++) {
|
for (size_t i = 0; i < chldvs.size(); i++) {
|
||||||
vs.emplace_back(std::move(chldsv[i]));
|
vs.emplace_back(std::move(chldvs[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!chldsv.tags.empty()) {
|
if (!chldvs.tags.empty()) {
|
||||||
for (size_t i = 0; i < chldsv.tags.size(); i++) {
|
for (size_t i = 0; i < chldvs.tags.size(); i++) {
|
||||||
vs.tags.emplace_back(std::move(chldsv.tags[i]));
|
vs.tags.emplace_back(std::move(chldvs.tags[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vs.sv_ = chldsv.sv_;
|
vs.sv_ = chldvs.sv_;
|
||||||
vs.choice_count_ = opes_.size();
|
vs.choice_count_ = opes_.size();
|
||||||
vs.choice_ = id;
|
vs.choice_ = id;
|
||||||
if (!chldsv.tokens.empty()) {
|
if (!chldvs.tokens.empty()) {
|
||||||
for (size_t i = 0; i < chldsv.tokens.size(); i++) {
|
for (size_t i = 0; i < chldvs.tokens.size(); i++) {
|
||||||
vs.tokens.emplace_back(std::move(chldsv.tokens[i]));
|
vs.tokens.emplace_back(std::move(chldvs.tokens[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
c.shift_capture_values();
|
c.shift_capture_values();
|
||||||
@ -1184,14 +1174,14 @@ public:
|
|||||||
|
|
||||||
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
|
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
|
||||||
Context &c, std::any &dt) const override {
|
Context &c, std::any &dt) const override {
|
||||||
auto &chldsv = c.push();
|
auto &chldvs = c.push();
|
||||||
c.push_capture_scope();
|
c.push_capture_scope();
|
||||||
auto se = scope_exit([&]() {
|
auto se = scope_exit([&]() {
|
||||||
c.pop();
|
c.pop();
|
||||||
c.pop_capture_scope();
|
c.pop_capture_scope();
|
||||||
});
|
});
|
||||||
const auto &rule = *ope_;
|
const auto &rule = *ope_;
|
||||||
auto len = rule.parse(s, n, chldsv, c, dt);
|
auto len = rule.parse(s, n, chldvs, c, dt);
|
||||||
if (success(len)) {
|
if (success(len)) {
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
@ -1210,13 +1200,13 @@ public:
|
|||||||
|
|
||||||
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
|
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
|
||||||
Context &c, std::any &dt) const override {
|
Context &c, std::any &dt) const override {
|
||||||
auto &chldsv = c.push();
|
auto &chldvs = c.push();
|
||||||
c.push_capture_scope();
|
c.push_capture_scope();
|
||||||
auto se = scope_exit([&]() {
|
auto se = scope_exit([&]() {
|
||||||
c.pop();
|
c.pop();
|
||||||
c.pop_capture_scope();
|
c.pop_capture_scope();
|
||||||
});
|
});
|
||||||
auto len = ope_->parse(s, n, chldsv, c, dt);
|
auto len = ope_->parse(s, n, chldvs, c, dt);
|
||||||
if (success(len)) {
|
if (success(len)) {
|
||||||
c.set_error_pos(s);
|
c.set_error_pos(s);
|
||||||
return static_cast<size_t>(-1);
|
return static_cast<size_t>(-1);
|
||||||
@ -1429,9 +1419,9 @@ public:
|
|||||||
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
|
size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
|
||||||
Context &c, std::any &dt) const override {
|
Context &c, std::any &dt) const override {
|
||||||
const auto &rule = *ope_;
|
const auto &rule = *ope_;
|
||||||
auto &chldsv = c.push();
|
auto &chldvs = c.push();
|
||||||
auto se = scope_exit([&]() { c.pop(); });
|
auto se = scope_exit([&]() { c.pop(); });
|
||||||
return rule.parse(s, n, chldsv, c, dt);
|
return rule.parse(s, n, chldvs, c, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
void accept(Visitor &v) override;
|
void accept(Visitor &v) override;
|
||||||
@ -2408,6 +2398,10 @@ public:
|
|||||||
std::string name;
|
std::string name;
|
||||||
const char *s_ = nullptr;
|
const char *s_ = nullptr;
|
||||||
|
|
||||||
|
std::function<bool(const SemanticValues &vs, const std::any &dt,
|
||||||
|
std::string &msg)>
|
||||||
|
predicate;
|
||||||
|
|
||||||
size_t id = 0;
|
size_t id = 0;
|
||||||
Action action;
|
Action action;
|
||||||
std::function<void(const char *s, size_t n, std::any &dt)> enter;
|
std::function<void(const char *s, size_t n, std::any &dt)> enter;
|
||||||
@ -2695,6 +2689,15 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
|
|||||||
c.rule_stack.push_back(outer_);
|
c.rule_stack.push_back(outer_);
|
||||||
auto len = ope_->parse(s, n, vs, c, dt);
|
auto len = ope_->parse(s, n, vs, c, dt);
|
||||||
c.rule_stack.pop_back();
|
c.rule_stack.pop_back();
|
||||||
|
|
||||||
|
std::string msg;
|
||||||
|
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
|
||||||
|
if (c.log && !msg.empty() && c.error_info.message_pos < s) {
|
||||||
|
c.error_info.message_pos = s;
|
||||||
|
c.error_info.message = msg;
|
||||||
|
}
|
||||||
|
len = static_cast<size_t>(-1);
|
||||||
|
}
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2709,34 +2712,31 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
|
|||||||
if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
|
if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); }
|
||||||
});
|
});
|
||||||
|
|
||||||
auto &chldsv = c.push();
|
auto &chldvs = c.push();
|
||||||
|
|
||||||
c.rule_stack.push_back(outer_);
|
c.rule_stack.push_back(outer_);
|
||||||
len = ope_->parse(s, n, chldsv, c, dt);
|
len = ope_->parse(s, n, chldvs, c, dt);
|
||||||
c.rule_stack.pop_back();
|
c.rule_stack.pop_back();
|
||||||
|
|
||||||
// Invoke action
|
// Invoke action
|
||||||
if (success(len)) {
|
if (success(len)) {
|
||||||
chldsv.sv_ = std::string_view(s, len);
|
chldvs.sv_ = std::string_view(s, len);
|
||||||
chldsv.name_ = outer_->name;
|
chldvs.name_ = outer_->name;
|
||||||
|
|
||||||
if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_.get())) {
|
if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_.get())) {
|
||||||
chldsv.choice_count_ = 0;
|
chldvs.choice_count_ = 0;
|
||||||
chldsv.choice_ = 0;
|
chldvs.choice_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
std::string msg;
|
||||||
a_val = reduce(chldsv, dt);
|
if (outer_->predicate && !outer_->predicate(chldvs, dt, msg)) {
|
||||||
} catch (const parse_error &e) {
|
if (c.log && !msg.empty() && c.error_info.message_pos < s) {
|
||||||
if (c.log) {
|
|
||||||
if (e.what()) {
|
|
||||||
if (c.error_info.message_pos < s) {
|
|
||||||
c.error_info.message_pos = s;
|
c.error_info.message_pos = s;
|
||||||
c.error_info.message = e.what();
|
c.error_info.message = msg;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
len = static_cast<size_t>(-1);
|
len = static_cast<size_t>(-1);
|
||||||
|
} else {
|
||||||
|
a_val = reduce(chldvs, dt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -226,14 +226,17 @@ TEST(GeneralTest, enter_leave_handlers_test) {
|
|||||||
|
|
||||||
auto message = "should be upper case string...";
|
auto message = "should be upper case string...";
|
||||||
|
|
||||||
parser["TOKEN"] = [&](const SemanticValues &vs, std::any &dt) {
|
parser["TOKEN"].predicate = [&](const SemanticValues &vs, const std::any &dt,
|
||||||
|
std::string &msg) {
|
||||||
auto &require_upper_case = *std::any_cast<bool *>(dt);
|
auto &require_upper_case = *std::any_cast<bool *>(dt);
|
||||||
if (require_upper_case) {
|
if (require_upper_case) {
|
||||||
const auto &s = vs.sv();
|
const auto &s = vs.sv();
|
||||||
if (!std::all_of(s.begin(), s.end(), ::isupper)) {
|
if (!std::all_of(s.begin(), s.end(), ::isupper)) {
|
||||||
throw parse_error(message);
|
msg = message;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool require_upper_case = false;
|
bool require_upper_case = false;
|
||||||
|
@ -837,9 +837,16 @@ TEST(PredicateTest, Semantic_predicate_test) {
|
|||||||
parser parser("NUMBER <- [0-9]+");
|
parser parser("NUMBER <- [0-9]+");
|
||||||
|
|
||||||
parser["NUMBER"] = [](const SemanticValues &vs) {
|
parser["NUMBER"] = [](const SemanticValues &vs) {
|
||||||
auto val = vs.token_to_number<long>();
|
return vs.token_to_number<long>();
|
||||||
if (val != 100) { throw parse_error("value error!!"); }
|
};
|
||||||
return val;
|
|
||||||
|
parser["NUMBER"].predicate = [](const SemanticValues &vs,
|
||||||
|
const std::any & /*dt*/, std::string &msg) {
|
||||||
|
if (vs.token_to_number<long>() != 100) {
|
||||||
|
msg = "value error!!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
long val;
|
long val;
|
||||||
|
Loading…
Reference in New Issue
Block a user