mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 13:25:30 +00:00
commit
86468ec00e
15
README.md
15
README.md
@ -25,10 +25,11 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
|
||||
* `$name<` ... `>` (Named capture operator)
|
||||
* `$name` (Backreference operator)
|
||||
* `|` (Dictionary operator)
|
||||
* `↑` (Cut operator)
|
||||
* `MACRO_NAME(` ... `)` (Parameterized rule or Macro)
|
||||
* `{ precedence L - + L / * }` (Parsing infix expression)
|
||||
* `%recover(` ... `)` (Error recovery operator)
|
||||
* `exp^label` (Syntax sugar for `(exp / %recover(label))`)
|
||||
* `exp⇑label` (Syntax sugar for `(exp / %recover(label))`)
|
||||
* `label { message "..." }` (Error message instruction)
|
||||
|
||||
This library supports linear-time parsing, known as [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
|
||||
@ -324,6 +325,18 @@ START <- 'This month is ' MONTH '.'
|
||||
MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...'
|
||||
```
|
||||
|
||||
Cut operator
|
||||
------------
|
||||
|
||||
The `↑` operator can mitigate backtracking performance problems, but it risks changing the meaning of the grammar.
|
||||
|
||||
```peg
|
||||
S <- '(' ↑ P ')' / '"' ↑ P '"' / P
|
||||
P <- 'a' / 'b' / 'c'
|
||||
```
|
||||
|
||||
When we parse `(z` with the above grammar, we don't have to backtrack in `S` after `(` is matched because a cut operator is inserted there.
|
||||
|
||||
Parameterized Rule or Macro
|
||||
---------------------------
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
BIN
docs/native.wasm
BIN
docs/native.wasm
Binary file not shown.
123
peglib.h
123
peglib.h
@ -767,6 +767,8 @@ public:
|
||||
std::vector<std::map<std::string_view, std::string>> capture_scope_stack;
|
||||
size_t capture_scope_stack_size = 0;
|
||||
|
||||
std::vector<bool> cut_stack;
|
||||
|
||||
const size_t def_count;
|
||||
const bool enablePackratParsing;
|
||||
std::vector<bool> cache_registered;
|
||||
@ -971,24 +973,33 @@ public:
|
||||
class PrioritizedChoice : public Ope {
|
||||
public:
|
||||
template <typename... Args>
|
||||
PrioritizedChoice(const Args &... args)
|
||||
: opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
|
||||
PrioritizedChoice(bool for_label, const Args &... args)
|
||||
: opes_{static_cast<std::shared_ptr<Ope>>(args)...},
|
||||
for_label_(for_label) {}
|
||||
PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
|
||||
: opes_(opes) {}
|
||||
PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
|
||||
|
||||
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
|
||||
std::any &dt) const override {
|
||||
size_t len = static_cast<size_t>(-1);
|
||||
|
||||
if (!for_label_) { c.cut_stack.push_back(false); }
|
||||
|
||||
size_t id = 0;
|
||||
for (const auto &ope : opes_) {
|
||||
if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
|
||||
|
||||
auto &chldsv = c.push();
|
||||
c.push_capture_scope();
|
||||
|
||||
auto se = scope_exit([&]() {
|
||||
c.pop();
|
||||
c.pop_capture_scope();
|
||||
});
|
||||
|
||||
auto len = ope->parse(s, n, chldsv, c, dt);
|
||||
len = ope->parse(s, n, chldsv, c, dt);
|
||||
|
||||
if (success(len)) {
|
||||
if (!chldsv.empty()) {
|
||||
for (size_t i = 0; i < chldsv.size(); i++) {
|
||||
@ -1008,14 +1019,18 @@ public:
|
||||
vs.tokens.emplace_back(std::move(chldsv.tokens[i]));
|
||||
}
|
||||
}
|
||||
|
||||
c.shift_capture_values();
|
||||
return len;
|
||||
break;
|
||||
} else if (!c.cut_stack.empty() && c.cut_stack.back()) {
|
||||
break;
|
||||
}
|
||||
|
||||
id++;
|
||||
}
|
||||
return static_cast<size_t>(-1);
|
||||
|
||||
if (!for_label_) { c.cut_stack.pop_back(); }
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
void accept(Visitor &v) override;
|
||||
@ -1023,6 +1038,7 @@ public:
|
||||
size_t size() const { return opes_.size(); }
|
||||
|
||||
std::vector<std::shared_ptr<Ope>> opes_;
|
||||
bool for_label_ = false;
|
||||
};
|
||||
|
||||
class Repetition : public Ope {
|
||||
@ -1501,6 +1517,17 @@ public:
|
||||
std::shared_ptr<Ope> ope_;
|
||||
};
|
||||
|
||||
class Cut : public Ope, public std::enable_shared_from_this<Cut> {
|
||||
public:
|
||||
size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/,
|
||||
Context &c, std::any & /*dt*/) const override {
|
||||
c.cut_stack.back() = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void accept(Visitor &v) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* Factories
|
||||
*/
|
||||
@ -1510,7 +1537,12 @@ template <typename... Args> std::shared_ptr<Ope> seq(Args &&... args) {
|
||||
|
||||
template <typename... Args> std::shared_ptr<Ope> cho(Args &&... args) {
|
||||
return std::make_shared<PrioritizedChoice>(
|
||||
static_cast<std::shared_ptr<Ope>>(args)...);
|
||||
false, static_cast<std::shared_ptr<Ope>>(args)...);
|
||||
}
|
||||
|
||||
template <typename... Args> std::shared_ptr<Ope> cho4label_(Args &&... args) {
|
||||
return std::make_shared<PrioritizedChoice>(
|
||||
true, static_cast<std::shared_ptr<Ope>>(args)...);
|
||||
}
|
||||
|
||||
inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
|
||||
@ -1623,6 +1655,8 @@ inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
|
||||
return std::make_shared<Recovery>(ope);
|
||||
}
|
||||
|
||||
// Factory for the cut operator: returns a newly created Cut node as an Ope.
inline std::shared_ptr<Ope> cut() { return std::make_shared<Cut>(); }
|
||||
|
||||
/*
|
||||
* Visitor
|
||||
*/
|
||||
@ -1650,6 +1684,7 @@ struct Ope::Visitor {
|
||||
virtual void visit(BackReference &) {}
|
||||
virtual void visit(PrecedenceClimbing &) {}
|
||||
virtual void visit(Recovery &) {}
|
||||
virtual void visit(Cut &) {}
|
||||
};
|
||||
|
||||
struct IsReference : public Ope::Visitor {
|
||||
@ -1688,6 +1723,7 @@ struct TraceOpeName : public Ope::Visitor {
|
||||
void visit(BackReference &) override { name_ = "BackReference"; }
|
||||
void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
|
||||
void visit(Recovery &) override { name_ = "Recovery"; }
|
||||
void visit(Cut &) override { name_ = "Cut"; }
|
||||
|
||||
static std::string get(Ope &ope) {
|
||||
TraceOpeName vis;
|
||||
@ -1853,6 +1889,7 @@ struct DetectLeftRecursion : public Ope::Visitor {
|
||||
void visit(BackReference &) override { done_ = true; }
|
||||
void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
|
||||
void visit(Recovery &ope) override { ope.ope_->accept(*this); }
|
||||
void visit(Cut &) override { done_ = true; }
|
||||
|
||||
const char *error_s = nullptr;
|
||||
|
||||
@ -2119,6 +2156,7 @@ struct FindReference : public Ope::Visitor {
|
||||
ope.ope_->accept(*this);
|
||||
found_ope = rec(found_ope);
|
||||
}
|
||||
void visit(Cut &ope) override { found_ope = ope.shared_from_this(); }
|
||||
|
||||
std::shared_ptr<Ope> found_ope;
|
||||
|
||||
@ -2516,10 +2554,12 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
|
||||
try {
|
||||
a_val = reduce(chldsv, dt);
|
||||
} catch (const parse_error &e) {
|
||||
if (e.what()) {
|
||||
if (c.error_info.message_pos < s) {
|
||||
c.error_info.message_pos = s;
|
||||
c.error_info.message = e.what();
|
||||
if (c.log) {
|
||||
if (e.what()) {
|
||||
if (c.error_info.message_pos < s) {
|
||||
c.error_info.message_pos = s;
|
||||
c.error_info.message = e.what();
|
||||
}
|
||||
}
|
||||
}
|
||||
len = static_cast<size_t>(-1);
|
||||
@ -2696,31 +2736,49 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
|
||||
inline size_t Recovery::parse_core(const char *s, size_t n,
|
||||
SemanticValues & /*vs*/, Context &c,
|
||||
std::any & /*dt*/) const {
|
||||
auto save_log = c.log;
|
||||
c.log = nullptr;
|
||||
|
||||
const auto &rule = dynamic_cast<Reference &>(*ope_);
|
||||
|
||||
SemanticValues dummy_vs;
|
||||
std::any dummy_dt;
|
||||
auto len = rule.parse(s, n, dummy_vs, c, dummy_dt);
|
||||
// Custom error message
|
||||
if (c.log) {
|
||||
auto label = dynamic_cast<Reference *>(rule.args_[0].get());
|
||||
if (label) {
|
||||
if (!label->rule_->error_message.empty()) {
|
||||
c.error_info.message_pos = c.error_info.error_pos;
|
||||
c.error_info.message = label->rule_->error_message;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c.log = save_log;
|
||||
// Recovery
|
||||
size_t len = static_cast<size_t>(-1);
|
||||
{
|
||||
auto save_log = c.log;
|
||||
c.log = nullptr;
|
||||
auto se = scope_exit([&]() { c.log = save_log; });
|
||||
|
||||
SemanticValues dummy_vs;
|
||||
std::any dummy_dt;
|
||||
|
||||
len = rule.parse(s, n, dummy_vs, c, dummy_dt);
|
||||
}
|
||||
|
||||
if (success(len)) {
|
||||
c.recovered = true;
|
||||
|
||||
if (c.log) {
|
||||
auto label = dynamic_cast<Reference *>(rule.args_[0].get());
|
||||
if (label) {
|
||||
if (!label->rule_->error_message.empty()) {
|
||||
c.error_info.message_pos = c.error_info.error_pos;
|
||||
c.error_info.message = label->rule_->error_message;
|
||||
}
|
||||
}
|
||||
c.error_info.output_log(c.log, c.s, c.l);
|
||||
c.error_info.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Cut
|
||||
if (!c.cut_stack.empty()) {
|
||||
c.cut_stack.back() = true;
|
||||
|
||||
if (c.cut_stack.size() == 1) {
|
||||
// TODO: Remove unneeded entries in packrat memoise table
|
||||
}
|
||||
}
|
||||
c.error_info.clear();
|
||||
|
||||
return len;
|
||||
}
|
||||
@ -2747,6 +2805,7 @@ inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
|
||||
inline void BackReference::accept(Visitor &v) { v.visit(*this); }
|
||||
inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
|
||||
inline void Recovery::accept(Visitor &v) { v.visit(*this); }
|
||||
// Visitor dispatch: hands this Cut node to the visitor's visit(Cut &) overload.
inline void Cut::accept(Visitor &v) { v.visit(*this); }
|
||||
|
||||
inline void AssignIDToDefinition::visit(Holder &ope) {
|
||||
auto p = static_cast<void *>(ope.outer_);
|
||||
@ -2947,10 +3006,10 @@ private:
|
||||
seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
|
||||
opt(g["Instruction"])));
|
||||
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
|
||||
g["Sequence"] <= zom(g["Prefix"]);
|
||||
g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"]));
|
||||
g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
|
||||
g["SuffixWithLabel"] <=
|
||||
seq(g["Suffix"], opt(seq(g["HAT"], g["Identifier"])));
|
||||
seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
|
||||
g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
|
||||
g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
|
||||
g["Primary"] <=
|
||||
@ -3023,7 +3082,6 @@ private:
|
||||
~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
|
||||
g["AND"] <= seq(chr('&'), g["Spacing"]);
|
||||
g["NOT"] <= seq(chr('!'), g["Spacing"]);
|
||||
~g["HAT"] <= seq(chr('^'), g["Spacing"]);
|
||||
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
|
||||
g["STAR"] <= seq(chr('*'), g["Spacing"]);
|
||||
g["PLUS"] <= seq(chr('+'), g["Spacing"]);
|
||||
@ -3031,6 +3089,9 @@ private:
|
||||
~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
|
||||
g["DOT"] <= seq(chr('.'), g["Spacing"]);
|
||||
|
||||
g["CUT"] <= seq(lit(u8"↑"), g["Spacing"]);
|
||||
~g["LABEL"] <= seq(cho(chr('^'), lit(u8"⇑")), g["Spacing"]);
|
||||
|
||||
~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
|
||||
g["Comment"] <=
|
||||
seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
|
||||
@ -3193,7 +3254,7 @@ private:
|
||||
auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
|
||||
auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
|
||||
vs.sv().data(), true, {label}));
|
||||
return cho(ope, recovery);
|
||||
return cho4label_(ope, recovery);
|
||||
}
|
||||
};
|
||||
|
||||
@ -3363,6 +3424,8 @@ private:
|
||||
|
||||
g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
|
||||
|
||||
g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); };
|
||||
|
||||
g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
|
||||
|
||||
g["BackRef"] = [&](const SemanticValues &vs) {
|
||||
|
@ -1239,7 +1239,7 @@ R"(+ START
|
||||
|
||||
TEST_CASE("Error recovery 2", "[error]") {
|
||||
parser pg(R"(
|
||||
START <- ENTRY ((',' ENTRY) / %recover((!(',' / Space) .)+))* (_ / %recover((!'!.' .)+))
|
||||
START <- ENTRY ((',' ENTRY) / %recover((!(',' / Space) .)+))* (_ / %recover(.*))
|
||||
ENTRY <- '[' ITEM (',' ITEM)* ']'
|
||||
ITEM <- WORD / NUM / %recover((!(',' / ']') .)+)
|
||||
NUM <- [0-9]+ ![a-z]
|
||||
@ -1259,7 +1259,7 @@ TEST_CASE("Error recovery 2", "[error]") {
|
||||
R"(1:38: syntax error, unexpected 'ddd', expecting <NUM>.)",
|
||||
R"(1:55: syntax error, unexpected ']', expecting <WORD>.)",
|
||||
R"(1:58: syntax error, unexpected '\n', expecting <NUM>.)",
|
||||
R"(1:56: syntax error, unexpected ',', expecting <Space>.)",
|
||||
R"(2:3: syntax error.)",
|
||||
};
|
||||
|
||||
size_t i = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user