Support expression parsing in macro

This commit is contained in:
yhirose 2020-02-07 15:50:06 -05:00
parent 4b25458012
commit 375794e344
3 changed files with 232 additions and 62 deletions

View File

@ -347,40 +347,54 @@ Parsing expressions by precedence climbing algorithm
*cpp-peglib* supports [operator-precedence parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser) by [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing) *cpp-peglib* supports [operator-precedence parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser) by [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing)
```cpp ```cpp
parser parser(R"( parser parser(R"(
EXPRESSION <- ATOM (OPERATOR ATOM)* { EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
precedence PRECEDENCE_PARSING(A, O) <- A (O A)* {
L - + precedence
L / * L + -
} L * /
ATOM <- NUMBER / '(' EXPRESSION ')' }
OPERATOR <- < [-+/*] > ATOM <- NUMBER / '(' EXPRESSION ')'
NUMBER <- < '-'? [0-9]+ > OPERATOR <- < [-+/*] >
%whitespace <- [ \t\r\n]* NUMBER <- < '-'? [0-9]+ >
)"); %whitespace <- [ \t]*
)");
parser["EXPRESSION"] = [](const SemanticValues& sv) -> long { parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
auto result = any_cast<long>(sv[0]); auto result = any_cast<long>(sv[0]);
if (sv.size() > 1) { if (sv.size() > 1) {
auto ope = any_cast<char>(sv[1]); auto ope = any_cast<char>(sv[1]);
auto num = any_cast<long>(sv[2]); auto num = any_cast<long>(sv[2]);
switch (ope) { switch (ope) {
case '+': result += num; break; case '+': result += num; break;
case '-': result -= num; break; case '-': result -= num; break;
case '*': result *= num; break; case '*': result *= num; break;
case '/': result /= num; break; case '/': result /= num; break;
} }
} }
return result; return result;
}; };
parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
long val; long val;
parser.parse(" -1 + (1 + 2) * 3 - -1", val); parser.parse(" -1 + (1 + 2) * 3 - -1", val);
assert(val == 9); assert(val == 9);
``` ```
The *precedence* instruction can be applied only to a rule written in the following 'list' style.
```
R <- A (B A)* {
precedence
L - +
L / *
R ^
}
```
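The *precedence* instruction contains a list of precedence info entries. Each entry starts with an *associativity*, which is 'L' (left) or 'R' (right), followed by the operator tokens that share that precedence level. Entries are listed in order of increasing precedence: the operators in the first entry bind least tightly, and those in the last entry bind most tightly.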
AST generation AST generation
-------------- --------------

102
peglib.h
View File

@ -707,8 +707,7 @@ private:
} }
template <typename F, typename R> template <typename F, typename R>
Fty make_adaptor(F fn, Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt) const) {
R (F::*)(const SemanticValues &sv, any &dt) const) {
return TypeAdaptor_csv_dt<R>(fn); return TypeAdaptor_csv_dt<R>(fn);
} }
@ -1536,8 +1535,8 @@ public:
PrecedenceClimbing(const std::shared_ptr<Ope> &atom, PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
const std::shared_ptr<Ope> &binop, const BinOpeInfo &info, const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
const Action &action) const Definition &rule)
: atom_(atom), binop_(binop), info_(info), action_(action) {} : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c, size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c,
any &dt) const override { any &dt) const override {
@ -1549,11 +1548,13 @@ public:
std::shared_ptr<Ope> atom_; std::shared_ptr<Ope> atom_;
std::shared_ptr<Ope> binop_; std::shared_ptr<Ope> binop_;
BinOpeInfo info_; BinOpeInfo info_;
const Action &action_; const Definition &rule_;
private: private:
size_t parse_expression(const char *s, size_t n, SemanticValues &sv, size_t parse_expression(const char *s, size_t n, SemanticValues &sv,
Context &c, any &dt, size_t min_prec) const; Context &c, any &dt, size_t min_prec) const;
Definition &get_reference_for_binop(Context &c) const;
}; };
/* /*
@ -1660,8 +1661,8 @@ inline std::shared_ptr<Ope> bkr(const std::string &name) {
inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom, inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
const std::shared_ptr<Ope> &binop, const std::shared_ptr<Ope> &binop,
const PrecedenceClimbing::BinOpeInfo &info, const PrecedenceClimbing::BinOpeInfo &info,
const Action &action) { const Definition &rule) {
return std::make_shared<PrecedenceClimbing>(atom, binop, info, action); return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
} }
/* /*
@ -2281,6 +2282,8 @@ public:
} }
std::string name; std::string name;
const char *s = nullptr;
;
size_t id = 0; size_t id = 0;
Action action; Action action;
std::function<void(const char *s, size_t n, any &dt)> enter; std::function<void(const char *s, size_t n, any &dt)> enter;
@ -2584,6 +2587,18 @@ inline size_t BackReference::parse_core(const char *s, size_t n,
throw std::runtime_error("Invalid back reference..."); throw std::runtime_error("Invalid back reference...");
} }
// Returns the rule (Definition) that the binary-operator operand resolves to.
//
// For an ordinary rule the operator reference points at its rule directly.
// When the enclosing rule is a macro, the operator is a macro parameter:
// the argument bound to it is fetched from the arguments on top of the
// context, and the rule that argument refers to is returned instead.
//
// A failed dynamic_cast on a reference throws, so both the operator operand
// and (in the macro case) the bound argument must be Reference operators.
inline Definition& PrecedenceClimbing::get_reference_for_binop(Context &c) const {
  auto &binop_ref = dynamic_cast<Reference &>(*binop_);
  if (!rule_.is_macro) { return *binop_ref.rule_; }

  // Reference parameter in macro: `iarg_` selects the bound argument.
  const auto &macro_args = c.top_args();
  const auto &bound_operand = macro_args[binop_ref.iarg_];
  return *dynamic_cast<Reference &>(*bound_operand).rule_;
}
inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n, inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
SemanticValues &sv, SemanticValues &sv,
Context &c, any &dt, Context &c, any &dt,
@ -2592,10 +2607,11 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
if (fail(len)) { return len; } if (fail(len)) { return len; }
std::string tok; std::string tok;
auto &rule = dynamic_cast<Reference &>(*binop_).rule_; //auto &rule = dynamic_cast<Reference &>(*binop_).rule_;
auto action = rule->action; auto &rule = get_reference_for_binop(c);
auto action = rule.action;
rule->action = [&](SemanticValues &sv, any &dt) -> any { rule.action = [&](SemanticValues &sv, any &dt) -> any {
tok = sv.token(); tok = sv.token();
if (action) { if (action) {
return action(sv, dt); return action(sv, dt);
@ -2604,7 +2620,7 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
} }
return any(); return any();
}; };
auto action_se = make_scope_exit([&]() { rule->action = action; }); auto action_se = make_scope_exit([&]() { rule.action = action; });
auto save_error_pos = c.error_pos; auto save_error_pos = c.error_pos;
@ -2651,10 +2667,10 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
i += chl; i += chl;
any val; any val;
if (action_) { if (rule_.action) {
sv.s_ = s; sv.s_ = s;
sv.n_ = i; sv.n_ = i;
val = action_(sv, dt); val = rule_.action(sv, dt);
} else if (!sv.empty()) { } else if (!sv.empty()) {
val = sv[0]; val = sv[0];
} }
@ -3018,6 +3034,7 @@ private:
auto &rule = grammar[name]; auto &rule = grammar[name];
rule <= ope; rule <= ope;
rule.name = name; rule.name = name;
rule.s = sv.c_str();
rule.ignoreSemanticValue = ignore; rule.ignoreSemanticValue = ignore;
rule.is_macro = is_macro; rule.is_macro = is_macro;
rule.params = params; rule.params = params;
@ -3227,6 +3244,45 @@ private:
g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); }; g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); };
} }
// Converts a 'list' style rule of the form `A (B A)*` into a
// precedence-climbing parser, as requested by a `precedence` instruction.
//
//   rule: definition carrying the instruction. On success its operator is
//         replaced by the one built with pre() and `disable_action` is set.
//   info: operator precedence/associativity table forwarded to pre().
//   s:    start of the grammar text; combined with `rule.s` to compute the
//         line/column reported on error.
//   log:  optional error callback (line, column, message); skipped if empty.
//
// Returns true on success. Returns false, after reporting through `log`,
// when the rule does not have the required shape: exactly two elements in the
// outer sequence, a zero-or-more repetition of a two-element sequence, all
// three operands being references, the two atoms naming the same rule, and
// the operator naming a different one.
bool apply_precedence_instruction(Definition &rule,
                                  const PrecedenceClimbing::BinOpeInfo &info,
                                  const char *s, Log log) {
  // Single place that reports the failure, so both failure paths stay in sync.
  auto report_error = [&]() {
    if (log) {
      auto line = line_info(s, rule.s);
      log(line.first, line.second,
          "'precedence' instruction cannot be applied to '" + rule.name +
              "'.");
    }
    return false;
  };

  try {
    auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
    // Anything after `(B A)*` would be silently dropped by the rewrite below.
    if (seq.opes_.size() != 2) { return report_error(); }
    auto atom = seq.opes_[0];

    auto &seq1 = dynamic_cast<Sequence &>(
        *dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
    // Likewise, the repeated part must be exactly `B A`.
    if (seq1.opes_.size() != 2) { return report_error(); }
    auto binop = seq1.opes_[0];
    auto atom1 = seq1.opes_[1];

    auto atom_name = dynamic_cast<Reference &>(*atom).name_;
    auto binop_name = dynamic_cast<Reference &>(*binop).name_;
    auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;

    if (atom_name != atom1_name || atom_name == binop_name) {
      return report_error();
    }

    rule.holder_->ope_ = pre(atom, binop, info, rule);
    rule.disable_action = true;
  } catch (...) {
    // A failed dynamic_cast on a reference throws: the rule is not shaped
    // like `A (B A)*`.
    return report_error();
  }

  return true;
}
std::shared_ptr<Grammar> perform_core(const char *s, size_t n, std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
const Rules &rules, std::string &start, const Rules &rules, std::string &start,
Log log) { Log log) {
@ -3318,7 +3374,6 @@ private:
log(line.first, line.second, "'" + name + "' is left recursive."); log(line.first, line.second, "'" + name + "' is left recursive.");
} }
ret = false; ret = false;
;
} }
} }
@ -3363,26 +3418,15 @@ private:
for (const auto &item : data.instructions) { for (const auto &item : data.instructions) {
const auto &name = item.first; const auto &name = item.first;
const auto &instruction = item.second; const auto &instruction = item.second;
auto &rule = grammar[name];
if (instruction.type == "precedence") { if (instruction.type == "precedence") {
auto &rule = grammar[name];
auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
auto &atom = seq.opes_[0];
auto &seq1 = dynamic_cast<Sequence &>(
*dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
auto &binop = seq1.opes_[0];
auto &atom1 = seq1.opes_[1];
if (atom != atom1) {
// TODO: check
}
const auto &info = const auto &info =
any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data); any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
rule.holder_->ope_ = pre(atom, binop, info, rule.action); if (!apply_precedence_instruction(rule, info, s, log)) {
rule.disable_action = true; return nullptr;
}
} }
} }

View File

@ -100,9 +100,7 @@ TEST_CASE("Not infinite 3", "[infinite loop]")
TEST_CASE("Precedence climbing", "[precedence]") TEST_CASE("Precedence climbing", "[precedence]")
{ {
// Create a PEG parser
parser parser(R"( parser parser(R"(
# Grammar for simple calculator...
START <- _ EXPRESSION START <- _ EXPRESSION
EXPRESSION <- ATOM (OPERATOR ATOM)* { EXPRESSION <- ATOM (OPERATOR ATOM)* {
precedence precedence
@ -156,8 +154,122 @@ TEST_CASE("Precedence climbing", "[precedence]")
} }
} }
TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") TEST_CASE("Precedence climbing with macro", "[precedence]")
{ {
// Create a PEG parser
parser parser(R"(
EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
PRECEDENCE_PARSING(A, O) <- A (O A)* {
precedence
L + -
L * /
}
ATOM <- NUMBER / '(' EXPRESSION ')'
OPERATOR <- < [-+/*] >
NUMBER <- < '-'? [0-9]+ >
%whitespace <- [ \t]*
)");
bool ret = parser;
REQUIRE(ret == true);
// Setup actions
parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
auto result = any_cast<long>(sv[0]);
if (sv.size() > 1) {
auto ope = any_cast<char>(sv[1]);
auto num = any_cast<long>(sv[2]);
switch (ope) {
case '+': result += num; break;
case '-': result -= num; break;
case '*': result *= num; break;
case '/': result /= num; break;
}
}
return result;
};
parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
{
auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
long val = 0;
ret = parser.parse(expr, val);
REQUIRE(ret == true);
REQUIRE(val == -3);
}
{
auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
long val = 0;
ret = parser.parse(expr, val);
REQUIRE(ret == true);
REQUIRE(val == 0);
}
}
// The operand after the operator (ATOM1) differs from the leading operand
// (ATOM), so the `precedence` instruction must be rejected and the grammar
// must fail to load.
TEST_CASE("Precedence climbing error1", "[precedence]") {
  parser pg(R"(
START <- _ EXPRESSION
EXPRESSION <- ATOM (OPERATOR ATOM1)* {
precedence
L + -
L * /
}
ATOM <- NUMBER / T('(') EXPRESSION T(')')
ATOM1 <- NUMBER / T('(') EXPRESSION T(')')
OPERATOR <- T([-+/*])
NUMBER <- T('-'? [0-9]+)
~_ <- [ \t]*
T(S) <- < S > _
)");

  const bool loaded = pg;
  REQUIRE_FALSE(loaded);
}
// The rule is a plain three-element sequence with no `( ... )*` repetition,
// so the `precedence` instruction must be rejected and the grammar must fail
// to load.
TEST_CASE("Precedence climbing error2", "[precedence]") {
  parser pg(R"(
START <- _ EXPRESSION
EXPRESSION <- ATOM OPERATOR ATOM {
precedence
L + -
L * /
}
ATOM <- NUMBER / T('(') EXPRESSION T(')')
OPERATOR <- T([-+/*])
NUMBER <- T('-'? [0-9]+)
~_ <- [ \t]*
T(S) <- < S > _
)");

  const bool loaded = pg;
  REQUIRE_FALSE(loaded);
}
// The macro body repeats `(O A)` with `+` (one or more) instead of `*`
// (zero or more), so the `precedence` instruction must be rejected and the
// grammar must fail to load.
TEST_CASE("Precedence climbing error3", "[precedence]") {
  parser pg(R"(
EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
PRECEDENCE_PARSING(A, O) <- A (O A)+ {
precedence
L + -
L * /
}
ATOM <- NUMBER / '(' EXPRESSION ')'
OPERATOR <- < [-+/*] >
NUMBER <- < '-'? [0-9]+ >
%whitespace <- [ \t]*
)");

  const bool loaded = pg;
  REQUIRE_FALSE(loaded);
}
TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") {
peg::parser parser(R"( peg::parser parser(R"(
ROOT <- 'a' ROOT <- 'a'
%whitespace <- SPACE* %whitespace <- SPACE*