mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 13:25:30 +00:00
Support expression parsing in macro
This commit is contained in:
parent
4b25458012
commit
375794e344
74
README.md
74
README.md
@ -347,40 +347,54 @@ Parsing expressions by precedence climbing algorithm
|
||||
*cpp-peglib* supports [operator-precedence parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser) using the [**precedence climbing algorithm**](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing).
|
||||
|
||||
```cpp
|
||||
parser parser(R"(
|
||||
EXPRESSION <- ATOM (OPERATOR ATOM)* {
|
||||
precedence
|
||||
L - +
|
||||
L / *
|
||||
}
|
||||
ATOM <- NUMBER / '(' EXPRESSION ')'
|
||||
OPERATOR <- < [-+/*] >
|
||||
NUMBER <- < '-'? [0-9]+ >
|
||||
%whitespace <- [ \t\r\n]*
|
||||
)");
|
||||
parser parser(R"(
|
||||
EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
|
||||
PRECEDENCE_PARSING(A, O) <- A (O A)* {
|
||||
precedence
|
||||
L + -
|
||||
L * /
|
||||
}
|
||||
ATOM <- NUMBER / '(' EXPRESSION ')'
|
||||
OPERATOR <- < [-+/*] >
|
||||
NUMBER <- < '-'? [0-9]+ >
|
||||
%whitespace <- [ \t]*
|
||||
)");
|
||||
|
||||
parser["EXPRESSION"] = [](const SemanticValues& sv) -> long {
|
||||
auto result = any_cast<long>(sv[0]);
|
||||
if (sv.size() > 1) {
|
||||
auto ope = any_cast<char>(sv[1]);
|
||||
auto num = any_cast<long>(sv[2]);
|
||||
switch (ope) {
|
||||
case '+': result += num; break;
|
||||
case '-': result -= num; break;
|
||||
case '*': result *= num; break;
|
||||
case '/': result /= num; break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
|
||||
parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
|
||||
auto result = any_cast<long>(sv[0]);
|
||||
if (sv.size() > 1) {
|
||||
auto ope = any_cast<char>(sv[1]);
|
||||
auto num = any_cast<long>(sv[2]);
|
||||
switch (ope) {
|
||||
case '+': result += num; break;
|
||||
case '-': result -= num; break;
|
||||
case '*': result *= num; break;
|
||||
case '/': result /= num; break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
|
||||
|
||||
long val;
|
||||
parser.parse(" -1 + (1 + 2) * 3 - -1", val);
|
||||
assert(val == 9);
|
||||
long val;
|
||||
parser.parse(" -1 + (1 + 2) * 3 - -1", val);
|
||||
assert(val == 9);
|
||||
```
|
||||
|
||||
The *precedence* instruction can be applied only to a rule written in the following 'list' style:
|
||||
|
||||
```
|
||||
R <- A (B A)* {
|
||||
precedence
|
||||
L - +
|
||||
L / *
|
||||
R ^
|
||||
}
|
||||
```
|
||||
|
||||
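The *precedence* instruction contains a list of precedence info entries. Each entry starts with its *associativity*, which is either 'L' (left) or 'R' (right), followed by the operator tokens that belong to that entry. The first entry has the highest order, i.e. it sits at the outermost level of the expression: operators in later entries bind more tightly (in the calculator example above, `*` and `/` bind more tightly than `+` and `-`).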
|
||||
|
||||
AST generation
|
||||
--------------
|
||||
|
||||
|
102
peglib.h
102
peglib.h
@ -707,8 +707,7 @@ private:
|
||||
}
|
||||
|
||||
template <typename F, typename R>
|
||||
Fty make_adaptor(F fn,
|
||||
R (F::*)(const SemanticValues &sv, any &dt) const) {
|
||||
Fty make_adaptor(F fn, R (F::*)(const SemanticValues &sv, any &dt) const) {
|
||||
return TypeAdaptor_csv_dt<R>(fn);
|
||||
}
|
||||
|
||||
@ -1536,8 +1535,8 @@ public:
|
||||
|
||||
PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
|
||||
const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
|
||||
const Action &action)
|
||||
: atom_(atom), binop_(binop), info_(info), action_(action) {}
|
||||
const Definition &rule)
|
||||
: atom_(atom), binop_(binop), info_(info), rule_(rule) {}
|
||||
|
||||
size_t parse_core(const char *s, size_t n, SemanticValues &sv, Context &c,
|
||||
any &dt) const override {
|
||||
@ -1549,11 +1548,13 @@ public:
|
||||
std::shared_ptr<Ope> atom_;
|
||||
std::shared_ptr<Ope> binop_;
|
||||
BinOpeInfo info_;
|
||||
const Action &action_;
|
||||
const Definition &rule_;
|
||||
|
||||
private:
|
||||
size_t parse_expression(const char *s, size_t n, SemanticValues &sv,
|
||||
Context &c, any &dt, size_t min_prec) const;
|
||||
|
||||
Definition &get_reference_for_binop(Context &c) const;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1660,8 +1661,8 @@ inline std::shared_ptr<Ope> bkr(const std::string &name) {
|
||||
inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
|
||||
const std::shared_ptr<Ope> &binop,
|
||||
const PrecedenceClimbing::BinOpeInfo &info,
|
||||
const Action &action) {
|
||||
return std::make_shared<PrecedenceClimbing>(atom, binop, info, action);
|
||||
const Definition &rule) {
|
||||
return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2281,6 +2282,8 @@ public:
|
||||
}
|
||||
|
||||
std::string name;
|
||||
const char *s = nullptr;
|
||||
;
|
||||
size_t id = 0;
|
||||
Action action;
|
||||
std::function<void(const char *s, size_t n, any &dt)> enter;
|
||||
@ -2584,6 +2587,18 @@ inline size_t BackReference::parse_core(const char *s, size_t n,
|
||||
throw std::runtime_error("Invalid back reference...");
|
||||
}
|
||||
|
||||
// Returns the rule (Definition) that the binary-operator operand refers to.
//
// Outside a macro, `binop_` is a Reference and its `rule_` is returned
// directly. Inside a macro, `binop_` names a macro parameter, so the
// Reference bound to that parameter is looked up in the arguments on top of
// the context's argument stack and *its* `rule_` is returned instead.
//
// Both casts are reference casts: an operand that is not a Reference makes
// dynamic_cast throw instead of returning null.
//
// NOTE(review): this assumes that whenever `rule_` is a macro, `binop_`
// refers to one of the macro's parameters (so `iarg_` is meaningful) and that
// the bound argument is itself a plain rule reference - confirm against how
// Reference::iarg_ / Reference::rule_ are populated.
inline Definition &
PrecedenceClimbing::get_reference_for_binop(Context &c) const {
  if (!rule_.is_macro) {
    return *dynamic_cast<Reference &>(*binop_).rule_;
  }

  // Macro case: resolve the parameter to the argument supplied by the caller.
  const auto &macro_args = c.top_args();
  const auto param_index = dynamic_cast<Reference &>(*binop_).iarg_;
  const auto &bound_ope = macro_args[param_index];
  return *dynamic_cast<Reference &>(*bound_ope).rule_;
}
|
||||
|
||||
inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
|
||||
SemanticValues &sv,
|
||||
Context &c, any &dt,
|
||||
@ -2592,10 +2607,11 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
|
||||
if (fail(len)) { return len; }
|
||||
|
||||
std::string tok;
|
||||
auto &rule = dynamic_cast<Reference &>(*binop_).rule_;
|
||||
auto action = rule->action;
|
||||
//auto &rule = dynamic_cast<Reference &>(*binop_).rule_;
|
||||
auto &rule = get_reference_for_binop(c);
|
||||
auto action = rule.action;
|
||||
|
||||
rule->action = [&](SemanticValues &sv, any &dt) -> any {
|
||||
rule.action = [&](SemanticValues &sv, any &dt) -> any {
|
||||
tok = sv.token();
|
||||
if (action) {
|
||||
return action(sv, dt);
|
||||
@ -2604,7 +2620,7 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
|
||||
}
|
||||
return any();
|
||||
};
|
||||
auto action_se = make_scope_exit([&]() { rule->action = action; });
|
||||
auto action_se = make_scope_exit([&]() { rule.action = action; });
|
||||
|
||||
auto save_error_pos = c.error_pos;
|
||||
|
||||
@ -2651,10 +2667,10 @@ inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
|
||||
i += chl;
|
||||
|
||||
any val;
|
||||
if (action_) {
|
||||
if (rule_.action) {
|
||||
sv.s_ = s;
|
||||
sv.n_ = i;
|
||||
val = action_(sv, dt);
|
||||
val = rule_.action(sv, dt);
|
||||
} else if (!sv.empty()) {
|
||||
val = sv[0];
|
||||
}
|
||||
@ -3018,6 +3034,7 @@ private:
|
||||
auto &rule = grammar[name];
|
||||
rule <= ope;
|
||||
rule.name = name;
|
||||
rule.s = sv.c_str();
|
||||
rule.ignoreSemanticValue = ignore;
|
||||
rule.is_macro = is_macro;
|
||||
rule.params = params;
|
||||
@ -3227,6 +3244,45 @@ private:
|
||||
g["PrecedenceAssoc"] = [](const SemanticValues &sv) { return sv.token(); };
|
||||
}
|
||||
|
||||
// Applies a 'precedence' instruction to `rule`.
//
// The rule's core operator must have the 'list' shape `A (B A)*`, where every
// operand is a rule reference, both `A`s carry the same name and `B` carries a
// different one. When it does, the rule's operator is replaced by a
// precedence-climbing operator built from `A`, `B` and `info`, and the rule's
// own action is disabled (the precedence-climbing operator receives `rule`
// and invokes its action itself).
//
// Parameters:
//   rule - rule the instruction is attached to; modified on success.
//   info - operator table parsed from the instruction body.
//   s    - start of the grammar text; used with `rule.s` to locate the rule
//          for the diagnostic.
//   log  - optional diagnostic sink; nothing is reported when it is empty.
//
// Returns true on success. Returns false, after reporting through `log`,
// when the rule does not have the required shape.
//
// NOTE(review): only the first two elements of each sequence are inspected;
// whether a longer sequence such as `A (B A)* C` should be rejected is not
// checked here - confirm the intended behaviour.
bool apply_precedence_instruction(Definition &rule,
                                  const PrecedenceClimbing::BinOpeInfo &info,
                                  const char *s, Log log) {
  // Single place that emits the diagnostic, so the wording cannot drift
  // between the "wrong names" and the "wrong shape" paths.
  auto report_not_applicable = [&]() {
    if (log) {
      auto line = line_info(s, rule.s);
      log(line.first, line.second,
          "'precedence' instruction cannot be applied to '" + rule.name +
              "'.");
    }
  };

  try {
    // Each reference-form dynamic_cast throws when the operator tree does not
    // match `A (B A)*`; the catch block below turns that into a diagnostic.
    auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
    auto atom = seq.opes_[0];
    auto &seq1 = dynamic_cast<Sequence &>(
        *dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
    auto binop = seq1.opes_[0];
    auto atom1 = seq1.opes_[1];

    // The names are only compared, so bind them by reference (no copies).
    const auto &atom_name = dynamic_cast<Reference &>(*atom).name_;
    const auto &binop_name = dynamic_cast<Reference &>(*binop).name_;
    const auto &atom1_name = dynamic_cast<Reference &>(*atom1).name_;

    // Both atoms must be the same rule, and the operator must differ from it.
    if (atom_name != atom1_name || atom_name == binop_name) {
      report_not_applicable();
      return false;
    }

    rule.holder_->ope_ = pre(atom, binop, info, rule);
    rule.disable_action = true;
  } catch (...) {
    report_not_applicable();
    return false;
  }
  return true;
}
|
||||
|
||||
std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
|
||||
const Rules &rules, std::string &start,
|
||||
Log log) {
|
||||
@ -3318,7 +3374,6 @@ private:
|
||||
log(line.first, line.second, "'" + name + "' is left recursive.");
|
||||
}
|
||||
ret = false;
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3363,26 +3418,15 @@ private:
|
||||
for (const auto &item : data.instructions) {
|
||||
const auto &name = item.first;
|
||||
const auto &instruction = item.second;
|
||||
auto &rule = grammar[name];
|
||||
|
||||
if (instruction.type == "precedence") {
|
||||
auto &rule = grammar[name];
|
||||
|
||||
auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
|
||||
auto &atom = seq.opes_[0];
|
||||
auto &seq1 = dynamic_cast<Sequence &>(
|
||||
*dynamic_cast<ZeroOrMore &>(*seq.opes_[1]).ope_);
|
||||
auto &binop = seq1.opes_[0];
|
||||
auto &atom1 = seq1.opes_[1];
|
||||
|
||||
if (atom != atom1) {
|
||||
// TODO: check
|
||||
}
|
||||
|
||||
const auto &info =
|
||||
any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
|
||||
|
||||
rule.holder_->ope_ = pre(atom, binop, info, rule.action);
|
||||
rule.disable_action = true;
|
||||
if (!apply_precedence_instruction(rule, info, s, log)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
118
test/test2.cc
118
test/test2.cc
@ -100,9 +100,7 @@ TEST_CASE("Not infinite 3", "[infinite loop]")
|
||||
|
||||
TEST_CASE("Precedence climbing", "[precedence]")
|
||||
{
|
||||
// Create a PEG parser
|
||||
parser parser(R"(
|
||||
# Grammar for simple calculator...
|
||||
START <- _ EXPRESSION
|
||||
EXPRESSION <- ATOM (OPERATOR ATOM)* {
|
||||
precedence
|
||||
@ -156,8 +154,122 @@ TEST_CASE("Precedence climbing", "[precedence]")
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Packrat parser test with %whitespace%", "[packrat]")
|
||||
TEST_CASE("Precedence climbing with macro", "[precedence]")
|
||||
{
|
||||
// Create a PEG parser
|
||||
parser parser(R"(
|
||||
EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
|
||||
PRECEDENCE_PARSING(A, O) <- A (O A)* {
|
||||
precedence
|
||||
L + -
|
||||
L * /
|
||||
}
|
||||
ATOM <- NUMBER / '(' EXPRESSION ')'
|
||||
OPERATOR <- < [-+/*] >
|
||||
NUMBER <- < '-'? [0-9]+ >
|
||||
%whitespace <- [ \t]*
|
||||
)");
|
||||
|
||||
bool ret = parser;
|
||||
REQUIRE(ret == true);
|
||||
|
||||
// Setup actions
|
||||
parser["PRECEDENCE_PARSING"] = [](const SemanticValues& sv) -> long {
|
||||
auto result = any_cast<long>(sv[0]);
|
||||
if (sv.size() > 1) {
|
||||
auto ope = any_cast<char>(sv[1]);
|
||||
auto num = any_cast<long>(sv[2]);
|
||||
switch (ope) {
|
||||
case '+': result += num; break;
|
||||
case '-': result -= num; break;
|
||||
case '*': result *= num; break;
|
||||
case '/': result /= num; break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
parser["OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
|
||||
|
||||
{
|
||||
auto expr = " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 ";
|
||||
long val = 0;
|
||||
ret = parser.parse(expr, val);
|
||||
|
||||
REQUIRE(ret == true);
|
||||
REQUIRE(val == -3);
|
||||
}
|
||||
|
||||
{
|
||||
auto expr = "-1+-2--3"; // -1 + -2 - -3 = 0
|
||||
long val = 0;
|
||||
ret = parser.parse(expr, val);
|
||||
|
||||
REQUIRE(ret == true);
|
||||
REQUIRE(val == 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Precedence climbing error1", "[precedence]")
|
||||
{
|
||||
parser parser(R"(
|
||||
START <- _ EXPRESSION
|
||||
EXPRESSION <- ATOM (OPERATOR ATOM1)* {
|
||||
precedence
|
||||
L + -
|
||||
L * /
|
||||
}
|
||||
ATOM <- NUMBER / T('(') EXPRESSION T(')')
|
||||
ATOM1 <- NUMBER / T('(') EXPRESSION T(')')
|
||||
OPERATOR <- T([-+/*])
|
||||
NUMBER <- T('-'? [0-9]+)
|
||||
~_ <- [ \t]*
|
||||
T(S) <- < S > _
|
||||
)");
|
||||
|
||||
bool ret = parser;
|
||||
REQUIRE(ret == false);
|
||||
}
|
||||
|
||||
TEST_CASE("Precedence climbing error2", "[precedence]")
|
||||
{
|
||||
parser parser(R"(
|
||||
START <- _ EXPRESSION
|
||||
EXPRESSION <- ATOM OPERATOR ATOM {
|
||||
precedence
|
||||
L + -
|
||||
L * /
|
||||
}
|
||||
ATOM <- NUMBER / T('(') EXPRESSION T(')')
|
||||
OPERATOR <- T([-+/*])
|
||||
NUMBER <- T('-'? [0-9]+)
|
||||
~_ <- [ \t]*
|
||||
T(S) <- < S > _
|
||||
)");
|
||||
|
||||
bool ret = parser;
|
||||
REQUIRE(ret == false);
|
||||
}
|
||||
|
||||
TEST_CASE("Precedence climbing error3", "[precedence]") {
|
||||
parser parser(R"(
|
||||
EXPRESSION <- PRECEDENCE_PARSING(ATOM, OPERATOR)
|
||||
PRECEDENCE_PARSING(A, O) <- A (O A)+ {
|
||||
precedence
|
||||
L + -
|
||||
L * /
|
||||
}
|
||||
ATOM <- NUMBER / '(' EXPRESSION ')'
|
||||
OPERATOR <- < [-+/*] >
|
||||
NUMBER <- < '-'? [0-9]+ >
|
||||
%whitespace <- [ \t]*
|
||||
)");
|
||||
|
||||
bool ret = parser;
|
||||
REQUIRE(ret == false);
|
||||
}
|
||||
|
||||
TEST_CASE("Packrat parser test with %whitespace%", "[packrat]") {
|
||||
peg::parser parser(R"(
|
||||
ROOT <- 'a'
|
||||
%whitespace <- SPACE*
|
||||
|
Loading…
Reference in New Issue
Block a user