diff --git a/README.md b/README.md index 94ed9bc..a270e0e 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The PEG syntax is well described on page 2 in the [document](http://pdos.csail.m * `~` (Ignore operator) * `\x??` (Hex number char) -This library is a [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parser which supports the linear-time parsing. +This library also supports linear-time parsing, known as [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing. How to use ---------- @@ -41,6 +41,8 @@ int main(void) { peg parser(syntax); + parser.packrat_parsing(true); // Enable packrat parsing + // (3) Setup an action parser["Additive"] = { nullptr, // Default action @@ -59,19 +61,13 @@ int main(void) { } }; - /* This action is not necessary. - parser["Primary"] = [](const SemanticValues& sv) { - return sv[0]; - }; - */ - parser["Number"] = [](const char* s, size_t l) { return stoi(string(s, l), nullptr, 10); }; // (4) Parse int val; - parser.parse("(1+2)*3", val); + parser.parse_with_value("(1+2)*3", val); assert(val == 9); } diff --git a/example/calc3.cc b/example/calc3.cc index d5f879f..6c69e2b 100644 --- a/example/calc3.cc +++ b/example/calc3.cc @@ -108,7 +108,7 @@ int main(int argc, const char** argv) parser["NUMBER"] = ast_num::create; shared_ptr ast; - if (parser.parse(s, ast)) { + if (parser.parse_with_value(s, ast)) { cout << s << " = " << ast->eval() << endl; return 0; } diff --git a/lint/peglint.cc b/lint/peglint.cc index 605e693..4936099 100644 --- a/lint/peglint.cc +++ b/lint/peglint.cc @@ -61,9 +61,7 @@ int main(int argc, const char** argv) return -1; } - peglib::any dt; - auto ret = peg.lint(source.data(), source.size(), true, false, dt, - [&](size_t ln, size_t col, const string& msg) { + auto ret = peg.lint(source.data(), source.size(), [&](size_t ln, size_t col, const string& msg) { cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl; }); diff --git a/peglib.h 
b/peglib.h index 333c764..70618b8 100644 --- a/peglib.h +++ b/peglib.h @@ -1034,14 +1034,16 @@ public: }; Definition() - : actions(1) - , ignore(false) - , holder_(std::make_shared(this)) {} + : actions(1) + , ignore(false) + , packrat(false) + , holder_(std::make_shared(this)) {} Definition(const Definition& rhs) : name(rhs.name) , actions(1) , ignore(false) + , packrat(false) , holder_(rhs.holder_) { holder_->outer_ = this; @@ -1051,6 +1053,7 @@ public: : name(std::move(rhs.name)) , actions(1) , ignore(rhs.ignore) + , packrat(rhs.packrat) , holder_(std::move(rhs.holder_)) { holder_->outer_ = this; @@ -1059,6 +1062,7 @@ public: Definition(const std::shared_ptr& ope) : actions(1) , ignore(false) + , packrat(false) , holder_(std::make_shared(this)) { holder_->ope_ = ope; @@ -1073,36 +1077,32 @@ public: return *this; } - Result parse_core(const char* s, size_t l, SemanticValues& sv, any& dt, bool packrat) const { - DefinitionIDs defIds; - holder_->accept(defIds); - - Context c(s, l, defIds.ids.size(), packrat); - auto len = holder_->parse(s, l, sv, c, dt); - return Result { success(len), len, c.error_ptr, c.msg }; - } - - Result parse(const char* s, size_t l, any& dt, bool packrat = false) const { - SemanticValues sv; - return parse_core(s, l, sv, dt, packrat); - } - - Result parse(const char* s, size_t l, bool packrat = false) const { + Result parse(const char* s, size_t l) const { SemanticValues sv; any dt; - return parse_core(s, l, sv, dt, packrat); + return parse_core(s, l, sv, dt); } - Result parse(const char* s, bool packrat = false) const { + Result parse(const char* s) const { auto l = strlen(s); - return parse(s, l, packrat); + return parse(s, l); + } + + Result parse_with_data(const char* s, size_t l, any& dt) const { + SemanticValues sv; + return parse_core(s, l, sv, dt); + } + + Result parse_with_data(const char* s, any& dt) const { + auto l = strlen(s); + return parse_with_data(s, l, dt); } template - Result parse_with_value(const char* s, size_t l, T& 
val, bool packrat = false) const { + Result parse_with_value(const char* s, size_t l, T& val) const { SemanticValues sv; any dt; - auto r = parse_core(s, l, sv, dt, packrat); + auto r = parse_core(s, l, sv, dt); if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) { val = sv[0].val.get(); } @@ -1110,9 +1110,9 @@ public: } template - Result parse_with_value(const char* s, T& val, bool packrat = false) const { + Result parse_with_value(const char* s, T& val) const { auto l = strlen(s); - return parse_with_value(s, l, val, packrat); + return parse_with_value(s, l, val); } Definition& operator=(Action ac) { @@ -1145,6 +1145,7 @@ public: size_t id; std::vector actions; bool ignore; + bool packrat; private: friend class DefinitionReference; @@ -1152,6 +1153,15 @@ private: Definition& operator=(const Definition& rhs); Definition& operator=(Definition&& rhs); + Result parse_core(const char* s, size_t l, SemanticValues& sv, any& dt) const { + DefinitionIDs defIds; + holder_->accept(defIds); + + Context c(s, l, defIds.ids.size(), packrat); + auto len = holder_->parse(s, l, sv, c, dt); + return Result { success(len), len, c.error_ptr, c.msg }; + } + std::shared_ptr holder_; }; @@ -1602,7 +1612,7 @@ private: data.match_action = ma; any dt = &data; - auto r = g["Grammar"].parse(s, l, dt, false); + auto r = g["Grammar"].parse_with_data(s, l, dt); if (!r.ret) { if (log) { @@ -1771,62 +1781,76 @@ public: return grammar_ != nullptr; } - template - bool parse(const char* s, size_t l, T& out, bool exact = true, bool packrat = false) const { + bool parse(const char* s, size_t l) const { if (grammar_ != nullptr) { const auto& rule = (*grammar_)[start_]; - auto r = rule.parse_with_value(s, l, out, packrat); - return r.ret && (!exact || r.len == l); + auto r = rule.parse(s, l); + return r.ret && r.len == l; + } + return false; + } + + bool parse(const char* s) const { + auto l = strlen(s); + return parse(s, l); + } + + bool parse_with_data(const char* s, size_t l, any& dt) const { 
+ if (grammar_ != nullptr) { + const auto& rule = (*grammar_)[start_]; + auto r = rule.parse_with_data(s, l, dt); + return r.ret && r.len == l; + } + return false; + } + + bool parse_with_data(const char* s, any& dt) const { + auto l = strlen(s); + return parse_with_data(s, l, dt); + } + + template + bool parse_with_value(const char* s, size_t l, T& out) const { + if (grammar_ != nullptr) { + const auto& rule = (*grammar_)[start_]; + auto r = rule.parse_with_value(s, l, out); + return r.ret && r.len == l; } return false; } template - bool parse(const char* s, T& out, bool exact = true, bool packrat = false) const { + bool parse_with_value(const char* s, T& out) const { auto l = strlen(s); - return parse(s, l, out, exact, packrat); + return parse_with_value(s, l, out); } - bool parse(const char* s, size_t l, bool exact = true, bool packrat = false) const { - if (grammar_ != nullptr) { - const auto& rule = (*grammar_)[start_]; - auto r = rule.parse(s, l, packrat); - return r.ret && (!exact || r.len == l); - } - return false; - } - - bool parse(const char* s, bool exact = true, bool packrat = false) const { - auto l = strlen(s); - return parse(s, l, exact, packrat); - } - - bool lint(const char* s, size_t l, bool exact, bool packrat, any& dt, Log log) { + bool lint(const char* s, size_t l, Log log) { assert(grammar_); if (grammar_ != nullptr) { const auto& rule = (*grammar_)[start_]; - auto r = rule.parse(s, l, dt, packrat); + auto r = rule.parse(s, l); if (!r.ret) { if (log) { auto line = line_info(s, r.error_ptr); - log(line.first, line.second, r.msg ? 
"syntax error" : r.msg); + log(line.first, line.second, r.msg ? r.msg : "syntax error"); } - } else if (exact && r.len != l) { + } else if (r.len != l) { auto line = line_info(s, s + r.len); log(line.first, line.second, "syntax error"); } - return r.ret && (!exact || r.len == l); + return r.ret && r.len == l; } return false; } - bool search(const char* s, size_t l, size_t& mpos, size_t& mlen, bool packrat = false) const { + bool search(const char* s, size_t l, size_t& mpos, size_t& mlen) const { const auto& rule = (*grammar_)[start_]; if (grammar_ != nullptr) { size_t pos = 0; while (pos < l) { size_t len = l - pos; - auto r = rule.parse(s + pos, len, packrat); + auto r = rule.parse(s + pos, len); if (r.ret) { mpos = pos; mlen = len; @@ -1840,15 +1864,22 @@ public: return false; } - bool search(const char* s, size_t& mpos, size_t& mlen, bool packrat) const { + bool search(const char* s, size_t& mpos, size_t& mlen) const { auto l = strlen(s); - return search(s, l, mpos, mlen, packrat); + return search(s, l, mpos, mlen); } Definition& operator[](const char* s) { return (*grammar_)[s]; } + void packrat_parsing(bool sw) { + if (grammar_ != nullptr) { + auto& rule = (*grammar_)[start_]; + rule.packrat = sw; + } + } + MatchAction match_action; private: diff --git a/test/test.cc b/test/test.cc index 67314fb..a128aa5 100644 --- a/test/test.cc +++ b/test/test.cc @@ -76,9 +76,9 @@ TEST_CASE("String capture test2", "[general]") TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const char* s, size_t l) { tags.push_back(string(s, l)); }; WS <= zom(cls(" \t")); - auto m = ROOT.parse(" [tag1] [tag:2] [tag-3] "); + auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); - REQUIRE(m.ret == true); + REQUIRE(r.ret == true); REQUIRE(tags.size() == 3); REQUIRE(tags[0] == "tag1"); REQUIRE(tags[1] == "tag:2"); @@ -124,9 +124,9 @@ TEST_CASE("String capture test with embedded match action", "[general]") TAG_NAME <= oom(seq(npd(chr(']')), dot())); WS <= zom(cls(" \t")); - auto m = ROOT.parse(" [tag1] [tag:2] [tag-3] "); + auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); - 
REQUIRE(m.ret == true); + REQUIRE(r.ret == true); REQUIRE(tags.size() == 3); REQUIRE(tags[0] == "tag1"); REQUIRE(tags[1] == "tag:2"); @@ -204,7 +204,9 @@ TEST_CASE("Backtracking test", "[general]") count++; }; - bool ret = parser.parse("Hello Two", true, true); + parser.packrat_parsing(true); + + bool ret = parser.parse("Hello Two"); REQUIRE(ret == true); REQUIRE(count == 1); // Skip second time } @@ -265,7 +267,7 @@ TEST_CASE("Simple calculator test", "[general]") }; int val; - parser.parse("(1+2)*3", val); + parser.parse_with_value("(1+2)*3", val); REQUIRE(val == 9); } @@ -305,9 +307,9 @@ TEST_CASE("Calculator test", "[general]") // Parse long val; - auto m = EXPRESSION.parse_with_value("1+2*3*(4-5+6)/7-8", val); + auto r = EXPRESSION.parse_with_value("1+2*3*(4-5+6)/7-8", val); - REQUIRE(m.ret == true); + REQUIRE(r.ret == true); REQUIRE(val == -3); } @@ -351,9 +353,9 @@ TEST_CASE("Calculator test2", "[general]") // Parse long val; - auto m = g[start].parse_with_value("1+2*3*(4-5+6)/7-8", val); + auto r = g[start].parse_with_value("1+2*3*(4-5+6)/7-8", val); - REQUIRE(m.ret == true); + REQUIRE(r.ret == true); REQUIRE(val == -3); } @@ -393,7 +395,7 @@ TEST_CASE("Calculator test3", "[general]") // Parse long val; - auto ret = parser.parse("1+2*3*(4-5+6)/7-8", val); + auto ret = parser.parse_with_value("1+2*3*(4-5+6)/7-8", val); REQUIRE(ret == true); REQUIRE(val == -3);