From 5b88443270c3e4d28b13e4686a5ca6f7d9b0ae97 Mon Sep 17 00:00:00 2001 From: yhirose Date: Sat, 23 Jan 2016 20:26:54 -0500 Subject: [PATCH] Major change in the way to handle token boundary. --- README.md | 69 ++++----- example/CMakeLists.txt | 3 + example/calc.cc | 6 +- example/calc2.cc | 6 +- example/calc3.cc | 6 +- peglib.h | 325 +++++++++++++++++++---------------------- test/test.cc | 54 +++---- 7 files changed, 221 insertions(+), 248 deletions(-) diff --git a/README.md b/README.md index 11b11bf..850468b 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ int main(void) { Additive <- Multitive '+' Additive / Multitive Multitive <- Primary '*' Multitive / Primary Primary <- '(' Additive ')' / Number - Number <- [0-9]+ + Number <- < [0-9]+ > %whitespace <- [ \t]* )"; @@ -43,7 +43,7 @@ int main(void) { // (3) Setup an action parser["Additive"] = [](const SemanticValues& sv) { - switch (sv.choice) { + switch (sv.choice()) { case 0: // "Multitive '+' Additive" return sv[0].get() + sv[1].get(); default: // "Multitive" @@ -52,7 +52,7 @@ int main(void) { }; parser["Multitive"] = [](const SemanticValues& sv) { - switch (sv.choice) { + switch (sv.choice()) { case 0: // "Primary '*' Multitive" return sv[0].get() * sv[1].get(); default: // "Primary" @@ -61,11 +61,11 @@ int main(void) { }; parser["Number"] = [](const SemanticValues& sv) { - return stoi(sv.str(), nullptr, 10); + return stoi(sv.token(), nullptr, 10); }; // (4) Parse - parser.packrat_parsing(); // Enable packrat parsing. + parser.enable_packrat_parsing(); // Enable packrat parsing. int val; parser.parse(" (1 + 2) * 3 ", val); @@ -84,28 +84,24 @@ Here are available actions: `const SemanticValues& sv` contains semantic values. `SemanticValues` structure is defined as follows. 
```cpp -struct SemanticValue { - any val; // Semantic value - const char* name; // Definition name for the sematic value - const char* s; // Token start for the semantic value - size_t n; // Token length for the semantic value - - // Cast semantic value - template T& get(); - template const T& get() const; - - // Get token - std::string str() const; -}; - -struct SemanticValues : protected std::vector +struct SemanticValues : protected std::vector { - const char* s; // Token start - size_t n; // Token length - size_t choice; // Choice number (0 based index) + // Matched string + std::string str() const; // Matched string + const char* c_str() const; // Matched string start + size_t length() const; // Matched string length - // Get token - std::string str() const; + // Tokens + std::vector< + std::pair< + const char*, // Token start + size_t>> // Token length + tokens; + + std::string token(size_t id = 0) const; + + // Choice number (0 based index) + size_t choice() const; // Transform the semantic value vector to another vector template vector transform(size_t beg = 0, size_t end = -1) const; @@ -114,11 +110,9 @@ struct SemanticValues : protected std::vector `peg::any` class is very similar to [boost::any](http://www.boost.org/doc/libs/1_57_0/doc/html/any.html). You can obtain a value by castning it to the actual type. In order to determine the actual type, you have to check the return value type of the child action for the semantic value. -`const char* s, size_t n` gives a pointer and length of the matched string. This is same as `sv.s` and `sv.n`. - `any& dt` is a data object which can be used by the user for whatever purposes. -The following example uses `<` ... ` >` operators. They are the *token boundary* operators. Each token boundary operator creates a semantic value that contains `const char*` of the position. It could be useful to eliminate unnecessary characters. +The following example uses `<` ... ` >` operators. They are the *token boundary* operators. 
```cpp auto syntax = R"( @@ -131,7 +125,7 @@ peg pg(syntax); pg["TOKEN"] = [](const SemanticValues& sv) { // 'token' doesn't include trailing whitespaces - auto token = sv.str(); + auto token = sv.token(); }; auto ret = pg.parse(" token1, token2 "); @@ -185,19 +179,19 @@ ret = parser.parse("200", val); assert(ret == false); ``` -*before* and *after* actions are also avalable. +*enter* and *leave* actions are also available. ```cpp -parser["RULE"].before = [](any& dt) { - std::cout << "before" << std::endl; +parser["RULE"].enter = [](any& dt) { + std::cout << "enter" << std::endl; }; parser["RULE"] = [](const SemanticValues& sv, any& dt) { std::cout << "action!" << std::endl; }; -parser["RULE"].after = [](any& dt) { - std::cout << "after" << std::endl; +parser["RULE"].leave = [](any& dt) { + std::cout << "leave" << std::endl; }; ``` @@ -216,7 +210,7 @@ These are valid tokens: ``` KEYWORD <- 'keyword' -WORD <- [a-zA-Z0-9] [a-zA-Z0-9-_]* # no reference rule is used +WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used. IDNET <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used. ``` @@ -225,8 +219,8 @@ The following grammar accepts ` one, "two three", four `. ``` ROOT <- ITEM (',' ITEM)* ITEM <- WORD / PHRASE -WORD <- [a-z]+ -PHRASE <- '"' (!'"' .)* '"' +WORD <- < [a-z]+ > +PHRASE <- < '"' (!'"' .)* '"' > %whitespace <- [ \t\r\n]* ``` @@ -413,7 +407,6 @@ Tested compilers TODO ---- - * Semantic predicate (`&{ expr }` and `!{ expr }`) * Unicode support (`.` matches a Unicode char. 
`\u????`, `\p{L}`) * Allow `←` and `ε` diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index b421ee7..42acab1 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -10,3 +10,6 @@ target_link_libraries(calc2 pthread) add_executable(calc3 calc3.cc) target_link_libraries(calc3 pthread) + +add_executable(calc_readme calc_readme.cc) +target_link_libraries(calc_readme pthread) diff --git a/example/calc.cc b/example/calc.cc index a563d55..2f8c01f 100644 --- a/example/calc.cc +++ b/example/calc.cc @@ -46,9 +46,9 @@ int main(int argc, const char** argv) parser["EXPRESSION"] = reduce; parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.s); }; + parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); }; + parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); }; + parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; auto expr = argv[1]; long val = 0; diff --git a/example/calc2.cc b/example/calc2.cc index 0d9a040..d0e1a0f 100644 --- a/example/calc2.cc +++ b/example/calc2.cc @@ -49,9 +49,9 @@ int main(int argc, const char** argv) EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); - TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.s; }; - FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.s; }; - NUMBER <= oom(cls("0-9")), [](const SemanticValues& sv) { return atol(sv.s); }; + TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.c_str(); }; + FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.c_str(); }; + NUMBER <= oom(cls("0-9")), 
[](const SemanticValues& sv) { return atol(sv.c_str()); }; auto expr = argv[1]; long val = 0; diff --git a/example/calc3.cc b/example/calc3.cc index d41d99c..723e7bf 100644 --- a/example/calc3.cc +++ b/example/calc3.cc @@ -44,9 +44,9 @@ int main(int argc, const char** argv) TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* FACTOR <- NUMBER / '(' EXPRESSION ')' - TERM_OPERATOR <- [-+] - FACTOR_OPERATOR <- [/*] - NUMBER <- [0-9]+ + TERM_OPERATOR <- < [-+] > + FACTOR_OPERATOR <- < [/*] > + NUMBER <- < [0-9]+ > %whitespace <- [ \t\r\n]* )"); diff --git a/peglib.h b/peglib.h index 912ee4f..11c896e 100644 --- a/peglib.h +++ b/peglib.h @@ -204,81 +204,71 @@ auto make_scope_exit(EF&& exit_function) -> scope_exit { /* * Semantic values */ -struct SemanticValue -{ - any val; - const char* s; - size_t n; - - SemanticValue() - : s(nullptr), n(0) {} - - SemanticValue(const any& val, const char* s, size_t n) - : val(val), s(s), n(n) {} - - template - T& get() { - return val.get(); - } - - template - const T& get() const { - return val.get(); - } - - std::string str() const { - return std::string(s, n); - } - -}; - -struct SemanticValues : protected std::vector +struct SemanticValues : protected std::vector { const char* path; const char* ss; - const char* s; - size_t n; - size_t choice; + const char* c_str() const { return s_; } + size_t length() const { return n_; } + size_t choice() const { return choice_; } - SemanticValues() : s(nullptr), n(0), choice(0) {} + std::vector> tokens; - typedef SemanticValue T; - using std::vector::iterator; - using std::vector::const_iterator; - using std::vector::size; - using std::vector::empty; - using std::vector::assign; - using std::vector::begin; - using std::vector::end; - using std::vector::rbegin; - using std::vector::rend; - using std::vector::operator[]; - using std::vector::at; - using std::vector::resize; - using std::vector::front; - using std::vector::back; - using std::vector::push_back; - using std::vector::pop_back; - using 
std::vector::insert; - using std::vector::erase; - using std::vector::clear; - using std::vector::swap; - using std::vector::emplace; - using std::vector::emplace_back; + SemanticValues() : s_(nullptr), n_(0), choice_(0) {} + + using std::vector::iterator; + using std::vector::const_iterator; + using std::vector::size; + using std::vector::empty; + using std::vector::assign; + using std::vector::begin; + using std::vector::end; + using std::vector::rbegin; + using std::vector::rend; + using std::vector::operator[]; + using std::vector::at; + using std::vector::resize; + using std::vector::front; + using std::vector::back; + using std::vector::push_back; + using std::vector::pop_back; + using std::vector::insert; + using std::vector::erase; + using std::vector::clear; + using std::vector::swap; + using std::vector::emplace; + using std::vector::emplace_back; std::string str() const { - return std::string(s, n); + return std::string(s_, n_); + } + + std::string token(size_t id = 0) const { + if (!tokens.empty()) { + assert(id < tokens.size()); + const auto& tok = tokens[id]; + return std::string(tok.first, tok.second); + } + return std::string(s_, n_); } template auto transform(size_t beg = 0, size_t end = -1) const -> vector { - return this->transform(beg, end, [](const SemanticValue& v) { return v.get(); }); + return this->transform(beg, end, [](const any& v) { return v.get(); }); } private: + friend class Context; + friend class PrioritizedChoice; + friend class Holder; + + const char* s_; + size_t n_; + size_t choice_; + template - auto transform(F f) const -> vector::type> { - vector::type> r; + auto transform(F f) const -> vector::type> { + vector::type> r; for (const auto& v: *this) { r.emplace_back(f(v)); } @@ -286,8 +276,8 @@ private: } template - auto transform(size_t beg, size_t end, F f) const -> vector::type> { - vector::type> r; + auto transform(size_t beg, size_t end, F f) const -> vector::type> { + vector::type> r; end = (std::min)(end, size()); for 
(size_t i = beg; i < end; i++) { r.emplace_back(f((*this)[i])); @@ -316,20 +306,11 @@ any call(F fn, Args&&... args) { return fn(std::forward(args)...); } -template < - typename R, typename F, - typename std::enable_if::type, SemanticValue>::value>::type*& = enabler, - typename... Args> -any call(F fn, Args&&... args) { - return fn(std::forward(args)...).val; -} - template < typename R, typename F, typename std::enable_if< !std::is_void::value && - !std::is_same::type, any>::value && - !std::is_same::type, SemanticValue>::value>::type*& = enabler, + !std::is_same::type, any>::value>::type*& = enabler, typename... Args> any call(F fn, Args&&... args) { return any(fn(std::forward(args)...)); @@ -480,11 +461,11 @@ public: size_t value_stack_size; size_t nest_level; - std::vector definition_stack; + + bool in_token; std::shared_ptr whitespaceOpe; - bool in_whiltespace; - bool in_token; + bool in_whitespace; const size_t def_count; const bool enablePackratParsing; @@ -510,9 +491,9 @@ public: , message_pos(nullptr) , value_stack_size(0) , nest_level(0) - , whitespaceOpe(whitespaceOpe) - , in_whiltespace(false) , in_token(false) + , whitespaceOpe(whitespaceOpe) + , in_whitespace(false) , def_count(def_count) , enablePackratParsing(enablePackratParsing) , cache_register(enablePackratParsing ? 
def_count * (l + 1) : 0) @@ -563,8 +544,9 @@ public: } sv.path = path; sv.ss = s; - sv.s = nullptr; - sv.n = 0; + sv.s_ = nullptr; + sv.n_ = 0; + sv.tokens.clear(); return sv; } @@ -677,9 +659,10 @@ public: if (!chldsv.empty()) { sv.insert(sv.end(), chldsv.begin(), chldsv.end()); } - sv.s = chldsv.s; - sv.n = chldsv.n; - sv.choice = id; + sv.s_ = chldsv.c_str(); + sv.n_ = chldsv.length(); + sv.choice_ = id; + sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end()); return len; } id++; @@ -707,12 +690,16 @@ public: c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); auto save_sv_size = sv.size(); + auto save_tok_size = sv.tokens.size(); const auto& rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); if (fail(len)) { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + save_sv_size); } + if (sv.tokens.size() != save_tok_size) { + sv.tokens.erase(sv.tokens.begin() + save_tok_size); + } c.error_pos = save_error_pos; break; } @@ -749,12 +736,16 @@ public: c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); auto save_sv_size = sv.size(); + auto save_tok_size = sv.tokens.size(); const auto& rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); if (fail(len)) { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + save_sv_size); } + if (sv.tokens.size() != save_tok_size) { + sv.tokens.erase(sv.tokens.begin() + save_tok_size); + } c.error_pos = save_error_pos; break; } @@ -778,6 +769,7 @@ public: auto save_error_pos = c.error_pos; c.nest_level++; auto save_sv_size = sv.size(); + auto save_tok_size = sv.tokens.size(); auto se = make_scope_exit([&]() { c.nest_level--; }); const auto& rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); @@ -787,6 +779,9 @@ public: if (sv.size() != save_sv_size) { sv.erase(sv.begin() + save_sv_size); } + if (sv.tokens.size() != save_tok_size) { + sv.tokens.erase(sv.tokens.begin() + save_tok_size); + } c.error_pos = save_error_pos; return 0; } @@ -968,15 +963,7 @@ 
class TokenBoundary : public Ope public: TokenBoundary(const std::shared_ptr& ope) : ope_(ope) {} - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - const auto& rule = *ope_; - auto len = rule.parse(s, n, sv, c, dt); - if (success(len)) { - sv.s = s; - sv.n = len; - } - return len; - } + size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; void accept(Visitor& v) override; @@ -1085,11 +1072,11 @@ public: Whitespace(const std::shared_ptr& ope) : ope_(ope) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - if (c.in_whiltespace) { + if (c.in_whitespace) { return 0; } - c.in_whiltespace = true; - auto se = make_scope_exit([&]() { c.in_whiltespace = false; }); + c.in_whitespace = true; + auto se = make_scope_exit([&]() { c.in_whitespace = false; }); const auto& rule = *ope_; return rule.parse(s, n, sv, c, dt); } @@ -1280,8 +1267,8 @@ public: SemanticValues sv; any dt; auto r = parse_core(s, n, sv, dt, path); - if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) { - val = sv[0].val.get(); + if (r.ret && !sv.empty() && !sv.front().is_undefined()) { + val = sv[0].get(); } return r; } @@ -1296,8 +1283,8 @@ public: Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const { SemanticValues sv; auto r = parse_core(s, n, sv, dt, path); - if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) { - val = sv[0].val.get(); + if (r.ret && !sv.empty() && !sv.front().is_undefined()) { + val = sv[0].get(); } return r; } @@ -1336,7 +1323,7 @@ public: size_t id; Action action; std::function enter; - std::function exit; + std::function leave; std::function error_message; bool ignoreSemanticValue; std::shared_ptr whitespaceOpe; @@ -1355,8 +1342,13 @@ private: AssignIDToDefinition assignId; holder_->accept(assignId); + std::shared_ptr ope = holder_; + if (whitespaceOpe) { + ope = 
std::make_shared(whitespaceOpe, ope); + } + Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer); - auto len = holder_->parse(s, n, sv, cxt, dt); + auto len = ope->parse(s, n, sv, cxt, dt); return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message }; } @@ -1379,18 +1371,38 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, } // Skip whiltespace - const auto d = c.definition_stack.back(); - if (!d->is_token && c.whitespaceOpe) { - auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt); - if (fail(len)) { - return -1; + if (!c.in_token) { + if (c.whitespaceOpe) { + auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt); + if (fail(len)) { + return -1; + } + i += len; } - i += len; } return i; } +inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { + c.in_token = true; + auto se = make_scope_exit([&]() { c.in_token = false; }); + const auto& rule = *ope_; + auto len = rule.parse(s, n, sv, c, dt); + if (success(len)) { + sv.tokens.push_back(std::make_pair(s, len)); + + if (c.whitespaceOpe) { + auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt); + if (fail(l)) { + return -1; + } + len += l; + } + } + return len; +} + inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { if (!ope_) { throw std::logic_error("Uninitialized definition ope was used..."); @@ -1402,12 +1414,8 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context size_t len; any val; - const char* token_boundary_s = s; - size_t token_boundary_n = n; c.packrat(s, outer_->id, len, val, [&](any& val) { - c.definition_stack.push_back(outer_); - auto& chldsv = c.push(); if (outer_->enter) { @@ -1415,54 +1423,20 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context } auto se = make_scope_exit([&]() { - c.definition_stack.pop_back(); - c.pop(); - if 
(outer_->exit) { - outer_->exit(dt); + if (outer_->leave) { + outer_->leave(dt); } }); - auto ope = ope_; - - if (!c.in_token && c.whitespaceOpe) { - if (c.definition_stack.size() == 1) { - if (outer_->is_token && !outer_->has_token_boundary) { - ope = std::make_shared(c.whitespaceOpe, std::make_shared(ope_)); - } else { - ope = std::make_shared(c.whitespaceOpe, ope_); - } - } else if (outer_->is_token) { - if (!outer_->has_token_boundary) { - ope = std::make_shared(std::make_shared(ope_), c.whitespaceOpe); - } else { - ope = std::make_shared(ope_, c.whitespaceOpe); - } - } - } - - const auto& rule = *ope; - if (!c.in_token && outer_->is_token) { - c.in_token = true; - auto se = make_scope_exit([&]() { c.in_token = false; }); - - len = rule.parse(s, n, chldsv, c, dt); - } else { - len = rule.parse(s, n, chldsv, c, dt); - } - - token_boundary_n = len; + const auto& rule = *ope_; + len = rule.parse(s, n, chldsv, c, dt); // Invoke action if (success(len)) { - if (chldsv.s) { - token_boundary_s = chldsv.s; - token_boundary_n = chldsv.n; - } else { - chldsv.s = s; - chldsv.n = len; - } + chldsv.s_ = s; + chldsv.n_ = len; try { val = reduce(chldsv, dt); @@ -1480,7 +1454,7 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context if (success(len)) { if (!outer_->ignoreSemanticValue) { - sv.emplace_back(val, token_boundary_s, token_boundary_n); + sv.emplace_back(val); } } else { if (outer_->error_message) { @@ -1500,7 +1474,7 @@ inline any Holder::reduce(const SemanticValues& sv, any& dt) const { } else if (sv.empty()) { return any(); } else { - return sv.front().val; + return sv.front(); } } @@ -1625,7 +1599,7 @@ inline std::shared_ptr ref(const std::unordered_map wsp(const std::shared_ptr& ope) { - return std::make_shared(std::make_shared(ope)); + return std::make_shared(std::make_shared(ope)); } /*----------------------------------------------------------------------------- @@ -1815,7 +1789,7 @@ private: g["Suffix"] <= seq(g["Primary"], 
opt(cho(g["QUESTION"], g["STAR"], g["PLUS"]))); g["Primary"] <= cho(seq(opt(g["IGNORE"]), g["Identifier"], npd(g["LEFTARROW"])), seq(g["OPEN"], g["Expression"], g["CLOSE"]), - seq(g["Begin"], g["Expression"], g["End"]), + seq(g["BeginTok"], g["Expression"], g["EndTok"]), seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["Literal"], g["Class"], g["DOT"]); @@ -1853,8 +1827,8 @@ private: g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r')); g["EndOfFile"] <= npd(dot()); - g["Begin"] <= seq(chr('<'), g["Spacing"]); - g["End"] <= seq(chr('>'), g["Spacing"]); + g["BeginTok"] <= seq(chr('<'), g["Spacing"]); + g["EndTok"] <= seq(chr('>'), g["Spacing"]); g["BeginCap"] <= seq(chr('$'), tok(opt(g["Identifier"])), chr('<'), g["Spacing"]); g["EndCap"] <= seq(lit(">"), g["Spacing"]); @@ -1888,7 +1862,7 @@ private: data.start = name; } } else { - data.duplicates.emplace_back(name, sv.s); + data.duplicates.emplace_back(name, sv.c_str()); } }; @@ -1955,7 +1929,7 @@ private: g["Primary"] = [&](const SemanticValues& sv, any& dt) -> std::shared_ptr { Data& data = *dt.get(); - switch (sv.choice) { + switch (sv.choice()) { case 0: { // Reference auto ignore = (sv.size() == 2); auto baseId = ignore ? 
1 : 0; @@ -1963,13 +1937,13 @@ private: const auto& ident = sv[baseId].get(); if (!data.references.count(ident)) { - data.references[ident] = sv.s; // for error handling + data.references[ident] = sv.c_str(); // for error handling } if (ignore) { - return ign(ref(*data.grammar, ident, sv.s)); + return ign(ref(*data.grammar, ident, sv.c_str())); } else { - return ref(*data.grammar, ident, sv.s); + return ref(*data.grammar, ident, sv.c_str()); } } case 1: { // (Expression) @@ -1979,7 +1953,7 @@ private: return tok(sv[1].get>()); } case 3: { // Capture - auto name = std::string(sv[0].s, sv[0].n); + const auto& name = sv[0].get(); auto ope = sv[1].get>(); return cap(ope, data.match_action, ++data.capture_count, name); } @@ -1990,24 +1964,27 @@ private: }; g["IdentCont"] = [](const SemanticValues& sv) { - return std::string(sv.s, sv.n); + return std::string(sv.c_str(), sv.length()); }; g["Literal"] = [this](const SemanticValues& sv) { - return lit(resolve_escape_sequence(sv.s, sv.n)); + const auto& tok = sv.tokens.front(); + return lit(resolve_escape_sequence(tok.first, tok.second)); }; g["Class"] = [this](const SemanticValues& sv) { - return cls(resolve_escape_sequence(sv.s, sv.n)); + const auto& tok = sv.tokens.front(); + return cls(resolve_escape_sequence(tok.first, tok.second)); }; - g["AND"] = [](const SemanticValues& sv) { return *sv.s; }; - g["NOT"] = [](const SemanticValues& sv) { return *sv.s; }; - g["QUESTION"] = [](const SemanticValues& sv) { return *sv.s; }; - g["STAR"] = [](const SemanticValues& sv) { return *sv.s; }; - g["PLUS"] = [](const SemanticValues& sv) { return *sv.s; }; - + g["AND"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["NOT"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["QUESTION"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["STAR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["PLUS"] = [](const SemanticValues& sv) { return *sv.c_str(); }; g["DOT"] = [](const SemanticValues& sv) { 
return dot(); }; + + g["BeginCap"] = [](const SemanticValues& sv) { return sv.token(); }; } std::shared_ptr perform_core( @@ -2509,12 +2486,12 @@ public: if (!rule.action) { auto is_token = rule.is_token; rule.action = [=](const SemanticValues& sv) { + auto line = line_info(sv.ss, sv.c_str()); + if (is_token) { - auto line = line_info(sv.ss, sv.s); - return std::make_shared(sv.path, line.first, line.second, name.c_str(), std::string(sv.s, sv.n)); + return std::make_shared(sv.path, line.first, line.second, name.c_str(), sv.str()); } - auto line = line_info(sv.ss, sv.s); auto ast = std::make_shared(sv.path, line.first, line.second, name.c_str(), sv.transform>()); for (auto node: ast->nodes) { diff --git a/test/test.cc b/test/test.cc index 35c3175..62f2b2d 100644 --- a/test/test.cc +++ b/test/test.cc @@ -98,7 +98,7 @@ TEST_CASE("String capture test3", "[general]") std::vector tags; pg["TOKEN"] = [&](const SemanticValues& sv) { - tags.push_back(sv.str()); + tags.push_back(sv.token()); }; auto ret = pg.parse(" [tag1] [tag:2] [tag-3] "); @@ -205,7 +205,7 @@ TEST_CASE("Lambda action test", "[general]") string ss; parser["CHAR"] = [&](const SemanticValues& sv) { - ss += *sv.s; + ss += *sv.c_str(); }; bool ret = parser.parse("hello"); @@ -213,7 +213,7 @@ TEST_CASE("Lambda action test", "[general]") REQUIRE(ss == "hello"); } -TEST_CASE("enter/exit handlers test", "[general]") +TEST_CASE("enter/leave handlers test", "[general]") { parser parser(R"( START <- LTOKEN '=' RTOKEN @@ -226,7 +226,7 @@ TEST_CASE("enter/exit handlers test", "[general]") auto& require_upper_case = *dt.get(); require_upper_case = false; }; - parser["LTOKEN"].exit = [&](any& dt) { + parser["LTOKEN"].leave = [&](any& dt) { auto& require_upper_case = *dt.get(); require_upper_case = true; }; @@ -266,8 +266,8 @@ TEST_CASE("WHITESPACE test", "[general]") ITEM <- WORD / PHRASE # Tokens - WORD <- [a-zA-Z0-9_]+ - PHRASE <- '"' (!'"' .)* '"' + WORD <- < [a-zA-Z0-9_]+ > + PHRASE <- < '"' (!'"' .)* '"' > 
%whitespace <- [ \t\r\n]* )"); @@ -291,7 +291,7 @@ TEST_CASE("WHITESPACE test2", "[general]") vector items; parser["ITEM"] = [&](const SemanticValues& sv) { - items.push_back(sv.str()); + items.push_back(sv.token()); }; auto ret = parser.parse(R"([one], [two] ,[three] )"); @@ -324,7 +324,7 @@ TEST_CASE("Skip token test2", "[general]") { peg::parser parser(R"( ROOT <- ITEM (',' ITEM)* - ITEM <- ([a-z0-9])+ + ITEM <- < ([a-z0-9])+ > %whitespace <- [ \t]* )"); @@ -407,7 +407,7 @@ TEST_CASE("Simple calculator test", "[general]") parser parser(syntax); parser["Additive"] = [](const SemanticValues& sv) { - switch (sv.choice) { + switch (sv.choice()) { case 0: return sv[0].get() + sv[1].get(); default: @@ -416,7 +416,7 @@ TEST_CASE("Simple calculator test", "[general]") }; parser["Multitive"] = [](const SemanticValues& sv) { - switch (sv.choice) { + switch (sv.choice()) { case 0: return sv[0].get() * sv[1].get(); default: @@ -425,7 +425,7 @@ TEST_CASE("Simple calculator test", "[general]") }; parser["Number"] = [](const SemanticValues& sv) { - return atoi(sv.s); + return atoi(sv.c_str()); }; int val; @@ -448,10 +448,10 @@ TEST_CASE("Calculator test", "[general]") // Setup actions auto reduce = [](const SemanticValues& sv) -> long { - long ret = sv[0].val.get(); + long ret = sv[0].get(); for (auto i = 1u; i < sv.size(); i += 2) { - auto num = sv[i + 1].val.get(); - switch (sv[i].val.get()) { + auto num = sv[i + 1].get(); + switch (sv[i].get()) { case '+': ret += num; break; case '-': ret -= num; break; case '*': ret *= num; break; @@ -463,8 +463,8 @@ TEST_CASE("Calculator test", "[general]") EXPRESSION = reduce; TERM = reduce; - TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.s; }; - FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.s; }; + TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); }; + FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); }; NUMBER = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 
10); }; // Parse @@ -494,10 +494,10 @@ TEST_CASE("Calculator test2", "[general]") // Setup actions auto reduce = [](const SemanticValues& sv) -> long { - long ret = sv[0].val.get(); + long ret = sv[0].get(); for (auto i = 1u; i < sv.size(); i += 2) { - auto num = sv[i + 1].val.get(); - switch (sv[i].val.get()) { + auto num = sv[i + 1].get(); + switch (sv[i].get()) { case '+': ret += num; break; case '-': ret -= num; break; case '*': ret *= num; break; @@ -509,8 +509,8 @@ TEST_CASE("Calculator test2", "[general]") g["EXPRESSION"] = reduce; g["TERM"] = reduce; - g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.s; }; - g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.s; }; + g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; + g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); }; g["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; // Parse @@ -535,10 +535,10 @@ TEST_CASE("Calculator test3", "[general]") ); auto reduce = [](const SemanticValues& sv) -> long { - long ret = sv[0].val.get(); + long ret = sv[0].get(); for (auto i = 1u; i < sv.size(); i += 2) { - auto num = sv[i + 1].val.get(); - switch (sv[i].val.get()) { + auto num = sv[i + 1].get(); + switch (sv[i].get()) { case '+': ret += num; break; case '-': ret -= num; break; case '*': ret *= num; break; @@ -551,8 +551,8 @@ TEST_CASE("Calculator test3", "[general]") // Setup actions parser["EXPRESSION"] = reduce; parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; }; + parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); }; + parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); }; parser["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); }; // Parse @@ -805,7 +805,7 @@ 
TEST_CASE("Semantic predicate test", "[predicate]") parser parser("NUMBER <- [0-9]+"); parser["NUMBER"] = [](const SemanticValues& sv) { - auto val = stol(sv.str(), nullptr, 10); + auto val = stol(sv.token(), nullptr, 10); if (val != 100) { throw parse_error("value error!!"); }