diff --git a/peglib.h b/peglib.h index 86cab9f..53576f7 100644 --- a/peglib.h +++ b/peglib.h @@ -476,8 +476,9 @@ class Definition; typedef std::function Tracer; -struct Context +class Context { +public: const char* path; const char* s; const size_t l; @@ -492,7 +493,9 @@ struct Context std::vector> value_stack; size_t value_stack_size; - std::shared_ptr whiteSpaceOpe; + std::shared_ptr whitespaceOpe; + bool in_whiltespace; + bool in_token; const size_t def_count; const bool enablePackratParsing; @@ -508,7 +511,7 @@ struct Context const char* s, size_t l, size_t def_count, - std::shared_ptr whiteSpaceOpe, + std::shared_ptr whitespaceOpe, bool enablePackratParsing, Tracer tracer) : path(path) @@ -516,7 +519,9 @@ struct Context , l(l) , error_pos(nullptr) , message_pos(nullptr) - , whiteSpaceOpe(whiteSpaceOpe) + , whitespaceOpe(whitespaceOpe) + , in_whiltespace(false) + , in_token(false) , nest_level(0) , value_stack_size(0) , def_count(def_count) @@ -1071,6 +1076,26 @@ private: mutable std::shared_ptr rule_; }; +class Whitespace : public Ope +{ +public: + Whitespace(const std::shared_ptr& ope) : ope_(ope) {} + + size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { + if (c.in_whiltespace) { + return 0; + } + c.in_whiltespace = true; + auto se = make_scope_exit([&]() { c.in_whiltespace = false; }); + const auto& rule = *ope_; + return rule.parse(s, n, sv, c, dt); + } + + void accept(Visitor& v) override; + + std::shared_ptr ope_; +}; + /* * Visitor */ @@ -1094,6 +1119,7 @@ struct Ope::Visitor virtual void visit(WeakHolder& ope) {} virtual void visit(Holder& ope) {} virtual void visit(DefinitionReference& ope) {} + virtual void visit(Whitespace& ope) {} }; struct AssignIDToDefinition : public Ope::Visitor @@ -1174,6 +1200,7 @@ public: : ignoreSemanticValue(false) , enablePackratParsing(false) , is_token(false) + , has_token_boundary(false) , holder_(std::make_shared(this)) {} Definition(const Definition& rhs) @@ -1181,6 +1208,7 @@ public: , ignoreSemanticValue(false) , enablePackratParsing(false) , is_token(false) + , has_token_boundary(false) , holder_(rhs.holder_) { holder_->outer_ = this; @@ -1189,9 +1217,10 @@ public: Definition(Definition&& rhs) : name(std::move(rhs.name)) , ignoreSemanticValue(rhs.ignoreSemanticValue) - , whiteSpaceOpe(rhs.whiteSpaceOpe) + , whitespaceOpe(rhs.whitespaceOpe) , enablePackratParsing(rhs.enablePackratParsing) , is_token(rhs.is_token) + , has_token_boundary(rhs.has_token_boundary) , holder_(std::move(rhs.holder_)) { holder_->outer_ = this; @@ -1201,6 +1230,7 @@ public: : ignoreSemanticValue(false) , enablePackratParsing(false) , is_token(false) + , has_token_boundary(false) , holder_(std::make_shared(this)) { *this <= ope; @@ -1214,6 +1244,7 @@ public: IsToken isToken; ope->accept(isToken); is_token = isToken.is_token(); + has_token_boundary = isToken.has_token_boundary; holder_->ope_ = ope; @@ -1305,9 +1336,10 @@ public: std::function exit; std::function error_message; bool ignoreSemanticValue; - std::shared_ptr whiteSpaceOpe; + std::shared_ptr whitespaceOpe; bool enablePackratParsing; bool is_token; + bool has_token_boundary; Tracer tracer; private: @@ -1320,7 +1352,7 @@ private: AssignIDToDefinition assignId; holder_->accept(assignId); - Context cxt(path, s, n, assignId.ids.size(), whiteSpaceOpe, enablePackratParsing, tracer); + Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer); auto len = holder_->parse(s, n, sv, cxt, dt); return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message }; } @@ -1345,8 +1377,8 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, // Skip whiltespace const auto d = c.definition_stack.back(); - if (!d->is_token && c.whiteSpaceOpe) { - auto len = c.whiteSpaceOpe->parse(s + i, n - i, sv, c, dt); + if (!d->is_token && c.whitespaceOpe) { + auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt); if (fail(len)) { return -1; } @@ -1362,6 +1394,8 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context } c.trace(outer_->name.c_str(), s, n, sv, dt); + c.nest_level++; + auto se = make_scope_exit([&]() { c.nest_level--; }); size_t len; any val; @@ -1379,16 +1413,31 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context auto ope = ope_; - if (outer_->whiteSpaceOpe) { - ope = std::make_shared(outer_->whiteSpaceOpe, ope_); - } else if (outer_->is_token && c.whiteSpaceOpe) { - ope = std::make_shared(std::make_shared(ope_), c.whiteSpaceOpe); + if (!c.in_token && c.whitespaceOpe) { + if (c.definition_stack.size() == 1) { + if (outer_->is_token && !outer_->has_token_boundary) { + ope = std::make_shared(c.whitespaceOpe, std::make_shared(ope_)); + } else { + ope = std::make_shared(c.whitespaceOpe, ope_); + } + } else if (outer_->is_token) { + if (!outer_->has_token_boundary) { + ope = std::make_shared(std::make_shared(ope_), c.whitespaceOpe); + } else { + ope = std::make_shared(ope_, c.whitespaceOpe); + } + } } - c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); const auto& rule = *ope; - len = rule.parse(s, n, chldsv, c, dt); + if (!c.in_token && outer_->is_token) { + c.in_token = true; + auto se = make_scope_exit([&]() { c.in_token = false; }); + + len = rule.parse(s, n, chldsv, c, dt); + } else { + len = rule.parse(s, n, chldsv, c, dt); + } token_boundary_n = len; @@ -1483,6 +1532,7 @@ inline void User::accept(Visitor& v) { v.visit(*this); } inline void WeakHolder::accept(Visitor& v) { v.visit(*this); } inline void Holder::accept(Visitor& v) { v.visit(*this); } inline void DefinitionReference::accept(Visitor& v) { v.visit(*this); } +inline void Whitespace::accept(Visitor& v) { v.visit(*this); } inline void AssignIDToDefinition::visit(Holder& ope) { auto p = (void*)ope.outer_; @@ -1568,6 +1618,10 @@ inline std::shared_ptr ref(const std::unordered_map(grammar, name, s); } +inline std::shared_ptr wsp(const std::shared_ptr& ope) { + return std::make_shared(std::make_shared(ope)); +} + /*----------------------------------------------------------------------------- * PEG parser generator *---------------------------------------------------------------------------*/ @@ -2054,7 +2108,7 @@ private: // Automatic whitespace skipping if (grammar.count(WHITESPACE_DEFINITION_NAME)) { auto& rule = (*data.grammar)[start]; - rule.whiteSpaceOpe = (*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator(); + rule.whitespaceOpe = wsp((*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator()); } return data.grammar; diff --git a/test/test.cc b/test/test.cc index 6186fc2..ec1f57d 100644 --- a/test/test.cc +++ b/test/test.cc @@ -274,6 +274,32 @@ TEST_CASE("WHITESPACE test", "[general]") REQUIRE(ret == true); } +TEST_CASE("WHITESPACE test2", "[general]") +{ + peg::parser parser(R"( + # Rules + ROOT <- ITEM (',' ITEM)* + ITEM <- '[' < [a-zA-Z0-9_]+ > ']' + + %whitespace <- (SPACE / TAB)* + SPACE <- ' ' + TAB <- '\t' + )"); + + vector items; + parser["ITEM"] = [&](const SemanticValues& sv) { + items.push_back(sv.str()); + }; + + auto ret = parser.parse(R"([one], [two] ,[three] )"); + + REQUIRE(ret == true); + REQUIRE(items.size() == 3); + REQUIRE(items[0] == "one"); + REQUIRE(items[1] == "two"); + REQUIRE(items[2] == "three"); +} + TEST_CASE("Skip token test", "[general]") { peg::parser parser(