Fixed problems with the %whitespace rule.

This commit is contained in:
yhirose 2015-12-03 18:59:12 -05:00
parent a9cea16325
commit 568bf15ed3
2 changed files with 97 additions and 17 deletions

View File

@ -476,8 +476,9 @@ class Definition;
typedef std::function<void (const char* name, const char* s, size_t n, const SemanticValues& sv, const Context& c, const any& dt)> Tracer; typedef std::function<void (const char* name, const char* s, size_t n, const SemanticValues& sv, const Context& c, const any& dt)> Tracer;
struct Context class Context
{ {
public:
const char* path; const char* path;
const char* s; const char* s;
const size_t l; const size_t l;
@ -492,7 +493,9 @@ struct Context
std::vector<std::shared_ptr<SemanticValues>> value_stack; std::vector<std::shared_ptr<SemanticValues>> value_stack;
size_t value_stack_size; size_t value_stack_size;
std::shared_ptr<Ope> whiteSpaceOpe; std::shared_ptr<Ope> whitespaceOpe;
bool in_whiltespace;
bool in_token;
const size_t def_count; const size_t def_count;
const bool enablePackratParsing; const bool enablePackratParsing;
@ -508,7 +511,7 @@ struct Context
const char* s, const char* s,
size_t l, size_t l,
size_t def_count, size_t def_count,
std::shared_ptr<Ope> whiteSpaceOpe, std::shared_ptr<Ope> whitespaceOpe,
bool enablePackratParsing, bool enablePackratParsing,
Tracer tracer) Tracer tracer)
: path(path) : path(path)
@ -516,7 +519,9 @@ struct Context
, l(l) , l(l)
, error_pos(nullptr) , error_pos(nullptr)
, message_pos(nullptr) , message_pos(nullptr)
, whiteSpaceOpe(whiteSpaceOpe) , whitespaceOpe(whitespaceOpe)
, in_whiltespace(false)
, in_token(false)
, nest_level(0) , nest_level(0)
, value_stack_size(0) , value_stack_size(0)
, def_count(def_count) , def_count(def_count)
@ -1071,6 +1076,26 @@ private:
mutable std::shared_ptr<Ope> rule_; mutable std::shared_ptr<Ope> rule_;
}; };
class Whitespace : public Ope
{
public:
Whitespace(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
if (c.in_whiltespace) {
return 0;
}
c.in_whiltespace = true;
auto se = make_scope_exit([&]() { c.in_whiltespace = false; });
const auto& rule = *ope_;
return rule.parse(s, n, sv, c, dt);
}
void accept(Visitor& v) override;
std::shared_ptr<Ope> ope_;
};
/* /*
* Visitor * Visitor
*/ */
@ -1094,6 +1119,7 @@ struct Ope::Visitor
virtual void visit(WeakHolder& ope) {} virtual void visit(WeakHolder& ope) {}
virtual void visit(Holder& ope) {} virtual void visit(Holder& ope) {}
virtual void visit(DefinitionReference& ope) {} virtual void visit(DefinitionReference& ope) {}
virtual void visit(Whitespace& ope) {}
}; };
struct AssignIDToDefinition : public Ope::Visitor struct AssignIDToDefinition : public Ope::Visitor
@ -1174,6 +1200,7 @@ public:
: ignoreSemanticValue(false) : ignoreSemanticValue(false)
, enablePackratParsing(false) , enablePackratParsing(false)
, is_token(false) , is_token(false)
, has_token_boundary(false)
, holder_(std::make_shared<Holder>(this)) {} , holder_(std::make_shared<Holder>(this)) {}
Definition(const Definition& rhs) Definition(const Definition& rhs)
@ -1181,6 +1208,7 @@ public:
, ignoreSemanticValue(false) , ignoreSemanticValue(false)
, enablePackratParsing(false) , enablePackratParsing(false)
, is_token(false) , is_token(false)
, has_token_boundary(false)
, holder_(rhs.holder_) , holder_(rhs.holder_)
{ {
holder_->outer_ = this; holder_->outer_ = this;
@ -1189,9 +1217,10 @@ public:
Definition(Definition&& rhs) Definition(Definition&& rhs)
: name(std::move(rhs.name)) : name(std::move(rhs.name))
, ignoreSemanticValue(rhs.ignoreSemanticValue) , ignoreSemanticValue(rhs.ignoreSemanticValue)
, whiteSpaceOpe(rhs.whiteSpaceOpe) , whitespaceOpe(rhs.whitespaceOpe)
, enablePackratParsing(rhs.enablePackratParsing) , enablePackratParsing(rhs.enablePackratParsing)
, is_token(rhs.is_token) , is_token(rhs.is_token)
, has_token_boundary(rhs.has_token_boundary)
, holder_(std::move(rhs.holder_)) , holder_(std::move(rhs.holder_))
{ {
holder_->outer_ = this; holder_->outer_ = this;
@ -1201,6 +1230,7 @@ public:
: ignoreSemanticValue(false) : ignoreSemanticValue(false)
, enablePackratParsing(false) , enablePackratParsing(false)
, is_token(false) , is_token(false)
, has_token_boundary(false)
, holder_(std::make_shared<Holder>(this)) , holder_(std::make_shared<Holder>(this))
{ {
*this <= ope; *this <= ope;
@ -1214,6 +1244,7 @@ public:
IsToken isToken; IsToken isToken;
ope->accept(isToken); ope->accept(isToken);
is_token = isToken.is_token(); is_token = isToken.is_token();
has_token_boundary = isToken.has_token_boundary;
holder_->ope_ = ope; holder_->ope_ = ope;
@ -1305,9 +1336,10 @@ public:
std::function<void (any& dt)> exit; std::function<void (any& dt)> exit;
std::function<std::string ()> error_message; std::function<std::string ()> error_message;
bool ignoreSemanticValue; bool ignoreSemanticValue;
std::shared_ptr<Ope> whiteSpaceOpe; std::shared_ptr<Ope> whitespaceOpe;
bool enablePackratParsing; bool enablePackratParsing;
bool is_token; bool is_token;
bool has_token_boundary;
Tracer tracer; Tracer tracer;
private: private:
@ -1320,7 +1352,7 @@ private:
AssignIDToDefinition assignId; AssignIDToDefinition assignId;
holder_->accept(assignId); holder_->accept(assignId);
Context cxt(path, s, n, assignId.ids.size(), whiteSpaceOpe, enablePackratParsing, tracer); Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
auto len = holder_->parse(s, n, sv, cxt, dt); auto len = holder_->parse(s, n, sv, cxt, dt);
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message }; return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
} }
@ -1345,8 +1377,8 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv,
// Skip whiltespace // Skip whiltespace
const auto d = c.definition_stack.back(); const auto d = c.definition_stack.back();
if (!d->is_token && c.whiteSpaceOpe) { if (!d->is_token && c.whitespaceOpe) {
auto len = c.whiteSpaceOpe->parse(s + i, n - i, sv, c, dt); auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
if (fail(len)) { if (fail(len)) {
return -1; return -1;
} }
@ -1362,6 +1394,8 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
} }
c.trace(outer_->name.c_str(), s, n, sv, dt); c.trace(outer_->name.c_str(), s, n, sv, dt);
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
size_t len; size_t len;
any val; any val;
@ -1379,16 +1413,31 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
auto ope = ope_; auto ope = ope_;
if (outer_->whiteSpaceOpe) { if (!c.in_token && c.whitespaceOpe) {
ope = std::make_shared<Sequence>(outer_->whiteSpaceOpe, ope_); if (c.definition_stack.size() == 1) {
} else if (outer_->is_token && c.whiteSpaceOpe) { if (outer_->is_token && !outer_->has_token_boundary) {
ope = std::make_shared<Sequence>(std::make_shared<TokenBoundary>(ope_), c.whiteSpaceOpe); ope = std::make_shared<Sequence>(c.whitespaceOpe, std::make_shared<TokenBoundary>(ope_));
} else {
ope = std::make_shared<Sequence>(c.whitespaceOpe, ope_);
}
} else if (outer_->is_token) {
if (!outer_->has_token_boundary) {
ope = std::make_shared<Sequence>(std::make_shared<TokenBoundary>(ope_), c.whitespaceOpe);
} else {
ope = std::make_shared<Sequence>(ope_, c.whitespaceOpe);
}
}
} }
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
const auto& rule = *ope; const auto& rule = *ope;
if (!c.in_token && outer_->is_token) {
c.in_token = true;
auto se = make_scope_exit([&]() { c.in_token = false; });
len = rule.parse(s, n, chldsv, c, dt); len = rule.parse(s, n, chldsv, c, dt);
} else {
len = rule.parse(s, n, chldsv, c, dt);
}
token_boundary_n = len; token_boundary_n = len;
@ -1483,6 +1532,7 @@ inline void User::accept(Visitor& v) { v.visit(*this); }
inline void WeakHolder::accept(Visitor& v) { v.visit(*this); } inline void WeakHolder::accept(Visitor& v) { v.visit(*this); }
inline void Holder::accept(Visitor& v) { v.visit(*this); } inline void Holder::accept(Visitor& v) { v.visit(*this); }
inline void DefinitionReference::accept(Visitor& v) { v.visit(*this); } inline void DefinitionReference::accept(Visitor& v) { v.visit(*this); }
inline void Whitespace::accept(Visitor& v) { v.visit(*this); }
inline void AssignIDToDefinition::visit(Holder& ope) { inline void AssignIDToDefinition::visit(Holder& ope) {
auto p = (void*)ope.outer_; auto p = (void*)ope.outer_;
@ -1568,6 +1618,10 @@ inline std::shared_ptr<Ope> ref(const std::unordered_map<std::string, Definition
return std::make_shared<DefinitionReference>(grammar, name, s); return std::make_shared<DefinitionReference>(grammar, name, s);
} }
inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope>& ope) {
return std::make_shared<Ignore>(std::make_shared<Whitespace>(ope));
}
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
* PEG parser generator * PEG parser generator
*---------------------------------------------------------------------------*/ *---------------------------------------------------------------------------*/
@ -2054,7 +2108,7 @@ private:
// Automatic whitespace skipping // Automatic whitespace skipping
if (grammar.count(WHITESPACE_DEFINITION_NAME)) { if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
auto& rule = (*data.grammar)[start]; auto& rule = (*data.grammar)[start];
rule.whiteSpaceOpe = (*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator(); rule.whitespaceOpe = wsp((*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator());
} }
return data.grammar; return data.grammar;

View File

@ -274,6 +274,32 @@ TEST_CASE("WHITESPACE test", "[general]")
REQUIRE(ret == true); REQUIRE(ret == true);
} }
// Parses a comma-separated list of bracketed items with a %whitespace rule
// made of SPACE / TAB, and expects spaces before and after the commas and at
// the end of the input to be accepted while each ITEM yields only the text
// inside its brackets.
TEST_CASE("WHITESPACE test2", "[general]")
{
    peg::parser parser(R"(
# Rules
ROOT <- ITEM (',' ITEM)*
ITEM <- '[' < [a-zA-Z0-9_]+ > ']'
%whitespace <- (SPACE / TAB)*
SPACE <- ' '
TAB <- '\t'
)");

    // Collect the string reported for every ITEM match, in match order.
    vector<string> captured;
    parser["ITEM"] = [&captured](const SemanticValues& sv) {
        captured.push_back(sv.str());
    };

    const auto ok = parser.parse(R"([one], [two] ,[three] )");

    REQUIRE(ok == true);
    REQUIRE(captured.size() == 3);
    REQUIRE(captured[0] == "one");
    REQUIRE(captured[1] == "two");
    REQUIRE(captured[2] == "three");
}
TEST_CASE("Skip token test", "[general]") TEST_CASE("Skip token test", "[general]")
{ {
peg::parser parser( peg::parser parser(