Added %word rule

This commit is contained in:
yhirose 2018-07-13 17:26:57 -04:00
parent ca950520ef
commit a77edadfa7
3 changed files with 73 additions and 4 deletions

View File

@ -15,8 +15,8 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
* `\x20` (Hex number char)
* `$<` ... `>` (Capture operator)
* `$name<` ... `>` (Named capture operator)
This library also supports linear-time parsing, known as [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
* `%whitespace` (Automatic whitespace skipping)
* `%word` (Word expression)
If you need a Go language version, please see [*go-peg*](https://github.com/yhirose/go-peg).
@ -246,6 +246,20 @@ PHRASE <- < '"' (!'"' .)* '"' >
%whitespace <- [ \t\r\n]*
```
Word expression
---------------
```cpp
// With %word defined, a literal that is itself a word must not be directly
// followed by more word characters, so adjacent keywords need a separator.
peg::parser parser(R"(
ROOT <- 'hello' 'world'
%whitespace <- [ \t\r\n]*
%word <- [a-z]+
)");
parser.parse("hello world"); // OK: whitespace separates the two words
parser.parse("helloworld");  // NG (fails): 'hello' is immediately followed by a word character
```
AST generation
--------------

View File

@ -510,6 +510,8 @@ public:
std::shared_ptr<Ope> whitespaceOpe;
bool in_whitespace;
std::shared_ptr<Ope> wordOpe;
const size_t def_count;
const bool enablePackratParsing;
std::vector<bool> cache_registered;
@ -525,6 +527,7 @@ public:
size_t a_l,
size_t a_def_count,
std::shared_ptr<Ope> a_whitespaceOpe,
std::shared_ptr<Ope> a_wordOpe,
bool a_enablePackratParsing,
Tracer a_tracer)
: path(a_path)
@ -537,6 +540,7 @@ public:
, in_token(false)
, whitespaceOpe(a_whitespaceOpe)
, in_whitespace(false)
, wordOpe(a_wordOpe)
, def_count(a_def_count)
, enablePackratParsing(a_enablePackratParsing)
, cache_registered(enablePackratParsing ? def_count * (l + 1) : 0)
@ -895,13 +899,19 @@ public:
/*
 * LiteralString
 *
 * Operator that matches a fixed literal string.
 *
 * Besides the literal text it carries a small lazily filled cache used by the
 * %word check in parse(): the first parse() call records whether the literal
 * itself is matched by the context's word operator. The cache members are
 * `mutable` because parse() is a const member function.
 *
 * NOTE(review): the cache is written without synchronisation (parse() carries
 * a "Protect with mutex" TODO) - confirm before sharing a grammar across threads.
 */
class LiteralString : public Ope
{
public:
    // Single constructor: stores the literal and starts with an empty word cache.
    // (A second constructor with the same signature would be a redefinition.)
    LiteralString(const std::string& s)
        : lit_(s)
        , init_is_word_(false)
        , is_word_(false)
    {}

    size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;

    void accept(Visitor& v) override;

    std::string lit_;            // literal text to match
    mutable bool init_is_word_;  // true once is_word_ has been computed by parse()
    mutable bool is_word_;       // cached result: the word operator matched lit_
};
class CharacterClass : public Ope
@ -1202,6 +1212,7 @@ struct IsToken : public Ope::Visitor
};
// Reserved rule names that the grammar loader looks up by name:
//   %whitespace - rule used for automatic whitespace skipping
//   %word       - rule describing what counts as a word for literal matching
// The pointers themselves are const so the names cannot be reassigned and an
// unused copy in a translation unit does not count as an unused mutable variable.
static const char* const WHITESPACE_DEFINITION_NAME = "%whitespace";
static const char* const WORD_DEFINITION_NAME = "%word";
/*
* Definition
@ -1239,6 +1250,7 @@ public:
: name(std::move(rhs.name))
, ignoreSemanticValue(rhs.ignoreSemanticValue)
, whitespaceOpe(rhs.whitespaceOpe)
, wordOpe(rhs.wordOpe)
, enablePackratParsing(rhs.enablePackratParsing)
, is_token(rhs.is_token)
, has_token_boundary(rhs.has_token_boundary)
@ -1358,6 +1370,7 @@ public:
std::function<std::string ()> error_message;
bool ignoreSemanticValue;
std::shared_ptr<Ope> whitespaceOpe;
std::shared_ptr<Ope> wordOpe;
bool enablePackratParsing;
bool is_token;
bool has_token_boundary;
@ -1378,7 +1391,7 @@ private:
ope = std::make_shared<Sequence>(whitespaceOpe, ope);
}
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, wordOpe, enablePackratParsing, tracer);
auto len = ope->parse(s, n, sv, cxt, dt);
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
}
@ -1401,6 +1414,28 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv,
}
}
// Word check: when a word operator is available and this literal is itself
// matched by it, the literal must not be directly followed by more word input.
//
// The three objects below are function-local statics, so they are created once
// and shared by every LiteralString instance and every call. dummy_c is a
// Context with no path, no whitespace/word operator, packrat parsing disabled
// and no tracer; its input buffer is the lit_ of whichever instance reaches
// this code first.
// NOTE(review): shared mutable statics - looks unsafe for concurrent parses;
// confirm that reusing a single Context for all literals is intended.
static Context dummy_c(nullptr, lit_.data(), lit_.size(), 0, nullptr, nullptr, false, nullptr);
static SemanticValues dummy_sv;
static any dummy_dt;
// Classify this literal lazily on first use and cache the answer in the
// mutable members so that later calls skip the check.
if (!init_is_word_) { // TODO: Protect with mutex
if (c.wordOpe) {
// Run the word operator over the literal text itself.
// NOTE(review): success(len) presumably only means "did not fail", so a
// literal that merely starts with a word may also be flagged - confirm.
auto len = c.wordOpe->parse(lit_.data(), lit_.size(), dummy_sv, dummy_c, dummy_dt);
is_word_ = success(len);
}
// Set even when there is no word operator, so the check runs only once.
init_is_word_ = true;
}
if (is_word_) {
// Negative lookahead with the word operator at s + i (i is set earlier in
// this function; presumably the offset just past the matched literal).
auto ope = std::make_shared<NotPredicate>(c.wordOpe);
auto len = ope->parse(s + i, n - i, dummy_sv, dummy_c, dummy_dt);
if (fail(len)) {
// Word input continues right after the literal: reject the match.
return static_cast<size_t>(-1);
}
// NOTE(review): a predicate is expected to consume nothing, so len should
// be 0 here - confirm.
i += len;
}
// Skip whitespace
if (!c.in_token) {
if (c.whitespaceOpe) {
@ -2068,6 +2103,12 @@ private:
rule.whitespaceOpe = wsp((*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator());
}
// Word expression: if the grammar defines a rule named WORD_DEFINITION_NAME
// ("%word"), store that rule's core operator in the start rule's wordOpe.
// NOTE(review): the check uses `grammar` while the lookups use `*data.grammar`;
// presumably both refer to the same grammar object - confirm.
if (grammar.count(WORD_DEFINITION_NAME)) {
auto& rule = (*data.grammar)[start];
rule.wordOpe = (*data.grammar)[WORD_DEFINITION_NAME].get_core_operator();
}
return data.grammar;
}

View File

@ -255,6 +255,20 @@ TEST_CASE("WHITESPACE test2", "[general]")
REQUIRE(items[2] == "three");
}
TEST_CASE("Word expression test", "[general]") {
    // Grammar under test: two word literals with an optional comma between
    // them, automatic whitespace skipping, and %word defined as lowercase letters.
    peg::parser parser(R"(
ROOT <- 'hello' ','? 'world'
%whitespace <- [ \t\r\n]*
%word <- [a-z]+
)");

    // Helper: does the grammar accept the given input?
    const auto accepts = [&parser](const char* input) -> bool {
        return parser.parse(input);
    };

    // Two word literals glued together must be rejected.
    REQUIRE_FALSE(accepts("helloworld"));

    // Whitespace and/or the comma separate the words, so these are accepted.
    REQUIRE(accepts("hello world"));
    REQUIRE(accepts("hello,world"));
    REQUIRE(accepts("hello, world"));
    REQUIRE(accepts("hello , world"));
}
TEST_CASE("Skip token test", "[general]")
{
peg::parser parser(