mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 11:55:30 +00:00
Added %word rule
This commit is contained in:
parent
ca950520ef
commit
a77edadfa7
18
README.md
18
README.md
@ -15,8 +15,8 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
|
||||
* `\x20` (Hex number char)
|
||||
* `$<` ... `>` (Capture operator)
|
||||
* `$name<` ... `>` (Named capture operator)
|
||||
|
||||
This library also supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing.
|
||||
* `%whitespace` (Automatic whitespace skipping)
|
||||
* `%word` (Word expression)
|
||||
|
||||
If you need a Go language version, please see [*go-peg*](https://github.com/yhirose/go-peg).
|
||||
|
||||
@ -246,6 +246,20 @@ PHRASE <- < '"' (!'"' .)* '"' >
|
||||
%whitespace <- [ \t\r\n]*
|
||||
```
|
||||
|
||||
Word expression
|
||||
---------------
|
||||
|
||||
```cpp
|
||||
peg::parser parser(R"(
|
||||
ROOT <- 'hello' 'world'
|
||||
%whitespace <- [ \t\r\n]*
|
||||
%word <- [a-z]+
|
||||
)");
|
||||
|
||||
parser.parse("hello world"); // OK
|
||||
parser.parse("helloworld");  // NG: 'hello' is directly followed by another %word match
|
||||
```
|
||||
|
||||
AST generation
|
||||
--------------
|
||||
|
||||
|
45
peglib.h
45
peglib.h
@ -510,6 +510,8 @@ public:
|
||||
std::shared_ptr<Ope> whitespaceOpe;
|
||||
bool in_whitespace;
|
||||
|
||||
std::shared_ptr<Ope> wordOpe;
|
||||
|
||||
const size_t def_count;
|
||||
const bool enablePackratParsing;
|
||||
std::vector<bool> cache_registered;
|
||||
@ -525,6 +527,7 @@ public:
|
||||
size_t a_l,
|
||||
size_t a_def_count,
|
||||
std::shared_ptr<Ope> a_whitespaceOpe,
|
||||
std::shared_ptr<Ope> a_wordOpe,
|
||||
bool a_enablePackratParsing,
|
||||
Tracer a_tracer)
|
||||
: path(a_path)
|
||||
@ -537,6 +540,7 @@ public:
|
||||
, in_token(false)
|
||||
, whitespaceOpe(a_whitespaceOpe)
|
||||
, in_whitespace(false)
|
||||
, wordOpe(a_wordOpe)
|
||||
, def_count(a_def_count)
|
||||
, enablePackratParsing(a_enablePackratParsing)
|
||||
, cache_registered(enablePackratParsing ? def_count * (l + 1) : 0)
|
||||
@ -895,13 +899,19 @@ public:
|
||||
class LiteralString : public Ope
|
||||
{
|
||||
public:
|
||||
LiteralString(const std::string& s) : lit_(s) {}
|
||||
LiteralString(const std::string& s)
|
||||
: lit_(s)
|
||||
, init_is_word_(false)
|
||||
, is_word_(false)
|
||||
{}
|
||||
|
||||
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
|
||||
|
||||
void accept(Visitor& v) override;
|
||||
|
||||
std::string lit_;
|
||||
mutable bool init_is_word_;
|
||||
mutable bool is_word_;
|
||||
};
|
||||
|
||||
class CharacterClass : public Ope
|
||||
@ -1202,6 +1212,7 @@ struct IsToken : public Ope::Visitor
|
||||
};
|
||||
|
||||
static const char* WHITESPACE_DEFINITION_NAME = "%whitespace";
|
||||
static const char* WORD_DEFINITION_NAME = "%word";
|
||||
|
||||
/*
|
||||
* Definition
|
||||
@ -1239,6 +1250,7 @@ public:
|
||||
: name(std::move(rhs.name))
|
||||
, ignoreSemanticValue(rhs.ignoreSemanticValue)
|
||||
, whitespaceOpe(rhs.whitespaceOpe)
|
||||
, wordOpe(rhs.wordOpe)
|
||||
, enablePackratParsing(rhs.enablePackratParsing)
|
||||
, is_token(rhs.is_token)
|
||||
, has_token_boundary(rhs.has_token_boundary)
|
||||
@ -1358,6 +1370,7 @@ public:
|
||||
std::function<std::string ()> error_message;
|
||||
bool ignoreSemanticValue;
|
||||
std::shared_ptr<Ope> whitespaceOpe;
|
||||
std::shared_ptr<Ope> wordOpe;
|
||||
bool enablePackratParsing;
|
||||
bool is_token;
|
||||
bool has_token_boundary;
|
||||
@ -1378,7 +1391,7 @@ private:
|
||||
ope = std::make_shared<Sequence>(whitespaceOpe, ope);
|
||||
}
|
||||
|
||||
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
|
||||
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, wordOpe, enablePackratParsing, tracer);
|
||||
auto len = ope->parse(s, n, sv, cxt, dt);
|
||||
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
|
||||
}
|
||||
@ -1401,6 +1414,28 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv,
|
||||
}
|
||||
}
|
||||
|
||||
// Word check
|
||||
static Context dummy_c(nullptr, lit_.data(), lit_.size(), 0, nullptr, nullptr, false, nullptr);
|
||||
static SemanticValues dummy_sv;
|
||||
static any dummy_dt;
|
||||
|
||||
if (!init_is_word_) { // TODO: Protect with mutex
|
||||
if (c.wordOpe) {
|
||||
auto len = c.wordOpe->parse(lit_.data(), lit_.size(), dummy_sv, dummy_c, dummy_dt);
|
||||
is_word_ = success(len);
|
||||
}
|
||||
init_is_word_ = true;
|
||||
}
|
||||
|
||||
if (is_word_) {
|
||||
auto ope = std::make_shared<NotPredicate>(c.wordOpe);
|
||||
auto len = ope->parse(s + i, n - i, dummy_sv, dummy_c, dummy_dt);
|
||||
if (fail(len)) {
|
||||
return static_cast<size_t>(-1);
|
||||
}
|
||||
i += len;
|
||||
}
|
||||
|
||||
// Skip whitespace
|
||||
if (!c.in_token) {
|
||||
if (c.whitespaceOpe) {
|
||||
@ -2068,6 +2103,12 @@ private:
|
||||
rule.whitespaceOpe = wsp((*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator());
|
||||
}
|
||||
|
||||
// Word expression
|
||||
if (grammar.count(WORD_DEFINITION_NAME)) {
|
||||
auto& rule = (*data.grammar)[start];
|
||||
rule.wordOpe = (*data.grammar)[WORD_DEFINITION_NAME].get_core_operator();
|
||||
}
|
||||
|
||||
return data.grammar;
|
||||
}
|
||||
|
||||
|
14
test/test.cc
14
test/test.cc
@ -255,6 +255,20 @@ TEST_CASE("WHITESPACE test2", "[general]")
|
||||
REQUIRE(items[2] == "three");
|
||||
}
|
||||
|
||||
TEST_CASE("Word expression test", "[general]") {
|
||||
peg::parser parser(R"(
|
||||
ROOT <- 'hello' ','? 'world'
|
||||
%whitespace <- [ \t\r\n]*
|
||||
%word <- [a-z]+
|
||||
)");
|
||||
|
||||
REQUIRE(parser.parse("helloworld") == false);
|
||||
REQUIRE(parser.parse("hello world") == true);
|
||||
REQUIRE(parser.parse("hello,world") == true);
|
||||
REQUIRE(parser.parse("hello, world") == true);
|
||||
REQUIRE(parser.parse("hello , world") == true);
|
||||
}
|
||||
|
||||
TEST_CASE("Skip token test", "[general]")
|
||||
{
|
||||
peg::parser parser(
|
||||
|
Loading…
Reference in New Issue
Block a user