mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 20:05:31 +00:00
Major change in the way to handle token boundary.
This commit is contained in:
parent
9ce4597ef6
commit
5b88443270
69
README.md
69
README.md
@ -35,7 +35,7 @@ int main(void) {
|
|||||||
Additive <- Multitive '+' Additive / Multitive
|
Additive <- Multitive '+' Additive / Multitive
|
||||||
Multitive <- Primary '*' Multitive / Primary
|
Multitive <- Primary '*' Multitive / Primary
|
||||||
Primary <- '(' Additive ')' / Number
|
Primary <- '(' Additive ')' / Number
|
||||||
Number <- [0-9]+
|
Number <- < [0-9]+ >
|
||||||
%whitespace <- [ \t]*
|
%whitespace <- [ \t]*
|
||||||
)";
|
)";
|
||||||
|
|
||||||
@ -43,7 +43,7 @@ int main(void) {
|
|||||||
|
|
||||||
// (3) Setup an action
|
// (3) Setup an action
|
||||||
parser["Additive"] = [](const SemanticValues& sv) {
|
parser["Additive"] = [](const SemanticValues& sv) {
|
||||||
switch (sv.choice) {
|
switch (sv.choice()) {
|
||||||
case 0: // "Multitive '+' Additive"
|
case 0: // "Multitive '+' Additive"
|
||||||
return sv[0].get<int>() + sv[1].get<int>();
|
return sv[0].get<int>() + sv[1].get<int>();
|
||||||
default: // "Multitive"
|
default: // "Multitive"
|
||||||
@ -52,7 +52,7 @@ int main(void) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
parser["Multitive"] = [](const SemanticValues& sv) {
|
parser["Multitive"] = [](const SemanticValues& sv) {
|
||||||
switch (sv.choice) {
|
switch (sv.choice()) {
|
||||||
case 0: // "Primary '*' Multitive"
|
case 0: // "Primary '*' Multitive"
|
||||||
return sv[0].get<int>() * sv[1].get<int>();
|
return sv[0].get<int>() * sv[1].get<int>();
|
||||||
default: // "Primary"
|
default: // "Primary"
|
||||||
@ -61,11 +61,11 @@ int main(void) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
parser["Number"] = [](const SemanticValues& sv) {
|
parser["Number"] = [](const SemanticValues& sv) {
|
||||||
return stoi(sv.str(), nullptr, 10);
|
return stoi(sv.token(), nullptr, 10);
|
||||||
};
|
};
|
||||||
|
|
||||||
// (4) Parse
|
// (4) Parse
|
||||||
parser.packrat_parsing(); // Enable packrat parsing.
|
parser.enable_packrat_parsing(); // Enable packrat parsing.
|
||||||
|
|
||||||
int val;
|
int val;
|
||||||
parser.parse(" (1 + 2) * 3 ", val);
|
parser.parse(" (1 + 2) * 3 ", val);
|
||||||
@ -84,28 +84,24 @@ Here are available actions:
|
|||||||
`const SemanticValues& sv` contains semantic values. `SemanticValues` structure is defined as follows.
|
`const SemanticValues& sv` contains semantic values. `SemanticValues` structure is defined as follows.
|
||||||
|
|
||||||
```cpp
|
```cpp
|
||||||
struct SemanticValue {
|
struct SemanticValues : protected std::vector<any>
|
||||||
any val; // Semantic value
|
|
||||||
const char* name; // Definition name for the semantic value
|
|
||||||
const char* s; // Token start for the semantic value
|
|
||||||
size_t n; // Token length for the semantic value
|
|
||||||
|
|
||||||
// Cast semantic value
|
|
||||||
template <typename T> T& get();
|
|
||||||
template <typename T> const T& get() const;
|
|
||||||
|
|
||||||
// Get token
|
|
||||||
std::string str() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SemanticValues : protected std::vector<SemanticValue>
|
|
||||||
{
|
{
|
||||||
const char* s; // Token start
|
// Matched string
|
||||||
size_t n; // Token length
|
std::string str() const; // Matched string
|
||||||
size_t choice; // Choice number (0 based index)
|
const char* c_str() const; // Matched string start
|
||||||
|
size_t length() const; // Matched string length
|
||||||
|
|
||||||
// Get token
|
// Tokens
|
||||||
std::string str() const;
|
std::vector<
|
||||||
|
std::pair<
|
||||||
|
const char*, // Token start
|
||||||
|
size_t>> // Token length
|
||||||
|
tokens;
|
||||||
|
|
||||||
|
std::string token(size_t id = 0) const;
|
||||||
|
|
||||||
|
// Choice number (0 based index)
|
||||||
|
size_t choice() const;
|
||||||
|
|
||||||
// Transform the semantic value vector to another vector
|
// Transform the semantic value vector to another vector
|
||||||
template <typename T> vector<T> transform(size_t beg = 0, size_t end = -1) const;
|
template <typename T> vector<T> transform(size_t beg = 0, size_t end = -1) const;
|
||||||
@ -114,11 +110,9 @@ struct SemanticValues : protected std::vector<SemanticValue>
|
|||||||
|
|
||||||
`peg::any` class is very similar to [boost::any](http://www.boost.org/doc/libs/1_57_0/doc/html/any.html). You can obtain a value by casting it to the actual type. In order to determine the actual type, you have to check the return value type of the child action for the semantic value.
|
`peg::any` class is very similar to [boost::any](http://www.boost.org/doc/libs/1_57_0/doc/html/any.html). You can obtain a value by casting it to the actual type. In order to determine the actual type, you have to check the return value type of the child action for the semantic value.
|
||||||
|
|
||||||
`const char* s, size_t n` gives a pointer and length of the matched string. This is same as `sv.s` and `sv.n`.
|
|
||||||
|
|
||||||
`any& dt` is a data object which can be used by the user for whatever purposes.
|
`any& dt` is a data object which can be used by the user for whatever purposes.
|
||||||
|
|
||||||
The following example uses `<` ... ` >` operators. They are the *token boundary* operators. Each token boundary operator creates a semantic value that contains `const char*` of the position. It could be useful to eliminate unnecessary characters.
|
The following example uses `<` ... ` >` operators. They are the *token boundary* operators.
|
||||||
|
|
||||||
```cpp
|
```cpp
|
||||||
auto syntax = R"(
|
auto syntax = R"(
|
||||||
@ -131,7 +125,7 @@ peg pg(syntax);
|
|||||||
|
|
||||||
pg["TOKEN"] = [](const SemanticValues& sv) {
|
pg["TOKEN"] = [](const SemanticValues& sv) {
|
||||||
// 'token' doesn't include trailing whitespaces
|
// 'token' doesn't include trailing whitespaces
|
||||||
auto token = sv.str();
|
auto token = sv.token();
|
||||||
};
|
};
|
||||||
|
|
||||||
auto ret = pg.parse(" token1, token2 ");
|
auto ret = pg.parse(" token1, token2 ");
|
||||||
@ -185,19 +179,19 @@ ret = parser.parse("200", val);
|
|||||||
assert(ret == false);
|
assert(ret == false);
|
||||||
```
|
```
|
||||||
|
|
||||||
*before* and *after* actions are also available.
|
*enter* and *leave* actions are also available.
|
||||||
|
|
||||||
```cpp
|
```cpp
|
||||||
parser["RULE"].before = [](any& dt) {
|
parser["RULE"].enter = [](any& dt) {
|
||||||
std::cout << "before" << std::endl;
|
std::cout << "enter" << std::endl;
|
||||||
};
|
};
|
||||||
|
|
||||||
parser["RULE"] = [](const SemanticValues& sv, any& dt) {
|
parser["RULE"] = [](const SemanticValues& sv, any& dt) {
|
||||||
std::cout << "action!" << std::endl;
|
std::cout << "action!" << std::endl;
|
||||||
};
|
};
|
||||||
|
|
||||||
parser["RULE"].after = [](any& dt) {
|
parser["RULE"].leave = [](any& dt) {
|
||||||
std::cout << "after" << std::endl;
|
std::cout << "leave" << std::endl;
|
||||||
};
|
};
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -216,7 +210,7 @@ These are valid tokens:
|
|||||||
|
|
||||||
```
|
```
|
||||||
KEYWORD <- 'keyword'
|
KEYWORD <- 'keyword'
|
||||||
WORD <- [a-zA-Z0-9] [a-zA-Z0-9-_]* # no reference rule is used
|
WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used.
|
||||||
IDENT <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used.
|
IDENT <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used.
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -225,8 +219,8 @@ The following grammar accepts ` one, "two three", four `.
|
|||||||
```
|
```
|
||||||
ROOT <- ITEM (',' ITEM)*
|
ROOT <- ITEM (',' ITEM)*
|
||||||
ITEM <- WORD / PHRASE
|
ITEM <- WORD / PHRASE
|
||||||
WORD <- [a-z]+
|
WORD <- < [a-z]+ >
|
||||||
PHRASE <- '"' (!'"' .)* '"'
|
PHRASE <- < '"' (!'"' .)* '"' >
|
||||||
|
|
||||||
%whitespace <- [ \t\r\n]*
|
%whitespace <- [ \t\r\n]*
|
||||||
```
|
```
|
||||||
@ -413,7 +407,6 @@ Tested compilers
|
|||||||
TODO
|
TODO
|
||||||
----
|
----
|
||||||
|
|
||||||
* Semantic predicate (`&{ expr }` and `!{ expr }`)
|
|
||||||
* Unicode support (`.` matches a Unicode char. `\u????`, `\p{L}`)
|
* Unicode support (`.` matches a Unicode char. `\u????`, `\p{L}`)
|
||||||
* Allow `←` and `ε`
|
* Allow `←` and `ε`
|
||||||
|
|
||||||
|
@ -10,3 +10,6 @@ target_link_libraries(calc2 pthread)
|
|||||||
|
|
||||||
add_executable(calc3 calc3.cc)
|
add_executable(calc3 calc3.cc)
|
||||||
target_link_libraries(calc3 pthread)
|
target_link_libraries(calc3 pthread)
|
||||||
|
|
||||||
|
add_executable(calc_readme calc_readme.cc)
|
||||||
|
target_link_libraries(calc_readme pthread)
|
||||||
|
@ -46,9 +46,9 @@ int main(int argc, const char** argv)
|
|||||||
|
|
||||||
parser["EXPRESSION"] = reduce;
|
parser["EXPRESSION"] = reduce;
|
||||||
parser["TERM"] = reduce;
|
parser["TERM"] = reduce;
|
||||||
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
|
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
|
||||||
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
|
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
|
||||||
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.s); };
|
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
|
||||||
|
|
||||||
auto expr = argv[1];
|
auto expr = argv[1];
|
||||||
long val = 0;
|
long val = 0;
|
||||||
|
@ -49,9 +49,9 @@ int main(int argc, const char** argv)
|
|||||||
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
|
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
|
||||||
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
|
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
|
||||||
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
|
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
|
||||||
TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.s; };
|
TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.c_str(); };
|
||||||
FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.s; };
|
FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.c_str(); };
|
||||||
NUMBER <= oom(cls("0-9")), [](const SemanticValues& sv) { return atol(sv.s); };
|
NUMBER <= oom(cls("0-9")), [](const SemanticValues& sv) { return atol(sv.c_str()); };
|
||||||
|
|
||||||
auto expr = argv[1];
|
auto expr = argv[1];
|
||||||
long val = 0;
|
long val = 0;
|
||||||
|
@ -44,9 +44,9 @@ int main(int argc, const char** argv)
|
|||||||
TERM <- FACTOR (FACTOR_OPERATOR FACTOR)*
|
TERM <- FACTOR (FACTOR_OPERATOR FACTOR)*
|
||||||
FACTOR <- NUMBER / '(' EXPRESSION ')'
|
FACTOR <- NUMBER / '(' EXPRESSION ')'
|
||||||
|
|
||||||
TERM_OPERATOR <- [-+]
|
TERM_OPERATOR <- < [-+] >
|
||||||
FACTOR_OPERATOR <- [/*]
|
FACTOR_OPERATOR <- < [/*] >
|
||||||
NUMBER <- [0-9]+
|
NUMBER <- < [0-9]+ >
|
||||||
|
|
||||||
%whitespace <- [ \t\r\n]*
|
%whitespace <- [ \t\r\n]*
|
||||||
)");
|
)");
|
||||||
|
325
peglib.h
325
peglib.h
@ -204,81 +204,71 @@ auto make_scope_exit(EF&& exit_function) -> scope_exit<EF> {
|
|||||||
/*
|
/*
|
||||||
* Semantic values
|
* Semantic values
|
||||||
*/
|
*/
|
||||||
struct SemanticValue
|
struct SemanticValues : protected std::vector<any>
|
||||||
{
|
|
||||||
any val;
|
|
||||||
const char* s;
|
|
||||||
size_t n;
|
|
||||||
|
|
||||||
SemanticValue()
|
|
||||||
: s(nullptr), n(0) {}
|
|
||||||
|
|
||||||
SemanticValue(const any& val, const char* s, size_t n)
|
|
||||||
: val(val), s(s), n(n) {}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
T& get() {
|
|
||||||
return val.get<T>();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
const T& get() const {
|
|
||||||
return val.get<T>();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string str() const {
|
|
||||||
return std::string(s, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SemanticValues : protected std::vector<SemanticValue>
|
|
||||||
{
|
{
|
||||||
const char* path;
|
const char* path;
|
||||||
const char* ss;
|
const char* ss;
|
||||||
const char* s;
|
const char* c_str() const { return s_; }
|
||||||
size_t n;
|
size_t length() const { return n_; }
|
||||||
size_t choice;
|
size_t choice() const { return choice_; }
|
||||||
|
|
||||||
SemanticValues() : s(nullptr), n(0), choice(0) {}
|
std::vector<std::pair<const char*, size_t>> tokens;
|
||||||
|
|
||||||
typedef SemanticValue T;
|
SemanticValues() : s_(nullptr), n_(0), choice_(0) {}
|
||||||
using std::vector<T>::iterator;
|
|
||||||
using std::vector<T>::const_iterator;
|
using std::vector<any>::iterator;
|
||||||
using std::vector<T>::size;
|
using std::vector<any>::const_iterator;
|
||||||
using std::vector<T>::empty;
|
using std::vector<any>::size;
|
||||||
using std::vector<T>::assign;
|
using std::vector<any>::empty;
|
||||||
using std::vector<T>::begin;
|
using std::vector<any>::assign;
|
||||||
using std::vector<T>::end;
|
using std::vector<any>::begin;
|
||||||
using std::vector<T>::rbegin;
|
using std::vector<any>::end;
|
||||||
using std::vector<T>::rend;
|
using std::vector<any>::rbegin;
|
||||||
using std::vector<T>::operator[];
|
using std::vector<any>::rend;
|
||||||
using std::vector<T>::at;
|
using std::vector<any>::operator[];
|
||||||
using std::vector<T>::resize;
|
using std::vector<any>::at;
|
||||||
using std::vector<T>::front;
|
using std::vector<any>::resize;
|
||||||
using std::vector<T>::back;
|
using std::vector<any>::front;
|
||||||
using std::vector<T>::push_back;
|
using std::vector<any>::back;
|
||||||
using std::vector<T>::pop_back;
|
using std::vector<any>::push_back;
|
||||||
using std::vector<T>::insert;
|
using std::vector<any>::pop_back;
|
||||||
using std::vector<T>::erase;
|
using std::vector<any>::insert;
|
||||||
using std::vector<T>::clear;
|
using std::vector<any>::erase;
|
||||||
using std::vector<T>::swap;
|
using std::vector<any>::clear;
|
||||||
using std::vector<T>::emplace;
|
using std::vector<any>::swap;
|
||||||
using std::vector<T>::emplace_back;
|
using std::vector<any>::emplace;
|
||||||
|
using std::vector<any>::emplace_back;
|
||||||
|
|
||||||
std::string str() const {
|
std::string str() const {
|
||||||
return std::string(s, n);
|
return std::string(s_, n_);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string token(size_t id = 0) const {
|
||||||
|
if (!tokens.empty()) {
|
||||||
|
assert(id < tokens.size());
|
||||||
|
const auto& tok = tokens[id];
|
||||||
|
return std::string(tok.first, tok.second);
|
||||||
|
}
|
||||||
|
return std::string(s_, n_);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
auto transform(size_t beg = 0, size_t end = -1) const -> vector<T> {
|
auto transform(size_t beg = 0, size_t end = -1) const -> vector<T> {
|
||||||
return this->transform(beg, end, [](const SemanticValue& v) { return v.get<T>(); });
|
return this->transform(beg, end, [](const any& v) { return v.get<T>(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend class Context;
|
||||||
|
friend class PrioritizedChoice;
|
||||||
|
friend class Holder;
|
||||||
|
|
||||||
|
const char* s_;
|
||||||
|
size_t n_;
|
||||||
|
size_t choice_;
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
auto transform(F f) const -> vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> {
|
auto transform(F f) const -> vector<typename std::remove_const<decltype(f(any()))>::type> {
|
||||||
vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> r;
|
vector<typename std::remove_const<decltype(f(any()))>::type> r;
|
||||||
for (const auto& v: *this) {
|
for (const auto& v: *this) {
|
||||||
r.emplace_back(f(v));
|
r.emplace_back(f(v));
|
||||||
}
|
}
|
||||||
@ -286,8 +276,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
auto transform(size_t beg, size_t end, F f) const -> vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> {
|
auto transform(size_t beg, size_t end, F f) const -> vector<typename std::remove_const<decltype(f(any()))>::type> {
|
||||||
vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> r;
|
vector<typename std::remove_const<decltype(f(any()))>::type> r;
|
||||||
end = (std::min)(end, size());
|
end = (std::min)(end, size());
|
||||||
for (size_t i = beg; i < end; i++) {
|
for (size_t i = beg; i < end; i++) {
|
||||||
r.emplace_back(f((*this)[i]));
|
r.emplace_back(f((*this)[i]));
|
||||||
@ -316,20 +306,11 @@ any call(F fn, Args&&... args) {
|
|||||||
return fn(std::forward<Args>(args)...);
|
return fn(std::forward<Args>(args)...);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <
|
|
||||||
typename R, typename F,
|
|
||||||
typename std::enable_if<std::is_same<typename std::remove_cv<R>::type, SemanticValue>::value>::type*& = enabler,
|
|
||||||
typename... Args>
|
|
||||||
any call(F fn, Args&&... args) {
|
|
||||||
return fn(std::forward<Args>(args)...).val;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <
|
template <
|
||||||
typename R, typename F,
|
typename R, typename F,
|
||||||
typename std::enable_if<
|
typename std::enable_if<
|
||||||
!std::is_void<R>::value &&
|
!std::is_void<R>::value &&
|
||||||
!std::is_same<typename std::remove_cv<R>::type, any>::value &&
|
!std::is_same<typename std::remove_cv<R>::type, any>::value>::type*& = enabler,
|
||||||
!std::is_same<typename std::remove_cv<R>::type, SemanticValue>::value>::type*& = enabler,
|
|
||||||
typename... Args>
|
typename... Args>
|
||||||
any call(F fn, Args&&... args) {
|
any call(F fn, Args&&... args) {
|
||||||
return any(fn(std::forward<Args>(args)...));
|
return any(fn(std::forward<Args>(args)...));
|
||||||
@ -480,11 +461,11 @@ public:
|
|||||||
size_t value_stack_size;
|
size_t value_stack_size;
|
||||||
|
|
||||||
size_t nest_level;
|
size_t nest_level;
|
||||||
std::vector<Definition*> definition_stack;
|
|
||||||
|
bool in_token;
|
||||||
|
|
||||||
std::shared_ptr<Ope> whitespaceOpe;
|
std::shared_ptr<Ope> whitespaceOpe;
|
||||||
bool in_whiltespace;
|
bool in_whitespace;
|
||||||
bool in_token;
|
|
||||||
|
|
||||||
const size_t def_count;
|
const size_t def_count;
|
||||||
const bool enablePackratParsing;
|
const bool enablePackratParsing;
|
||||||
@ -510,9 +491,9 @@ public:
|
|||||||
, message_pos(nullptr)
|
, message_pos(nullptr)
|
||||||
, value_stack_size(0)
|
, value_stack_size(0)
|
||||||
, nest_level(0)
|
, nest_level(0)
|
||||||
, whitespaceOpe(whitespaceOpe)
|
|
||||||
, in_whiltespace(false)
|
|
||||||
, in_token(false)
|
, in_token(false)
|
||||||
|
, whitespaceOpe(whitespaceOpe)
|
||||||
|
, in_whitespace(false)
|
||||||
, def_count(def_count)
|
, def_count(def_count)
|
||||||
, enablePackratParsing(enablePackratParsing)
|
, enablePackratParsing(enablePackratParsing)
|
||||||
, cache_register(enablePackratParsing ? def_count * (l + 1) : 0)
|
, cache_register(enablePackratParsing ? def_count * (l + 1) : 0)
|
||||||
@ -563,8 +544,9 @@ public:
|
|||||||
}
|
}
|
||||||
sv.path = path;
|
sv.path = path;
|
||||||
sv.ss = s;
|
sv.ss = s;
|
||||||
sv.s = nullptr;
|
sv.s_ = nullptr;
|
||||||
sv.n = 0;
|
sv.n_ = 0;
|
||||||
|
sv.tokens.clear();
|
||||||
return sv;
|
return sv;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -677,9 +659,10 @@ public:
|
|||||||
if (!chldsv.empty()) {
|
if (!chldsv.empty()) {
|
||||||
sv.insert(sv.end(), chldsv.begin(), chldsv.end());
|
sv.insert(sv.end(), chldsv.begin(), chldsv.end());
|
||||||
}
|
}
|
||||||
sv.s = chldsv.s;
|
sv.s_ = chldsv.c_str();
|
||||||
sv.n = chldsv.n;
|
sv.n_ = chldsv.length();
|
||||||
sv.choice = id;
|
sv.choice_ = id;
|
||||||
|
sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end());
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
id++;
|
id++;
|
||||||
@ -707,12 +690,16 @@ public:
|
|||||||
c.nest_level++;
|
c.nest_level++;
|
||||||
auto se = make_scope_exit([&]() { c.nest_level--; });
|
auto se = make_scope_exit([&]() { c.nest_level--; });
|
||||||
auto save_sv_size = sv.size();
|
auto save_sv_size = sv.size();
|
||||||
|
auto save_tok_size = sv.tokens.size();
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto len = rule.parse(s + i, n - i, sv, c, dt);
|
auto len = rule.parse(s + i, n - i, sv, c, dt);
|
||||||
if (fail(len)) {
|
if (fail(len)) {
|
||||||
if (sv.size() != save_sv_size) {
|
if (sv.size() != save_sv_size) {
|
||||||
sv.erase(sv.begin() + save_sv_size);
|
sv.erase(sv.begin() + save_sv_size);
|
||||||
}
|
}
|
||||||
|
if (sv.tokens.size() != save_tok_size) {
|
||||||
|
sv.tokens.erase(sv.tokens.begin() + save_tok_size);
|
||||||
|
}
|
||||||
c.error_pos = save_error_pos;
|
c.error_pos = save_error_pos;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -749,12 +736,16 @@ public:
|
|||||||
c.nest_level++;
|
c.nest_level++;
|
||||||
auto se = make_scope_exit([&]() { c.nest_level--; });
|
auto se = make_scope_exit([&]() { c.nest_level--; });
|
||||||
auto save_sv_size = sv.size();
|
auto save_sv_size = sv.size();
|
||||||
|
auto save_tok_size = sv.tokens.size();
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto len = rule.parse(s + i, n - i, sv, c, dt);
|
auto len = rule.parse(s + i, n - i, sv, c, dt);
|
||||||
if (fail(len)) {
|
if (fail(len)) {
|
||||||
if (sv.size() != save_sv_size) {
|
if (sv.size() != save_sv_size) {
|
||||||
sv.erase(sv.begin() + save_sv_size);
|
sv.erase(sv.begin() + save_sv_size);
|
||||||
}
|
}
|
||||||
|
if (sv.tokens.size() != save_tok_size) {
|
||||||
|
sv.tokens.erase(sv.tokens.begin() + save_tok_size);
|
||||||
|
}
|
||||||
c.error_pos = save_error_pos;
|
c.error_pos = save_error_pos;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -778,6 +769,7 @@ public:
|
|||||||
auto save_error_pos = c.error_pos;
|
auto save_error_pos = c.error_pos;
|
||||||
c.nest_level++;
|
c.nest_level++;
|
||||||
auto save_sv_size = sv.size();
|
auto save_sv_size = sv.size();
|
||||||
|
auto save_tok_size = sv.tokens.size();
|
||||||
auto se = make_scope_exit([&]() { c.nest_level--; });
|
auto se = make_scope_exit([&]() { c.nest_level--; });
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
auto len = rule.parse(s, n, sv, c, dt);
|
auto len = rule.parse(s, n, sv, c, dt);
|
||||||
@ -787,6 +779,9 @@ public:
|
|||||||
if (sv.size() != save_sv_size) {
|
if (sv.size() != save_sv_size) {
|
||||||
sv.erase(sv.begin() + save_sv_size);
|
sv.erase(sv.begin() + save_sv_size);
|
||||||
}
|
}
|
||||||
|
if (sv.tokens.size() != save_tok_size) {
|
||||||
|
sv.tokens.erase(sv.tokens.begin() + save_tok_size);
|
||||||
|
}
|
||||||
c.error_pos = save_error_pos;
|
c.error_pos = save_error_pos;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -968,15 +963,7 @@ class TokenBoundary : public Ope
|
|||||||
public:
|
public:
|
||||||
TokenBoundary(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
TokenBoundary(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
|
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
|
||||||
const auto& rule = *ope_;
|
|
||||||
auto len = rule.parse(s, n, sv, c, dt);
|
|
||||||
if (success(len)) {
|
|
||||||
sv.s = s;
|
|
||||||
sv.n = len;
|
|
||||||
}
|
|
||||||
return len;
|
|
||||||
}
|
|
||||||
|
|
||||||
void accept(Visitor& v) override;
|
void accept(Visitor& v) override;
|
||||||
|
|
||||||
@ -1085,11 +1072,11 @@ public:
|
|||||||
Whitespace(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
Whitespace(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
|
||||||
|
|
||||||
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
|
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
|
||||||
if (c.in_whiltespace) {
|
if (c.in_whitespace) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
c.in_whiltespace = true;
|
c.in_whitespace = true;
|
||||||
auto se = make_scope_exit([&]() { c.in_whiltespace = false; });
|
auto se = make_scope_exit([&]() { c.in_whitespace = false; });
|
||||||
const auto& rule = *ope_;
|
const auto& rule = *ope_;
|
||||||
return rule.parse(s, n, sv, c, dt);
|
return rule.parse(s, n, sv, c, dt);
|
||||||
}
|
}
|
||||||
@ -1280,8 +1267,8 @@ public:
|
|||||||
SemanticValues sv;
|
SemanticValues sv;
|
||||||
any dt;
|
any dt;
|
||||||
auto r = parse_core(s, n, sv, dt, path);
|
auto r = parse_core(s, n, sv, dt, path);
|
||||||
if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) {
|
if (r.ret && !sv.empty() && !sv.front().is_undefined()) {
|
||||||
val = sv[0].val.get<T>();
|
val = sv[0].get<T>();
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -1296,8 +1283,8 @@ public:
|
|||||||
Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const {
|
Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const {
|
||||||
SemanticValues sv;
|
SemanticValues sv;
|
||||||
auto r = parse_core(s, n, sv, dt, path);
|
auto r = parse_core(s, n, sv, dt, path);
|
||||||
if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) {
|
if (r.ret && !sv.empty() && !sv.front().is_undefined()) {
|
||||||
val = sv[0].val.get<T>();
|
val = sv[0].get<T>();
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -1336,7 +1323,7 @@ public:
|
|||||||
size_t id;
|
size_t id;
|
||||||
Action action;
|
Action action;
|
||||||
std::function<void (any& dt)> enter;
|
std::function<void (any& dt)> enter;
|
||||||
std::function<void (any& dt)> exit;
|
std::function<void (any& dt)> leave;
|
||||||
std::function<std::string ()> error_message;
|
std::function<std::string ()> error_message;
|
||||||
bool ignoreSemanticValue;
|
bool ignoreSemanticValue;
|
||||||
std::shared_ptr<Ope> whitespaceOpe;
|
std::shared_ptr<Ope> whitespaceOpe;
|
||||||
@ -1355,8 +1342,13 @@ private:
|
|||||||
AssignIDToDefinition assignId;
|
AssignIDToDefinition assignId;
|
||||||
holder_->accept(assignId);
|
holder_->accept(assignId);
|
||||||
|
|
||||||
|
std::shared_ptr<Ope> ope = holder_;
|
||||||
|
if (whitespaceOpe) {
|
||||||
|
ope = std::make_shared<Sequence>(whitespaceOpe, ope);
|
||||||
|
}
|
||||||
|
|
||||||
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
|
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
|
||||||
auto len = holder_->parse(s, n, sv, cxt, dt);
|
auto len = ope->parse(s, n, sv, cxt, dt);
|
||||||
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
|
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1379,18 +1371,38 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Skip whitespace
|
// Skip whitespace
|
||||||
const auto d = c.definition_stack.back();
|
if (!c.in_token) {
|
||||||
if (!d->is_token && c.whitespaceOpe) {
|
if (c.whitespaceOpe) {
|
||||||
auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
|
auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
|
||||||
if (fail(len)) {
|
if (fail(len)) {
|
||||||
return -1;
|
return -1;
|
||||||
|
}
|
||||||
|
i += len;
|
||||||
}
|
}
|
||||||
i += len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
|
||||||
|
c.in_token = true;
|
||||||
|
auto se = make_scope_exit([&]() { c.in_token = false; });
|
||||||
|
const auto& rule = *ope_;
|
||||||
|
auto len = rule.parse(s, n, sv, c, dt);
|
||||||
|
if (success(len)) {
|
||||||
|
sv.tokens.push_back(std::make_pair(s, len));
|
||||||
|
|
||||||
|
if (c.whitespaceOpe) {
|
||||||
|
auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt);
|
||||||
|
if (fail(l)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
len += l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
|
inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
|
||||||
if (!ope_) {
|
if (!ope_) {
|
||||||
throw std::logic_error("Uninitialized definition ope was used...");
|
throw std::logic_error("Uninitialized definition ope was used...");
|
||||||
@ -1402,12 +1414,8 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
|
|||||||
|
|
||||||
size_t len;
|
size_t len;
|
||||||
any val;
|
any val;
|
||||||
const char* token_boundary_s = s;
|
|
||||||
size_t token_boundary_n = n;
|
|
||||||
|
|
||||||
c.packrat(s, outer_->id, len, val, [&](any& val) {
|
c.packrat(s, outer_->id, len, val, [&](any& val) {
|
||||||
c.definition_stack.push_back(outer_);
|
|
||||||
|
|
||||||
auto& chldsv = c.push();
|
auto& chldsv = c.push();
|
||||||
|
|
||||||
if (outer_->enter) {
|
if (outer_->enter) {
|
||||||
@ -1415,54 +1423,20 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto se = make_scope_exit([&]() {
|
auto se = make_scope_exit([&]() {
|
||||||
c.definition_stack.pop_back();
|
|
||||||
|
|
||||||
c.pop();
|
c.pop();
|
||||||
|
|
||||||
if (outer_->exit) {
|
if (outer_->leave) {
|
||||||
outer_->exit(dt);
|
outer_->leave(dt);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
auto ope = ope_;
|
const auto& rule = *ope_;
|
||||||
|
len = rule.parse(s, n, chldsv, c, dt);
|
||||||
if (!c.in_token && c.whitespaceOpe) {
|
|
||||||
if (c.definition_stack.size() == 1) {
|
|
||||||
if (outer_->is_token && !outer_->has_token_boundary) {
|
|
||||||
ope = std::make_shared<Sequence>(c.whitespaceOpe, std::make_shared<TokenBoundary>(ope_));
|
|
||||||
} else {
|
|
||||||
ope = std::make_shared<Sequence>(c.whitespaceOpe, ope_);
|
|
||||||
}
|
|
||||||
} else if (outer_->is_token) {
|
|
||||||
if (!outer_->has_token_boundary) {
|
|
||||||
ope = std::make_shared<Sequence>(std::make_shared<TokenBoundary>(ope_), c.whitespaceOpe);
|
|
||||||
} else {
|
|
||||||
ope = std::make_shared<Sequence>(ope_, c.whitespaceOpe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto& rule = *ope;
|
|
||||||
if (!c.in_token && outer_->is_token) {
|
|
||||||
c.in_token = true;
|
|
||||||
auto se = make_scope_exit([&]() { c.in_token = false; });
|
|
||||||
|
|
||||||
len = rule.parse(s, n, chldsv, c, dt);
|
|
||||||
} else {
|
|
||||||
len = rule.parse(s, n, chldsv, c, dt);
|
|
||||||
}
|
|
||||||
|
|
||||||
token_boundary_n = len;
|
|
||||||
|
|
||||||
// Invoke action
|
// Invoke action
|
||||||
if (success(len)) {
|
if (success(len)) {
|
||||||
if (chldsv.s) {
|
chldsv.s_ = s;
|
||||||
token_boundary_s = chldsv.s;
|
chldsv.n_ = len;
|
||||||
token_boundary_n = chldsv.n;
|
|
||||||
} else {
|
|
||||||
chldsv.s = s;
|
|
||||||
chldsv.n = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
val = reduce(chldsv, dt);
|
val = reduce(chldsv, dt);
|
||||||
@ -1480,7 +1454,7 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
|
|||||||
|
|
||||||
if (success(len)) {
|
if (success(len)) {
|
||||||
if (!outer_->ignoreSemanticValue) {
|
if (!outer_->ignoreSemanticValue) {
|
||||||
sv.emplace_back(val, token_boundary_s, token_boundary_n);
|
sv.emplace_back(val);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (outer_->error_message) {
|
if (outer_->error_message) {
|
||||||
@ -1500,7 +1474,7 @@ inline any Holder::reduce(const SemanticValues& sv, any& dt) const {
|
|||||||
} else if (sv.empty()) {
|
} else if (sv.empty()) {
|
||||||
return any();
|
return any();
|
||||||
} else {
|
} else {
|
||||||
return sv.front().val;
|
return sv.front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1625,7 +1599,7 @@ inline std::shared_ptr<Ope> ref(const std::unordered_map<std::string, Definition
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope>& ope) {
|
inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope>& ope) {
|
||||||
return std::make_shared<Ignore>(std::make_shared<Whitespace>(ope));
|
return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
/*-----------------------------------------------------------------------------
|
||||||
@ -1815,7 +1789,7 @@ private:
|
|||||||
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
|
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
|
||||||
g["Primary"] <= cho(seq(opt(g["IGNORE"]), g["Identifier"], npd(g["LEFTARROW"])),
|
g["Primary"] <= cho(seq(opt(g["IGNORE"]), g["Identifier"], npd(g["LEFTARROW"])),
|
||||||
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
||||||
seq(g["Begin"], g["Expression"], g["End"]),
|
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
|
||||||
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
|
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
|
||||||
g["Literal"], g["Class"], g["DOT"]);
|
g["Literal"], g["Class"], g["DOT"]);
|
||||||
|
|
||||||
@ -1853,8 +1827,8 @@ private:
|
|||||||
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
|
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
|
||||||
g["EndOfFile"] <= npd(dot());
|
g["EndOfFile"] <= npd(dot());
|
||||||
|
|
||||||
g["Begin"] <= seq(chr('<'), g["Spacing"]);
|
g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
|
||||||
g["End"] <= seq(chr('>'), g["Spacing"]);
|
g["EndTok"] <= seq(chr('>'), g["Spacing"]);
|
||||||
|
|
||||||
g["BeginCap"] <= seq(chr('$'), tok(opt(g["Identifier"])), chr('<'), g["Spacing"]);
|
g["BeginCap"] <= seq(chr('$'), tok(opt(g["Identifier"])), chr('<'), g["Spacing"]);
|
||||||
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
|
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
|
||||||
@ -1888,7 +1862,7 @@ private:
|
|||||||
data.start = name;
|
data.start = name;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
data.duplicates.emplace_back(name, sv.s);
|
data.duplicates.emplace_back(name, sv.c_str());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1955,7 +1929,7 @@ private:
|
|||||||
g["Primary"] = [&](const SemanticValues& sv, any& dt) -> std::shared_ptr<Ope> {
|
g["Primary"] = [&](const SemanticValues& sv, any& dt) -> std::shared_ptr<Ope> {
|
||||||
Data& data = *dt.get<Data*>();
|
Data& data = *dt.get<Data*>();
|
||||||
|
|
||||||
switch (sv.choice) {
|
switch (sv.choice()) {
|
||||||
case 0: { // Reference
|
case 0: { // Reference
|
||||||
auto ignore = (sv.size() == 2);
|
auto ignore = (sv.size() == 2);
|
||||||
auto baseId = ignore ? 1 : 0;
|
auto baseId = ignore ? 1 : 0;
|
||||||
@ -1963,13 +1937,13 @@ private:
|
|||||||
const auto& ident = sv[baseId].get<std::string>();
|
const auto& ident = sv[baseId].get<std::string>();
|
||||||
|
|
||||||
if (!data.references.count(ident)) {
|
if (!data.references.count(ident)) {
|
||||||
data.references[ident] = sv.s; // for error handling
|
data.references[ident] = sv.c_str(); // for error handling
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ignore) {
|
if (ignore) {
|
||||||
return ign(ref(*data.grammar, ident, sv.s));
|
return ign(ref(*data.grammar, ident, sv.c_str()));
|
||||||
} else {
|
} else {
|
||||||
return ref(*data.grammar, ident, sv.s);
|
return ref(*data.grammar, ident, sv.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case 1: { // (Expression)
|
case 1: { // (Expression)
|
||||||
@ -1979,7 +1953,7 @@ private:
|
|||||||
return tok(sv[1].get<std::shared_ptr<Ope>>());
|
return tok(sv[1].get<std::shared_ptr<Ope>>());
|
||||||
}
|
}
|
||||||
case 3: { // Capture
|
case 3: { // Capture
|
||||||
auto name = std::string(sv[0].s, sv[0].n);
|
const auto& name = sv[0].get<std::string>();
|
||||||
auto ope = sv[1].get<std::shared_ptr<Ope>>();
|
auto ope = sv[1].get<std::shared_ptr<Ope>>();
|
||||||
return cap(ope, data.match_action, ++data.capture_count, name);
|
return cap(ope, data.match_action, ++data.capture_count, name);
|
||||||
}
|
}
|
||||||
@ -1990,24 +1964,27 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
g["IdentCont"] = [](const SemanticValues& sv) {
|
g["IdentCont"] = [](const SemanticValues& sv) {
|
||||||
return std::string(sv.s, sv.n);
|
return std::string(sv.c_str(), sv.length());
|
||||||
};
|
};
|
||||||
|
|
||||||
g["Literal"] = [this](const SemanticValues& sv) {
|
g["Literal"] = [this](const SemanticValues& sv) {
|
||||||
return lit(resolve_escape_sequence(sv.s, sv.n));
|
const auto& tok = sv.tokens.front();
|
||||||
|
return lit(resolve_escape_sequence(tok.first, tok.second));
|
||||||
};
|
};
|
||||||
g["Class"] = [this](const SemanticValues& sv) {
|
g["Class"] = [this](const SemanticValues& sv) {
|
||||||
return cls(resolve_escape_sequence(sv.s, sv.n));
|
const auto& tok = sv.tokens.front();
|
||||||
|
return cls(resolve_escape_sequence(tok.first, tok.second));
|
||||||
};
|
};
|
||||||
|
|
||||||
g["AND"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["AND"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
g["NOT"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["NOT"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
g["QUESTION"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["QUESTION"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
g["STAR"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["STAR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
g["PLUS"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["PLUS"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
|
|
||||||
|
|
||||||
g["DOT"] = [](const SemanticValues& sv) { return dot(); };
|
g["DOT"] = [](const SemanticValues& sv) { return dot(); };
|
||||||
|
|
||||||
|
g["BeginCap"] = [](const SemanticValues& sv) { return sv.token(); };
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Grammar> perform_core(
|
std::shared_ptr<Grammar> perform_core(
|
||||||
@ -2509,12 +2486,12 @@ public:
|
|||||||
if (!rule.action) {
|
if (!rule.action) {
|
||||||
auto is_token = rule.is_token;
|
auto is_token = rule.is_token;
|
||||||
rule.action = [=](const SemanticValues& sv) {
|
rule.action = [=](const SemanticValues& sv) {
|
||||||
|
auto line = line_info(sv.ss, sv.c_str());
|
||||||
|
|
||||||
if (is_token) {
|
if (is_token) {
|
||||||
auto line = line_info(sv.ss, sv.s);
|
return std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.str());
|
||||||
return std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), std::string(sv.s, sv.n));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto line = line_info(sv.ss, sv.s);
|
|
||||||
auto ast = std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.transform<std::shared_ptr<T>>());
|
auto ast = std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.transform<std::shared_ptr<T>>());
|
||||||
|
|
||||||
for (auto node: ast->nodes) {
|
for (auto node: ast->nodes) {
|
||||||
|
54
test/test.cc
54
test/test.cc
@ -98,7 +98,7 @@ TEST_CASE("String capture test3", "[general]")
|
|||||||
std::vector<std::string> tags;
|
std::vector<std::string> tags;
|
||||||
|
|
||||||
pg["TOKEN"] = [&](const SemanticValues& sv) {
|
pg["TOKEN"] = [&](const SemanticValues& sv) {
|
||||||
tags.push_back(sv.str());
|
tags.push_back(sv.token());
|
||||||
};
|
};
|
||||||
|
|
||||||
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
|
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
|
||||||
@ -205,7 +205,7 @@ TEST_CASE("Lambda action test", "[general]")
|
|||||||
|
|
||||||
string ss;
|
string ss;
|
||||||
parser["CHAR"] = [&](const SemanticValues& sv) {
|
parser["CHAR"] = [&](const SemanticValues& sv) {
|
||||||
ss += *sv.s;
|
ss += *sv.c_str();
|
||||||
};
|
};
|
||||||
|
|
||||||
bool ret = parser.parse("hello");
|
bool ret = parser.parse("hello");
|
||||||
@ -213,7 +213,7 @@ TEST_CASE("Lambda action test", "[general]")
|
|||||||
REQUIRE(ss == "hello");
|
REQUIRE(ss == "hello");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("enter/exit handlers test", "[general]")
|
TEST_CASE("enter/leave handlers test", "[general]")
|
||||||
{
|
{
|
||||||
parser parser(R"(
|
parser parser(R"(
|
||||||
START <- LTOKEN '=' RTOKEN
|
START <- LTOKEN '=' RTOKEN
|
||||||
@ -226,7 +226,7 @@ TEST_CASE("enter/exit handlers test", "[general]")
|
|||||||
auto& require_upper_case = *dt.get<bool*>();
|
auto& require_upper_case = *dt.get<bool*>();
|
||||||
require_upper_case = false;
|
require_upper_case = false;
|
||||||
};
|
};
|
||||||
parser["LTOKEN"].exit = [&](any& dt) {
|
parser["LTOKEN"].leave = [&](any& dt) {
|
||||||
auto& require_upper_case = *dt.get<bool*>();
|
auto& require_upper_case = *dt.get<bool*>();
|
||||||
require_upper_case = true;
|
require_upper_case = true;
|
||||||
};
|
};
|
||||||
@ -266,8 +266,8 @@ TEST_CASE("WHITESPACE test", "[general]")
|
|||||||
ITEM <- WORD / PHRASE
|
ITEM <- WORD / PHRASE
|
||||||
|
|
||||||
# Tokens
|
# Tokens
|
||||||
WORD <- [a-zA-Z0-9_]+
|
WORD <- < [a-zA-Z0-9_]+ >
|
||||||
PHRASE <- '"' (!'"' .)* '"'
|
PHRASE <- < '"' (!'"' .)* '"' >
|
||||||
|
|
||||||
%whitespace <- [ \t\r\n]*
|
%whitespace <- [ \t\r\n]*
|
||||||
)");
|
)");
|
||||||
@ -291,7 +291,7 @@ TEST_CASE("WHITESPACE test2", "[general]")
|
|||||||
|
|
||||||
vector<string> items;
|
vector<string> items;
|
||||||
parser["ITEM"] = [&](const SemanticValues& sv) {
|
parser["ITEM"] = [&](const SemanticValues& sv) {
|
||||||
items.push_back(sv.str());
|
items.push_back(sv.token());
|
||||||
};
|
};
|
||||||
|
|
||||||
auto ret = parser.parse(R"([one], [two] ,[three] )");
|
auto ret = parser.parse(R"([one], [two] ,[three] )");
|
||||||
@ -324,7 +324,7 @@ TEST_CASE("Skip token test2", "[general]")
|
|||||||
{
|
{
|
||||||
peg::parser parser(R"(
|
peg::parser parser(R"(
|
||||||
ROOT <- ITEM (',' ITEM)*
|
ROOT <- ITEM (',' ITEM)*
|
||||||
ITEM <- ([a-z0-9])+
|
ITEM <- < ([a-z0-9])+ >
|
||||||
%whitespace <- [ \t]*
|
%whitespace <- [ \t]*
|
||||||
)");
|
)");
|
||||||
|
|
||||||
@ -407,7 +407,7 @@ TEST_CASE("Simple calculator test", "[general]")
|
|||||||
parser parser(syntax);
|
parser parser(syntax);
|
||||||
|
|
||||||
parser["Additive"] = [](const SemanticValues& sv) {
|
parser["Additive"] = [](const SemanticValues& sv) {
|
||||||
switch (sv.choice) {
|
switch (sv.choice()) {
|
||||||
case 0:
|
case 0:
|
||||||
return sv[0].get<int>() + sv[1].get<int>();
|
return sv[0].get<int>() + sv[1].get<int>();
|
||||||
default:
|
default:
|
||||||
@ -416,7 +416,7 @@ TEST_CASE("Simple calculator test", "[general]")
|
|||||||
};
|
};
|
||||||
|
|
||||||
parser["Multitive"] = [](const SemanticValues& sv) {
|
parser["Multitive"] = [](const SemanticValues& sv) {
|
||||||
switch (sv.choice) {
|
switch (sv.choice()) {
|
||||||
case 0:
|
case 0:
|
||||||
return sv[0].get<int>() * sv[1].get<int>();
|
return sv[0].get<int>() * sv[1].get<int>();
|
||||||
default:
|
default:
|
||||||
@ -425,7 +425,7 @@ TEST_CASE("Simple calculator test", "[general]")
|
|||||||
};
|
};
|
||||||
|
|
||||||
parser["Number"] = [](const SemanticValues& sv) {
|
parser["Number"] = [](const SemanticValues& sv) {
|
||||||
return atoi(sv.s);
|
return atoi(sv.c_str());
|
||||||
};
|
};
|
||||||
|
|
||||||
int val;
|
int val;
|
||||||
@ -448,10 +448,10 @@ TEST_CASE("Calculator test", "[general]")
|
|||||||
|
|
||||||
// Setup actions
|
// Setup actions
|
||||||
auto reduce = [](const SemanticValues& sv) -> long {
|
auto reduce = [](const SemanticValues& sv) -> long {
|
||||||
long ret = sv[0].val.get<long>();
|
long ret = sv[0].get<long>();
|
||||||
for (auto i = 1u; i < sv.size(); i += 2) {
|
for (auto i = 1u; i < sv.size(); i += 2) {
|
||||||
auto num = sv[i + 1].val.get<long>();
|
auto num = sv[i + 1].get<long>();
|
||||||
switch (sv[i].val.get<char>()) {
|
switch (sv[i].get<char>()) {
|
||||||
case '+': ret += num; break;
|
case '+': ret += num; break;
|
||||||
case '-': ret -= num; break;
|
case '-': ret -= num; break;
|
||||||
case '*': ret *= num; break;
|
case '*': ret *= num; break;
|
||||||
@ -463,8 +463,8 @@ TEST_CASE("Calculator test", "[general]")
|
|||||||
|
|
||||||
EXPRESSION = reduce;
|
EXPRESSION = reduce;
|
||||||
TERM = reduce;
|
TERM = reduce;
|
||||||
TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.s; };
|
TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.s; };
|
FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
NUMBER = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
|
NUMBER = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
|
||||||
|
|
||||||
// Parse
|
// Parse
|
||||||
@ -494,10 +494,10 @@ TEST_CASE("Calculator test2", "[general]")
|
|||||||
|
|
||||||
// Setup actions
|
// Setup actions
|
||||||
auto reduce = [](const SemanticValues& sv) -> long {
|
auto reduce = [](const SemanticValues& sv) -> long {
|
||||||
long ret = sv[0].val.get<long>();
|
long ret = sv[0].get<long>();
|
||||||
for (auto i = 1u; i < sv.size(); i += 2) {
|
for (auto i = 1u; i < sv.size(); i += 2) {
|
||||||
auto num = sv[i + 1].val.get<long>();
|
auto num = sv[i + 1].get<long>();
|
||||||
switch (sv[i].val.get<char>()) {
|
switch (sv[i].get<char>()) {
|
||||||
case '+': ret += num; break;
|
case '+': ret += num; break;
|
||||||
case '-': ret -= num; break;
|
case '-': ret -= num; break;
|
||||||
case '*': ret *= num; break;
|
case '*': ret *= num; break;
|
||||||
@ -509,8 +509,8 @@ TEST_CASE("Calculator test2", "[general]")
|
|||||||
|
|
||||||
g["EXPRESSION"] = reduce;
|
g["EXPRESSION"] = reduce;
|
||||||
g["TERM"] = reduce;
|
g["TERM"] = reduce;
|
||||||
g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.s; };
|
g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
|
||||||
g["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
|
g["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
|
||||||
|
|
||||||
// Parse
|
// Parse
|
||||||
@ -535,10 +535,10 @@ TEST_CASE("Calculator test3", "[general]")
|
|||||||
);
|
);
|
||||||
|
|
||||||
auto reduce = [](const SemanticValues& sv) -> long {
|
auto reduce = [](const SemanticValues& sv) -> long {
|
||||||
long ret = sv[0].val.get<long>();
|
long ret = sv[0].get<long>();
|
||||||
for (auto i = 1u; i < sv.size(); i += 2) {
|
for (auto i = 1u; i < sv.size(); i += 2) {
|
||||||
auto num = sv[i + 1].val.get<long>();
|
auto num = sv[i + 1].get<long>();
|
||||||
switch (sv[i].val.get<char>()) {
|
switch (sv[i].get<char>()) {
|
||||||
case '+': ret += num; break;
|
case '+': ret += num; break;
|
||||||
case '-': ret -= num; break;
|
case '-': ret -= num; break;
|
||||||
case '*': ret *= num; break;
|
case '*': ret *= num; break;
|
||||||
@ -551,8 +551,8 @@ TEST_CASE("Calculator test3", "[general]")
|
|||||||
// Setup actions
|
// Setup actions
|
||||||
parser["EXPRESSION"] = reduce;
|
parser["EXPRESSION"] = reduce;
|
||||||
parser["TERM"] = reduce;
|
parser["TERM"] = reduce;
|
||||||
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
|
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
|
||||||
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
|
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
|
||||||
parser["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
|
parser["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
|
||||||
|
|
||||||
// Parse
|
// Parse
|
||||||
@ -805,7 +805,7 @@ TEST_CASE("Semantic predicate test", "[predicate]")
|
|||||||
parser parser("NUMBER <- [0-9]+");
|
parser parser("NUMBER <- [0-9]+");
|
||||||
|
|
||||||
parser["NUMBER"] = [](const SemanticValues& sv) {
|
parser["NUMBER"] = [](const SemanticValues& sv) {
|
||||||
auto val = stol(sv.str(), nullptr, 10);
|
auto val = stol(sv.token(), nullptr, 10);
|
||||||
if (val != 100) {
|
if (val != 100) {
|
||||||
throw parse_error("value error!!");
|
throw parse_error("value error!!");
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user