Major change in the way to handle token boundary.

This commit is contained in:
yhirose 2016-01-23 20:26:54 -05:00
parent 9ce4597ef6
commit 5b88443270
7 changed files with 221 additions and 248 deletions

View File

@ -35,7 +35,7 @@ int main(void) {
Additive <- Multitive '+' Additive / Multitive
Multitive <- Primary '*' Multitive / Primary
Primary <- '(' Additive ')' / Number
Number <- [0-9]+
Number <- < [0-9]+ >
%whitespace <- [ \t]*
)";
@ -43,7 +43,7 @@ int main(void) {
// (3) Setup an action
parser["Additive"] = [](const SemanticValues& sv) {
switch (sv.choice) {
switch (sv.choice()) {
case 0: // "Multitive '+' Additive"
return sv[0].get<int>() + sv[1].get<int>();
default: // "Multitive"
@ -52,7 +52,7 @@ int main(void) {
};
parser["Multitive"] = [](const SemanticValues& sv) {
switch (sv.choice) {
switch (sv.choice()) {
case 0: // "Primary '*' Multitive"
return sv[0].get<int>() * sv[1].get<int>();
default: // "Primary"
@ -61,11 +61,11 @@ int main(void) {
};
parser["Number"] = [](const SemanticValues& sv) {
return stoi(sv.str(), nullptr, 10);
return stoi(sv.token(), nullptr, 10);
};
// (4) Parse
parser.packrat_parsing(); // Enable packrat parsing.
parser.enable_packrat_parsing(); // Enable packrat parsing.
int val;
parser.parse(" (1 + 2) * 3 ", val);
@ -84,28 +84,24 @@ Here are available actions:
`const SemanticValues& sv` contains semantic values. `SemanticValues` structure is defined as follows.
```cpp
struct SemanticValue {
any val; // Semantic value
const char* name; // Definition name for the sematic value
const char* s; // Token start for the semantic value
size_t n; // Token length for the semantic value
// Cast semantic value
template <typename T> T& get();
template <typename T> const T& get() const;
// Get token
std::string str() const;
};
struct SemanticValues : protected std::vector<SemanticValue>
struct SemanticValues : protected std::vector<any>
{
const char* s; // Token start
size_t n; // Token length
size_t choice; // Choice number (0 based index)
// Matched string
std::string str() const; // Matched string
const char* c_str() const; // Matched string start
size_t length() const; // Matched string length
// Get token
std::string str() const;
// Tokens
std::vector<
std::pair<
const char*, // Token start
size_t>> // Token length
tokens;
std::string token(size_t id = 0) const;
// Choice number (0 based index)
size_t choice() const;
// Transform the semantic value vector to another vector
template <typename T> vector<T> transform(size_t beg = 0, size_t end = -1) const;
@ -114,11 +110,9 @@ struct SemanticValues : protected std::vector<SemanticValue>
`peg::any` class is very similar to [boost::any](http://www.boost.org/doc/libs/1_57_0/doc/html/any.html). You can obtain a value by castning it to the actual type. In order to determine the actual type, you have to check the return value type of the child action for the semantic value.
`const char* s, size_t n` gives a pointer and length of the matched string. This is same as `sv.s` and `sv.n`.
`any& dt` is a data object which can be used by the user for whatever purposes.
The following example uses `<` ... ` >` operators. They are the *token boundary* operators. Each token boundary operator creates a semantic value that contains `const char*` of the position. It could be useful to eliminate unnecessary characters.
The following example uses `<` ... ` >` operators. They are the *token boundary* operators.
```cpp
auto syntax = R"(
@ -131,7 +125,7 @@ peg pg(syntax);
pg["TOKEN"] = [](const SemanticValues& sv) {
// 'token' doesn't include trailing whitespaces
auto token = sv.str();
auto token = sv.token();
};
auto ret = pg.parse(" token1, token2 ");
@ -185,19 +179,19 @@ ret = parser.parse("200", val);
assert(ret == false);
```
*before* and *after* actions are also avalable.
*enter* and *leave* actions are also avalable.
```cpp
parser["RULE"].before = [](any& dt) {
std::cout << "before" << std::endl;
parser["RULE"].enter = [](any& dt) {
std::cout << "enter" << std::endl;
};
parser["RULE"] = [](const SemanticValues& sv, any& dt) {
std::cout << "action!" << std::endl;
};
parser["RULE"].after = [](any& dt) {
std::cout << "after" << std::endl;
parser["RULE"].leave = [](any& dt) {
std::cout << "leave" << std::endl;
};
```
@ -216,7 +210,7 @@ These are valid tokens:
```
KEYWORD <- 'keyword'
WORD <- [a-zA-Z0-9] [a-zA-Z0-9-_]* # no reference rule is used
WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used.
IDNET <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used.
```
@ -225,8 +219,8 @@ The following grammar accepts ` one, "two three", four `.
```
ROOT <- ITEM (',' ITEM)*
ITEM <- WORD / PHRASE
WORD <- [a-z]+
PHRASE <- '"' (!'"' .)* '"'
WORD <- < [a-z]+ >
PHRASE <- < '"' (!'"' .)* '"' >
%whitespace <- [ \t\r\n]*
```
@ -413,7 +407,6 @@ Tested compilers
TODO
----
* Semantic predicate (`&{ expr }` and `!{ expr }`)
* Unicode support (`.` matches a Unicode char. `\u????`, `\p{L}`)
* Allow `←` and `ε`

View File

@ -10,3 +10,6 @@ target_link_libraries(calc2 pthread)
add_executable(calc3 calc3.cc)
target_link_libraries(calc3 pthread)
add_executable(calc_readme calc_readme.cc)
target_link_libraries(calc_readme pthread)

View File

@ -46,9 +46,9 @@ int main(int argc, const char** argv)
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.s); };
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); };
auto expr = argv[1];
long val = 0;

View File

@ -49,9 +49,9 @@ int main(int argc, const char** argv)
EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce;
TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce;
FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')')));
TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.s; };
FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.s; };
NUMBER <= oom(cls("0-9")), [](const SemanticValues& sv) { return atol(sv.s); };
TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.c_str(); };
FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.c_str(); };
NUMBER <= oom(cls("0-9")), [](const SemanticValues& sv) { return atol(sv.c_str()); };
auto expr = argv[1];
long val = 0;

View File

@ -44,9 +44,9 @@ int main(int argc, const char** argv)
TERM <- FACTOR (FACTOR_OPERATOR FACTOR)*
FACTOR <- NUMBER / '(' EXPRESSION ')'
TERM_OPERATOR <- [-+]
FACTOR_OPERATOR <- [/*]
NUMBER <- [0-9]+
TERM_OPERATOR <- < [-+] >
FACTOR_OPERATOR <- < [/*] >
NUMBER <- < [0-9]+ >
%whitespace <- [ \t\r\n]*
)");

325
peglib.h
View File

@ -204,81 +204,71 @@ auto make_scope_exit(EF&& exit_function) -> scope_exit<EF> {
/*
* Semantic values
*/
struct SemanticValue
{
any val;
const char* s;
size_t n;
SemanticValue()
: s(nullptr), n(0) {}
SemanticValue(const any& val, const char* s, size_t n)
: val(val), s(s), n(n) {}
template <typename T>
T& get() {
return val.get<T>();
}
template <typename T>
const T& get() const {
return val.get<T>();
}
std::string str() const {
return std::string(s, n);
}
};
struct SemanticValues : protected std::vector<SemanticValue>
struct SemanticValues : protected std::vector<any>
{
const char* path;
const char* ss;
const char* s;
size_t n;
size_t choice;
const char* c_str() const { return s_; }
size_t length() const { return n_; }
size_t choice() const { return choice_; }
SemanticValues() : s(nullptr), n(0), choice(0) {}
std::vector<std::pair<const char*, size_t>> tokens;
typedef SemanticValue T;
using std::vector<T>::iterator;
using std::vector<T>::const_iterator;
using std::vector<T>::size;
using std::vector<T>::empty;
using std::vector<T>::assign;
using std::vector<T>::begin;
using std::vector<T>::end;
using std::vector<T>::rbegin;
using std::vector<T>::rend;
using std::vector<T>::operator[];
using std::vector<T>::at;
using std::vector<T>::resize;
using std::vector<T>::front;
using std::vector<T>::back;
using std::vector<T>::push_back;
using std::vector<T>::pop_back;
using std::vector<T>::insert;
using std::vector<T>::erase;
using std::vector<T>::clear;
using std::vector<T>::swap;
using std::vector<T>::emplace;
using std::vector<T>::emplace_back;
SemanticValues() : s_(nullptr), n_(0), choice_(0) {}
using std::vector<any>::iterator;
using std::vector<any>::const_iterator;
using std::vector<any>::size;
using std::vector<any>::empty;
using std::vector<any>::assign;
using std::vector<any>::begin;
using std::vector<any>::end;
using std::vector<any>::rbegin;
using std::vector<any>::rend;
using std::vector<any>::operator[];
using std::vector<any>::at;
using std::vector<any>::resize;
using std::vector<any>::front;
using std::vector<any>::back;
using std::vector<any>::push_back;
using std::vector<any>::pop_back;
using std::vector<any>::insert;
using std::vector<any>::erase;
using std::vector<any>::clear;
using std::vector<any>::swap;
using std::vector<any>::emplace;
using std::vector<any>::emplace_back;
std::string str() const {
return std::string(s, n);
return std::string(s_, n_);
}
std::string token(size_t id = 0) const {
if (!tokens.empty()) {
assert(id < tokens.size());
const auto& tok = tokens[id];
return std::string(tok.first, tok.second);
}
return std::string(s_, n_);
}
template <typename T>
auto transform(size_t beg = 0, size_t end = -1) const -> vector<T> {
return this->transform(beg, end, [](const SemanticValue& v) { return v.get<T>(); });
return this->transform(beg, end, [](const any& v) { return v.get<T>(); });
}
private:
friend class Context;
friend class PrioritizedChoice;
friend class Holder;
const char* s_;
size_t n_;
size_t choice_;
template <typename F>
auto transform(F f) const -> vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> {
vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> r;
auto transform(F f) const -> vector<typename std::remove_const<decltype(f(any()))>::type> {
vector<typename std::remove_const<decltype(f(any()))>::type> r;
for (const auto& v: *this) {
r.emplace_back(f(v));
}
@ -286,8 +276,8 @@ private:
}
template <typename F>
auto transform(size_t beg, size_t end, F f) const -> vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> {
vector<typename std::remove_const<decltype(f(SemanticValue()))>::type> r;
auto transform(size_t beg, size_t end, F f) const -> vector<typename std::remove_const<decltype(f(any()))>::type> {
vector<typename std::remove_const<decltype(f(any()))>::type> r;
end = (std::min)(end, size());
for (size_t i = beg; i < end; i++) {
r.emplace_back(f((*this)[i]));
@ -316,20 +306,11 @@ any call(F fn, Args&&... args) {
return fn(std::forward<Args>(args)...);
}
template <
typename R, typename F,
typename std::enable_if<std::is_same<typename std::remove_cv<R>::type, SemanticValue>::value>::type*& = enabler,
typename... Args>
any call(F fn, Args&&... args) {
return fn(std::forward<Args>(args)...).val;
}
template <
typename R, typename F,
typename std::enable_if<
!std::is_void<R>::value &&
!std::is_same<typename std::remove_cv<R>::type, any>::value &&
!std::is_same<typename std::remove_cv<R>::type, SemanticValue>::value>::type*& = enabler,
!std::is_same<typename std::remove_cv<R>::type, any>::value>::type*& = enabler,
typename... Args>
any call(F fn, Args&&... args) {
return any(fn(std::forward<Args>(args)...));
@ -480,11 +461,11 @@ public:
size_t value_stack_size;
size_t nest_level;
std::vector<Definition*> definition_stack;
bool in_token;
std::shared_ptr<Ope> whitespaceOpe;
bool in_whiltespace;
bool in_token;
bool in_whitespace;
const size_t def_count;
const bool enablePackratParsing;
@ -510,9 +491,9 @@ public:
, message_pos(nullptr)
, value_stack_size(0)
, nest_level(0)
, whitespaceOpe(whitespaceOpe)
, in_whiltespace(false)
, in_token(false)
, whitespaceOpe(whitespaceOpe)
, in_whitespace(false)
, def_count(def_count)
, enablePackratParsing(enablePackratParsing)
, cache_register(enablePackratParsing ? def_count * (l + 1) : 0)
@ -563,8 +544,9 @@ public:
}
sv.path = path;
sv.ss = s;
sv.s = nullptr;
sv.n = 0;
sv.s_ = nullptr;
sv.n_ = 0;
sv.tokens.clear();
return sv;
}
@ -677,9 +659,10 @@ public:
if (!chldsv.empty()) {
sv.insert(sv.end(), chldsv.begin(), chldsv.end());
}
sv.s = chldsv.s;
sv.n = chldsv.n;
sv.choice = id;
sv.s_ = chldsv.c_str();
sv.n_ = chldsv.length();
sv.choice_ = id;
sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end());
return len;
}
id++;
@ -707,12 +690,16 @@ public:
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
auto save_sv_size = sv.size();
auto save_tok_size = sv.tokens.size();
const auto& rule = *ope_;
auto len = rule.parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
if (sv.size() != save_sv_size) {
sv.erase(sv.begin() + save_sv_size);
}
if (sv.tokens.size() != save_tok_size) {
sv.tokens.erase(sv.tokens.begin() + save_tok_size);
}
c.error_pos = save_error_pos;
break;
}
@ -749,12 +736,16 @@ public:
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
auto save_sv_size = sv.size();
auto save_tok_size = sv.tokens.size();
const auto& rule = *ope_;
auto len = rule.parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
if (sv.size() != save_sv_size) {
sv.erase(sv.begin() + save_sv_size);
}
if (sv.tokens.size() != save_tok_size) {
sv.tokens.erase(sv.tokens.begin() + save_tok_size);
}
c.error_pos = save_error_pos;
break;
}
@ -778,6 +769,7 @@ public:
auto save_error_pos = c.error_pos;
c.nest_level++;
auto save_sv_size = sv.size();
auto save_tok_size = sv.tokens.size();
auto se = make_scope_exit([&]() { c.nest_level--; });
const auto& rule = *ope_;
auto len = rule.parse(s, n, sv, c, dt);
@ -787,6 +779,9 @@ public:
if (sv.size() != save_sv_size) {
sv.erase(sv.begin() + save_sv_size);
}
if (sv.tokens.size() != save_tok_size) {
sv.tokens.erase(sv.tokens.begin() + save_tok_size);
}
c.error_pos = save_error_pos;
return 0;
}
@ -968,15 +963,7 @@ class TokenBoundary : public Ope
public:
TokenBoundary(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
const auto& rule = *ope_;
auto len = rule.parse(s, n, sv, c, dt);
if (success(len)) {
sv.s = s;
sv.n = len;
}
return len;
}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
void accept(Visitor& v) override;
@ -1085,11 +1072,11 @@ public:
Whitespace(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
if (c.in_whiltespace) {
if (c.in_whitespace) {
return 0;
}
c.in_whiltespace = true;
auto se = make_scope_exit([&]() { c.in_whiltespace = false; });
c.in_whitespace = true;
auto se = make_scope_exit([&]() { c.in_whitespace = false; });
const auto& rule = *ope_;
return rule.parse(s, n, sv, c, dt);
}
@ -1280,8 +1267,8 @@ public:
SemanticValues sv;
any dt;
auto r = parse_core(s, n, sv, dt, path);
if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) {
val = sv[0].val.get<T>();
if (r.ret && !sv.empty() && !sv.front().is_undefined()) {
val = sv[0].get<T>();
}
return r;
}
@ -1296,8 +1283,8 @@ public:
Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const {
SemanticValues sv;
auto r = parse_core(s, n, sv, dt, path);
if (r.ret && !sv.empty() && !sv.front().val.is_undefined()) {
val = sv[0].val.get<T>();
if (r.ret && !sv.empty() && !sv.front().is_undefined()) {
val = sv[0].get<T>();
}
return r;
}
@ -1336,7 +1323,7 @@ public:
size_t id;
Action action;
std::function<void (any& dt)> enter;
std::function<void (any& dt)> exit;
std::function<void (any& dt)> leave;
std::function<std::string ()> error_message;
bool ignoreSemanticValue;
std::shared_ptr<Ope> whitespaceOpe;
@ -1355,8 +1342,13 @@ private:
AssignIDToDefinition assignId;
holder_->accept(assignId);
std::shared_ptr<Ope> ope = holder_;
if (whitespaceOpe) {
ope = std::make_shared<Sequence>(whitespaceOpe, ope);
}
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
auto len = holder_->parse(s, n, sv, cxt, dt);
auto len = ope->parse(s, n, sv, cxt, dt);
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
}
@ -1379,18 +1371,38 @@ inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv,
}
// Skip whiltespace
const auto d = c.definition_stack.back();
if (!d->is_token && c.whitespaceOpe) {
auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
return -1;
if (!c.in_token) {
if (c.whitespaceOpe) {
auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
return -1;
}
i += len;
}
i += len;
}
return i;
}
inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
c.in_token = true;
auto se = make_scope_exit([&]() { c.in_token = false; });
const auto& rule = *ope_;
auto len = rule.parse(s, n, sv, c, dt);
if (success(len)) {
sv.tokens.push_back(std::make_pair(s, len));
if (c.whitespaceOpe) {
auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt);
if (fail(l)) {
return -1;
}
len += l;
}
}
return len;
}
inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
if (!ope_) {
throw std::logic_error("Uninitialized definition ope was used...");
@ -1402,12 +1414,8 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
size_t len;
any val;
const char* token_boundary_s = s;
size_t token_boundary_n = n;
c.packrat(s, outer_->id, len, val, [&](any& val) {
c.definition_stack.push_back(outer_);
auto& chldsv = c.push();
if (outer_->enter) {
@ -1415,54 +1423,20 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
}
auto se = make_scope_exit([&]() {
c.definition_stack.pop_back();
c.pop();
if (outer_->exit) {
outer_->exit(dt);
if (outer_->leave) {
outer_->leave(dt);
}
});
auto ope = ope_;
if (!c.in_token && c.whitespaceOpe) {
if (c.definition_stack.size() == 1) {
if (outer_->is_token && !outer_->has_token_boundary) {
ope = std::make_shared<Sequence>(c.whitespaceOpe, std::make_shared<TokenBoundary>(ope_));
} else {
ope = std::make_shared<Sequence>(c.whitespaceOpe, ope_);
}
} else if (outer_->is_token) {
if (!outer_->has_token_boundary) {
ope = std::make_shared<Sequence>(std::make_shared<TokenBoundary>(ope_), c.whitespaceOpe);
} else {
ope = std::make_shared<Sequence>(ope_, c.whitespaceOpe);
}
}
}
const auto& rule = *ope;
if (!c.in_token && outer_->is_token) {
c.in_token = true;
auto se = make_scope_exit([&]() { c.in_token = false; });
len = rule.parse(s, n, chldsv, c, dt);
} else {
len = rule.parse(s, n, chldsv, c, dt);
}
token_boundary_n = len;
const auto& rule = *ope_;
len = rule.parse(s, n, chldsv, c, dt);
// Invoke action
if (success(len)) {
if (chldsv.s) {
token_boundary_s = chldsv.s;
token_boundary_n = chldsv.n;
} else {
chldsv.s = s;
chldsv.n = len;
}
chldsv.s_ = s;
chldsv.n_ = len;
try {
val = reduce(chldsv, dt);
@ -1480,7 +1454,7 @@ inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context
if (success(len)) {
if (!outer_->ignoreSemanticValue) {
sv.emplace_back(val, token_boundary_s, token_boundary_n);
sv.emplace_back(val);
}
} else {
if (outer_->error_message) {
@ -1500,7 +1474,7 @@ inline any Holder::reduce(const SemanticValues& sv, any& dt) const {
} else if (sv.empty()) {
return any();
} else {
return sv.front().val;
return sv.front();
}
}
@ -1625,7 +1599,7 @@ inline std::shared_ptr<Ope> ref(const std::unordered_map<std::string, Definition
}
inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope>& ope) {
return std::make_shared<Ignore>(std::make_shared<Whitespace>(ope));
return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
}
/*-----------------------------------------------------------------------------
@ -1815,7 +1789,7 @@ private:
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
g["Primary"] <= cho(seq(opt(g["IGNORE"]), g["Identifier"], npd(g["LEFTARROW"])),
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
seq(g["Begin"], g["Expression"], g["End"]),
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
g["Literal"], g["Class"], g["DOT"]);
@ -1853,8 +1827,8 @@ private:
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
g["EndOfFile"] <= npd(dot());
g["Begin"] <= seq(chr('<'), g["Spacing"]);
g["End"] <= seq(chr('>'), g["Spacing"]);
g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
g["EndTok"] <= seq(chr('>'), g["Spacing"]);
g["BeginCap"] <= seq(chr('$'), tok(opt(g["Identifier"])), chr('<'), g["Spacing"]);
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
@ -1888,7 +1862,7 @@ private:
data.start = name;
}
} else {
data.duplicates.emplace_back(name, sv.s);
data.duplicates.emplace_back(name, sv.c_str());
}
};
@ -1955,7 +1929,7 @@ private:
g["Primary"] = [&](const SemanticValues& sv, any& dt) -> std::shared_ptr<Ope> {
Data& data = *dt.get<Data*>();
switch (sv.choice) {
switch (sv.choice()) {
case 0: { // Reference
auto ignore = (sv.size() == 2);
auto baseId = ignore ? 1 : 0;
@ -1963,13 +1937,13 @@ private:
const auto& ident = sv[baseId].get<std::string>();
if (!data.references.count(ident)) {
data.references[ident] = sv.s; // for error handling
data.references[ident] = sv.c_str(); // for error handling
}
if (ignore) {
return ign(ref(*data.grammar, ident, sv.s));
return ign(ref(*data.grammar, ident, sv.c_str()));
} else {
return ref(*data.grammar, ident, sv.s);
return ref(*data.grammar, ident, sv.c_str());
}
}
case 1: { // (Expression)
@ -1979,7 +1953,7 @@ private:
return tok(sv[1].get<std::shared_ptr<Ope>>());
}
case 3: { // Capture
auto name = std::string(sv[0].s, sv[0].n);
const auto& name = sv[0].get<std::string>();
auto ope = sv[1].get<std::shared_ptr<Ope>>();
return cap(ope, data.match_action, ++data.capture_count, name);
}
@ -1990,24 +1964,27 @@ private:
};
g["IdentCont"] = [](const SemanticValues& sv) {
return std::string(sv.s, sv.n);
return std::string(sv.c_str(), sv.length());
};
g["Literal"] = [this](const SemanticValues& sv) {
return lit(resolve_escape_sequence(sv.s, sv.n));
const auto& tok = sv.tokens.front();
return lit(resolve_escape_sequence(tok.first, tok.second));
};
g["Class"] = [this](const SemanticValues& sv) {
return cls(resolve_escape_sequence(sv.s, sv.n));
const auto& tok = sv.tokens.front();
return cls(resolve_escape_sequence(tok.first, tok.second));
};
g["AND"] = [](const SemanticValues& sv) { return *sv.s; };
g["NOT"] = [](const SemanticValues& sv) { return *sv.s; };
g["QUESTION"] = [](const SemanticValues& sv) { return *sv.s; };
g["STAR"] = [](const SemanticValues& sv) { return *sv.s; };
g["PLUS"] = [](const SemanticValues& sv) { return *sv.s; };
g["AND"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["NOT"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["QUESTION"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["STAR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["PLUS"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["DOT"] = [](const SemanticValues& sv) { return dot(); };
g["BeginCap"] = [](const SemanticValues& sv) { return sv.token(); };
}
std::shared_ptr<Grammar> perform_core(
@ -2509,12 +2486,12 @@ public:
if (!rule.action) {
auto is_token = rule.is_token;
rule.action = [=](const SemanticValues& sv) {
auto line = line_info(sv.ss, sv.c_str());
if (is_token) {
auto line = line_info(sv.ss, sv.s);
return std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), std::string(sv.s, sv.n));
return std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.str());
}
auto line = line_info(sv.ss, sv.s);
auto ast = std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.transform<std::shared_ptr<T>>());
for (auto node: ast->nodes) {

View File

@ -98,7 +98,7 @@ TEST_CASE("String capture test3", "[general]")
std::vector<std::string> tags;
pg["TOKEN"] = [&](const SemanticValues& sv) {
tags.push_back(sv.str());
tags.push_back(sv.token());
};
auto ret = pg.parse(" [tag1] [tag:2] [tag-3] ");
@ -205,7 +205,7 @@ TEST_CASE("Lambda action test", "[general]")
string ss;
parser["CHAR"] = [&](const SemanticValues& sv) {
ss += *sv.s;
ss += *sv.c_str();
};
bool ret = parser.parse("hello");
@ -213,7 +213,7 @@ TEST_CASE("Lambda action test", "[general]")
REQUIRE(ss == "hello");
}
TEST_CASE("enter/exit handlers test", "[general]")
TEST_CASE("enter/leave handlers test", "[general]")
{
parser parser(R"(
START <- LTOKEN '=' RTOKEN
@ -226,7 +226,7 @@ TEST_CASE("enter/exit handlers test", "[general]")
auto& require_upper_case = *dt.get<bool*>();
require_upper_case = false;
};
parser["LTOKEN"].exit = [&](any& dt) {
parser["LTOKEN"].leave = [&](any& dt) {
auto& require_upper_case = *dt.get<bool*>();
require_upper_case = true;
};
@ -266,8 +266,8 @@ TEST_CASE("WHITESPACE test", "[general]")
ITEM <- WORD / PHRASE
# Tokens
WORD <- [a-zA-Z0-9_]+
PHRASE <- '"' (!'"' .)* '"'
WORD <- < [a-zA-Z0-9_]+ >
PHRASE <- < '"' (!'"' .)* '"' >
%whitespace <- [ \t\r\n]*
)");
@ -291,7 +291,7 @@ TEST_CASE("WHITESPACE test2", "[general]")
vector<string> items;
parser["ITEM"] = [&](const SemanticValues& sv) {
items.push_back(sv.str());
items.push_back(sv.token());
};
auto ret = parser.parse(R"([one], [two] ,[three] )");
@ -324,7 +324,7 @@ TEST_CASE("Skip token test2", "[general]")
{
peg::parser parser(R"(
ROOT <- ITEM (',' ITEM)*
ITEM <- ([a-z0-9])+
ITEM <- < ([a-z0-9])+ >
%whitespace <- [ \t]*
)");
@ -407,7 +407,7 @@ TEST_CASE("Simple calculator test", "[general]")
parser parser(syntax);
parser["Additive"] = [](const SemanticValues& sv) {
switch (sv.choice) {
switch (sv.choice()) {
case 0:
return sv[0].get<int>() + sv[1].get<int>();
default:
@ -416,7 +416,7 @@ TEST_CASE("Simple calculator test", "[general]")
};
parser["Multitive"] = [](const SemanticValues& sv) {
switch (sv.choice) {
switch (sv.choice()) {
case 0:
return sv[0].get<int>() * sv[1].get<int>();
default:
@ -425,7 +425,7 @@ TEST_CASE("Simple calculator test", "[general]")
};
parser["Number"] = [](const SemanticValues& sv) {
return atoi(sv.s);
return atoi(sv.c_str());
};
int val;
@ -448,10 +448,10 @@ TEST_CASE("Calculator test", "[general]")
// Setup actions
auto reduce = [](const SemanticValues& sv) -> long {
long ret = sv[0].val.get<long>();
long ret = sv[0].get<long>();
for (auto i = 1u; i < sv.size(); i += 2) {
auto num = sv[i + 1].val.get<long>();
switch (sv[i].val.get<char>()) {
auto num = sv[i + 1].get<long>();
switch (sv[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
@ -463,8 +463,8 @@ TEST_CASE("Calculator test", "[general]")
EXPRESSION = reduce;
TERM = reduce;
TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.s; };
FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.s; };
TERM_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); };
FACTOR_OPERATOR = [](const SemanticValues& sv) { return *sv.c_str(); };
NUMBER = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
// Parse
@ -494,10 +494,10 @@ TEST_CASE("Calculator test2", "[general]")
// Setup actions
auto reduce = [](const SemanticValues& sv) -> long {
long ret = sv[0].val.get<long>();
long ret = sv[0].get<long>();
for (auto i = 1u; i < sv.size(); i += 2) {
auto num = sv[i + 1].val.get<long>();
switch (sv[i].val.get<char>()) {
auto num = sv[i + 1].get<long>();
switch (sv[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
@ -509,8 +509,8 @@ TEST_CASE("Calculator test2", "[general]")
g["EXPRESSION"] = reduce;
g["TERM"] = reduce;
g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.s; };
g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.s; };
g["TERM_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
// Parse
@ -535,10 +535,10 @@ TEST_CASE("Calculator test3", "[general]")
);
auto reduce = [](const SemanticValues& sv) -> long {
long ret = sv[0].val.get<long>();
long ret = sv[0].get<long>();
for (auto i = 1u; i < sv.size(); i += 2) {
auto num = sv[i + 1].val.get<long>();
switch (sv[i].val.get<char>()) {
auto num = sv[i + 1].get<long>();
switch (sv[i].get<char>()) {
case '+': ret += num; break;
case '-': ret -= num; break;
case '*': ret *= num; break;
@ -551,8 +551,8 @@ TEST_CASE("Calculator test3", "[general]")
// Setup actions
parser["EXPRESSION"] = reduce;
parser["TERM"] = reduce;
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.s; };
parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); };
parser["NUMBER"] = [](const SemanticValues& sv) { return stol(sv.str(), nullptr, 10); };
// Parse
@ -805,7 +805,7 @@ TEST_CASE("Semantic predicate test", "[predicate]")
parser parser("NUMBER <- [0-9]+");
parser["NUMBER"] = [](const SemanticValues& sv) {
auto val = stol(sv.str(), nullptr, 10);
auto val = stol(sv.token(), nullptr, 10);
if (val != 100) {
throw parse_error("value error!!");
}