Added 'named capture'. Changed to cache user data.

This commit is contained in:
yhirose 2015-03-03 22:04:14 -05:00
parent fd90882894
commit 4e79b2de95
3 changed files with 131 additions and 69 deletions

View File

@ -61,7 +61,7 @@ int main(int argc, const char** argv)
return -1; return -1;
} }
auto ret = peg.lint(source.data(), source.size(), [&](size_t ln, size_t col, const string& msg) { auto ret = peg.parse(source.data(), source.size(), [&](size_t ln, size_t col, const string& msg) {
cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl; cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
}); });

175
peglib.h
View File

@ -401,7 +401,7 @@ private:
/* /*
* Match action * Match action
*/ */
typedef std::function<void (const char* s, size_t l, size_t i)> MatchAction; typedef std::function<void (const char* s, size_t l, size_t id, const std::string& name)> MatchAction;
/* /*
* Result * Result
@ -429,14 +429,11 @@ struct Context
std::vector<bool> cache_register; std::vector<bool> cache_register;
std::vector<bool> cache_success; std::vector<bool> cache_success;
std::map<std::pair<size_t, size_t>, std::pair<int, any>> cache_result; std::map<std::pair<size_t, size_t>, std::tuple<int, any, any>> cache_result;
std::vector<std::shared_ptr<SemanticValues>> stack; std::vector<std::shared_ptr<SemanticValues>> stack;
size_t stack_size; size_t stack_size;
mutable size_t hit;
mutable size_t miss;
Context(const char* _s, size_t _l, size_t _def_count, bool packrat) Context(const char* _s, size_t _l, size_t _def_count, bool packrat)
: s(_s) : s(_s)
, l(_l) , l(_l)
@ -444,19 +441,13 @@ struct Context
, cache_register(packrat ? def_count * (l + 1) : 0) , cache_register(packrat ? def_count * (l + 1) : 0)
, cache_success(packrat ? def_count * (l + 1) : 0) , cache_success(packrat ? def_count * (l + 1) : 0)
, stack_size(0) , stack_size(0)
, hit(0)
, miss(0)
{ {
} }
~Context() {
//std::cout << "hit:" << hit << " miss:" << miss << std::endl;
}
template <typename T> template <typename T>
void packrat(const char* s, size_t def_id, int& len, any& val, T fn) { void packrat(const char* s, size_t def_id, int& len, any& val, any& dt, T fn) {
if (cache_register.empty()) { if (cache_register.empty()) {
fn(len, val); fn(len, val, dt);
return; return;
} }
@ -464,23 +455,21 @@ struct Context
auto has_cache = cache_register[def_count * col + def_id]; auto has_cache = cache_register[def_count * col + def_id];
if (has_cache) { if (has_cache) {
hit++;
if (cache_success[def_count * col + def_id]) { if (cache_success[def_count * col + def_id]) {
const auto& key = std::make_pair((int)(s - this->s), def_id); const auto& key = std::make_pair((int)(s - this->s), def_id);
std::tie(len, val) = cache_result[key]; std::tie(len, val, dt) = cache_result[key];
return; return;
} else { } else {
len = -1; len = -1;
return; return;
} }
} else { } else {
miss++; fn(len, val, dt);
fn(len, val);
cache_register[def_count * col + def_id] = true; cache_register[def_count * col + def_id] = true;
cache_success[def_count * col + def_id] = success(len); cache_success[def_count * col + def_id] = success(len);
if (success(len)) { if (success(len)) {
const auto& key = std::make_pair((int)(s - this->s), def_id); const auto& key = std::make_pair((int)(s - this->s), def_id);
cache_result[key] = std::make_pair(len, val); cache_result[key] = std::make_tuple(len, val, dt);
} }
return; return;
} }
@ -831,15 +820,15 @@ public:
class Capture : public Ope class Capture : public Ope
{ {
public: public:
Capture(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t ci) Capture(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t n, const std::string& s)
: ope_(ope), match_action_(ma), capture_id(ci) {} : ope_(ope), match_action_(ma), id(n), name(s) {}
int parse(const char* s, size_t l, SemanticValues& sv, Context& c, any& dt) const override { int parse(const char* s, size_t l, SemanticValues& sv, Context& c, any& dt) const override {
assert(ope_); assert(ope_);
const auto& rule = *ope_; const auto& rule = *ope_;
auto len = rule.parse(s, l, sv, c, dt); auto len = rule.parse(s, l, sv, c, dt);
if (success(len) && match_action_) { if (success(len) && match_action_) {
match_action_(s, len, capture_id); match_action_(s, len, id, name);
} }
return len; return len;
} }
@ -849,7 +838,8 @@ public:
//private: //private:
std::shared_ptr<Ope> ope_; std::shared_ptr<Ope> ope_;
MatchAction match_action_; MatchAction match_action_;
size_t capture_id; size_t id;
std::string name;
}; };
class Anchor : public Ope class Anchor : public Ope
@ -1156,6 +1146,8 @@ public:
std::vector<Action> actions; std::vector<Action> actions;
bool ignore; bool ignore;
bool packrat; bool packrat;
std::function<void(any& dt)> before;
std::function<void(any& dt)> after;
private: private:
friend class DefinitionReference; friend class DefinitionReference;
@ -1188,11 +1180,19 @@ inline int Holder::parse(const char* s, size_t l, SemanticValues& sv, Context& c
int len; int len;
any val; any val;
c.packrat(s, outer_->id, len, val, [&](int& len, any& val) { const char* ancs = s;
size_t ancl = l;
c.packrat(s, outer_->id, len, val, dt, [&](int& len, any& val, any& dt) {
auto& chldsv = c.push(); auto& chldsv = c.push();
if (outer_->before) {
outer_->before(dt);
}
const auto& rule = *ope_; const auto& rule = *ope_;
len = rule.parse(s, l, chldsv, c, dt); len = rule.parse(s, l, chldsv, c, dt);
ancl = len;
if (success(len) && !outer_->ignore) { if (success(len) && !outer_->ignore) {
assert(!outer_->actions.empty()); assert(!outer_->actions.empty());
@ -1201,7 +1201,10 @@ inline int Holder::parse(const char* s, size_t l, SemanticValues& sv, Context& c
? outer_->actions[i] ? outer_->actions[i]
: outer_->actions[0]; : outer_->actions[0];
if (!chldsv.s) { if (chldsv.s) {
ancs = chldsv.s;
ancl = chldsv.l;
} else {
chldsv.s = s; chldsv.s = s;
chldsv.l = len; chldsv.l = len;
} }
@ -1209,11 +1212,15 @@ inline int Holder::parse(const char* s, size_t l, SemanticValues& sv, Context& c
val = reduce(chldsv, dt, action); val = reduce(chldsv, dt, action);
} }
if (outer_->after) {
outer_->after(dt);
}
c.pop(); c.pop();
}); });
if (success(len) && !outer_->ignore) { if (success(len) && !outer_->ignore) {
sv.emplace_back(val, outer_->name.c_str(), nullptr, 0); sv.emplace_back(val, outer_->name.c_str(), ancs, ancl);
} }
return len; return len;
@ -1323,12 +1330,12 @@ inline std::shared_ptr<Ope> dot() {
return std::make_shared<AnyCharacter>(); return std::make_shared<AnyCharacter>();
} }
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t ci) { inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t n, const std::string& s) {
return std::make_shared<Capture>(ope, ma, ci); return std::make_shared<Capture>(ope, ma, n, s);
} }
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma) { inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma) {
return std::make_shared<Capture>(ope, ma, (size_t)-1); return std::make_shared<Capture>(ope, ma, (size_t)-1, std::string());
} }
inline std::shared_ptr<Ope> anc(const std::shared_ptr<Ope>& ope) { inline std::shared_ptr<Ope> anc(const std::shared_ptr<Ope>& ope) {
@ -1474,7 +1481,7 @@ private:
g["Begin"] <= seq(chr('<'), g["Spacing"]); g["Begin"] <= seq(chr('<'), g["Spacing"]);
g["End"] <= seq(chr('>'), g["Spacing"]); g["End"] <= seq(chr('>'), g["Spacing"]);
g["BeginCap"] <= seq(lit("$<"), g["Spacing"]); g["BeginCap"] <= seq(chr('$'), anc(opt(g["Identifier"])), chr('<'), g["Spacing"]);
g["EndCap"] <= seq(lit(">"), g["Spacing"]); g["EndCap"] <= seq(lit(">"), g["Spacing"]);
g["IGNORE"] <= chr('~'); g["IGNORE"] <= chr('~');
@ -1586,8 +1593,9 @@ private:
// Capture // Capture
[&](const SemanticValues& sv, any& dt) { [&](const SemanticValues& sv, any& dt) {
Data& data = *dt.get<Data*>(); Data& data = *dt.get<Data*>();
auto name = std::string(sv[0].s, sv[0].l);
auto ope = sv[1].val.get<std::shared_ptr<Ope>>(); auto ope = sv[1].val.get<std::shared_ptr<Ope>>();
return cap(ope, data.match_action, ++data.capture_count); return cap(ope, data.match_action, ++data.capture_count, name);
} }
}; };
@ -1772,8 +1780,8 @@ public:
grammar_ = PEGParser::parse( grammar_ = PEGParser::parse(
s, l, rules, s, l, rules,
start_, start_,
[&](const char* s, size_t l, size_t i) { [&](const char* s, size_t l, size_t id, const std::string& name) {
if (match_action) match_action(s, l, i); if (match_action) match_action(s, l, id, name);
}, },
log); log);
} }
@ -1791,67 +1799,51 @@ public:
return grammar_ != nullptr; return grammar_ != nullptr;
} }
bool parse(const char* s, size_t l) const { bool parse(const char* s, size_t l, Log log = nullptr) const {
if (grammar_ != nullptr) { if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_]; const auto& rule = (*grammar_)[start_];
auto r = rule.parse(s, l); auto r = rule.parse(s, l);
output_log(s, l, log, r);
return r.ret && r.len == l; return r.ret && r.len == l;
} }
return false; return false;
} }
bool parse(const char* s) const { bool parse(const char* s, Log log = nullptr) const {
auto l = strlen(s); auto l = strlen(s);
return parse(s, l); return parse(s, l, log);
} }
bool parse_with_data(const char* s, size_t l, any& dt) const { bool parse_with_data(const char* s, size_t l, any& dt, Log log = nullptr) const {
if (grammar_ != nullptr) { if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_]; const auto& rule = (*grammar_)[start_];
auto r = rule.parse_with_data(s, l, dt); auto r = rule.parse_with_data(s, l, dt);
output_log(s, l, log, r);
return r.ret && r.len == l; return r.ret && r.len == l;
} }
return false; return false;
} }
bool parse_with_data(const char* s, any& dt) const { bool parse_with_data(const char* s, any& dt, Log log = nullptr) const {
auto l = strlen(s); auto l = strlen(s);
return parse_with_data(s, l, dt); return parse_with_data(s, l, dt);
} }
template <typename T> template <typename T>
bool parse_with_value(const char* s, size_t l, T& out) const { bool parse_with_value(const char* s, size_t l, T& out, Log log = nullptr) const {
if (grammar_ != nullptr) { if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_]; const auto& rule = (*grammar_)[start_];
auto r = rule.parse_with_value(s, l, out); auto r = rule.parse_with_value(s, l, out);
output_log(s, l, log, r);
return r.ret && r.len == l; return r.ret && r.len == l;
} }
return false; return false;
} }
template <typename T> template <typename T>
bool parse_with_value(const char* s, T& out) const { bool parse_with_value(const char* s, T& out, Log log = nullptr) const {
auto l = strlen(s); auto l = strlen(s);
return parse_with_value(s, l, out); return parse_with_value(s, l, out, log);
}
bool lint(const char* s, size_t l, Log log) {
assert(grammar_);
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
auto r = rule.parse(s, l);
if (!r.ret) {
if (log) {
auto line = line_info(s, r.error_ptr);
log(line.first, line.second, r.msg ? "syntax error" : r.msg);
}
} else if (r.len != l) {
auto line = line_info(s, s + r.len);
log(line.first, line.second, "syntax error");
}
return r.ret;
}
return false;
} }
bool search(const char* s, size_t l, size_t& mpos, size_t& mlen) const { bool search(const char* s, size_t l, size_t& mpos, size_t& mlen) const {
@ -1893,6 +1885,18 @@ public:
MatchAction match_action; MatchAction match_action;
private: private:
/*
 * Report a failed or incomplete parse through `log`
 *
 * s, l : the input buffer that was parsed and its length
 * log  : callback receiving (first, second, message), where first/second are
 *        the pair returned by line_info(); nothing is reported when it is empty
 * r    : the result returned by the start rule
 *
 * Two situations are reported:
 *   - the rule failed (`!r.ret`): the position is derived from `r.error_ptr`
 *     and the message is `r.msg` when one was supplied, otherwise the
 *     generic "syntax error";
 *   - the rule succeeded but consumed only `r.len` of the `l` input bytes:
 *     the position is where matching stopped, message "syntax error".
 */
void output_log(const char* s, size_t l, Log log, const Definition::Result& r) const {
    if (!log) {
        return;
    }

    if (!r.ret) {
        auto line = line_info(s, r.error_ptr);
        // Prefer the specific message; fall back to the generic text only when
        // none is available, so a null pointer is never forwarded to the callback.
        log(line.first, line.second, r.msg ? r.msg : "syntax error");
    } else if (r.len != l) {
        auto line = line_info(s, s + r.len);
        log(line.first, line.second, "syntax error");
    }
}
std::shared_ptr<Grammar> grammar_; std::shared_ptr<Grammar> grammar_;
std::string start_; std::string start_;
}; };
@ -1907,6 +1911,7 @@ struct match
const char* s; const char* s;
size_t l; size_t l;
size_t id; size_t id;
std::string name;
size_t length() const { return l; } size_t length() const { return l; }
std::string str() const { return std::string(s, l); } std::string str() const { return std::string(s, l); }
@ -1952,20 +1957,56 @@ struct match
const_iterator end() const { const_iterator end() const {
return matches.cend(); return matches.cend();
} }
/*
 * Positions within `matches` of every item whose `name` equals the given name,
 * in ascending order. Empty when nothing matches.
 */
std::vector<size_t> named_capture(const std::string& name) const {
    std::vector<size_t> positions;
    size_t pos = 0;
    for (const auto& item : matches) {
        if (item.name == name) {
            positions.push_back(pos);
        }
        ++pos;
    }
    return positions;
}
/*
 * Groups the positions of all items in `matches` by their `name` field.
 * Every item is included and keyed by its name exactly as stored; positions
 * under each key are in ascending order.
 */
std::map<std::string, std::vector<size_t>> named_captures() const {
    std::map<std::string, std::vector<size_t>> grouped;
    size_t pos = 0;
    for (const auto& item : matches) {
        grouped[item.name].push_back(pos);
        ++pos;
    }
    return grouped;
}
/*
 * Positions within `matches` of every item whose `id` equals the given id,
 * in ascending order. Empty when nothing matches.
 */
std::vector<size_t> indexed_capture(size_t id) const {
    std::vector<size_t> positions;
    size_t pos = 0;
    for (const auto& item : matches) {
        if (item.id == id) {
            positions.push_back(pos);
        }
        ++pos;
    }
    return positions;
}
/*
 * Groups the positions of all items in `matches` by their `id` field.
 * Every item is included; positions under each key are in ascending order.
 */
std::map<size_t, std::vector<size_t>> indexed_captures() const {
    std::map<size_t, std::vector<size_t>> grouped;
    size_t pos = 0;
    for (const auto& item : matches) {
        grouped[item.id].push_back(pos);
        ++pos;
    }
    return grouped;
}
}; };
inline bool peg_match(const char* syntax, const char* s, match& m) { inline bool peg_match(const char* syntax, const char* s, match& m) {
m.matches.clear(); m.matches.clear();
peg pg(syntax); peg pg(syntax);
pg.match_action = [&](const char* s, size_t l, size_t i) { pg.match_action = [&](const char* s, size_t l, size_t id, const std::string& name) {
m.matches.push_back(match::Item{ s, l, i }); m.matches.push_back(match::Item{ s, l, id, name });
}; };
auto ret = pg.parse(s); auto ret = pg.parse(s);
if (ret) { if (ret) {
auto l = strlen(s); auto l = strlen(s);
m.matches.insert(m.matches.begin(), match::Item{ s, l, 0 }); m.matches.insert(m.matches.begin(), match::Item{ s, l, 0, std::string() });
} }
return ret; return ret;
@ -1979,14 +2020,14 @@ inline bool peg_match(const char* syntax, const char* s) {
inline bool peg_search(peg& pg, const char* s, size_t l, match& m) { inline bool peg_search(peg& pg, const char* s, size_t l, match& m) {
m.matches.clear(); m.matches.clear();
pg.match_action = [&](const char* s, size_t l, size_t i) { pg.match_action = [&](const char* s, size_t l, size_t id, const std::string& name) {
m.matches.push_back(match::Item{ s, l, i }); m.matches.push_back(match::Item{ s, l, id, name });
}; };
size_t mpos, mlen; size_t mpos, mlen;
auto ret = pg.search(s, l, mpos, mlen); auto ret = pg.search(s, l, mpos, mlen);
if (ret) { if (ret) {
m.matches.insert(m.matches.begin(), match::Item{ s + mpos, mlen, 0 }); m.matches.insert(m.matches.begin(), match::Item{ s + mpos, mlen, 0, std::string() });
return true; return true;
} }
@ -2022,8 +2063,8 @@ public:
, s_(s) , s_(s)
, l_(strlen(s)) , l_(strlen(s))
, pos_(0) { , pos_(0) {
peg_.match_action = [&](const char* s, size_t l, size_t i) { peg_.match_action = [&](const char* s, size_t l, size_t id, const std::string& name) {
m_.matches.push_back(match::Item{ s, l, i }); m_.matches.push_back(match::Item{ s, l, id, name });
}; };
search(); search();
} }

View File

@ -110,6 +110,25 @@ TEST_CASE("String capture test3", "[general]")
REQUIRE(tags[2] == "tag-3"); REQUIRE(tags[2] == "tag-3");
} }
// Checks that a capture written as `$test< ... >` in the grammar is recorded
// under the name "test" and can be looked up with match::named_capture().
TEST_CASE("Named capture test", "[general]")
{
peglib::match m;
// Grammar: zero or more bracketed tags, each tag body captured as "test".
auto ret = peglib::peg_match(
" ROOT <- _ ('[' $test< TAG_NAME > ']' _)* "
" TAG_NAME <- (!']' .)+ "
" _ <- [ \t]* ",
" [tag1] [tag:2] [tag-3] ",
m);
// Positions in `m` of the items whose name is "test".
auto cap = m.named_capture("test");
REQUIRE(ret == true);
// On success peg_match puts a whole-input item in front of the captured
// items, so three tags give four entries.
REQUIRE(m.size() == 4);
REQUIRE(cap.size() == 3);
// cap[2] is the position of the third named capture.
REQUIRE(m.str(cap[2]) == "tag-3");
}
TEST_CASE("String capture test with embedded match action", "[general]") TEST_CASE("String capture test with embedded match action", "[general]")
{ {
rule ROOT, TAG, TAG_NAME, WS; rule ROOT, TAG, TAG_NAME, WS;
@ -118,7 +137,9 @@ TEST_CASE("String capture test with embedded match action", "[general]")
ROOT <= seq(WS, zom(TAG)); ROOT <= seq(WS, zom(TAG));
TAG <= seq(chr('['), TAG <= seq(chr('['),
cap(TAG_NAME, [&](const char* s, size_t l, size_t id) { tags.push_back(string(s, l)); }), cap(TAG_NAME, [&](const char* s, size_t l, size_t id, const std::string& name) {
tags.push_back(string(s, l));
}),
chr(']'), chr(']'),
WS); WS);
TAG_NAME <= oom(seq(npd(chr(']')), dot())); TAG_NAME <= oom(seq(npd(chr(']')), dot()));