You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1584 lines
43 KiB

// peglib.h
// Copyright (c) 2015 Yuji Hirose. All rights reserved.
// MIT License
#include <cassert>
#include <cstddef>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <typeinfo>
#include <utility>
#include <vector>
namespace peglib {
// Dummy object used only as the default argument of the SFINAE template
// parameters below (`typename std::enable_if<...>::type*& = enabler`).
// NOTE(review): this is a non-inline definition in a header; including the
// header from more than one translation unit may produce duplicate symbols.
void* enabler;

/*-----------------------------------------------------------------------------
 *  any
 *---------------------------------------------------------------------------*/

// Type-erased value holder. An `any` is either "undefined" (holds nothing) or
// owns a heap-allocated copy of a value of arbitrary copyable type.
class any
{
public:
    // Creates an undefined (empty) value.
    any() : content_(nullptr) {}

    // Deep copy: clones the held value, if any.
    any(const any& rhs) : content_(rhs.clone()) {}

    // Steals the held value; `rhs` becomes undefined.
    any(any&& rhs) noexcept : content_(rhs.content_) {
        rhs.content_ = nullptr;
    }

    // Stores a copy of `value`.
    template <typename T>
    any(const T& value) : content_(new holder<T>(value)) {}

    any& operator=(const any& rhs) {
        if (this != &rhs) {
            // Clone first: if the copy throws, *this keeps its old value
            // instead of being left with a dangling pointer.
            auto fresh = rhs.clone();
            delete content_;
            content_ = fresh;
        }
        return *this;
    }

    any& operator=(any&& rhs) noexcept {
        if (this != &rhs) {
            delete content_;
            content_ = rhs.content_;
            rhs.content_ = nullptr;
        }
        return *this;
    }

    template <typename T>
    any& operator=(const T& value) {
        // Allocate before releasing the old payload (see copy assignment).
        auto fresh = new holder<T>(value);
        delete content_;
        content_ = fresh;
        return *this;
    }

    ~any() {
        delete content_;
    }

    // True when no value is stored.
    bool is_undefined() const {
        return content_ == nullptr;
    }

    // Returns the stored value as `T`.
    // Throws std::bad_cast when the stored type is not exactly `T`
    // (including when nothing is stored).
    template <
        typename T,
        typename std::enable_if<!std::is_same<T, any>::value>::type*& = enabler
    >
    T& get() {
        auto p = dynamic_cast<holder<T>*>(content_);
        if (!p) {
            throw std::bad_cast();
        }
        return p->value_;
    }

    // `get<any>()` simply returns the object itself.
    template <
        typename T,
        typename std::enable_if<std::is_same<T, any>::value>::type*& = enabler
    >
    T& get() {
        return *this;
    }

    template <
        typename T,
        typename std::enable_if<!std::is_same<T, any>::value>::type*& = enabler
    >
    const T& get() const {
        auto p = dynamic_cast<holder<T>*>(content_);
        if (!p) {
            throw std::bad_cast();
        }
        return p->value_;
    }

    template <
        typename T,
        typename std::enable_if<std::is_same<T, any>::value>::type*& = enabler
    >
    const any& get() const {
        return *this;
    }

private:
    // Polymorphic base of the stored payload.
    struct placeholder {
        virtual ~placeholder() {}
        virtual placeholder* clone() const = 0;
    };

    // Concrete payload holding a value of type `T`.
    template <typename T>
    struct holder : placeholder {
        holder(const T& value) : value_(value) {}
        placeholder* clone() const override {
            return new holder(value_);
        }
        T value_;
    };

    // Returns a newly allocated copy of the payload, or nullptr when empty.
    placeholder* clone() const {
        return content_ ? content_->clone() : nullptr;
    }

    placeholder* content_;
};
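/*-----------------------------------------------------------------------------
 *  Semantic values
 *---------------------------------------------------------------------------*/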
// List of type-erased semantic values; passed by reference to every
// `parse` call and to semantic actions.
typedef std::vector<any> Values;
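/*-----------------------------------------------------------------------------
 *  Semantic action
 *---------------------------------------------------------------------------*/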
// Invokes `fn(args...)` and wraps its result in an `any`.
// Selected when the declared result type `R` is not `void`.
template <
    typename R, typename F,
    typename std::enable_if<!std::is_void<R>::value>::type*& = enabler,
    typename... Args>
any call(F fn, Args&&... args) {
    return any(fn(std::forward<Args>(args)...));
}

// Overload for `R == void`: runs `fn(args...)` for its side effects and
// returns an undefined `any`.
// NOTE(review): the reviewed text returned `any()` without calling `fn`,
// which would silently drop every void-returning action; the call is restored.
template <
    typename R, typename F,
    typename std::enable_if<std::is_void<R>::value>::type*& = enabler,
    typename... Args>
any call(F fn, Args&&... args) {
    fn(std::forward<Args>(args)...);
    return any();
}
class Action
Action() = default;
Action(const Action& rhs) : fn_(rhs.fn_) {}
//Action(Action&& rhs) : fn_(std::move(rhs.fn_)) {}
template <typename F, typename std::enable_if<!std::is_pointer<F>::value && !std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {}
template <typename F, typename std::enable_if<std::is_pointer<F>::value>::type*& = enabler>
Action(F fn) : fn_(make_adaptor(fn, fn)) {}
template <typename F, typename std::enable_if<std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
Action(F fn) {}
template <typename F, typename std::enable_if<!std::is_pointer<F>::value && !std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
void operator=(F fn) {
fn_ = make_adaptor(fn, &F::operator());
template <typename F, typename std::enable_if<std::is_pointer<F>::value>::type*& = enabler>
void operator=(F fn) {
fn_ = make_adaptor(fn, fn);
template <typename F, typename std::enable_if<std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
void operator=(F fn) {}
operator bool() const {
return (bool)fn_;
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) const {
return fn_(s, l, v, c);
template <typename R>
struct TypeAdaptor {
TypeAdaptor(std::function<R (const char* s, size_t l, const std::vector<any>& v, any& c)> fn)
: fn_(fn) {}
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) {
return call<R>(fn_, s, l, v, c);
std::function<R (const char* s, size_t l, const std::vector<any>& v, any& c)> fn_;
template <typename R>
struct TypeAdaptor_s_l_v {
TypeAdaptor_s_l_v(std::function<R (const char* s, size_t l, const std::vector<any>& v)> fn)
: fn_(fn) {}
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) {
return call<R>(fn_, s, l, v);
std::function<R (const char* s, size_t l, const std::vector<any>& v)> fn_;
template <typename R>
struct TypeAdaptor_s_l {
TypeAdaptor_s_l(std::function<R (const char* s, size_t l)> fn) : fn_(fn) {}
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) {
return call<R>(fn_, s, l);
std::function<R (const char* s, size_t l)> fn_;
template <typename R>
struct TypeAdaptor_v_n {
TypeAdaptor_v_n(std::function<R (const std::vector<any>& v, any& c)> fn) : fn_(fn) {}
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) {
return call<R>(fn_, v, c);
std::function<R (const std::vector<any>& v, any& c)> fn_;
template <typename R>
struct TypeAdaptor_v {
TypeAdaptor_v(std::function<R (const std::vector<any>& v)> fn) : fn_(fn) {}
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) {
return call<R>(fn_, v);
std::function<R (const std::vector<any>& v)> fn_;
template <typename R>
struct TypeAdaptor_empty {
TypeAdaptor_empty(std::function<R ()> fn) : fn_(fn) {}
any operator()(const char* s, size_t l, const std::vector<any>& v, any& c) {
return call<R>(fn_);
std::function<R ()> fn_;
typedef std::function<any (const char* s, size_t l, const std::vector<any>& v, any& c)> Fty;
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t, const std::vector<any>& v, any& c) const) {
return TypeAdaptor<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R(*mf)(const char*, size_t, const std::vector<any>& v, any& c)) {
return TypeAdaptor<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t, const std::vector<any>& v) const) {
return TypeAdaptor_s_l_v<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R(*mf)(const char*, size_t, const std::vector<any>& v)) {
return TypeAdaptor_s_l_v<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::*mf)(const char*, size_t) const) {
return TypeAdaptor_s_l<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (*mf)(const char*, size_t)) {
return TypeAdaptor_s_l<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::*mf)(const std::vector<any>& v, any& c) const) {
return TypeAdaptor_v_n<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (*mf)(const std::vector<any>& v, any& c)) {
return TypeAdaptor_v_n<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::*mf)(const std::vector<any>& v) const) {
return TypeAdaptor_v<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (*mf)(const std::vector<any>& v)) {
return TypeAdaptor_v<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::*mf)() const) {
return TypeAdaptor_empty<R>(fn);
template<typename F, typename R>
Fty make_adaptor(F fn, R (*mf)()) {
return TypeAdaptor_empty<R>(fn);
Fty fn_;
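/*-----------------------------------------------------------------------------
 *  Match action
 *---------------------------------------------------------------------------*/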
// Callback invoked by `Capture` after a successful match with
// (start of matched text, matched length, capture id).
typedef std::function<void (const char* s, size_t l, size_t i)> MatchAction;
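/*-----------------------------------------------------------------------------
 *  Result
 *---------------------------------------------------------------------------*/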
// Outcome of a single parse step.
struct Result
{
    bool        ret;     // true when the operator matched
    size_t      len;     // number of characters consumed (0 on failure)
    size_t      choice;  // index of the alternative that matched; (size_t)-1 on failure
    const char* ptr;     // input position reported on failure; nullptr on success
    std::string err;     // error message, may be empty. TODO: should be `int`.
};

// Builds a successful result that consumed `len` characters.
// Marked `inline` because this header defines the function.
inline Result success(size_t len, size_t choice = 0) {
    return Result{ true, len, choice, nullptr, std::string() };
}

// Builds a failed result located at `ptr` with an optional message.
// `name` is accepted for interface compatibility but is not used.
inline Result fail(const char* ptr, std::string err = std::string(), std::string name = std::string()) {
    (void)name;
    return Result{ false, 0, static_cast<size_t>(-1), ptr, std::move(err) };
}
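/*-----------------------------------------------------------------------------
 *  Parser operators
 *---------------------------------------------------------------------------*/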
// Abstract base of every parsing operator.
class Ope
{
public:
    virtual ~Ope() {}

    // Tries to match the input [s, s + l). Semantic values are appended to
    // `v`; `c` is a caller-supplied context passed through to actions.
    virtual Result parse(const char* s, size_t l, Values& v, any& c) const = 0;
};
class Sequence : public Ope
Sequence(const Sequence& rhs) : opes_(rhs.opes_) {}
#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015
// NOTE: Compiler Error C2797 on Visual Studio 2013
// "The C++ compiler in Visual Studio does not implement list
// initialization inside either a member initializer list or a non-static
// data member initializer. Before Visual Studio 2013 Update 3, this was
// silently converted to a function call, which could lead to bad code
// generation. Visual Studio 2013 Update 3 reports this as an error."
template <typename... Args>
Sequence(const Args& ...args) {
opes_ = std::vector<std::shared_ptr<Ope>>{ static_cast<std::shared_ptr<Ope>>(args)... };
template <typename... Args>
Sequence(const Args& ...args) : opes_{ static_cast<std::shared_ptr<Ope>>(args)... } {}
Sequence(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
Sequence(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(std::move(opes)) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
size_t i = 0;
for (const auto& ope : opes_) {
const auto& rule = *ope;
auto r = rule.parse(s + i, l - i, v, c);
if (!r.ret) {
auto err = r.err;
if (err.empty()) {
err = "missing an element in the 'sequence'";
return fail(r.ptr, err);
i += r.len;
return success(i);
std::vector<std::shared_ptr<Ope>> opes_;
class PrioritizedChoice : public Ope
#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015
// NOTE: Compiler Error C2797 on Visual Studio 2013
// "The C++ compiler in Visual Studio does not implement list
// initialization inside either a member initializer list or a non-static
// data member initializer. Before Visual Studio 2013 Update 3, this was
// silently converted to a function call, which could lead to bad code
// generation. Visual Studio 2013 Update 3 reports this as an error."
template <typename... Args>
PrioritizedChoice(const Args& ...args) {
opes_ = std::vector<std::shared_ptr<Ope>>{ static_cast<std::shared_ptr<Ope>>(args)... };
template <typename... Args>
PrioritizedChoice(const Args& ...args) : opes_{ static_cast<std::shared_ptr<Ope>>(args)... } {}
PrioritizedChoice(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
PrioritizedChoice(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(std::move(opes)) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
size_t id = 0;
for (const auto& ope : opes_) {
const auto& rule = *ope;
Values chldsv;
auto r = rule.parse(s, l, chldsv, c);
if (r.ret) {
if (!chldsv.empty()) {
for (const auto& x: chldsv) {
return success(r.len, id);
return fail(s, "nothing was matched in the 'prioritized choice'");
size_t size() const { return opes_.size(); }
std::vector<std::shared_ptr<Ope>> opes_;
class ZeroOrMore : public Ope
ZeroOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
auto i = 0;
while (l - i > 0) {
const auto& rule = *ope_;
auto r = rule.parse(s + i, l - i, v, c);
if (!r.ret) {
i += r.len;
return success(i);
std::shared_ptr<Ope> ope_;
class OneOrMore : public Ope
OneOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
const auto& rule = *ope_;
auto r = rule.parse(s, l, v, c);
if (!r.ret) {
auto err = r.err;
if (err.empty()) {
err = "nothing occurred in the 'one-or-more'";
return fail(r.ptr, r.err);
auto i = r.len;
while (l - i > 0) {
const auto& rule = *ope_;
auto r = rule.parse(s + i, l - i, v, c);
if (!r.ret) {
i += r.len;
return success(i);
std::shared_ptr<Ope> ope_;
class Option : public Ope
Option(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
const auto& rule = *ope_;
auto r = rule.parse(s, l, v, c);
return success(r.ret ? r.len : 0);
std::shared_ptr<Ope> ope_;
class AndPredicate : public Ope
AndPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
const auto& rule = *ope_;
auto r = rule.parse(s, l, v, c);
if (r.ret) {
return success(0);
} else {
return fail(r.ptr, r.err);
std::shared_ptr<Ope> ope_;
class NotPredicate : public Ope
NotPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
const auto& rule = *ope_;
auto r = rule.parse(s, l, v, c);
if (r.ret) {
return fail(s);
} else {
return success(0);
std::shared_ptr<Ope> ope_;
class LiteralString : public Ope
LiteralString(const std::string& s) : lit_(s) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
auto i = 0u;
for (; i < lit_.size(); i++) {
if (i >= l || s[i] != lit_[i]) {
return fail(s);
return success(i);
std::string lit_;
class CharacterClass : public Ope
CharacterClass(const std::string& chars) : chars_(chars) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
// TODO: UTF8 support
if (l < 1) {
return fail(s);
auto ch = s[0];
auto i = 0u;
while (i < chars_.size()) {
if (i + 2 < chars_.size() && chars_[i + 1] == '-') {
if (chars_[i] <= ch && ch <= chars_[i + 2]) {
return success(1);
i += 3;
} else {
if (chars_[i] == ch) {
return success(1);
i += 1;
return fail(s);
std::string chars_;
class Character : public Ope
Character(char ch) : ch_(ch) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
// TODO: UTF8 support
if (l < 1 || s[0] != ch_) {
return fail(s);
return success(1);
char ch_;
class AnyCharacter : public Ope
Result parse(const char* s, size_t l, Values& v, any& c) const {
// TODO: UTF8 support
if (l < 1) {
return fail(s);
return success(1);
class Capture : public Ope
Capture(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
Capture(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t ci)
: ope_(ope), match_action_(ma), capture_id(ci) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
const auto& rule = *ope_;
auto r = rule.parse(s, l, v, c);
if (r.ret && match_action_) {
match_action_(s, r.len, capture_id);
return r;
std::shared_ptr<Ope> ope_;
MatchAction match_action_;
size_t capture_id;
class Anchor : public Ope
Result parse(const char* s, size_t l, Values& v, any& c) const {
return success(0);
class WeakHolder : public Ope
WeakHolder(const std::shared_ptr<Ope>& ope) : weak_(ope) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
auto ope = weak_.lock();
const auto& rule = *ope;
return rule.parse(s, l, v, c);
std::weak_ptr<Ope> weak_;
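/*-----------------------------------------------------------------------------
 *  Definition
 *---------------------------------------------------------------------------*/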
class Definition
: actions(1)
, holder_(std::make_shared<Holder>(this)) {}
Definition(const Definition& rhs)
: name(
, actions(1)
, holder_(rhs.holder_)
holder_->outer_ = this;
Definition(Definition&& rhs)
: name(std::move(
, actions(1)
, holder_(std::move(rhs.holder_))
holder_->outer_ = this;
Definition(const std::shared_ptr<Ope>& ope)
: actions(1)
, holder_(std::make_shared<Holder>(this))
holder_->ope_ = ope;
operator std::shared_ptr<Ope>() {
return std::make_shared<WeakHolder>(holder_);
Definition& operator<=(const std::shared_ptr<Ope>& ope) {
holder_->ope_ = ope;
return *this;
Result parse(const char* s, size_t l, Values& v, any& c) const {
return holder_->parse(s, l, v, c);
template <typename T>
Result parse(const char* s, size_t l, T& val) const {
Values v;
any c;
auto r = holder_->parse(s, l, v, c);
if (r.ret && !v.empty() && !v.front().is_undefined()) {
val = v[0].get<T>();
return r;
template <typename T>
Result parse(const char* s, T& val) const {
auto l = strlen(s);
return parse(s, l, val);
Result parse(const char* s) const {
auto l = strlen(s);
Values v;
any c;
return holder_->parse(s, l, v, c);
Definition& operator=(Action ac) {
actions[0] = ac;
return *this;
Definition& operator=(std::initializer_list<Action> acs) {
actions = acs;
return *this;
template <typename T>
Definition& operator,(T fn) {
return *this;
std::string name;
std::vector<Action> actions;
friend class DefinitionReference;
class Holder : public Ope
Holder(Definition* outer)
: outer_(outer) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
if (!ope_) {
throw std::logic_error("Uninitialized definition ope was used...");
const auto& rule = *ope_;
Values chldsv;
auto r = rule.parse(s, l, chldsv, c);
if (r.ret) {
auto id = r.choice + 1;
const auto& ac = (id < outer_->actions.size() && outer_->actions[id])
? outer_->actions[id]
: outer_->actions[0];
v.push_back(reduce(s, r.len, chldsv, c, ac));
return r;
friend class Definition;
any reduce(const char* s, size_t l, const Values& v, any& c, const Action& action) const {
if (action) {
return action(s, l, v, c);
} else if (v.empty()) {
return any();
} else {
return v.front();
std::shared_ptr<Ope> ope_;
Definition* outer_;
Definition& operator=(const Definition& rhs);
Definition& operator=(Definition&& rhs);
std::shared_ptr<Holder> holder_;
class DefinitionReference : public Ope
const std::map<std::string, Definition>& grammar, const std::string& name)
: grammar_(grammar)
, name_(name) {}
Result parse(const char* s, size_t l, Values& v, any& c) const {
const auto& rule = *;
return rule.parse(s, l, v, c);
const std::map<std::string, Definition>& grammar_;
const std::string name_;
// Short alias for `Definition`.
typedef Definition rule;
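/*-----------------------------------------------------------------------------
 *  Factories
 *---------------------------------------------------------------------------*/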
// Builds a `Sequence` from the given operators (each argument is converted
// to `std::shared_ptr<Ope>`).
template <typename... Args>
std::shared_ptr<Ope> seq(Args&& ...args) {
    return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
}
// Builds a `PrioritizedChoice` from the given alternatives (each argument is
// converted to `std::shared_ptr<Ope>`).
template <typename... Args>
std::shared_ptr<Ope> cho(Args&& ...args) {
    return std::make_shared<PrioritizedChoice>(static_cast<std::shared_ptr<Ope>>(args)...);
}
// Zero-or-more repetition of `ope`.
inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope>& ope) {
    return std::make_shared<ZeroOrMore>(ope);
}
// One-or-more repetition of `ope`.
inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope>& ope) {
    return std::make_shared<OneOrMore>(ope);
}
// Optional `ope`.
inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope>& ope) {
    return std::make_shared<Option>(ope);
}
// And-predicate over `ope`.
inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope>& ope) {
    return std::make_shared<AndPredicate>(ope);
}
// Not-predicate over `ope`.
inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope>& ope) {
    return std::make_shared<NotPredicate>(ope);
}
// Literal-string operator.
inline std::shared_ptr<Ope> lit(const std::string& lit) {
    return std::make_shared<LiteralString>(lit);
}
// Character-class operator; `chars` lists single characters and `x-y` ranges.
inline std::shared_ptr<Ope> cls(const std::string& chars) {
    return std::make_shared<CharacterClass>(chars);
}
// Single-character operator.
inline std::shared_ptr<Ope> chr(char c) {
    return std::make_shared<Character>(c);
}
// Any-character operator.
inline std::shared_ptr<Ope> dot() {
    return std::make_shared<AnyCharacter>();
}
// Capture operator reporting matches of `ope` to `ma` under capture id `ci`.
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t ci) {
    return std::make_shared<Capture>(ope, ma, ci);
}
// Capture operator without an explicit id; uses `(size_t)-1` as the id.
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma) {
    return std::make_shared<Capture>(ope, ma, static_cast<size_t>(-1));
}
// Anchor operator (zero-width, always succeeds).
inline std::shared_ptr<Ope> anc() {
    return std::make_shared<Anchor>();
}
// By-name reference to a definition in `grammar`; `grammar` must outlive the
// returned operator because it is stored by reference.
inline std::shared_ptr<Ope> ref(const std::map<std::string, Definition>& grammar, const std::string& name) {
    return std::make_shared<DefinitionReference>(grammar, name);
}
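/*-----------------------------------------------------------------------------
 *  PEG parser generator
 *---------------------------------------------------------------------------*/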
// Converts a position inside a text into a 1-based (line, column) pair.
//
// s   - start of the text
// ptr - position within the text; positions at or before `s` map to (1, 1)
//
// A line ends at '\n'; the newline character itself belongs to the line it
// terminates.
inline std::pair<size_t, size_t> line_info(const char* s, const char* ptr) {
    auto p = s;
    auto col_ptr = p;   // first character of the current line
    size_t no = 1;

    while (p < ptr) {
        if (*p == '\n') {
            no++;
            col_ptr = p + 1;
        }
        p++;
    }

    auto col = static_cast<size_t>(p - col_ptr) + 1;
    return std::make_pair(no, col);
}
// A grammar maps rule names to their definitions.
typedef std::map<std::string, Definition> Grammar;
// Diagnostic callback; callers in this file pass (line, column, message)
// with line and column taken from `line_info`.
typedef std::function<void (size_t, size_t, const std::string&)> Log;
class PEGParser
static std::shared_ptr<Grammar> parse(const char* s, size_t l, std::string& start, MatchAction ma, Log log) {
static PEGParser instance;
return get().perform_core(s, l, start, ma, log);
// For debuging purpose
static Grammar& grammar() {
return get().g;
static PEGParser& get() {
static PEGParser instance;
return instance;
PEGParser() {
struct Context {
std::shared_ptr<Grammar> grammar;
std::string start;
MatchAction match_action;
std::map<std::string, const char*> references;
size_t capture_count;
Context() : grammar(std::make_shared<Grammar>()), capture_count(0) {}
void make_grammar() {
// Setup PEG syntax parser
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
g["Definition"] <= seq(g["Identifier"], g["LEFTARROW"], g["Expression"]);
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
g["Sequence"] <= zom(g["Prefix"]);
g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]);
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
g["Primary"] <= cho(seq(g["Identifier"], npd(g["LEFTARROW"])),
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
seq(g["CAPTUREOPEN"], g["Expression"], g["CAPTURECLOSE"]),
g["Literal"], g["Class"], g["DOT"]);
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
g["IdentStart"] <= cls("a-zA-Z_");
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
g["Literal"] <= cho(seq(cls("'"), g["SQCont"], cls("'"), g["Spacing"]),
seq(cls("\""), g["DQCont"], cls("\""), g["Spacing"]));
g["SQCont"] <= zom(seq(npd(cls("'")), g["Char"]));
g["DQCont"] <= zom(seq(npd(cls("\"")), g["Char"]));
g["Class"] <= seq(chr('['), g["ClassCont"], chr(']'), g["Spacing"]);
g["ClassCont"] <= zom(seq(npd(chr(']')), g["Range"]));
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
seq(chr('\\'), cls("0-2"), cls("0-7"), cls("0-7")), // TODO: 0-2 should be 0-3. bug in the spec...
seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
seq(npd(chr('\\')), dot()));
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
g["SLASH"] <= seq(chr('/'), g["Spacing"]);
g["AND"] <= seq(chr('&'), g["Spacing"]);
g["NOT"] <= seq(chr('!'), g["Spacing"]);
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
g["STAR"] <= seq(chr('*'), g["Spacing"]);
g["PLUS"] <= seq(chr('+'), g["Spacing"]);
g["OPEN"] <= seq(chr('('), g["Spacing"]);
g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
g["DOT"] <= seq(chr('.'), g["Spacing"]);
g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
g["Comment"] <= seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
g["EndOfFile"] <= npd(dot());
g["CAPTUREOPEN"] <= seq(chr('<'), g["Spacing"]);
g["CAPTURECLOSE"] <= seq(chr('>'), g["Spacing"]);
// Set definition names
for (auto& x: g) { = x.first;
void setup_actions() {
g["Definition"] = [&](const std::vector<any>& v, any& c) {
Context& cxt = *c.get<Context*>();
const auto& name = v[0].get<std::string>();
(*cxt.grammar)[name] <= v[2].get<std::shared_ptr<Ope>>();
(*cxt.grammar)[name].name = name;
if (cxt.start.empty()) {
cxt.start = name;
g["Expression"] = [&](const std::vector<any>& v) {
if (v.size() == 1) {
return v[0].get<std::shared_ptr<Ope>>();
} else {
std::vector<std::shared_ptr<Ope>> opes;
for (auto i = 0u; i < v.size(); i++) {
if (!(i % 2)) {
const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes);
return ope;
g["Sequence"] = [&](const std::vector<any>& v) {
if (v.size() == 1) {
return v[0].get<std::shared_ptr<Ope>>();
} else {
std::vector<std::shared_ptr<Ope>> opes;
for (const auto& x: v) {
const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
return ope;
g["Prefix"] = [&](const std::vector<any>& v, any& c) {
std::shared_ptr<Ope> ope;
if (v.size() == 1) {
ope = v[0].get<std::shared_ptr<Ope>>();
} else {
assert(v.size() == 2);
auto tok = v[0].get<char>();
ope = v[1].get<std::shared_ptr<Ope>>();
if (tok == '&') {
ope = apd(ope);
} else { // '!'
ope = npd(ope);
return ope;
g["Suffix"] = [&](const std::vector<any>& v, any& c) {
auto ope = v[0].get<std::shared_ptr<Ope>>();
if (v.size() == 1) {
return ope;
} else {
assert(v.size() == 2);
auto tok = v[1].get<char>();
if (tok == '?') {
return opt(ope);
} else if (tok == '*') {
return zom(ope);
} else { // '+'
return oom(ope);
g["Primary"].actions = {
[&](const std::vector<any>& v) {
return v[0];
[&](const char* s, size_t l, const std::vector<any>& v, any& c) {
Context& cxt = *c.get<Context*>();
const auto& ident = v[0].get<std::string>();
cxt.references[ident] = s; // for error handling
return ref(*cxt.grammar, ident);
[&](const std::vector<any>& v) {
return v[1];
// Capture
[&](const char* s, size_t l, const std::vector<any>& v, any& c) {
Context& cxt = *c.get<Context*>();
auto ope = v[1].get<std::shared_ptr<Ope>>();
return seq(
ref(*cxt.grammar, "%ANCHOR%"),
cap(ope, cxt.match_action, ++cxt.capture_count),
ref(*cxt.grammar, "%ANCHOR%"));
g["IdentCont"] = [](const char* s, size_t l) {
return std::string(s, l);
g["Literal"] = [](const std::vector<any>& v) {
return lit(v[0].get<std::string>());
g["SQCont"] = [this](const char* s, size_t l) {
return resolve_escape_sequence(s, l);
g["DQCont"] = [this](const char* s, size_t l) {
return resolve_escape_sequence(s, l);
g["Class"] = [](const std::vector<any>& v) {
return cls(v[0].get<std::string>());
g["ClassCont"] = [this](const char* s, size_t l) {
return resolve_escape_sequence(s, l);
g["AND"] = [](const char* s, size_t l) { return *s; };
g["NOT"] = [](const char* s, size_t l) { return *s; };
g["QUESTION"] = [](const char* s, size_t l) { return *s; };
g["STAR"] = [](const char* s, size_t l) { return *s; };
g["PLUS"] = [](const char* s, size_t l) { return *s; };
g["DOT"] = []() {
return dot();
std::shared_ptr<Grammar> perform_core(const char* s, size_t l, std::string& start, MatchAction ma, Log log) {
Context cxt;
cxt.match_action = ma;
Values v;
any c = &cxt;
auto r = g["Grammar"].parse(s, l, v, c);
if (!r.ret) {
if (log) {
auto line = line_info(s, r.ptr);
log(line.first, line.second, r.err.empty() ? "syntax error" : r.err);
return nullptr;
auto& grammar = *cxt.grammar;
for (const auto& x : cxt.references) {
const auto& name = x.first;
auto ptr = x.second;
if (grammar.find(name) == grammar.end()) {
if (log) {
auto line = line_info(s, ptr);
log(line.first, line.second, "'" + name + "' is not defined.");
return nullptr;
start = cxt.start;
grammar["%ANCHOR%"] <= anc();
grammar["%ANCHOR%"] = [](const char* s, size_t l) { return s; };
return cxt.grammar;
std::string resolve_escape_sequence(const char* s, size_t l) {
std::string r;
for (auto i = 0u; i < l; i++) {
auto ch = s[i];
if (ch == '\\') {
switch (s[i]) {
case 'n': r += '\n'; break;
case 'r': r += '\r'; break;
case 't': r += '\t'; break;
case '\'': r += '\''; break;
case '"': r += '"'; break;
case '[': r += '['; break;
case ']': r += ']'; break;
case '\\': r += '\\'; break;
default: {
// TODO: Octal number support
} else {
r += ch;
return r;
Grammar g;
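/*-----------------------------------------------------------------------------
 *  peg
 *---------------------------------------------------------------------------*/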
class peg
peg() = default;
peg(const char* s, size_t l, Log log = nullptr) {
grammar_ = PEGParser::parse(
s, l,
[&](const char* s, size_t l, size_t i) {
if (match_action) match_action(s, l, i);
peg(const char* s, Log log = nullptr) {
auto l = strlen(s);
grammar_ = PEGParser::parse(
s, l,
[&](const char* s, size_t l, size_t i) {
if (match_action) match_action(s, l, i);
operator bool() {
return grammar_ != nullptr;
template <typename T>
bool parse(const char* s, size_t l, T& out, bool exact = true) const {
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
auto r = rule.parse(s, l, out);
return r.ret && (!exact || r.len == l);
return false;
template <typename T>
bool parse(const char* s, T& out, bool exact = true) const {
auto l = strlen(s);
return parse(s, l, out, exact);
bool parse(const char* s, size_t l, bool exact = true) const {
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
auto r = rule.parse(s, l);
return r.ret && (!exact || r.len == l);
return false;
bool parse(const char* s, bool exact = true) const {
auto l = strlen(s);
return parse(s, l, exact);
bool search(const char* s, size_t l, size_t& mpos, size_t& mlen) const {
const auto& rule = (*grammar_)[start_];
if (grammar_ != nullptr) {
size_t pos = 0;
while (pos < l) {
size_t len = l - pos;
auto r = rule.parse(s + pos, len);
if (r.ret) {
mpos = pos;
mlen = r.len;
return true;
mpos = 0;
mlen = 0;
return false;
bool search(const char* s, size_t& mpos, size_t& mlen) const {
auto l = strlen(s);
return search(s, l, mpos, mlen);
bool lint(const char* s, size_t l, bool exact, Log log = nullptr) {
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
auto r = rule.parse(s, l);
if (!r.ret) {
if (log) {
auto line = line_info(s, r.ptr);
log(line.first, line.second, r.err.empty() ? "syntax error" : r.err);
} else if (exact && r.len != l) {
auto line = line_info(s, s + r.len);
log(line.first, line.second, "garbage string at the end");
return r.ret && (!exact || r.len == l);
return false;
Definition& operator[](const char* s) {
return (*grammar_)[s];
MatchAction match_action;
std::shared_ptr<Grammar> grammar_;
std::string start_;
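/*-----------------------------------------------------------------------------
 *  Simple interface
 *---------------------------------------------------------------------------*/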
// Result list of a match/search: a sequence of matched text ranges.
struct match
{
    // One matched range [s, s + l) tagged with an id.
    struct Item {
        const char* s;
        size_t      l;
        size_t      id;

        size_t length() const { return l; }
        std::string str() const { return std::string(s, l); }
    };

    std::vector<Item> matches;

    typedef std::vector<Item>::iterator iterator;
    typedef std::vector<Item>::const_iterator const_iterator;

    bool empty() const {
        return matches.empty();
    }

    size_t size() const {
        return matches.size();
    }

    // Length of item `n`. `n` must be a valid index.
    size_t length(size_t n = 0) const {
        return matches[n].length();
    }

    // Text of item `n`. `n` must be a valid index.
    std::string str(size_t n = 0) const {
        return matches[n].str();
    }

    const Item& operator[](size_t n) const {
        return matches[n];
    }

    iterator begin() {
        return matches.begin();
    }

    iterator end() {
        return matches.end();
    }

    const_iterator begin() const {
        return matches.cbegin();
    }

    const_iterator end() const {
        return matches.cend();
    }
};
// Parses `s` with the grammar in `syntax`, requiring the whole string to
// match. Captures are appended to `m`; on success the whole input is
// inserted in front of them with id 0.
inline bool peg_match(const char* syntax, const char* s, match& m) {
    peg pg(syntax);
    pg.match_action = [&](const char* s, size_t l, size_t i) {
        m.matches.push_back(match::Item{ s, l, i });
    };

    auto ret = pg.parse(s);
    if (ret) {
        auto l = strlen(s);
        m.matches.insert(m.matches.begin(), match::Item{ s, l, 0 });
    }
    return ret;
}
// Returns whether the whole of `s` matches the grammar in `syntax`.
inline bool peg_match(const char* syntax, const char* s) {
    peg pg(syntax);
    return pg.parse(s);
}
// Searches [s, s + l) for the first match of `pg`. Captures are appended to
// `m`; on success the matched range is inserted in front of them with id 0.
//
// Replaces `pg.match_action`; the installed callback refers to `m`, so `m`
// must stay alive for as long as `pg` may still invoke it.
inline bool peg_search(peg& pg, const char* s, size_t l, match& m) {
    pg.match_action = [&](const char* s, size_t l, size_t i) {
        m.matches.push_back(match::Item{ s, l, i });
    };

    size_t mpos, mlen;
    // NOTE(review): the call was garbled in the reviewed text; `pg.search` is
    // the only search taking (s, l, mpos, mlen) — confirm.
    auto ret = pg.search(s, l, mpos, mlen);
    if (ret) {
        m.matches.insert(m.matches.begin(), match::Item{ s + mpos, mlen, 0 });
        return true;
    }

    return false;
}
// Null-terminated-string convenience overload.
inline bool peg_search(peg& pg, const char* s, match& m) {
    auto l = strlen(s);
    return peg_search(pg, s, l, m);
}
// Builds a parser from `syntax` and searches [s, s + l) with it.
inline bool peg_search(const char* syntax, const char* s, size_t l, match& m) {
    peg pg(syntax);
    return peg_search(pg, s, l, m);
}
// Builds a parser from `syntax` and searches the null-terminated string `s`.
inline bool peg_search(const char* syntax, const char* s, match& m) {
    peg pg(syntax);
    auto l = strlen(s);
    return peg_search(pg, s, l, m);
}
class peg_token_iterator : public std::iterator<std::forward_iterator_tag, match>
: s_(nullptr)
, l_(0)
, pos_(std::numeric_limits<size_t>::max()) {}
peg_token_iterator(const char* syntax, const char* s)
: peg_(syntax)
, s_(s)
, l_(strlen(s))
, pos_(0) {
peg_.match_action = [&](const char* s, size_t l, size_t i) {
m_.matches.push_back(match::Item{ s, l, i });
peg_token_iterator(const peg_token_iterator& rhs)
: peg_(rhs.peg_)
, s_(rhs.s_)
, l_(rhs.l_)
, pos_(rhs.pos_)
, m_(rhs.m_) {}
peg_token_iterator& operator++() {
return *this;
peg_token_iterator operator++(int) {
auto it = *this;
return it;
match& operator*() {
return m_;
match* operator->() {
return &m_;
bool operator==(const peg_token_iterator& rhs) {
return pos_ == rhs.pos_;
bool operator!=(const peg_token_iterator& rhs) {
return pos_ != rhs.pos_;
void search() {
size_t mpos, mlen;
if ( + pos_, l_ - pos_, mpos, mlen)) {
m_.matches.insert(m_.matches.begin(), match::Item{ s_ + mpos, mlen, 0 });
pos_ += mpos + mlen;
} else {
pos_ = std::numeric_limits<size_t>::max();
peg peg_;
const char* s_;
size_t l_;
size_t pos_;
match m_;
struct peg_token_range {
typedef peg_token_iterator iterator;
typedef const peg_token_iterator const_iterator;
peg_token_range(const char* syntax, const char* s)
: beg_iter(peg_token_iterator(syntax, s))
, end_iter() {}
iterator begin() {
return beg_iter;
iterator end() {
return end_iter;
const_iterator cbegin() const {
return beg_iter;
const_iterator cend() const {
return end_iter;
peg_token_iterator beg_iter;
peg_token_iterator end_iter;
} // namespace peglib
// vim: et ts=4 sw=4 cin cino={1s ff=unix