cpp-peglib/peglib.h

2805 lines
81 KiB
C
Raw Normal View History

2015-04-08 15:07:41 +00:00
//
2015-02-08 01:52:26 +00:00
// peglib.h
//
// Copyright (c) 2015 Yuji Hirose. All rights reserved.
// MIT License
//
2015-02-19 12:54:15 +00:00
#ifndef _CPPPEGLIB_PEGLIB_H_
#define _CPPPEGLIB_PEGLIB_H_
2015-02-08 01:52:26 +00:00
2015-05-19 13:07:03 +00:00
#include <algorithm>
#include <cassert>
#include <cstring>
2015-02-08 01:52:26 +00:00
#include <functional>
2015-05-19 13:07:03 +00:00
#include <initializer_list>
#include <iostream>
2015-02-19 02:18:20 +00:00
#include <limits>
2015-05-19 13:07:03 +00:00
#include <map>
2015-02-08 01:52:26 +00:00
#include <memory>
2015-02-26 00:31:00 +00:00
#include <mutex>
2015-06-13 04:38:38 +00:00
#include <set>
2015-05-19 13:07:03 +00:00
#include <string>
2015-02-26 00:31:00 +00:00
#include <unordered_map>
2015-05-19 13:07:03 +00:00
#include <vector>
2015-02-08 01:52:26 +00:00
// Guard for older versions of VC++.
// _MSC_VER 1800 is Visual Studio 2013: usable, but it has no constexpr, so
// PEGLIB_NO_CONSTEXPR_SUPPORT is defined for it. Anything older is rejected.
#ifdef _MSC_VER
#if (_MSC_VER == 1800)
// VS2013 has no constexpr
#define PEGLIB_NO_CONSTEXPR_SUPPORT
#elif (_MSC_VER >= 1800)
// good to go
#else // _MSC_VER < 1800
#error "Requires C++11 support"
#endif
#endif
// define if the compiler doesn't support unicode characters reliably in the
// source code
//#define PEGLIB_NO_UNICODE_CHARS
2015-08-10 20:37:56 +00:00
namespace peg {
2015-02-08 01:52:26 +00:00
2015-03-27 19:40:50 +00:00
// Dummy object whose only role is to serve as the default argument of the
// `typename std::enable_if<...>::type*& = enabler` template parameters used
// for SFINAE overload selection in this header.
// NOTE(review): only declared here; the definition is not visible in this
// part of the file - confirm that exactly one exists.
extern void* enabler;
2015-02-08 01:52:26 +00:00
/*-----------------------------------------------------------------------------
2015-02-14 15:13:10 +00:00
* any
2015-02-08 01:52:26 +00:00
*---------------------------------------------------------------------------*/
2015-02-14 15:13:10 +00:00
// Minimal type-erased value holder.
//
// An `any` is either "undefined" (holds nothing) or owns a heap-allocated copy
// of a value of arbitrary copyable type. Copying an `any` deep-copies the held
// value; moving transfers ownership and leaves the source undefined.
class any
{
public:
    // Creates an undefined (empty) value.
    any() : content_(nullptr) {}

    // Deep-copies the value held by `rhs` (if any).
    any(const any& rhs) : content_(rhs.clone()) {}

    // Steals the value held by `rhs`; `rhs` becomes undefined.
    any(any&& rhs) : content_(rhs.content_) {
        rhs.content_ = nullptr;
    }

    // Stores a copy of `value`. Intentionally implicit so that arbitrary
    // values convert to `any`.
    template <typename T>
    any(const T& value) : content_(new holder<T>(value)) {}

    // Copy-assignment. The clone is made *before* the current payload is
    // destroyed, so if cloning throws, `*this` is left untouched instead of
    // holding a dangling pointer that the destructor would delete again.
    any& operator=(const any& rhs) {
        if (this != &rhs) {
            auto fresh = rhs.clone();
            delete content_;
            content_ = fresh;
        }
        return *this;
    }

    // Move-assignment: releases the current payload and takes over `rhs`'s.
    any& operator=(any&& rhs) {
        if (this != &rhs) {
            delete content_;
            content_ = rhs.content_;
            rhs.content_ = nullptr;
        }
        return *this;
    }

    ~any() {
        delete content_;
    }

    // True when no value is held.
    bool is_undefined() const {
        return content_ == nullptr;
    }

    // Returns a reference to the held value as type T.
    // Throws std::bad_cast when the object is undefined or (in builds where
    // assertions are disabled) when the held value is not a T; with assertions
    // enabled a type mismatch trips the assert first.
    template <
        typename T,
        typename std::enable_if<!std::is_same<T, any>::value, std::nullptr_t>::type = nullptr
    >
    T& get() {
        if (!content_) {
            throw std::bad_cast();
        }
        auto p = dynamic_cast<holder<T>*>(content_);
        assert(p);
        if (!p) {
            throw std::bad_cast();
        }
        return p->value_;
    }

    // get<any>() simply yields the object itself.
    template <
        typename T,
        typename std::enable_if<std::is_same<T, any>::value, std::nullptr_t>::type = nullptr
    >
    T& get() {
        return *this;
    }

    // Const counterpart of get<T>(); same failure behaviour as the non-const
    // overload (an undefined value throws std::bad_cast).
    template <
        typename T,
        typename std::enable_if<!std::is_same<T, any>::value, std::nullptr_t>::type = nullptr
    >
    const T& get() const {
        if (!content_) {
            throw std::bad_cast();
        }
        auto p = dynamic_cast<holder<T>*>(content_);
        assert(p);
        if (!p) {
            throw std::bad_cast();
        }
        return p->value_;
    }

    // Const get<any>() yields the object itself.
    template <
        typename T,
        typename std::enable_if<std::is_same<T, any>::value, std::nullptr_t>::type = nullptr
    >
    const any& get() const {
        return *this;
    }

private:
    // Type-erased base of the stored payload.
    struct placeholder {
        virtual ~placeholder() {}
        virtual placeholder* clone() const = 0;
    };

    // Concrete payload holding a T by value.
    template <typename T>
    struct holder : placeholder {
        holder(const T& value) : value_(value) {}
        placeholder* clone() const override {
            return new holder(value_);
        }
        T value_;
    };

    // Deep copy of the payload, or nullptr when undefined.
    placeholder* clone() const {
        return content_ ? content_->clone() : nullptr;
    }

    placeholder* content_;
};
2015-11-30 04:07:02 +00:00
/*-----------------------------------------------------------------------------
* scope_exit
*---------------------------------------------------------------------------*/
// This is based on "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
2015-11-30 04:07:02 +00:00
// Scope guard: runs the stored callable when the guard is destroyed, unless
// release() was called first. Move-only; moving transfers the duty to run the
// callable to the destination and disarms the source.
template <typename EF>
struct scope_exit
{
    // Takes ownership of `f`; the guard starts armed.
    explicit scope_exit(EF&& f)
        : exit_function(std::move(f))
        , execute_on_destruction(true) {}

    // Transfers the callable and the armed state, then disarms `rhs` so the
    // callable runs at most once.
    scope_exit(scope_exit&& rhs)
        : exit_function(std::move(rhs.exit_function))
        , execute_on_destruction(rhs.execute_on_destruction) {
        rhs.release();
    }

    ~scope_exit() {
        if (execute_on_destruction) {
            this->exit_function();
        }
    }

    // Disarms the guard: the callable will not run on destruction.
    void release() {
        this->execute_on_destruction = false;
    }

private:
    scope_exit(const scope_exit&) = delete;
    void operator=(const scope_exit&) = delete;
    scope_exit& operator=(scope_exit&&) = delete;

    EF exit_function;
    bool execute_on_destruction;
};

// Builds a scope_exit for any callable.
//
// The guard always stores the callable by value (reference and cv qualifiers
// are stripped from the deduced type), so both temporaries and named (lvalue)
// callables are accepted; an lvalue is copied into the guard. For an rvalue
// callable the resulting type is the same scope_exit<EF> as before.
template <typename EF>
auto make_scope_exit(EF&& exit_function)
    -> scope_exit<typename std::remove_cv<typename std::remove_reference<EF>::type>::type> {
    typedef typename std::remove_cv<typename std::remove_reference<EF>::type>::type Fn;
    return scope_exit<Fn>(Fn(std::forward<EF>(exit_function)));
}
2015-02-08 01:52:26 +00:00
/*-----------------------------------------------------------------------------
* PEG
*---------------------------------------------------------------------------*/
2015-02-09 22:12:59 +00:00
/*
* Semantic values
*/
struct SemanticValues : protected std::vector<any>
{
2015-07-28 10:47:18 +00:00
const char* path;
2015-07-07 19:44:33 +00:00
const char* ss;
const char* c_str() const { return s_; }
size_t length() const { return n_; }
size_t choice() const { return choice_; }
std::vector<std::pair<const char*, size_t>> tokens;
SemanticValues() : s_(nullptr), n_(0), choice_(0) {}
using std::vector<any>::iterator;
using std::vector<any>::const_iterator;
using std::vector<any>::size;
using std::vector<any>::empty;
using std::vector<any>::assign;
using std::vector<any>::begin;
using std::vector<any>::end;
using std::vector<any>::rbegin;
using std::vector<any>::rend;
using std::vector<any>::operator[];
using std::vector<any>::at;
using std::vector<any>::resize;
using std::vector<any>::front;
using std::vector<any>::back;
using std::vector<any>::push_back;
using std::vector<any>::pop_back;
using std::vector<any>::insert;
using std::vector<any>::erase;
using std::vector<any>::clear;
using std::vector<any>::swap;
using std::vector<any>::emplace;
using std::vector<any>::emplace_back;
2015-02-22 04:19:54 +00:00
2015-06-16 04:43:08 +00:00
std::string str() const {
return std::string(s_, n_);
}
std::string token(size_t id = 0) const {
if (!tokens.empty()) {
assert(id < tokens.size());
const auto& tok = tokens[id];
return std::string(tok.first, tok.second);
}
return std::string(s_, n_);
2015-06-16 04:43:08 +00:00
}
2015-06-16 05:01:02 +00:00
template <typename T>
2016-05-25 07:18:41 +00:00
auto transform(size_t beg = 0, size_t end = static_cast<size_t>(-1)) const -> vector<T> {
return this->transform(beg, end, [](const any& v) { return v.get<T>(); });
2015-06-16 05:01:02 +00:00
}
private:
friend class Context;
friend class PrioritizedChoice;
friend class Holder;
const char* s_;
size_t n_;
size_t choice_;
template <typename F>
auto transform(F f) const -> vector<typename std::remove_const<decltype(f(any()))>::type> {
vector<typename std::remove_const<decltype(f(any()))>::type> r;
for (const auto& v: *this) {
2015-06-16 04:25:01 +00:00
r.emplace_back(f(v));
}
return r;
}
template <typename F>
auto transform(size_t beg, size_t end, F f) const -> vector<typename std::remove_const<decltype(f(any()))>::type> {
vector<typename std::remove_const<decltype(f(any()))>::type> r;
end = (std::min)(end, size());
for (size_t i = beg; i < end; i++) {
2015-06-16 04:25:01 +00:00
r.emplace_back(f((*this)[i]));
}
return r;
}
};
2015-02-08 01:52:26 +00:00
/*
* Semantic action
*/
template <
typename R, typename F,
2015-02-22 00:38:30 +00:00
typename std::enable_if<std::is_void<R>::value>::type*& = enabler,
typename... Args>
2015-02-14 15:13:10 +00:00
any call(F fn, Args&&... args) {
2015-02-22 00:38:30 +00:00
fn(std::forward<Args>(args)...);
return any();
}
template <
typename R, typename F,
2015-02-23 15:04:25 +00:00
typename std::enable_if<std::is_same<typename std::remove_cv<R>::type, any>::value>::type*& = enabler,
typename... Args>
2015-02-14 15:13:10 +00:00
any call(F fn, Args&&... args) {
2015-02-22 00:38:30 +00:00
return fn(std::forward<Args>(args)...);
}
template <
typename R, typename F,
2015-02-23 15:04:25 +00:00
typename std::enable_if<
!std::is_void<R>::value &&
!std::is_same<typename std::remove_cv<R>::type, any>::value>::type*& = enabler,
2015-02-22 00:38:30 +00:00
typename... Args>
any call(F fn, Args&&... args) {
return any(fn(std::forward<Args>(args)...));
}
class Action
{
public:
Action() = default;
Action(const Action& rhs) : fn_(rhs.fn_) {}
2015-02-13 00:48:58 +00:00
template <typename F, typename std::enable_if<!std::is_pointer<F>::value && !std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {}
template <typename F, typename std::enable_if<std::is_pointer<F>::value>::type*& = enabler>
Action(F fn) : fn_(make_adaptor(fn, fn)) {}
2015-02-13 00:48:58 +00:00
template <typename F, typename std::enable_if<std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
Action(F /*fn*/) {}
2015-02-13 00:48:58 +00:00
template <typename F, typename std::enable_if<!std::is_pointer<F>::value && !std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
void operator=(F fn) {
fn_ = make_adaptor(fn, &F::operator());
}
template <typename F, typename std::enable_if<std::is_pointer<F>::value>::type*& = enabler>
void operator=(F fn) {
fn_ = make_adaptor(fn, fn);
}
2015-02-13 00:48:58 +00:00
template <typename F, typename std::enable_if<std::is_same<F, std::nullptr_t>::value>::type*& = enabler>
void operator=(F /*fn*/) {}
Action& operator=(const Action& rhs) = default;
operator bool() const {
return bool(fn_);
}
2015-02-22 00:38:30 +00:00
any operator()(const SemanticValues& sv, any& dt) const {
return fn_(sv, dt);
}
private:
template <typename R>
struct TypeAdaptor {
2015-02-22 00:38:30 +00:00
TypeAdaptor(std::function<R (const SemanticValues& sv)> fn)
: fn_(fn) {}
any operator()(const SemanticValues& sv, any& /*dt*/) {
2015-02-22 00:38:30 +00:00
return call<R>(fn_, sv);
2015-02-19 03:28:57 +00:00
}
2015-02-22 00:38:30 +00:00
std::function<R (const SemanticValues& sv)> fn_;
2015-02-19 03:28:57 +00:00
};
template <typename R>
struct TypeAdaptor_c {
2015-02-22 00:38:30 +00:00
TypeAdaptor_c(std::function<R (const SemanticValues& sv, any& dt)> fn)
: fn_(fn) {}
2015-02-22 00:38:30 +00:00
any operator()(const SemanticValues& sv, any& dt) {
return call<R>(fn_, sv, dt);
}
2015-02-22 00:38:30 +00:00
std::function<R (const SemanticValues& sv, any& dt)> fn_;
};
2015-02-22 00:38:30 +00:00
typedef std::function<any (const SemanticValues& sv, any& dt)> Fty;
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv) const) {
return TypeAdaptor<R>(fn);
}
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv)) {
return TypeAdaptor<R>(fn);
}
template<typename F, typename R>
Fty make_adaptor(F fn, R (* /*mf*/)(const SemanticValues& sv)) {
return TypeAdaptor<R>(fn);
}
2015-02-19 03:28:57 +00:00
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv, any& dt) const) {
2015-02-19 03:28:57 +00:00
return TypeAdaptor_c<R>(fn);
}
template<typename F, typename R>
Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv, any& dt)) {
2015-02-19 03:28:57 +00:00
return TypeAdaptor_c<R>(fn);
}
template<typename F, typename R>
Fty make_adaptor(F fn, R(* /*mf*/)(const SemanticValues& sv, any& dt)) {
2015-02-19 03:28:57 +00:00
return TypeAdaptor_c<R>(fn);
}
2015-06-16 04:30:28 +00:00
Fty fn_;
};
2015-06-16 04:30:28 +00:00
/*
* Semantic predicate
*/
2015-12-07 22:33:10 +00:00
// Note: the 'parse_error' exception class should be used in semantic action handlers to reject the rule.
2015-06-16 04:30:28 +00:00
// Exception-like value carrying an optional message.
struct parse_error {
    // No message.
    parse_error() = default;

    // Stores a copy of `s`. A null pointer is treated as "no message" rather
    // than being passed to std::string (which would be undefined behaviour).
    parse_error(const char* s) : s_(s ? s : "") {}

    // Returns the message, or nullptr when there is none (empty message).
    const char* what() const { return s_.empty() ? nullptr : s_.c_str(); }

private:
    std::string s_;
};
2015-02-15 22:52:39 +00:00
/*
* Match action
*/
2015-03-09 18:58:43 +00:00
// Callback invoked by Capture::parse after its wrapped operator matches.
// Arguments, as passed by Capture: `s` = start of the input handed to the
// capture, `n` = length returned by the wrapped operator, `id` and `name` =
// the id and name the Capture was constructed with.
typedef std::function<void (const char* s, size_t n, size_t id, const std::string& name)> MatchAction;
2015-02-15 22:52:39 +00:00
2015-02-08 01:52:26 +00:00
/*
2015-02-14 02:43:50 +00:00
* Result
2015-02-08 01:52:26 +00:00
*/
2015-05-19 14:51:06 +00:00
// A parse result is a consumed length; the largest size_t value (i.e.
// static_cast<size_t>(-1)) is reserved as the failure marker.
// Returns true for every value except that marker.
inline bool success(size_t len) {
    const size_t failure_marker = (std::numeric_limits<size_t>::max)();
    return len != failure_marker;
}
2015-05-19 14:51:06 +00:00
// Returns true only for the failure marker, i.e. the largest size_t value
// (static_cast<size_t>(-1)); every other value is a consumed length.
inline bool fail(size_t len) {
    const size_t failure_marker = (std::numeric_limits<size_t>::max)();
    return len == failure_marker;
}
2015-02-26 00:31:00 +00:00
/*
* Context
*/
class Ope;
2015-11-30 04:07:02 +00:00
class Context;
class Definition;
// Tracing callback. Context::trace forwards to it (when one is installed) the
// operator name, the current input pointer and remaining length, the current
// semantic values, the context itself and the user data value.
typedef std::function<void (const char* name, const char* s, size_t n, const SemanticValues& sv, const Context& c, const any& dt)> Tracer;
class Context
2015-02-26 00:31:00 +00:00
{
public:
2015-07-28 10:47:18 +00:00
const char* path;
const char* s;
2015-08-06 02:52:08 +00:00
const size_t l;
const char* error_pos;
const char* message_pos;
std::string message; // TODO: should be `int`.
2015-02-27 02:32:26 +00:00
std::vector<std::shared_ptr<SemanticValues>> value_stack;
size_t value_stack_size;
2015-12-07 04:01:27 +00:00
size_t nest_level;
bool in_token;
std::shared_ptr<Ope> whitespaceOpe;
bool in_whitespace;
2015-08-06 02:52:08 +00:00
const size_t def_count;
const bool enablePackratParsing;
2015-02-27 02:32:26 +00:00
std::vector<bool> cache_register;
std::vector<bool> cache_success;
2015-06-16 01:39:35 +00:00
std::map<std::pair<size_t, size_t>, std::tuple<size_t, any>> cache_result;
2015-11-30 04:07:02 +00:00
std::function<void (const char*, const char*, size_t, const SemanticValues&, const Context&, const any&)> tracer;
Context(
const char* a_path,
const char* a_s,
size_t a_l,
size_t a_def_count,
std::shared_ptr<Ope> a_whitespaceOpe,
bool a_enablePackratParsing,
Tracer a_tracer)
: path(a_path)
, s(a_s)
, l(a_l)
, error_pos(nullptr)
, message_pos(nullptr)
2015-12-07 04:01:27 +00:00
, value_stack_size(0)
, nest_level(0)
, in_token(false)
, whitespaceOpe(a_whitespaceOpe)
, in_whitespace(false)
, def_count(a_def_count)
, enablePackratParsing(a_enablePackratParsing)
, cache_register(enablePackratParsing ? def_count * (l + 1) : 0)
, cache_success(enablePackratParsing ? def_count * (l + 1) : 0)
, tracer(a_tracer)
2015-02-27 02:32:26 +00:00
{
}
2015-03-02 22:35:55 +00:00
template <typename T>
void packrat(const char* a_s, size_t def_id, size_t& len, any& val, T fn) {
2015-08-06 02:52:08 +00:00
if (!enablePackratParsing) {
2015-04-08 15:07:41 +00:00
fn(val);
2015-02-27 02:32:26 +00:00
return;
}
auto col = a_s - s;
2016-05-25 07:18:41 +00:00
auto has_cache = cache_register[def_count * static_cast<size_t>(col) + def_id];
2015-02-27 02:32:26 +00:00
if (has_cache) {
2016-05-25 07:18:41 +00:00
if (cache_success[def_count * static_cast<size_t>(col) + def_id]) {
const auto& key = std::make_pair(a_s - s, def_id);
std::tie(len, val) = cache_result[key];
2015-02-27 02:32:26 +00:00
return;
} else {
2016-05-25 07:18:41 +00:00
len = static_cast<size_t>(-1);
2015-02-27 02:32:26 +00:00
return;
}
} else {
2015-04-08 15:07:41 +00:00
fn(val);
2016-05-25 07:18:41 +00:00
cache_register[def_count * static_cast<size_t>(col) + def_id] = true;
cache_success[def_count * static_cast<size_t>(col) + def_id] = success(len);
2015-02-27 02:32:26 +00:00
if (success(len)) {
const auto& key = std::make_pair(a_s - s, def_id);
cache_result[key] = std::make_pair(len, val);
2015-02-27 02:32:26 +00:00
}
return;
}
}
2015-02-26 00:31:00 +00:00
2015-11-30 04:07:02 +00:00
SemanticValues& push() {
assert(value_stack_size <= value_stack.size());
if (value_stack_size == value_stack.size()) {
value_stack.emplace_back(std::make_shared<SemanticValues>());
2015-02-26 00:31:00 +00:00
}
auto& sv = *value_stack[value_stack_size++];
2015-03-02 22:35:55 +00:00
if (!sv.empty()) {
sv.clear();
}
2015-07-28 10:47:18 +00:00
sv.path = path;
2015-07-07 19:44:33 +00:00
sv.ss = s;
sv.s_ = nullptr;
sv.n_ = 0;
sv.tokens.clear();
2015-02-26 00:31:00 +00:00
return sv;
}
void pop() {
value_stack_size--;
2015-02-26 00:31:00 +00:00
}
void set_error_pos(const char* a_s) {
if (error_pos < a_s) error_pos = a_s;
}
2015-11-30 04:07:02 +00:00
void trace(const char* name, const char* a_s, size_t n, SemanticValues& sv, any& dt) const {
if (tracer) tracer(name, a_s, n, sv, *this, dt);
2015-11-30 04:07:02 +00:00
}
2015-02-26 00:31:00 +00:00
};
2015-02-08 01:52:26 +00:00
/*
* Parser operators
2015-02-08 01:52:26 +00:00
*/
class Ope
2015-02-09 22:12:59 +00:00
{
public:
2015-02-26 00:31:00 +00:00
struct Visitor;
virtual ~Ope() {}
2015-05-19 14:51:06 +00:00
virtual size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const = 0;
2015-02-26 00:31:00 +00:00
virtual void accept(Visitor& v) = 0;
2015-02-09 22:12:59 +00:00
};
class Sequence : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
Sequence(const Sequence& rhs) : opes_(rhs.opes_) {}
2015-02-08 01:52:26 +00:00
#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015
// NOTE: Compiler Error C2797 on Visual Studio 2013
// "The C++ compiler in Visual Studio does not implement list
// initialization inside either a member initializer list or a non-static
// data member initializer. Before Visual Studio 2013 Update 3, this was
// silently converted to a function call, which could lead to bad code
// generation. Visual Studio 2013 Update 3 reports this as an error."
template <typename... Args>
Sequence(const Args& ...args) {
2015-02-12 02:13:12 +00:00
opes_ = std::vector<std::shared_ptr<Ope>>{ static_cast<std::shared_ptr<Ope>>(args)... };
2015-02-08 01:52:26 +00:00
}
#else
template <typename... Args>
2015-02-12 02:13:12 +00:00
Sequence(const Args& ...args) : opes_{ static_cast<std::shared_ptr<Ope>>(args)... } {}
2015-02-08 01:52:26 +00:00
#endif
2015-02-12 02:13:12 +00:00
Sequence(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
2015-09-25 19:21:12 +00:00
Sequence(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(opes) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("Sequence", s, n, sv, dt);
2015-02-09 22:12:59 +00:00
size_t i = 0;
2015-02-12 02:13:12 +00:00
for (const auto& ope : opes_) {
2015-11-30 04:07:02 +00:00
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
2015-02-13 00:48:58 +00:00
const auto& rule = *ope;
2015-03-09 18:58:43 +00:00
auto len = rule.parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-09 22:12:59 +00:00
}
i += len;
2015-02-09 22:12:59 +00:00
}
return i;
2015-02-09 22:12:59 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
std::vector<std::shared_ptr<Ope>> opes_;
2015-02-08 01:52:26 +00:00
};
class PrioritizedChoice : public Ope
2015-02-08 01:52:26 +00:00
{
public:
#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015
// NOTE: Compiler Error C2797 on Visual Studio 2013
// "The C++ compiler in Visual Studio does not implement list
// initialization inside either a member initializer list or a non-static
// data member initializer. Before Visual Studio 2013 Update 3, this was
// silently converted to a function call, which could lead to bad code
// generation. Visual Studio 2013 Update 3 reports this as an error."
template <typename... Args>
PrioritizedChoice(const Args& ...args) {
2015-02-12 02:13:12 +00:00
opes_ = std::vector<std::shared_ptr<Ope>>{ static_cast<std::shared_ptr<Ope>>(args)... };
2015-02-08 01:52:26 +00:00
}
#else
template <typename... Args>
2015-02-12 02:13:12 +00:00
PrioritizedChoice(const Args& ...args) : opes_{ static_cast<std::shared_ptr<Ope>>(args)... } {}
2015-02-08 01:52:26 +00:00
#endif
2015-02-12 02:13:12 +00:00
PrioritizedChoice(const std::vector<std::shared_ptr<Ope>>& opes) : opes_(opes) {}
2015-09-25 19:21:12 +00:00
PrioritizedChoice(std::vector<std::shared_ptr<Ope>>&& opes) : opes_(opes) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("PrioritizedChoice", s, n, sv, dt);
size_t id = 0;
2015-02-12 02:13:12 +00:00
for (const auto& ope : opes_) {
2015-11-30 04:07:02 +00:00
c.nest_level++;
2015-02-26 00:31:00 +00:00
auto& chldsv = c.push();
2015-12-07 22:33:10 +00:00
auto se = make_scope_exit([&]() {
c.nest_level--;
c.pop();
});
2015-11-30 04:07:02 +00:00
const auto& rule = *ope;
2015-03-09 18:58:43 +00:00
auto len = rule.parse(s, n, chldsv, c, dt);
2015-12-07 04:01:27 +00:00
if (success(len)) {
2015-02-22 00:38:30 +00:00
if (!chldsv.empty()) {
sv.insert(sv.end(), chldsv.begin(), chldsv.end());
}
sv.s_ = chldsv.c_str();
sv.n_ = chldsv.length();
sv.choice_ = id;
sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end());
return len;
}
id++;
2015-02-09 22:12:59 +00:00
}
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-09 22:12:59 +00:00
}
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
size_t size() const { return opes_.size(); }
2015-02-08 01:52:26 +00:00
2015-02-12 02:13:12 +00:00
std::vector<std::shared_ptr<Ope>> opes_;
2015-02-08 01:52:26 +00:00
};
class ZeroOrMore : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
ZeroOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("ZeroOrMore", s, n, sv, dt);
2015-11-17 11:16:46 +00:00
auto save_error_pos = c.error_pos;
2015-08-10 20:37:25 +00:00
size_t i = 0;
2015-03-09 18:58:43 +00:00
while (n - i > 0) {
2015-11-30 04:07:02 +00:00
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
2015-12-07 22:33:10 +00:00
auto save_sv_size = sv.size();
auto save_tok_size = sv.tokens.size();
2015-02-13 00:48:58 +00:00
const auto& rule = *ope_;
2015-03-09 18:58:43 +00:00
auto len = rule.parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
2015-12-07 22:33:10 +00:00
if (sv.size() != save_sv_size) {
2016-05-25 07:18:41 +00:00
sv.erase(sv.begin() + static_cast<std::ptrdiff_t>(save_sv_size));
2015-12-07 22:33:10 +00:00
}
if (sv.tokens.size() != save_tok_size) {
2016-05-25 07:18:41 +00:00
sv.tokens.erase(sv.tokens.begin() + static_cast<std::ptrdiff_t>(save_tok_size));
}
2015-11-17 11:16:46 +00:00
c.error_pos = save_error_pos;
2015-02-09 22:12:59 +00:00
break;
}
i += len;
2015-02-09 22:12:59 +00:00
}
return i;
2015-02-09 22:12:59 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
std::shared_ptr<Ope> ope_;
2015-02-08 01:52:26 +00:00
};
class OneOrMore : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
OneOrMore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("OneOrMore", s, n, sv, dt);
2016-05-25 07:18:41 +00:00
size_t len = 0;
2015-11-30 04:07:02 +00:00
{
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
const auto& rule = *ope_;
len = rule.parse(s, n, sv, c, dt);
if (fail(len)) {
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-11-30 04:07:02 +00:00
}
2015-02-09 22:12:59 +00:00
}
2015-11-17 11:16:46 +00:00
auto save_error_pos = c.error_pos;
auto i = len;
2015-03-09 18:58:43 +00:00
while (n - i > 0) {
2015-11-30 04:07:02 +00:00
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
2015-12-07 22:33:10 +00:00
auto save_sv_size = sv.size();
auto save_tok_size = sv.tokens.size();
2015-02-13 00:48:58 +00:00
const auto& rule = *ope_;
2016-05-25 07:18:41 +00:00
len = rule.parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
2015-12-07 22:33:10 +00:00
if (sv.size() != save_sv_size) {
2016-05-25 07:18:41 +00:00
sv.erase(sv.begin() + static_cast<std::ptrdiff_t>(save_sv_size));
2015-12-07 22:33:10 +00:00
}
if (sv.tokens.size() != save_tok_size) {
2016-05-25 07:18:41 +00:00
sv.tokens.erase(sv.tokens.begin() + static_cast<std::ptrdiff_t>(save_tok_size));
}
2015-11-17 11:16:46 +00:00
c.error_pos = save_error_pos;
2015-02-09 22:12:59 +00:00
break;
}
i += len;
2015-02-09 22:12:59 +00:00
}
return i;
2015-02-09 22:12:59 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
std::shared_ptr<Ope> ope_;
2015-02-08 01:52:26 +00:00
};
class Option : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
Option(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("Option", s, n, sv, dt);
2015-11-17 11:16:46 +00:00
auto save_error_pos = c.error_pos;
2015-11-30 04:07:02 +00:00
c.nest_level++;
2015-12-07 22:33:10 +00:00
auto save_sv_size = sv.size();
auto save_tok_size = sv.tokens.size();
2015-11-30 04:07:02 +00:00
auto se = make_scope_exit([&]() { c.nest_level--; });
2015-02-13 00:48:58 +00:00
const auto& rule = *ope_;
2015-03-09 18:58:43 +00:00
auto len = rule.parse(s, n, sv, c, dt);
2015-08-06 03:45:59 +00:00
if (success(len)) {
return len;
} else {
2015-12-07 22:33:10 +00:00
if (sv.size() != save_sv_size) {
2016-05-25 07:18:41 +00:00
sv.erase(sv.begin() + static_cast<std::ptrdiff_t>(save_sv_size));
2015-12-07 22:33:10 +00:00
}
if (sv.tokens.size() != save_tok_size) {
2016-05-25 07:18:41 +00:00
sv.tokens.erase(sv.tokens.begin() + static_cast<std::ptrdiff_t>(save_tok_size));
}
2015-11-17 11:16:46 +00:00
c.error_pos = save_error_pos;
2015-08-06 03:45:59 +00:00
return 0;
}
2015-02-09 22:12:59 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
std::shared_ptr<Ope> ope_;
2015-02-08 01:52:26 +00:00
};
class AndPredicate : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
AndPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("AndPredicate", s, n, sv, dt);
c.nest_level++;
auto& chldsv = c.push();
2015-12-07 22:33:10 +00:00
auto se = make_scope_exit([&]() {
c.nest_level--;
c.pop();
});
2015-11-30 04:07:02 +00:00
const auto& rule = *ope_;
auto len = rule.parse(s, n, chldsv, c, dt);
if (success(len)) {
return 0;
2015-02-09 22:12:59 +00:00
} else {
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-09 22:12:59 +00:00
}
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
std::shared_ptr<Ope> ope_;
2015-02-08 01:52:26 +00:00
};
class NotPredicate : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
NotPredicate(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("NotPredicate", s, n, sv, dt);
2015-11-17 11:16:46 +00:00
auto save_error_pos = c.error_pos;
2015-11-30 04:07:02 +00:00
c.nest_level++;
auto& chldsv = c.push();
2015-12-07 22:33:10 +00:00
auto se = make_scope_exit([&]() {
c.nest_level--;
c.pop();
});
2015-11-30 04:07:02 +00:00
const auto& rule = *ope_;
auto len = rule.parse(s, n, chldsv, c, dt);
2015-06-12 22:52:26 +00:00
if (success(len)) {
c.set_error_pos(s);
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-09 22:12:59 +00:00
} else {
2015-11-17 11:16:46 +00:00
c.error_pos = save_error_pos;
return 0;
2015-02-09 22:12:59 +00:00
}
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-12 02:13:12 +00:00
std::shared_ptr<Ope> ope_;
2015-02-08 01:52:26 +00:00
};
class LiteralString : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-13 00:48:58 +00:00
LiteralString(const std::string& s) : lit_(s) {}
2015-02-08 01:52:26 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-08 01:52:26 +00:00
std::string lit_;
};
class CharacterClass : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-13 00:48:58 +00:00
CharacterClass(const std::string& chars) : chars_(chars) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("CharacterClass", s, n, sv, dt);
// TODO: UTF8 support
2015-03-09 18:58:43 +00:00
if (n < 1) {
c.set_error_pos(s);
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-08 01:52:26 +00:00
}
auto ch = s[0];
auto i = 0u;
while (i < chars_.size()) {
if (i + 2 < chars_.size() && chars_[i + 1] == '-') {
if (chars_[i] <= ch && ch <= chars_[i + 2]) {
return 1;
2015-02-08 01:52:26 +00:00
}
i += 3;
} else {
if (chars_[i] == ch) {
return 1;
2015-02-08 01:52:26 +00:00
}
i += 1;
}
}
c.set_error_pos(s);
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-08 01:52:26 +00:00
}
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-08 01:52:26 +00:00
std::string chars_;
};
class Character : public Ope
2015-02-08 01:52:26 +00:00
{
public:
Character(char ch) : ch_(ch) {}
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("Character", s, n, sv, dt);
// TODO: UTF8 support
2015-03-09 18:58:43 +00:00
if (n < 1 || s[0] != ch_) {
c.set_error_pos(s);
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-08 01:52:26 +00:00
}
return 1;
2015-02-08 01:52:26 +00:00
}
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-08 01:52:26 +00:00
char ch_;
};
class AnyCharacter : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("AnyCharacter", s, n, sv, dt);
// TODO: UTF8 support
2015-03-09 18:58:43 +00:00
if (n < 1) {
c.set_error_pos(s);
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
2015-02-08 01:52:26 +00:00
}
return 1;
2015-02-08 01:52:26 +00:00
}
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-08 01:52:26 +00:00
};
2015-02-15 22:52:39 +00:00
class Capture : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-06-13 05:20:33 +00:00
Capture(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t id, const std::string& name)
: ope_(ope), match_action_(ma), id_(id), name_(name) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-02-13 00:48:58 +00:00
const auto& rule = *ope_;
2015-03-09 18:58:43 +00:00
auto len = rule.parse(s, n, sv, c, dt);
if (success(len) && match_action_) {
2015-06-13 05:20:33 +00:00
match_action_(s, len, id_, name_);
2015-02-09 22:12:59 +00:00
}
return len;
2015-02-09 22:12:59 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-02-15 22:52:39 +00:00
std::shared_ptr<Ope> ope_;
2015-06-13 05:20:33 +00:00
private:
2015-02-15 22:52:39 +00:00
MatchAction match_action_;
2015-06-13 05:20:33 +00:00
size_t id_;
std::string name_;
2015-02-08 01:52:26 +00:00
};
2015-08-10 20:37:56 +00:00
class TokenBoundary : public Ope
{
public:
2015-08-10 20:37:56 +00:00
TokenBoundary(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
std::shared_ptr<Ope> ope_;
};
2015-06-12 22:52:26 +00:00
class Ignore : public Ope
{
public:
Ignore(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
size_t parse(const char* s, size_t n, SemanticValues& /*sv*/, Context& c, any& dt) const override {
2015-06-12 22:52:26 +00:00
const auto& rule = *ope_;
auto& chldsv = c.push();
2015-12-07 22:33:10 +00:00
auto se = make_scope_exit([&]() {
c.pop();
});
return rule.parse(s, n, chldsv, c, dt);
2015-06-12 22:52:26 +00:00
}
void accept(Visitor& v) override;
std::shared_ptr<Ope> ope_;
};
2015-06-15 17:47:59 +00:00
typedef std::function<size_t (const char* s, size_t n, SemanticValues& sv, any& dt)> Parser;
2015-02-20 03:27:47 +00:00
class User : public Ope
{
public:
User(Parser fn) : fn_(fn) {}
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-11-30 04:07:02 +00:00
c.trace("User", s, n, sv, dt);
2015-02-20 03:27:47 +00:00
assert(fn_);
2015-03-09 18:58:43 +00:00
return fn_(s, n, sv, dt);
2015-02-20 03:27:47 +00:00
}
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-06-15 17:47:59 +00:00
std::function<size_t (const char* s, size_t n, SemanticValues& sv, any& dt)> fn_;
2015-02-20 03:27:47 +00:00
};
class WeakHolder : public Ope
2015-02-08 01:52:26 +00:00
{
public:
2015-02-12 02:13:12 +00:00
WeakHolder(const std::shared_ptr<Ope>& ope) : weak_(ope) {}
2015-02-08 01:52:26 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
2015-02-12 02:13:12 +00:00
auto ope = weak_.lock();
assert(ope);
2015-02-13 00:48:58 +00:00
const auto& rule = *ope;
2015-03-09 18:58:43 +00:00
return rule.parse(s, n, sv, c, dt);
2015-02-09 22:12:59 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
std::weak_ptr<Ope> weak_;
2015-02-08 01:52:26 +00:00
};
2015-02-26 00:31:00 +00:00
class Holder : public Ope
{
public:
Holder(Definition* outer)
: outer_(outer) {}
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
2015-06-16 03:26:49 +00:00
any reduce(const SemanticValues& sv, any& dt) const;
2015-02-26 00:31:00 +00:00
std::shared_ptr<Ope> ope_;
Definition* outer_;
2015-06-13 05:20:33 +00:00
friend class Definition;
2015-02-26 00:31:00 +00:00
};
class DefinitionReference : public Ope
{
public:
DefinitionReference(
2015-06-13 04:38:38 +00:00
const std::unordered_map<std::string, Definition>& grammar, const std::string& name, const char* s)
2015-02-26 00:31:00 +00:00
: grammar_(grammar)
2015-06-13 04:38:38 +00:00
, name_(name)
, s_(s) {}
2015-02-26 00:31:00 +00:00
2015-05-19 14:51:06 +00:00
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
2015-02-26 00:31:00 +00:00
void accept(Visitor& v) override;
std::shared_ptr<Ope> get_rule() const;
const std::unordered_map<std::string, Definition>& grammar_;
const std::string name_;
2015-06-13 04:38:38 +00:00
const char* s_;
2015-06-13 05:20:33 +00:00
private:
2015-02-26 00:31:00 +00:00
mutable std::once_flag init_;
mutable std::shared_ptr<Ope> rule_;
};
class Whitespace : public Ope
{
public:
Whitespace(const std::shared_ptr<Ope>& ope) : ope_(ope) {}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override {
if (c.in_whitespace) {
return 0;
}
c.in_whitespace = true;
auto se = make_scope_exit([&]() { c.in_whitespace = false; });
const auto& rule = *ope_;
return rule.parse(s, n, sv, c, dt);
}
void accept(Visitor& v) override;
std::shared_ptr<Ope> ope_;
};
2015-02-26 00:31:00 +00:00
/*
* Visitor
*/
// Base visitor over the operator types. Every visit() defaults to a no-op so
// that concrete visitors override only the operators they care about.
struct Ope::Visitor
{
    virtual ~Visitor() {}

    virtual void visit(Sequence&) {}
    virtual void visit(PrioritizedChoice&) {}
    virtual void visit(ZeroOrMore&) {}
    virtual void visit(OneOrMore&) {}
    virtual void visit(Option&) {}
    virtual void visit(AndPredicate&) {}
    virtual void visit(NotPredicate&) {}
    virtual void visit(LiteralString&) {}
    virtual void visit(CharacterClass&) {}
    virtual void visit(Character&) {}
    virtual void visit(AnyCharacter&) {}
    virtual void visit(Capture&) {}
    virtual void visit(TokenBoundary&) {}
    virtual void visit(Ignore&) {}
    virtual void visit(User&) {}
    virtual void visit(WeakHolder&) {}
    virtual void visit(Holder&) {}
    virtual void visit(DefinitionReference&) {}
    virtual void visit(Whitespace&) {}
};
2015-06-05 15:45:13 +00:00
struct AssignIDToDefinition : public Ope::Visitor
2015-02-26 00:31:00 +00:00
{
void visit(Sequence& ope) override {
2015-08-08 03:49:21 +00:00
for (auto op: ope.opes_) {
op->accept(*this);
2015-02-26 00:31:00 +00:00
}
}
void visit(PrioritizedChoice& ope) override {
2015-08-08 03:49:21 +00:00
for (auto op: ope.opes_) {
op->accept(*this);
2015-02-26 00:31:00 +00:00
}
}
2015-06-05 17:28:38 +00:00
void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); }
void visit(OneOrMore& ope) override { ope.ope_->accept(*this); }
void visit(Option& ope) override { ope.ope_->accept(*this); }
void visit(AndPredicate& ope) override { ope.ope_->accept(*this); }
void visit(NotPredicate& ope) override { ope.ope_->accept(*this); }
void visit(Capture& ope) override { ope.ope_->accept(*this); }
2015-08-10 20:37:56 +00:00
void visit(TokenBoundary& ope) override { ope.ope_->accept(*this); }
2015-06-12 22:52:26 +00:00
void visit(Ignore& ope) override { ope.ope_->accept(*this); }
2015-06-05 17:28:38 +00:00
void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); }
void visit(Holder& ope) override;
void visit(DefinitionReference& ope) override { ope.get_rule()->accept(*this); }
std::unordered_map<void*, size_t> ids;
};
struct IsToken : public Ope::Visitor
{
2015-08-10 20:37:56 +00:00
IsToken() : has_token_boundary(false), has_rule(false) {}
2015-06-05 17:28:38 +00:00
void visit(Sequence& ope) override {
2015-08-08 03:49:21 +00:00
for (auto op: ope.opes_) {
op->accept(*this);
2015-06-05 17:28:38 +00:00
}
2015-02-26 00:31:00 +00:00
}
2015-06-05 17:28:38 +00:00
void visit(PrioritizedChoice& ope) override {
2015-08-08 03:49:21 +00:00
for (auto op: ope.opes_) {
op->accept(*this);
2015-06-05 17:28:38 +00:00
}
2015-02-26 00:31:00 +00:00
}
2015-06-05 17:28:38 +00:00
void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); }
void visit(OneOrMore& ope) override { ope.ope_->accept(*this); }
void visit(Option& ope) override { ope.ope_->accept(*this); }
void visit(Capture& ope) override { ope.ope_->accept(*this); }
void visit(TokenBoundary& /*ope*/) override { has_token_boundary = true; }
2015-06-12 22:52:26 +00:00
void visit(Ignore& ope) override { ope.ope_->accept(*this); }
2015-06-05 17:28:38 +00:00
void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); }
void visit(DefinitionReference& /*ope*/) override { has_rule = true; }
2015-06-05 17:28:38 +00:00
bool is_token() const {
2015-08-10 20:37:56 +00:00
return has_token_boundary || !has_rule;
2015-02-26 00:31:00 +00:00
}
2015-08-10 20:37:56 +00:00
bool has_token_boundary;
2015-06-05 17:28:38 +00:00
bool has_rule;
2015-02-26 00:31:00 +00:00
};
2015-11-26 17:05:00 +00:00
// Reserved definition name "%whitespace".
// NOTE(review): presumably the grammar rule with this name supplies the
// whitespace operator used for automatic whitespace skipping -- its use is
// outside this part of the file; confirm against the grammar-building code.
static const char* WHITESPACE_DEFINITION_NAME = "%whitespace";
2015-02-08 01:52:26 +00:00
/*
* Definition
*/
class Definition
{
public:
struct Result {
bool ret;
2015-05-19 14:51:06 +00:00
size_t len;
const char* error_pos;
const char* message_pos;
const std::string message;
};
Definition()
2015-06-16 03:26:49 +00:00
: ignoreSemanticValue(false)
, enablePackratParsing(false)
2015-06-05 17:28:38 +00:00
, is_token(false)
, has_token_boundary(false)
2015-03-03 02:52:09 +00:00
, holder_(std::make_shared<Holder>(this)) {}
2015-02-08 01:52:26 +00:00
Definition(const Definition& rhs)
: name(rhs.name)
, ignoreSemanticValue(false)
, enablePackratParsing(false)
2015-06-05 17:28:38 +00:00
, is_token(false)
, has_token_boundary(false)
, holder_(rhs.holder_)
2015-02-08 01:52:26 +00:00
{
holder_->outer_ = this;
2015-02-08 01:52:26 +00:00
}
Definition(Definition&& rhs)
: name(std::move(rhs.name))
, ignoreSemanticValue(rhs.ignoreSemanticValue)
, whitespaceOpe(rhs.whitespaceOpe)
, enablePackratParsing(rhs.enablePackratParsing)
2015-06-05 17:28:38 +00:00
, is_token(rhs.is_token)
, has_token_boundary(rhs.has_token_boundary)
, holder_(std::move(rhs.holder_))
2015-02-08 01:52:26 +00:00
{
holder_->outer_ = this;
2015-02-08 01:52:26 +00:00
}
2015-02-12 02:13:12 +00:00
Definition(const std::shared_ptr<Ope>& ope)
2015-06-16 03:26:49 +00:00
: ignoreSemanticValue(false)
, enablePackratParsing(false)
2015-06-05 17:28:38 +00:00
, is_token(false)
, has_token_boundary(false)
, holder_(std::make_shared<Holder>(this))
2015-02-08 01:52:26 +00:00
{
2015-06-05 17:28:38 +00:00
*this <= ope;
}
operator std::shared_ptr<Ope>() {
return std::make_shared<WeakHolder>(holder_);
2015-02-08 01:52:26 +00:00
}
2015-02-12 02:13:12 +00:00
Definition& operator<=(const std::shared_ptr<Ope>& ope) {
2015-06-05 17:28:38 +00:00
IsToken isToken;
ope->accept(isToken);
is_token = isToken.is_token();
has_token_boundary = isToken.has_token_boundary;
2015-06-05 17:28:38 +00:00
2015-02-12 02:13:12 +00:00
holder_->ope_ = ope;
2015-06-05 17:28:38 +00:00
return *this;
2015-02-08 01:52:26 +00:00
}
2015-07-28 10:47:18 +00:00
Result parse(const char* s, size_t n, const char* path = nullptr) const {
2015-03-03 02:52:09 +00:00
SemanticValues sv;
any dt;
2015-07-28 10:47:18 +00:00
return parse_core(s, n, sv, dt, path);
2015-02-26 00:31:00 +00:00
}
2015-07-28 10:47:18 +00:00
Result parse(const char* s, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse(s, n, path);
2015-02-26 00:31:00 +00:00
}
2015-07-28 10:47:18 +00:00
Result parse(const char* s, size_t n, any& dt, const char* path = nullptr) const {
2015-02-26 00:31:00 +00:00
SemanticValues sv;
2015-07-28 10:47:18 +00:00
return parse_core(s, n, sv, dt, path);
2015-02-26 00:31:00 +00:00
}
2015-07-28 10:47:18 +00:00
Result parse(const char* s, any& dt, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse(s, n, dt, path);
}
2015-02-08 01:52:26 +00:00
template <typename T>
2015-07-28 10:47:18 +00:00
Result parse_and_get_value(const char* s, size_t n, T& val, const char* path = nullptr) const {
2015-02-22 00:38:30 +00:00
SemanticValues sv;
any dt;
2015-07-28 10:47:18 +00:00
auto r = parse_core(s, n, sv, dt, path);
if (r.ret && !sv.empty() && !sv.front().is_undefined()) {
val = sv[0].get<T>();
2015-02-08 01:52:26 +00:00
}
return r;
2015-02-08 01:52:26 +00:00
}
2015-02-09 17:01:59 +00:00
template <typename T>
2015-07-28 10:47:18 +00:00
Result parse_and_get_value(const char* s, T& val, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse_and_get_value(s, n, val, path);
2015-03-09 18:58:43 +00:00
}
template <typename T>
2015-07-28 10:47:18 +00:00
Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
SemanticValues sv;
2015-07-28 10:47:18 +00:00
auto r = parse_core(s, n, sv, dt, path);
if (r.ret && !sv.empty() && !sv.front().is_undefined()) {
val = sv[0].get<T>();
2015-03-09 18:58:43 +00:00
}
return r;
}
template <typename T>
2015-07-28 10:47:18 +00:00
Result parse_and_get_value(const char* s, any& dt, T& val, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse_and_get_value(s, n, dt, val, path);
2015-02-08 01:52:26 +00:00
}
2015-06-16 03:26:49 +00:00
Definition& operator=(Action a) {
action = a;
return *this;
2015-02-09 17:01:59 +00:00
}
2015-02-09 22:12:59 +00:00
template <typename T>
Definition& operator,(T fn) {
operator=(fn);
return *this;
}
2015-02-18 23:00:11 +00:00
Definition& operator~() {
ignoreSemanticValue = true;
2015-02-18 23:00:11 +00:00
return *this;
}
2015-02-26 00:31:00 +00:00
void accept(Ope::Visitor& v) {
holder_->accept(v);
}
std::shared_ptr<Ope> get_core_operator() {
return holder_->ope_;
}
2015-11-17 11:10:32 +00:00
std::string name;
size_t id;
Action action;
std::function<void (any& dt)> enter;
std::function<void (any& dt)> leave;
2015-11-17 11:10:32 +00:00
std::function<std::string ()> error_message;
bool ignoreSemanticValue;
std::shared_ptr<Ope> whitespaceOpe;
2015-11-17 11:10:32 +00:00
bool enablePackratParsing;
bool is_token;
bool has_token_boundary;
2015-11-30 04:07:02 +00:00
Tracer tracer;
2015-02-08 01:52:26 +00:00
private:
friend class DefinitionReference;
2015-02-26 00:31:00 +00:00
Definition& operator=(const Definition& rhs);
Definition& operator=(Definition&& rhs);
2015-02-08 01:52:26 +00:00
2015-07-28 10:47:18 +00:00
Result parse_core(const char* s, size_t n, SemanticValues& sv, any& dt, const char* path) const {
2015-06-05 15:45:13 +00:00
AssignIDToDefinition assignId;
holder_->accept(assignId);
2015-03-03 02:52:09 +00:00
std::shared_ptr<Ope> ope = holder_;
if (whitespaceOpe) {
ope = std::make_shared<Sequence>(whitespaceOpe, ope);
}
Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer);
auto len = ope->parse(s, n, sv, cxt, dt);
2015-06-05 15:45:13 +00:00
return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message };
2015-03-03 02:52:09 +00:00
}
2015-02-26 00:31:00 +00:00
std::shared_ptr<Holder> holder_;
};
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
/*
* Implementations
*/
2015-02-08 01:52:26 +00:00
inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
2015-11-30 04:07:02 +00:00
c.trace("LiteralString", s, n, sv, dt);
auto i = 0u;
for (; i < lit_.size(); i++) {
if (i >= n || s[i] != lit_[i]) {
c.set_error_pos(s);
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
}
}
// Skip whiltespace
if (!c.in_token) {
if (c.whitespaceOpe) {
auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt);
if (fail(len)) {
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
}
i += len;
}
}
return i;
}
inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
c.in_token = true;
auto se = make_scope_exit([&]() { c.in_token = false; });
const auto& rule = *ope_;
auto len = rule.parse(s, n, sv, c, dt);
if (success(len)) {
sv.tokens.push_back(std::make_pair(s, len));
if (c.whitespaceOpe) {
auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt);
if (fail(l)) {
2016-05-25 07:18:41 +00:00
return static_cast<size_t>(-1);
}
len += l;
}
}
return len;
}
2015-05-19 14:51:06 +00:00
inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
2015-02-26 00:31:00 +00:00
if (!ope_) {
throw std::logic_error("Uninitialized definition ope was used...");
}
2015-02-08 01:52:26 +00:00
2015-11-30 04:07:02 +00:00
c.trace(outer_->name.c_str(), s, n, sv, dt);
c.nest_level++;
auto se = make_scope_exit([&]() { c.nest_level--; });
2015-11-30 04:07:02 +00:00
2015-05-19 14:51:06 +00:00
size_t len;
2015-04-08 16:02:26 +00:00
any val;
c.packrat(s, outer_->id, len, val, [&](any& a_val) {
2015-02-27 02:32:26 +00:00
auto& chldsv = c.push();
if (outer_->enter) {
outer_->enter(dt);
2015-11-17 11:10:32 +00:00
}
auto se2 = make_scope_exit([&]() {
2015-12-07 22:33:10 +00:00
c.pop();
if (outer_->leave) {
outer_->leave(dt);
2015-12-07 22:33:10 +00:00
}
});
const auto& rule = *ope_;
len = rule.parse(s, n, chldsv, c, dt);
2015-04-08 16:02:26 +00:00
// Invoke action
2015-06-15 20:07:25 +00:00
if (success(len)) {
chldsv.s_ = s;
chldsv.n_ = len;
2015-02-08 01:52:26 +00:00
2015-06-15 20:07:25 +00:00
try {
a_val = reduce(chldsv, dt);
2015-06-15 20:07:25 +00:00
} catch (const parse_error& e) {
if (e.what()) {
2015-11-17 11:16:46 +00:00
if (c.message_pos < s) {
c.message_pos = s;
c.message = e.what();
}
2015-06-15 20:07:25 +00:00
}
2016-05-25 07:18:41 +00:00
len = static_cast<size_t>(-1);
2015-06-15 20:07:25 +00:00
}
2015-04-08 16:02:26 +00:00
}
2015-02-27 02:32:26 +00:00
});
2015-11-17 11:16:46 +00:00
if (success(len)) {
if (!outer_->ignoreSemanticValue) {
sv.emplace_back(val);
2015-11-17 11:16:46 +00:00
}
} else {
if (outer_->error_message) {
if (c.message_pos < s) {
c.message_pos = s;
c.message = outer_->error_message();
}
}
}
return len;
2015-02-26 00:31:00 +00:00
}
2015-02-08 01:52:26 +00:00
2015-06-16 03:26:49 +00:00
// Produces the semantic value for a successful match: the result of the
// definition's action when one is set, otherwise the first collected value,
// or an empty `any` when nothing was collected.
inline any Holder::reduce(const SemanticValues& sv, any& dt) const {
    if (outer_->action) {
        return outer_->action(sv, dt);
    }
    if (sv.empty()) {
        return any();
    }
    return sv.front();
}
2015-05-19 14:51:06 +00:00
inline size_t DefinitionReference::parse(
2015-03-09 18:58:43 +00:00
const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
2015-02-26 00:31:00 +00:00
const auto& rule = *get_rule();
2015-03-09 18:58:43 +00:00
return rule.parse(s, n, sv, c, dt);
2015-02-26 00:31:00 +00:00
}
2015-02-08 01:52:26 +00:00
inline std::shared_ptr<Ope> DefinitionReference::get_rule() const {
2015-02-26 00:31:00 +00:00
if (!rule_) {
std::call_once(init_, [this]() {
rule_ = grammar_.at(name_).holder_;
});
2015-02-08 01:52:26 +00:00
}
2015-02-26 00:31:00 +00:00
assert(rule_);
return rule_;
}
2015-02-08 01:52:26 +00:00
2015-02-26 00:31:00 +00:00
inline void Sequence::accept(Visitor& v) { v.visit(*this); }
inline void PrioritizedChoice::accept(Visitor& v) { v.visit(*this); }
inline void ZeroOrMore::accept(Visitor& v) { v.visit(*this); }
inline void OneOrMore::accept(Visitor& v) { v.visit(*this); }
inline void Option::accept(Visitor& v) { v.visit(*this); }
inline void AndPredicate::accept(Visitor& v) { v.visit(*this); }
inline void NotPredicate::accept(Visitor& v) { v.visit(*this); }
inline void LiteralString::accept(Visitor& v) { v.visit(*this); }
inline void CharacterClass::accept(Visitor& v) { v.visit(*this); }
inline void Character::accept(Visitor& v) { v.visit(*this); }
inline void AnyCharacter::accept(Visitor& v) { v.visit(*this); }
inline void Capture::accept(Visitor& v) { v.visit(*this); }
2015-08-10 20:37:56 +00:00
inline void TokenBoundary::accept(Visitor& v) { v.visit(*this); }
2015-06-12 22:52:26 +00:00
inline void Ignore::accept(Visitor& v) { v.visit(*this); }
2015-02-26 00:31:00 +00:00
inline void User::accept(Visitor& v) { v.visit(*this); }
inline void WeakHolder::accept(Visitor& v) { v.visit(*this); }
inline void Holder::accept(Visitor& v) { v.visit(*this); }
inline void DefinitionReference::accept(Visitor& v) { v.visit(*this); }
inline void Whitespace::accept(Visitor& v) { v.visit(*this); }
2015-06-05 15:51:45 +00:00
// Gives the Holder's owning Definition the next free id (the number of
// definitions seen so far) and then descends into its rule body. A
// Definition that already has an entry in `ids` is skipped, which also stops
// the walk from looping on recursive rules.
inline void AssignIDToDefinition::visit(Holder& ope) {
    const auto key = static_cast<void*>(ope.outer_);
    if (ids.find(key) != ids.end()) {
        return;
    }

    const auto nextId = ids.size();
    ids[key] = nextId;
    ope.outer_->id = nextId;

    ope.ope_->accept(*this);
}
2015-02-08 01:52:26 +00:00
/*
* Factories
*/
// Builds a Sequence from the arguments, each converted to std::shared_ptr<Ope>.
template <typename... Args>
std::shared_ptr<Ope> seq(Args&& ...args) {
    std::shared_ptr<Ope> node =
        std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
    return node;
}
// Builds a PrioritizedChoice from the arguments, each converted to
// std::shared_ptr<Ope>.
template <typename... Args>
std::shared_ptr<Ope> cho(Args&& ...args) {
    std::shared_ptr<Ope> node =
        std::make_shared<PrioritizedChoice>(static_cast<std::shared_ptr<Ope>>(args)...);
    return node;
}
2015-02-12 02:13:12 +00:00
inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope>& ope) {
return std::make_shared<ZeroOrMore>(ope);
2015-02-08 01:52:26 +00:00
}
2015-02-12 02:13:12 +00:00
inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope>& ope) {
return std::make_shared<OneOrMore>(ope);
2015-02-08 01:52:26 +00:00
}
2015-02-12 02:13:12 +00:00
inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope>& ope) {
return std::make_shared<Option>(ope);
2015-02-08 01:52:26 +00:00
}
2015-02-12 02:13:12 +00:00
inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope>& ope) {
return std::make_shared<AndPredicate>(ope);
2015-02-08 01:52:26 +00:00
}
2015-02-12 02:13:12 +00:00
inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope>& ope) {
return std::make_shared<NotPredicate>(ope);
2015-02-08 01:52:26 +00:00
}
2015-02-13 00:48:58 +00:00
inline std::shared_ptr<Ope> lit(const std::string& lit) {
2015-02-09 22:12:59 +00:00
return std::make_shared<LiteralString>(lit);
2015-02-08 01:52:26 +00:00
}
2015-02-13 00:48:58 +00:00
inline std::shared_ptr<Ope> cls(const std::string& chars) {
2015-02-09 22:12:59 +00:00
return std::make_shared<CharacterClass>(chars);
2015-02-08 01:52:26 +00:00
}
2015-02-22 00:38:30 +00:00
// Creates a Character operator for the character `dt`.
inline std::shared_ptr<Ope> chr(char dt) {
    std::shared_ptr<Ope> node = std::make_shared<Character>(dt);
    return node;
}
2015-02-14 15:13:10 +00:00
inline std::shared_ptr<Ope> dot() {
2015-02-09 22:12:59 +00:00
return std::make_shared<AnyCharacter>();
2015-02-08 01:52:26 +00:00
}
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma, size_t n, const std::string& s) {
return std::make_shared<Capture>(ope, ma, n, s);
2015-02-08 01:52:26 +00:00
}
2015-02-15 22:52:39 +00:00
inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope>& ope, MatchAction ma) {
2016-05-25 07:18:41 +00:00
return std::make_shared<Capture>(ope, ma, static_cast<size_t>(-1), std::string());
2015-02-09 17:01:59 +00:00
}
2015-08-10 20:37:56 +00:00
inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope>& ope) {
return std::make_shared<TokenBoundary>(ope);
}
2015-06-12 22:52:26 +00:00
inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope>& ope) {
return std::make_shared<Ignore>(ope);
}
2015-05-19 14:51:06 +00:00
// Creates a User operator that delegates parsing to `fn`.
inline std::shared_ptr<Ope> usr(std::function<size_t (const char* s, size_t n, SemanticValues& sv, any& dt)> fn) {
    std::shared_ptr<Ope> node = std::make_shared<User>(fn);
    return node;
}
2015-06-13 04:38:38 +00:00
// Creates a DefinitionReference to the rule called `name` in `grammar`; `s`
// is stored on the reference unchanged.
inline std::shared_ptr<Ope> ref(const std::unordered_map<std::string, Definition>& grammar, const std::string& name, const char* s) {
    std::shared_ptr<Ope> node = std::make_shared<DefinitionReference>(grammar, name, s);
    return node;
}
inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope>& ope) {
return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
}
2015-02-08 01:52:26 +00:00
/*-----------------------------------------------------------------------------
* PEG parser generator
*---------------------------------------------------------------------------*/
// Returns the 1-based (line, column) of `cur` within the text beginning at
// `start`. Lines are separated by '\n'; the column counts characters from the
// character after the most recent '\n' (or from `start`).
inline std::pair<size_t, size_t> line_info(const char* start, const char* cur) {
    int line = 1;
    const char* lineBegin = start;
    const char* it = start;

    for (; it < cur; ++it) {
        if (*it == '\n') {
            ++line;
            lineBegin = it + 1;
        }
    }

    const auto column = it - lineBegin + 1;
    return std::make_pair(line, column);
}
2015-02-26 00:31:00 +00:00
// Maps a definition (rule) name to its Definition.
typedef std::unordered_map<std::string, Definition> Grammar;
// Logging callback taking two size_t values and a message string.
// NOTE(review): presumably (line, column, message), matching what line_info()
// returns -- confirm against the call sites.
typedef std::function<void (size_t, size_t, const std::string&)> Log;
2015-02-26 00:31:00 +00:00
// Maps a name to an operator; passed to ParserGenerator::parse as `rules`.
typedef std::unordered_map<std::string, std::shared_ptr<Ope>> Rules;
2015-02-20 03:27:47 +00:00
2015-08-10 20:37:56 +00:00
class ParserGenerator
2015-02-08 01:52:26 +00:00
{
2015-02-14 03:41:17 +00:00
public:
2015-02-20 03:27:47 +00:00
static std::shared_ptr<Grammar> parse(
const char* s,
2015-03-09 18:58:43 +00:00
size_t n,
2015-02-20 03:27:47 +00:00
const Rules& rules,
std::string& start,
MatchAction ma,
Log log)
{
2015-06-13 01:14:10 +00:00
return get_instance().perform_core(s, n, rules, start, ma, log);
2015-02-20 03:27:47 +00:00
}
static std::shared_ptr<Grammar> parse(
const char* s,
2015-03-09 18:58:43 +00:00
size_t n,
2015-02-20 03:27:47 +00:00
std::string& start,
MatchAction ma,
Log log)
{
Rules dummy;
2015-03-09 18:58:43 +00:00
return parse(s, n, dummy, start, ma, log);
2015-02-14 15:13:10 +00:00
}
// For debugging purposes
2015-02-14 15:13:10 +00:00
// Returns the grammar `g` held by the shared ParserGenerator instance.
static Grammar& grammar() {
    auto& generator = get_instance();
    return generator.g;
}
2015-02-08 01:52:26 +00:00
2015-02-14 03:41:17 +00:00
private:
2015-08-10 20:37:56 +00:00
// Returns the single ParserGenerator, a function-local static that is
// constructed on the first call.
static ParserGenerator& get_instance()
{
    static ParserGenerator instance;
    return instance;
}
2015-08-10 20:37:56 +00:00
ParserGenerator() {
2015-02-14 15:13:10 +00:00
make_grammar();
setup_actions();
}
2015-02-26 00:31:00 +00:00
struct Data {
2015-06-13 04:57:45 +00:00
std::shared_ptr<Grammar> grammar;
std::string start;
MatchAction match_action;
std::vector<std::pair<std::string, const char*>> duplicates;
std::unordered_map<std::string, const char*> references;
size_t capture_count;
2015-02-15 22:52:39 +00:00
2015-05-19 13:07:03 +00:00
Data()
: grammar(std::make_shared<Grammar>())
, capture_count(0)
{}
};
2015-06-13 04:38:38 +00:00
struct DetectLeftRecursion : public Ope::Visitor {
DetectLeftRecursion(const std::string& name)
: s_(nullptr), name_(name), done_(false) {}
void visit(Sequence& ope) override {
2015-08-08 03:49:21 +00:00
for (auto op: ope.opes_) {
op->accept(*this);
2015-06-13 04:38:38 +00:00
if (done_) {
break;
} else if (s_) {
done_ = true;
break;
}
}
}
void visit(PrioritizedChoice& ope) override {
2015-08-08 03:49:21 +00:00
for (auto op: ope.opes_) {
op->accept(*this);
2015-06-13 04:38:38 +00:00
if (s_) {
done_ = true;
break;
}
}
}
void visit(ZeroOrMore& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(OneOrMore& ope) override {
ope.ope_->accept(*this);
done_ = true;
}
void visit(Option& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(AndPredicate& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(NotPredicate& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(LiteralString& ope) override {
done_ = !ope.lit_.empty();
}
void visit(CharacterClass& /*ope*/) override {
2015-06-13 04:38:38 +00:00
done_ = true;
}
void visit(Character& /*ope*/) override {
2015-06-13 04:38:38 +00:00
done_ = true;
}
void visit(AnyCharacter& /*ope*/) override {
2015-06-13 04:38:38 +00:00
done_ = true;
}
void visit(Capture& ope) override {
ope.ope_->accept(*this);
}
2015-08-10 20:37:56 +00:00
void visit(TokenBoundary& ope) override {
2015-06-13 04:38:38 +00:00
ope.ope_->accept(*this);
}
void visit(Ignore& ope) override {
ope.ope_->accept(*this);
}
void visit(User& /*ope*/) override {
2015-06-13 04:38:38 +00:00
done_ = true;
}
void visit(WeakHolder& ope) override {
ope.weak_.lock()->accept(*this);
}
void visit(Holder& ope) override {
ope.ope_->accept(*this);
}
void visit(DefinitionReference& ope) override {
if (ope.name_ == name_) {
s_ = ope.s_;
2015-08-06 22:27:38 +00:00
} else if (refs_.count(ope.name_)) {
2015-06-13 04:38:38 +00:00
;
} else {
refs_.insert(ope.name_);
ope.get_rule()->accept(*this);
}
done_ = true;
}
const char* s_;
private:
std::string name_;
std::set<std::string> refs_;
bool done_;
};
2015-02-14 15:13:10 +00:00
void make_grammar() {
// Setup PEG syntax parser
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
2015-02-18 23:00:11 +00:00
g["Definition"] <= seq(opt(g["IGNORE"]), g["Identifier"], g["LEFTARROW"], g["Expression"]);
2015-02-14 15:13:10 +00:00
g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
g["Sequence"] <= zom(g["Prefix"]);
g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["Suffix"]);
g["Suffix"] <= seq(g["Primary"], opt(cho(g["QUESTION"], g["STAR"], g["PLUS"])));
2015-06-12 22:52:26 +00:00
g["Primary"] <= cho(seq(opt(g["IGNORE"]), g["Identifier"], npd(g["LEFTARROW"])),
2015-02-14 15:13:10 +00:00
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
2015-02-14 15:13:10 +00:00
g["Literal"], g["Class"], g["DOT"]);
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
g["IdentStart"] <= cls("a-zA-Z_\x80-\xff%");
2015-02-14 15:13:10 +00:00
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
2015-08-10 20:37:56 +00:00
g["Literal"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
2015-02-14 15:13:10 +00:00
2015-08-10 20:37:56 +00:00
g["Class"] <= seq(chr('['), tok(zom(seq(npd(chr(']')), g["Range"]))), chr(']'), g["Spacing"]);
2015-02-14 15:13:10 +00:00
g["Range"] <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
g["Char"] <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
2015-02-23 03:23:08 +00:00
seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
2015-02-14 15:13:10 +00:00
seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
2015-02-23 03:23:08 +00:00
seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
2015-02-14 15:13:10 +00:00
seq(npd(chr('\\')), dot()));
#if !defined(PEGLIB_NO_UNICODE_CHARS)
2016-01-24 03:15:15 +00:00
g["LEFTARROW"] <= seq(cho(lit("<-"), lit("")), g["Spacing"]);
#else
g["LEFTARROW"] <= seq(lit("<-"), g["Spacing"]);
#endif
2015-02-18 23:00:11 +00:00
~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
2015-02-14 15:13:10 +00:00
g["AND"] <= seq(chr('&'), g["Spacing"]);
g["NOT"] <= seq(chr('!'), g["Spacing"]);
g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
g["STAR"] <= seq(chr('*'), g["Spacing"]);
g["PLUS"] <= seq(chr('+'), g["Spacing"]);
g["OPEN"] <= seq(chr('('), g["Spacing"]);
g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
g["DOT"] <= seq(chr('.'), g["Spacing"]);
g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
g["Comment"] <= seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
g["EndOfFile"] <= npd(dot());
g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
g["EndTok"] <= seq(chr('>'), g["Spacing"]);
2015-08-10 20:37:56 +00:00
g["BeginCap"] <= seq(chr('$'), tok(opt(g["Identifier"])), chr('<'), g["Spacing"]);
g["EndCap"] <= seq(lit(">"), g["Spacing"]);
2015-02-15 22:52:39 +00:00
2015-02-18 23:00:11 +00:00
g["IGNORE"] <= chr('~');
2015-02-14 15:13:10 +00:00
// Set definition names
for (auto& x: g) {
x.second.name = x.first;
}
2015-02-14 03:41:17 +00:00
}
2015-02-08 01:52:26 +00:00
2015-02-14 15:13:10 +00:00
void setup_actions() {
2015-02-22 00:38:30 +00:00
g["Definition"] = [&](const SemanticValues& sv, any& dt) {
2015-02-26 00:31:00 +00:00
Data& data = *dt.get<Data*>();
2015-02-22 00:38:30 +00:00
auto ignore = (sv.size() == 4);
2016-05-25 07:18:41 +00:00
auto baseId = ignore ? 1u : 0u;
2015-02-18 23:00:11 +00:00
2015-06-16 03:26:49 +00:00
const auto& name = sv[baseId].get<std::string>();
auto ope = sv[baseId + 2].get<std::shared_ptr<Ope>>();
2015-02-18 23:00:11 +00:00
2015-06-13 04:57:45 +00:00
auto& grammar = *data.grammar;
2015-08-06 22:27:38 +00:00
if (!grammar.count(name)) {
2015-06-13 04:57:45 +00:00
auto& rule = grammar[name];
rule <= ope;
rule.name = name;
rule.ignoreSemanticValue = ignore;
2015-06-13 04:57:45 +00:00
if (data.start.empty()) {
data.start = name;
}
} else {
data.duplicates.emplace_back(name, sv.c_str());
}
};
2015-02-08 01:52:26 +00:00
2015-02-22 00:38:30 +00:00
g["Expression"] = [&](const SemanticValues& sv) {
if (sv.size() == 1) {
2015-06-16 03:26:49 +00:00
return sv[0].get<std::shared_ptr<Ope>>();
2015-02-14 03:41:17 +00:00
} else {
std::vector<std::shared_ptr<Ope>> opes;
2015-02-22 00:38:30 +00:00
for (auto i = 0u; i < sv.size(); i++) {
2015-06-16 04:25:01 +00:00
opes.emplace_back(sv[i].get<std::shared_ptr<Ope>>());
2015-02-14 03:41:17 +00:00
}
const std::shared_ptr<Ope> ope = std::make_shared<PrioritizedChoice>(opes);
return ope;
}
};
2015-02-08 01:52:26 +00:00
2015-02-22 00:38:30 +00:00
g["Sequence"] = [&](const SemanticValues& sv) {
if (sv.size() == 1) {
2015-06-16 03:26:49 +00:00
return sv[0].get<std::shared_ptr<Ope>>();
2015-02-14 03:41:17 +00:00
} else {
std::vector<std::shared_ptr<Ope>> opes;
2015-02-22 00:38:30 +00:00
for (const auto& x: sv) {
2015-06-16 04:25:01 +00:00
opes.emplace_back(x.get<std::shared_ptr<Ope>>());
2015-02-14 03:41:17 +00:00
}
const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
return ope;
}
};
2015-02-08 01:52:26 +00:00
2015-03-09 18:58:43 +00:00
g["Prefix"] = [&](const SemanticValues& sv) {
2015-02-14 03:41:17 +00:00
std::shared_ptr<Ope> ope;
2015-02-22 00:38:30 +00:00
if (sv.size() == 1) {
2015-06-16 03:26:49 +00:00
ope = sv[0].get<std::shared_ptr<Ope>>();
2015-02-14 03:41:17 +00:00
} else {
2015-02-22 00:38:30 +00:00
assert(sv.size() == 2);
2015-06-16 03:26:49 +00:00
auto tok = sv[0].get<char>();
ope = sv[1].get<std::shared_ptr<Ope>>();
if (tok == '&') {
2015-02-14 03:41:17 +00:00
ope = apd(ope);
} else { // '!'
2015-02-14 03:41:17 +00:00
ope = npd(ope);
}
}
return ope;
};
2015-02-12 04:57:00 +00:00
2015-03-09 18:58:43 +00:00
g["Suffix"] = [&](const SemanticValues& sv) {
2015-06-16 03:26:49 +00:00
auto ope = sv[0].get<std::shared_ptr<Ope>>();
2015-02-22 00:38:30 +00:00
if (sv.size() == 1) {
2015-02-14 03:41:17 +00:00
return ope;
} else {
2015-02-22 00:38:30 +00:00
assert(sv.size() == 2);
2015-06-16 03:26:49 +00:00
auto tok = sv[1].get<char>();
if (tok == '?') {
2015-02-14 03:41:17 +00:00
return opt(ope);
} else if (tok == '*') {
2015-02-14 03:41:17 +00:00
return zom(ope);
} else { // '+'
2015-02-14 03:41:17 +00:00
return oom(ope);
}
}
};
2015-06-16 15:26:48 +00:00
g["Primary"] = [&](const SemanticValues& sv, any& dt) -> std::shared_ptr<Ope> {
2015-06-16 03:26:49 +00:00
Data& data = *dt.get<Data*>();
2015-06-16 15:26:48 +00:00
switch (sv.choice()) {
2015-06-16 03:26:49 +00:00
case 0: { // Reference
auto ignore = (sv.size() == 2);
2016-05-25 07:18:41 +00:00
auto baseId = ignore ? 1u : 0u;
2015-06-12 22:52:26 +00:00
2015-06-16 03:26:49 +00:00
const auto& ident = sv[baseId].get<std::string>();
2015-06-12 22:52:26 +00:00
2015-08-06 22:27:38 +00:00
if (!data.references.count(ident)) {
data.references[ident] = sv.c_str(); // for error handling
2015-06-16 03:26:49 +00:00
}
2015-06-13 04:38:38 +00:00
2015-06-16 03:26:49 +00:00
if (ignore) {
return ign(ref(*data.grammar, ident, sv.c_str()));
2015-06-16 03:26:49 +00:00
} else {
return ref(*data.grammar, ident, sv.c_str());
2015-06-16 03:26:49 +00:00
}
2015-06-13 04:38:38 +00:00
}
2015-06-16 03:26:49 +00:00
case 1: { // (Expression)
return sv[1].get<std::shared_ptr<Ope>>();
}
2015-08-10 20:37:56 +00:00
case 2: { // TokenBoundary
return tok(sv[1].get<std::shared_ptr<Ope>>());
2015-06-16 03:26:49 +00:00
}
case 3: { // Capture
const auto& name = sv[0].get<std::string>();
2015-06-16 03:26:49 +00:00
auto ope = sv[1].get<std::shared_ptr<Ope>>();
return cap(ope, data.match_action, ++data.capture_count, name);
}
default: {
return sv[0].get<std::shared_ptr<Ope>>();
2015-06-12 22:52:26 +00:00
}
}
};
2015-06-16 04:25:01 +00:00
g["IdentCont"] = [](const SemanticValues& sv) {
return std::string(sv.c_str(), sv.length());
2015-02-14 03:41:17 +00:00
};
2015-05-19 13:07:03 +00:00
2015-06-16 04:25:01 +00:00
g["Literal"] = [this](const SemanticValues& sv) {
const auto& tok = sv.tokens.front();
return lit(resolve_escape_sequence(tok.first, tok.second));
2015-02-14 03:41:17 +00:00
};
2015-06-16 04:25:01 +00:00
g["Class"] = [this](const SemanticValues& sv) {
const auto& tok = sv.tokens.front();
return cls(resolve_escape_sequence(tok.first, tok.second));
2015-02-14 03:41:17 +00:00
};
g["AND"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["NOT"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["QUESTION"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["STAR"] = [](const SemanticValues& sv) { return *sv.c_str(); };
g["PLUS"] = [](const SemanticValues& sv) { return *sv.c_str(); };
2015-05-19 13:07:03 +00:00
g["DOT"] = [](const SemanticValues& /*sv*/) { return dot(); };
g["BeginCap"] = [](const SemanticValues& sv) { return sv.token(); };
2015-02-14 03:41:17 +00:00
}
2015-02-20 03:27:47 +00:00
std::shared_ptr<Grammar> perform_core(
const char* s,
2015-03-09 18:58:43 +00:00
size_t n,
2015-02-20 03:27:47 +00:00
const Rules& rules,
std::string& start,
MatchAction ma,
Log log)
{
2015-02-26 00:31:00 +00:00
Data data;
data.match_action = ma;
2015-02-15 22:52:39 +00:00
2015-02-26 00:31:00 +00:00
any dt = &data;
2015-03-09 18:58:43 +00:00
auto r = g["Grammar"].parse(s, n, dt);
if (!r.ret) {
if (log) {
2015-11-17 11:10:32 +00:00
if (r.message_pos) {
auto line = line_info(s, r.message_pos);
log(line.first, line.second, r.message);
} else {
auto line = line_info(s, r.error_pos);
log(line.first, line.second, "syntax error");
}
}
return nullptr;
}
2015-02-26 00:31:00 +00:00
auto& grammar = *data.grammar;
2015-02-20 03:27:47 +00:00
// User provided rules
for (const auto& x: rules) {
auto name = x.first;
bool ignore = false;
if (!name.empty() && name[0] == '~') {
ignore = true;
name.erase(0, 1);
}
if (!name.empty()) {
2015-06-05 12:56:16 +00:00
auto& rule = grammar[name];
rule <= x.second;
rule.name = name;
rule.ignoreSemanticValue = ignore;
2015-02-20 03:27:47 +00:00
}
}
2015-06-13 04:57:45 +00:00
// Check duplicated definitions
2016-01-01 00:42:14 +00:00
bool ret = data.duplicates.empty();
2015-06-13 04:38:38 +00:00
2015-06-13 04:57:45 +00:00
for (const auto& x: data.duplicates) {
if (log) {
const auto& name = x.first;
auto ptr = x.second;
auto line = line_info(s, ptr);
log(line.first, line.second, "'" + name + "' is already defined.");
}
}
// Check missing definitions
2015-02-26 00:31:00 +00:00
for (const auto& x : data.references) {
const auto& name = x.first;
auto ptr = x.second;
2015-08-06 22:27:38 +00:00
if (!grammar.count(name)) {
if (log) {
auto line = line_info(s, ptr);
log(line.first, line.second, "'" + name + "' is not defined.");
}
2015-06-13 04:38:38 +00:00
ret = false;
}
}
2015-06-13 04:38:38 +00:00
if (!ret) {
return nullptr;
}
// Check left recursion
ret = true;
for (auto& x: grammar) {
const auto& name = x.first;
auto& rule = x.second;
DetectLeftRecursion lr(name);
rule.accept(lr);
if (lr.s_) {
if (log) {
auto line = line_info(s, lr.s_);
log(line.first, line.second, "'" + name + "' is left recursive.");
}
ret = false;;
}
}
if (!ret) {
return nullptr;
}
// Set root definition
2015-02-26 00:31:00 +00:00
start = data.start;
// Automatic whitespace skipping
if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
auto& rule = (*data.grammar)[start];
rule.whitespaceOpe = wsp((*data.grammar)[WHITESPACE_DEFINITION_NAME].get_core_operator());
}
2015-02-26 00:31:00 +00:00
return data.grammar;
}
2015-02-23 03:23:08 +00:00
// Returns true when `c` is a hexadecimal digit and stores its numeric value
// (0-15) in `v`. `v` is left untouched when `c` is not a hexadecimal digit.
bool is_hex(char c, int& v) {
    if (c >= '0' && c <= '9') {
        v = c - '0';
    } else if (c >= 'a' && c <= 'f') {
        v = c - 'a' + 10;
    } else if (c >= 'A' && c <= 'F') {
        v = c - 'A' + 10;
    } else {
        return false;
    }
    return true;
}
// Returns true when `c` is a decimal digit and stores its numeric value (0-9)
// in `v`. `v` is left untouched otherwise.
bool is_digit(char c, int& v) {
    const bool digit = (c >= '0' && c <= '9');
    if (digit) {
        v = c - '0';
    }
    return digit;
}
2015-05-19 14:51:06 +00:00
std::pair<char, size_t> parse_hex_number(const char* s, size_t n, size_t i) {
2015-02-23 03:23:08 +00:00
char ret = 0;
2015-03-09 18:58:43 +00:00
int val;
2015-04-24 23:11:06 +00:00
while (i < n && is_hex(s[i], val)) {
2016-05-25 07:18:41 +00:00
ret = static_cast<char>(ret * 16 + val);
2015-04-24 23:11:06 +00:00
i++;
2015-02-23 03:23:08 +00:00
}
2015-02-27 02:32:26 +00:00
return std::make_pair(ret, i);
2015-02-23 03:23:08 +00:00
}
2015-05-19 14:51:06 +00:00
// Reads consecutive octal digits ('0'-'7') from `s` starting at index `i`
// (never reading at or beyond `n`) and folds them into a char.
// Returns the value and the index of the first character that was not
// consumed. When no digit is found the result is {0, i}.
std::pair<char, size_t> parse_octal_number(const char* s, size_t n, size_t i) {
    char ret = 0;
    // '8' and '9' are not octal digits; they end the number and are left for
    // the caller to handle as ordinary characters.
    while (i < n && '0' <= s[i] && s[i] <= '7') {
        ret = static_cast<char>(ret * 8 + (s[i] - '0'));
        i++;
    }
    return std::make_pair(ret, i);
}
2015-03-09 18:58:43 +00:00
std::string resolve_escape_sequence(const char* s, size_t n) {
2015-02-14 03:41:17 +00:00
std::string r;
2015-03-09 18:58:43 +00:00
r.reserve(n);
2015-08-10 20:37:25 +00:00
size_t i = 0;
2015-04-24 23:11:06 +00:00
while (i < n) {
2015-02-14 03:41:17 +00:00
auto ch = s[i];
if (ch == '\\') {
2015-02-22 00:38:30 +00:00
i++;
2015-02-14 03:41:17 +00:00
switch (s[i]) {
2015-04-24 23:11:06 +00:00
case 'n': r += '\n'; i++; break;
case 'r': r += '\r'; i++; break;
case 't': r += '\t'; i++; break;
case '\'': r += '\''; i++; break;
case '"': r += '"'; i++; break;
case '[': r += '['; i++; break;
case ']': r += ']'; i++; break;
case '\\': r += '\\'; i++; break;
2015-02-23 03:23:08 +00:00
case 'x': {
2015-03-09 18:58:43 +00:00
std::tie(ch, i) = parse_hex_number(s, n, i + 1);
2015-02-23 03:23:08 +00:00
r += ch;
break;
}
2015-02-14 03:41:17 +00:00
default: {
2015-04-24 23:11:06 +00:00
std::tie(ch, i) = parse_octal_number(s, n, i);
r += ch;
2015-02-14 03:41:17 +00:00
break;
}
}
} else {
r += ch;
2015-04-24 23:11:06 +00:00
i++;
2015-02-08 01:52:26 +00:00
}
}
2015-02-14 03:41:17 +00:00
return r;
2015-02-08 01:52:26 +00:00
}
2015-02-14 15:13:10 +00:00
Grammar g;
2015-02-14 03:41:17 +00:00
};
2015-05-19 13:07:03 +00:00
/*-----------------------------------------------------------------------------
* AST
*---------------------------------------------------------------------------*/
2015-06-04 23:06:37 +00:00
// Default tag value (-1).
// NOTE(review): not referenced in the visible part of this file - confirm
// where it is used before changing it.
const int AstDefaultTag = -1;
#ifndef PEGLIB_NO_CONSTEXPR_SUPPORT
2015-07-25 01:36:39 +00:00
// Computes a compile-time hash of the NUL terminated string `str`, starting
// at index `h`. The empty remainder hashes to 5381; every character in front
// of it multiplies the hash of the rest by 33 and xors in the character.
// Written as a single return statement so that it is a valid C++11 constexpr
// function.
inline constexpr unsigned int str2tag(const char* str, int h = 0) {
    return !str[h] ? 5381 : (str2tag(str, h + 1) * 33) ^ static_cast<unsigned char>(str[h]);
}

// String literal suffix `_`: "name"_ is the same value as str2tag("name").
// Spelled without whitespace between "" and the suffix; the spaced form is
// deprecated.
inline constexpr unsigned int operator""_(const char* s, size_t) {
    return str2tag(s);
}
#endif
2015-08-04 15:27:37 +00:00
template <typename Annotation>
struct AstBase : public Annotation
2015-05-19 13:07:03 +00:00
{
AstBase(const char* a_path, size_t a_line, size_t a_column, const char* a_name, const std::vector<std::shared_ptr<AstBase>>& a_nodes)
: path(a_path ? a_path : "")
, line(a_line)
, column(a_column)
, name(a_name)
, original_name(a_name)
#ifndef PEGLIB_NO_CONSTEXPR_SUPPORT
, tag(str2tag(a_name))
2015-08-03 21:53:35 +00:00
, original_tag(tag)
2015-07-25 01:36:39 +00:00
#endif
2015-08-03 21:53:35 +00:00
, is_token(false)
, nodes(a_nodes)
2015-07-25 01:36:39 +00:00
{}
2015-05-19 13:07:03 +00:00
AstBase(const char* a_path, size_t a_line, size_t a_column, const char* a_name, const std::string& a_token)
: path(a_path ? a_path : "")
, line(a_line)
, column(a_column)
, name(a_name)
, original_name(a_name)
#ifndef PEGLIB_NO_CONSTEXPR_SUPPORT
, tag(str2tag(a_name))
2015-08-03 21:53:35 +00:00
, original_tag(tag)
2015-07-25 01:36:39 +00:00
#endif
2015-08-03 21:53:35 +00:00
, is_token(true)
, token(a_token)
2015-07-25 01:36:39 +00:00
{}
2015-07-23 01:14:55 +00:00
AstBase(const AstBase& ast, const char* a_original_name)
2015-08-03 21:53:35 +00:00
: path(ast.path)
, line(ast.line)
, column(ast.column)
, name(ast.name)
, original_name(a_original_name)
#ifndef PEGLIB_NO_CONSTEXPR_SUPPORT
2015-08-03 21:53:35 +00:00
, tag(ast.tag)
, original_tag(str2tag(a_original_name))
2015-07-25 01:36:39 +00:00
#endif
2015-08-03 21:53:35 +00:00
, is_token(ast.is_token)
, token(ast.token)
, nodes(ast.nodes)
2015-08-04 17:04:23 +00:00
, parent(ast.parent)
2015-07-25 01:36:39 +00:00
{}
2015-07-23 01:14:55 +00:00
2015-07-31 17:06:31 +00:00
const std::string path;
const size_t line;
const size_t column;
2015-08-03 21:53:35 +00:00
2015-07-31 17:06:31 +00:00
const std::string name;
const std::string original_name;
#ifndef PEGLIB_NO_CONSTEXPR_SUPPORT
2015-07-31 17:06:31 +00:00
const unsigned int tag;
const unsigned int original_tag;
2015-07-25 01:36:39 +00:00
#endif
2015-08-03 21:53:35 +00:00
const bool is_token;
const std::string token;
2015-08-04 15:27:37 +00:00
std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
2015-08-04 17:04:23 +00:00
std::shared_ptr<AstBase<Annotation>> parent;
2015-05-19 13:07:03 +00:00
};
2015-08-28 02:26:34 +00:00
// Appends a textual dump of the tree rooted at `ptr` to `s`, one node per
// line. Every line is indented by `level` spaces; children are written with
// `level + 1`. Token nodes are written as "- name (token)", inner nodes as
// "+ name". When name and original_name differ the node is shown as
// "original_name[name]".
template <typename T>
void ast_to_s(const std::shared_ptr<T>& ptr, std::string& s, int level = 0) {
    const auto& ast = *ptr;
    for (auto i = 0; i < level; i++) {
        s += " ";
    }
    std::string name;
    if (ast.name == ast.original_name) {
        name = ast.name;
    } else {
        name = ast.original_name + "[" + ast.name + "]";
    }
    if (ast.is_token) {
        s += "- " + name + " (" + ast.token + ")\n";
    } else {
        s += "+ " + name + "\n";
    }
    // Bind by reference: copying a shared_ptr per child is not needed here.
    for (const auto& node : ast.nodes) {
        ast_to_s(node, s, level + 1);
    }
}

// Returns the textual dump of the tree rooted at `ptr`.
template <typename T>
std::string ast_to_s(const std::shared_ptr<T>& ptr) {
    std::string s;
    ast_to_s(ptr, s);
    return s;
}
2015-05-19 13:07:03 +00:00
2015-08-04 15:27:37 +00:00
// Builds a copy of an AST in which selected nodes that have exactly one child
// are replaced by that child.
//
// With `optimize_nodes` == true every node is a candidate except those whose
// name is listed in `filters`; with `optimize_nodes` == false only the nodes
// listed in `filters` are candidates.
struct AstOptimizer
{
    AstOptimizer(bool optimize_nodes, const std::vector<std::string>& filters = {})
        : optimize_nodes_(optimize_nodes)
        , filters_(filters) {}

    // Returns the optimized copy of `original`. `parent` becomes the parent
    // of the returned node's copy. A collapsed node is represented by a copy
    // of its (optimized) child carrying the collapsed node's name as
    // original name.
    template <typename T>
    std::shared_ptr<T> optimize(std::shared_ptr<T> original, std::shared_ptr<T> parent = nullptr) {
        auto found = std::find(filters_.begin(), filters_.end(), original->name) != filters_.end();
        bool opt = optimize_nodes_ ? !found : found;

        if (opt && original->nodes.size() == 1) {
            auto child = optimize(original->nodes[0], parent);
            return std::make_shared<T>(*child, original->name.c_str());
        }

        auto ast = std::make_shared<T>(*original);
        ast->parent = parent;
        ast->nodes.clear();
        // Bind by reference: no shared_ptr copy is needed to visit a child.
        for (const auto& node : original->nodes) {
            auto child = optimize(node, ast);
            ast->nodes.push_back(child);
        }
        return ast;
    }

private:
    const bool                     optimize_nodes_;
    const std::vector<std::string> filters_;
};
2015-08-03 21:53:35 +00:00
// Annotation without any members.
struct EmptyType {};

// AST node type that carries no extra annotation.
using Ast = AstBase<EmptyType>;
2015-02-08 01:52:26 +00:00
/*-----------------------------------------------------------------------------
2015-08-10 20:37:56 +00:00
* parser
2015-02-08 01:52:26 +00:00
*---------------------------------------------------------------------------*/
2015-08-10 20:37:56 +00:00
class parser
2015-02-08 01:52:26 +00:00
{
public:
2015-08-10 20:37:56 +00:00
parser() = default;
2015-02-15 22:52:39 +00:00
2015-08-10 20:37:56 +00:00
parser(const char* s, size_t n, const Rules& rules) {
2015-03-09 18:58:43 +00:00
load_grammar(s, n, rules);
}
2015-08-10 20:37:56 +00:00
parser(const char* s, const Rules& rules)
: parser(s, strlen(s), rules) {}
2015-03-09 18:58:43 +00:00
2015-08-10 20:37:56 +00:00
parser(const char* s, size_t n)
: parser(s, n, Rules()) {}
2015-03-09 18:58:43 +00:00
2015-08-10 20:37:56 +00:00
parser(const char* s)
: parser(s, strlen(s), Rules()) {}
2015-03-09 18:58:43 +00:00
operator bool() {
return grammar_ != nullptr;
}
bool load_grammar(const char* s, size_t n, const Rules& rules) {
2015-08-10 20:37:56 +00:00
grammar_ = ParserGenerator::parse(
2015-07-07 19:44:33 +00:00
s, n,
rules,
2015-02-15 22:52:39 +00:00
start_,
[&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
if (match_action) match_action(a_s, a_n, a_id, a_name);
2015-02-15 22:52:39 +00:00
},
log);
2015-03-09 18:58:43 +00:00
return grammar_ != nullptr;
2015-02-08 01:52:26 +00:00
}
2015-03-09 18:58:43 +00:00
bool load_grammar(const char* s, size_t n) {
return load_grammar(s, n, Rules());
}
2015-02-20 03:27:47 +00:00
2015-03-09 18:58:43 +00:00
bool load_grammar(const char* s, const Rules& rules) {
auto n = strlen(s);
return load_grammar(s, n, rules);
}
2015-02-20 03:27:47 +00:00
2015-03-09 18:58:43 +00:00
bool load_grammar(const char* s) {
auto n = strlen(s);
return load_grammar(s, n);
}
2015-02-14 03:41:17 +00:00
2015-07-28 10:47:18 +00:00
bool parse_n(const char* s, size_t n, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
2015-07-28 10:47:18 +00:00
auto r = rule.parse(s, n, path);
2016-01-01 00:42:14 +00:00
output_log(s, n, r);
2015-03-09 18:58:43 +00:00
return r.ret && r.len == n;
}
return false;
2015-02-08 01:52:26 +00:00
}
2015-07-28 10:47:18 +00:00
bool parse(const char* s, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse_n(s, n, path);
2015-03-09 18:58:43 +00:00
}
2015-07-28 10:47:18 +00:00
bool parse_n(const char* s, size_t n, any& dt, const char* path = nullptr) const {
2015-02-13 00:48:58 +00:00
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
2015-07-28 10:47:18 +00:00
auto r = rule.parse(s, n, dt, path);
2016-01-01 00:42:14 +00:00
output_log(s, n, r);
2015-03-09 18:58:43 +00:00
return r.ret && r.len == n;
2015-02-13 00:48:58 +00:00
}
2015-02-08 01:52:26 +00:00
return false;
}
2015-07-28 10:47:18 +00:00
bool parse(const char* s, any& dt, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse_n(s, n, dt, path);
2015-02-15 22:52:39 +00:00
}
2015-03-09 18:58:43 +00:00
template <typename T>
2015-07-28 10:47:18 +00:00
bool parse_n(const char* s, size_t n, T& val, const char* path = nullptr) const {
2015-02-08 01:52:26 +00:00
if (grammar_ != nullptr) {
2015-02-13 00:48:58 +00:00
const auto& rule = (*grammar_)[start_];
2015-07-28 10:47:18 +00:00
auto r = rule.parse_and_get_value(s, n, val, path);
2016-01-01 00:42:14 +00:00
output_log(s, n, r);
2015-03-09 18:58:43 +00:00
return r.ret && r.len == n;
2015-02-08 01:52:26 +00:00
}
return false;
}
2015-03-09 18:58:43 +00:00
template <typename T>
2015-07-28 10:47:18 +00:00
bool parse(const char* s, T& val, const char* path = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
2015-07-28 10:47:18 +00:00
return parse_n(s, n, val, path);
2015-03-02 22:35:55 +00:00
}
2015-03-03 02:52:09 +00:00
template <typename T>
2015-07-28 10:47:18 +00:00
bool parse_n(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const {
2015-03-03 02:52:09 +00:00
if (grammar_ != nullptr) {
const auto& rule = (*grammar_)[start_];
2015-07-28 10:47:18 +00:00
auto r = rule.parse_and_get_value(s, n, dt, val, path);
2016-01-01 00:42:14 +00:00
output_log(s, n, r);
2015-03-09 18:58:43 +00:00
return r.ret && r.len == n;
2015-03-03 02:52:09 +00:00
}
return false;
}
template <typename T>
bool parse(const char* s, any& dt, T& val, const char* /*path*/ = nullptr) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
return parse_n(s, n, dt, val);
2015-02-13 00:48:58 +00:00
}
2015-03-09 18:58:43 +00:00
bool search(const char* s, size_t n, size_t& mpos, size_t& mlen) const {
2015-02-15 22:52:39 +00:00
const auto& rule = (*grammar_)[start_];
if (grammar_ != nullptr) {
size_t pos = 0;
2015-03-09 18:58:43 +00:00
while (pos < n) {
size_t len = n - pos;
2015-03-03 02:52:09 +00:00
auto r = rule.parse(s + pos, len);
2015-02-15 22:52:39 +00:00
if (r.ret) {
mpos = pos;
mlen = len;
2015-02-15 22:52:39 +00:00
return true;
}
pos++;
}
}
mpos = 0;
mlen = 0;
return false;
}
2015-03-03 02:52:09 +00:00
bool search(const char* s, size_t& mpos, size_t& mlen) const {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
return search(s, n, mpos, mlen);
2015-02-08 01:52:26 +00:00
}
Definition& operator[](const char* s) {
return (*grammar_)[s];
2015-02-08 01:52:26 +00:00
}
void enable_packrat_parsing() {
2015-03-03 02:52:09 +00:00
if (grammar_ != nullptr) {
auto& rule = (*grammar_)[start_];
rule.enablePackratParsing = true;
2015-03-03 02:52:09 +00:00
}
}
2015-08-03 21:53:35 +00:00
template <typename T = Ast>
2015-08-10 20:37:56 +00:00
parser& enable_ast() {
2015-07-25 01:36:39 +00:00
for (auto& x: *grammar_) {
const auto& name = x.first;
auto& rule = x.second;
2015-06-02 18:17:08 +00:00
2015-07-25 01:36:39 +00:00
if (!rule.action) {
auto is_token = rule.is_token;
rule.action = [=](const SemanticValues& sv) {
auto line = line_info(sv.ss, sv.c_str());
2015-07-25 01:36:39 +00:00
if (is_token) {
2016-01-24 16:12:50 +00:00
return std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.token());
2015-07-25 01:36:39 +00:00
}
2015-07-29 21:58:20 +00:00
2015-08-03 21:53:35 +00:00
auto ast = std::make_shared<T>(sv.path, line.first, line.second, name.c_str(), sv.transform<std::shared_ptr<T>>());
2015-07-31 17:06:31 +00:00
2015-07-29 21:58:20 +00:00
for (auto node: ast->nodes) {
2015-08-04 17:04:23 +00:00
node->parent = ast;
2015-07-25 01:36:39 +00:00
}
2015-07-29 21:58:20 +00:00
return ast;
2015-07-25 01:36:39 +00:00
};
}
}
return *this;
}
2015-11-30 04:07:02 +00:00
void enable_trace(Tracer tracer) {
if (grammar_ != nullptr) {
auto& rule = (*grammar_)[start_];
rule.tracer = tracer;
}
}
2015-06-02 18:17:08 +00:00
MatchAction match_action;
Log log;
private:
2016-01-01 00:42:14 +00:00
void output_log(const char* s, size_t n, const Definition::Result& r) const {
2015-06-02 18:17:08 +00:00
if (log) {
if (!r.ret) {
2015-11-17 11:10:32 +00:00
if (r.message_pos) {
auto line = line_info(s, r.message_pos);
log(line.first, line.second, r.message);
} else {
auto line = line_info(s, r.error_pos);
log(line.first, line.second, "syntax error");
}
2015-06-02 18:17:08 +00:00
} else if (r.len != n) {
auto line = line_info(s, s + r.len);
log(line.first, line.second, "syntax error");
}
}
}
2015-02-08 01:52:26 +00:00
std::shared_ptr<Grammar> grammar_;
std::string start_;
};
2015-02-15 22:52:39 +00:00
/*-----------------------------------------------------------------------------
* Simple interface
2015-02-15 22:52:39 +00:00
*---------------------------------------------------------------------------*/
// Result of a match or search: a list of matched ranges.
// The peg_match / peg_search helpers store the whole match at index 0
// (id 0, empty name) followed by the captures.
struct match
{
    // One matched range: `n` characters starting at `s`. The text is not
    // copied, so `s` must stay valid while the item is used.
    struct Item {
        const char* s;
        size_t      n;
        size_t      id;
        std::string name;

        size_t length() const { return n; }
        std::string str() const { return std::string(s, n); }
    };

    std::vector<Item> matches;

    typedef std::vector<Item>::iterator iterator;
    typedef std::vector<Item>::const_iterator const_iterator;

    bool empty() const {
        return matches.empty();
    }

    size_t size() const {
        return matches.size();
    }

    // Length of item `n`; `n` must be a valid index.
    size_t length(size_t n = 0) const {
        return matches[n].length();
    }

    // Text of item `n`; `n` must be a valid index.
    std::string str(size_t n = 0) const {
        return matches[n].str();
    }

    const Item& operator[](size_t n) const {
        return matches[n];
    }

    iterator begin() {
        return matches.begin();
    }

    iterator end() {
        return matches.end();
    }

    const_iterator begin() const {
        return matches.cbegin();
    }

    const_iterator end() const {
        return matches.cend();
    }

    // Indices of all items whose name equals `name`.
    std::vector<size_t> named_capture(const std::string& name) const {
        std::vector<size_t> ret;
        for (auto i = 0u; i < matches.size(); i++) {
            if (matches[i].name == name) {
                ret.push_back(i);
            }
        }
        return ret;
    }

    // Item indices grouped by item name.
    std::map<std::string, std::vector<size_t>> named_captures() const {
        std::map<std::string, std::vector<size_t>> ret;
        for (auto i = 0u; i < matches.size(); i++) {
            ret[matches[i].name].push_back(i);
        }
        return ret;
    }

    // Indices of all items whose id equals `id`.
    std::vector<size_t> indexed_capture(size_t id) const {
        std::vector<size_t> ret;
        for (auto i = 0u; i < matches.size(); i++) {
            if (matches[i].id == id) {
                ret.push_back(i);
            }
        }
        return ret;
    }

    // Item indices grouped by item id.
    std::map<size_t, std::vector<size_t>> indexed_captures() const {
        std::map<size_t, std::vector<size_t>> ret;
        for (auto i = 0u; i < matches.size(); i++) {
            ret[matches[i].id].push_back(i);
        }
        return ret;
    }
};
// Matches the whole of `s` against the grammar `syntax`.
// `m` is cleared first. Captures are appended to `m` while parsing; on
// success the whole input is inserted as item 0. Returns false when the
// grammar is invalid or `s` does not match completely.
inline bool peg_match(const char* syntax, const char* s, match& m) {
    m.matches.clear();

    parser pg(syntax);
    pg.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
        m.matches.push_back(match::Item{ a_s, a_n, a_id, a_name });
    };

    auto ret = pg.parse(s);
    if (ret) {
        auto n = strlen(s);
        m.matches.insert(m.matches.begin(), match::Item{ s, n, 0, std::string() });
    }

    return ret;
}

// Returns true when the whole of `s` matches the grammar `syntax`.
inline bool peg_match(const char* syntax, const char* s) {
    parser pg(syntax);
    return pg.parse(s);
}
2015-08-10 20:37:56 +00:00
inline bool peg_search(parser& pg, const char* s, size_t n, match& m) {
2015-02-15 22:52:39 +00:00
m.matches.clear();
pg.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
m.matches.push_back(match::Item{ a_s, a_n, a_id, a_name });
2015-02-15 22:52:39 +00:00
};
size_t mpos, mlen;
2015-03-09 18:58:43 +00:00
auto ret = pg.search(s, n, mpos, mlen);
2015-02-15 22:52:39 +00:00
if (ret) {
m.matches.insert(m.matches.begin(), match::Item{ s + mpos, mlen, 0, std::string() });
2015-02-15 22:52:39 +00:00
return true;
}
return false;
}
2015-08-10 20:37:56 +00:00
inline bool peg_search(parser& pg, const char* s, match& m) {
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
return peg_search(pg, s, n, m);
2015-02-15 22:52:39 +00:00
}
2015-03-09 18:58:43 +00:00
inline bool peg_search(const char* syntax, const char* s, size_t n, match& m) {
2015-08-10 20:37:56 +00:00
parser pg(syntax);
2015-03-09 18:58:43 +00:00
return peg_search(pg, s, n, m);
2015-02-15 22:52:39 +00:00
}
inline bool peg_search(const char* syntax, const char* s, match& m) {
2015-08-10 20:37:56 +00:00
parser pg(syntax);
2015-03-09 18:58:43 +00:00
auto n = strlen(s);
return peg_search(pg, s, n, m);
2015-02-15 22:52:39 +00:00
}
class peg_token_iterator : public std::iterator<std::forward_iterator_tag, match>
{
public:
peg_token_iterator()
: s_(nullptr)
, l_(0)
, pos_((std::numeric_limits<size_t>::max)()) {}
2015-02-15 22:52:39 +00:00
peg_token_iterator(const char* syntax, const char* s)
: peg_(syntax)
, s_(s)
, l_(strlen(s))
, pos_(0) {
peg_.match_action = [&](const char* a_s, size_t a_n, size_t a_id, const std::string& a_name) {
m_.matches.push_back(match::Item{ a_s, a_n, a_id, a_name });
2015-02-15 22:52:39 +00:00
};
search();
}
peg_token_iterator(const peg_token_iterator& rhs)
: peg_(rhs.peg_)
, s_(rhs.s_)
, l_(rhs.l_)
, pos_(rhs.pos_)
, m_(rhs.m_) {}
peg_token_iterator& operator++() {
search();
return *this;
}
peg_token_iterator operator++(int) {
auto it = *this;
search();
return it;
}
match& operator*() {
return m_;
}
match* operator->() {
return &m_;
}
bool operator==(const peg_token_iterator& rhs) {
return pos_ == rhs.pos_;
}
bool operator!=(const peg_token_iterator& rhs) {
return pos_ != rhs.pos_;
}
private:
void search() {
m_.matches.clear();
size_t mpos, mlen;
if (peg_.search(s_ + pos_, l_ - pos_, mpos, mlen)) {
m_.matches.insert(m_.matches.begin(), match::Item{ s_ + mpos, mlen, 0, std::string() });
2015-02-15 22:52:39 +00:00
pos_ += mpos + mlen;
} else {
pos_ = (std::numeric_limits<size_t>::max)();
2015-02-15 22:52:39 +00:00
}
}
2015-08-10 20:37:56 +00:00
parser peg_;
2015-02-15 22:52:39 +00:00
const char* s_;
size_t l_;
size_t pos_;
match m_;
};
// Range over all matches of the grammar `syntax` in `s`, for use with
// range-based for. `s` is not copied and must outlive the range.
struct peg_token_range {
    typedef peg_token_iterator iterator;
    typedef const peg_token_iterator const_iterator;

    // beg_iter is constructed in place rather than copied from a temporary:
    // the iterator's constructor installs a callback that refers to the
    // object being constructed, which must therefore be the member itself.
    peg_token_range(const char* syntax, const char* s)
        : beg_iter(syntax, s)
        , end_iter() {}

    iterator begin() {
        return beg_iter;
    }

    iterator end() {
        return end_iter;
    }

    const_iterator cbegin() const {
        return beg_iter;
    }

    const_iterator cend() const {
        return end_iter;
    }

private:
    peg_token_iterator beg_iter;
    peg_token_iterator end_iter;
};
2015-08-10 20:37:56 +00:00
} // namespace peg
2015-02-08 01:52:26 +00:00
#endif
// vim: et ts=4 sw=4 cin cino={1s ff=unix