cpp-peglib/lint/peglint.cc

202 lines
5.8 KiB
C++
Raw Normal View History

2015-02-13 00:54:43 +00:00
//
// peglint.cc
//
2021-01-22 02:16:47 +00:00
// Copyright (c) 2021 Yuji Hirose. All rights reserved.
2015-02-13 00:54:43 +00:00
// MIT License
//
2015-02-19 02:18:20 +00:00
#include <fstream>
2020-03-29 21:03:47 +00:00
#include <peglib.h>
2020-01-28 21:55:37 +00:00
#include <sstream>
2015-02-13 00:54:43 +00:00
using namespace std;
2020-03-29 21:03:47 +00:00
/// Reads the entire file at `path` into `buff` as raw bytes (binary mode).
///
/// @param path  Path of the file to read.
/// @param buff  Receives the file contents; resized to the file size. Left
///              untouched when the file cannot be opened or sized, and
///              cleared when the contents cannot be read completely.
/// @return `true` when the whole file was read (an empty file yields an empty
///         `buff`), `false` on any open/size/read failure.
inline bool read_file(const char *path, std::vector<char> &buff) {
  std::ifstream ifs(path, std::ios::in | std::ios::binary);
  if (ifs.fail()) { return false; }

  // `tellg()` reports failure as -1. Check it before converting to an
  // unsigned size, otherwise the failure turns into a gigantic allocation.
  const std::streamoff size = ifs.seekg(0, std::ios::end).tellg();
  if (size < 0) { return false; }

  // Refuse sizes the vector cannot represent instead of letting `resize`
  // throw `std::length_error`.
  if (static_cast<unsigned long long>(size) > buff.max_size()) { return false; }

  buff.resize(static_cast<std::vector<char>::size_type>(size));
  if (!buff.empty()) {
    ifs.seekg(0, std::ios::beg)
        .read(buff.data(), static_cast<std::streamsize>(buff.size()));
    // A short or failed read must not be reported as success with a buffer
    // that is only partially filled.
    if (!ifs) {
      buff.clear();
      return false;
    }
  }
  return true;
}
/// Splits `s` into the fields separated by `delim`.
///
/// Empty fields between delimiters are kept, but an empty field after the
/// final delimiter is not produced ("a," yields {"a"}); an empty input yields
/// an empty vector.
inline vector<string> split(const string &s, char delim) {
  vector<string> fields;
  string::size_type begin = 0;
  // Stopping once `begin` reaches the end is what drops the trailing empty
  // field.
  while (begin < s.size()) {
    const string::size_type end = s.find(delim, begin);
    if (end == string::npos) {
      fields.push_back(s.substr(begin));
      break;
    }
    fields.push_back(s.substr(begin, end - begin));
    begin = end + 1;
  }
  return fields;
}
2020-03-29 21:03:47 +00:00
int main(int argc, const char **argv) {
2021-01-24 03:58:23 +00:00
auto opt_packrat = false;
2020-03-29 21:03:47 +00:00
auto opt_ast = false;
2020-05-25 21:31:22 +00:00
auto opt_optimize = false;
2021-01-22 02:16:47 +00:00
auto opt_mode = true;
2020-03-29 21:03:47 +00:00
auto opt_help = false;
auto opt_source = false;
vector<char> source;
auto opt_trace = false;
vector<const char *> path_list;
auto argi = 1;
while (argi < argc) {
auto arg = argv[argi++];
if (string("--help") == arg) {
opt_help = true;
2021-01-24 03:58:23 +00:00
} else if (string("--packrat") == arg) {
opt_packrat = true;
2020-03-29 21:03:47 +00:00
} else if (string("--ast") == arg) {
opt_ast = true;
2021-01-22 02:16:47 +00:00
} else if (string("--opt") == arg || string("--opt-all") == arg) {
2020-05-25 21:31:22 +00:00
opt_optimize = true;
2021-01-22 02:16:47 +00:00
opt_mode = true;
} else if (string("--opt-only") == arg) {
opt_optimize = true;
opt_mode = false;
2020-03-29 21:03:47 +00:00
} else if (string("--source") == arg) {
opt_source = true;
if (argi < argc) {
std::string text = argv[argi++];
source.assign(text.begin(), text.end());
}
} else if (string("--trace") == arg) {
opt_trace = true;
} else {
path_list.push_back(arg);
2015-02-13 00:54:43 +00:00
}
2020-03-29 21:03:47 +00:00
}
2015-02-13 00:54:43 +00:00
2020-03-29 21:03:47 +00:00
if (path_list.empty() || opt_help) {
2021-01-22 02:16:47 +00:00
cerr << R"(usage: grammar_file_path [source_file_path]
options:
--source: source text
2021-01-24 03:58:23 +00:00
--packrat: enable packrat memoise
2021-01-22 02:16:47 +00:00
--ast: show AST tree
2021-04-02 14:04:08 +00:00
--opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction
--opt-only: optimize only AST nodes selected with `no_ast_opt` instruction
2021-01-22 02:16:47 +00:00
--trace: show trace messages
)";
2020-03-29 21:03:47 +00:00
return 1;
}
2015-03-09 18:58:43 +00:00
2020-03-29 21:03:47 +00:00
// Check PEG grammar
auto syntax_path = path_list[0];
2015-02-13 00:54:43 +00:00
2020-03-29 21:03:47 +00:00
vector<char> syntax;
if (!read_file(syntax_path, syntax)) {
cerr << "can't open the grammar file." << endl;
return -1;
}
2015-02-13 00:54:43 +00:00
2020-03-29 21:03:47 +00:00
peg::parser parser;
2015-02-13 00:54:43 +00:00
2020-03-29 21:03:47 +00:00
parser.log = [&](size_t ln, size_t col, const string &msg) {
cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl;
};
2015-02-13 00:54:43 +00:00
2020-03-29 21:03:47 +00:00
if (!parser.load_grammar(syntax.data(), syntax.size())) { return -1; }
2015-11-30 04:07:02 +00:00
2020-03-29 21:03:47 +00:00
if (path_list.size() < 2 && !opt_source) { return 0; }
2020-03-29 21:03:47 +00:00
// Check source
std::string source_path = "[commandline]";
if (path_list.size() >= 2) {
if (!read_file(path_list[1], source)) {
cerr << "can't open the code file." << endl;
return -1;
}
2020-03-29 21:03:47 +00:00
source_path = path_list[1];
}
parser.log = [&](size_t ln, size_t col, const string &msg) {
cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl;
};
2021-01-24 03:58:23 +00:00
if (opt_packrat) {
parser.enable_packrat_parsing();
}
2020-03-29 21:03:47 +00:00
if (opt_trace) {
size_t prev_pos = 0;
parser.enable_trace(
2021-01-13 15:11:06 +00:00
[&](const peg::Ope &ope, const char *s, size_t /*n*/,
2020-03-29 21:03:47 +00:00
const peg::SemanticValues & /*sv*/, const peg::Context &c,
2020-10-02 01:26:04 +00:00
const std::any & /*dt*/) {
2020-03-29 21:03:47 +00:00
auto pos = static_cast<size_t>(s - c.s);
auto backtrack = (pos < prev_pos ? "*" : "");
string indent;
auto level = c.trace_ids.size() - 1;
while (level--) {
indent += "";
}
2021-01-13 15:11:06 +00:00
std::string name;
{
name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
auto lit = dynamic_cast<const peg::LiteralString *>(&ope);
2021-01-15 21:51:36 +00:00
if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; }
2021-01-13 15:11:06 +00:00
}
2020-03-29 21:03:47 +00:00
std::cout << "E " << pos << backtrack << "\t" << indent << "" << name
<< " #" << c.trace_ids.back() << std::endl;
prev_pos = static_cast<size_t>(pos);
},
2021-01-13 15:11:06 +00:00
[&](const peg::Ope &ope, const char *s, size_t /*n*/,
2020-03-29 21:03:47 +00:00
const peg::SemanticValues &sv, const peg::Context &c,
2020-10-02 01:26:04 +00:00
const std::any & /*dt*/, size_t len) {
2020-03-29 21:03:47 +00:00
auto pos = static_cast<size_t>(s - c.s);
if (len != static_cast<size_t>(-1)) { pos += len; }
string indent;
auto level = c.trace_ids.size() - 1;
while (level--) {
indent += "";
}
auto ret = len != static_cast<size_t>(-1) ? "└o " : "└x ";
2021-01-13 15:11:06 +00:00
auto name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
2020-03-29 21:03:47 +00:00
std::stringstream choice;
if (sv.choice_count() > 0) {
choice << " " << sv.choice() << "/" << sv.choice_count();
}
std::string token;
if (!sv.tokens.empty()) {
2021-01-13 15:11:06 +00:00
token += ", token '";
2020-10-02 01:26:04 +00:00
token += sv.tokens[0];
token += "'";
2020-03-29 21:03:47 +00:00
}
2021-01-13 15:11:06 +00:00
std::string matched;
if (peg::success(len) &&
peg::TokenChecker::is_token(const_cast<peg::Ope &>(ope))) {
matched = ", match '" + peg::escape_characters(s, len) + "'";
}
2020-03-29 21:03:47 +00:00
std::cout << "L " << pos << "\t" << indent << ret << name << " #"
2021-01-13 15:11:06 +00:00
<< c.trace_ids.back() << choice.str() << token << matched << std::endl;
2020-03-29 21:03:47 +00:00
});
}
if (opt_ast) {
parser.enable_ast();
std::shared_ptr<peg::Ast> ast;
2021-01-13 15:11:06 +00:00
auto ret = parser.parse_n(source.data(), source.size(), ast);
2020-03-29 21:03:47 +00:00
2021-01-13 15:11:06 +00:00
if (ast) {
if (opt_optimize) {
2021-01-22 02:16:47 +00:00
ast = parser.optimize_ast(ast, opt_mode);
2021-01-13 15:11:06 +00:00
}
std::cout << peg::ast_to_s(ast);
}
2020-05-25 21:31:22 +00:00
2021-01-13 15:11:06 +00:00
if (!ret) { return -1; }
2020-03-29 21:03:47 +00:00
} else {
if (!parser.parse_n(source.data(), source.size())) { return -1; }
}
return 0;
2015-02-13 00:54:43 +00:00
}