From 65d1d99b2b1cf05bd17ab34702593a54948ccfd7 Mon Sep 17 00:00:00 2001 From: yhirose Date: Fri, 31 Jul 2015 13:06:31 -0400 Subject: [PATCH] Added AstOptimizer. --- example/calc3.cc | 3 +- example/pl0.cc | 128 +++++++++++++++++++++++++++++++-------------- language/culebra.h | 3 +- peglib.h | 72 ++++++++++++++++--------- 4 files changed, 139 insertions(+), 67 deletions(-) diff --git a/example/calc3.cc b/example/calc3.cc index 3194c52..e4d2304 100644 --- a/example/calc3.cc +++ b/example/calc3.cc @@ -49,11 +49,12 @@ int main(int argc, const char** argv) " ~_ <- [ \t\r\n]* " ); - parser.enable_ast(true); + parser.enable_ast(); auto expr = argv[1]; shared_ptr ast; if (parser.parse(expr, ast)) { + ast = AstOptimizer(true).optimize(ast); ast->print(); cout << expr << " = " << eval(*ast) << endl; return 0; diff --git a/example/pl0.cc b/example/pl0.cc index 3b7b7ff..85567c6 100644 --- a/example/pl0.cc +++ b/example/pl0.cc @@ -8,6 +8,7 @@ #include #include #include +#include using namespace peglib; using namespace std; @@ -49,52 +50,71 @@ auto grammar = R"( ~_ <- [ \t\r\n]* )"; +string format_error_message(const string& path, size_t ln, size_t col, const string& msg) { + stringstream ss; + ss << path << ":" << ln << ":" << col << ": " << msg << endl; + return ss.str(); +} + struct Environment { - Environment(shared_ptr outer = nullptr) : outer(outer) {} + Environment(shared_ptr outer = nullptr) + : outer_(outer) {} + + bool has_value(const string& ident) const { + return has_constant(ident) || has_variable(ident); + } + + bool has_variable(const string& ident) const { + if (variables.find(ident) != variables.end()) { + return true; + } else if (!outer_) { + return false; + } + return outer_->has_variable(ident); + } int get_value(const string& ident) const { - try { - return get_constant(ident); - } catch (...) { - return get_variable(ident); - } + return has_constant(ident) ? get_constant(ident) : get_variable(ident); } void set_variable(const string& ident, int val) { if (variables.find(ident) != variables.end()) { variables[ident] = val; - } else if (outer) { - return outer->set_variable(ident, val); - } else { - throw runtime_error("undefined variable"); - } - } - - int get_constant(const string& ident) const { - if (constants.find(ident) != constants.end()) { - return constants.at(ident); - } else if (outer) { - return outer->get_constant(ident); - } else { - throw runtime_error("undefined constants"); - } - } - - int get_variable(const string& ident) const { - if (variables.find(ident) != variables.end()) { - return variables.at(ident); - } else if (outer) { - return outer->get_variable(ident); - } else { - throw runtime_error("undefined variable"); + return; } + outer_->set_variable(ident, val); } map constants; map variables; map> procedures; - shared_ptr outer; + +private: + bool has_constant(const string& ident) const { + if (constants.find(ident) != constants.end()) { + return true; + } else if (!outer_) { + return false; + } + return outer_->has_constant(ident); + } + + int get_constant(const string& ident) const { + if (constants.find(ident) != constants.end()) { + return constants.at(ident); + } + return outer_->get_constant(ident); + } + + int get_variable(const string& ident) const { + if (variables.find(ident) != variables.end()) { + return variables.at(ident); + } + return outer_->get_variable(ident); + } + + shared_ptr outer_; }; struct Interpreter @@ -164,7 +184,13 @@ private: void exec_assignment(const shared_ptr ast, shared_ptr env) { // assignment <- ident ':=' _ expression const auto& ident = ast->nodes[0]->token; - auto val = eval(ast->nodes[1], env); + if (!env->has_variable(ident)) { + string msg = "undefined variable '" + ident + "'..."; + string s = format_error_message(ast->path, ast->line, ast->column, msg); + throw runtime_error(s); + } + auto expr = ast->nodes[1]; + auto val = eval(expr, env); env->set_variable(ident, val); } @@ -185,8 +211,8 @@ private: void exec_if(const shared_ptr ast, shared_ptr env) { // if <- 'IF' _ condition 'THEN' _ statement auto cond = eval_condition(ast->nodes[0], env); - auto stmt = ast->nodes[1]; if (cond) { + auto stmt = ast->nodes[1]; exec(stmt, env); } } @@ -288,7 +314,9 @@ private: break; case '/': if (rval == 0) { - throw runtime_error("divide by 0 error"); + string msg = "divide by 0 error"; + string s = format_error_message(ast->path, ast->line, ast->column, msg); + throw runtime_error(s); } val = val / rval; break; @@ -298,7 +326,13 @@ private: } int eval_ident(const shared_ptr ast, shared_ptr env) { - return env->get_value(ast->token); + const auto& ident = ast->token; + if (!env->has_value(ident)) { + string msg = "undefined variable '" + ident + "'..."; + string s = format_error_message(ast->path, ast->line, ast->column, msg); + throw runtime_error(s); + } + return env->get_value(ident); } int eval_number(const shared_ptr ast, shared_ptr env) { @@ -326,6 +360,7 @@ int main(int argc, const char** argv) return 1; } + // Read a source file into memory auto path = argv[1]; vector source; if (!read_file(path, source)) { @@ -333,21 +368,34 @@ int main(int argc, const char** argv) return -1; } + // Setup a PEG parser peg parser(grammar); - parser.enable_ast(false, { "program", "statement", "statements", "term", "factor" }); + parser.enable_ast(); + parser.log = [&](size_t ln, size_t col, const string& err_msg) { + cerr << format_error_message(path, ln, col, err_msg) << endl; + }; + // Parse the source and make an AST shared_ptr ast; if (parser.parse_n(source.data(), source.size(), ast, path)) { + vector filters = { "program", "statement", "statements", "term", "factor" }; + ast = AstOptimizer(false, filters).optimize(ast); + if (argc > 2 && string("--ast") == argv[2]) { ast->print(); } - Interpreter interp; - auto env = make_shared(); - interp.exec(ast, env); + + // Run the AST + try { + Interpreter interp; + auto env = make_shared(); + interp.exec(ast, env); + } catch (const runtime_error& e) { + cerr << e.what() << endl; + } return 0; } - cout << "syntax error..." << endl; return -1; } diff --git a/language/culebra.h b/language/culebra.h index d44292c..c43a0fd 100644 --- a/language/culebra.h +++ b/language/culebra.h @@ -90,7 +90,7 @@ inline peglib::peg& get_parser() throw std::logic_error("invalid peg grammar"); } - parser.enable_ast(true, { "PARAMETERS", "ARGUMENTS", "OBJECT", "ARRAY" }); + parser.enable_ast(); } return parser; @@ -924,6 +924,7 @@ inline bool run( }; if (parser.parse_n(expr, len, ast, path.c_str())) { + ast = peglib::AstOptimizer(true, { "PARAMETERS", "ARGUMENTS", "OBJECT", "ARRAY" }).optimize(ast); val = Eval(debugger).eval(*ast, env); return true; } diff --git a/peglib.h b/peglib.h index cc76946..714a7f9 100644 --- a/peglib.h +++ b/peglib.h @@ -1983,18 +1983,18 @@ struct Ast void print() const; - const std::string path; - const size_t line; - const size_t column; - const std::string name; - const std::string original_name; - const bool is_token; - const std::string token; - const std::vector> nodes; - std::shared_ptr parent_node; + const std::string path; + const size_t line; + const size_t column; + const std::string name; + const std::string original_name; + const bool is_token; + const std::string token; + std::vector> nodes; + std::shared_ptr parent_node; #ifndef PEGLIB_NO_CONSTEXPR_SUPPORT - const unsigned int tag; - const unsigned int original_tag; + const unsigned int tag; + const unsigned int original_tag; #endif }; @@ -2027,12 +2027,10 @@ private: }; inline const Ast& Ast::get_smallest_ancestor() const { - assert(nodes.size() <= 1); - + assert(nodes.size() <= 1); if (nodes.empty()) { return *this; } - return nodes[0]->get_smallest_ancestor(); } @@ -2040,6 +2038,37 @@ inline void Ast::print() const { AstPrint().print(*this); } +struct AstOptimizer +{ + AstOptimizer(bool optimize_nodes, const std::vector& filters = {}) + : optimize_nodes_(optimize_nodes) + , filters_(filters) {} + + std::shared_ptr optimize(std::shared_ptr original, std::shared_ptr parent = nullptr) { + + auto found = std::find(filters_.begin(), filters_.end(), original->name) != filters_.end(); + bool opt = optimize_nodes_ ? !found : found; + + if (opt && original->nodes.size() == 1) { + auto child = optimize(original->nodes[0], parent); + return std::make_shared(*child, original->name.c_str()); + } + + auto ast = std::make_shared(*original); + ast->parent_node = parent; + ast->nodes.clear(); + for (auto node : original->nodes) { + auto child = optimize(node, ast); + ast->nodes.push_back(child); + } + return ast; + } + +private: + const bool optimize_nodes_; + const std::vector filters_; +}; + /*----------------------------------------------------------------------------- * peg *---------------------------------------------------------------------------*/ @@ -2193,14 +2222,11 @@ public: } } - peg& enable_ast(bool optimize_nodes, const std::initializer_list& filters = {}) { + peg& enable_ast() { for (auto& x: *grammar_) { const auto& name = x.first; auto& rule = x.second; - auto found = std::find(filters.begin(), filters.end(), name) != filters.end(); - bool opt = optimize_nodes ? !found : found; - if (!rule.action) { auto is_token = rule.is_token; rule.action = [=](const SemanticValues& sv) { @@ -2209,13 +2235,9 @@ public: return std::make_shared(sv.path, line.first, line.second, name.c_str(), std::string(sv.s, sv.n)); } - std::shared_ptr ast; - if (opt && sv.size() == 1) { - ast = std::make_shared(*sv[0].get>(), name.c_str()); - } else { - auto line = line_info(sv.ss, sv.s); - ast = std::make_shared(sv.path, line.first, line.second, name.c_str(), sv.transform>()); - } + auto line = line_info(sv.ss, sv.s); + auto ast = std::make_shared(sv.path, line.first, line.second, name.c_str(), sv.transform>()); + for (auto node: ast->nodes) { node->parent_node = ast; }