diff --git a/example/pl0.cc b/example/pl0.cc index f58c7ea..56be19f 100644 --- a/example/pl0.cc +++ b/example/pl0.cc @@ -13,6 +13,9 @@ using namespace peglib; using namespace std; +/* + * PEG Grammar + */ auto grammar = R"( program <- _ block '.' _ @@ -50,17 +53,15 @@ auto grammar = R"( ~_ <- [ \t\r\n]* )"; +/* + * Utilities + */ string format_error_message(const string& path, size_t ln, size_t col, const string& msg) { stringstream ss; ss << path << ":" << ln << ":" << col << ": " << msg << endl; return ss.str(); } -template -bool has_key(const T& cont, const U& key) { - return cont.find(key) != cont.end(); -} - bool read_file(const char* path, vector& buff) { ifstream ifs(path, ios::in | ios::binary); @@ -74,54 +75,202 @@ bool read_file(const char* path, vector& buff) return true; } -struct Environment -{ - Environment(shared_ptr outer = nullptr) - : outer_(outer) {} +/* + * Ast + */ +struct Scope; - bool has_value(const string& ident) const { - return has_constant(ident) || has_variable(ident); +struct Annotation +{ + shared_ptr scope; +}; + +typedef AstBase AstPL0; + +/* + * Symbol Table + */ +struct Symbol { + int value; + bool is_constant; +}; + +struct Scope +{ + Scope(shared_ptr outer = nullptr) : outer(outer) {} + + bool has_symbol(const string& ident) const { + auto it = symbols.find(ident); + if (it != symbols.end()) { + return true; + } + return outer ? outer->has_symbol(ident) : false; + } + + bool has_constant(const string& ident) const { + auto it = symbols.find(ident); + if (it != symbols.end() && it->second.is_constant) { + return true; + } + return outer ? outer->has_constant(ident) : false; } bool has_variable(const string& ident) const { - return has_key(variables, ident) ? true : (outer_ ? outer_->has_variable(ident) : false); + auto it = symbols.find(ident); + if (it != symbols.end() && !it->second.is_constant) { + return true; + } + return outer ? outer->has_variable(ident) : false; } - int get_value(const string& ident) const { - return has_constant(ident) ? get_constant(ident) : get_variable(ident); + bool has_procedure(const string& ident) const { + auto it = procedures.find(ident); + if (it != procedures.end()) { + return true; + } + return outer ? outer->has_procedure(ident) : false; } - void set_variable(const string& ident, int val) { - if (has_key(variables, ident)) { - variables[ident] = val; - } else { - outer_->set_variable(ident, val); + shared_ptr get_procedure(const string& ident) const { + auto it = procedures.find(ident); + if (it != procedures.end()) { + return it->second; + } + return outer->get_procedure(ident); + } + + map symbols; + map> procedures; + +private: + shared_ptr outer; +}; + +struct SymbolTable +{ + static void build(const shared_ptr ast, shared_ptr scope = nullptr) { + switch (ast->tag) { + case "block"_: visit_block(ast, scope); break; + case "assignment"_: visit_assignment(ast, scope); break; + case "call"_: visit_call(ast, scope); break; + case "ident"_: visit_ident(ast, scope); break; + default: for (auto node: ast->nodes) { build(node, scope); } break; } } - map constants; - map variables; - map> procedures; - private: - bool has_constant(const string& ident) const { - return has_key(constants, ident) ? true : (outer_ ? outer_->has_constant(ident) : false); + static void visit_block(const shared_ptr ast, shared_ptr outer) { + // block <- const var procedure statement + auto scope = make_shared(outer); + const auto& nodes = ast->nodes; + visit_constants(nodes[0], scope); + visit_variables(nodes[1], scope); + visit_procedures(nodes[2], scope); + build(nodes[3], scope); + ast->scope = scope; } - int get_constant(const string& ident) const { - return has_key(constants, ident) ? constants.at(ident) : outer_->get_constant(ident); + static void visit_constants(const shared_ptr ast, shared_ptr scope) { + // const <- ('CONST' _ ident '=' _ number(',' _ ident '=' _ number)* ';' _) ? + const auto& nodes = ast->nodes; + for (auto i = 0u; i < nodes.size(); i += 2) { + const auto& ident = nodes[i + 0]->token; + auto number = stoi(nodes[i + 1]->token); + scope->symbols.emplace(ident, Symbol{ number, true }); + } } - int get_variable(const string& ident) const { - return has_key(variables, ident) ? variables.at(ident) : outer_->get_variable(ident); + static void visit_variables(const shared_ptr ast, shared_ptr scope) { + // var <- ('VAR' _ ident(',' _ ident)* ';' _) ? + const auto& nodes = ast->nodes; + for (auto i = 0u; i < nodes.size(); i += 1) { + const auto& ident = nodes[i]->token; + scope->symbols.emplace(ident, Symbol{ 0, false }); + } } - shared_ptr outer_; + static void visit_procedures(const shared_ptr ast, shared_ptr scope) { + // procedure <- ('PROCEDURE' _ ident ';' _ block ';' _)* + const auto& nodes = ast->nodes; + for (auto i = 0u; i < nodes.size(); i += 2) { + const auto& ident = nodes[i + 0]->token; + auto block = nodes[i + 1]; + scope->procedures[ident] = block; + build(block, scope); + } + } + + static void visit_assignment(const shared_ptr ast, shared_ptr scope) { + // assignment <- ident ':=' _ expression + const auto& ident = ast->nodes[0]->token; + if (!scope->has_variable(ident)) { + string msg = "undefined variable '" + ident + "'..."; + string s = format_error_message(ast->path, ast->line, ast->column, msg); + throw runtime_error(s); + } + } + + static void visit_call(const shared_ptr ast, shared_ptr scope) { + // call <- 'CALL' _ ident + const auto& ident = ast->nodes[0]->token; + if (!scope->has_procedure(ident)) { + string msg = "undefined procedure '" + ident + "'..."; + string s = format_error_message(ast->path, ast->line, ast->column, msg); + throw runtime_error(s); + } + } + + static void visit_ident(const shared_ptr ast, shared_ptr scope) { + const auto& ident = ast->token; + if (!scope->has_symbol(ident)) { + string msg = "undefined variable '" + ident + "'..."; + string s = format_error_message(ast->path, ast->line, ast->column, msg); + throw runtime_error(s); + } + } }; +/* + * Environment + */ +struct Environment +{ + Environment(shared_ptr scope = nullptr, shared_ptr outer = nullptr) : scope(scope), outer(outer) { + if (scope) { + symbols = scope->symbols; + } + } + + int get_value(const string& ident) const { + auto it = symbols.find(ident); + if (it != symbols.end()) { + return it->second.value; + } + return outer->get_value(ident); + } + + void set_variable(const string& ident, int val) { + auto it = symbols.find(ident); + if (it != symbols.end() && !it->second.is_constant) { + symbols[ident].value = val; + } else { + outer->set_variable(ident, val); + } + } + + shared_ptr scope; + +private: + map symbols; + shared_ptr outer; +}; + +/* + * Interpreter + */ struct Interpreter { - void exec(const shared_ptr ast, shared_ptr env) { + static void exec(const shared_ptr ast, shared_ptr env) { switch (ast->tag) { case "block"_: exec_block(ast, env); break; case "statement"_: exec_statement(ast, env); break; @@ -137,80 +286,42 @@ struct Interpreter } private: - void exec_block(const shared_ptr ast, shared_ptr outer) { + static void exec_block(const shared_ptr ast, shared_ptr outer) { // block <- const var procedure statement - auto env = make_shared(outer); - const auto& nodes = ast->nodes; - exec_constants(nodes[0], env); - exec_variables(nodes[1], env); - exec_procedures(nodes[2], env); - exec(nodes[3], env); + auto env = make_shared(ast->scope, outer); + exec(ast->nodes[3], env); } - void exec_constants(const shared_ptr ast, shared_ptr env) { - // const <- ('CONST' _ ident '=' _ number(',' _ ident '=' _ number)* ';' _) ? - const auto& nodes = ast->nodes; - for (auto i = 0u; i < nodes.size(); i += 2) { - const auto& ident = nodes[i + 0]->token; - auto number = stoi(nodes[i + 1]->token); - env->constants[ident] = number; - } - } - - void exec_variables(const shared_ptr ast, shared_ptr env) { - // var <- ('VAR' _ ident(',' _ ident)* ';' _) ? - const auto& nodes = ast->nodes; - for (auto i = 0u; i < nodes.size(); i += 1) { - const auto& ident = nodes[i]->token; - env->variables[ident] = 0; - } - } - - void exec_procedures(const shared_ptr ast, shared_ptr env) { - // procedure <- ('PROCEDURE' _ ident ';' _ block ';' _)* - const auto& nodes = ast->nodes; - for (auto i = 0u; i < nodes.size(); i += 2) { - const auto& ident = nodes[i + 0]->token; - auto block = nodes[i + 1]; - env->procedures[ident] = block; - } - } - - void exec_statement(const shared_ptr ast, shared_ptr env) { + static void exec_statement(const shared_ptr ast, shared_ptr env) { // statement <-(assignment / call / statements / if / while / out / in) ? if (!ast->nodes.empty()) { exec(ast->nodes[0], env); } } - void exec_assignment(const shared_ptr ast, shared_ptr env) { + static void exec_assignment(const shared_ptr ast, shared_ptr env) { // assignment <- ident ':=' _ expression const auto& ident = ast->nodes[0]->token; - if (!env->has_variable(ident)) { - string msg = "undefined variable '" + ident + "'..."; - string s = format_error_message(ast->path, ast->line, ast->column, msg); - throw runtime_error(s); - } auto expr = ast->nodes[1]; auto val = eval(expr, env); env->set_variable(ident, val); } - void exec_call(const shared_ptr ast, shared_ptr env) { + static void exec_call(const shared_ptr ast, shared_ptr env) { // call <- 'CALL' _ ident const auto& ident = ast->nodes[0]->token; - auto proc = env->procedures[ident]; + auto proc = env->scope->get_procedure(ident); exec_block(proc, env); } - void exec_statements(const shared_ptr ast, shared_ptr env) { + static void exec_statements(const shared_ptr ast, shared_ptr env) { // statements <- 'BEGIN' _ statement (';' _ statement )* 'END' _ for (auto stmt: ast->nodes) { exec(stmt, env); } } - void exec_if(const shared_ptr ast, shared_ptr env) { + static void exec_if(const shared_ptr ast, shared_ptr env) { // if <- 'IF' _ condition 'THEN' _ statement auto cond = eval_condition(ast->nodes[0], env); if (cond) { @@ -219,7 +330,7 @@ private: } } - void exec_while(const shared_ptr ast, shared_ptr env) { + static void exec_while(const shared_ptr ast, shared_ptr env) { // while <- 'WHILE' _ condition 'DO' _ statement auto cond = ast->nodes[0]; auto stmt = ast->nodes[1]; @@ -230,37 +341,37 @@ private: } } - void exec_out(const shared_ptr ast, shared_ptr env) { + static void exec_out(const shared_ptr ast, shared_ptr env) { // out <- '!' _ expression auto val = eval(ast->nodes[0], env); cout << val << endl; } - void exec_in(const shared_ptr ast, shared_ptr env) { + static void exec_in(const shared_ptr ast, shared_ptr env) { // in <- '?' _ ident int val; cin >> val; const auto& ident = ast->nodes[0]->token; - env->variables[ident] = val; + env->set_variable(ident, val); } - bool eval_condition(const shared_ptr ast, shared_ptr env) { + static bool eval_condition(const shared_ptr ast, shared_ptr env) { // condition <- odd / compare const auto& node = ast->nodes[0]; switch (node->tag) { case "odd"_: return eval_odd(node, env); case "compare"_: return eval_compare(node, env); - default: throw logic_error("invalid Ast type"); + default: throw logic_error("invalid AstPL0 type"); } } - bool eval_odd(const shared_ptr ast, shared_ptr env) { + static bool eval_odd(const shared_ptr ast, shared_ptr env) { // odd <- 'ODD' _ expression auto val = eval_expression(ast->nodes[0], env); return val != 0; } - bool eval_compare(const shared_ptr ast, shared_ptr env) { + static bool eval_compare(const shared_ptr ast, shared_ptr env) { // compare <- expression compare_op expression auto lval = eval_expression(ast->nodes[0], env); auto op = peglib::str2tag(ast->nodes[1]->token.c_str()); @@ -276,7 +387,7 @@ private: } } - int eval(const shared_ptr ast, shared_ptr env) { + static int eval(const shared_ptr ast, shared_ptr env) { switch (ast->tag) { case "expression"_: return eval_expression(ast, env); case "term"_: return eval_term(ast, env); @@ -286,7 +397,7 @@ private: } } - int eval_expression(const shared_ptr ast, shared_ptr env) { + static int eval_expression(const shared_ptr ast, shared_ptr env) { // expression <- sign term (term_op term)* auto sign = ast->nodes[0]->token; auto sign_val = (sign.empty() || sign == "+") ? 1 : -1; @@ -303,7 +414,7 @@ private: return val; } - int eval_term(const shared_ptr ast, shared_ptr env) { + static int eval_term(const shared_ptr ast, shared_ptr env) { // term <- factor (factor_op factor)* auto val = eval(ast->nodes[0], env); const auto& nodes = ast->nodes; @@ -326,20 +437,19 @@ private: return val; } - int eval_ident(const shared_ptr ast, shared_ptr env) { + static int eval_ident(const shared_ptr ast, shared_ptr env) { const auto& ident = ast->token; - if (!env->has_value(ident)) { - string msg = "undefined variable '" + ident + "'..."; - throw runtime_error(format_error_message(ast->path, ast->line, ast->column, msg)); - } return env->get_value(ident); } - int eval_number(const shared_ptr ast, shared_ptr env) { + static int eval_number(const shared_ptr ast, shared_ptr env) { return stol(ast->token); } }; +/* + * Main + */ int main(int argc, const char** argv) { if (argc < 2) { @@ -357,23 +467,21 @@ int main(int argc, const char** argv) // Setup a PEG parser peg parser(grammar); - parser.enable_ast(); + parser.enable_ast(); parser.log = [&](size_t ln, size_t col, const string& err_msg) { cerr << format_error_message(path, ln, col, err_msg) << endl; }; // Parse the source and make an AST - shared_ptr ast; + shared_ptr ast; if (parser.parse_n(source.data(), source.size(), ast, path)) { if (argc > 2 && string("--ast") == argv[2]) { ast->print(); } - // Run the AST try { - Interpreter interp; - auto env = make_shared(); - interp.exec(ast, env); + SymbolTable::build(ast); + Interpreter::exec(ast, make_shared()); } catch (const runtime_error& e) { cerr << e.what() << endl; } diff --git a/peglib.h b/peglib.h index 714a7f9..8d07fba 100644 --- a/peglib.h +++ b/peglib.h @@ -1955,54 +1955,78 @@ inline constexpr unsigned int operator "" _(const char* s, size_t) { } #endif -struct Ast +template +struct AstBase : public MixedIn { - Ast(const char* path, size_t line, size_t column, const char* name, const std::vector>& nodes) - : path(path ? path : ""), line(line), column(column), name(name), original_name(name), is_token(false), nodes(nodes) + AstBase(const char* path, size_t line, size_t column, const char* name, const std::vector>& nodes) + : path(path ? path : "") + , line(line) + , column(column) + , name(name) + , original_name(name) #ifndef PEGLIB_NO_CONSTEXPR_SUPPORT - , tag(str2tag(name)), original_tag(tag) + , tag(str2tag(name)) + , original_tag(tag) #endif + , is_token(false) + , nodes(nodes) {} - Ast(const char* path, size_t line, size_t column, const char* name, const std::string& token) - : path(path ? path : ""), line(line), column(column), name(name), original_name(name), is_token(true), token(token) + AstBase(const char* path, size_t line, size_t column, const char* name, const std::string& token) + : path(path ? path : "") + , line(line) + , column(column) + , name(name) + , original_name(name) #ifndef PEGLIB_NO_CONSTEXPR_SUPPORT - , tag(str2tag(name)), original_tag(tag) + , tag(str2tag(name)) + , original_tag(tag) #endif + , is_token(true) + , token(token) {} - Ast(const Ast& ast, const char* original_name) - : path(ast.path), line(ast.line), column(ast.column), name(ast.name), original_name(original_name) - , is_token(ast.is_token), token(ast.token), nodes(ast.nodes) + AstBase(const AstBase& ast, const char* original_name) + : path(ast.path) + , line(ast.line) + , column(ast.column) + , name(ast.name) + , original_name(original_name) #ifndef PEGLIB_NO_CONSTEXPR_SUPPORT - , tag(ast.tag), original_tag(str2tag(original_name)) + , tag(ast.tag) + , original_tag(str2tag(original_name)) #endif + , is_token(ast.is_token) + , token(ast.token) + , nodes(ast.nodes) {} - const Ast& get_smallest_ancestor() const; - void print() const; const std::string path; const size_t line; const size_t column; + const std::string name; const std::string original_name; - const bool is_token; - const std::string token; - std::vector> nodes; - std::shared_ptr parent_node; #ifndef PEGLIB_NO_CONSTEXPR_SUPPORT const unsigned int tag; const unsigned int original_tag; #endif + + const bool is_token; + const std::string token; + + std::vector>> nodes; + std::shared_ptr> parent_node; }; -struct AstPrint +template +struct AstPrintBase { - AstPrint() : level_(-1) {} + AstPrintBase() : level_(-1) {} - void print(const Ast& ast) { + void print(const AstBase& ast) { level_ += 1; for (auto i = 0; i < level_; i++) { std::cout << " "; } if (ast.is_token) { @@ -2015,7 +2039,7 @@ struct AstPrint } private: - std::string name(const Ast& ast) { + std::string name(const AstBase& ast) { if (ast.name == ast.original_name) { return ast.name; } else { @@ -2026,35 +2050,29 @@ private: int level_; }; -inline const Ast& Ast::get_smallest_ancestor() const { - assert(nodes.size() <= 1); - if (nodes.empty()) { - return *this; - } - return nodes[0]->get_smallest_ancestor(); +template +inline void AstBase::print() const { + AstPrintBase().print(*this); } -inline void Ast::print() const { - AstPrint().print(*this); -} - -struct AstOptimizer +template +struct AstOptimizerBase { - AstOptimizer(bool optimize_nodes, const std::vector& filters = {}) + AstOptimizerBase(bool optimize_nodes, const std::vector& filters = {}) : optimize_nodes_(optimize_nodes) , filters_(filters) {} - std::shared_ptr optimize(std::shared_ptr original, std::shared_ptr parent = nullptr) { + std::shared_ptr> optimize(std::shared_ptr> original, std::shared_ptr> parent = nullptr) { auto found = std::find(filters_.begin(), filters_.end(), original->name) != filters_.end(); bool opt = optimize_nodes_ ? !found : found; if (opt && original->nodes.size() == 1) { auto child = optimize(original->nodes[0], parent); - return std::make_shared(*child, original->name.c_str()); + return std::make_shared>(*child, original->name.c_str()); } - auto ast = std::make_shared(*original); + auto ast = std::make_shared>(*original); ast->parent_node = parent; ast->nodes.clear(); for (auto node : original->nodes) { @@ -2069,6 +2087,11 @@ private: const std::vector filters_; }; +struct EmptyType {}; +typedef AstBase Ast; +typedef AstPrintBase AstPrint; +typedef AstOptimizerBase AstOptimizer; + /*----------------------------------------------------------------------------- * peg *---------------------------------------------------------------------------*/ @@ -2222,6 +2245,7 @@ public: } } + template peg& enable_ast() { for (auto& x: *grammar_) { const auto& name = x.first; @@ -2232,11 +2256,11 @@ public: rule.action = [=](const SemanticValues& sv) { if (is_token) { auto line = line_info(sv.ss, sv.s); - return std::make_shared(sv.path, line.first, line.second, name.c_str(), std::string(sv.s, sv.n)); + return std::make_shared(sv.path, line.first, line.second, name.c_str(), std::string(sv.s, sv.n)); } auto line = line_info(sv.ss, sv.s); - auto ast = std::make_shared(sv.path, line.first, line.second, name.c_str(), sv.transform>()); + auto ast = std::make_shared(sv.path, line.first, line.second, name.c_str(), sv.transform>()); for (auto node: ast->nodes) { node->parent_node = ast;