diff --git a/example/indent.cc b/example/indent.cc index 74c46ab..af3b614 100644 --- a/example/indent.cc +++ b/example/indent.cc @@ -32,11 +32,12 @@ Block <- Statements {} size_t indent = 0; - parser["Block"].enter = [&](const char * /*s*/, size_t /*n*/, - std::any & /*dt*/) { indent += 2; }; + parser["Block"].enter = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, std::any & /*dt*/) { indent += 2; }; - parser["Block"].leave = [&](const char * /*s*/, size_t /*n*/, - size_t /*matchlen*/, std::any & /*value*/, + parser["Block"].leave = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, size_t /*matchlen*/, + std::any & /*value*/, std::any & /*dt*/) { indent -= 2; }; parser["Samedent"].predicate = diff --git a/lint/peglint.cc b/lint/peglint.cc index c78b9b2..bea46b5 100644 --- a/lint/peglint.cc +++ b/lint/peglint.cc @@ -105,7 +105,8 @@ int main(int argc, const char **argv) { peg::parser parser; - parser.log = [&](size_t ln, size_t col, const string &msg) { + parser.log = [&](size_t ln, size_t col, const string &msg, + const string & /*rule*/) { cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl; }; @@ -123,19 +124,16 @@ int main(int argc, const char **argv) { source_path = path_list[1]; } - parser.log = [&](size_t ln, size_t col, const string &msg) { + parser.log = [&](size_t ln, size_t col, const string &msg, + const string & /*rule*/) { cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl; }; if (opt_packrat) { parser.enable_packrat_parsing(); } - if (opt_trace) { - enable_tracing(parser, std::cout); - } + if (opt_trace) { enable_tracing(parser, std::cout); } - if (opt_profile) { - enable_profiling(parser, std::cout); - } + if (opt_profile) { enable_profiling(parser, std::cout); } parser.set_verbose_trace(opt_verbose); diff --git a/peglib.h b/peglib.h index 6052c56..54cbd55 100644 --- a/peglib.h +++ b/peglib.h @@ -495,20 +495,7 @@ struct SemanticValues : protected std::vector { std::vector tags; // Line number and column at which the matched string is - std::pair line_info() const { - if (!c_) { return std::pair(1, 1); } - - auto &idx = source_line_index(); - - auto cur = static_cast(std::distance(ss, sv_.data())); - auto it = std::lower_bound( - idx.begin(), idx.end(), cur, - [](size_t element, size_t value) { return element < value; }); - - auto id = static_cast(std::distance(idx.begin(), it)); - auto off = cur - (id == 0 ? 0 : idx[id - 1] + 1); - return std::pair(id + 1, off + 1); - } + std::pair line_info() const; // Choice count size_t choice_count() const { return choice_count_; } @@ -590,8 +577,6 @@ private: friend class Holder; friend class PrecedenceClimbing; - const std::vector &source_line_index() const; - Context *c_ = nullptr; std::string_view sv_; size_t choice_count_ = 0; @@ -665,7 +650,8 @@ inline bool fail(size_t len) { return len == static_cast(-1); } /* * Log */ -using Log = std::function; +using Log = std::function; /* * ErrorInfo @@ -677,7 +663,8 @@ struct ErrorInfo { std::vector> expected_tokens; const char *message_pos = nullptr; std::string message; - mutable const char *last_output_pos = nullptr; + std::string label; + mutable const char *last_output_pos = nullptr; // TODO: protect... bool keep_previous_token = false; void clear() { @@ -755,7 +742,6 @@ public: const char *path; const char *s; const size_t l; - std::vector source_line_index; ErrorInfo error_info; bool recovered = false; @@ -933,9 +919,30 @@ public: const SemanticValues &vs, std::any &dt, size_t len); bool is_traceable(const Ope &ope) const; - mutable size_t next_trace_id = 0; - mutable std::vector trace_ids; + // Line info + std::pair line_info(const char *cur) const { + if (source_line_index.empty()) { + for (size_t pos = 0; pos < l; pos++) { + if (s[pos] == '\n') { source_line_index.push_back(pos); } + } + source_line_index.push_back(l); + } + + auto pos = static_cast(std::distance(s, cur)); + + auto it = std::lower_bound( + source_line_index.begin(), source_line_index.end(), pos, + [](size_t element, size_t value) { return element < value; }); + + auto id = static_cast(std::distance(source_line_index.begin(), it)); + auto off = pos - (id == 0 ? 0 : source_line_index[id - 1] + 1); + return std::pair(id + 1, off + 1); + } + + size_t next_trace_id = 0; + std::vector trace_ids; bool ignore_trace_state = false; + mutable std::vector source_line_index; // TODO: protect... }; /* @@ -1402,7 +1409,7 @@ public: std::shared_ptr ope_; Definition *outer_; - mutable std::string trace_name_; + mutable std::string trace_name_; // TODO: protect... friend class Definition; }; @@ -2326,9 +2333,10 @@ public: size_t id = 0; Action action; - std::function enter; - std::function + std::function + enter; + std::function leave; bool ignoreSemanticValue = false; std::shared_ptr whitespaceOpe; @@ -2489,15 +2497,9 @@ inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs, return i; } -inline const std::vector &SemanticValues::source_line_index() const { +inline std::pair SemanticValues::line_info() const { assert(c_); - if (c_->source_line_index.empty()) { - for (size_t pos = 0; pos < c_->l; pos++) { - if (c_->s[pos] == '\n') { c_->source_line_index.push_back(pos); } - } - c_->source_line_index.push_back(c_->l); - } - return c_->source_line_index; + return c_->line_info(sv_.data()); } inline void ErrorInfo::output_log(const Log &log, const char *s, @@ -2519,7 +2521,7 @@ inline void ErrorInfo::output_log(const Log &log, const char *s, } else { msg = message; } - log(line.first, line.second, msg); + log(line.first, line.second, msg, label); } } else if (error_pos) { if (error_pos > last_output_pos) { @@ -2562,8 +2564,7 @@ inline void ErrorInfo::output_log(const Log &log, const char *s, } msg += "."; } - - log(line.first, line.second, msg); + log(line.first, line.second, msg, label); } } } @@ -2697,11 +2698,11 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, std::any val; c.packrat(s, outer_->id, len, val, [&](std::any &a_val) { - if (outer_->enter) { outer_->enter(s, n, dt); } + if (outer_->enter) { outer_->enter(c, s, n, dt); } auto &chvs = c.push_semantic_values_scope(); auto se = scope_exit([&]() { c.pop_semantic_values_scope(); - if (outer_->leave) { outer_->leave(s, n, len, a_val, dt); } + if (outer_->leave) { outer_->leave(c, s, n, len, a_val, dt); } }); c.rule_stack.push_back(outer_); @@ -2723,6 +2724,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, if (c.log && !msg.empty() && c.error_info.message_pos < s) { c.error_info.message_pos = s; c.error_info.message = msg; + c.error_info.label = outer_->name; } len = static_cast(-1); } @@ -2733,6 +2735,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, if (c.log && !msg.empty() && c.error_info.message_pos < s) { c.error_info.message_pos = s; c.error_info.message = msg; + c.error_info.label = outer_->name; } } } else { @@ -2740,6 +2743,7 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs, c.error_info.message_pos < s) { c.error_info.message_pos = s; c.error_info.message = outer_->error_message; + c.error_info.label = outer_->name; } } }); @@ -2935,6 +2939,7 @@ inline size_t Recovery::parse_core(const char *s, size_t n, if (!label->rule_->error_message.empty()) { c.error_info.message_pos = s; c.error_info.message = label->rule_->error_message; + c.error_info.label = label->rule_->name; } } } @@ -3487,7 +3492,8 @@ private: } }; - g["Definition"].enter = [](const char * /*s*/, size_t /*n*/, std::any &dt) { + g["Definition"].enter = [](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, std::any &dt) { auto &data = *std::any_cast(dt); data.captures_in_current_definition.clear(); }; @@ -3723,13 +3729,14 @@ private: return vs.token_to_number(); }; - g["CapScope"].enter = [](const char * /*s*/, size_t /*n*/, std::any &dt) { + g["CapScope"].enter = [](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, std::any &dt) { auto &data = *std::any_cast(dt); data.captures_stack.emplace_back(); }; - g["CapScope"].leave = [](const char * /*s*/, size_t /*n*/, - size_t /*matchlen*/, std::any & /*value*/, - std::any &dt) { + g["CapScope"].leave = [](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, size_t /*matchlen*/, + std::any & /*value*/, std::any &dt) { auto &data = *std::any_cast(dt); data.captures_stack.pop_back(); }; @@ -3849,7 +3856,8 @@ private: auto line = line_info(s, rule.s_); log(line.first, line.second, "'precedence' instruction cannot be applied to '" + rule.name + - "'."); + "'.", + ""); } return false; } @@ -3861,7 +3869,8 @@ private: auto line = line_info(s, rule.s_); log(line.first, line.second, "'precedence' instruction cannot be applied to '" + rule.name + - "'."); + "'.", + ""); } return false; } @@ -3895,10 +3904,11 @@ private: if (log) { if (r.error_info.message_pos) { auto line = line_info(s, r.error_info.message_pos); - log(line.first, line.second, r.error_info.message); + log(line.first, line.second, r.error_info.message, + r.error_info.label); } else { auto line = line_info(s, r.error_info.error_pos); - log(line.first, line.second, "syntax error"); + log(line.first, line.second, "syntax error", r.error_info.label); } } return nullptr; @@ -3928,7 +3938,7 @@ private: if (log) { auto line = line_info(s, ptr); log(line.first, line.second, - "The definition '" + name + "' is already defined."); + "The definition '" + name + "' is already defined.", ""); } } ret = false; @@ -3940,7 +3950,7 @@ private: if (log) { auto line = line_info(s, ptr); log(line.first, line.second, - "The instruction '" + type + "' is already defined."); + "The instruction '" + type + "' is already defined.", ""); } } ret = false; @@ -3952,7 +3962,7 @@ private: if (log) { auto line = line_info(s, ptr); log(line.first, line.second, - "The back reference '" + name + "' is undefined."); + "The back reference '" + name + "' is undefined.", ""); } } ret = false; @@ -3967,8 +3977,8 @@ private: if (log) { auto line = line_info(s, start_rule.s_); log(line.first, line.second, - "Ignore operator cannot be applied to '" + start_rule.name + - "'."); + "Ignore operator cannot be applied to '" + start_rule.name + "'.", + ""); } ret = false; } @@ -3991,7 +4001,7 @@ private: for (const auto &[name, ptr] : vis.error_s) { if (log) { auto line = line_info(s, ptr); - log(line.first, line.second, vis.error_message[name]); + log(line.first, line.second, vis.error_message[name], ""); } ret = false; } @@ -4002,7 +4012,7 @@ private: if (log) { auto line = line_info(s, rule.s_); auto msg = "'" + name + "' is not referenced."; - log(line.first, line.second, msg); + log(line.first, line.second, msg, ""); } } } @@ -4025,7 +4035,7 @@ private: if (vis.error_s) { if (log) { auto line = line_info(s, vis.error_s); - log(line.first, line.second, "'" + name + "' is left recursive."); + log(line.first, line.second, "'" + name + "' is left recursive.", ""); } ret = false; } @@ -4095,7 +4105,7 @@ private: if (log) { auto line = line_info(s, vis.error_s); log(line.first, line.second, - "infinite loop is detected in '" + vis.error_name + "'."); + "infinite loop is detected in '" + vis.error_name + "'.", ""); } return true; } diff --git a/test/test1.cc b/test/test1.cc index 1e3d4bd..69e8e01 100644 --- a/test/test1.cc +++ b/test/test1.cc @@ -214,12 +214,13 @@ TEST(GeneralTest, enter_leave_handlers_test) { TOKEN <- [A-Za-z]+ )"); - parser["LTOKEN"].enter = [&](const char *, size_t, std::any &dt) { + parser["LTOKEN"].enter = [&](const Context & /*c*/, const char *, size_t, + std::any &dt) { auto &require_upper_case = *std::any_cast(dt); require_upper_case = false; }; - parser["LTOKEN"].leave = [&](const char *, size_t, size_t, std::any &, - std::any &dt) { + parser["LTOKEN"].leave = [&](const Context & /*c*/, const char *, size_t, + size_t, std::any &, std::any &dt) { auto &require_upper_case = *std::any_cast(dt); require_upper_case = true; }; @@ -246,7 +247,8 @@ TEST(GeneralTest, enter_leave_handlers_test) { EXPECT_TRUE(parser.parse("hello=WORLD", dt)); EXPECT_TRUE(parser.parse("HELLO=WORLD", dt)); - parser.log = [&](size_t ln, size_t col, const std::string &msg) { + parser.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(1, ln); EXPECT_EQ(7, col); EXPECT_EQ(message, msg); @@ -1054,7 +1056,7 @@ TEST(GeneralTest, HeuristicErrorTokenTest) { untyped_enum <- '' { message "invalid/missing enum type, expected one of 'sequence' or 'bitmask', got '%t'"} )"); - parser.log = [&](size_t ln, size_t col, const std::string &msg) { + parser.log = [&](size_t ln, size_t col, const std::string &msg, const std::string & /*rule*/) { EXPECT_EQ(1, ln); EXPECT_EQ(6, col); EXPECT_EQ("invalid/missing enum type, expected one of 'sequence' or " diff --git a/test/test2.cc b/test/test2.cc index 33a9cc5..6c2c5bc 100644 --- a/test/test2.cc +++ b/test/test2.cc @@ -853,7 +853,8 @@ TEST(PredicateTest, Semantic_predicate_test) { EXPECT_TRUE(parser.parse("100", val)); EXPECT_EQ(100, val); - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(1, line); EXPECT_EQ(1, col); EXPECT_EQ("value error!!", msg); @@ -878,7 +879,8 @@ is_symbol <- Name { check_symbol var_table } ref aaa ref bbb )"; - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(3, line); EXPECT_EQ(5, col); EXPECT_EQ("'bbb' doesn't exist.", msg); @@ -891,7 +893,8 @@ ref bbb ref aaa decl aaa )"; - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(3, line); EXPECT_EQ(6, col); EXPECT_EQ("'aaa' already exists.", msg); @@ -963,7 +966,8 @@ typedef __off64_t __loff_t; typedef long __off64_t; typedef __off64_T __loff_t; )"; - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(3, line); EXPECT_EQ(9, col); EXPECT_EQ("'__off64_T' doesn't exist.", msg); @@ -977,7 +981,8 @@ typedef long __off64_t; typedef __off64_t __loff_t; typedef __off64_t __loff_t; )"; - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(4, line); EXPECT_EQ(19, col); EXPECT_EQ("'__loff_t' already exists.", msg); @@ -1029,7 +1034,8 @@ is_symbol <- < Name > ref aaa ref bbb )"; - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { EXPECT_EQ(3, line); EXPECT_EQ(5, col); EXPECT_EQ("'bbb' doesn't exist.", msg); @@ -1044,7 +1050,8 @@ ref bbb ref aaa decl aaa )"; - parser.log = [](size_t line, size_t col, const std::string &msg) { + parser.log = [](size_t line, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::cerr << line << ":" << col << ": " << msg << "\n"; EXPECT_EQ(3, line); EXPECT_EQ(6, col); @@ -1445,7 +1452,8 @@ TEST(ErrorTest, Default_error_handling_1) { }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -1471,7 +1479,8 @@ TEST(ErrorTest, Default_error_handling_2) { }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -1515,7 +1524,8 @@ TEST(ErrorTest, Default_error_handling_fiblang) { }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -1572,7 +1582,8 @@ entry <- (!(__ / HEADER) .)+ { error_message "invalid entry." } }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -1684,7 +1695,8 @@ TEST(ErrorTest, Error_recovery_2) { }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -1771,7 +1783,8 @@ skip_puncs <- [|=]* _ }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -1987,7 +2000,8 @@ SkipToRCUR ← (!RCUR (LCUR SkipToRCUR / .))* RCUR }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -2075,7 +2089,8 @@ STR <- < [a-z0-9]+ > }; size_t i = 0; - pg.log = [&](size_t ln, size_t col, const std::string &msg) { + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { std::stringstream ss; ss << ln << ":" << col << ": " << msg; EXPECT_EQ(errors[i++], ss.str()); @@ -2087,3 +2102,138 @@ STR <- < [a-z0-9]+ > )")); EXPECT_EQ(i, errors.size()); } + +TEST(StateTest, Indent) { + parser pg(R"(Start <- Statements {} +Statements <- Statement* +Statement <- Samedent (S / I) + +S <- 'S' EOS { no_ast_opt } +I <- 'I' EOL Block / 'I' EOS { no_ast_opt } + +Block <- Statements {} + +~Samedent <- ' '* {} + +~EOS <- EOL / EOF +~EOL <- '\n' +~EOF <- !. + )"); + + EXPECT_TRUE(!!pg); + + size_t indent = 0; + + pg["Block"].enter = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, std::any & /*dt*/) { indent += 2; }; + + pg["Block"].leave = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, size_t /*matchlen*/, + std::any & /*value*/, + std::any & /*dt*/) { indent -= 2; }; + + pg["Samedent"].predicate = [&](const SemanticValues &vs, + const std::any & /*dt*/, std::string &msg) { + if (indent != vs.sv().size()) { + msg = "different indent..."; + return false; + } + return true; + }; + + pg.enable_ast(); + + const auto source = R"(I + S + I + I + S + S + S + S +)"; + + std::shared_ptr ast; + + EXPECT_TRUE(pg.parse(source, ast)); + + ast = pg.optimize_ast(ast); + auto s = ast_to_s(ast); + + EXPECT_EQ(R"(+ Start/0[I] + + Block/0[Statements] + + Statement/0[S] + + Statement/0[I] + + Block/0[Statements] + + Statement/0[I] + + Block/0[Statements] + + Statement/0[S] + + Statement/0[S] + + Statement/0[S] + + Statement/0[S] +)", s); +} + +TEST(StateTest, NestedBlocks) { + parser pg(R"( +program <- (~NL / expr)* + +~BLOCK_COMMENT <- '/*' ('/'+[^*/]+ / BLOCK_COMMENT / '*'+[^*/]+ / [^*/] )* ('*/'^unterminated_comment) +~LINE_COMMENT <- '//' [^\n]* +~NOISE <- [ \f\r\t] / BLOCK_COMMENT + +NL <- NOISE* LINE_COMMENT? '\n' + +# error recovery +unterminated_comment <- '' { error_message "unterminated block comment" } + +expr <- 'hello' + )"); + + EXPECT_TRUE(!!pg); + + std::vector> locations; + + pg["BLOCK_COMMENT"].enter = [&](const Context &c, const char *s, size_t /*n*/, + std::any & /*dt*/) { + locations.push_back(c.line_info(s)); + }; + + pg["BLOCK_COMMENT"].leave = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, size_t /*matchlen*/, + std::any & /*value*/, + std::any & /*dt*/) { locations.pop_back(); }; + + std::vector errors{ + R"(7:1: unterminated block comment)", + }; + + size_t i = 0; + pg.log = [&](size_t ln, size_t col, const std::string &msg, + const std::string & /*rule*/) { + std::stringstream ss; + ss << ln << ":" << col << ": " << msg; + EXPECT_EQ(errors[i++], ss.str()); + + EXPECT_EQ(4, locations.size()); + EXPECT_EQ(1, locations[0].first); + EXPECT_EQ(1, locations[0].second); + EXPECT_EQ(2, locations[1].first); + EXPECT_EQ(2, locations[1].second); + EXPECT_EQ(3, locations[2].first); + EXPECT_EQ(3, locations[2].second); + EXPECT_EQ(4, locations[3].first); + EXPECT_EQ(4, locations[3].second); + }; + + EXPECT_FALSE(pg.parse(R"(/* line 1:1 is the first comment open + /* line 2:2 is the second + /* line 3:3 and so on + /* line 4:4 + /* line 5:5 +*/ +)")); + + EXPECT_EQ(i, errors.size()); +} +