|
|
@ -604,6 +604,7 @@ public: |
|
|
|
std::vector<size_t> source_line_index; |
|
|
|
std::vector<size_t> source_line_index; |
|
|
|
|
|
|
|
|
|
|
|
const char *error_pos = nullptr; |
|
|
|
const char *error_pos = nullptr; |
|
|
|
|
|
|
|
std::vector<const char *> expected_tokens; |
|
|
|
const char *message_pos = nullptr; |
|
|
|
const char *message_pos = nullptr; |
|
|
|
std::string message; // TODO: should be `int`.
|
|
|
|
std::string message; // TODO: should be `int`.
|
|
|
|
|
|
|
|
|
|
|
@ -751,9 +752,7 @@ public: |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void set_error_pos(const char *a_s) { |
|
|
|
void set_error_pos(const char *a_s, const char *literal = nullptr); |
|
|
|
if (error_pos < a_s) error_pos = a_s; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void trace_enter(const char *name, const char *a_s, size_t n, |
|
|
|
void trace_enter(const char *name, const char *a_s, size_t n, |
|
|
|
SemanticValues &vs, std::any &dt) const; |
|
|
|
SemanticValues &vs, std::any &dt) const; |
|
|
@ -1601,6 +1600,22 @@ private: |
|
|
|
bool has_rule_ = false; |
|
|
|
bool has_rule_ = false; |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct FindLiteralToken : public Ope::Visitor { |
|
|
|
|
|
|
|
void visit(LiteralString &ope) override { token_ = ope.lit_.c_str(); } |
|
|
|
|
|
|
|
void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); } |
|
|
|
|
|
|
|
void visit(Ignore &ope) override { ope.ope_->accept(*this); } |
|
|
|
|
|
|
|
void visit(Reference &ope) override; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static const char *token(Ope &ope) { |
|
|
|
|
|
|
|
FindLiteralToken vis; |
|
|
|
|
|
|
|
ope.accept(vis); |
|
|
|
|
|
|
|
return vis.token_; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
|
|
|
const char *token_ = nullptr; |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
struct DetectLeftRecursion : public Ope::Visitor { |
|
|
|
struct DetectLeftRecursion : public Ope::Visitor { |
|
|
|
DetectLeftRecursion(const std::string &name) : name_(name) {} |
|
|
|
DetectLeftRecursion(const std::string &name) : name_(name) {} |
|
|
|
|
|
|
|
|
|
|
@ -1946,6 +1961,7 @@ public: |
|
|
|
bool ret; |
|
|
|
bool ret; |
|
|
|
size_t len; |
|
|
|
size_t len; |
|
|
|
const char *error_pos; |
|
|
|
const char *error_pos; |
|
|
|
|
|
|
|
const std::vector<const char *> expected_tokens; |
|
|
|
const char *message_pos; |
|
|
|
const char *message_pos; |
|
|
|
const std::string message; |
|
|
|
const std::string message; |
|
|
|
}; |
|
|
|
}; |
|
|
@ -2100,13 +2116,9 @@ private: |
|
|
|
enablePackratParsing, tracer_enter, tracer_leave); |
|
|
|
enablePackratParsing, tracer_enter, tracer_leave); |
|
|
|
|
|
|
|
|
|
|
|
auto len = ope->parse(s, n, vs, cxt, dt); |
|
|
|
auto len = ope->parse(s, n, vs, cxt, dt); |
|
|
|
if (success(len)) { |
|
|
|
return Result{success(len), len, |
|
|
|
cxt.error_pos = nullptr; |
|
|
|
cxt.error_pos, cxt.expected_tokens, |
|
|
|
cxt.message_pos = nullptr; |
|
|
|
cxt.message_pos, cxt.message}; |
|
|
|
cxt.message.clear(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return Result{success(len), len, cxt.error_pos, cxt.message_pos, |
|
|
|
|
|
|
|
cxt.message}; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::shared_ptr<Holder> holder_; |
|
|
|
std::shared_ptr<Holder> holder_; |
|
|
@ -2129,7 +2141,7 @@ inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs, |
|
|
|
for (; i < lit.size(); i++) { |
|
|
|
for (; i < lit.size(); i++) { |
|
|
|
if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i])) |
|
|
|
if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i])) |
|
|
|
: (s[i] != lit[i]))) { |
|
|
|
: (s[i] != lit[i]))) { |
|
|
|
c.set_error_pos(s); |
|
|
|
c.set_error_pos(s, lit.c_str()); |
|
|
|
return static_cast<size_t>(-1); |
|
|
|
return static_cast<size_t>(-1); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -2167,6 +2179,25 @@ inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs, |
|
|
|
return i; |
|
|
|
return i; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inline void Context::set_error_pos(const char *a_s, const char *literal) { |
|
|
|
|
|
|
|
if (error_pos <= a_s) { |
|
|
|
|
|
|
|
if (error_pos < a_s) { |
|
|
|
|
|
|
|
error_pos = a_s; |
|
|
|
|
|
|
|
expected_tokens.clear(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (literal) { |
|
|
|
|
|
|
|
expected_tokens.push_back(literal); |
|
|
|
|
|
|
|
} else if (!rule_stack.empty()) { |
|
|
|
|
|
|
|
auto rule = rule_stack.back(); |
|
|
|
|
|
|
|
auto ope = rule->get_core_operator(); |
|
|
|
|
|
|
|
if (auto token = FindLiteralToken::token(*ope); |
|
|
|
|
|
|
|
token && token[0] != '\0') { |
|
|
|
|
|
|
|
expected_tokens.push_back(token); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
inline void Context::trace_enter(const char *name, const char *a_s, size_t n, |
|
|
|
inline void Context::trace_enter(const char *name, const char *a_s, size_t n, |
|
|
|
SemanticValues &vs, std::any &dt) const { |
|
|
|
SemanticValues &vs, std::any &dt) const { |
|
|
|
trace_ids.push_back(next_trace_id++); |
|
|
|
trace_ids.push_back(next_trace_id++); |
|
|
@ -2528,6 +2559,15 @@ inline void TokenChecker::visit(Reference &ope) { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inline void FindLiteralToken::visit(Reference &ope) { |
|
|
|
|
|
|
|
if (ope.is_macro_) { |
|
|
|
|
|
|
|
ope.rule_->accept(*this); |
|
|
|
|
|
|
|
for (auto arg : ope.args_) { |
|
|
|
|
|
|
|
arg->accept(*this); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
inline void DetectLeftRecursion::visit(Reference &ope) { |
|
|
|
inline void DetectLeftRecursion::visit(Reference &ope) { |
|
|
|
if (ope.name_ == name_) { |
|
|
|
if (ope.name_ == name_) { |
|
|
|
error_s = ope.s_; |
|
|
|
error_s = ope.s_; |
|
|
@ -3764,19 +3804,61 @@ public: |
|
|
|
private: |
|
|
|
private: |
|
|
|
void output_log(const char *s, size_t n, const Definition::Result &r) const { |
|
|
|
void output_log(const char *s, size_t n, const Definition::Result &r) const { |
|
|
|
if (log) { |
|
|
|
if (log) { |
|
|
|
if (!r.ret) { |
|
|
|
if (!r.ret || r.len != n) { |
|
|
|
if (r.message_pos) { |
|
|
|
if (r.message_pos) { |
|
|
|
auto line = line_info(s, r.message_pos); |
|
|
|
auto line = line_info(s, r.message_pos); |
|
|
|
log(line.first, line.second, r.message); |
|
|
|
log(line.first, line.second, r.message); |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
auto line = line_info(s, r.error_pos); |
|
|
|
auto line = line_info(s, r.error_pos); |
|
|
|
log(line.first, line.second, "syntax error"); |
|
|
|
|
|
|
|
|
|
|
|
std::string message; |
|
|
|
|
|
|
|
if (r.expected_tokens.empty()) { |
|
|
|
|
|
|
|
message = "syntax error."; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
message = "syntax error"; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (auto token = heuristic_error_token(s, n, r.error_pos); |
|
|
|
|
|
|
|
!token.empty()) { |
|
|
|
|
|
|
|
message += ", unexpected '"; |
|
|
|
|
|
|
|
message += token; |
|
|
|
|
|
|
|
message += "'"; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t i = 0; |
|
|
|
|
|
|
|
while (i < r.expected_tokens.size()) { |
|
|
|
|
|
|
|
message += (i == 0 ? ", expecting '" : ", '"); |
|
|
|
|
|
|
|
message += r.expected_tokens[r.expected_tokens.size() - i - 1]; |
|
|
|
|
|
|
|
message += "'"; |
|
|
|
|
|
|
|
i++; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
message += "."; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
log(line.first, line.second, message); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::string heuristic_error_token(const char *s, size_t n, |
|
|
|
|
|
|
|
const char *error_pos) const { |
|
|
|
|
|
|
|
auto len = n - std::distance(s, error_pos); |
|
|
|
|
|
|
|
if (len) { |
|
|
|
|
|
|
|
size_t i = 0; |
|
|
|
|
|
|
|
int c = error_pos[i++]; |
|
|
|
|
|
|
|
if (std::ispunct(c)) { |
|
|
|
|
|
|
|
while (i < len && std::ispunct(error_pos[i])) { |
|
|
|
|
|
|
|
i++; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
while (i < len && !std::ispunct(error_pos[i]) && |
|
|
|
|
|
|
|
!std::isspace(error_pos[i])) { |
|
|
|
|
|
|
|
i++; |
|
|
|
} |
|
|
|
} |
|
|
|
} else if (r.len != n) { |
|
|
|
|
|
|
|
auto line = line_info(s, s + r.len); |
|
|
|
|
|
|
|
log(line.first, line.second, "syntax error"); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return std::string(error_pos, std::min<size_t>(i, 8)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return std::string(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::shared_ptr<Grammar> grammar_; |
|
|
|
std::shared_ptr<Grammar> grammar_; |
|
|
|