1
0
mirror of https://github.com/yhirose/cpp-peglib.git synced 2025-01-10 17:45:30 +00:00

Added left recursive detection.

This commit is contained in:
yhirose 2015-06-13 00:38:38 -04:00
parent 4dc334e481
commit 67de659288
2 changed files with 184 additions and 17 deletions

161
peglib.h
View File

@ -18,6 +18,7 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <set>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
@ -973,9 +974,10 @@ class DefinitionReference : public Ope
{ {
public: public:
DefinitionReference( DefinitionReference(
const std::unordered_map<std::string, Definition>& grammar, const std::string& name) const std::unordered_map<std::string, Definition>& grammar, const std::string& name, const char* s)
: grammar_(grammar) : grammar_(grammar)
, name_(name) {} , name_(name)
, s_(s) {}
size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override;
@ -983,9 +985,10 @@ public:
std::shared_ptr<Ope> get_rule() const; std::shared_ptr<Ope> get_rule() const;
private: //private:
const std::unordered_map<std::string, Definition>& grammar_; const std::unordered_map<std::string, Definition>& grammar_;
const std::string name_; const std::string name_;
const char* s_;
mutable std::once_flag init_; mutable std::once_flag init_;
mutable std::shared_ptr<Ope> rule_; mutable std::shared_ptr<Ope> rule_;
}; };
@ -1018,13 +1021,13 @@ struct Ope::Visitor
struct AssignIDToDefinition : public Ope::Visitor struct AssignIDToDefinition : public Ope::Visitor
{ {
void visit(Sequence& ope) override { void visit(Sequence& ope) override {
for (auto op: ope.opes_) { for (auto ope: ope.opes_) {
op->accept(*this); ope->accept(*this);
} }
} }
void visit(PrioritizedChoice& ope) override { void visit(PrioritizedChoice& ope) override {
for (auto op: ope.opes_) { for (auto ope: ope.opes_) {
op->accept(*this); ope->accept(*this);
} }
} }
void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); }
@ -1047,13 +1050,13 @@ struct IsToken : public Ope::Visitor
IsToken() : has_anchor(false), has_rule(false) {} IsToken() : has_anchor(false), has_rule(false) {}
void visit(Sequence& ope) override { void visit(Sequence& ope) override {
for (auto op: ope.opes_) { for (auto ope: ope.opes_) {
op->accept(*this); ope->accept(*this);
} }
} }
void visit(PrioritizedChoice& ope) override { void visit(PrioritizedChoice& ope) override {
for (auto op: ope.opes_) { for (auto ope: ope.opes_) {
op->accept(*this); ope->accept(*this);
} }
} }
void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); }
@ -1438,8 +1441,8 @@ inline std::shared_ptr<Ope> usr(std::function<size_t (const char* s, size_t n, S
return std::make_shared<User>(fn); return std::make_shared<User>(fn);
} }
inline std::shared_ptr<Ope> ref(const std::unordered_map<std::string, Definition>& grammar, const std::string& name) { inline std::shared_ptr<Ope> ref(const std::unordered_map<std::string, Definition>& grammar, const std::string& name, const char* s) {
return std::make_shared<DefinitionReference>(grammar, name); return std::make_shared<DefinitionReference>(grammar, name, s);
} }
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
@ -1523,6 +1526,100 @@ private:
{} {}
}; };
struct DetectLeftRecursion : public Ope::Visitor {
DetectLeftRecursion(const std::string& name)
: s_(nullptr), name_(name), done_(false) {}
void visit(Sequence& ope) override {
for (auto ope: ope.opes_) {
ope->accept(*this);
if (done_) {
break;
} else if (s_) {
done_ = true;
break;
}
}
}
void visit(PrioritizedChoice& ope) override {
for (auto ope: ope.opes_) {
ope->accept(*this);
if (s_) {
done_ = true;
break;
}
}
}
void visit(ZeroOrMore& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(OneOrMore& ope) override {
ope.ope_->accept(*this);
done_ = true;
}
void visit(Option& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(AndPredicate& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(NotPredicate& ope) override {
ope.ope_->accept(*this);
done_ = false;
}
void visit(LiteralString& ope) override {
done_ = !ope.lit_.empty();
}
void visit(CharacterClass& ope) override {
done_ = true;
}
void visit(Character& ope) override {
done_ = true;
}
void visit(AnyCharacter& ope) override {
done_ = true;
}
void visit(Capture& ope) override {
ope.ope_->accept(*this);
}
void visit(Anchor& ope) override {
ope.ope_->accept(*this);
}
void visit(Ignore& ope) override {
ope.ope_->accept(*this);
}
void visit(User& ope) override {
done_ = true;
}
void visit(WeakHolder& ope) override {
ope.weak_.lock()->accept(*this);
}
void visit(Holder& ope) override {
ope.ope_->accept(*this);
}
void visit(DefinitionReference& ope) override {
if (ope.name_ == name_) {
s_ = ope.s_;
} else if (refs_.find(ope.name_) != refs_.end()) {
;
} else {
refs_.insert(ope.name_);
ope.get_rule()->accept(*this);
}
done_ = true;
}
const char* s_;
private:
std::string name_;
std::set<std::string> refs_;
bool done_;
};
void make_grammar() { void make_grammar() {
// Setup PEG syntax parser // Setup PEG syntax parser
g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]); g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
@ -1679,12 +1776,15 @@ private:
auto baseId = ignore ? 1 : 0; auto baseId = ignore ? 1 : 0;
const auto& ident = sv[baseId].val.get<std::string>(); const auto& ident = sv[baseId].val.get<std::string>();
if (data.references.find(ident) == data.references.end()) {
data.references[ident] = sv.s; // for error handling data.references[ident] = sv.s; // for error handling
}
if (ignore) { if (ignore) {
return ign(ref(*data.grammar, ident)); return ign(ref(*data.grammar, ident, sv.s));
} else { } else {
return ref(*data.grammar, ident); return ref(*data.grammar, ident, sv.s);
} }
}, },
// (Expression) // (Expression)
@ -1769,6 +1869,8 @@ private:
} }
// Check missing definitions // Check missing definitions
bool ret = true;
for (const auto& x : data.references) { for (const auto& x : data.references) {
const auto& name = x.first; const auto& name = x.first;
auto ptr = x.second; auto ptr = x.second;
@ -1777,10 +1879,37 @@ private:
auto line = line_info(s, ptr); auto line = line_info(s, ptr);
log(line.first, line.second, "'" + name + "' is not defined."); log(line.first, line.second, "'" + name + "' is not defined.");
} }
return nullptr; ret = false;
} }
} }
if (!ret) {
return nullptr;
}
// Check left recursion
ret = true;
for (auto& x: grammar) {
const auto& name = x.first;
auto& rule = x.second;
DetectLeftRecursion lr(name);
rule.accept(lr);
if (lr.s_) {
if (log) {
auto line = line_info(s, lr.s_);
log(line.first, line.second, "'" + name + "' is left recursive.");
}
ret = false;;
}
}
if (!ret) {
return nullptr;
}
// Set root definition
start = data.start; start = data.start;
return data.grammar; return data.grammar;

View File

@ -565,6 +565,44 @@ TEST_CASE("Ignore semantic value of 'and' predicate test", "[general]")
REQUIRE(ast->nodes[0]->name == "HELLO_WORLD"); REQUIRE(ast->nodes[0]->name == "HELLO_WORLD");
} }
// Rule A starts with a reference to itself; the test expects the parser
// object built from this grammar to compare equal to false.
TEST_CASE("Left recursive test", "[left recursive]")
{
    const char* grammar =
        " A <- A 'a'"
        " B <- A 'a'";

    peg parser(grammar);

    REQUIRE(parser == false);
}
// In the second alternative of A, only an optional 'b' precedes B, and B is
// just A again; the test expects the parser object to compare equal to false.
TEST_CASE("Left recursive with option test", "[left recursive]")
{
    const char* grammar =
        " A <- 'a' / 'b'? B 'c' "
        " B <- A ";

    peg parser(grammar);

    REQUIRE(parser == false);
}
// A zero-or-more 'a' precedes a (repeated) reference to A itself; the test
// expects the parser object to compare equal to false.
TEST_CASE("Left recursive with zom test", "[left recursive]")
{
    const char* grammar =
        " A <- 'a'* A* ";

    peg parser(grammar);

    REQUIRE(parser == false);
}
// An empty string literal precedes the reference to A itself; the test
// expects the parser object to compare equal to false.
TEST_CASE("Left recursive with empty string test", "[left recursive]")
{
    const char* grammar =
        " A <- '' A";

    peg parser(grammar);

    REQUIRE(parser == false);
}
bool exact(Grammar& g, const char* d, const char* s) { bool exact(Grammar& g, const char* d, const char* s) {
auto n = strlen(s); auto n = strlen(s);
auto r = g[d].parse(s, n); auto r = g[d].parse(s, n);