This commit is contained in:
yhirose 2024-09-03 10:30:33 -04:00 committed by GitHub
parent 2b022992a4
commit 79eb37c851
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 146 additions and 63 deletions

View File

@ -647,6 +647,29 @@ custom_message.txt:1:8: code format error...
NOTE: If more than one element in a prioritized choice has an error message instruction, this feature may not work as you expect.
Change the Start Definition Rule
--------------------------------
The start definition rule can be changed by passing the rule name to the `peg::parser` constructor or to `load_grammar`, as shown below.
```cpp
auto grammar = R"(
Start <- A
A <- B (',' B)*
B <- '[one]' / '[two]'
%whitespace <- [ \t\n]*
)";
peg::parser parser(grammar, "A"); // Start Rule is "A"
// or
peg::parser parser;
parser.load_grammar(grammar, "A"); // Start Rule is "A"
parser.parse(" [one] , [two] "); // OK
```
peglint - PEG syntax lint utility
---------------------------------

View File

@ -20,6 +20,7 @@
<li><span>Source Code</span></li>
<li class="editor-options">
<ul class="editor-header-options">
<li class="option"><label>Start Rule: </label><input id="start-rule" type="text"></li>
<li class="option"><input id="packrat" type="checkbox"><label>Packrat</label></li>
<li class="option"><input id="auto-refresh" type="checkbox"><label>Auto Refresh</label></li>
<li class="option"><button id="parse" class="parse">Parse</button></li>

View File

@ -27,6 +27,7 @@ const codeAstOptimized = setupInfoArea("code-ast-optimized");
const codeProfile = setupInfoArea("code-profile");
$('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all');
$('#start-rule').val(localStorage.getItem('startRule') || '');
$('#packrat').prop('checked', localStorage.getItem('packrat') === 'true');
$('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true');
$('#parse').prop('disabled', $('#auto-refresh').prop('checked'));
@ -61,6 +62,7 @@ function updateLocalStorage() {
localStorage.setItem('grammarText', grammar.getValue());
localStorage.setItem('codeText', code.getValue());
localStorage.setItem('optimizationMode', $('#opt-mode').val());
localStorage.setItem('startRule', $('#start-rule').val());
localStorage.setItem('packrat', $('#packrat').prop('checked'));
localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked'));
}
@ -75,6 +77,7 @@ function parse() {
const codeText = code.getValue();
const optimizationMode = $('#opt-mode').val();
const startRule = $('#start-rule').val();
const packrat = $('#packrat').prop('checked');
$grammarInfo.html('');
@ -97,7 +100,7 @@ function parse() {
'background-color': 'rgba(0, 0, 0, 0.1)'
});
window.setTimeout(() => {
const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat));
const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat, startRule));
$('#overlay').css({
'z-index': '-1',
'display': 'none',
@ -165,6 +168,7 @@ $('#code-info').on('click', 'li', makeOnClickInInfo(code));
// Event handling in the AST optimization
$('#opt-mode').on('change', setupTimer);
$('#start-rule').on('keydown', setupTimer);
$('#packrat').on('change', setupTimer);
$('#auto-refresh').on('change', () => {
updateLocalStorage();

View File

@ -21,7 +21,8 @@ std::string escape_json(const std::string &s) {
std::function<void(size_t, size_t, const std::string &, const std::string &)>
makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
init = true;
return [&](size_t ln, size_t col, const std::string &msg, const std::string &rule) mutable {
return [&](size_t ln, size_t col, const std::string &msg,
const std::string &rule) mutable {
if (!init) { json += ","; }
json += "{";
json += R"("ln":)" + std::to_string(ln) + ",";
@ -43,11 +44,11 @@ makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
}
// Loads `text` into `peg` as a PEG grammar and appends the messages logged
// while loading to `json`, wrapped in a JSON array ("[" ... "]").
// `startRule` is forwarded to peg.load_grammar() as the requested start rule.
// Returns whatever peg.load_grammar() returns.
bool parse_grammar(const std::string &text, peg::parser &peg,
std::string &json) {
const std::string &startRule, std::string &json) {
bool init;
// Install a logger that serializes each message into `json`; `init` is
// handed to the formatter so it can track whether a "," separator is needed.
peg.set_logger(makeJSONFormatter(peg, json, init));
json += "[";
auto ret = peg.load_grammar(text.data(), text.size());
auto ret = peg.load_grammar(text.data(), text.size(), startRule);
json += "]";
return ret;
}
@ -64,7 +65,7 @@ bool parse_code(const std::string &text, peg::parser &peg, std::string &json,
}
std::string lint(const std::string &grammarText, const std::string &codeText,
bool opt_mode, bool packrat) {
bool opt_mode, bool packrat, const std::string &startRule) {
std::string grammarResult;
std::string codeResult;
std::string astResult;
@ -72,7 +73,8 @@ std::string lint(const std::string &grammarText, const std::string &codeText,
std::string profileResult;
peg::parser peg;
auto is_grammar_valid = parse_grammar(grammarText, peg, grammarResult);
auto is_grammar_valid =
parse_grammar(grammarText, peg, startRule, grammarResult);
auto is_source_valid = false;
if (is_grammar_valid && peg) {

Binary file not shown.

117
peglib.h
View File

@ -3298,18 +3298,15 @@ using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
class ParserGenerator {
public:
static std::shared_ptr<Grammar> parse(const char *s, size_t n,
const Rules &rules, std::string &start,
bool &enablePackratParsing, Log log) {
return get_instance().perform_core(s, n, rules, start, enablePackratParsing,
log);
}
// Result bundle returned by ParserGenerator::parse(). The failure paths
// return a value-initialized instance (`return {}`), so `grammar` is null,
// `start` is empty and `enablePackratParsing` is false in that case.
struct ParserContext {
// The generated grammar; null when grammar generation failed.
std::shared_ptr<Grammar> grammar;
// Name of the rule that was selected as the start rule.
std::string start;
// Packrat setting copied from the parsed grammar data.
bool enablePackratParsing = false;
};
static std::shared_ptr<Grammar> parse(const char *s, size_t n,
std::string &start,
bool &enablePackratParsing, Log log) {
Rules dummy;
return parse(s, n, dummy, start, enablePackratParsing, log);
// Generates a grammar from the PEG text `s` of length `n`.
//   rules - user provided rules passed through to perform_core()
//   log   - callback that receives error messages (may be empty)
//   start - name of the rule to use as the start rule; when empty, the
//           start rule recorded while parsing the grammar text is used
// Returns a ParserContext; its `grammar` member is null on failure.
static ParserContext parse(const char *s, size_t n, const Rules &rules,
Log log, std::string_view start) {
// perform_core() takes the requested start rule as std::string by value,
// so the view is copied into an owning string here.
return get_instance().perform_core(s, n, rules, log, std::string(start));
}
// For debugging purpose
@ -3989,9 +3986,8 @@ private:
return true;
}
std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
const Rules &rules, std::string &start,
bool &enablePackratParsing, Log log) {
ParserContext perform_core(const char *s, size_t n, const Rules &rules,
Log log, std::string requested_start) {
Data data;
auto &grammar = *data.grammar;
@ -4023,7 +4019,7 @@ private:
log(line.first, line.second, "syntax error", r.error_info.label);
}
}
return nullptr;
return {};
}
// User provided rules
@ -4081,7 +4077,25 @@ private:
}
// Set root definition
auto &start_rule = grammar[data.start];
auto start = data.start;
if (!requested_start.empty()) {
if (grammar.count(requested_start)) {
start = requested_start;
} else {
if (log) {
auto line = line_info(s, s);
log(line.first, line.second,
"The specified start rule '" + requested_start + "' is undefined.",
"");
}
ret = false;
}
}
if (!ret) { return {}; }
auto &start_rule = grammar[start];
// Check if the start rule has ignore operator
{
@ -4096,7 +4110,7 @@ private:
}
}
if (!ret) { return nullptr; }
if (!ret) { return {}; }
// Check missing definitions
auto referenced = std::unordered_set<std::string>{
@ -4129,7 +4143,7 @@ private:
}
}
if (!ret) { return nullptr; }
if (!ret) { return {}; }
// Link references
for (auto &x : grammar) {
@ -4153,10 +4167,10 @@ private:
}
}
if (!ret) { return nullptr; }
if (!ret) { return {}; }
// Check infinite loop
if (detect_infiniteLoop(data, start_rule, log, s)) { return nullptr; }
if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }
// Automatic whitespace skipping
if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
@ -4169,7 +4183,7 @@ private:
auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
start_rule.whitespaceOpe = wsp(rule.get_core_operator());
if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; }
if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
}
// Word expression
@ -4177,7 +4191,7 @@ private:
auto &rule = grammar[WORD_DEFINITION_NAME];
start_rule.wordOpe = rule.get_core_operator();
if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; }
if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
}
// Apply instructions
@ -4189,9 +4203,7 @@ private:
const auto &info =
std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
if (!apply_precedence_instruction(rule, info, s, log)) {
return nullptr;
}
if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
} else if (instruction.type == "error_message") {
rule.error_message = std::any_cast<std::string>(instruction.data);
} else if (instruction.type == "no_ast_opt") {
@ -4200,11 +4212,7 @@ private:
}
}
// Set root definition
start = data.start;
enablePackratParsing = data.enablePackratParsing;
return data.grammar;
return {data.grammar, start, data.enablePackratParsing};
}
bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
@ -4530,43 +4538,52 @@ class parser {
public:
parser() = default;
parser(const char *s, size_t n, const Rules &rules) {
load_grammar(s, n, rules);
parser(const char *s, size_t n, const Rules &rules,
std::string_view start = {}) {
load_grammar(s, n, rules, start);
}
parser(const char *s, size_t n) : parser(s, n, Rules()) {}
parser(const char *s, size_t n, std::string_view start = {})
: parser(s, n, Rules(), start) {}
parser(std::string_view sv, const Rules &rules)
: parser(sv.data(), sv.size(), rules) {}
parser(std::string_view sv, const Rules &rules, std::string_view start = {})
: parser(sv.data(), sv.size(), rules, start) {}
parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {}
parser(std::string_view sv, std::string_view start = {})
: parser(sv.data(), sv.size(), Rules(), start) {}
#if defined(__cpp_lib_char8_t)
parser(std::u8string_view sv, const Rules &rules)
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules) {}
parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
start) {}
parser(std::u8string_view sv)
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules()) {}
parser(std::u8string_view sv, std::string_view start = {})
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
start) {}
#endif
operator bool() { return grammar_ != nullptr; }
// Builds the grammar from the PEG text `s` of length `n` together with the
// user provided `rules`, optionally forcing `start` as the start rule.
// Stores the resulting grammar, start rule name and packrat setting in this
// parser. Returns true when a grammar was produced, false otherwise.
bool load_grammar(const char *s, size_t n, const Rules &rules) {
grammar_ = ParserGenerator::parse(s, n, rules, start_,
enablePackratParsing_, log_);
bool load_grammar(const char *s, size_t n, const Rules &rules,
std::string_view start = {}) {
// ParserGenerator::parse() returns everything in one context object; on
// failure that object carries a null grammar.
auto cxt = ParserGenerator::parse(s, n, rules, log_, start);
grammar_ = cxt.grammar;
start_ = cxt.start;
enablePackratParsing_ = cxt.enablePackratParsing;
return grammar_ != nullptr;
}
bool load_grammar(const char *s, size_t n) {
return load_grammar(s, n, Rules());
bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
return load_grammar(s, n, Rules(), start);
}
bool load_grammar(std::string_view sv, const Rules &rules) {
return load_grammar(sv.data(), sv.size(), rules);
bool load_grammar(std::string_view sv, const Rules &rules,
std::string_view start = {}) {
return load_grammar(sv.data(), sv.size(), rules, start);
}
bool load_grammar(std::string_view sv) {
return load_grammar(sv.data(), sv.size());
bool load_grammar(std::string_view sv, std::string_view start = {}) {
return load_grammar(sv.data(), sv.size(), start);
}
bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
@ -4671,7 +4688,7 @@ public:
// Copies the parser-level packrat setting onto the start rule of the loaded
// grammar. Does nothing when no grammar is loaded.
void enable_packrat_parsing() {
if (grammar_ != nullptr) {
// The flag lives on the start rule definition, looked up by `start_`.
auto &rule = (*grammar_)[start_];
rule.enablePackratParsing = enablePackratParsing_ && true;
rule.enablePackratParsing = enablePackratParsing_;
}
}

View File

@ -423,7 +423,9 @@ TEST(GeneralTest, Skip_token_test2) {
}
TEST(GeneralTest, Custom_AST_test) {
struct CustomType { bool dummy = false; };
struct CustomType {
bool dummy = false;
};
using CustomAst = AstBase<CustomType>;
parser parser(R"(
@ -646,11 +648,8 @@ TEST(GeneralTest, Calculator_test2) {
NUMBER <- [0-9]+
)";
std::string start;
bool enablePackratParsing = false;
auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start,
enablePackratParsing, nullptr);
auto &g = *grammar;
auto cxt = ParserGenerator::parse(syntax, strlen(syntax), {}, nullptr, {});
auto &g = *cxt.grammar;
// Setup actions
auto reduce = [](const SemanticValues &vs) -> long {
@ -679,7 +678,7 @@ TEST(GeneralTest, Calculator_test2) {
// Parse
long val;
auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
EXPECT_TRUE(r.ret);
EXPECT_EQ(-3, val);
@ -1285,3 +1284,40 @@ TEST(GeneralTest, PassingContextAndOutputParameter) {
parser.parse<int>("42", dt, output);
EXPECT_EQ(42, output);
}
// Exercises the optional start-rule argument of the parser constructor and
// of load_grammar().
TEST(GeneralTest, SpecifyStartRule) {
auto grammar = R"(
Start <- A
A <- B (',' B)*
B <- '[one]' / '[two]'
%whitespace <- [ \t\n]*
)";
// A start rule that the grammar does not define leaves the parser invalid.
{
parser peg(grammar, "AAA");
EXPECT_FALSE(peg);
}
// A defined rule passed to the constructor yields a usable parser.
{
parser peg(grammar, "A");
EXPECT_TRUE(peg.parse(" [one] , [two] "));
}
// Reloading an already constructed parser with an explicit start rule.
{
parser peg(grammar);
EXPECT_TRUE(peg.parse(" [one] , [two] "));
peg.load_grammar(grammar, "A");
EXPECT_TRUE(peg.parse(" [one] , [two] "));
}
// Same as above, starting from a default-constructed parser.
{
parser peg;
peg.load_grammar(grammar);
EXPECT_TRUE(peg.parse(" [one] , [two] "));
peg.load_grammar(grammar, "A");
EXPECT_TRUE(peg.parse(" [one] , [two] "));
}
}