This commit is contained in:
yhirose 2024-09-03 10:30:33 -04:00 committed by GitHub
parent 2b022992a4
commit 79eb37c851
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 146 additions and 63 deletions

View File

@ -647,6 +647,29 @@ custom_message.txt:1:8: code format error...
NOTE: If more than one element in a prioritized choice has an error message instruction, this feature may not work as you expect. NOTE: If more than one element in a prioritized choice has an error message instruction, this feature may not work as you expect.
Change the Start Definition Rule
--------------------------------
We can change the start definition rule as below.
```cpp
auto grammar = R"(
Start <- A
A <- B (',' B)*
B <- '[one]' / '[two]'
%whitespace <- [ \t\n]*
)";
peg::parser parser(grammar, "A"); // Start Rule is "A"
// or, alternatively:
peg::parser parser;
parser.load_grammar(grammar, "A"); // Start Rule is "A"
parser.parse(" [one] , [two] "); // OK
```
peglint - PEG syntax lint utility peglint - PEG syntax lint utility
--------------------------------- ---------------------------------

View File

@ -20,6 +20,7 @@
<li><span>Source Code</span></li> <li><span>Source Code</span></li>
<li class="editor-options"> <li class="editor-options">
<ul class="editor-header-options"> <ul class="editor-header-options">
<li class="option"><label>Start Rule: </label><input id="start-rule" type="text"></li>
<li class="option"><input id="packrat" type="checkbox"><label>Packrat</label></li> <li class="option"><input id="packrat" type="checkbox"><label>Packrat</label></li>
<li class="option"><input id="auto-refresh" type="checkbox"><label>Auto Refresh</label></li> <li class="option"><input id="auto-refresh" type="checkbox"><label>Auto Refresh</label></li>
<li class="option"><button id="parse" class="parse">Parse</button></li> <li class="option"><button id="parse" class="parse">Parse</button></li>

View File

@ -27,6 +27,7 @@ const codeAstOptimized = setupInfoArea("code-ast-optimized");
const codeProfile = setupInfoArea("code-profile"); const codeProfile = setupInfoArea("code-profile");
$('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all'); $('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all');
$('#start-rule').val(localStorage.getItem('startRule') || '');
$('#packrat').prop('checked', localStorage.getItem('packrat') === 'true'); $('#packrat').prop('checked', localStorage.getItem('packrat') === 'true');
$('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true'); $('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true');
$('#parse').prop('disabled', $('#auto-refresh').prop('checked')); $('#parse').prop('disabled', $('#auto-refresh').prop('checked'));
@ -61,6 +62,7 @@ function updateLocalStorage() {
localStorage.setItem('grammarText', grammar.getValue()); localStorage.setItem('grammarText', grammar.getValue());
localStorage.setItem('codeText', code.getValue()); localStorage.setItem('codeText', code.getValue());
localStorage.setItem('optimizationMode', $('#opt-mode').val()); localStorage.setItem('optimizationMode', $('#opt-mode').val());
localStorage.setItem('startRule', $('#start-rule').val());
localStorage.setItem('packrat', $('#packrat').prop('checked')); localStorage.setItem('packrat', $('#packrat').prop('checked'));
localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked')); localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked'));
} }
@ -75,6 +77,7 @@ function parse() {
const codeText = code.getValue(); const codeText = code.getValue();
const optimizationMode = $('#opt-mode').val(); const optimizationMode = $('#opt-mode').val();
const startRule = $('#start-rule').val();
const packrat = $('#packrat').prop('checked'); const packrat = $('#packrat').prop('checked');
$grammarInfo.html(''); $grammarInfo.html('');
@ -97,7 +100,7 @@ function parse() {
'background-color': 'rgba(0, 0, 0, 0.1)' 'background-color': 'rgba(0, 0, 0, 0.1)'
}); });
window.setTimeout(() => { window.setTimeout(() => {
const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat)); const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat, startRule));
$('#overlay').css({ $('#overlay').css({
'z-index': '-1', 'z-index': '-1',
'display': 'none', 'display': 'none',
@ -165,6 +168,7 @@ $('#code-info').on('click', 'li', makeOnClickInInfo(code));
// Event handling in the AST optimization // Event handling in the AST optimization
$('#opt-mode').on('change', setupTimer); $('#opt-mode').on('change', setupTimer);
$('#start-rule').on('keydown', setupTimer);
$('#packrat').on('change', setupTimer); $('#packrat').on('change', setupTimer);
$('#auto-refresh').on('change', () => { $('#auto-refresh').on('change', () => {
updateLocalStorage(); updateLocalStorage();

View File

@ -21,7 +21,8 @@ std::string escape_json(const std::string &s) {
std::function<void(size_t, size_t, const std::string &, const std::string &)> std::function<void(size_t, size_t, const std::string &, const std::string &)>
makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) { makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
init = true; init = true;
return [&](size_t ln, size_t col, const std::string &msg, const std::string &rule) mutable { return [&](size_t ln, size_t col, const std::string &msg,
const std::string &rule) mutable {
if (!init) { json += ","; } if (!init) { json += ","; }
json += "{"; json += "{";
json += R"("ln":)" + std::to_string(ln) + ","; json += R"("ln":)" + std::to_string(ln) + ",";
@ -43,11 +44,11 @@ makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
} }
bool parse_grammar(const std::string &text, peg::parser &peg, bool parse_grammar(const std::string &text, peg::parser &peg,
std::string &json) { const std::string &startRule, std::string &json) {
bool init; bool init;
peg.set_logger(makeJSONFormatter(peg, json, init)); peg.set_logger(makeJSONFormatter(peg, json, init));
json += "["; json += "[";
auto ret = peg.load_grammar(text.data(), text.size()); auto ret = peg.load_grammar(text.data(), text.size(), startRule);
json += "]"; json += "]";
return ret; return ret;
} }
@ -64,7 +65,7 @@ bool parse_code(const std::string &text, peg::parser &peg, std::string &json,
} }
std::string lint(const std::string &grammarText, const std::string &codeText, std::string lint(const std::string &grammarText, const std::string &codeText,
bool opt_mode, bool packrat) { bool opt_mode, bool packrat, const std::string &startRule) {
std::string grammarResult; std::string grammarResult;
std::string codeResult; std::string codeResult;
std::string astResult; std::string astResult;
@ -72,7 +73,8 @@ std::string lint(const std::string &grammarText, const std::string &codeText,
std::string profileResult; std::string profileResult;
peg::parser peg; peg::parser peg;
auto is_grammar_valid = parse_grammar(grammarText, peg, grammarResult); auto is_grammar_valid =
parse_grammar(grammarText, peg, startRule, grammarResult);
auto is_source_valid = false; auto is_source_valid = false;
if (is_grammar_valid && peg) { if (is_grammar_valid && peg) {

Binary file not shown.

117
peglib.h
View File

@ -3298,18 +3298,15 @@ using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
class ParserGenerator { class ParserGenerator {
public: public:
static std::shared_ptr<Grammar> parse(const char *s, size_t n, struct ParserContext {
const Rules &rules, std::string &start, std::shared_ptr<Grammar> grammar;
bool &enablePackratParsing, Log log) { std::string start;
return get_instance().perform_core(s, n, rules, start, enablePackratParsing, bool enablePackratParsing = false;
log); };
}
static std::shared_ptr<Grammar> parse(const char *s, size_t n, static ParserContext parse(const char *s, size_t n, const Rules &rules,
std::string &start, Log log, std::string_view start) {
bool &enablePackratParsing, Log log) { return get_instance().perform_core(s, n, rules, log, std::string(start));
Rules dummy;
return parse(s, n, dummy, start, enablePackratParsing, log);
} }
// For debugging purpose // For debugging purpose
@ -3989,9 +3986,8 @@ private:
return true; return true;
} }
std::shared_ptr<Grammar> perform_core(const char *s, size_t n, ParserContext perform_core(const char *s, size_t n, const Rules &rules,
const Rules &rules, std::string &start, Log log, std::string requested_start) {
bool &enablePackratParsing, Log log) {
Data data; Data data;
auto &grammar = *data.grammar; auto &grammar = *data.grammar;
@ -4023,7 +4019,7 @@ private:
log(line.first, line.second, "syntax error", r.error_info.label); log(line.first, line.second, "syntax error", r.error_info.label);
} }
} }
return nullptr; return {};
} }
// User provided rules // User provided rules
@ -4081,7 +4077,25 @@ private:
} }
// Set root definition // Set root definition
auto &start_rule = grammar[data.start]; auto start = data.start;
if (!requested_start.empty()) {
if (grammar.count(requested_start)) {
start = requested_start;
} else {
if (log) {
auto line = line_info(s, s);
log(line.first, line.second,
"The specified start rule '" + requested_start + "' is undefined.",
"");
}
ret = false;
}
}
if (!ret) { return {}; }
auto &start_rule = grammar[start];
// Check if the start rule has ignore operator // Check if the start rule has ignore operator
{ {
@ -4096,7 +4110,7 @@ private:
} }
} }
if (!ret) { return nullptr; } if (!ret) { return {}; }
// Check missing definitions // Check missing definitions
auto referenced = std::unordered_set<std::string>{ auto referenced = std::unordered_set<std::string>{
@ -4129,7 +4143,7 @@ private:
} }
} }
if (!ret) { return nullptr; } if (!ret) { return {}; }
// Link references // Link references
for (auto &x : grammar) { for (auto &x : grammar) {
@ -4153,10 +4167,10 @@ private:
} }
} }
if (!ret) { return nullptr; } if (!ret) { return {}; }
// Check infinite loop // Check infinite loop
if (detect_infiniteLoop(data, start_rule, log, s)) { return nullptr; } if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }
// Automatic whitespace skipping // Automatic whitespace skipping
if (grammar.count(WHITESPACE_DEFINITION_NAME)) { if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
@ -4169,7 +4183,7 @@ private:
auto &rule = grammar[WHITESPACE_DEFINITION_NAME]; auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
start_rule.whitespaceOpe = wsp(rule.get_core_operator()); start_rule.whitespaceOpe = wsp(rule.get_core_operator());
if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; } if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
} }
// Word expression // Word expression
@ -4177,7 +4191,7 @@ private:
auto &rule = grammar[WORD_DEFINITION_NAME]; auto &rule = grammar[WORD_DEFINITION_NAME];
start_rule.wordOpe = rule.get_core_operator(); start_rule.wordOpe = rule.get_core_operator();
if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; } if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
} }
// Apply instructions // Apply instructions
@ -4189,9 +4203,7 @@ private:
const auto &info = const auto &info =
std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data); std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
if (!apply_precedence_instruction(rule, info, s, log)) { if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
return nullptr;
}
} else if (instruction.type == "error_message") { } else if (instruction.type == "error_message") {
rule.error_message = std::any_cast<std::string>(instruction.data); rule.error_message = std::any_cast<std::string>(instruction.data);
} else if (instruction.type == "no_ast_opt") { } else if (instruction.type == "no_ast_opt") {
@ -4200,11 +4212,7 @@ private:
} }
} }
// Set root definition return {data.grammar, start, data.enablePackratParsing};
start = data.start;
enablePackratParsing = data.enablePackratParsing;
return data.grammar;
} }
bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log, bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
@ -4530,43 +4538,52 @@ class parser {
public: public:
parser() = default; parser() = default;
parser(const char *s, size_t n, const Rules &rules) { parser(const char *s, size_t n, const Rules &rules,
load_grammar(s, n, rules); std::string_view start = {}) {
load_grammar(s, n, rules, start);
} }
parser(const char *s, size_t n) : parser(s, n, Rules()) {} parser(const char *s, size_t n, std::string_view start = {})
: parser(s, n, Rules(), start) {}
parser(std::string_view sv, const Rules &rules) parser(std::string_view sv, const Rules &rules, std::string_view start = {})
: parser(sv.data(), sv.size(), rules) {} : parser(sv.data(), sv.size(), rules, start) {}
parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {} parser(std::string_view sv, std::string_view start = {})
: parser(sv.data(), sv.size(), Rules(), start) {}
#if defined(__cpp_lib_char8_t) #if defined(__cpp_lib_char8_t)
parser(std::u8string_view sv, const Rules &rules) parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules) {} : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
start) {}
parser(std::u8string_view sv) parser(std::u8string_view sv, std::string_view start = {})
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules()) {} : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
start) {}
#endif #endif
operator bool() { return grammar_ != nullptr; } operator bool() { return grammar_ != nullptr; }
bool load_grammar(const char *s, size_t n, const Rules &rules) { bool load_grammar(const char *s, size_t n, const Rules &rules,
grammar_ = ParserGenerator::parse(s, n, rules, start_, std::string_view start = {}) {
enablePackratParsing_, log_); auto cxt = ParserGenerator::parse(s, n, rules, log_, start);
grammar_ = cxt.grammar;
start_ = cxt.start;
enablePackratParsing_ = cxt.enablePackratParsing;
return grammar_ != nullptr; return grammar_ != nullptr;
} }
bool load_grammar(const char *s, size_t n) { bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
return load_grammar(s, n, Rules()); return load_grammar(s, n, Rules(), start);
} }
bool load_grammar(std::string_view sv, const Rules &rules) { bool load_grammar(std::string_view sv, const Rules &rules,
return load_grammar(sv.data(), sv.size(), rules); std::string_view start = {}) {
return load_grammar(sv.data(), sv.size(), rules, start);
} }
bool load_grammar(std::string_view sv) { bool load_grammar(std::string_view sv, std::string_view start = {}) {
return load_grammar(sv.data(), sv.size()); return load_grammar(sv.data(), sv.size(), start);
} }
bool parse_n(const char *s, size_t n, const char *path = nullptr) const { bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
@ -4671,7 +4688,7 @@ public:
void enable_packrat_parsing() { void enable_packrat_parsing() {
if (grammar_ != nullptr) { if (grammar_ != nullptr) {
auto &rule = (*grammar_)[start_]; auto &rule = (*grammar_)[start_];
rule.enablePackratParsing = enablePackratParsing_ && true; rule.enablePackratParsing = enablePackratParsing_;
} }
} }

View File

@ -423,7 +423,9 @@ TEST(GeneralTest, Skip_token_test2) {
} }
TEST(GeneralTest, Custom_AST_test) { TEST(GeneralTest, Custom_AST_test) {
struct CustomType { bool dummy = false; }; struct CustomType {
bool dummy = false;
};
using CustomAst = AstBase<CustomType>; using CustomAst = AstBase<CustomType>;
parser parser(R"( parser parser(R"(
@ -646,11 +648,8 @@ TEST(GeneralTest, Calculator_test2) {
NUMBER <- [0-9]+ NUMBER <- [0-9]+
)"; )";
std::string start; auto cxt = ParserGenerator::parse(syntax, strlen(syntax), {}, nullptr, {});
bool enablePackratParsing = false; auto &g = *cxt.grammar;
auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start,
enablePackratParsing, nullptr);
auto &g = *grammar;
// Setup actions // Setup actions
auto reduce = [](const SemanticValues &vs) -> long { auto reduce = [](const SemanticValues &vs) -> long {
@ -679,7 +678,7 @@ TEST(GeneralTest, Calculator_test2) {
// Parse // Parse
long val; long val;
auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val); auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
EXPECT_TRUE(r.ret); EXPECT_TRUE(r.ret);
EXPECT_EQ(-3, val); EXPECT_EQ(-3, val);
@ -1285,3 +1284,40 @@ TEST(GeneralTest, PassingContextAndOutputParameter) {
parser.parse<int>("42", dt, output); parser.parse<int>("42", dt, output);
EXPECT_EQ(42, output); EXPECT_EQ(42, output);
} }
// Verifies that a start rule other than the grammar's default one can be
// requested through both the `parser` constructor and `load_grammar`, and that
// requesting a rule the grammar does not define is reported as a load failure.
TEST(GeneralTest, SpecifyStartRule) {
  auto grammar = R"(
Start <- A
A <- B (',' B)*
B <- '[one]' / '[two]'
%whitespace <- [ \t\n]*
)";

  // An undefined start rule must leave the parser without a usable grammar,
  // whether it is requested at construction time or through `load_grammar`.
  {
    parser peg(grammar, "AAA");
    EXPECT_FALSE(peg);

    parser reloaded;
    EXPECT_FALSE(reloaded.load_grammar(grammar, "AAA"));
    EXPECT_FALSE(reloaded);
  }

  // Start rule supplied at construction time.
  {
    parser peg(grammar, "A");
    EXPECT_TRUE(peg);
    EXPECT_TRUE(peg.parse(" [one] , [two] "));
  }

  // Reloading an already-constructed parser with an explicit start rule.
  // The load result is asserted directly so that a failure is attributed to
  // grammar loading rather than to the subsequent parse.
  {
    parser peg(grammar);
    EXPECT_TRUE(peg);
    EXPECT_TRUE(peg.parse(" [one] , [two] "));

    EXPECT_TRUE(peg.load_grammar(grammar, "A"));
    EXPECT_TRUE(peg.parse(" [one] , [two] "));
  }

  // Same sequence, starting from a default-constructed parser.
  {
    parser peg;
    EXPECT_TRUE(peg.load_grammar(grammar));
    EXPECT_TRUE(peg.parse(" [one] , [two] "));

    EXPECT_TRUE(peg.load_grammar(grammar, "A"));
    EXPECT_TRUE(peg.parse(" [one] , [two] "));
  }
}