mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-04-01 00:12:08 +00:00
parent
2b022992a4
commit
79eb37c851
23
README.md
23
README.md
@ -647,6 +647,29 @@ custom_message.txt:1:8: code format error...
|
||||
|
||||
NOTE: If there is more than one element with an error message instruction in a prioritized choice, this feature may not work as you expect.
|
||||
|
||||
Change the Start Definition Rule
|
||||
--------------------------------
|
||||
|
||||
The start definition rule can be changed by passing the rule name to the `peg::parser` constructor or to `load_grammar`, as shown below.
|
||||
|
||||
```cpp
|
||||
auto grammar = R"(
|
||||
Start <- A
|
||||
A <- B (',' B)*
|
||||
B <- '[one]' / '[two]'
|
||||
%whitespace <- [ \t\n]*
|
||||
)";
|
||||
|
||||
peg::parser parser(grammar, "A"); // Start Rule is "A"
|
||||
|
||||
// or
|
||||
|
||||
peg::parser parser;
|
||||
parser.load_grammar(grammar, "A"); // Start Rule is "A"
|
||||
|
||||
parser.parse(" [one] , [two] "); // OK
|
||||
```
|
||||
|
||||
peglint - PEG syntax lint utility
|
||||
---------------------------------
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
<li><span>Source Code</span></li>
|
||||
<li class="editor-options">
|
||||
<ul class="editor-header-options">
|
||||
<li class="option"><label>Start Rule: </label><input id="start-rule" type="text"></li>
|
||||
<li class="option"><input id="packrat" type="checkbox"><label>Packrat</label></li>
|
||||
<li class="option"><input id="auto-refresh" type="checkbox"><label>Auto Refresh</label></li>
|
||||
<li class="option"><button id="parse" class="parse">Parse</button></li>
|
||||
|
@ -27,6 +27,7 @@ const codeAstOptimized = setupInfoArea("code-ast-optimized");
|
||||
const codeProfile = setupInfoArea("code-profile");
|
||||
|
||||
$('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all');
|
||||
$('#start-rule').val(localStorage.getItem('startRule') || '');
|
||||
$('#packrat').prop('checked', localStorage.getItem('packrat') === 'true');
|
||||
$('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true');
|
||||
$('#parse').prop('disabled', $('#auto-refresh').prop('checked'));
|
||||
@ -61,6 +62,7 @@ function updateLocalStorage() {
|
||||
localStorage.setItem('grammarText', grammar.getValue());
|
||||
localStorage.setItem('codeText', code.getValue());
|
||||
localStorage.setItem('optimizationMode', $('#opt-mode').val());
|
||||
localStorage.setItem('startRule', $('#start-rule').val());
|
||||
localStorage.setItem('packrat', $('#packrat').prop('checked'));
|
||||
localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked'));
|
||||
}
|
||||
@ -75,6 +77,7 @@ function parse() {
|
||||
const codeText = code.getValue();
|
||||
|
||||
const optimizationMode = $('#opt-mode').val();
|
||||
const startRule = $('#start-rule').val();
|
||||
const packrat = $('#packrat').prop('checked');
|
||||
|
||||
$grammarInfo.html('');
|
||||
@ -97,7 +100,7 @@ function parse() {
|
||||
'background-color': 'rgba(0, 0, 0, 0.1)'
|
||||
});
|
||||
window.setTimeout(() => {
|
||||
const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat));
|
||||
const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat, startRule));
|
||||
$('#overlay').css({
|
||||
'z-index': '-1',
|
||||
'display': 'none',
|
||||
@ -165,6 +168,7 @@ $('#code-info').on('click', 'li', makeOnClickInInfo(code));
|
||||
|
||||
// Event handling in the AST optimization
|
||||
$('#opt-mode').on('change', setupTimer);
|
||||
$('#start-rule').on('keydown', setupTimer);
|
||||
$('#packrat').on('change', setupTimer);
|
||||
$('#auto-refresh').on('change', () => {
|
||||
updateLocalStorage();
|
||||
|
@ -21,7 +21,8 @@ std::string escape_json(const std::string &s) {
|
||||
std::function<void(size_t, size_t, const std::string &, const std::string &)>
|
||||
makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
|
||||
init = true;
|
||||
return [&](size_t ln, size_t col, const std::string &msg, const std::string &rule) mutable {
|
||||
return [&](size_t ln, size_t col, const std::string &msg,
|
||||
const std::string &rule) mutable {
|
||||
if (!init) { json += ","; }
|
||||
json += "{";
|
||||
json += R"("ln":)" + std::to_string(ln) + ",";
|
||||
@ -43,11 +44,11 @@ makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) {
|
||||
}
|
||||
|
||||
bool parse_grammar(const std::string &text, peg::parser &peg,
|
||||
std::string &json) {
|
||||
const std::string &startRule, std::string &json) {
|
||||
bool init;
|
||||
peg.set_logger(makeJSONFormatter(peg, json, init));
|
||||
json += "[";
|
||||
auto ret = peg.load_grammar(text.data(), text.size());
|
||||
auto ret = peg.load_grammar(text.data(), text.size(), startRule);
|
||||
json += "]";
|
||||
return ret;
|
||||
}
|
||||
@ -64,7 +65,7 @@ bool parse_code(const std::string &text, peg::parser &peg, std::string &json,
|
||||
}
|
||||
|
||||
std::string lint(const std::string &grammarText, const std::string &codeText,
|
||||
bool opt_mode, bool packrat) {
|
||||
bool opt_mode, bool packrat, const std::string &startRule) {
|
||||
std::string grammarResult;
|
||||
std::string codeResult;
|
||||
std::string astResult;
|
||||
@ -72,7 +73,8 @@ std::string lint(const std::string &grammarText, const std::string &codeText,
|
||||
std::string profileResult;
|
||||
|
||||
peg::parser peg;
|
||||
auto is_grammar_valid = parse_grammar(grammarText, peg, grammarResult);
|
||||
auto is_grammar_valid =
|
||||
parse_grammar(grammarText, peg, startRule, grammarResult);
|
||||
auto is_source_valid = false;
|
||||
|
||||
if (is_grammar_valid && peg) {
|
||||
|
BIN
docs/native.wasm
BIN
docs/native.wasm
Binary file not shown.
117
peglib.h
117
peglib.h
@ -3298,18 +3298,15 @@ using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
|
||||
|
||||
class ParserGenerator {
|
||||
public:
|
||||
static std::shared_ptr<Grammar> parse(const char *s, size_t n,
|
||||
const Rules &rules, std::string &start,
|
||||
bool &enablePackratParsing, Log log) {
|
||||
return get_instance().perform_core(s, n, rules, start, enablePackratParsing,
|
||||
log);
|
||||
}
|
||||
struct ParserContext {
|
||||
std::shared_ptr<Grammar> grammar;
|
||||
std::string start;
|
||||
bool enablePackratParsing = false;
|
||||
};
|
||||
|
||||
static std::shared_ptr<Grammar> parse(const char *s, size_t n,
|
||||
std::string &start,
|
||||
bool &enablePackratParsing, Log log) {
|
||||
Rules dummy;
|
||||
return parse(s, n, dummy, start, enablePackratParsing, log);
|
||||
static ParserContext parse(const char *s, size_t n, const Rules &rules,
|
||||
Log log, std::string_view start) {
|
||||
return get_instance().perform_core(s, n, rules, log, std::string(start));
|
||||
}
|
||||
|
||||
// For debugging purpose
|
||||
@ -3989,9 +3986,8 @@ private:
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Grammar> perform_core(const char *s, size_t n,
|
||||
const Rules &rules, std::string &start,
|
||||
bool &enablePackratParsing, Log log) {
|
||||
ParserContext perform_core(const char *s, size_t n, const Rules &rules,
|
||||
Log log, std::string requested_start) {
|
||||
Data data;
|
||||
auto &grammar = *data.grammar;
|
||||
|
||||
@ -4023,7 +4019,7 @@ private:
|
||||
log(line.first, line.second, "syntax error", r.error_info.label);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
return {};
|
||||
}
|
||||
|
||||
// User provided rules
|
||||
@ -4081,7 +4077,25 @@ private:
|
||||
}
|
||||
|
||||
// Set root definition
|
||||
auto &start_rule = grammar[data.start];
|
||||
auto start = data.start;
|
||||
|
||||
if (!requested_start.empty()) {
|
||||
if (grammar.count(requested_start)) {
|
||||
start = requested_start;
|
||||
} else {
|
||||
if (log) {
|
||||
auto line = line_info(s, s);
|
||||
log(line.first, line.second,
|
||||
"The specified start rule '" + requested_start + "' is undefined.",
|
||||
"");
|
||||
}
|
||||
ret = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) { return {}; }
|
||||
|
||||
auto &start_rule = grammar[start];
|
||||
|
||||
// Check if the start rule has ignore operator
|
||||
{
|
||||
@ -4096,7 +4110,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) { return nullptr; }
|
||||
if (!ret) { return {}; }
|
||||
|
||||
// Check missing definitions
|
||||
auto referenced = std::unordered_set<std::string>{
|
||||
@ -4129,7 +4143,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) { return nullptr; }
|
||||
if (!ret) { return {}; }
|
||||
|
||||
// Link references
|
||||
for (auto &x : grammar) {
|
||||
@ -4153,10 +4167,10 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) { return nullptr; }
|
||||
if (!ret) { return {}; }
|
||||
|
||||
// Check infinite loop
|
||||
if (detect_infiniteLoop(data, start_rule, log, s)) { return nullptr; }
|
||||
if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }
|
||||
|
||||
// Automatic whitespace skipping
|
||||
if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
|
||||
@ -4169,7 +4183,7 @@ private:
|
||||
auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
|
||||
start_rule.whitespaceOpe = wsp(rule.get_core_operator());
|
||||
|
||||
if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; }
|
||||
if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
|
||||
}
|
||||
|
||||
// Word expression
|
||||
@ -4177,7 +4191,7 @@ private:
|
||||
auto &rule = grammar[WORD_DEFINITION_NAME];
|
||||
start_rule.wordOpe = rule.get_core_operator();
|
||||
|
||||
if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; }
|
||||
if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
|
||||
}
|
||||
|
||||
// Apply instructions
|
||||
@ -4189,9 +4203,7 @@ private:
|
||||
const auto &info =
|
||||
std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
|
||||
|
||||
if (!apply_precedence_instruction(rule, info, s, log)) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
|
||||
} else if (instruction.type == "error_message") {
|
||||
rule.error_message = std::any_cast<std::string>(instruction.data);
|
||||
} else if (instruction.type == "no_ast_opt") {
|
||||
@ -4200,11 +4212,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Set root definition
|
||||
start = data.start;
|
||||
enablePackratParsing = data.enablePackratParsing;
|
||||
|
||||
return data.grammar;
|
||||
return {data.grammar, start, data.enablePackratParsing};
|
||||
}
|
||||
|
||||
bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
|
||||
@ -4530,43 +4538,52 @@ class parser {
|
||||
public:
|
||||
parser() = default;
|
||||
|
||||
parser(const char *s, size_t n, const Rules &rules) {
|
||||
load_grammar(s, n, rules);
|
||||
parser(const char *s, size_t n, const Rules &rules,
|
||||
std::string_view start = {}) {
|
||||
load_grammar(s, n, rules, start);
|
||||
}
|
||||
|
||||
parser(const char *s, size_t n) : parser(s, n, Rules()) {}
|
||||
parser(const char *s, size_t n, std::string_view start = {})
|
||||
: parser(s, n, Rules(), start) {}
|
||||
|
||||
parser(std::string_view sv, const Rules &rules)
|
||||
: parser(sv.data(), sv.size(), rules) {}
|
||||
parser(std::string_view sv, const Rules &rules, std::string_view start = {})
|
||||
: parser(sv.data(), sv.size(), rules, start) {}
|
||||
|
||||
parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {}
|
||||
parser(std::string_view sv, std::string_view start = {})
|
||||
: parser(sv.data(), sv.size(), Rules(), start) {}
|
||||
|
||||
#if defined(__cpp_lib_char8_t)
|
||||
parser(std::u8string_view sv, const Rules &rules)
|
||||
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules) {}
|
||||
parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
|
||||
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
|
||||
start) {}
|
||||
|
||||
parser(std::u8string_view sv)
|
||||
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules()) {}
|
||||
parser(std::u8string_view sv, std::string_view start = {})
|
||||
: parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
|
||||
start) {}
|
||||
#endif
|
||||
|
||||
operator bool() { return grammar_ != nullptr; }
|
||||
|
||||
bool load_grammar(const char *s, size_t n, const Rules &rules) {
|
||||
grammar_ = ParserGenerator::parse(s, n, rules, start_,
|
||||
enablePackratParsing_, log_);
|
||||
bool load_grammar(const char *s, size_t n, const Rules &rules,
|
||||
std::string_view start = {}) {
|
||||
auto cxt = ParserGenerator::parse(s, n, rules, log_, start);
|
||||
grammar_ = cxt.grammar;
|
||||
start_ = cxt.start;
|
||||
enablePackratParsing_ = cxt.enablePackratParsing;
|
||||
return grammar_ != nullptr;
|
||||
}
|
||||
|
||||
bool load_grammar(const char *s, size_t n) {
|
||||
return load_grammar(s, n, Rules());
|
||||
bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
|
||||
return load_grammar(s, n, Rules(), start);
|
||||
}
|
||||
|
||||
bool load_grammar(std::string_view sv, const Rules &rules) {
|
||||
return load_grammar(sv.data(), sv.size(), rules);
|
||||
bool load_grammar(std::string_view sv, const Rules &rules,
|
||||
std::string_view start = {}) {
|
||||
return load_grammar(sv.data(), sv.size(), rules, start);
|
||||
}
|
||||
|
||||
bool load_grammar(std::string_view sv) {
|
||||
return load_grammar(sv.data(), sv.size());
|
||||
bool load_grammar(std::string_view sv, std::string_view start = {}) {
|
||||
return load_grammar(sv.data(), sv.size(), start);
|
||||
}
|
||||
|
||||
bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
|
||||
@ -4671,7 +4688,7 @@ public:
|
||||
void enable_packrat_parsing() {
|
||||
if (grammar_ != nullptr) {
|
||||
auto &rule = (*grammar_)[start_];
|
||||
rule.enablePackratParsing = enablePackratParsing_ && true;
|
||||
rule.enablePackratParsing = enablePackratParsing_;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -423,7 +423,9 @@ TEST(GeneralTest, Skip_token_test2) {
|
||||
}
|
||||
|
||||
TEST(GeneralTest, Custom_AST_test) {
|
||||
struct CustomType { bool dummy = false; };
|
||||
struct CustomType {
|
||||
bool dummy = false;
|
||||
};
|
||||
using CustomAst = AstBase<CustomType>;
|
||||
|
||||
parser parser(R"(
|
||||
@ -646,11 +648,8 @@ TEST(GeneralTest, Calculator_test2) {
|
||||
NUMBER <- [0-9]+
|
||||
)";
|
||||
|
||||
std::string start;
|
||||
bool enablePackratParsing = false;
|
||||
auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start,
|
||||
enablePackratParsing, nullptr);
|
||||
auto &g = *grammar;
|
||||
auto cxt = ParserGenerator::parse(syntax, strlen(syntax), {}, nullptr, {});
|
||||
auto &g = *cxt.grammar;
|
||||
|
||||
// Setup actions
|
||||
auto reduce = [](const SemanticValues &vs) -> long {
|
||||
@ -679,7 +678,7 @@ TEST(GeneralTest, Calculator_test2) {
|
||||
|
||||
// Parse
|
||||
long val;
|
||||
auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
|
||||
auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val);
|
||||
|
||||
EXPECT_TRUE(r.ret);
|
||||
EXPECT_EQ(-3, val);
|
||||
@ -1285,3 +1284,40 @@ TEST(GeneralTest, PassingContextAndOutputParameter) {
|
||||
parser.parse<int>("42", dt, output);
|
||||
EXPECT_EQ(42, output);
|
||||
}
|
||||
|
||||
TEST(GeneralTest, SpecifyStartRule) {
|
||||
auto grammar = R"(
|
||||
Start <- A
|
||||
A <- B (',' B)*
|
||||
B <- '[one]' / '[two]'
|
||||
%whitespace <- [ \t\n]*
|
||||
)";
|
||||
|
||||
{
|
||||
parser peg(grammar, "AAA");
|
||||
EXPECT_FALSE(peg);
|
||||
}
|
||||
|
||||
{
|
||||
parser peg(grammar, "A");
|
||||
EXPECT_TRUE(peg.parse(" [one] , [two] "));
|
||||
}
|
||||
|
||||
{
|
||||
parser peg(grammar);
|
||||
EXPECT_TRUE(peg.parse(" [one] , [two] "));
|
||||
|
||||
peg.load_grammar(grammar, "A");
|
||||
EXPECT_TRUE(peg.parse(" [one] , [two] "));
|
||||
}
|
||||
|
||||
{
|
||||
parser peg;
|
||||
|
||||
peg.load_grammar(grammar);
|
||||
EXPECT_TRUE(peg.parse(" [one] , [two] "));
|
||||
|
||||
peg.load_grammar(grammar, "A");
|
||||
EXPECT_TRUE(peg.parse(" [one] , [two] "));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user