mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 11:55:30 +00:00
Resolve #228
This commit is contained in:
parent
23c080af49
commit
5934f0abba
@ -19,9 +19,11 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
|||||||
set(add_link_deps Threads::Threads)
|
set(add_link_deps Threads::Threads)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(example)
|
|
||||||
add_subdirectory(lint)
|
add_subdirectory(lint)
|
||||||
|
|
||||||
|
add_subdirectory(example)
|
||||||
|
# add_subdirectory(cymbol)
|
||||||
|
|
||||||
if (${PEGLIB_BUILD_TESTS})
|
if (${PEGLIB_BUILD_TESTS})
|
||||||
add_subdirectory(test)
|
add_subdirectory(test)
|
||||||
enable_testing()
|
enable_testing()
|
||||||
|
30
README.md
30
README.md
@ -33,6 +33,9 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
|
|||||||
* `%recovery(` ... `)` (Error recovery operator)
|
* `%recovery(` ... `)` (Error recovery operator)
|
||||||
* `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`)
|
* `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`)
|
||||||
* `label { message "..." }` (Error message instruction)
|
* `label { message "..." }` (Error message instruction)
|
||||||
|
* `{ no_ast_opt }` (No AST node optimization instruction)
|
||||||
|
* `{ declare_symbol table_name }` (Declare symbol instruction)
|
||||||
|
* `{ check_symbol table_name }` (Check symbol instruction)
|
||||||
|
|
||||||
The 'End of Input' check is performed by default. To disable it, call `disable_eoi_check`.
|
The 'End of Input' check is performed by default. To disable it, call `disable_eoi_check`.
|
||||||
|
|
||||||
@ -456,6 +459,33 @@ It internally calls `peg::AstOptimizer` to do the job. You can make your own AST
|
|||||||
|
|
||||||
See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc).
|
See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc).
|
||||||
|
|
||||||
|
Symbol Table
|
||||||
|
------------
|
||||||
|
|
||||||
|
Simple symbol table support is available with `declare_symbol` and `check_symbol` instructions.
|
||||||
|
|
||||||
|
```peg
|
||||||
|
S <- (Decl / Ref)*
|
||||||
|
Decl <- 'decl' symbol(Name)
|
||||||
|
Ref <- 'ref' is_symbol(Name)
|
||||||
|
Name <- < [a-zA-Z]+ >
|
||||||
|
%whitespace <- [ \t\r\n]*
|
||||||
|
|
||||||
|
# 'var_table' is a table name.
|
||||||
|
symbol(s) <- s { declare_symbol var_table } # Declare symbol instruction
|
||||||
|
is_symbol(s) <- s { check_symbol var_table } # Check symbol instruction
|
||||||
|
```
|
||||||
|
|
||||||
|
If we parse the following text with the above grammar, it will fail.
|
||||||
|
|
||||||
|
```
|
||||||
|
decl aaa
|
||||||
|
ref aaa
|
||||||
|
ref bbb
|
||||||
|
```
|
||||||
|
|
||||||
|
Parsing fails because line 3 references the undeclared symbol 'bbb'.
|
||||||
|
|
||||||
Make a parser with parser combinators
|
Make a parser with parser combinators
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
|
BIN
docs/native.wasm
BIN
docs/native.wasm
Binary file not shown.
@ -20,6 +20,3 @@ target_link_libraries(calc5 ${add_link_deps})
|
|||||||
|
|
||||||
add_executable(indent indent.cc)
|
add_executable(indent indent.cc)
|
||||||
target_link_libraries(indent ${add_link_deps})
|
target_link_libraries(indent ${add_link_deps})
|
||||||
|
|
||||||
add_executable(symbol_check symbol_check.cc)
|
|
||||||
target_link_libraries(symbol_check ${add_link_deps})
|
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
//
|
|
||||||
// symbol_check.cc
|
|
||||||
//
|
|
||||||
// Copyright (c) 2022 Yuji Hirose. All rights reserved.
|
|
||||||
// MIT License
|
|
||||||
//
|
|
||||||
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <iostream>
|
|
||||||
#include <peglib.h>
|
|
||||||
#include <set>
|
|
||||||
|
|
||||||
using namespace peg;
|
|
||||||
|
|
||||||
int main(void) {
|
|
||||||
parser parser(R"(
|
|
||||||
S <- (Decl / Ref)*
|
|
||||||
Decl <- 'decl' symbol(Name)
|
|
||||||
Ref <- 'ref' symbol_reference(Name)
|
|
||||||
Name <- < [a-zA-Z]+ >
|
|
||||||
%whitespace <- [ \t\r\n]*
|
|
||||||
|
|
||||||
symbol(s) <- < s >
|
|
||||||
symbol_reference(s) <- < s >
|
|
||||||
)");
|
|
||||||
|
|
||||||
std::set<std::string> dic;
|
|
||||||
|
|
||||||
parser[R"(symbol)"].predicate =
|
|
||||||
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
|
||||||
auto tok = vs.token_to_string();
|
|
||||||
if (dic.find(tok) != dic.end()) {
|
|
||||||
msg = "'" + tok + "' already exists...";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
dic.insert(tok);
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
parser[R"(symbol_reference)"].predicate =
|
|
||||||
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
|
||||||
auto tok = vs.token_to_string();
|
|
||||||
if (dic.find(tok) == dic.end()) {
|
|
||||||
msg = "'" + tok + "' doesn't exists...";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
parser.enable_ast();
|
|
||||||
|
|
||||||
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
|
||||||
std::cerr << line << ":" << col << ": " << msg << "\n";
|
|
||||||
};
|
|
||||||
|
|
||||||
const auto source = R"(decl aaa
|
|
||||||
ref aaa
|
|
||||||
ref bbb
|
|
||||||
)";
|
|
||||||
|
|
||||||
std::shared_ptr<Ast> ast;
|
|
||||||
if (parser.parse(source, ast)) {
|
|
||||||
ast = parser.optimize_ast(ast);
|
|
||||||
std::cout << ast_to_s(ast);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
77
peglib.h
77
peglib.h
@ -833,6 +833,8 @@ public:
|
|||||||
std::any trace_data;
|
std::any trace_data;
|
||||||
const bool verbose_trace;
|
const bool verbose_trace;
|
||||||
|
|
||||||
|
std::map<std::string, std::unordered_set<std::string>> symbol_tables;
|
||||||
|
|
||||||
Log log;
|
Log log;
|
||||||
|
|
||||||
Context(const char *path, const char *s, size_t l, size_t def_count,
|
Context(const char *path, const char *s, size_t l, size_t def_count,
|
||||||
@ -1950,7 +1952,7 @@ struct DetectLeftRecursion : public Ope::Visitor {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::string name_;
|
std::string name_;
|
||||||
std::set<std::string> refs_;
|
std::unordered_set<std::string> refs_;
|
||||||
bool done_ = false;
|
bool done_ = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -2425,6 +2427,10 @@ public:
|
|||||||
std::string error_message;
|
std::string error_message;
|
||||||
bool no_ast_opt = false;
|
bool no_ast_opt = false;
|
||||||
|
|
||||||
|
bool declare_symbol = false;
|
||||||
|
bool check_symbol = false;
|
||||||
|
std::string symbol_table_name;
|
||||||
|
|
||||||
bool eoi_check = true;
|
bool eoi_check = true;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -2691,13 +2697,40 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
|
|||||||
c.rule_stack.pop_back();
|
c.rule_stack.pop_back();
|
||||||
|
|
||||||
std::string msg;
|
std::string msg;
|
||||||
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
|
|
||||||
|
if (success(len)) {
|
||||||
|
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
|
||||||
|
len = static_cast<size_t>(-1);
|
||||||
|
} else if (outer_->declare_symbol) {
|
||||||
|
assert(outer_->is_token());
|
||||||
|
auto symbol = vs.token_to_string();
|
||||||
|
auto &table = c.symbol_tables[outer_->symbol_table_name];
|
||||||
|
auto ret = table.find(symbol) != table.end();
|
||||||
|
if (ret) {
|
||||||
|
msg = "'" + symbol + "' already exists.";
|
||||||
|
len = static_cast<size_t>(-1);
|
||||||
|
} else {
|
||||||
|
table.insert(symbol);
|
||||||
|
}
|
||||||
|
} else if (outer_->check_symbol) {
|
||||||
|
assert(outer_->is_token());
|
||||||
|
auto symbol = vs.token_to_string();
|
||||||
|
auto &table = c.symbol_tables[outer_->symbol_table_name];
|
||||||
|
auto ret = table.find(symbol) != table.end();
|
||||||
|
if (!ret) {
|
||||||
|
msg = "'" + symbol + "' doesn't exist.";
|
||||||
|
len = static_cast<size_t>(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fail(len)) {
|
||||||
if (c.log && !msg.empty() && c.error_info.message_pos < s) {
|
if (c.log && !msg.empty() && c.error_info.message_pos < s) {
|
||||||
c.error_info.message_pos = s;
|
c.error_info.message_pos = s;
|
||||||
c.error_info.message = msg;
|
c.error_info.message = msg;
|
||||||
}
|
}
|
||||||
len = static_cast<size_t>(-1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3378,8 +3411,9 @@ private:
|
|||||||
opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
|
opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
|
||||||
g["InstructionItem"])))),
|
g["InstructionItem"])))),
|
||||||
g["EndBlacket"]);
|
g["EndBlacket"]);
|
||||||
g["InstructionItem"] <=
|
g["InstructionItem"] <= cho(g["PrecedenceClimbing"], g["ErrorMessage"],
|
||||||
cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]);
|
g["NoAstOpt"], g["DeclareSymbol"],
|
||||||
|
g["CheckSymbol"]);
|
||||||
~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
|
~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
|
||||||
|
|
||||||
~g["SpacesZom"] <= zom(g["Space"]);
|
~g["SpacesZom"] <= zom(g["Space"]);
|
||||||
@ -3412,6 +3446,12 @@ private:
|
|||||||
// No Ast node optimization instruction
|
// No Ast node optimization instruction
|
||||||
g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
|
g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
|
||||||
|
|
||||||
|
// Symbol table instruction
|
||||||
|
g["DeclareSymbol"] <= seq(lit("declare_symbol"), g["SpacesZom"],
|
||||||
|
g["Identifier"], g["SpacesZom"]);
|
||||||
|
g["CheckSymbol"] <= seq(lit("check_symbol"), g["SpacesZom"],
|
||||||
|
g["Identifier"], g["SpacesZom"]);
|
||||||
|
|
||||||
// Set definition names
|
// Set definition names
|
||||||
for (auto &x : g) {
|
for (auto &x : g) {
|
||||||
x.second.name = x.first;
|
x.second.name = x.first;
|
||||||
@ -3441,13 +3481,16 @@ private:
|
|||||||
|
|
||||||
if (has_instructions) {
|
if (has_instructions) {
|
||||||
auto index = is_macro ? 5 : 4;
|
auto index = is_macro ? 5 : 4;
|
||||||
std::set<std::string> types;
|
std::unordered_set<std::string> types;
|
||||||
for (const auto &instruction :
|
for (const auto &instruction :
|
||||||
std::any_cast<std::vector<Instruction>>(vs[index])) {
|
std::any_cast<std::vector<Instruction>>(vs[index])) {
|
||||||
const auto &type = instruction.type;
|
const auto &type = instruction.type;
|
||||||
if (types.find(type) == types.end()) {
|
if (types.find(type) == types.end()) {
|
||||||
data.instructions[name].push_back(instruction);
|
data.instructions[name].push_back(instruction);
|
||||||
types.insert(instruction.type);
|
types.insert(instruction.type);
|
||||||
|
if (type == "declare_symbol" || type == "check_symbol") {
|
||||||
|
if (!TokenChecker::is_token(*ope)) { ope = tok(ope); }
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
data.duplicates_of_instruction.emplace_back(type,
|
data.duplicates_of_instruction.emplace_back(type,
|
||||||
instruction.sv.data());
|
instruction.sv.data());
|
||||||
@ -3811,6 +3854,22 @@ private:
|
|||||||
return instruction;
|
return instruction;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
g["DeclareSymbol"] = [](const SemanticValues &vs) {
|
||||||
|
Instruction instruction;
|
||||||
|
instruction.type = "declare_symbol";
|
||||||
|
instruction.data = std::string("default");
|
||||||
|
instruction.sv = vs.sv();
|
||||||
|
return instruction;
|
||||||
|
};
|
||||||
|
|
||||||
|
g["CheckSymbol"] = [](const SemanticValues &vs) {
|
||||||
|
Instruction instruction;
|
||||||
|
instruction.type = "check_symbol";
|
||||||
|
instruction.data = std::string("default");
|
||||||
|
instruction.sv = vs.sv();
|
||||||
|
return instruction;
|
||||||
|
};
|
||||||
|
|
||||||
g["Instruction"] = [](const SemanticValues &vs) {
|
g["Instruction"] = [](const SemanticValues &vs) {
|
||||||
return vs.transform<Instruction>();
|
return vs.transform<Instruction>();
|
||||||
};
|
};
|
||||||
@ -4061,6 +4120,12 @@ private:
|
|||||||
rule.error_message = std::any_cast<std::string>(instruction.data);
|
rule.error_message = std::any_cast<std::string>(instruction.data);
|
||||||
} else if (instruction.type == "no_ast_opt") {
|
} else if (instruction.type == "no_ast_opt") {
|
||||||
rule.no_ast_opt = true;
|
rule.no_ast_opt = true;
|
||||||
|
} else if (instruction.type == "declare_symbol") {
|
||||||
|
rule.declare_symbol = true;
|
||||||
|
rule.symbol_table_name = std::any_cast<std::string>(instruction.data);
|
||||||
|
} else if (instruction.type == "check_symbol") {
|
||||||
|
rule.check_symbol = true;
|
||||||
|
rule.symbol_table_name = std::any_cast<std::string>(instruction.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
109
test/test2.cc
109
test/test2.cc
@ -861,6 +861,115 @@ TEST(PredicateTest, Semantic_predicate_test) {
|
|||||||
EXPECT_FALSE(parser.parse("200", val));
|
EXPECT_FALSE(parser.parse("200", val));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(SymbolTableTest, symbol_instruction_test) {
|
||||||
|
parser parser(R"(
|
||||||
|
S <- (Decl / Ref)*
|
||||||
|
Decl <- 'decl' symbol(Name)
|
||||||
|
Ref <- 'ref' is_symbol(Name)
|
||||||
|
Name <- < [a-zA-Z]+ >
|
||||||
|
%whitespace <- [ \t\r\n]*
|
||||||
|
|
||||||
|
symbol(s) <- s { declare_symbol var_table }
|
||||||
|
is_symbol(s) <- s { check_symbol var_table }
|
||||||
|
)");
|
||||||
|
|
||||||
|
{
|
||||||
|
const auto source = R"(decl aaa
|
||||||
|
ref aaa
|
||||||
|
ref bbb
|
||||||
|
)";
|
||||||
|
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||||
|
EXPECT_EQ(3, line);
|
||||||
|
EXPECT_EQ(5, col);
|
||||||
|
EXPECT_EQ("'bbb' doesn't exist.", msg);
|
||||||
|
};
|
||||||
|
EXPECT_FALSE(parser.parse(source));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const auto source = R"(decl aaa
|
||||||
|
ref aaa
|
||||||
|
decl aaa
|
||||||
|
)";
|
||||||
|
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||||
|
EXPECT_EQ(3, line);
|
||||||
|
EXPECT_EQ(6, col);
|
||||||
|
EXPECT_EQ("'aaa' already exists.", msg);
|
||||||
|
};
|
||||||
|
EXPECT_FALSE(parser.parse(source));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SymbolTableTest, with_predicate_test) {
|
||||||
|
parser parser(R"(
|
||||||
|
S <- (Decl / Ref)*
|
||||||
|
Decl <- 'decl' symbol(Name)
|
||||||
|
Ref <- 'ref' is_symbol(Name)
|
||||||
|
Name <- < [a-zA-Z]+ >
|
||||||
|
%whitespace <- [ \t\r\n]*
|
||||||
|
|
||||||
|
# These must be tokens.
|
||||||
|
symbol(s) <- < s >
|
||||||
|
is_symbol(s) <- < s >
|
||||||
|
)");
|
||||||
|
|
||||||
|
std::set<std::string> dic;
|
||||||
|
|
||||||
|
parser[R"(symbol)"].predicate =
|
||||||
|
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||||
|
auto tok = vs.token_to_string();
|
||||||
|
if (dic.find(tok) != dic.end()) {
|
||||||
|
msg = "'" + tok + "' already exists.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
dic.insert(tok);
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
parser[R"(is_symbol)"].predicate =
|
||||||
|
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||||
|
auto tok = vs.token_to_string();
|
||||||
|
if (dic.find(tok) == dic.end()) {
|
||||||
|
msg = "'" + tok + "' doesn't exist.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
parser.enable_ast();
|
||||||
|
|
||||||
|
{
|
||||||
|
const auto source = R"(decl aaa
|
||||||
|
ref aaa
|
||||||
|
ref bbb
|
||||||
|
)";
|
||||||
|
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||||
|
EXPECT_EQ(3, line);
|
||||||
|
EXPECT_EQ(5, col);
|
||||||
|
EXPECT_EQ("'bbb' doesn't exist.", msg);
|
||||||
|
};
|
||||||
|
std::shared_ptr<Ast> ast;
|
||||||
|
dic.clear();
|
||||||
|
EXPECT_FALSE(parser.parse(source, ast));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const auto source = R"(decl aaa
|
||||||
|
ref aaa
|
||||||
|
decl aaa
|
||||||
|
)";
|
||||||
|
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||||
|
std::cerr << line << ":" << col << ": " << msg << "\n";
|
||||||
|
EXPECT_EQ(3, line);
|
||||||
|
EXPECT_EQ(6, col);
|
||||||
|
EXPECT_EQ("'aaa' already exists.", msg);
|
||||||
|
};
|
||||||
|
std::shared_ptr<Ast> ast;
|
||||||
|
dic.clear();
|
||||||
|
EXPECT_FALSE(parser.parse(source, ast));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST(UnicodeTest, Japanese_character) {
|
TEST(UnicodeTest, Japanese_character) {
|
||||||
peg::parser parser(u8R"(
|
peg::parser parser(u8R"(
|
||||||
文 <- 修飾語? 主語 述語 '。'
|
文 <- 修飾語? 主語 述語 '。'
|
||||||
|
Loading…
Reference in New Issue
Block a user