This commit is contained in:
yhirose 2022-07-01 22:15:21 -04:00
parent 23c080af49
commit 5934f0abba
7 changed files with 213 additions and 79 deletions

View File

@ -19,9 +19,11 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(add_link_deps Threads::Threads) set(add_link_deps Threads::Threads)
endif() endif()
add_subdirectory(example)
add_subdirectory(lint) add_subdirectory(lint)
add_subdirectory(example)
# add_subdirectory(cymbol)
if (${PEGLIB_BUILD_TESTS}) if (${PEGLIB_BUILD_TESTS})
add_subdirectory(test) add_subdirectory(test)
enable_testing() enable_testing()

View File

@ -33,6 +33,9 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
* `%recovery(` ... `)` (Error recovery operator) * `%recovery(` ... `)` (Error recovery operator)
* `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`) * `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`)
* `label { message "..." }` (Error message instruction) * `label { message "..." }` (Error message instruction)
* `{ no_ast_opt }` (No AST node optimization instruction)
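* `{ declare_symbol table_name }` (Declare symbol instruction; `table_name` is an identifier, not a quoted string)
* `{ check_symbol table_name }` (Check symbol instruction; `table_name` is an identifier, not a quoted string)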
'End of Input' check will be done as default. In order to disable the check, please call `disable_eoi_check`. 'End of Input' check will be done as default. In order to disable the check, please call `disable_eoi_check`.
@ -456,6 +459,33 @@ It internally calls `peg::AstOptimizer` to do the job. You can make your own AST
See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc). See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc).
Symbol Table
------------
Simple symbol table support is available with `declare_symbol` and `check_symbol` instructions.
```peg
S <- (Decl / Ref)*
Decl <- 'decl' symbol(Name)
Ref <- 'ref' is_symbol(Name)
Name <- < [a-zA-Z]+ >
%whitespace <- [ \t\r\n]*
# 'var_table' is a table name.
symbol(s) <- s { declare_symbol var_table } # Declare symbol instruction
is_symbol(s) <- s { check_symbol var_table } # Check symbol instruction
```
If we parse the following text with the above grammar, it will fail.
```
decl aaa
ref aaa
ref bbb
```
This is because line 3 references the undeclared symbol 'bbb'.
Make a parser with parser combinators Make a parser with parser combinators
------------------------------------- -------------------------------------

Binary file not shown.

View File

@ -20,6 +20,3 @@ target_link_libraries(calc5 ${add_link_deps})
add_executable(indent indent.cc) add_executable(indent indent.cc)
target_link_libraries(indent ${add_link_deps}) target_link_libraries(indent ${add_link_deps})
add_executable(symbol_check symbol_check.cc)
target_link_libraries(symbol_check ${add_link_deps})

View File

@ -1,69 +0,0 @@
//
// symbol_check.cc
//
// Copyright (c) 2022 Yuji Hirose. All rights reserved.
// MIT License
//
#include <cstdlib>
#include <iostream>
#include <peglib.h>
#include <set>
using namespace peg;
// Example: a hand-rolled symbol table built on semantic predicates.
//
// The `symbol(...)` macro rule records every declared name and rejects a
// redeclaration; the `symbol_reference(...)` macro rule rejects a name that
// has not been declared. The sample input references the undeclared name
// 'bbb' on its third line, so the parse is expected to fail.
//
// Returns EXIT_SUCCESS after printing the optimized AST when the input parses,
// EXIT_FAILURE otherwise. Diagnostics are written to stderr in the form
// "line:col: message".
int main() {
  parser parser(R"(
S <- (Decl / Ref)*
Decl <- 'decl' symbol(Name)
Ref <- 'ref' symbol_reference(Name)
Name <- < [a-zA-Z]+ >
%whitespace <- [ \t\r\n]*
symbol(s) <- < s >
symbol_reference(s) <- < s >
)");

  // Names declared so far; shared by both predicates below.
  std::set<std::string> dic;

  parser[R"(symbol)"].predicate =
      [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
        auto tok = vs.token_to_string();
        // insert() reports whether the name was new, so a single lookup both
        // detects a redeclaration and records a first declaration.
        if (!dic.insert(tok).second) {
          msg = "'" + tok + "' already exists.";
          return false;
        }
        return true;
      };

  parser[R"(symbol_reference)"].predicate =
      [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
        auto tok = vs.token_to_string();
        if (dic.find(tok) == dic.end()) {
          msg = "'" + tok + "' doesn't exist.";
          return false;
        }
        return true;
      };

  parser.enable_ast();

  parser.log = [](size_t line, size_t col, const std::string &msg) {
    std::cerr << line << ":" << col << ": " << msg << "\n";
  };

  // The lines of the input start at column 1 on purpose: reported columns are
  // relative to the raw text.
  const auto source = R"(decl aaa
ref aaa
ref bbb
)";

  std::shared_ptr<Ast> ast;
  if (!parser.parse(source, ast)) {
    // Portable failure status instead of -1.
    return EXIT_FAILURE;
  }

  ast = parser.optimize_ast(ast);
  std::cout << ast_to_s(ast);
  return EXIT_SUCCESS;
}

View File

@ -833,6 +833,8 @@ public:
std::any trace_data; std::any trace_data;
const bool verbose_trace; const bool verbose_trace;
// Symbol tables keyed by table name. A rule flagged `declare_symbol` inserts
// its matched token into the named table; a rule flagged `check_symbol` looks
// its matched token up there.
std::map<std::string, std::unordered_set<std::string>> symbol_tables;
Log log; Log log;
Context(const char *path, const char *s, size_t l, size_t def_count, Context(const char *path, const char *s, size_t l, size_t def_count,
@ -1950,7 +1952,7 @@ struct DetectLeftRecursion : public Ope::Visitor {
private: private:
std::string name_; std::string name_;
std::set<std::string> refs_; std::unordered_set<std::string> refs_;
bool done_ = false; bool done_ = false;
}; };
@ -2425,6 +2427,10 @@ public:
std::string error_message; std::string error_message;
bool no_ast_opt = false; bool no_ast_opt = false;
// Set when the rule carries a `declare_symbol` instruction: after a successful
// match the rule's token is added to the symbol table, and the match is
// rejected with "'<symbol>' already exists." if it was already present.
bool declare_symbol = false;
// Set when the rule carries a `check_symbol` instruction: a successful match is
// rejected with "'<symbol>' doesn't exist." unless the rule's token is already
// in the symbol table. Only consulted when `declare_symbol` is false.
bool check_symbol = false;
// Key of the table inside the parse context's `symbol_tables` that the two
// instructions above operate on.
std::string symbol_table_name;
bool eoi_check = true; bool eoi_check = true;
private: private:
@ -2691,13 +2697,40 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
c.rule_stack.pop_back(); c.rule_stack.pop_back();
std::string msg; std::string msg;
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
if (success(len)) {
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
len = static_cast<size_t>(-1);
} else if (outer_->declare_symbol) {
assert(outer_->is_token());
auto symbol = vs.token_to_string();
auto &table = c.symbol_tables[outer_->symbol_table_name];
auto ret = table.find(symbol) != table.end();
if (ret) {
msg = "'" + symbol + "' already exists.";
len = static_cast<size_t>(-1);
} else {
table.insert(symbol);
}
} else if (outer_->check_symbol) {
assert(outer_->is_token());
auto symbol = vs.token_to_string();
auto &table = c.symbol_tables[outer_->symbol_table_name];
auto ret = table.find(symbol) != table.end();
if (!ret) {
msg = "'" + symbol + "' doesn't exist.";
len = static_cast<size_t>(-1);
}
}
}
if (fail(len)) {
if (c.log && !msg.empty() && c.error_info.message_pos < s) { if (c.log && !msg.empty() && c.error_info.message_pos < s) {
c.error_info.message_pos = s; c.error_info.message_pos = s;
c.error_info.message = msg; c.error_info.message = msg;
} }
len = static_cast<size_t>(-1);
} }
return len; return len;
} }
@ -3378,8 +3411,9 @@ private:
opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"], opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
g["InstructionItem"])))), g["InstructionItem"])))),
g["EndBlacket"]); g["EndBlacket"]);
g["InstructionItem"] <= g["InstructionItem"] <= cho(g["PrecedenceClimbing"], g["ErrorMessage"],
cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]); g["NoAstOpt"], g["DeclareSymbol"],
g["CheckSymbol"]);
~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]); ~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
~g["SpacesZom"] <= zom(g["Space"]); ~g["SpacesZom"] <= zom(g["Space"]);
@ -3412,6 +3446,12 @@ private:
// No Ast node optimization instruction // No Ast node optimization instruction
g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]); g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
// Symbol table instructions. Both take the instruction keyword followed by an
// identifier, which the grammar author writes as the symbol table name
// (e.g. `{ declare_symbol var_table }` / `{ check_symbol var_table }`).
g["DeclareSymbol"] <= seq(lit("declare_symbol"), g["SpacesZom"],
g["Identifier"], g["SpacesZom"]);
g["CheckSymbol"] <= seq(lit("check_symbol"), g["SpacesZom"],
g["Identifier"], g["SpacesZom"]);
// Set definition names // Set definition names
for (auto &x : g) { for (auto &x : g) {
x.second.name = x.first; x.second.name = x.first;
@ -3441,13 +3481,16 @@ private:
if (has_instructions) { if (has_instructions) {
auto index = is_macro ? 5 : 4; auto index = is_macro ? 5 : 4;
std::set<std::string> types; std::unordered_set<std::string> types;
for (const auto &instruction : for (const auto &instruction :
std::any_cast<std::vector<Instruction>>(vs[index])) { std::any_cast<std::vector<Instruction>>(vs[index])) {
const auto &type = instruction.type; const auto &type = instruction.type;
if (types.find(type) == types.end()) { if (types.find(type) == types.end()) {
data.instructions[name].push_back(instruction); data.instructions[name].push_back(instruction);
types.insert(instruction.type); types.insert(instruction.type);
if (type == "declare_symbol" || type == "check_symbol") {
if (!TokenChecker::is_token(*ope)) { ope = tok(ope); }
}
} else { } else {
data.duplicates_of_instruction.emplace_back(type, data.duplicates_of_instruction.emplace_back(type,
instruction.sv.data()); instruction.sv.data());
@ -3811,6 +3854,22 @@ private:
return instruction; return instruction;
}; };
// Semantic action for the "DeclareSymbol" rule: builds the Instruction of type
// "declare_symbol" whose `data` later becomes the rule's `symbol_table_name`.
// NOTE(review): `data` is always "default"; the Identifier matched by the
// "DeclareSymbol" rule (the table name written in the grammar) is not read
// here, so every declaring rule appears to share one table. Confirm whether
// this is intended.
g["DeclareSymbol"] = [](const SemanticValues &vs) {
Instruction instruction;
instruction.type = "declare_symbol";
instruction.data = std::string("default");
// Source text of the instruction, kept for error reporting elsewhere.
instruction.sv = vs.sv();
return instruction;
};
// Semantic action for the "CheckSymbol" rule: builds the Instruction of type
// "check_symbol" whose `data` later becomes the rule's `symbol_table_name`.
// NOTE(review): `data` is always "default"; the Identifier matched by the
// "CheckSymbol" rule (the table name written in the grammar) is not read
// here, so every checking rule appears to consult the same table. Confirm
// whether this is intended.
g["CheckSymbol"] = [](const SemanticValues &vs) {
Instruction instruction;
instruction.type = "check_symbol";
instruction.data = std::string("default");
// Source text of the instruction, kept for error reporting elsewhere.
instruction.sv = vs.sv();
return instruction;
};
g["Instruction"] = [](const SemanticValues &vs) { g["Instruction"] = [](const SemanticValues &vs) {
return vs.transform<Instruction>(); return vs.transform<Instruction>();
}; };
@ -4061,6 +4120,12 @@ private:
rule.error_message = std::any_cast<std::string>(instruction.data); rule.error_message = std::any_cast<std::string>(instruction.data);
} else if (instruction.type == "no_ast_opt") { } else if (instruction.type == "no_ast_opt") {
rule.no_ast_opt = true; rule.no_ast_opt = true;
} else if (instruction.type == "declare_symbol") {
rule.declare_symbol = true;
rule.symbol_table_name = std::any_cast<std::string>(instruction.data);
} else if (instruction.type == "check_symbol") {
rule.check_symbol = true;
rule.symbol_table_name = std::any_cast<std::string>(instruction.data);
} }
} }
} }

View File

@ -861,6 +861,115 @@ TEST(PredicateTest, Semantic_predicate_test) {
EXPECT_FALSE(parser.parse("200", val)); EXPECT_FALSE(parser.parse("200", val));
} }
// Exercises the `declare_symbol` / `check_symbol` grammar instructions:
// referencing an undeclared name and redeclaring an existing name must both
// make the parse fail and report the offending token's position and message.
TEST(SymbolTableTest, symbol_instruction_test) {
  parser pg(R"(
S <- (Decl / Ref)*
Decl <- 'decl' symbol(Name)
Ref <- 'ref' is_symbol(Name)
Name <- < [a-zA-Z]+ >
%whitespace <- [ \t\r\n]*
symbol(s) <- s { declare_symbol var_table }
is_symbol(s) <- s { check_symbol var_table }
)");

  // Case 1: line 3 references 'bbb', which was never declared.
  const auto undeclared_ref_input = R"(decl aaa
ref aaa
ref bbb
)";
  pg.log = [](size_t ln, size_t column, const std::string &text) {
    EXPECT_EQ(3, ln);
    EXPECT_EQ(5, column);
    EXPECT_EQ("'bbb' doesn't exist.", text);
  };
  EXPECT_FALSE(pg.parse(undeclared_ref_input));

  // Case 2: line 3 declares 'aaa' a second time.
  const auto duplicate_decl_input = R"(decl aaa
ref aaa
decl aaa
)";
  pg.log = [](size_t ln, size_t column, const std::string &text) {
    EXPECT_EQ(3, ln);
    EXPECT_EQ(6, column);
    EXPECT_EQ("'aaa' already exists.", text);
  };
  EXPECT_FALSE(pg.parse(duplicate_decl_input));
}
// Same scenarios as the instruction-based symbol table test, but with the
// symbol table implemented by hand through semantic predicates on the
// `symbol` and `is_symbol` macro rules. Both inputs must fail to parse and
// report the offending token's position and message through `parser.log`.
TEST(SymbolTableTest, with_predicate_test) {
  parser parser(R"(
S <- (Decl / Ref)*
Decl <- 'decl' symbol(Name)
Ref <- 'ref' is_symbol(Name)
Name <- < [a-zA-Z]+ >
%whitespace <- [ \t\r\n]*
# These must be tokens.
symbol(s) <- < s >
is_symbol(s) <- < s >
)");

  // Names declared so far; cleared before each parse so the cases below do
  // not influence one another.
  std::set<std::string> dic;

  parser[R"(symbol)"].predicate =
      [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
        auto tok = vs.token_to_string();
        // insert() reports whether the name was new, so a single lookup both
        // detects a redeclaration and records a first declaration.
        if (!dic.insert(tok).second) {
          msg = "'" + tok + "' already exists.";
          return false;
        }
        return true;
      };

  parser[R"(is_symbol)"].predicate =
      [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
        auto tok = vs.token_to_string();
        if (dic.find(tok) == dic.end()) {
          msg = "'" + tok + "' doesn't exist.";
          return false;
        }
        return true;
      };

  parser.enable_ast();

  // Case 1: line 3 references 'bbb', which was never declared.
  {
    const auto source = R"(decl aaa
ref aaa
ref bbb
)";
    parser.log = [](size_t line, size_t col, const std::string &msg) {
      EXPECT_EQ(3, line);
      EXPECT_EQ(5, col);
      EXPECT_EQ("'bbb' doesn't exist.", msg);
    };
    std::shared_ptr<Ast> ast;
    dic.clear();
    EXPECT_FALSE(parser.parse(source, ast));
  }

  // Case 2: line 3 declares 'aaa' a second time.
  {
    const auto source = R"(decl aaa
ref aaa
decl aaa
)";
    // No console output here: the expectations alone report a mismatch, as in
    // the other log callbacks of this test suite.
    parser.log = [](size_t line, size_t col, const std::string &msg) {
      EXPECT_EQ(3, line);
      EXPECT_EQ(6, col);
      EXPECT_EQ("'aaa' already exists.", msg);
    };
    std::shared_ptr<Ast> ast;
    dic.clear();
    EXPECT_FALSE(parser.parse(source, ast));
  }
}
TEST(UnicodeTest, Japanese_character) { TEST(UnicodeTest, Japanese_character) {
peg::parser parser(u8R"( peg::parser parser(u8R"(
<- ? '' <- ? ''