mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2025-01-22 05:15:30 +00:00
Resolve #228
This commit is contained in:
parent
23c080af49
commit
5934f0abba
@ -19,9 +19,11 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
set(add_link_deps Threads::Threads)
|
||||
endif()
|
||||
|
||||
add_subdirectory(example)
|
||||
add_subdirectory(lint)
|
||||
|
||||
add_subdirectory(example)
|
||||
# add_subdirectory(cymbol)
|
||||
|
||||
if (${PEGLIB_BUILD_TESTS})
|
||||
add_subdirectory(test)
|
||||
enable_testing()
|
||||
|
30
README.md
30
README.md
@ -33,6 +33,9 @@ The PEG syntax is well described on page 2 in the [document](http://www.brynosau
|
||||
* `%recover(` ... `)` (Error recovery operator)
|
||||
* `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`)
|
||||
* `label { message "..." }` (Error message instruction)
|
||||
* `{ no_ast_opt }` (No AST node optimization instruction)
|
||||
* `{ declare_symbol "..." }` (Declare symbol instruction)
|
||||
* `{ check_symbol "..." }` (Check symbol instruction)
|
||||
|
||||
The 'End of Input' check is performed by default. To disable it, call `disable_eoi_check`.
|
||||
|
||||
@ -456,6 +459,33 @@ It internally calls `peg::AstOptimizer` to do the job. You can make your own AST
|
||||
|
||||
See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc).
|
||||
|
||||
Symbol Table
|
||||
------------
|
||||
|
||||
Simple symbol table support is available with `declare_symbol` and `check_symbol` instructions.
|
||||
|
||||
```peg
|
||||
S <- (Decl / Ref)*
|
||||
Decl <- 'decl' symbol(Name)
|
||||
Ref <- 'ref' is_symbol(Name)
|
||||
Name <- < [a-zA-Z]+ >
|
||||
%whitespace <- [ \t\r\n]*
|
||||
|
||||
# 'var_table' is a table name.
|
||||
symbol(s) <- s { declare_symbol var_table } # Declare symbol instruction
|
||||
is_symbol(s) <- s { check_symbol var_table } # Check symbol instruction
|
||||
```
|
||||
|
||||
If we parse the following text with the above grammar, it will fail.
|
||||
|
||||
```
|
||||
decl aaa
|
||||
ref aaa
|
||||
ref bbb
|
||||
```
|
||||
|
||||
This is because line 3 references the undeclared symbol 'bbb'.
|
||||
|
||||
Make a parser with parser combinators
|
||||
-------------------------------------
|
||||
|
||||
|
BIN
docs/native.wasm
BIN
docs/native.wasm
Binary file not shown.
@ -20,6 +20,3 @@ target_link_libraries(calc5 ${add_link_deps})
|
||||
|
||||
add_executable(indent indent.cc)
|
||||
target_link_libraries(indent ${add_link_deps})
|
||||
|
||||
add_executable(symbol_check symbol_check.cc)
|
||||
target_link_libraries(symbol_check ${add_link_deps})
|
||||
|
@ -1,69 +0,0 @@
|
||||
//
|
||||
// symbol_check.cc
|
||||
//
|
||||
// Copyright (c) 2022 Yuji Hirose. All rights reserved.
|
||||
// MIT License
|
||||
//
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <peglib.h>
|
||||
#include <set>
|
||||
|
||||
using namespace peg;
|
||||
|
||||
int main(void) {
|
||||
parser parser(R"(
|
||||
S <- (Decl / Ref)*
|
||||
Decl <- 'decl' symbol(Name)
|
||||
Ref <- 'ref' symbol_reference(Name)
|
||||
Name <- < [a-zA-Z]+ >
|
||||
%whitespace <- [ \t\r\n]*
|
||||
|
||||
symbol(s) <- < s >
|
||||
symbol_reference(s) <- < s >
|
||||
)");
|
||||
|
||||
std::set<std::string> dic;
|
||||
|
||||
parser[R"(symbol)"].predicate =
|
||||
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||
auto tok = vs.token_to_string();
|
||||
if (dic.find(tok) != dic.end()) {
|
||||
msg = "'" + tok + "' already exists...";
|
||||
return false;
|
||||
}
|
||||
dic.insert(tok);
|
||||
return true;
|
||||
};
|
||||
|
||||
parser[R"(symbol_reference)"].predicate =
|
||||
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||
auto tok = vs.token_to_string();
|
||||
if (dic.find(tok) == dic.end()) {
|
||||
msg = "'" + tok + "' doesn't exists...";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
parser.enable_ast();
|
||||
|
||||
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||
std::cerr << line << ":" << col << ": " << msg << "\n";
|
||||
};
|
||||
|
||||
const auto source = R"(decl aaa
|
||||
ref aaa
|
||||
ref bbb
|
||||
)";
|
||||
|
||||
std::shared_ptr<Ast> ast;
|
||||
if (parser.parse(source, ast)) {
|
||||
ast = parser.optimize_ast(ast);
|
||||
std::cout << ast_to_s(ast);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
77
peglib.h
77
peglib.h
@ -833,6 +833,8 @@ public:
|
||||
std::any trace_data;
|
||||
const bool verbose_trace;
|
||||
|
||||
std::map<std::string, std::unordered_set<std::string>> symbol_tables;
|
||||
|
||||
Log log;
|
||||
|
||||
Context(const char *path, const char *s, size_t l, size_t def_count,
|
||||
@ -1950,7 +1952,7 @@ struct DetectLeftRecursion : public Ope::Visitor {
|
||||
|
||||
private:
|
||||
std::string name_;
|
||||
std::set<std::string> refs_;
|
||||
std::unordered_set<std::string> refs_;
|
||||
bool done_ = false;
|
||||
};
|
||||
|
||||
@ -2425,6 +2427,10 @@ public:
|
||||
std::string error_message;
|
||||
bool no_ast_opt = false;
|
||||
|
||||
bool declare_symbol = false;
|
||||
bool check_symbol = false;
|
||||
std::string symbol_table_name;
|
||||
|
||||
bool eoi_check = true;
|
||||
|
||||
private:
|
||||
@ -2691,13 +2697,40 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
|
||||
c.rule_stack.pop_back();
|
||||
|
||||
std::string msg;
|
||||
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
|
||||
|
||||
if (success(len)) {
|
||||
if (outer_->predicate && !outer_->predicate(vs, dt, msg)) {
|
||||
len = static_cast<size_t>(-1);
|
||||
} else if (outer_->declare_symbol) {
|
||||
assert(outer_->is_token());
|
||||
auto symbol = vs.token_to_string();
|
||||
auto &table = c.symbol_tables[outer_->symbol_table_name];
|
||||
auto ret = table.find(symbol) != table.end();
|
||||
if (ret) {
|
||||
msg = "'" + symbol + "' already exists.";
|
||||
len = static_cast<size_t>(-1);
|
||||
} else {
|
||||
table.insert(symbol);
|
||||
}
|
||||
} else if (outer_->check_symbol) {
|
||||
assert(outer_->is_token());
|
||||
auto symbol = vs.token_to_string();
|
||||
auto &table = c.symbol_tables[outer_->symbol_table_name];
|
||||
auto ret = table.find(symbol) != table.end();
|
||||
if (!ret) {
|
||||
msg = "'" + symbol + "' doesn't exist.";
|
||||
len = static_cast<size_t>(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fail(len)) {
|
||||
if (c.log && !msg.empty() && c.error_info.message_pos < s) {
|
||||
c.error_info.message_pos = s;
|
||||
c.error_info.message = msg;
|
||||
}
|
||||
len = static_cast<size_t>(-1);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
@ -3378,8 +3411,9 @@ private:
|
||||
opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
|
||||
g["InstructionItem"])))),
|
||||
g["EndBlacket"]);
|
||||
g["InstructionItem"] <=
|
||||
cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]);
|
||||
g["InstructionItem"] <= cho(g["PrecedenceClimbing"], g["ErrorMessage"],
|
||||
g["NoAstOpt"], g["DeclareSymbol"],
|
||||
g["CheckSymbol"]);
|
||||
~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
|
||||
|
||||
~g["SpacesZom"] <= zom(g["Space"]);
|
||||
@ -3412,6 +3446,12 @@ private:
|
||||
// No Ast node optimization instruction
|
||||
g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
|
||||
|
||||
// Symbol table instruction
|
||||
g["DeclareSymbol"] <= seq(lit("declare_symbol"), g["SpacesZom"],
|
||||
g["Identifier"], g["SpacesZom"]);
|
||||
g["CheckSymbol"] <= seq(lit("check_symbol"), g["SpacesZom"],
|
||||
g["Identifier"], g["SpacesZom"]);
|
||||
|
||||
// Set definition names
|
||||
for (auto &x : g) {
|
||||
x.second.name = x.first;
|
||||
@ -3441,13 +3481,16 @@ private:
|
||||
|
||||
if (has_instructions) {
|
||||
auto index = is_macro ? 5 : 4;
|
||||
std::set<std::string> types;
|
||||
std::unordered_set<std::string> types;
|
||||
for (const auto &instruction :
|
||||
std::any_cast<std::vector<Instruction>>(vs[index])) {
|
||||
const auto &type = instruction.type;
|
||||
if (types.find(type) == types.end()) {
|
||||
data.instructions[name].push_back(instruction);
|
||||
types.insert(instruction.type);
|
||||
if (type == "declare_symbol" || type == "check_symbol") {
|
||||
if (!TokenChecker::is_token(*ope)) { ope = tok(ope); }
|
||||
}
|
||||
} else {
|
||||
data.duplicates_of_instruction.emplace_back(type,
|
||||
instruction.sv.data());
|
||||
@ -3811,6 +3854,22 @@ private:
|
||||
return instruction;
|
||||
};
|
||||
|
||||
// Semantic actions for the symbol table instructions
// (`{ declare_symbol NAME }` and `{ check_symbol NAME }`).
//
// Both grammar rules have the shape `<keyword> SpacesZom Identifier SpacesZom`.
// The literal keyword and the ignored `SpacesZom` rule contribute no semantic
// values, so the table name is expected to be the first semantic value.
// The name is stored in `instruction.data`, which is later copied into
// `rule.symbol_table_name`, so that rules naming different tables do not end
// up sharing a single one.
//
// NOTE(review): assumes the `Identifier` action yields a `std::string` —
// confirm. The pointer form of `std::any_cast` is used so that a different
// payload type falls back to the "default" table instead of throwing.
g["DeclareSymbol"] = [](const SemanticValues &vs) {
  std::string table_name = "default";
  if (!vs.empty()) {
    if (const auto *name = std::any_cast<std::string>(&vs[0])) {
      if (!name->empty()) { table_name = *name; }
    }
  }

  Instruction instruction;
  instruction.type = "declare_symbol";
  instruction.data = std::move(table_name);
  instruction.sv = vs.sv();
  return instruction;
};

// Same contract as `DeclareSymbol`; the two must resolve the table name
// identically so that a check looks in the table the declaration wrote to.
g["CheckSymbol"] = [](const SemanticValues &vs) {
  std::string table_name = "default";
  if (!vs.empty()) {
    if (const auto *name = std::any_cast<std::string>(&vs[0])) {
      if (!name->empty()) { table_name = *name; }
    }
  }

  Instruction instruction;
  instruction.type = "check_symbol";
  instruction.data = std::move(table_name);
  instruction.sv = vs.sv();
  return instruction;
};
|
||||
|
||||
g["Instruction"] = [](const SemanticValues &vs) {
|
||||
return vs.transform<Instruction>();
|
||||
};
|
||||
@ -4061,6 +4120,12 @@ private:
|
||||
rule.error_message = std::any_cast<std::string>(instruction.data);
|
||||
} else if (instruction.type == "no_ast_opt") {
|
||||
rule.no_ast_opt = true;
|
||||
} else if (instruction.type == "declare_symbol") {
|
||||
rule.declare_symbol = true;
|
||||
rule.symbol_table_name = std::any_cast<std::string>(instruction.data);
|
||||
} else if (instruction.type == "check_symbol") {
|
||||
rule.check_symbol = true;
|
||||
rule.symbol_table_name = std::any_cast<std::string>(instruction.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
109
test/test2.cc
109
test/test2.cc
@ -861,6 +861,115 @@ TEST(PredicateTest, Semantic_predicate_test) {
|
||||
EXPECT_FALSE(parser.parse("200", val));
|
||||
}
|
||||
|
||||
TEST(SymbolTableTest, symbol_instruction_test) {
|
||||
parser parser(R"(
|
||||
S <- (Decl / Ref)*
|
||||
Decl <- 'decl' symbol(Name)
|
||||
Ref <- 'ref' is_symbol(Name)
|
||||
Name <- < [a-zA-Z]+ >
|
||||
%whitespace <- [ \t\r\n]*
|
||||
|
||||
symbol(s) <- s { declare_symbol var_table }
|
||||
is_symbol(s) <- s { check_symbol var_table }
|
||||
)");
|
||||
|
||||
{
|
||||
const auto source = R"(decl aaa
|
||||
ref aaa
|
||||
ref bbb
|
||||
)";
|
||||
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||
EXPECT_EQ(3, line);
|
||||
EXPECT_EQ(5, col);
|
||||
EXPECT_EQ("'bbb' doesn't exist.", msg);
|
||||
};
|
||||
EXPECT_FALSE(parser.parse(source));
|
||||
}
|
||||
|
||||
{
|
||||
const auto source = R"(decl aaa
|
||||
ref aaa
|
||||
decl aaa
|
||||
)";
|
||||
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||
EXPECT_EQ(3, line);
|
||||
EXPECT_EQ(6, col);
|
||||
EXPECT_EQ("'aaa' already exists.", msg);
|
||||
};
|
||||
EXPECT_FALSE(parser.parse(source));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SymbolTableTest, with_predicate_test) {
|
||||
parser parser(R"(
|
||||
S <- (Decl / Ref)*
|
||||
Decl <- 'decl' symbol(Name)
|
||||
Ref <- 'ref' is_symbol(Name)
|
||||
Name <- < [a-zA-Z]+ >
|
||||
%whitespace <- [ \t\r\n]*
|
||||
|
||||
# These must be tokens.
|
||||
symbol(s) <- < s >
|
||||
is_symbol(s) <- < s >
|
||||
)");
|
||||
|
||||
std::set<std::string> dic;
|
||||
|
||||
parser[R"(symbol)"].predicate =
|
||||
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||
auto tok = vs.token_to_string();
|
||||
if (dic.find(tok) != dic.end()) {
|
||||
msg = "'" + tok + "' already exists.";
|
||||
return false;
|
||||
}
|
||||
dic.insert(tok);
|
||||
return true;
|
||||
};
|
||||
|
||||
parser[R"(is_symbol)"].predicate =
|
||||
[&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) {
|
||||
auto tok = vs.token_to_string();
|
||||
if (dic.find(tok) == dic.end()) {
|
||||
msg = "'" + tok + "' doesn't exist.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
parser.enable_ast();
|
||||
|
||||
{
|
||||
const auto source = R"(decl aaa
|
||||
ref aaa
|
||||
ref bbb
|
||||
)";
|
||||
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||
EXPECT_EQ(3, line);
|
||||
EXPECT_EQ(5, col);
|
||||
EXPECT_EQ("'bbb' doesn't exist.", msg);
|
||||
};
|
||||
std::shared_ptr<Ast> ast;
|
||||
dic.clear();
|
||||
EXPECT_FALSE(parser.parse(source, ast));
|
||||
}
|
||||
|
||||
{
|
||||
const auto source = R"(decl aaa
|
||||
ref aaa
|
||||
decl aaa
|
||||
)";
|
||||
parser.log = [](size_t line, size_t col, const std::string &msg) {
|
||||
std::cerr << line << ":" << col << ": " << msg << "\n";
|
||||
EXPECT_EQ(3, line);
|
||||
EXPECT_EQ(6, col);
|
||||
EXPECT_EQ("'aaa' already exists.", msg);
|
||||
};
|
||||
std::shared_ptr<Ast> ast;
|
||||
dic.clear();
|
||||
EXPECT_FALSE(parser.parse(source, ast));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(UnicodeTest, Japanese_character) {
|
||||
peg::parser parser(u8R"(
|
||||
文 <- 修飾語? 主語 述語 '。'
|
||||
|
Loading…
Reference in New Issue
Block a user