Fixed word expression problem with Dictionary

This commit is contained in:
yhirose 2022-09-06 17:18:23 -04:00
parent afd3acbeaf
commit faa60bd85c
2 changed files with 184 additions and 19 deletions

View File

@ -2484,17 +2484,15 @@ inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
}
// Skip whitespace
if (!c.in_token_boundary_count) {
if (c.whitespaceOpe) {
auto save_ignore_trace_state = c.ignore_trace_state;
c.ignore_trace_state = !c.verbose_trace;
auto se =
scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
if (!c.in_token_boundary_count && c.whitespaceOpe) {
auto save_ignore_trace_state = c.ignore_trace_state;
c.ignore_trace_state = !c.verbose_trace;
auto se =
scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
if (fail(len)) { return len; }
i += len;
}
auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
if (fail(len)) { return len; }
i += len;
}
return i;
@ -2558,9 +2556,7 @@ inline void ErrorInfo::output_log(const Log &log, const char *s, size_t n) {
msg += "'";
} else {
msg += "<" + error_rule->name + ">";
if (label.empty()) {
label = error_rule->name;
}
if (label.empty()) { label = error_rule->name; }
}
first_item = false;
}
@ -2641,12 +2637,50 @@ inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
}
inline size_t Dictionary::parse_core(const char *s, size_t n,
SemanticValues & /*vs*/, Context &c,
std::any & /*dt*/) const {
auto len = trie_.match(s, n);
if (len > 0) { return len; }
c.set_error_pos(s);
return static_cast<size_t>(-1);
SemanticValues &vs, Context &c,
std::any &dt) const {
auto i = trie_.match(s, n);
if (i == 0) {
c.set_error_pos(s);
return static_cast<size_t>(-1);
}
// Word check
if (c.wordOpe) {
auto save_ignore_trace_state = c.ignore_trace_state;
c.ignore_trace_state = !c.verbose_trace;
auto se =
scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
{
SemanticValues dummy_vs;
Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
nullptr, nullptr, false, nullptr);
std::any dummy_dt;
NotPredicate ope(c.wordOpe);
auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
if (fail(len)) {
c.set_error_pos(s);
return len;
}
i += len;
}
}
// Skip whiltespace
if (!c.in_token_boundary_count && c.whitespaceOpe) {
auto save_ignore_trace_state = c.ignore_trace_state;
c.ignore_trace_state = !c.verbose_trace;
auto se =
scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
if (fail(len)) { return len; }
i += len;
}
return i;
}
inline size_t LiteralString::parse_core(const char *s, size_t n,

View File

@ -352,6 +352,28 @@ TEST(GeneralTest, Word_expression_test) {
EXPECT_TRUE(parser.parse("hello , world"));
}
// With `%word` defined, a keyword literal listed in a prioritized choice (`/`)
// must not cause `!Keyword` to reject an identifier that merely starts with
// that keyword.
TEST(GeneralTest, Word_expression_test_PrioritizedChoice) {
  // Every rule needs the `<-` definition operator between its name and its
  // expression; without it the grammar text is not a valid set of definitions.
  parser parser(R"(
Identifier <- < !Keyword [a-z][a-z]* >
Keyword <- 'def' / 'to'
%whitespace <- [ \t\r\n]*
%word <- [a-z]+
)");

  // "toa" begins with the keyword "to" but is expected to be accepted as an
  // Identifier.
  EXPECT_TRUE(parser.parse("toa"));
}
// With `%word` defined, keyword literals joined with `|` must not cause
// `!Keyword` to reject an identifier that merely starts with one of them.
// NOTE(review): the `|` form is presumably what the Dictionary operator
// handles - confirm against the grammar loader.
TEST(GeneralTest, Word_expression_test_Dictionary) {
  // Every rule needs the `<-` definition operator between its name and its
  // expression; without it the grammar text is not a valid set of definitions.
  parser parser(R"(
Identifier <- < !Keyword [a-z][a-z]* >
Keyword <- 'def' | 'to'
%whitespace <- [ \t\r\n]*
%word <- [a-z]+
)");

  // "toa" begins with the keyword "to" but is expected to be accepted as an
  // Identifier.
  EXPECT_TRUE(parser.parse("toa"));
}
TEST(GeneralTest, Skip_token_test) {
parser parser(" ROOT <- _ ITEM (',' _ ITEM _)* "
" ITEM <- ([a-z0-9])+ "
@ -875,6 +897,23 @@ TEST(GeneralTest, Literal_token_on_AST_test3) {
EXPECT_TRUE(ast->nodes.empty());
}
// Parses a quoted string containing a backslash escape and checks the
// resulting AST root: it must be a token node whose text is the content
// between the quotes, with no child nodes.
TEST(GeneralTest, Literal_token_on_AST_test4) {
  parser pg(R"(
STRING_LITERAL <- < '"' < (ESC / CHAR)* > '"' >
ESC <- ('\\"' / '\\t' / '\\n')
CHAR <- (!["] .)
)");
  pg.enable_ast();

  std::shared_ptr<Ast> root;
  const auto parsed = pg.parse(R"("a\tb")", root);

  EXPECT_TRUE(parsed);
  EXPECT_TRUE(root->is_token);
  EXPECT_EQ(R"(a\tb)", root->token);
  EXPECT_TRUE(root->nodes.empty());
}
TEST(GeneralTest, Missing_missing_definitions_test) {
parser parser(R"(
A <- B C
@ -1102,3 +1141,95 @@ TEST(GeneralTest, HeuristicErrorTokenTest) {
auto ret = parser.parse("enum sequencer");
EXPECT_FALSE(ret);
}
// Parses a hash literal `{1: 1, 2: 2, 3: 3}` with a small expression grammar,
// optimizes the AST, and checks that every child of the HASH node carries an
// INTEGER key and an INTEGER value with the expected numeric content.
TEST(GeneralTest, LiteralContentInAST) {
  parser parser(R"(
PROGRAM <- STATEMENTS
STATEMENTS <- (STATEMENT ';'?)*
STATEMENT <- ASSIGNMENT / RETURN / EXPRESSION_STATEMENT
ASSIGNMENT <- 'let' IDENTIFIER '=' EXPRESSION
RETURN <- 'return' EXPRESSION
EXPRESSION_STATEMENT <- EXPRESSION
EXPRESSION <- INFIX_EXPR(PREFIX_EXPR, INFIX_OPE)
INFIX_EXPR(ATOM, OPE) <- ATOM (OPE ATOM)* {
precedence
L == !=
L < >
L + -
L * /
}
IF <- 'if' '(' EXPRESSION ')' BLOCK ('else' BLOCK)?
FUNCTION <- 'fn' '(' PARAMETERS ')' BLOCK
PARAMETERS <- LIST(IDENTIFIER, ',')
BLOCK <- '{' STATEMENTS '}'
CALL <- PRIMARY (ARGUMENTS / INDEX)*
ARGUMENTS <- '(' LIST(EXPRESSION, ',') ')'
INDEX <- '[' EXPRESSION ']'
PREFIX_EXPR <- PREFIX_OPE* CALL
PRIMARY <- IF / FUNCTION / ARRAY / HASH / INTEGER / BOOLEAN / NULL / IDENTIFIER / STRING / '(' EXPRESSION ')'
ARRAY <- '[' LIST(EXPRESSION, ',') ']'
HASH <- '{' LIST(HASH_PAIR, ',') '}'
HASH_PAIR <- EXPRESSION ':' EXPRESSION
IDENTIFIER <- < !KEYWORD [a-zA-Z]+ >
INTEGER <- < [0-9]+ >
STRING <- < ["] < (!["] .)* > ["] >
BOOLEAN <- 'true' / 'false'
NULL <- 'null'
PREFIX_OPE <- < [-!] >
INFIX_OPE <- < [-+/*<>] / '==' / '!=' >
KEYWORD <- ('null' | 'true' | 'false' | 'let' | 'return' | 'if' | 'else' | 'fn') ![a-zA-Z]
LIST(ITEM, DELM) <- (ITEM (~DELM ITEM)*)?
LINE_COMMENT <- '//' (!LINE_END .)* &LINE_END
LINE_END <- '\r\n' / '\r' / '\n' / !.
%whitespace <- ([ \t\r\n]+ / LINE_COMMENT)*
%word <- [a-zA-Z]+
)");
  parser.enable_ast();

  std::shared_ptr<Ast> ast;
  auto ret = parser.parse(R"({1: 1, 2: 2, 3: 3})", ast);
  // Fatal on failure: everything below dereferences `ast`.
  ASSERT_TRUE(ret);

  auto opt =
      AstOptimizer(true, {"EXPRESSION_STATEMENT", "PARAMETERS", "ARGUMENTS",
                          "INDEX", "RETURN", "BLOCK", "ARRAY", "HASH"});
  ast = opt.optimize(ast);

  EXPECT_EQ("EXPRESSION_STATEMENT", ast->name);

  // Guard the index access below instead of reading past an empty child list.
  ASSERT_FALSE(ast->nodes.empty());
  const auto &hash = ast->nodes[0];
  EXPECT_EQ("HASH", hash->name);

  // Key token text -> numeric value expected for that key.
  const std::map<std::string, int64_t> expected = {
      {"1", 1},
      {"2", 2},
      {"3", 3},
  };

  // A distinct loop-variable name avoids shadowing the HASH node, and
  // iterating by const reference avoids copying a shared_ptr per element.
  for (const auto &pair : hash->nodes) {
    ASSERT_GE(pair->nodes.size(), 2u);
    const auto &key = pair->nodes[0];
    const auto &val = pair->nodes[1];

    EXPECT_EQ("INTEGER", key->name);

    // Look the key up without inserting: an unexpected key is reported as a
    // failure rather than silently compared against a default-inserted 0.
    const auto key_text = key->token_to_string();
    const auto it = expected.find(key_text);
    ASSERT_TRUE(it != expected.end()) << "unexpected key: " << key_text;

    EXPECT_EQ("INTEGER", val->name);
    EXPECT_EQ(it->second, val->token_to_number<int64_t>());
  }
}