mirror of
https://github.com/yhirose/cpp-peglib.git
synced 2024-12-22 11:55:30 +00:00
Fix #286
This commit is contained in:
parent
4e305b423b
commit
6201458f3b
@ -347,6 +347,13 @@ START <- 'This month is ' MONTH '.'
|
|||||||
MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...'
|
MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
It supports the case insensitive mode.
|
||||||
|
|
||||||
|
```peg
|
||||||
|
START <- 'This month is ' MONTH '.'
|
||||||
|
MONTH <- 'Jan'i | 'January'i | 'Feb'i | 'February'i | '...'i
|
||||||
|
```
|
||||||
|
|
||||||
Cut operator
|
Cut operator
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
67
peglib.h
67
peglib.h
@ -377,14 +377,13 @@ template <typename T> T token_to_number_(std::string_view sv) {
|
|||||||
|
|
||||||
class Trie {
|
class Trie {
|
||||||
public:
|
public:
|
||||||
Trie() = default;
|
Trie(const std::vector<std::string> &items, bool ignore_case)
|
||||||
Trie(const Trie &) = default;
|
: ignore_case_(ignore_case) {
|
||||||
|
|
||||||
Trie(const std::vector<std::string> &items) {
|
|
||||||
for (const auto &item : items) {
|
for (const auto &item : items) {
|
||||||
for (size_t len = 1; len <= item.size(); len++) {
|
for (size_t len = 1; len <= item.size(); len++) {
|
||||||
auto last = len == item.size();
|
auto last = len == item.size();
|
||||||
std::string_view sv(item.data(), len);
|
const auto &s = ignore_case ? to_lower(item) : item;
|
||||||
|
std::string_view sv(s.data(), len);
|
||||||
auto it = dic_.find(sv);
|
auto it = dic_.find(sv);
|
||||||
if (it == dic_.end()) {
|
if (it == dic_.end()) {
|
||||||
dic_.emplace(sv, Info{last, last});
|
dic_.emplace(sv, Info{last, last});
|
||||||
@ -402,7 +401,8 @@ public:
|
|||||||
auto done = false;
|
auto done = false;
|
||||||
size_t len = 1;
|
size_t len = 1;
|
||||||
while (!done && len <= text_len) {
|
while (!done && len <= text_len) {
|
||||||
std::string_view sv(text, len);
|
const auto &s = ignore_case_ ? to_lower(text) : std::string(text);
|
||||||
|
std::string_view sv(s.data(), len);
|
||||||
auto it = dic_.find(sv);
|
auto it = dic_.find(sv);
|
||||||
if (it == dic_.end()) {
|
if (it == dic_.end()) {
|
||||||
done = true;
|
done = true;
|
||||||
@ -416,6 +416,13 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
std::string to_lower(std::string s) const {
|
||||||
|
for (char &c : s) {
|
||||||
|
c = std::tolower(c);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
struct Info {
|
struct Info {
|
||||||
bool done;
|
bool done;
|
||||||
bool match;
|
bool match;
|
||||||
@ -424,6 +431,8 @@ private:
|
|||||||
// TODO: Use unordered_map when heterogeneous lookup is supported in C++20
|
// TODO: Use unordered_map when heterogeneous lookup is supported in C++20
|
||||||
// std::unordered_map<std::string, Info> dic_;
|
// std::unordered_map<std::string, Info> dic_;
|
||||||
std::map<std::string, Info, std::less<>> dic_;
|
std::map<std::string, Info, std::less<>> dic_;
|
||||||
|
|
||||||
|
bool ignore_case_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
/*-----------------------------------------------------------------------------
|
||||||
@ -1159,7 +1168,8 @@ public:
|
|||||||
|
|
||||||
class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
|
class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
|
||||||
public:
|
public:
|
||||||
Dictionary(const std::vector<std::string> &v) : trie_(v) {}
|
Dictionary(const std::vector<std::string> &v, bool ignore_case)
|
||||||
|
: trie_(v, ignore_case) {}
|
||||||
|
|
||||||
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
|
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
|
||||||
std::any &dt) const override;
|
std::any &dt) const override;
|
||||||
@ -1568,8 +1578,9 @@ inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
|
|||||||
return std::make_shared<NotPredicate>(ope);
|
return std::make_shared<NotPredicate>(ope);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v) {
|
inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v,
|
||||||
return std::make_shared<Dictionary>(v);
|
bool ignore_case) {
|
||||||
|
return std::make_shared<Dictionary>(v, ignore_case);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::shared_ptr<Ope> lit(std::string &&s) {
|
inline std::shared_ptr<Ope> lit(std::string &&s) {
|
||||||
@ -3335,16 +3346,17 @@ private:
|
|||||||
seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
|
seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
|
||||||
g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
|
g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
|
||||||
g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
|
g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
|
||||||
g["Primary"] <=
|
g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
|
||||||
cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
|
npd(g["LEFTARROW"])),
|
||||||
npd(g["LEFTARROW"])),
|
seq(g["Ignore"], g["Identifier"],
|
||||||
seq(g["Ignore"], g["Identifier"],
|
npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
|
||||||
npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
|
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
||||||
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
|
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
|
||||||
seq(g["BeginTok"], g["Expression"], g["EndTok"]), g["CapScope"],
|
g["CapScope"],
|
||||||
seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["BackRef"],
|
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
|
||||||
g["LiteralI"], g["Dictionary"], g["Literal"], g["NegatedClassI"],
|
g["BackRef"], g["DictionaryI"], g["LiteralI"],
|
||||||
g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
|
g["Dictionary"], g["Literal"], g["NegatedClassI"],
|
||||||
|
g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
|
||||||
|
|
||||||
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
|
||||||
g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));
|
g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));
|
||||||
@ -3358,6 +3370,9 @@ private:
|
|||||||
|
|
||||||
g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
|
g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
|
||||||
|
|
||||||
|
g["DictionaryI"] <=
|
||||||
|
seq(g["LiteralID"], oom(seq(g["PIPE"], g["LiteralID"])));
|
||||||
|
|
||||||
auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
|
auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
|
||||||
cls("'"), g["Spacing"]),
|
cls("'"), g["Spacing"]),
|
||||||
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
|
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
|
||||||
@ -3365,11 +3380,13 @@ private:
|
|||||||
g["Literal"] <= lit_ope;
|
g["Literal"] <= lit_ope;
|
||||||
g["LiteralD"] <= lit_ope;
|
g["LiteralD"] <= lit_ope;
|
||||||
|
|
||||||
g["LiteralI"] <=
|
auto lit_case_ignore_ope =
|
||||||
cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
|
cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
|
||||||
g["Spacing"]),
|
g["Spacing"]),
|
||||||
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
|
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
|
||||||
g["Spacing"]));
|
g["Spacing"]));
|
||||||
|
g["LiteralI"] <= lit_case_ignore_ope;
|
||||||
|
g["LiteralID"] <= lit_case_ignore_ope;
|
||||||
|
|
||||||
// NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
|
// NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
|
||||||
g["Class"] <= seq(chr('['), npd(chr('^')),
|
g["Class"] <= seq(chr('['), npd(chr('^')),
|
||||||
@ -3720,7 +3737,11 @@ private:
|
|||||||
|
|
||||||
g["Dictionary"] = [](const SemanticValues &vs) {
|
g["Dictionary"] = [](const SemanticValues &vs) {
|
||||||
auto items = vs.transform<std::string>();
|
auto items = vs.transform<std::string>();
|
||||||
return dic(items);
|
return dic(items, false);
|
||||||
|
};
|
||||||
|
g["DictionaryI"] = [](const SemanticValues &vs) {
|
||||||
|
auto items = vs.transform<std::string>();
|
||||||
|
return dic(items, true);
|
||||||
};
|
};
|
||||||
|
|
||||||
g["Literal"] = [](const SemanticValues &vs) {
|
g["Literal"] = [](const SemanticValues &vs) {
|
||||||
@ -3735,6 +3756,10 @@ private:
|
|||||||
auto &tok = vs.tokens.front();
|
auto &tok = vs.tokens.front();
|
||||||
return resolve_escape_sequence(tok.data(), tok.size());
|
return resolve_escape_sequence(tok.data(), tok.size());
|
||||||
};
|
};
|
||||||
|
g["LiteralID"] = [](const SemanticValues &vs) {
|
||||||
|
auto &tok = vs.tokens.front();
|
||||||
|
return resolve_escape_sequence(tok.data(), tok.size());
|
||||||
|
};
|
||||||
|
|
||||||
g["Class"] = [](const SemanticValues &vs) {
|
g["Class"] = [](const SemanticValues &vs) {
|
||||||
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
|
||||||
|
@ -374,6 +374,28 @@ TEST(GeneralTest, Word_expression_test_Dictionary) {
|
|||||||
EXPECT_TRUE(parser.parse("toa"));
|
EXPECT_TRUE(parser.parse("toa"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(GeneralTest, Word_expression_case_ignore_test_Dictionary) {
|
||||||
|
parser parser(R"(
|
||||||
|
Identifier ← < !Keyword [a-z][a-z]* >
|
||||||
|
Keyword ← 'def'i | 'to'i
|
||||||
|
%whitespace ← [ \t\r\n]*
|
||||||
|
%word ← [a-z]+
|
||||||
|
)");
|
||||||
|
|
||||||
|
EXPECT_TRUE(parser.parse("toa"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(GeneralTest, Word_expression_syntax_error_test_Dictionary) {
|
||||||
|
parser parser(R"(
|
||||||
|
Identifier ← < !Keyword [a-z][a-z]* >
|
||||||
|
Keyword ← 'def' | 'to'i
|
||||||
|
%whitespace ← [ \t\r\n]*
|
||||||
|
%word ← [a-z]+
|
||||||
|
)");
|
||||||
|
|
||||||
|
EXPECT_FALSE(parser);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(GeneralTest, Skip_token_test) {
|
TEST(GeneralTest, Skip_token_test) {
|
||||||
parser parser(" ROOT <- _ ITEM (',' _ ITEM _)* "
|
parser parser(" ROOT <- _ ITEM (',' _ ITEM _)* "
|
||||||
" ITEM <- ([a-z0-9])+ "
|
" ITEM <- ([a-z0-9])+ "
|
||||||
|
Loading…
Reference in New Issue
Block a user