This commit is contained in:
yhirose 2024-03-27 12:49:54 -04:00
parent 6201458f3b
commit 939ccb2af4
3 changed files with 42 additions and 5 deletions

View File

@ -347,6 +347,14 @@ START <- 'This month is ' MONTH '.'
MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...' MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...'
``` ```
We are able to find which item is matched with `choice()`.
```cpp
parser["MONTH"] = [](const SemanticValues &vs) {
auto id = vs.choice();
};
```
It supports the case insensitive mode. It supports the case insensitive mode.
```peg ```peg

View File

@ -379,6 +379,7 @@ class Trie {
public: public:
Trie(const std::vector<std::string> &items, bool ignore_case) Trie(const std::vector<std::string> &items, bool ignore_case)
: ignore_case_(ignore_case) { : ignore_case_(ignore_case) {
size_t id = 0;
for (const auto &item : items) { for (const auto &item : items) {
for (size_t len = 1; len <= item.size(); len++) { for (size_t len = 1; len <= item.size(); len++) {
auto last = len == item.size(); auto last = len == item.size();
@ -386,17 +387,18 @@ public:
std::string_view sv(s.data(), len); std::string_view sv(s.data(), len);
auto it = dic_.find(sv); auto it = dic_.find(sv);
if (it == dic_.end()) { if (it == dic_.end()) {
dic_.emplace(sv, Info{last, last}); dic_.emplace(sv, Info{last, last, id});
} else if (last) { } else if (last) {
it->second.match = true; it->second.match = true;
} else { } else {
it->second.done = false; it->second.done = false;
} }
} }
id++;
} }
} }
size_t match(const char *text, size_t text_len) const { size_t match(const char *text, size_t text_len, size_t &id) const {
size_t match_len = 0; size_t match_len = 0;
auto done = false; auto done = false;
size_t len = 1; size_t len = 1;
@ -407,7 +409,10 @@ public:
if (it == dic_.end()) { if (it == dic_.end()) {
done = true; done = true;
} else { } else {
if (it->second.match) { match_len = len; } if (it->second.match) {
match_len = len;
id = it->second.id;
}
if (it->second.done) { done = true; } if (it->second.done) { done = true; }
} }
len += 1; len += 1;
@ -415,6 +420,8 @@ public:
return match_len; return match_len;
} }
size_t size() const { return dic_.size(); }
private: private:
std::string to_lower(std::string s) const { std::string to_lower(std::string s) const {
for (char &c : s) { for (char &c : s) {
@ -426,6 +433,7 @@ private:
struct Info { struct Info {
bool done; bool done;
bool match; bool match;
size_t id;
}; };
// TODO: Use unordered_map when heterogeneous lookup is supported in C++20 // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
@ -580,6 +588,7 @@ struct SemanticValues : protected std::vector<std::any> {
private: private:
friend class Context; friend class Context;
friend class Dictionary;
friend class Sequence; friend class Sequence;
friend class PrioritizedChoice; friend class PrioritizedChoice;
friend class Repetition; friend class Repetition;
@ -2673,12 +2682,17 @@ inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
inline size_t Dictionary::parse_core(const char *s, size_t n, inline size_t Dictionary::parse_core(const char *s, size_t n,
SemanticValues &vs, Context &c, SemanticValues &vs, Context &c,
std::any &dt) const { std::any &dt) const {
auto i = trie_.match(s, n); size_t id;
auto i = trie_.match(s, n, id);
if (i == 0) { if (i == 0) {
c.set_error_pos(s); c.set_error_pos(s);
return static_cast<size_t>(-1); return static_cast<size_t>(-1);
} }
vs.choice_count_ = trie_.size();
vs.choice_ = id;
// Word check // Word check
if (c.wordOpe) { if (c.wordOpe) {
auto save_ignore_trace_state = c.ignore_trace_state; auto save_ignore_trace_state = c.ignore_trace_state;
@ -2792,7 +2806,8 @@ inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
auto tok_ptr = dynamic_cast<const peg::TokenBoundary *>(ope_ptr); auto tok_ptr = dynamic_cast<const peg::TokenBoundary *>(ope_ptr);
if (tok_ptr) { ope_ptr = tok_ptr->ope_.get(); } if (tok_ptr) { ope_ptr = tok_ptr->ope_.get(); }
} }
if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_ptr)) { if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_ptr) &&
!dynamic_cast<const peg::Dictionary *>(ope_ptr)) {
chvs.choice_count_ = 0; chvs.choice_count_ = 0;
chvs.choice_ = 0; chvs.choice_ = 0;
} }

View File

@ -1429,6 +1429,20 @@ TEST(DicTest, Dictionary_invalid) {
EXPECT_FALSE(ret); EXPECT_FALSE(ret);
} }
TEST(DicTest, Dictionary_index) {
parser parser(R"(
START <- 'This month is ' MONTH '.'
MONTH <- 'Jan' | 'January' | 'Feb' | 'February'
)");
parser["MONTH"] = [](const SemanticValues &vs) {
EXPECT_EQ("Feb", vs.token());
EXPECT_EQ(2, vs.choice());
};
EXPECT_TRUE(parser.parse("This month is Feb."));
}
TEST(ErrorTest, Default_error_handling_1) { TEST(ErrorTest, Default_error_handling_1) {
parser pg(R"( parser pg(R"(
S <- '@' A B S <- '@' A B