diff --git a/README.md b/README.md
index cbbcab6..1e7f92c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ The PEG syntax is well described on page 2 in the [document](http://pdos.csail.m
   * `<` ... `>` (Anchor operator)
   * `$<` ... `>` (Capture operator)
   * `~` (Ignore operator)
+  * `\x??` (Hex number char)
 
 How to use
 ----------
diff --git a/peglib.h b/peglib.h
index 56bf2d6..23956a9 100644
--- a/peglib.h
+++ b/peglib.h
@@ -1119,8 +1119,9 @@ private:
 
         g["Range"]      <= cho(seq(g["Char"], chr('-'), g["Char"]), g["Char"]);
         g["Char"]       <= cho(seq(chr('\\'), cls("nrt'\"[]\\")),
-                               seq(chr('\\'), cls("0-2"), cls("0-7"), cls("0-7")), // TODO: 0-2 should be 0-3. bug in the spec...
+                               seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
                                seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
+                               seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
                                seq(npd(chr('\\')), dot()));
 
         g["LEFTARROW"]  <= seq(lit("<-"), g["Spacing"]);
@@ -1342,6 +1343,72 @@ private:
         return cxt.grammar;
     }
 
+    // Converts a hexadecimal digit character to its numeric value.
+    // On success stores the value (0-15) in `v` and returns true;
+    // otherwise returns false and leaves `v` unchanged.
+    bool is_hex(char c, int& v) {
+        if ('0' <= c && c <= '9') {
+            v = c - '0';
+            return true;
+        } else if ('a' <= c && c <= 'f') {
+            v = c - 'a' + 10;
+            return true;
+        } else if ('A' <= c && c <= 'F') {
+            v = c - 'A' + 10;
+            return true;
+        }
+        return false;
+    }
+
+    // Converts a decimal digit character to its numeric value.
+    // On success stores the value (0-9) in `v` and returns true;
+    // otherwise returns false and leaves `v` unchanged.
+    bool is_digit(char c, int& v) {
+        if ('0' <= c && c <= '9') {
+            v = c - '0';
+            return true;
+        }
+        return false;
+    }
+
+    // Parses one or two hex digits starting at s[i] (`l` is the length of
+    // `s`). Returns the decoded character and the index of the last digit
+    // consumed; with no hex digit at s[i] the result is ('\0', i).
+    std::tuple<char, size_t> parse_hex_number(const char* s, size_t l, size_t i) {
+        int ret = 0;
+        int n = 0;
+        if (i < l && is_hex(s[i], n)) {
+            ret = n;
+            if (i + 1 < l && is_hex(s[i + 1], n)) {
+                ret = ret * 16 + n;
+                i++;
+            }
+        }
+        return std::make_tuple(static_cast<char>(ret), i);
+    }
+
+    // Parses an octal escape whose first digit is at s[i], mirroring the
+    // "Char" grammar rule: up to three digits when the first is 0-3 (so the
+    // value fits in one byte), otherwise at most two. Returns the decoded
+    // character and the index of the last digit consumed.
+    std::tuple<char, size_t> parse_octal_number(const char* s, size_t l, size_t i) {
+        int ret = 0;
+        int first = 0;
+        int n = 0;
+        if (i < l && is_digit(s[i], first) && first < 8) {
+            ret = first;
+            if (i + 1 < l && is_digit(s[i + 1], n) && n < 8) {
+                ret = ret * 8 + n;
+                i++;
+                if (first < 4 && i + 1 < l && is_digit(s[i + 1], n) && n < 8) {
+                    ret = ret * 8 + n;
+                    i++;
+                }
+            }
+        }
+        return std::make_tuple(static_cast<char>(ret), i);
+    }
+
     std::string resolve_escape_sequence(const char* s, size_t l) {
         std::string r;
         r.reserve(l);
@@ -1359,9 +1426,16 @@ private:
                     case '[': r += '['; break;
                     case ']': r += ']'; break;
                     case '\\': r += '\\'; break;
+                    case 'x': {
+                        std::tie(ch, i) = parse_hex_number(s, l, i + 1);
+                        r += ch;
+                        break;
+                    }
                     default: {
-                        // TODO: Octal number support
-                        assert(false);
+                        // No prefix character to skip here (unlike 'x'): the
+                        // character at s[i] is already the first octal digit.
+                        std::tie(ch, i) = parse_octal_number(s, l, i);
+                        r += ch;
                         break;
                     }
                 }
diff --git a/test/test.cc b/test/test.cc
index 04c9e3c..250969b 100644
--- a/test/test.cc
+++ b/test/test.cc
@@ -183,6 +183,19 @@ TEST_CASE("Backtracking test", "[general]")
     REQUIRE(count == 2);
 }
 
+TEST_CASE("Octal/Hex value test", "[general]")
+{
+    // The backslashes are doubled so that the escapes reach the PEG parser
+    // instead of being decoded by the C++ compiler.
+    peglib::peg parser(
+        " ROOT <- '\\132\\x7a' "
+    );
+
+    auto ret = parser.parse("Zz");
+
+    REQUIRE(ret == true);
+}
+
 TEST_CASE("mutable lambda test", "[general]")
 {
     vector vec;
@@ -539,8 +552,8 @@ TEST_CASE("PEG Char", "[peg]")
     REQUIRE(exact(g, "Char", "\\]") == true);
     REQUIRE(exact(g, "Char", "\\\\") == true);
     REQUIRE(exact(g, "Char", "\\000") == true);
-    REQUIRE(exact(g, "Char", "\\277") == true);
-    REQUIRE(exact(g, "Char", "\\377") == false);
+    REQUIRE(exact(g, "Char", "\\377") == true);
+    REQUIRE(exact(g, "Char", "\\477") == false);
     REQUIRE(exact(g, "Char", "\\087") == false);
     REQUIRE(exact(g, "Char", "\\079") == false);
     REQUIRE(exact(g, "Char", "\\00") == true);