Added case insensitive flag

pull/1/head
Sergey Lyubka 11 years ago
parent 7f8fbe5680
commit c59863843b
  1. 30
      README.md
  2. 54
      slre.c

@ -21,21 +21,21 @@ heavyweight for the given task. Developers of embedded systems would benefit
most.
## Supported Syntax
^ Match beginning of a buffer
$ Match end of a buffer
() Grouping and substring capturing
\s Match whitespace
\S Match non-whitespace
\d Match decimal digit
+ Match one or more times (greedy)
+? Match one or more times (non-greedy)
* Match zero or more times (greedy)
*? Match zero or more times (non-greedy)
? Match zero or once
x|y Match x or y (alternation operator)
\meta Match one of the meta character: ^$().[]*+?|\
\xHH Match byte with hex value 0xHH
(?i) Must be at the beginning of the regex. Makes match case-insensitive
^ Match beginning of a buffer
$ Match end of a buffer
() Grouping and substring capturing
\s Match whitespace
\S Match non-whitespace
\d Match decimal digit
+ Match one or more times (greedy)
+? Match one or more times (non-greedy)
* Match zero or more times (greedy)
*? Match zero or more times (non-greedy)
? Match zero or once
x|y Match x or y (alternation operator)
\meta Match one of the meta character: ^$().[]*+?|\
\xHH Match byte with hex value 0xHH
Not yet supported but in progress:

@ -97,22 +97,21 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
/* i is offset in re, j is offset in s, bi is brackets index */
int i, j, n, step;
(void) caps;
DBG(("%s [%.*s] [%.*s]\n", __func__, re_len, re, s_len, s));
for (i = j = 0; i < re_len && j < s_len; i += step) {
step = get_op_len(re + i);
DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n",
__func__, re_len - i, re + i,
s_len - j, s + j, re_len, step, i, j));
/* Handle quantifiers. Get the length of the chunk. */
step = re[i] == '(' ? info->brackets[bi + 1].len + 2: get_op_len(re + i);
DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n", __func__,
re_len - i, re + i, s_len - j, s + j, re_len, step, i, j));
FAIL_IF(is_quantifier(&re[i]), static_error_unexpected_quantifier);
FAIL_IF(step <= 0, static_error_internal);
/* Handle quantifiers. Look ahead. */
if (i + step < re_len && is_quantifier(re + i + step)) {
DBG(("QUANTIFIER: [%.*s] %c\n", step, re + i, re[i + step]));
if (re[i + step] == '?') {
j += bar(re + i, step, s + j, s_len - j, caps, info, bi);
i++;
@ -145,6 +144,7 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
}
}
/* Quantifiers handled. Process the rest. */
switch (re[i]) {
case '\\':
/* Metacharacters */
@ -191,18 +191,15 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
case '(':
bi++;
FAIL_IF(bi >= info->num_brackets, static_error_internal);
DBG(("CAPTURING [%.*s] [%.*s]\n", info->brackets[bi].len + 2,
re + i, s_len - j, s + j));
DBG(("CAPTURING [%.*s] [%.*s]\n", step, re + i, s_len - j, s + j));
n = doh(s + j, s_len - j, caps, info, bi);
DBG(("CAPTURED [%.*s] [%.*s]:%d\n", info->brackets[bi].len + 2,
re + i, s_len - j, s + j, n));
FAIL_IF(n <= 0, static_error_no_match);
DBG(("CAPTURED [%.*s] [%.*s]:%d\n", step, re + i, s_len - j, s + j, n));
FAIL_IF(n <= 0, info->error_msg);
if (caps != NULL) {
caps[bi - 1].ptr = s + j;
caps[bi - 1].len = n;
}
j += n;
i += info->brackets[bi].len + 1;
break;
case '^':
@ -223,7 +220,12 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
break;
default:
FAIL_IF(re[i] != s[j], static_error_no_match);
if (info->flags & IGNORE_CASE) {
FAIL_IF(tolower(((unsigned char *) re)[i]) !=
tolower(((unsigned char *) s)[j]), static_error_no_match);
} else {
FAIL_IF(re[i] != s[j], static_error_no_match);
}
j++;
break;
}
@ -340,7 +342,7 @@ static int foo(const char *re, int re_len, const char *s, int s_len,
result = 0;
for (i = 0; i < s_len; i++) {
result = doh(s + i, s_len - i, caps, info, 0);
DBG((" (iter) -> %d [%.*s] [%.*s] [%s]\n", result, re_len, re,
DBG((" (iter %d) -> %d [%.*s] [%.*s] [%s]\n", i, result, re_len, re,
s_len - i, s + i, info->error_msg));
if (result > 0 || re[0] == '^') {
result += i;
@ -361,6 +363,13 @@ int slre_match(const char *regexp, const char *s, int s_len,
info.error_msg = static_error_no_match;
DBG(("========================> [%s] [%.*s]\n", regexp, s_len, s));
/* Handle regexp flags. At the moment, only 'i' is supported */
if (memcmp(regexp, "(?i)", 4) == 0) {
info.flags |= IGNORE_CASE;
regexp += 4;
}
result = foo(regexp, strlen(regexp), s, s_len, caps, &info);
if (error_msg != NULL) {
@ -420,11 +429,15 @@ int main(void) {
const char *msg = "";
struct slre_cap caps[10];
#if 0
#endif
/* Flags - case sensitivity */
ASSERT(slre_match("FO", "foo", 3, NULL, &msg) == 0);
ASSERT(slre_match("(?i)FO", "foo", 3, NULL, &msg) == 2);
ASSERT(slre_match("(?m)FO", "foo", 3, NULL, &msg) == 0);
ASSERT(slre_match("(?m)x", "foo", 3, NULL, &msg) == 0);
ASSERT(strcmp(msg, static_error_unexpected_quantifier) == 0);
ASSERT(slre_match("fo", "foo", 3, NULL, &msg) == 2);
ASSERT(slre_match(".+", "foo", 3, NULL, &msg) == 3);
ASSERT(slre_match(".+k", "fooklmn", 7, NULL, &msg) == 4);
ASSERT(slre_match(".+k.", "fooklmn", 7, NULL, &msg) == 5);
ASSERT(slre_match("p+", "fooklmn", 7, NULL, &msg) == 0);
@ -480,6 +493,11 @@ int main(void) {
ASSERT(slre_match("(.*(2.))", "123", 3, caps, &msg) == 3);
ASSERT(slre_match("(.)(.)", "123", 3, caps, &msg) == 2);
ASSERT(slre_match("(\\d+)\\s+(\\S+)", "12 hi", 5, caps, &msg) == 5);
ASSERT(slre_match("ab(cd)+ef", "abcdcdef", 8, NULL, &msg) == 8);
ASSERT(slre_match("ab(cd)*ef", "abcdcdef", 8, NULL, &msg) == 8);
ASSERT(slre_match("ab(cd)+?ef", "abcdcdef", 8, NULL, &msg) == 8);
ASSERT(slre_match("ab(cd)+?.", "abcdcdef", 8, NULL, &msg) == 5);
ASSERT(slre_match("ab(cd)?", "abcdcdef", 8, NULL, &msg) == 4);
/* Greedy vs non-greedy */
ASSERT(slre_match(".+c", "abcabc", 6, NULL, &msg) == 6);

Loading…
Cancel
Save