From f48c4821f7f41f9547c53a4004f3c53f213147c4 Mon Sep 17 00:00:00 2001
From: Sergey Lyubka
Date: Wed, 31 Jul 2013 08:56:42 +0100
Subject: [PATCH] Added handling of "*" quantifier and non-greedy match

---
 slre.c      | 14 +++++++++-----
 unit_test.c | 10 ++++++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/slre.c b/slre.c
index ac2fb5f..f51ec05 100644
--- a/slre.c
+++ b/slre.c
@@ -74,8 +74,7 @@ struct regex_info {
 };
 
 static int get_op_len(const char *re) {
-  return re[0] == '\\' ? 2 :
-    (re[0] == '*' || re[0] == '+') && re[1] == '?' ? 2 : 1;
+  return re[0] == '\\' ? 2 : 1;
 }
 
 static int is_quantifier(const char *re) {
@@ -123,11 +122,15 @@ static int m1(const char *re, int re_len, const char *s, int s_len,
         j += m1(re + i, step, s + j, s_len - j, caps, info);
         i++;
         continue;
-      } else if (re[i + step] == '+') {
-        int j2 = j, nj = 0, n1, n2, ni, next_step;
+      } else if (re[i + step] == '+' || re[i + step] == '*') {
+        int j2 = j, nj = 0, n1, n2, ni, next_step, non_greedy = 0;
 
         /* Points to the regexp code after the quantifier */
         next_step = get_op_len(re + i + step);
+        if (i + step + 1 < re_len && re[i + step + 1] == '?') {
+          non_greedy = 1;
+          next_step++;
+        }
         ni = i + step + next_step;
 
         while ((n1 = m1(re + i, step, s + j2, s_len - j2, caps, info)) > 0) {
@@ -138,9 +141,10 @@ static int m1(const char *re, int re_len, const char *s, int s_len,
                        s_len - (j2 + n1), caps, info)) > 0) {
             nj = j2 + n1 + n2;
           }
+          if (nj > 0 && non_greedy) break;
           j2 += n1;
         }
-        FAIL_IF(nj == 0, static_error_no_match);
+        FAIL_IF(re[i + step] == '+' && nj == 0, static_error_no_match);
         return nj;
       }
     }
diff --git a/unit_test.c b/unit_test.c
index 373a127..ff91f46 100644
--- a/unit_test.c
+++ b/unit_test.c
@@ -68,6 +68,7 @@ int main(void) {
   ASSERT(slre_match("()+", "fooklmn", 7, NULL, &msg) == 0);
   ASSERT(strcmp(msg, static_error_no_match) == 0);
 
+  /* Balancing brackets */
   ASSERT(slre_match("(x))", "fooklmn", 7, NULL, &msg) == 0);
   ASSERT(strcmp(msg, static_error_unbalanced_brackets) == 0);
   ASSERT(slre_match("(", "fooklmn", 7, NULL, &msg) == 0);
@@ -76,9 +77,18 @@ int main(void) {
   ASSERT(slre_match("klz?mn", "fooklmn", 7, NULL, &msg) == 7);
   ASSERT(slre_match("fa?b", "fooklmn", 7, NULL, &msg) == 0);
 
+  /* Brackets & capturing */
   ASSERT(slre_match("^(te)", "tenacity subdues all", 20, NULL, &msg) == 2);
   ASSERT(slre_match("(bc)", "abcdef", 6, NULL, &msg) == 3);
   ASSERT(slre_match(".(d.)", "abcdef", 6, NULL, &msg) == 5);
+  ASSERT(slre_match(".(d.)\\)?", "abcdef", 6, NULL, &msg) == 5);
+
+  /* Greedy vs non-greedy */
+  ASSERT(slre_match(".+c", "abcabc", 6, NULL, &msg) == 6);
+  ASSERT(slre_match(".+?c", "abcabc", 6, NULL, &msg) == 3);
+  ASSERT(slre_match(".*?c", "abcabc", 6, NULL, &msg) == 3);
+  ASSERT(slre_match(".*c", "abcabc", 6, NULL, &msg) == 6);
+  ASSERT(slre_match("bc.d?k?b+", "abcabc", 6, NULL, &msg) == 5);
 
   printf("Unit test %s (total test: %d, failed tests: %d)\n",
          static_failed_tests > 0 ? "FAILED" : "PASSED",