README updated, bracket counting code fixed, added HTTP example

This commit is contained in:
Sergey Lyubka 2013-08-01 10:09:23 +01:00
parent 4b5c44036a
commit de6b3578a1
4 changed files with 94 additions and 53 deletions

View File

@ -1,4 +1,67 @@
slre
====
SLRE: Super Light Regular Expression library
============================================
Super Light Regular Expression library
SLRE is an ISO C library that implements a subset of Perl regular
expression syntax. Main focus of SLRE is small size, [simple
API](https://github.com/cesanta/slre/blob/master/slre.h), clarity of code
and extensibility. This makes it perfect for tasks like parsing network
requests, configuration files, user input, etc., when libraries like
[PCRE](http://pcre.org) are too heavyweight for the given task. Developers of
embedded systems would benefit most.
Extensibility is another great aspect of SLRE. For example, if one wants to
introduce a new metacharacter, '\i', meaning 'IPv4 address', it is easy to do
so with SLRE.
## Supported Syntax
^ Match beginning of a buffer
$ Match end of a buffer
() Grouping and substring capturing
[...] Match any character from set
[^...] Match any character but ones from set
\s Match whitespace
\S Match non-whitespace
\d Match decimal digit
+ Match one or more times (greedy)
+? Match one or more times (non-greedy)
* Match zero or more times (greedy)
*? Match zero or more times (non-greedy)
? Match zero or one time
\xDD Match byte with hex value 0xDD
\meta Match one of the meta characters: ^$().[*+\?
x|y Match x or y (alternation operator)
## API
int slre_match(const char *regexp, const char *buf, int buf_len,
struct slre_cap *caps, const char **error_msg);
`slre_match()` matches string buffer `buf` of length `buf_len` against
regular expression `regexp`, which should conform to the syntax outlined
above. If regular expression `regexp` contains brackets, `slre_match()`
will capture the respective substrings. Array of captures, `caps`,
must have at least as many elements as number of bracket pairs in the `regexp`.
`slre_match()` returns 0 if no match is found. Otherwise, it returns
the number of scanned bytes from the beginning of the string. This way,
it is easy to do repetitive matches. Hint: if it is required to know
the exact matched substring, enclose `regexp` in brackets and specify `caps`,
which should be an array of the following structures:
struct slre_cap {
const char *ptr; /* Points to the matched fragment */
int len; /* Length of the matched fragment */
};
## Example: parsing HTTP request
const char *error_msg, *request = " GET /index.html HTTP/1.0\r\n\r\n";
struct slre_cap caps[4];
if (slre_match("^\\s*(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
request, strlen(request), caps, &error_msg)) {
} else {
printf("Error parsing [%s]: [%s]\n", request, error_msg);
}

17
slre.c
View File

@ -157,7 +157,7 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
break;
case '+': case '?': case '*': case '\\': case '(': case ')':
case '^': case '$':
case '^': case '$': case '.': case '[': case ']':
FAIL_IF(re[i + 1] != s[j], static_error_no_match);
j++;
break;
@ -169,19 +169,20 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
break;
case '(':
FAIL_IF(bi + 1 >= info->num_brackets, static_error_internal);
DBG(("CAPTURING [%.*s] [%.*s]\n", info->brackets[bi + 1].len + 2,
bi++;
FAIL_IF(bi >= info->num_brackets, static_error_internal);
DBG(("CAPTURING [%.*s] [%.*s]\n", info->brackets[bi].len + 2,
re + i, s_len - j, s + j));
n = doh(s + j, s_len - j, caps, info, bi + 1);
DBG(("CAPTURED [%.*s] [%.*s]:%d\n", info->brackets[bi + 1].len + 2,
n = doh(s + j, s_len - j, caps, info, bi);
DBG(("CAPTURED [%.*s] [%.*s]:%d\n", info->brackets[bi].len + 2,
re + i, s_len - j, s + j, n));
FAIL_IF(n <= 0, static_error_no_match);
if (caps != NULL) {
caps[bi].ptr = s + j;
caps[bi].len = n;
caps[bi - 1].ptr = s + j;
caps[bi - 1].len = n;
}
j += n;
i += info->brackets[bi + 1].len + 1;
i += info->brackets[bi].len + 1;
break;
case '^':

39
slre.h
View File

@ -26,7 +26,7 @@ extern "C" {
/*
* This is a regular expression library that implements a subset of Perl RE.
* Please refer to http://cesanta.com/docs/slre for detailed reference.
* Please refer to https://github.com/cesanta/slre for detailed reference.
*/
/* This structure describes a matched fragment, a "capture" */
@ -49,43 +49,6 @@ struct slre_cap {
int slre_match(const char *regexp, const char *buf, int buf_len,
struct slre_cap *caps, const char **error_msg);
/*
* Supported syntax:
* ^ Match beginning of a buffer
* $ Match end of a buffer
* () Grouping and substring capturing
* [...] Match any character from set
* [^...] Match any character but ones from set
* \s Match whitespace
* \S Match non-whitespace
* \d Match decimal digit
* \r Match carriage return
* \n Match newline
* + Match one or more times (greedy)
* +? Match one or more times (non-greedy)
* * Match zero or more times (greedy)
* *? Match zero or more times (non-greedy)
* ? Match zero or once
* \xDD Match byte with hex value 0xDD
* \meta Match one of the meta character: ^$().[*+\?
* x|y Match x or y (alternation operator)
* Usage example: parsing HTTP request line.
*
* const char *request = "GET /index.html HTTP/1.0\r\n\r\n";
* struct slre_capture method, uri, version_min, version_maj;
*
* error = slre_match("^\\s*(GET|POST)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
* request, strlen(request),
* &method, &uri, &version_min, &version_maj);
*
* if (error != NULL) {
* printf("Error parsing HTTP request: %s\n", error);
* } else {
* printf("Requested URI: [%.*s]\n", uri.len, uri.ptr);
* }
*/
#ifdef __cplusplus
}
#endif

View File

@ -94,6 +94,7 @@ int main(void) {
ASSERT(memcmp(caps[0].ptr, "12", 2) == 0);
ASSERT(slre_match("(.*(2.))", "123", 3, caps, &msg) == 3);
ASSERT(slre_match("(.)(.)", "123", 3, caps, &msg) == 2);
ASSERT(slre_match("(\\d+)\\s+(\\S+)", "12 hi", 5, caps, &msg) == 5);
/* Greedy vs non-greedy */
ASSERT(slre_match(".+c", "abcabc", 6, NULL, &msg) == 6);
@ -113,18 +114,31 @@ int main(void) {
ASSERT(memcmp(caps[0].ptr, "bc", 2) == 0);
ASSERT(slre_match("(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)", "POST /x HTTP/1.1", 16,
caps, &msg) == 16);
#if 0
/* HTTP request */
ASSERT(slre_match("(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)",
"POST /x HTTP/1.1", 16, caps, &msg) == 14);
{
static const char *req = "POST /x HTTP/1.0\r\n\r\nPOST DATA";
int len = strlen(req);
ASSERT(slre_match("((\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)\r\n\r\n(.*))",
req, len, caps, &msg) == len);
}
#endif
/* Examples */
{
const char *error_msg, *request = " GET /index.html HTTP/1.0\r\n\r\n";
struct slre_cap caps[4];
if (slre_match("^\\s*(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
request, strlen(request), caps, &error_msg)) {
printf("Method: [%.*s], URI: [%.*s]\n",
caps[0].len, caps[0].ptr,
caps[1].len, caps[1].ptr);
} else {
printf("Error parsing [%s]: [%s]\n", request, error_msg);
}
}
printf("Unit test %s (total test: %d, failed tests: %d)\n",
static_failed_tests > 0 ? "FAILED" : "PASSED",