mirror of
https://github.com/cesanta/slre.git
synced 2025-03-12 16:55:30 +00:00
README updated, bracket counting code fixed, added HTTP example
This commit is contained in:
parent
4b5c44036a
commit
de6b3578a1
69
README.md
69
README.md
@ -1,4 +1,67 @@
|
||||
slre
|
||||
====
|
||||
SLRE: Super Light Regular Expression library
|
||||
============================================
|
||||
|
||||
Super Light Regular Expression library
|
||||
SLRE is an ISO C library that implements a subset of Perl regular
|
||||
expression syntax. The main focus of SLRE is small size, [simple
|
||||
API](https://github.com/cesanta/slre/blob/master/slre.h), clarity of code
|
||||
and extensibility. This makes it perfect for tasks like parsing network
|
||||
requests, configuration files, user input, etc, when libraries like
|
||||
[PCRE](http://pcre.org) are too heavyweight for the given task. Developers in
|
||||
embedded systems would benefit most.
|
||||
|
||||
Extensibility is another great aspect of SLRE. For example, if one wants to
|
||||
introduce a new metacharacter, '\i', meaning 'IPv4 address', it is easy to do
|
||||
so with SLRE.
|
||||
|
||||
## Supported Syntax
|
||||
|
||||
^ Match beginning of a buffer
|
||||
$ Match end of a buffer
|
||||
() Grouping and substring capturing
|
||||
[...] Match any character from set
|
||||
[^...] Match any character but ones from set
|
||||
\s Match whitespace
|
||||
\S Match non-whitespace
|
||||
\d Match decimal digit
|
||||
+ Match one or more times (greedy)
|
||||
+? Match one or more times (non-greedy)
|
||||
* Match zero or more times (greedy)
|
||||
*? Match zero or more times (non-greedy)
|
||||
? Match zero or one time
|
||||
\xDD Match byte with hex value 0xDD
|
||||
\meta Match one of the meta characters: ^$().[*+\?
|
||||
x|y Match x or y (alternation operator)
|
||||
|
||||
## API
|
||||
|
||||
int slre_match(const char *regexp, const char *buf, int buf_len,
|
||||
struct slre_cap *caps, const char **error_msg);
|
||||
|
||||
|
||||
`slre_match()` matches string buffer `buf` of length `buf_len` against
|
||||
regular expression `regexp`, which should conform to the syntax outlined
|
||||
above. If regular expression `regexp` contains brackets, `slre_match()`
|
||||
will capture the respective substrings. Array of captures, `caps`,
|
||||
must have at least as many elements as the number of bracket pairs in the `regexp`.
|
||||
|
||||
`slre_match()` returns 0 if there is no match found. Otherwise, it returns
|
||||
the number of scanned bytes from the beginning of the string. This way,
|
||||
it is easy to do repetitive matches. Hint: if it is required to know
|
||||
the exact matched substring, enclose `regexp` in brackets and specify `caps`,
|
||||
which should be an array of the following structures:
|
||||
|
||||
struct slre_cap {
|
||||
const char *ptr; /* Points to the matched fragment */
|
||||
int len; /* Length of the matched fragment */
|
||||
};
|
||||
|
||||
## Example: parsing HTTP request
|
||||
|
||||
const char *error_msg, *request = " GET /index.html HTTP/1.0\r\n\r\n";
|
||||
struct slre_cap caps[4];
|
||||
|
||||
if (slre_match("^\\s*(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
|
||||
request, strlen(request), caps, &error_msg)) {
|
||||
} else {
|
||||
printf("Error parsing [%s]: [%s]\n", request, error_msg);
|
||||
}
|
||||
|
17
slre.c
17
slre.c
@ -157,7 +157,7 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
|
||||
break;
|
||||
|
||||
case '+': case '?': case '*': case '\\': case '(': case ')':
|
||||
case '^': case '$':
|
||||
case '^': case '$': case '.': case '[': case ']':
|
||||
FAIL_IF(re[i + 1] != s[j], static_error_no_match);
|
||||
j++;
|
||||
break;
|
||||
@ -169,19 +169,20 @@ static int bar(const char *re, int re_len, const char *s, int s_len,
|
||||
break;
|
||||
|
||||
case '(':
|
||||
FAIL_IF(bi + 1 >= info->num_brackets, static_error_internal);
|
||||
DBG(("CAPTURING [%.*s] [%.*s]\n", info->brackets[bi + 1].len + 2,
|
||||
bi++;
|
||||
FAIL_IF(bi >= info->num_brackets, static_error_internal);
|
||||
DBG(("CAPTURING [%.*s] [%.*s]\n", info->brackets[bi].len + 2,
|
||||
re + i, s_len - j, s + j));
|
||||
n = doh(s + j, s_len - j, caps, info, bi + 1);
|
||||
DBG(("CAPTURED [%.*s] [%.*s]:%d\n", info->brackets[bi + 1].len + 2,
|
||||
n = doh(s + j, s_len - j, caps, info, bi);
|
||||
DBG(("CAPTURED [%.*s] [%.*s]:%d\n", info->brackets[bi].len + 2,
|
||||
re + i, s_len - j, s + j, n));
|
||||
FAIL_IF(n <= 0, static_error_no_match);
|
||||
if (caps != NULL) {
|
||||
caps[bi].ptr = s + j;
|
||||
caps[bi].len = n;
|
||||
caps[bi - 1].ptr = s + j;
|
||||
caps[bi - 1].len = n;
|
||||
}
|
||||
j += n;
|
||||
i += info->brackets[bi + 1].len + 1;
|
||||
i += info->brackets[bi].len + 1;
|
||||
break;
|
||||
|
||||
case '^':
|
||||
|
39
slre.h
39
slre.h
@ -26,7 +26,7 @@ extern "C" {
|
||||
|
||||
/*
|
||||
* This is a regular expression library that implements a subset of Perl RE.
|
||||
* Please refer to http://cesanta.com/docs/slre for detailed reference.
|
||||
* Please refer to https://github.com/cesanta/slre for detailed reference.
|
||||
*/
|
||||
|
||||
/* This structure describes a matched fragment, a "capture" */
|
||||
@ -49,43 +49,6 @@ struct slre_cap {
|
||||
int slre_match(const char *regexp, const char *buf, int buf_len,
|
||||
struct slre_cap *caps, const char **error_msg);
|
||||
|
||||
/*
|
||||
* Supported syntax:
|
||||
* ^ Match beginning of a buffer
|
||||
* $ Match end of a buffer
|
||||
* () Grouping and substring capturing
|
||||
* [...] Match any character from set
|
||||
* [^...] Match any character but ones from set
|
||||
* \s Match whitespace
|
||||
* \S Match non-whitespace
|
||||
* \d Match decimal digit
|
||||
* \r Match carriage return
|
||||
* \n Match newline
|
||||
* + Match one or more times (greedy)
|
||||
* +? Match one or more times (non-greedy)
|
||||
* * Match zero or more times (greedy)
|
||||
* *? Match zero or more times (non-greedy)
|
||||
* ? Match zero or once
|
||||
* \xDD Match byte with hex value 0xDD
|
||||
* \meta Match one of the meta character: ^$().[*+\?
|
||||
* x|y Match x or y (alternation operator)
|
||||
|
||||
* Usage example: parsing HTTP request line.
|
||||
*
|
||||
* const char *request = "GET /index.html HTTP/1.0\r\n\r\n";
|
||||
* struct slre_capture method, uri, version_min, version_maj;
|
||||
*
|
||||
* error = slre_match("^\\s*(GET|POST)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
|
||||
* request, strlen(request),
|
||||
* &method, &uri, &version_min, &version_maj);
|
||||
*
|
||||
* if (error != NULL) {
|
||||
* printf("Error parsing HTTP request: %s\n", error);
|
||||
* } else {
|
||||
* printf("Requested URI: [%.*s]\n", uri.len, uri.ptr);
|
||||
* }
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
22
unit_test.c
22
unit_test.c
@ -94,6 +94,7 @@ int main(void) {
|
||||
ASSERT(memcmp(caps[0].ptr, "12", 2) == 0);
|
||||
ASSERT(slre_match("(.*(2.))", "123", 3, caps, &msg) == 3);
|
||||
ASSERT(slre_match("(.)(.)", "123", 3, caps, &msg) == 2);
|
||||
ASSERT(slre_match("(\\d+)\\s+(\\S+)", "12 hi", 5, caps, &msg) == 5);
|
||||
|
||||
/* Greedy vs non-greedy */
|
||||
ASSERT(slre_match(".+c", "abcabc", 6, NULL, &msg) == 6);
|
||||
@ -113,18 +114,31 @@ int main(void) {
|
||||
ASSERT(memcmp(caps[0].ptr, "bc", 2) == 0);
|
||||
|
||||
|
||||
ASSERT(slre_match("(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)", "POST /x HTTP/1.1", 16,
|
||||
caps, &msg) == 16);
|
||||
#if 0
|
||||
/* HTTP request */
|
||||
ASSERT(slre_match("(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)",
|
||||
"POST /x HTTP/1.1", 16, caps, &msg) == 14);
|
||||
|
||||
{
|
||||
static const char *req = "POST /x HTTP/1.0\r\n\r\nPOST DATA";
|
||||
int len = strlen(req);
|
||||
ASSERT(slre_match("((\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)\r\n\r\n(.*))",
|
||||
req, len, caps, &msg) == len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Examples */
|
||||
{
|
||||
const char *error_msg, *request = " GET /index.html HTTP/1.0\r\n\r\n";
|
||||
struct slre_cap caps[4];
|
||||
|
||||
if (slre_match("^\\s*(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
|
||||
request, strlen(request), caps, &error_msg)) {
|
||||
printf("Method: [%.*s], URI: [%.*s]\n",
|
||||
caps[0].len, caps[0].ptr,
|
||||
caps[1].len, caps[1].ptr);
|
||||
} else {
|
||||
printf("Error parsing [%s]: [%s]\n", request, error_msg);
|
||||
}
|
||||
}
|
||||
|
||||
printf("Unit test %s (total test: %d, failed tests: %d)\n",
|
||||
static_failed_tests > 0 ? "FAILED" : "PASSED",
|
||||
|
Loading…
Reference in New Issue
Block a user