mirror of https://github.com/cesanta/slre.git
parent
354faf6509
commit
dfc6c21b5b
@ -1,4 +1,122 @@ |
|||||||
SLRE: Super Light Regular Expression library |
SLRE: Super Light Regular Expression library |
||||||
============================================ |
============================================ |
||||||
|
|
||||||
SLRE documentation is at [http://cesanta.com/docs.shtml](http://cesanta.com/docs.html?SLRE.md) |
SLRE is an ISO C library that implements a subset of Perl regular |
||||||
|
expression syntax. Main features of SLRE are: |
||||||
|
|
||||||
|
* Written in strict ANSI C'89 |
||||||
|
* Small size (compiled x86 code is about 5kB) |
||||||
|
* Uses little stack and does no dynamic memory allocation |
||||||
|
* Provides simple intuitive API |
||||||
|
* Implements most useful subset of Perl regex syntax (see below) |
||||||
|
* Easily extensible. E.g. if one wants to introduce a new |
||||||
|
metacharacter `\i`, meaning "IPv4 address", it is easy to do so with SLRE. |
||||||
|
|
||||||
|
SLRE is perfect for tasks like parsing network requests, configuration |
||||||
|
files, user input, etc., when libraries like [PCRE](http://pcre.org) are too |
||||||
|
heavyweight for the given task. Developers of embedded systems would benefit |
||||||
|
most. |
||||||
|
|
||||||
|
## Supported Syntax |
||||||
|
|
||||||
|
(?i) Must be at the beginning of the regex. Makes match case-insensitive |
||||||
|
^ Match beginning of a buffer |
||||||
|
$ Match end of a buffer |
||||||
|
() Grouping and substring capturing |
||||||
|
\s Match whitespace |
||||||
|
\S Match non-whitespace |
||||||
|
\d Match decimal digit |
||||||
|
+ Match one or more times (greedy) |
||||||
|
+? Match one or more times (non-greedy) |
||||||
|
* Match zero or more times (greedy) |
||||||
|
*? Match zero or more times (non-greedy) |
||||||
|
? Match zero or one time (non-greedy) |
||||||
|
x|y Match x or y (alternation operator) |
||||||
|
\meta Match one of the meta characters: ^$().[]*+?|\ |
||||||
|
\xHH Match byte with hex value 0xHH, e.g. \x4a |
||||||
|
[...] Match any character from set. Ranges like [a-z] are supported |
||||||
|
[^...] Match any character but ones from set |
||||||
|
|
||||||
|
Under development: Unicode support. |
||||||
|
|
||||||
|
## API |
||||||
|
|
||||||
|
int slre_match(const char *regexp, const char *buf, int buf_len, |
||||||
|
struct slre_cap *caps, int num_caps); |
||||||
|
|
||||||
|
`slre_match()` matches string buffer `buf` of length `buf_len` against |
||||||
|
regular expression `regexp`, which should conform to the syntax outlined |
||||||
|
above. If regular expression `regexp` contains brackets, `slre_match()` |
||||||
|
can capture the respective substrings into the array of `struct slre_cap` |
||||||
|
structures: |
||||||
|
|
||||||
|
/* Stores matched fragment for the expression inside brackets */ |
||||||
|
struct slre_cap { |
||||||
|
const char *ptr; /* Points to the matched fragment */ |
||||||
|
int len; /* Length of the matched fragment */ |
||||||
|
}; |
||||||
|
|
||||||
|
The N-th member of the `caps` array will contain the fragment that corresponds to the |
||||||
|
N-th opening bracket in the `regexp`; N is zero-based. `slre_match()` returns |
||||||
|
the number of bytes scanned from the beginning of the string. If the return value is |
||||||
|
greater than or equal to 0, there is a match. If the return value is less than 0, there |
||||||
|
is no match. Negative return codes are as follows: |
||||||
|
|
||||||
|
#define SLRE_NO_MATCH -1 |
||||||
|
#define SLRE_UNEXPECTED_QUANTIFIER -2 |
||||||
|
#define SLRE_UNBALANCED_BRACKETS -3 |
||||||
|
#define SLRE_INTERNAL_ERROR -4 |
||||||
|
#define SLRE_INVALID_CHARACTER_SET -5 |
||||||
|
#define SLRE_INVALID_METACHARACTER -6 |
||||||
|
#define SLRE_CAPS_ARRAY_TOO_SMALL -7 |
||||||
|
#define SLRE_TOO_MANY_BRANCHES -8 |
||||||
|
#define SLRE_TOO_MANY_BRACKETS -9 |
||||||
|
|
||||||
|
|
||||||
|
## Example: parsing HTTP request line |
||||||
|
|
||||||
|
const char *request = " GET /index.html HTTP/1.0\r\n\r\n"; |
||||||
|
struct slre_cap caps[4]; |
||||||
|
|
||||||
|
if (slre_match("^\\s*(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)", |
||||||
|
request, strlen(request), caps, 4) > 0) { |
||||||
|
printf("Method: [%.*s], URI: [%.*s]\n", |
||||||
|
caps[0].len, caps[0].ptr, |
||||||
|
caps[1].len, caps[1].ptr); |
||||||
|
} else { |
||||||
|
printf("Error parsing [%s]\n", request); |
||||||
|
} |
||||||
|
|
||||||
|
## Example: find all URLs in a string |
||||||
|
|
||||||
|
static const char *str = |
||||||
|
"<img src=\"HTTPS://FOO.COM/x?b#c=tab1\"/> " |
||||||
|
" <a href=\"http://cesanta.com\">some link</a>"; |
||||||
|
|
||||||
|
static const char *regex = "(?i)((https?://)[^\\s/'\"<>]+/?[^\\s'\"<>]*)"; |
||||||
|
struct slre_cap caps[2]; |
||||||
|
int i, j = 0, str_len = strlen(str); |
||||||
|
|
||||||
|
while (j < str_len && |
||||||
|
(i = slre_match(regex, str + j, str_len - j, caps, 2)) > 0) { |
||||||
|
printf("Found URL: [%.*s]\n", caps[0].len, caps[0].ptr); |
||||||
|
j += i; |
||||||
|
} |
||||||
|
|
||||||
|
Output: |
||||||
|
|
||||||
|
Found URL: [HTTPS://FOO.COM/x?b#c=tab1] |
||||||
|
Found URL: [http://cesanta.com] |
||||||
|
|
||||||
|
# License |
||||||
|
|
||||||
|
SLRE is released under |
||||||
|
[GNU GPL v.2](http://www.gnu.org/licenses/old-licenses/gpl-2.0.html). |
||||||
|
Businesses have an option to get a non-restrictive, royalty-free commercial |
||||||
|
license and professional support from |
||||||
|
[Cesanta Software](http://cesanta.com). |
||||||
|
|
||||||
|
[Super Light DNS Resolver](https://github.com/cesanta/sldr), |
||||||
|
[Mongoose web server](https://github.com/cesanta/mongoose) |
||||||
|
are other projects by Cesanta Software, developed with the same philosophy |
||||||
|
of functionality and simplicity. |
||||||
|
Loading…
Reference in new issue