UTF8 validation in json_value_init_string (+ tests).

pull/35/head
Krzysztof Gabis 9 years ago
parent 7ead2102b8
commit 7fd8dc1c4c
  1. 98
      parson.c
  2. 34
      tests.c

@ -50,6 +50,8 @@
#define PRINT_AND_SKIP(str, to_append) str += sprintf(str, to_append);
#define PRINTF_AND_SKIP(str, format, to_append) str += sprintf(str, format, to_append);
#define IS_CONT(b) (((unsigned char)(b) & 0xC0) == 0x80) /* is utf-8 continuation byte */
/* Type definitions */
typedef union json_value_value {
const char *string;
@ -84,9 +86,12 @@ static void remove_comments(char *string, const char *start_token, const char
static int try_realloc(void **ptr, size_t new_size);
static char * parson_strndup(const char *string, size_t n);
static char * parson_strdup(const char *string);
static int is_utf(const unsigned char *string);
static int is_utf16_hex(const unsigned char *string);
static int num_bytes_in_utf8_sequence(unsigned char c);
static int verify_utf8_sequence(const unsigned char *string, int *len);
static int is_valid_utf8(const char *string, size_t string_len);
static int is_decimal(const char *string, size_t length);
static size_t parson_strlen(const char *string);
static size_t serialization_strlen(const char *string);
/* JSON Object */
static JSON_Object * json_object_init(void);
@ -149,10 +154,79 @@ static char * parson_strdup(const char *string) {
return parson_strndup(string, strlen(string));
}
static int is_utf(const unsigned char *s) {
static int is_utf16_hex(const unsigned char *s) {
return isxdigit(s[0]) && isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]);
}
static int num_bytes_in_utf8_sequence(unsigned char c) {
if (c == 0xC0 || c == 0xC1 || c > 0xF4 || IS_CONT(c)) {
return 0;
} else if ((c & 0x80) == 0) { /* 0xxxxxxx */
return 1;
} else if ((c & 0xE0) == 0xC0) { /* 110xxxxx */
return 2;
} else if ((c & 0xF0) == 0xE0) { /* 1110xxxx */
return 3;
} else if ((c & 0xF8) == 0xF0) { /* 11110xxx */
return 4;
}
return 0; /* won't happen */
}
static int verify_utf8_sequence(const unsigned char *string, int *len) {
unsigned int cp = 0;
*len = num_bytes_in_utf8_sequence(string[0]);
if (*len == 1) {
cp = string[0];
} else if (*len == 2 && IS_CONT(string[1])) {
cp = string[0] & 0x1F;
cp = (cp << 6) | (string[1] & 0x3F);
} else if (*len == 3 && IS_CONT(string[1]) && IS_CONT(string[2])) {
cp = ((unsigned char)string[0]) & 0xF;
cp = (cp << 6) | (string[1] & 0x3F);
cp = (cp << 6) | (string[2] & 0x3F);
} else if (*len == 4 && IS_CONT(string[1]) && IS_CONT(string[2]) && IS_CONT(string[3])) {
cp = string[0] & 0x7;
cp = (cp << 6) | (string[1] & 0x3F);
cp = (cp << 6) | (string[2] & 0x3F);
cp = (cp << 6) | (string[3] & 0x3F);
} else {
return 0;
}
/* overlong encodings */
if ((cp < 0x80 && *len > 1) ||
(cp < 0x800 && *len > 2) ||
(cp < 0x10000 && *len > 3)) {
return 0;
}
/* invalid unicode */
if (cp > 0x10FFFF) {
return 0;
}
/* surrogate halves */
if (cp >= 0xD800 && cp <= 0xDFFF) {
return 0;
}
return 1;
}
static int is_valid_utf8(const char *string, size_t string_len) {
int len = 0;
const char *string_end = string + string_len;
while (string < string_end) {
if (!verify_utf8_sequence((const unsigned char*)string, &len)) {
return 0;
}
string += len;
}
return 1;
}
static int is_decimal(const char *string, size_t length) {
if (length > 1 && string[0] == '0' && string[1] != '.')
return 0;
@ -164,7 +238,7 @@ static int is_decimal(const char *string, size_t length) {
return 1;
}
static size_t parson_strlen(const char *string) {
static size_t serialization_strlen(const char *string) {
size_t result = 0;
size_t i = 0, len = strlen(string);
for (i = 0; i < len; i++) {
@ -374,7 +448,7 @@ static int parse_utf_16(const char **unprocessed, char **processed) {
char *processed_ptr = *processed;
const char *unprocessed_ptr = *unprocessed;
unprocessed_ptr++; /* skips u */
if (!is_utf((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF)
if (!is_utf16_hex((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF)
return JSONFailure;
if (cp < 0x80) {
*processed_ptr = cp; /* 0xxxxxxx */
@ -389,7 +463,7 @@ static int parse_utf_16(const char **unprocessed, char **processed) {
lead = cp;
unprocessed_ptr += 4; /* should always be within the buffer, otherwise previous sscanf would fail */
if (*unprocessed_ptr++ != '\\' || *unprocessed_ptr++ != 'u' || /* starts with \u? */
!is_utf((const unsigned char*)unprocessed_ptr) ||
!is_utf16_hex((const unsigned char*)unprocessed_ptr) ||
sscanf(unprocessed_ptr, "%4x", &trail) == EOF ||
trail < 0xDC00 || trail > 0xDFFF) { /* valid trail surrogate? (0xDC00..0xDFFF) */
return JSONFailure;
@ -645,12 +719,12 @@ static size_t json_serialization_size_r(const JSON_Value *value, char *buf) {
result_size += (count * 2) - 1; /* : between keys and values and , between items */
for (i = 0; i < count; i++) {
key = json_object_get_name(object, i);
result_size += parson_strlen(key) + 2; /* string and quotes */
result_size += serialization_strlen(key) + 2; /* string and quotes */
result_size += json_serialization_size_r(json_object_get_value(object, key), buf);
}
return result_size;
case JSONString:
return parson_strlen(json_value_get_string(value)) + 2; /* string and quotes */
return serialization_strlen(json_value_get_string(value)) + 2; /* string and quotes */
case JSONBoolean:
if (json_value_get_boolean(value))
return 4; /* strlen("true"); */
@ -980,9 +1054,13 @@ JSON_Value * json_value_init_array(void) {
JSON_Value * json_value_init_string(const char *string) {
char *copy = NULL;
JSON_Value *value;
size_t string_len = 0;
if (string == NULL)
return NULL;
copy = parson_strdup(string);
string_len = strlen(string);
if (!is_valid_utf8(string, string_len))
return NULL;
copy = parson_strndup(string, string_len);
if (copy == NULL)
return NULL;
value = json_value_init_string_no_copy(copy);
@ -1334,7 +1412,6 @@ JSON_Status json_object_dotset_value(JSON_Object *object, const char *name, JSON
PARSON_FREE(current_name);
return json_object_dotset_value(temp_obj, dot_pos + 1, value);
}
return JSONFailure;
}
JSON_Status json_object_dotset_string(JSON_Object *object, const char *name, const char *string) {
@ -1417,7 +1494,6 @@ JSON_Status json_object_dotremove(JSON_Object *object, const char *name) {
PARSON_FREE(current_name);
return json_object_dotremove(temp_obj, dot_pos + 1);
}
return JSONFailure;
}
JSON_Status json_object_clear(JSON_Object *object) {

@ -290,6 +290,40 @@ void test_suite_5(void) {
TEST(json_array_replace_string(interests_arr, 100, "not existing") == JSONFailure);
TEST(json_array_append_string(json_object_get_array(obj, "interests"), NULL) == JSONFailure);
/* UTF-8 tests */
TEST(json_object_set_string(obj, "correct string", "κόσμε") == JSONSuccess);
TEST(json_object_set_string(obj, "boundary 1", "\xed\x9f\xbf") == JSONSuccess);
TEST(json_object_set_string(obj, "boundary 2", "\xee\x80\x80") == JSONSuccess);
TEST(json_object_set_string(obj, "boundary 3", "\xef\xbf\xbd") == JSONSuccess);
TEST(json_object_set_string(obj, "boundary 4", "\xf4\x8f\xbf\xbf") == JSONSuccess);
TEST(json_object_set_string(obj, "first continuation byte", "\x80") == JSONFailure);
TEST(json_object_set_string(obj, "last continuation byte", "\xbf") == JSONFailure);
TEST(json_object_set_string(obj, "impossible sequence 1", "\xfe") == JSONFailure);
TEST(json_object_set_string(obj, "impossible sequence 2", "\xff") == JSONFailure);
TEST(json_object_set_string(obj, "impossible sequence 3", "\xfe\xfe\xff\xff") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 1", "\xc0\xaf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 2", "\xc1\xbf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 3", "\xe0\x80\xaf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 4", "\xe0\x9f\xbf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 5", "\xf0\x80\x80\xaf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 6", "\xf0\x8f\xbf\xbf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong 7", "\xf0\x8f\xbf\xbf") == JSONFailure);
TEST(json_object_set_string(obj, "overlong null 1", "\xc0\x80") == JSONFailure);
TEST(json_object_set_string(obj, "overlong null 2", "\xe0\x80\x80") == JSONFailure);
TEST(json_object_set_string(obj, "overlong null 3", "\xf0\x80\x80\x80") == JSONFailure);
TEST(json_object_set_string(obj, "overlong null 4", "\xf8\x80\x80\x80\x80") == JSONFailure);
TEST(json_object_set_string(obj, "overlong null 5", "\xfc\x80\x80\x80\x80\x80") == JSONFailure);
TEST(json_object_set_string(obj, "single surrogate 1", "\xed\xa0\x80") == JSONFailure);
TEST(json_object_set_string(obj, "single surrogate 2", "\xed\xaf\xbf") == JSONFailure);
TEST(json_object_set_string(obj, "single surrogate 3", "\xed\xbf\xbf") == JSONFailure);
}
void test_suite_6(void) {

Loading…
Cancel
Save