Add UTF-16 surrogate pair support to JSON string parsing.

Summary of the changes carried by this patch:
  * parson.c: adds the static helper parse_utf_16(), which decodes one \uXXXX
    escape (or a lead/trail surrogate pair) into UTF-8 and returns ERROR for a
    malformed escape, a missing or invalid trail surrogate, or a trail
    surrogate that appears before a lead surrogate.
  * parson.c: get_processed_string() now writes straight through
    processed_ptr instead of going through a current_char temporary. Because
    the trailing "*processed_ptr = current_char;" store is removed, every
    switch case has to store its own byte; the \" \\ and \/ cases therefore
    copy the escaped character explicitly (without that store the output slot
    keeps its stale content, e.g. the backslash itself).
  * parson.c: the control-character comment states the range as 0x00-0x1F,
    matching the "< 0x20" check next to it.
  * parson.h: drops the PARSON_VERSION macro.
  * tests.c, tests/*.txt: fixes the test_suite_2_with_comments name and adds
    surrogate pair test cases.

Note: leading indentation inside the hunks was restored by hand, so apply with
"git apply --ignore-whitespace" (or "patch -l") if context does not match.

diff --git a/README.md b/README.md
index 61233bf..fe1a18e 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
 Parson is a lighweight [json](http://json.org) parser and reader written in C.
 
 ##Features
+* Full JSON support
 * Lightweight (only 2 files)
 * Simple API
 * Addressing json values with dot notation (similiar to C structs or objects in most OO languages, e.g. "objectA.objectB.value")
diff --git a/parson.c b/parson.c
index b7fb251..64124af 100644
--- a/parson.c
+++ b/parson.c
@@ -1,6 +1,6 @@
 /*
  Parson ( http://kgabis.github.com/parson/ )
- Copyright (c) 2013 Krzysztof Gabis
+ Copyright (c) 2012 - 2014 Krzysztof Gabis
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -102,6 +102,7 @@ static JSON_Value * json_value_init_null(void);
 
 /* Parser */
 static void skip_quotes(const char **string);
+static int parse_utf_16(char **processed, char **unprocessed);
 static const char * get_processed_string(const char **string);
 static JSON_Value * parse_object_value(const char **string, size_t nesting);
 static JSON_Value * parse_array_value(const char **string, size_t nesting);
@@ -380,63 +381,86 @@ static void skip_quotes(const char **string) {
     skip_char(string);
 }
 
+static int parse_utf_16(char **processed, char **unprocessed) {
+    unsigned int cp, lead, trail;
+    char *processed_ptr = *processed;
+    char *unprocessed_ptr = *unprocessed;
+    unprocessed_ptr++; /* skips u */
+    if (!is_utf((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF)
+        return ERROR;
+    if (cp < 0x80) {
+        *processed_ptr = cp; /* 0xxxxxxx */
+    } else if (cp < 0x800) {
+        *processed_ptr++ = ((cp >> 6) & 0x1F) | 0xC0; /* 110xxxxx */
+        *processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
+    } else if (cp < 0xD800 || cp > 0xDFFF) {
+        *processed_ptr++ = ((cp >> 12) & 0x0F) | 0xE0; /* 1110xxxx */
+        *processed_ptr++ = ((cp >> 6) & 0x3F) | 0x80; /* 10xxxxxx */
+        *processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
+    } else if (cp >= 0xD800 && cp <= 0xDBFF) { /* lead surrogate (0xD800..0xDBFF) */
+        lead = cp;
+        unprocessed_ptr += 4; /* should always be within the buffer, otherwise previous sscanf would fail */
+        if (*unprocessed_ptr++ != '\\' || *unprocessed_ptr++ != 'u' || /* starts with \u? */
+            !is_utf((const unsigned char*)unprocessed_ptr) ||
+            sscanf(unprocessed_ptr, "%4x", &trail) == EOF ||
+            trail < 0xDC00 || trail > 0xDFFF) { /* valid trail surrogate? (0xDC00..0xDFFF) */
+            return ERROR;
+        }
+        cp = ((((lead-0xD800)&0x3FF)<<10)|((trail-0xDC00)&0x3FF))+0x010000;
+        *processed_ptr++ = (((cp >> 18) & 0x07) | 0xF0); /* 11110xxx */
+        *processed_ptr++ = (((cp >> 12) & 0x3F) | 0x80); /* 10xxxxxx */
+        *processed_ptr++ = (((cp >> 6) & 0x3F) | 0x80); /* 10xxxxxx */
+        *processed_ptr = (((cp ) & 0x3F) | 0x80); /* 10xxxxxx */
+    } else { /* trail surrogate before lead surrogate */
+        return ERROR;
+    }
+    unprocessed_ptr += 3;
+    *processed = processed_ptr;
+    *unprocessed = unprocessed_ptr;
+    return SUCCESS;
+}
+
 /* Returns contents of a string inside double quotes and parses escaped
  characters inside.
  Example: "\u006Corem ipsum" -> lorem ipsum */
 static const char * get_processed_string(const char **string) {
     const char *string_start = *string;
-    char *output, *processed_ptr, *unprocessed_ptr, current_char;
-    unsigned int utf_val;
+    char *output = NULL, *processed_ptr = NULL, *unprocessed_ptr = NULL;
     skip_quotes(string);
     if (**string == '\0')
         return NULL;
-    output = parson_strndup(string_start + 1, *string - string_start - 2);
+    output = parson_strndup(string_start + 1, *string - string_start - 2);
     if (!output)
         return NULL;
     processed_ptr = unprocessed_ptr = output;
-    while (*unprocessed_ptr) {
-        current_char = *unprocessed_ptr;
-        if (current_char == '\\') {
+    while (*unprocessed_ptr != '\0') {
+        if (*unprocessed_ptr == '\\') {
             unprocessed_ptr++;
-            current_char = *unprocessed_ptr;
-            switch (current_char) {
-                case '\"': case '\\': case '/': break;
-                case 'b': current_char = '\b'; break;
-                case 'f': current_char = '\f'; break;
-                case 'n': current_char = '\n'; break;
-                case 'r': current_char = '\r'; break;
-                case 't': current_char = '\t'; break;
+            switch (*unprocessed_ptr) {
+                case '\"': case '\\': case '/': *processed_ptr = *unprocessed_ptr; break;
+                case 'b': *processed_ptr = '\b'; break;
+                case 'f': *processed_ptr = '\f'; break;
+                case 'n': *processed_ptr = '\n'; break;
+                case 'r': *processed_ptr = '\r'; break;
+                case 't': *processed_ptr = '\t'; break;
                 case 'u':
-                    unprocessed_ptr++;
-                    if (!is_utf((const unsigned char*)unprocessed_ptr) ||
-                        sscanf(unprocessed_ptr, "%4x", &utf_val) == EOF) {
-                            parson_free(output);
-                            return NULL;
+                    if (parse_utf_16(&processed_ptr, &unprocessed_ptr) == ERROR) {
+                        parson_free(output);
+                        return NULL;
                     }
-                    if (utf_val < 0x80) {
-                        current_char = utf_val;
-                    } else if (utf_val < 0x800) {
-                        *processed_ptr++ = (utf_val >> 6) | 0xC0;
-                        current_char = ((utf_val | 0x80) & 0xBF);
-                    } else {
-                        *processed_ptr++ = (utf_val >> 12) | 0xE0;
-                        *processed_ptr++ = (((utf_val >> 6) | 0x80) & 0xBF);
-                        current_char = ((utf_val | 0x80) & 0xBF);
-                    }
-                    unprocessed_ptr += 3;
                     break;
                 default:
                     parson_free(output);
                     return NULL;
                     break;
             }
-        } else if ((unsigned char)current_char < 0x20) { /* 0x00-0x19 are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
-            parson_free(output);
+        } else if ((unsigned char)*unprocessed_ptr < 0x20) {
+            parson_free(output); /* 0x00-0x1F are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
             return NULL;
+        } else {
+            *processed_ptr = *unprocessed_ptr;
         }
-        *processed_ptr = current_char;
-        processed_ptr++;
-        unprocessed_ptr++;
+        processed_ptr++, unprocessed_ptr++;
     }
     *processed_ptr = '\0';
     if (try_realloc((void**)&output, strlen(output) + 1) == ERROR)
diff --git a/parson.h b/parson.h
index ef21890..be3dd11 100644
--- a/parson.h
+++ b/parson.h
@@ -1,6 +1,6 @@
 /*
  Parson ( http://kgabis.github.com/parson/ )
- Copyright (c) 2013 Krzysztof Gabis
+ Copyright (c) 2012 - 2014 Krzysztof Gabis
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -30,8 +30,6 @@ extern "C"
 #endif
 
 #include <stddef.h> /* size_t */
-
-#define PARSON_VERSION 20131130
 
 /* Types and enums */
 typedef struct json_object_t JSON_Object;
diff --git a/tests.c b/tests.c
index b357e6d..ba30089 100644
--- a/tests.c
+++ b/tests.c
@@ -1,6 +1,6 @@
 /*
  Parson ( http://kgabis.github.com/parson/ )
- Copyright (c) 2013 Krzysztof Gabis
+ Copyright (c) 2012 - 2014 Krzysztof Gabis
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -36,7 +36,7 @@
 void test_suite_1(void);
 void test_suite_2(JSON_Value *value);
 void test_suite_2_no_comments(void);
-void test_suite_2_with_commnets(void);
+void test_suite_2_with_comments(void);
 void test_suite_3(void);
 
 char *read_file(const char *filename);
@@ -50,7 +50,7 @@ int main() {
     /* print_commits_info("torvalds", "linux"); */
     test_suite_1();
     test_suite_2_no_comments();
-    test_suite_2_with_commnets();
+    test_suite_2_with_comments();
     test_suite_3();
     printf("Tests failed: %d\n", tests_failed);
     printf("Tests passed: %d\n", tests_passed);
@@ -87,6 +87,7 @@ void test_suite_2(JSON_Value *root_value) {
     TEST(STREQ(json_object_get_string(root_object, "string"), "lorem ipsum"));
     TEST(STREQ(json_object_get_string(root_object, "utf string"), "lorem ipsum"));
     TEST(STREQ(json_object_get_string(root_object, "utf-8 string"), "あいうえお"));
+    TEST(STREQ(json_object_get_string(root_object, "surrogate string"), "lorem𝄞ipsum𝍧lorem"));
     TEST(json_object_get_number(root_object, "positive one") == 1.0);
     TEST(json_object_get_number(root_object, "negative one") == -1.0);
     TEST(json_object_get_number(root_object, "hard to parse number") == -0.000314);
@@ -145,7 +146,7 @@ void test_suite_2_no_comments(void) {
     json_value_free(root_value);
 }
 
-void test_suite_2_with_commnets(void) {
+void test_suite_2_with_comments(void) {
     const char *filename = "tests/test_2_comments.txt";
     JSON_Value *root_value = NULL;
     printf("Testing %s:\n", filename);
@@ -199,6 +200,7 @@ void test_suite_3(void) {
     TEST(json_parse_string("[-07]") == NULL);
     TEST(json_parse_string("[-007]") == NULL);
     TEST(json_parse_string("[-07.0]") == NULL);
+    TEST(json_parse_string("[\"\\uDF67\\uD834\"]") == NULL); /* wrong order surrogate pair */
 }
 
 void print_commits_info(const char *username, const char *repo) {
diff --git a/tests/test_2.txt b/tests/test_2.txt
index aa32863..ff4e443 100644
--- a/tests/test_2.txt
+++ b/tests/test_2.txt
@@ -1,7 +1,8 @@
 {
     "string" : "lorem ipsum",
     "utf string" : "\u006corem\u0020ipsum",
-    "utf-8 string": "あいうえお",
+    "utf-8 string": "あいうえお",
+    "surrogate string": "lorem\uD834\uDD1Eipsum\uD834\uDF67lorem",
     "positive one" : 1,
     "negative one" : -1,
     "pi" : 3.14,
diff --git a/tests/test_2_comments.txt b/tests/test_2_comments.txt
index ca85784..ece878c 100644
--- a/tests/test_2_comments.txt
+++ b/tests/test_2_comments.txt
@@ -8,6 +8,7 @@
     "string" : "lorem ipsum", // lorem ipsum
     "utf string" : "\u006corem\u0020ipsum", // lorem ipsum //
     "utf-8 string": "あいうえお", // /* lorem ipsum */
+    "surrogate string": "lorem\uD834\uDD1Eipsum\uD834\uDF67lorem",
     "positive one" : 1,
     "negative one" : -1,
     "pi" : 3.14,