mirror of
https://github.com/kgabis/parson.git
synced 2025-02-05 08:55:30 +00:00
Added surrogate pairs support (JSON support is full now), removed PARSON_VERSION macro.
This commit is contained in:
parent
19a0d79db4
commit
c707051778
@ -2,6 +2,7 @@
|
||||
Parson is a lightweight [json](http://json.org) parser and reader written in C.
|
||||
|
||||
##Features
|
||||
* Full JSON support
|
||||
* Lightweight (only 2 files)
|
||||
* Simple API
|
||||
* Addressing json values with dot notation (similar to C structs or objects in most OO languages, e.g. "objectA.objectB.value")
|
||||
|
94
parson.c
94
parson.c
@ -1,6 +1,6 @@
|
||||
/*
|
||||
Parson ( http://kgabis.github.com/parson/ )
|
||||
Copyright (c) 2013 Krzysztof Gabis
|
||||
Copyright (c) 2012 - 2014 Krzysztof Gabis
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@ -102,6 +102,7 @@ static JSON_Value * json_value_init_null(void);
|
||||
|
||||
/* Parser */
|
||||
static void skip_quotes(const char **string);
|
||||
static int parse_utf_16(char **processed, char **unprocessed);
|
||||
static const char * get_processed_string(const char **string);
|
||||
static JSON_Value * parse_object_value(const char **string, size_t nesting);
|
||||
static JSON_Value * parse_array_value(const char **string, size_t nesting);
|
||||
@ -380,63 +381,86 @@ static void skip_quotes(const char **string) {
|
||||
skip_char(string);
|
||||
}
|
||||
|
||||
/* Decodes one JSON unicode escape (4 hex digits after "\u"), or a surrogate
pair made of two such escapes, and stores it at *processed encoded as UTF-8.
On entry *unprocessed points at the 'u' that follows the backslash.
On SUCCESS both pointers are written back: *processed is left ON the last
byte written and *unprocessed ON the last hex digit consumed (not one past),
so the caller is expected to advance each of them by one afterwards.
Returns ERROR, leaving *processed and *unprocessed untouched, when the
is_utf() check or sscanf fails, when a lead surrogate is not followed by a
"\u" escape holding a trail surrogate, or when a trail surrogate comes first.
NOTE(review): is_utf() is defined elsewhere; presumably it verifies that four
hex digits follow - confirm, since sscanf reports a non-matching input by
returning 0, which the "== EOF" tests below do not catch on their own. */
static int parse_utf_16(char **processed, char **unprocessed) {
|
||||
unsigned int cp, lead, trail;
|
||||
char *processed_ptr = *processed;
|
||||
char *unprocessed_ptr = *unprocessed;
|
||||
unprocessed_ptr++; /* skips u */
|
||||
if (!is_utf((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF)
|
||||
return ERROR;
|
||||
/* 1 byte: code points below 0x80 are stored as-is */
if (cp < 0x80) {
|
||||
*processed_ptr = cp; /* 0xxxxxxx */
|
||||
/* 2 bytes: code points 0x80..0x7FF */
} else if (cp < 0x800) {
|
||||
*processed_ptr++ = ((cp >> 6) & 0x1F) | 0xC0; /* 110xxxxx */
|
||||
*processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
|
||||
/* 3 bytes: remaining 16-bit values that are not surrogates */
} else if (cp < 0xD800 || cp > 0xDFFF) {
|
||||
*processed_ptr++ = ((cp >> 12) & 0x0F) | 0xE0; /* 1110xxxx */
|
||||
*processed_ptr++ = ((cp >> 6) & 0x3F) | 0x80; /* 10xxxxxx */
|
||||
*processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
|
||||
/* 4 bytes: a lead surrogate must be completed by a second escape */
} else if (cp >= 0xD800 && cp <= 0xDBFF) { /* lead surrogate (0xD800..0xDBFF) */
|
||||
lead = cp;
|
||||
unprocessed_ptr += 4; /* should always be within the buffer, otherwise previous sscanf would fail */
|
||||
/* the || chain short-circuits, so the pointer only moves past each
character that actually matched */
if (*unprocessed_ptr++ != '\\' || *unprocessed_ptr++ != 'u' || /* starts with \u? */
|
||||
!is_utf((const unsigned char*)unprocessed_ptr) ||
|
||||
sscanf(unprocessed_ptr, "%4x", &trail) == EOF ||
|
||||
trail < 0xDC00 || trail > 0xDFFF) { /* valid trail surrogate? (0xDC00..0xDFFF) */
|
||||
return ERROR;
|
||||
}
|
||||
/* combine the low 10 bits of each surrogate and add the 0x10000 offset */
cp = ((((lead-0xD800)&0x3FF)<<10)|((trail-0xDC00)&0x3FF))+0x010000;
|
||||
*processed_ptr++ = (((cp >> 18) & 0x07) | 0xF0); /* 11110xxx */
|
||||
*processed_ptr++ = (((cp >> 12) & 0x3F) | 0x80); /* 10xxxxxx */
|
||||
*processed_ptr++ = (((cp >> 6) & 0x3F) | 0x80); /* 10xxxxxx */
|
||||
*processed_ptr = (((cp ) & 0x3F) | 0x80); /* 10xxxxxx */
|
||||
} else { /* trail surrogate before lead surrogate */
|
||||
return ERROR;
|
||||
}
|
||||
/* unprocessed_ptr is on the first of the last four hex digits read;
+3 leaves it on the final digit rather than past it */
unprocessed_ptr += 3;
|
||||
*processed = processed_ptr;
|
||||
*unprocessed = unprocessed_ptr;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
/* Returns contents of a string inside double quotes and parses escaped
|
||||
characters inside.
|
||||
Example: "\u006Corem ipsum" -> lorem ipsum */
|
||||
static const char * get_processed_string(const char **string) {
|
||||
const char *string_start = *string;
|
||||
char *output, *processed_ptr, *unprocessed_ptr, current_char;
|
||||
unsigned int utf_val;
|
||||
char *output = NULL, *processed_ptr = NULL, *unprocessed_ptr = NULL;
|
||||
skip_quotes(string);
|
||||
if (**string == '\0')
|
||||
return NULL;
|
||||
output = parson_strndup(string_start + 1, *string - string_start - 2);
|
||||
output = parson_strndup(string_start + 1, *string - string_start - 2);
|
||||
if (!output)
|
||||
return NULL;
|
||||
processed_ptr = unprocessed_ptr = output;
|
||||
while (*unprocessed_ptr) {
|
||||
current_char = *unprocessed_ptr;
|
||||
if (current_char == '\\') {
|
||||
while (*unprocessed_ptr != '\0') {
|
||||
if (*unprocessed_ptr == '\\') {
|
||||
unprocessed_ptr++;
|
||||
current_char = *unprocessed_ptr;
|
||||
switch (current_char) {
|
||||
switch (*unprocessed_ptr) {
|
||||
case '\"': case '\\': case '/': break;
|
||||
case 'b': current_char = '\b'; break;
|
||||
case 'f': current_char = '\f'; break;
|
||||
case 'n': current_char = '\n'; break;
|
||||
case 'r': current_char = '\r'; break;
|
||||
case 't': current_char = '\t'; break;
|
||||
case 'b': *processed_ptr = '\b'; break;
|
||||
case 'f': *processed_ptr = '\f'; break;
|
||||
case 'n': *processed_ptr = '\n'; break;
|
||||
case 'r': *processed_ptr = '\r'; break;
|
||||
case 't': *processed_ptr = '\t'; break;
|
||||
case 'u':
|
||||
unprocessed_ptr++;
|
||||
if (!is_utf((const unsigned char*)unprocessed_ptr) ||
|
||||
sscanf(unprocessed_ptr, "%4x", &utf_val) == EOF) {
|
||||
parson_free(output);
|
||||
return NULL;
|
||||
if (parse_utf_16(&processed_ptr, &unprocessed_ptr) == ERROR) {
|
||||
parson_free(output);
|
||||
return NULL;
|
||||
}
|
||||
if (utf_val < 0x80) {
|
||||
current_char = utf_val;
|
||||
} else if (utf_val < 0x800) {
|
||||
*processed_ptr++ = (utf_val >> 6) | 0xC0;
|
||||
current_char = ((utf_val | 0x80) & 0xBF);
|
||||
} else {
|
||||
*processed_ptr++ = (utf_val >> 12) | 0xE0;
|
||||
*processed_ptr++ = (((utf_val >> 6) | 0x80) & 0xBF);
|
||||
current_char = ((utf_val | 0x80) & 0xBF);
|
||||
}
|
||||
unprocessed_ptr += 3;
|
||||
break;
|
||||
default:
|
||||
parson_free(output);
|
||||
return NULL;
|
||||
break;
|
||||
}
|
||||
} else if ((unsigned char)current_char < 0x20) { /* 0x00-0x19 are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
|
||||
parson_free(output);
|
||||
} else if ((unsigned char)*unprocessed_ptr < 0x20) {
|
||||
parson_free(output); /* 0x00-0x1F are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
|
||||
return NULL;
|
||||
} else {
|
||||
*processed_ptr = *unprocessed_ptr;
|
||||
}
|
||||
*processed_ptr = current_char;
|
||||
processed_ptr++;
|
||||
unprocessed_ptr++;
|
||||
processed_ptr++, unprocessed_ptr++;
|
||||
}
|
||||
*processed_ptr = '\0';
|
||||
if (try_realloc((void**)&output, strlen(output) + 1) == ERROR)
|
||||
|
4
parson.h
4
parson.h
@ -1,6 +1,6 @@
|
||||
/*
|
||||
Parson ( http://kgabis.github.com/parson/ )
|
||||
Copyright (c) 2013 Krzysztof Gabis
|
||||
Copyright (c) 2012 - 2014 Krzysztof Gabis
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@ -30,8 +30,6 @@ extern "C"
|
||||
#endif
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
#define PARSON_VERSION 20131130
|
||||
|
||||
/* Types and enums */
|
||||
typedef struct json_object_t JSON_Object;
|
||||
|
10
tests.c
10
tests.c
@ -1,6 +1,6 @@
|
||||
/*
|
||||
Parson ( http://kgabis.github.com/parson/ )
|
||||
Copyright (c) 2013 Krzysztof Gabis
|
||||
Copyright (c) 2012 - 2014 Krzysztof Gabis
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@ -36,7 +36,7 @@
|
||||
void test_suite_1(void);
|
||||
void test_suite_2(JSON_Value *value);
|
||||
void test_suite_2_no_comments(void);
|
||||
void test_suite_2_with_commnets(void);
|
||||
void test_suite_2_with_comments(void);
|
||||
void test_suite_3(void);
|
||||
|
||||
char *read_file(const char *filename);
|
||||
@ -50,7 +50,7 @@ int main() {
|
||||
/* print_commits_info("torvalds", "linux"); */
|
||||
test_suite_1();
|
||||
test_suite_2_no_comments();
|
||||
test_suite_2_with_commnets();
|
||||
test_suite_2_with_comments();
|
||||
test_suite_3();
|
||||
printf("Tests failed: %d\n", tests_failed);
|
||||
printf("Tests passed: %d\n", tests_passed);
|
||||
@ -87,6 +87,7 @@ void test_suite_2(JSON_Value *root_value) {
|
||||
TEST(STREQ(json_object_get_string(root_object, "string"), "lorem ipsum"));
|
||||
TEST(STREQ(json_object_get_string(root_object, "utf string"), "lorem ipsum"));
|
||||
TEST(STREQ(json_object_get_string(root_object, "utf-8 string"), "あいうえお"));
|
||||
TEST(STREQ(json_object_get_string(root_object, "surrogate string"), "lorem𝄞ipsum𝍧lorem"));
|
||||
TEST(json_object_get_number(root_object, "positive one") == 1.0);
|
||||
TEST(json_object_get_number(root_object, "negative one") == -1.0);
|
||||
TEST(json_object_get_number(root_object, "hard to parse number") == -0.000314);
|
||||
@ -145,7 +146,7 @@ void test_suite_2_no_comments(void) {
|
||||
json_value_free(root_value);
|
||||
}
|
||||
|
||||
void test_suite_2_with_commnets(void) {
|
||||
void test_suite_2_with_comments(void) {
|
||||
const char *filename = "tests/test_2_comments.txt";
|
||||
JSON_Value *root_value = NULL;
|
||||
printf("Testing %s:\n", filename);
|
||||
@ -199,6 +200,7 @@ void test_suite_3(void) {
|
||||
TEST(json_parse_string("[-07]") == NULL);
|
||||
TEST(json_parse_string("[-007]") == NULL);
|
||||
TEST(json_parse_string("[-07.0]") == NULL);
|
||||
TEST(json_parse_string("[\"\\uDF67\\uD834\"]") == NULL); /* wrong order surrogate pair */
|
||||
}
|
||||
|
||||
void print_commits_info(const char *username, const char *repo) {
|
||||
|
@ -1,7 +1,8 @@
|
||||
{
|
||||
"string" : "lorem ipsum",
|
||||
"utf string" : "\u006corem\u0020ipsum",
|
||||
"utf-8 string": "あいうえお",
|
||||
"utf-8 string": "あいうえお",
|
||||
"surrogate string": "lorem\uD834\uDD1Eipsum\uD834\uDF67lorem",
|
||||
"positive one" : 1,
|
||||
"negative one" : -1,
|
||||
"pi" : 3.14,
|
||||
|
@ -8,6 +8,7 @@
|
||||
"string" : "lorem ipsum", // lorem ipsum
|
||||
"utf string" : "\u006corem\u0020ipsum", // lorem ipsum //
|
||||
"utf-8 string": "あいうえお", // /* lorem ipsum */
|
||||
"surrogate string": "lorem\uD834\uDD1Eipsum\uD834\uDF67lorem",
|
||||
"positive one" : 1,
|
||||
"negative one" : -1,
|
||||
"pi" : 3.14,
|
||||
|
Loading…
Reference in New Issue
Block a user