mirror of
https://github.com/kgabis/parson.git
synced 2025-02-05 17:05:29 +00:00
Added surrogate pairs support (JSON support is full now), removed PARSON_VERSION macro.
This commit is contained in:
parent
19a0d79db4
commit
c707051778
@ -2,6 +2,7 @@
|
|||||||
Parson is a lightweight [json](http://json.org) parser and reader written in C.
|
Parson is a lightweight [json](http://json.org) parser and reader written in C.
|
||||||
|
|
||||||
##Features
|
##Features
|
||||||
|
* Full JSON support
|
||||||
* Lightweight (only 2 files)
|
* Lightweight (only 2 files)
|
||||||
* Simple API
|
* Simple API
|
||||||
* Addressing json values with dot notation (similar to C structs or objects in most OO languages, e.g. "objectA.objectB.value")
|
* Addressing json values with dot notation (similar to C structs or objects in most OO languages, e.g. "objectA.objectB.value")
|
||||||
|
94
parson.c
94
parson.c
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
Parson ( http://kgabis.github.com/parson/ )
|
Parson ( http://kgabis.github.com/parson/ )
|
||||||
Copyright (c) 2013 Krzysztof Gabis
|
Copyright (c) 2012 - 2014 Krzysztof Gabis
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
@ -102,6 +102,7 @@ static JSON_Value * json_value_init_null(void);
|
|||||||
|
|
||||||
/* Parser */
|
/* Parser */
|
||||||
static void skip_quotes(const char **string);
|
static void skip_quotes(const char **string);
|
||||||
|
static int parse_utf_16(char **processed, char **unprocessed);
|
||||||
static const char * get_processed_string(const char **string);
|
static const char * get_processed_string(const char **string);
|
||||||
static JSON_Value * parse_object_value(const char **string, size_t nesting);
|
static JSON_Value * parse_object_value(const char **string, size_t nesting);
|
||||||
static JSON_Value * parse_array_value(const char **string, size_t nesting);
|
static JSON_Value * parse_array_value(const char **string, size_t nesting);
|
||||||
@ -380,63 +381,86 @@ static void skip_quotes(const char **string) {
|
|||||||
skip_char(string);
|
skip_char(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Decodes one JSON "\uXXXX" escape (or a "\uXXXX\uXXXX" surrogate pair) and
   writes the resulting code point as UTF-8 bytes.

   On entry *unprocessed points at the 'u' that follows the backslash and
   *processed points at the position where the first output byte goes.

   On SUCCESS both cursors are updated: *processed is left ON the last byte
   written and *unprocessed ON the last hex digit consumed (not one past
   them), so the caller still has to advance each by one.

   On ERROR nothing has been written and neither cursor is updated. ERROR is
   returned when is_utf() rejects the digits, when sscanf reports EOF, when a
   lead surrogate is not followed by a "\u" trail surrogate in 0xDC00..0xDFFF,
   or when a trail surrogate appears without a preceding lead surrogate.

   NOTE(review): is_utf() is defined outside this view; presumably it checks
   that four hex digits follow - confirm. */
static int parse_utf_16(char **processed, char **unprocessed) {
|
||||||
|
unsigned int cp, lead, trail;
|
||||||
|
char *processed_ptr = *processed;
|
||||||
|
char *unprocessed_ptr = *unprocessed;
|
||||||
|
unprocessed_ptr++; /* skips u */
|
||||||
|
/* NOTE(review): sscanf returns 0, not EOF, on a matching failure, so the EOF
   comparison alone would not catch non-hex input; this appears to rely on
   is_utf() having validated the digits first - confirm. */
if (!is_utf((const unsigned char*)unprocessed_ptr) || sscanf(unprocessed_ptr, "%4x", &cp) == EOF)
|
||||||
|
return ERROR;
|
||||||
|
if (cp < 0x80) {
|
||||||
|
*processed_ptr = cp; /* 0xxxxxxx */
|
||||||
|
} else if (cp < 0x800) {
|
||||||
|
*processed_ptr++ = ((cp >> 6) & 0x1F) | 0xC0; /* 110xxxxx */
|
||||||
|
*processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
|
||||||
|
} else if (cp < 0xD800 || cp > 0xDFFF) {
|
||||||
|
*processed_ptr++ = ((cp >> 12) & 0x0F) | 0xE0; /* 1110xxxx */
|
||||||
|
*processed_ptr++ = ((cp >> 6) & 0x3F) | 0x80; /* 10xxxxxx */
|
||||||
|
*processed_ptr = ((cp ) & 0x3F) | 0x80; /* 10xxxxxx */
|
||||||
|
} else if (cp >= 0xD800 && cp <= 0xDBFF) { /* lead surrogate (0xD800..0xDBFF) */
|
||||||
|
lead = cp;
|
||||||
|
unprocessed_ptr += 4; /* should always be within the buffer, otherwise previous sscanf would fail */
|
||||||
|
/* The two post-increments step over '\\' and 'u'; when all checks pass the
   cursor sits on the first hex digit of the trail surrogate. */
if (*unprocessed_ptr++ != '\\' || *unprocessed_ptr++ != 'u' || /* starts with \u? */
|
||||||
|
!is_utf((const unsigned char*)unprocessed_ptr) ||
|
||||||
|
sscanf(unprocessed_ptr, "%4x", &trail) == EOF ||
|
||||||
|
trail < 0xDC00 || trail > 0xDFFF) { /* valid trail surrogate? (0xDC00..0xDFFF) */
|
||||||
|
return ERROR;
|
||||||
|
}
|
||||||
|
/* Combine the 10 payload bits of each surrogate and add the 0x10000 offset
   to get the full code point, then emit it as four bytes. */
cp = ((((lead-0xD800)&0x3FF)<<10)|((trail-0xDC00)&0x3FF))+0x010000;
|
||||||
|
*processed_ptr++ = (((cp >> 18) & 0x07) | 0xF0); /* 11110xxx */
|
||||||
|
*processed_ptr++ = (((cp >> 12) & 0x3F) | 0x80); /* 10xxxxxx */
|
||||||
|
*processed_ptr++ = (((cp >> 6) & 0x3F) | 0x80); /* 10xxxxxx */
|
||||||
|
*processed_ptr = (((cp ) & 0x3F) | 0x80); /* 10xxxxxx */
|
||||||
|
} else { /* trail surrogate before lead surrogate */
|
||||||
|
return ERROR;
|
||||||
|
}
|
||||||
|
/* The cursor is on the first of four hex digits here; +3 leaves it on the
   last digit rather than past it. */
unprocessed_ptr += 3;
|
||||||
|
/* Cursors are handed back to the caller only on the success path. */
*processed = processed_ptr;
|
||||||
|
*unprocessed = unprocessed_ptr;
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/* Returns contents of a string inside double quotes and parses escaped
|
/* Returns contents of a string inside double quotes and parses escaped
|
||||||
characters inside.
|
characters inside.
|
||||||
Example: "\u006Corem ipsum" -> lorem ipsum */
|
Example: "\u006Corem ipsum" -> lorem ipsum */
|
||||||
static const char * get_processed_string(const char **string) {
|
static const char * get_processed_string(const char **string) {
|
||||||
const char *string_start = *string;
|
const char *string_start = *string;
|
||||||
char *output, *processed_ptr, *unprocessed_ptr, current_char;
|
char *output = NULL, *processed_ptr = NULL, *unprocessed_ptr = NULL;
|
||||||
unsigned int utf_val;
|
|
||||||
skip_quotes(string);
|
skip_quotes(string);
|
||||||
if (**string == '\0')
|
if (**string == '\0')
|
||||||
return NULL;
|
return NULL;
|
||||||
output = parson_strndup(string_start + 1, *string - string_start - 2);
|
output = parson_strndup(string_start + 1, *string - string_start - 2);
|
||||||
if (!output)
|
if (!output)
|
||||||
return NULL;
|
return NULL;
|
||||||
processed_ptr = unprocessed_ptr = output;
|
processed_ptr = unprocessed_ptr = output;
|
||||||
while (*unprocessed_ptr) {
|
while (*unprocessed_ptr != '\0') {
|
||||||
current_char = *unprocessed_ptr;
|
if (*unprocessed_ptr == '\\') {
|
||||||
if (current_char == '\\') {
|
|
||||||
unprocessed_ptr++;
|
unprocessed_ptr++;
|
||||||
current_char = *unprocessed_ptr;
|
switch (*unprocessed_ptr) {
|
||||||
switch (current_char) {
|
|
||||||
case '\"': case '\\': case '/': break;
|
case '\"': case '\\': case '/': break;
|
||||||
case 'b': current_char = '\b'; break;
|
case 'b': *processed_ptr = '\b'; break;
|
||||||
case 'f': current_char = '\f'; break;
|
case 'f': *processed_ptr = '\f'; break;
|
||||||
case 'n': current_char = '\n'; break;
|
case 'n': *processed_ptr = '\n'; break;
|
||||||
case 'r': current_char = '\r'; break;
|
case 'r': *processed_ptr = '\r'; break;
|
||||||
case 't': current_char = '\t'; break;
|
case 't': *processed_ptr = '\t'; break;
|
||||||
case 'u':
|
case 'u':
|
||||||
unprocessed_ptr++;
|
if (parse_utf_16(&processed_ptr, &unprocessed_ptr) == ERROR) {
|
||||||
if (!is_utf((const unsigned char*)unprocessed_ptr) ||
|
parson_free(output);
|
||||||
sscanf(unprocessed_ptr, "%4x", &utf_val) == EOF) {
|
return NULL;
|
||||||
parson_free(output);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
if (utf_val < 0x80) {
|
|
||||||
current_char = utf_val;
|
|
||||||
} else if (utf_val < 0x800) {
|
|
||||||
*processed_ptr++ = (utf_val >> 6) | 0xC0;
|
|
||||||
current_char = ((utf_val | 0x80) & 0xBF);
|
|
||||||
} else {
|
|
||||||
*processed_ptr++ = (utf_val >> 12) | 0xE0;
|
|
||||||
*processed_ptr++ = (((utf_val >> 6) | 0x80) & 0xBF);
|
|
||||||
current_char = ((utf_val | 0x80) & 0xBF);
|
|
||||||
}
|
|
||||||
unprocessed_ptr += 3;
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
parson_free(output);
|
parson_free(output);
|
||||||
return NULL;
|
return NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if ((unsigned char)current_char < 0x20) { /* 0x00-0x19 are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
|
} else if ((unsigned char)*unprocessed_ptr < 0x20) {
|
||||||
parson_free(output);
|
parson_free(output); /* 0x00-0x19 are invalid characters for json string (http://www.ietf.org/rfc/rfc4627.txt) */
|
||||||
return NULL;
|
return NULL;
|
||||||
|
} else {
|
||||||
|
*processed_ptr = *unprocessed_ptr;
|
||||||
}
|
}
|
||||||
*processed_ptr = current_char;
|
processed_ptr++, unprocessed_ptr++;
|
||||||
processed_ptr++;
|
|
||||||
unprocessed_ptr++;
|
|
||||||
}
|
}
|
||||||
*processed_ptr = '\0';
|
*processed_ptr = '\0';
|
||||||
if (try_realloc((void**)&output, strlen(output) + 1) == ERROR)
|
if (try_realloc((void**)&output, strlen(output) + 1) == ERROR)
|
||||||
|
4
parson.h
4
parson.h
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
Parson ( http://kgabis.github.com/parson/ )
|
Parson ( http://kgabis.github.com/parson/ )
|
||||||
Copyright (c) 2013 Krzysztof Gabis
|
Copyright (c) 2012 - 2014 Krzysztof Gabis
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
@ -31,8 +31,6 @@ extern "C"
|
|||||||
|
|
||||||
#include <stddef.h> /* size_t */
|
#include <stddef.h> /* size_t */
|
||||||
|
|
||||||
#define PARSON_VERSION 20131130
|
|
||||||
|
|
||||||
/* Types and enums */
|
/* Types and enums */
|
||||||
typedef struct json_object_t JSON_Object;
|
typedef struct json_object_t JSON_Object;
|
||||||
typedef struct json_array_t JSON_Array;
|
typedef struct json_array_t JSON_Array;
|
||||||
|
10
tests.c
10
tests.c
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
Parson ( http://kgabis.github.com/parson/ )
|
Parson ( http://kgabis.github.com/parson/ )
|
||||||
Copyright (c) 2013 Krzysztof Gabis
|
Copyright (c) 2012 - 2014 Krzysztof Gabis
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
@ -36,7 +36,7 @@
|
|||||||
void test_suite_1(void);
|
void test_suite_1(void);
|
||||||
void test_suite_2(JSON_Value *value);
|
void test_suite_2(JSON_Value *value);
|
||||||
void test_suite_2_no_comments(void);
|
void test_suite_2_no_comments(void);
|
||||||
void test_suite_2_with_commnets(void);
|
void test_suite_2_with_comments(void);
|
||||||
void test_suite_3(void);
|
void test_suite_3(void);
|
||||||
|
|
||||||
char *read_file(const char *filename);
|
char *read_file(const char *filename);
|
||||||
@ -50,7 +50,7 @@ int main() {
|
|||||||
/* print_commits_info("torvalds", "linux"); */
|
/* print_commits_info("torvalds", "linux"); */
|
||||||
test_suite_1();
|
test_suite_1();
|
||||||
test_suite_2_no_comments();
|
test_suite_2_no_comments();
|
||||||
test_suite_2_with_commnets();
|
test_suite_2_with_comments();
|
||||||
test_suite_3();
|
test_suite_3();
|
||||||
printf("Tests failed: %d\n", tests_failed);
|
printf("Tests failed: %d\n", tests_failed);
|
||||||
printf("Tests passed: %d\n", tests_passed);
|
printf("Tests passed: %d\n", tests_passed);
|
||||||
@ -87,6 +87,7 @@ void test_suite_2(JSON_Value *root_value) {
|
|||||||
TEST(STREQ(json_object_get_string(root_object, "string"), "lorem ipsum"));
|
TEST(STREQ(json_object_get_string(root_object, "string"), "lorem ipsum"));
|
||||||
TEST(STREQ(json_object_get_string(root_object, "utf string"), "lorem ipsum"));
|
TEST(STREQ(json_object_get_string(root_object, "utf string"), "lorem ipsum"));
|
||||||
TEST(STREQ(json_object_get_string(root_object, "utf-8 string"), "あいうえお"));
|
TEST(STREQ(json_object_get_string(root_object, "utf-8 string"), "あいうえお"));
|
||||||
|
TEST(STREQ(json_object_get_string(root_object, "surrogate string"), "lorem𝄞ipsum𝍧lorem"));
|
||||||
TEST(json_object_get_number(root_object, "positive one") == 1.0);
|
TEST(json_object_get_number(root_object, "positive one") == 1.0);
|
||||||
TEST(json_object_get_number(root_object, "negative one") == -1.0);
|
TEST(json_object_get_number(root_object, "negative one") == -1.0);
|
||||||
TEST(json_object_get_number(root_object, "hard to parse number") == -0.000314);
|
TEST(json_object_get_number(root_object, "hard to parse number") == -0.000314);
|
||||||
@ -145,7 +146,7 @@ void test_suite_2_no_comments(void) {
|
|||||||
json_value_free(root_value);
|
json_value_free(root_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_suite_2_with_commnets(void) {
|
void test_suite_2_with_comments(void) {
|
||||||
const char *filename = "tests/test_2_comments.txt";
|
const char *filename = "tests/test_2_comments.txt";
|
||||||
JSON_Value *root_value = NULL;
|
JSON_Value *root_value = NULL;
|
||||||
printf("Testing %s:\n", filename);
|
printf("Testing %s:\n", filename);
|
||||||
@ -199,6 +200,7 @@ void test_suite_3(void) {
|
|||||||
TEST(json_parse_string("[-07]") == NULL);
|
TEST(json_parse_string("[-07]") == NULL);
|
||||||
TEST(json_parse_string("[-007]") == NULL);
|
TEST(json_parse_string("[-007]") == NULL);
|
||||||
TEST(json_parse_string("[-07.0]") == NULL);
|
TEST(json_parse_string("[-07.0]") == NULL);
|
||||||
|
TEST(json_parse_string("[\"\\uDF67\\uD834\"]") == NULL); /* wrong order surrogate pair */
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_commits_info(const char *username, const char *repo) {
|
void print_commits_info(const char *username, const char *repo) {
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
{
|
{
|
||||||
"string" : "lorem ipsum",
|
"string" : "lorem ipsum",
|
||||||
"utf string" : "\u006corem\u0020ipsum",
|
"utf string" : "\u006corem\u0020ipsum",
|
||||||
"utf-8 string": "あいうえお",
|
"utf-8 string": "あいうえお",
|
||||||
|
"surrogate string": "lorem\uD834\uDD1Eipsum\uD834\uDF67lorem",
|
||||||
"positive one" : 1,
|
"positive one" : 1,
|
||||||
"negative one" : -1,
|
"negative one" : -1,
|
||||||
"pi" : 3.14,
|
"pi" : 3.14,
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
"string" : "lorem ipsum", // lorem ipsum
|
"string" : "lorem ipsum", // lorem ipsum
|
||||||
"utf string" : "\u006corem\u0020ipsum", // lorem ipsum //
|
"utf string" : "\u006corem\u0020ipsum", // lorem ipsum //
|
||||||
"utf-8 string": "あいうえお", // /* lorem ipsum */
|
"utf-8 string": "あいうえお", // /* lorem ipsum */
|
||||||
|
"surrogate string": "lorem\uD834\uDD1Eipsum\uD834\uDF67lorem",
|
||||||
"positive one" : 1,
|
"positive one" : 1,
|
||||||
"negative one" : -1,
|
"negative one" : -1,
|
||||||
"pi" : 3.14,
|
"pi" : 3.14,
|
||||||
|
Loading…
Reference in New Issue
Block a user