diff --git a/CHANGES b/CHANGES
index 06e1adf..559f928 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,7 @@ README - 05/16/2004
CHANGES IN Mini-XML 2.0
- New programmers manual.
+ - Added UTF-16 support (input only; all output is UTF-8)
- Added index functions to build a searchable index of
XML nodes.
- Added character entity callback interface to support
diff --git a/TODO b/TODO
index 05af08e..3908528 100644
--- a/TODO
+++ b/TODO
@@ -1,10 +1,6 @@
TODO - 05/16/2004
-----------------
- - UTF-16 support.
- -- Auto-detect in strings via initial FFFE or FEFF BOM
- -- Convert to UTF-8
- -- Read UTF-8 or UTF-16, write UTF-8
- New documentation.
-- Use HTMLDOC to generate
-- Provide more tutorials
diff --git a/doc/relnotes.html b/doc/relnotes.html
index 8c6d09b..b28957d 100644
--- a/doc/relnotes.html
+++ b/doc/relnotes.html
@@ -9,6 +9,28 @@
New programmers manual.
+ Added UTF-16 support (input only; all output is
+ UTF-8)
+
+ Added index functions to build a searchable index of
+ XML nodes.
+
+ Added character entity callback interface to support
+ additional character entities beyond those defined in
+ the XHTML specification.
+
+ Added support for XHTML character entities.
+
+ The mxmldoc utility now produces XML output which
+ conforms to an updated XML schema, described in the file
+ "doc/mxmldoc.xsd".
+
+ Changed the whitespace callback interface to return
+ strings instead of a single character, allowing for
+ greater control over the formatting of XML files written
+ using Mini-XML. THIS CHANGE WILL REQUIRE CHANGES TO
+ YOUR 1.x CODE IF YOU USE WHITESPACE CALLBACKS.
+
The mxmldoc utility now produces XML output which
conforms to an updated XML schema, described in the file
"doc/mxmldoc.xsd".
diff --git a/mxml-file.c b/mxml-file.c
index fce035b..338edcf 100644
--- a/mxml-file.c
+++ b/mxml-file.c
@@ -1,5 +1,5 @@
/*
- * "$Id: mxml-file.c,v 1.30 2004/05/02 16:04:40 mike Exp $"
+ * "$Id: mxml-file.c,v 1.31 2004/05/16 21:54:47 mike Exp $"
*
* File loading code for Mini-XML, a small XML-like file parsing library.
*
@@ -44,6 +44,15 @@
#include "mxml.h"
+/*
+ * Character encoding...
+ */
+
+#define ENCODE_UTF8 0 /* UTF-8 */
+#define ENCODE_UTF16BE 1 /* UTF-16 Big-Endian */
+#define ENCODE_UTF16LE 2 /* UTF-16 Little-Endian */
+
+
/*
* Global error handler...
*/
@@ -58,15 +67,17 @@ extern void (*mxml_error_cb)(const char *);
static int mxml_add_char(int ch, char **ptr, char **buffer,
int *bufsize);
static int mxml_get_entity(mxml_node_t *parent, void *p,
- int (*getc_cb)(void *));
-static int mxml_file_getc(void *p);
+ int *encoding,
+ int (*getc_cb)(void *, int *));
+static int mxml_file_getc(void *p, int *encoding);
static int mxml_file_putc(int ch, void *p);
static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p,
mxml_type_t (*cb)(mxml_node_t *),
- int (*getc_cb)(void *));
+ int (*getc_cb)(void *, int *));
static int mxml_parse_element(mxml_node_t *node, void *p,
- int (*getc_cb)(void *));
-static int mxml_string_getc(void *p);
+ int *encoding,
+ int (*getc_cb)(void *, int *));
+static int mxml_string_getc(void *p, int *encoding);
static int mxml_string_putc(int ch, void *p);
static int mxml_write_name(const char *s, void *p,
int (*putc_cb)(int, void *));
@@ -371,11 +382,12 @@ mxml_add_char(int ch, /* I - Character to add */
* 'mxml_get_entity()' - Get the character corresponding to an entity...
*/
-static int /* O - Character value or EOF on error */
-mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
- void *p, /* I - Pointer to source */
- int (*getc_cb)(void *))
- /* I - Get character function */
+static int /* O - Character value or EOF on error */
+mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
+ void *p, /* I - Pointer to source */
+ int *encoding, /* IO - Character encoding */
+ int (*getc_cb)(void *, int *))
+ /* I - Get character function */
{
int ch; /* Current character */
char entity[64], /* Entity string */
@@ -384,8 +396,8 @@ mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
entptr = entity;
- while ((ch = (*getc_cb)(p)) != EOF)
- if (!isalnum(ch) && ch != '#')
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
+ if (ch > 126 || (!isalnum(ch) && ch != '#'))
break;
else if (entptr < (entity + sizeof(entity) - 1))
*entptr++ = ch;
@@ -424,8 +436,9 @@ mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
* 'mxml_file_getc()' - Get a character from a file.
*/
-static int /* O - Character or EOF */
-mxml_file_getc(void *p) /* I - Pointer to file */
+static int /* O - Character or EOF */
+mxml_file_getc(void *p, /* I - Pointer to file */
+ int *encoding) /* IO - Encoding */
{
int ch, /* Character from file */
temp; /* Temporary character */
@@ -439,63 +452,142 @@ mxml_file_getc(void *p) /* I - Pointer to file */
fp = (FILE *)p;
ch = getc(fp);
- if (ch == EOF || !(ch & 0x80))
- return (ch);
-
- /*
- * Got a UTF-8 character; convert UTF-8 to Unicode and return...
- */
+ if (ch == EOF)
+ return (EOF);
- if ((ch & 0xe0) == 0xc0)
+ switch (*encoding)
{
- /*
- * Two-byte value...
- */
+ case ENCODE_UTF8 :
+ /*
+ * Got a UTF-8 character; convert UTF-8 to Unicode and return...
+ */
- if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
- return (EOF);
+ if (!(ch & 0x80))
+ return (ch);
+ else if (ch == 0xfe)
+ {
+ /*
+ * UTF-16 big-endian BOM?
+ */
- ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
- }
- else if ((ch & 0xf0) == 0xe0)
- {
- /*
- * Three-byte value...
- */
+ ch = getc(fp);
+ if (ch != 0xff)
+ return (EOF);
+
+ *encoding = ENCODE_UTF16BE;
- if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
- return (EOF);
+ return (mxml_file_getc(p, encoding));
+ }
+ else if (ch == 0xff)
+ {
+ /*
+ * UTF-16 little-endian BOM?
+ */
- ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
+ ch = getc(fp);
+ if (ch != 0xfe)
+ return (EOF);
- if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
- return (EOF);
+ *encoding = ENCODE_UTF16LE;
- ch = (ch << 6) | (temp & 0x3f);
- }
- else if ((ch & 0xf8) == 0xf0)
- {
- /*
- * Four-byte value...
- */
+ return (mxml_file_getc(p, encoding));
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ {
+ /*
+ * Two-byte value...
+ */
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ {
+ /*
+ * Three-byte value...
+ */
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ /*
+ * Four-byte value...
+ */
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((ch & 0x07) << 6) | (temp & 0x3f);
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+
+ if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = (ch << 6) | (temp & 0x3f);
+ }
+ else
+ return (EOF);
+ break;
+
+ case ENCODE_UTF16BE :
+ /*
+ * Read UTF-16 big-endian char...
+ */
+
+ ch = (ch << 8) | getc(fp);
- if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
- return (EOF);
+ if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
- ch = ((ch & 0x07) << 6) | (temp & 0x3f);
+ int lch = (getc(fp) << 8) | getc(fp);
- if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
- return (EOF);
+ if (ch < 0xdc00 || ch >= 0xdfff)
+ return (EOF);
- ch = (ch << 6) | (temp & 0x3f);
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+ break;
- if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
- return (EOF);
+ case ENCODE_UTF16LE :
+ /*
+ * Read UTF-16 little-endian char...
+ */
- ch = (ch << 6) | (temp & 0x3f);
+ ch |= (getc(fp) << 8);
+
+ if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch = getc(fp) | (getc(fp) << 8);
+
+ if (ch < 0xdc00 || ch >= 0xdfff)
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+ break;
}
- else
- return (EOF);
return (ch);
}
@@ -562,7 +654,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
void *p, /* I - Pointer to data */
mxml_type_t (*cb)(mxml_node_t *),
/* I - Callback function or MXML_NO_CALLBACK */
- int (*getc_cb)(void *))
+ int (*getc_cb)(void *, int *))
/* I - Read function */
{
mxml_node_t *node, /* Current node */
@@ -573,6 +665,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
*bufptr; /* Pointer into buffer */
int bufsize; /* Size of buffer */
mxml_type_t type; /* Current node type */
+ int encoding; /* Character encoding */
/*
@@ -589,13 +682,14 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
bufptr = buffer;
parent = top;
whitespace = 0;
+ encoding = ENCODE_UTF8;
if (cb && parent)
type = (*cb)(parent);
else
type = MXML_TEXT;
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{
if ((ch == '<' || (isspace(ch) && type != MXML_OPAQUE)) && bufptr > buffer)
{
@@ -676,12 +770,12 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
bufptr = buffer;
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
if (isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
break;
else if (ch == '&')
{
- if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
@@ -700,7 +794,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
* Gather rest of comment...
*/
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{
if (ch == '>' && bufptr > (buffer + 4) &&
!strncmp(bufptr - 2, "--", 2))
@@ -708,7 +802,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
else
{
if (ch == '&')
- if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
@@ -753,14 +847,14 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
else
{
if (ch == '&')
- if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
}
}
- while ((ch = (*getc_cb)(p)) != EOF);
+ while ((ch = (*getc_cb)(p, &encoding)) != EOF);
/*
* Error out if we didn't get the whole declaration...
@@ -818,7 +912,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
*/
while (ch != '>' && ch != EOF)
- ch = (*getc_cb)(p);
+ ch = (*getc_cb)(p, &encoding);
/*
* Ascend into the parent and set the value type as needed...
@@ -849,10 +943,10 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
}
if (isspace(ch))
- ch = mxml_parse_element(node, p, getc_cb);
+ ch = mxml_parse_element(node, p, &encoding, getc_cb);
else if (ch == '/')
{
- if ((ch = (*getc_cb)(p)) != '>')
+ if ((ch = (*getc_cb)(p, &encoding)) != '>')
{
mxml_error("Expected > but got '%c' instead for element <%s/>!",
ch, buffer);
@@ -886,7 +980,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
* Add character entity to current buffer...
*/
- if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
@@ -937,11 +1031,13 @@ error:
* 'mxml_parse_element()' - Parse an element for any attributes...
*/
-static int /* O - Terminating character */
-mxml_parse_element(mxml_node_t *node, /* I - Element node */
- void *p, /* I - Data to read from */
- int (*getc_cb)(void *))
- /* I - Data callback */
+static int /* O - Terminating character */
+mxml_parse_element(mxml_node_t *node, /* I - Element node */
+ void *p, /* I - Data to read from */
+ int *encoding,
+ /* IO - Encoding */
+ int (*getc_cb)(void *, int *))
+ /* I - Data callback */
{
int ch, /* Current character in file */
quote; /* Quoting character */
@@ -979,7 +1075,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Loop until we hit a >, /, ?, or EOF...
*/
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
{
#if DEBUG > 1
fprintf(stderr, "parse_element: ch='%c'\n", ch);
@@ -1002,7 +1098,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Grab the > character and print an error if it isn't there...
*/
- quote = (*getc_cb)(p);
+ quote = (*getc_cb)(p, encoding);
if (quote != '>')
{
@@ -1031,10 +1127,10 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
quote = ch;
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
{
if (ch == '&')
- if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &ptr, &name, &namesize))
@@ -1050,13 +1146,13 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Grab an normal, non-quoted name...
*/
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (isspace(ch) || ch == '=' || ch == '/' || ch == '>' || ch == '?')
break;
else
{
if (ch == '&')
- if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &ptr, &name, &namesize))
@@ -1072,7 +1168,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Read the attribute value...
*/
- if ((ch = (*getc_cb)(p)) == EOF)
+ if ((ch = (*getc_cb)(p, encoding)) == EOF)
{
mxml_error("Missing value for attribute '%s' in element %s!",
name, node->value.element.name);
@@ -1088,13 +1184,13 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
quote = ch;
ptr = value;
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (ch == quote)
break;
else
{
if (ch == '&')
- if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &ptr, &value, &valsize))
@@ -1112,13 +1208,13 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
value[0] = ch;
ptr = value + 1;
- while ((ch = (*getc_cb)(p)) != EOF)
+ while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (isspace(ch) || ch == '=' || ch == '/' || ch == '>')
break;
else
{
if (ch == '&')
- if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF)
+ if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error;
if (mxml_add_char(ch, &ptr, &value, &valsize))
@@ -1153,7 +1249,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Grab the > character and print an error if it isn't there...
*/
- quote = (*getc_cb)(p);
+ quote = (*getc_cb)(p, encoding);
if (quote != '>')
{
@@ -1194,8 +1290,9 @@ error:
* 'mxml_string_getc()' - Get a character from a string.
*/
-static int /* O - Character or EOF */
-mxml_string_getc(void *p) /* I - Pointer to file */
+static int /* O - Character or EOF */
+mxml_string_getc(void *p, /* I - Pointer to file */
+ int *encoding) /* IO - Encoding */
{
int ch; /* Character */
const char **s; /* Pointer to string pointer */
@@ -1203,7 +1300,7 @@ mxml_string_getc(void *p) /* I - Pointer to file */
s = (const char **)p;
- if ((ch = *s[0]) != 0)
+ if ((ch = *s[0] & 255) != 0 || *encoding == ENCODE_UTF16LE)
{
/*
* Got character; convert UTF-8 to integer and return...
@@ -1211,62 +1308,163 @@ mxml_string_getc(void *p) /* I - Pointer to file */
(*s)++;
- if (!(ch & 0x80))
- return (ch);
- else if ((ch & 0xe0) == 0xc0)
+ switch (*encoding)
{
- /*
- * Two-byte value...
- */
+ case ENCODE_UTF8 :
+ if (!(ch & 0x80))
+ return (ch);
+ else if (ch == 0xfe)
+ {
+ /*
+ * UTF-16 big-endian BOM?
+ */
- if ((*s[0] & 0xc0) != 0x80)
- return (EOF);
+ if ((*s[0] & 255) != 0xff)
+ return (EOF);
- ch = ((ch & 0x1f) << 6) | (*s[0] & 0x3f);
+ *encoding = ENCODE_UTF16BE;
+ (*s)++;
- (*s)++;
+ return (mxml_string_getc(p, encoding));
+ }
+ else if (ch == 0xff)
+ {
+ /*
+ * UTF-16 little-endian BOM?
+ */
- return (ch);
- }
- else if ((ch & 0xf0) == 0xe0)
- {
- /*
- * Three-byte value...
- */
+ if ((*s[0] & 255) != 0xfe)
+ return (EOF);
- if ((*s[0] & 0xc0) != 0x80 ||
- (*s[1] & 0xc0) != 0x80)
- return (EOF);
+ *encoding = ENCODE_UTF16LE;
+ (*s)++;
- ch = ((((ch & 0x0f) << 6) | (*s[0] & 0x3f)) << 6) | (*s[1] & 0x3f);
+ return (mxml_string_getc(p, encoding));
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ {
+ /*
+ * Two-byte value...
+ */
- (*s) += 2;
+ if ((*s[0] & 0xc0) != 0x80)
+ return (EOF);
- return (ch);
- }
- else if ((ch & 0xf8) == 0xf0)
- {
- /*
- * Four-byte value...
- */
+ ch = ((ch & 0x1f) << 6) | (*s[0] & 0x3f);
- if ((*s[0] & 0xc0) != 0x80 ||
- (*s[1] & 0xc0) != 0x80 ||
- (*s[2] & 0xc0) != 0x80)
- return (EOF);
+ (*s)++;
+
+ return (ch);
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ {
+ /*
+ * Three-byte value...
+ */
+
+ if ((*s[0] & 0xc0) != 0x80 ||
+ (*s[1] & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((((ch & 0x0f) << 6) | (*s[0] & 0x3f)) << 6) | (*s[1] & 0x3f);
+
+ (*s) += 2;
+
+ return (ch);
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ /*
+ * Four-byte value...
+ */
+
+ if ((*s[0] & 0xc0) != 0x80 ||
+ (*s[1] & 0xc0) != 0x80 ||
+ (*s[2] & 0xc0) != 0x80)
+ return (EOF);
+
+ ch = ((((((ch & 0x07) << 6) | (*s[0] & 0x3f)) << 6) |
+ (*s[1] & 0x3f)) << 6) | (*s[2] & 0x3f);
+
+ (*s) += 3;
+
+ return (ch);
+ }
+ else
+ return (EOF);
+
+ case ENCODE_UTF16BE :
+ /*
+ * Read UTF-16 big-endian char...
+ */
+
+ ch = (ch << 8) | (*s[0] & 255);
+ (*s) ++;
+
+ if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch; /* Lower word */
+
+
+ if (!*s[0])
+ return (EOF);
+
+ lch = ((*s[0] & 255) << 8) | (*s[1] & 255);
+ (*s) += 2;
+
+ if (ch < 0xdc00 || ch >= 0xdfff)
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+
+ return (ch);
+
+ case ENCODE_UTF16LE :
+ /*
+ * Read UTF-16 little-endian char...
+ */
+
+ ch = ch | ((*s[0] & 255) << 8);
+
+ if (!ch)
+ {
+ (*s) --;
+ return (EOF);
+ }
+
+ (*s) ++;
+
+ if (ch >= 0xd800 && ch <= 0xdbff)
+ {
+ /*
+ * Multi-word UTF-16 char...
+ */
+
+ int lch; /* Lower word */
- ch = ((((((ch & 0x07) << 6) | (*s[0] & 0x3f)) << 6) |
- (*s[1] & 0x3f)) << 6) | (*s[2] & 0x3f);
- (*s) += 3;
+ if (!*s[1])
+ return (EOF);
- return (ch);
+ lch = ((*s[1] & 255) << 8) | (*s[0] & 255);
+ (*s) += 2;
+
+ if (ch < 0xdc00 || ch >= 0xdfff)
+ return (EOF);
+
+ ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
+ }
+
+ return (ch);
}
- else
- return (EOF);
}
- else
- return (EOF);
+
+ return (EOF);
}
@@ -1734,5 +1932,5 @@ mxml_write_ws(mxml_node_t *node, /* I - Current node */
/*
- * End of "$Id: mxml-file.c,v 1.30 2004/05/02 16:04:40 mike Exp $".
+ * End of "$Id: mxml-file.c,v 1.31 2004/05/16 21:54:47 mike Exp $".
*/