UTF-16 input support.

web
Michael R Sweet 20 years ago
parent 934a9aeab3
commit acab636423
  1. 1
      CHANGES
  2. 4
      TODO
  3. 22
      doc/relnotes.html
  4. 460
      mxml-file.c

@ -4,6 +4,7 @@ README - 05/16/2004
CHANGES IN Mini-XML 2.0 CHANGES IN Mini-XML 2.0
- New programmers manual. - New programmers manual.
- Added UTF-16 support (input only; all output is UTF-8)
- Added index functions to build a searchable index of - Added index functions to build a searchable index of
XML nodes. XML nodes.
- Added character entity callback interface to support - Added character entity callback interface to support

@ -1,10 +1,6 @@
TODO - 05/16/2004 TODO - 05/16/2004
----------------- -----------------
- UTF-16 support.
-- Auto-detect in strings via initial FFFE or FEFF BOM
-- Convert to UTF-8
-- Read UTF-8 or UTF-16, write UTF-8
- New documentation. - New documentation.
-- Use HTMLDOC to generate -- Use HTMLDOC to generate
-- Provide more tutorials -- Provide more tutorials

@ -9,6 +9,28 @@
<li>New programmers manual.</li> <li>New programmers manual.</li>
<li>Added UTF-16 support (input only; all output is
UTF-8)</li>
<li>Added index functions to build a searchable index of
XML nodes.</li>
<li>Added character entity callback interface to support
additional character entities beyond those defined in
the XHTML specification.</li>
<li>Added support for XHTML character entities.</li>
<li>The mxmldoc utility now produces XML output which
conforms to an updated XML schema, described in the file
"doc/mxmldoc.xsd".</li>
<li>Changed the whitespace callback interface to return
strings instead of a single character, allowing for
greater control over the formatting of XML files written
using Mini-XML. THIS CHANGE WILL REQUIRE CHANGES TO
YOUR 1.x CODE IF YOU USE WHITESPACE CALLBACKS.</li>
<li>The mxmldoc utility now produces XML output which <li>The mxmldoc utility now produces XML output which
conforms to an updated XML schema, described in the file conforms to an updated XML schema, described in the file
"doc/mxmldoc.xsd".</li> "doc/mxmldoc.xsd".</li>

@ -1,5 +1,5 @@
/* /*
* "$Id: mxml-file.c,v 1.30 2004/05/02 16:04:40 mike Exp $" * "$Id: mxml-file.c,v 1.31 2004/05/16 21:54:47 mike Exp $"
* *
* File loading code for Mini-XML, a small XML-like file parsing library. * File loading code for Mini-XML, a small XML-like file parsing library.
* *
@ -44,6 +44,15 @@
#include "mxml.h" #include "mxml.h"
/*
* Character encoding of the input being parsed.  mxml_load_data() keeps one
* of these values in its "encoding" variable, starting at ENCODE_UTF8, and
* passes a pointer to it to the get-character callbacks.  A callback that
* reads a byte order mark while in UTF-8 mode (FE FF or FF FE) changes the
* value to the matching UTF-16 encoding for all following reads.
*/
#define ENCODE_UTF8 0 /* UTF-8 (initial encoding) */
#define ENCODE_UTF16BE 1 /* UTF-16 Big-Endian (selected by BOM bytes FE FF) */
#define ENCODE_UTF16LE 2 /* UTF-16 Little-Endian (selected by BOM bytes FF FE) */
/* /*
* Global error handler... * Global error handler...
*/ */
@ -58,15 +67,17 @@ extern void (*mxml_error_cb)(const char *);
static int mxml_add_char(int ch, char **ptr, char **buffer, static int mxml_add_char(int ch, char **ptr, char **buffer,
int *bufsize); int *bufsize);
static int mxml_get_entity(mxml_node_t *parent, void *p, static int mxml_get_entity(mxml_node_t *parent, void *p,
int (*getc_cb)(void *)); int *encoding,
static int mxml_file_getc(void *p); int (*getc_cb)(void *, int *));
static int mxml_file_getc(void *p, int *encoding);
static int mxml_file_putc(int ch, void *p); static int mxml_file_putc(int ch, void *p);
static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p, static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p,
mxml_type_t (*cb)(mxml_node_t *), mxml_type_t (*cb)(mxml_node_t *),
int (*getc_cb)(void *)); int (*getc_cb)(void *, int *));
static int mxml_parse_element(mxml_node_t *node, void *p, static int mxml_parse_element(mxml_node_t *node, void *p,
int (*getc_cb)(void *)); int *encoding,
static int mxml_string_getc(void *p); int (*getc_cb)(void *, int *));
static int mxml_string_getc(void *p, int *encoding);
static int mxml_string_putc(int ch, void *p); static int mxml_string_putc(int ch, void *p);
static int mxml_write_name(const char *s, void *p, static int mxml_write_name(const char *s, void *p,
int (*putc_cb)(int, void *)); int (*putc_cb)(int, void *));
@ -371,11 +382,12 @@ mxml_add_char(int ch, /* I - Character to add */
* 'mxml_get_entity()' - Get the character corresponding to an entity... * 'mxml_get_entity()' - Get the character corresponding to an entity...
*/ */
static int /* O - Character value or EOF on error */ static int /* O - Character value or EOF on error */
mxml_get_entity(mxml_node_t *parent, /* I - Parent node */ mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
void *p, /* I - Pointer to source */ void *p, /* I - Pointer to source */
int (*getc_cb)(void *)) int *encoding, /* IO - Character encoding */
/* I - Get character function */ int (*getc_cb)(void *, int *))
/* I - Get character function */
{ {
int ch; /* Current character */ int ch; /* Current character */
char entity[64], /* Entity string */ char entity[64], /* Entity string */
@ -384,8 +396,8 @@ mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
entptr = entity; entptr = entity;
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (!isalnum(ch) && ch != '#') if (ch > 126 || (!isalnum(ch) && ch != '#'))
break; break;
else if (entptr < (entity + sizeof(entity) - 1)) else if (entptr < (entity + sizeof(entity) - 1))
*entptr++ = ch; *entptr++ = ch;
@ -424,8 +436,9 @@ mxml_get_entity(mxml_node_t *parent, /* I - Parent node */
* 'mxml_file_getc()' - Get a character from a file. * 'mxml_file_getc()' - Get a character from a file.
*/ */
static int /* O - Character or EOF */ static int /* O - Character or EOF */
mxml_file_getc(void *p) /* I - Pointer to file */ mxml_file_getc(void *p, /* I - Pointer to file */
int *encoding) /* IO - Encoding */
{ {
int ch, /* Character from file */ int ch, /* Character from file */
temp; /* Temporary character */ temp; /* Temporary character */
@ -439,63 +452,142 @@ mxml_file_getc(void *p) /* I - Pointer to file */
fp = (FILE *)p; fp = (FILE *)p;
ch = getc(fp); ch = getc(fp);
if (ch == EOF || !(ch & 0x80)) if (ch == EOF)
return (ch); return (EOF);
/*
* Got a UTF-8 character; convert UTF-8 to Unicode and return...
*/
if ((ch & 0xe0) == 0xc0) switch (*encoding)
{ {
/* case ENCODE_UTF8 :
* Two-byte value... /*
*/ * Got a UTF-8 character; convert UTF-8 to Unicode and return...
*/
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) if (!(ch & 0x80))
return (EOF); return (ch);
else if (ch == 0xfe)
{
/*
* UTF-16 big-endian BOM?
*/
ch = ((ch & 0x1f) << 6) | (temp & 0x3f); ch = getc(fp);
} if (ch != 0xff)
else if ((ch & 0xf0) == 0xe0) return (EOF);
{
/* *encoding = ENCODE_UTF16BE;
* Three-byte value...
*/
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (mxml_file_getc(p, encoding));
return (EOF); }
else if (ch == 0xff)
{
/*
* UTF-16 little-endian BOM?
*/
ch = ((ch & 0x0f) << 6) | (temp & 0x3f); ch = getc(fp);
if (ch != 0xfe)
return (EOF);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) *encoding = ENCODE_UTF16LE;
return (EOF);
ch = (ch << 6) | (temp & 0x3f); return (mxml_file_getc(p, encoding));
} }
else if ((ch & 0xf8) == 0xf0) else if ((ch & 0xe0) == 0xc0)
{ {
/* /*
* Four-byte value... * Two-byte value...
*/ */
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
}
else if ((ch & 0xf0) == 0xe0)
{
/*
* Three-byte value...
*/
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
}
else if ((ch & 0xf8) == 0xf0)
{
/*
* Four-byte value...
*/
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x07) << 6) | (temp & 0x3f);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
}
else
return (EOF);
break;
case ENCODE_UTF16BE :
/*
* Read UTF-16 big-endian char...
*/
ch = (ch << 8) | getc(fp);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) if (ch >= 0xd800 && ch <= 0xdbff)
return (EOF); {
/*
* Multi-word UTF-16 char...
*/
ch = ((ch & 0x07) << 6) | (temp & 0x3f); int lch = (getc(fp) << 8) | getc(fp);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) if (ch < 0xdc00 || ch >= 0xdfff)
return (EOF); return (EOF);
ch = (ch << 6) | (temp & 0x3f); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
break;
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) case ENCODE_UTF16LE :
return (EOF); /*
* Read UTF-16 little-endian char...
*/
ch = (ch << 6) | (temp & 0x3f); ch |= (getc(fp) << 8);
if (ch >= 0xd800 && ch <= 0xdbff)
{
/*
* Multi-word UTF-16 char...
*/
int lch = getc(fp) | (getc(fp) << 8);
if (ch < 0xdc00 || ch >= 0xdfff)
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
break;
} }
else
return (EOF);
return (ch); return (ch);
} }
@ -562,7 +654,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
void *p, /* I - Pointer to data */ void *p, /* I - Pointer to data */
mxml_type_t (*cb)(mxml_node_t *), mxml_type_t (*cb)(mxml_node_t *),
/* I - Callback function or MXML_NO_CALLBACK */ /* I - Callback function or MXML_NO_CALLBACK */
int (*getc_cb)(void *)) int (*getc_cb)(void *, int *))
/* I - Read function */ /* I - Read function */
{ {
mxml_node_t *node, /* Current node */ mxml_node_t *node, /* Current node */
@ -573,6 +665,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
*bufptr; /* Pointer into buffer */ *bufptr; /* Pointer into buffer */
int bufsize; /* Size of buffer */ int bufsize; /* Size of buffer */
mxml_type_t type; /* Current node type */ mxml_type_t type; /* Current node type */
int encoding; /* Character encoding */
/* /*
@ -589,13 +682,14 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
bufptr = buffer; bufptr = buffer;
parent = top; parent = top;
whitespace = 0; whitespace = 0;
encoding = ENCODE_UTF8;
if (cb && parent) if (cb && parent)
type = (*cb)(parent); type = (*cb)(parent);
else else
type = MXML_TEXT; type = MXML_TEXT;
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{ {
if ((ch == '<' || (isspace(ch) && type != MXML_OPAQUE)) && bufptr > buffer) if ((ch == '<' || (isspace(ch) && type != MXML_OPAQUE)) && bufptr > buffer)
{ {
@ -676,12 +770,12 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
bufptr = buffer; bufptr = buffer;
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, &encoding)) != EOF)
if (isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer)) if (isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
break; break;
else if (ch == '&') else if (ch == '&')
{ {
if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
@ -700,7 +794,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
* Gather rest of comment... * Gather rest of comment...
*/ */
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{ {
if (ch == '>' && bufptr > (buffer + 4) && if (ch == '>' && bufptr > (buffer + 4) &&
!strncmp(bufptr - 2, "--", 2)) !strncmp(bufptr - 2, "--", 2))
@ -708,7 +802,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
else else
{ {
if (ch == '&') if (ch == '&')
if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
@ -753,14 +847,14 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
else else
{ {
if (ch == '&') if (ch == '&')
if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error; goto error;
} }
} }
while ((ch = (*getc_cb)(p)) != EOF); while ((ch = (*getc_cb)(p, &encoding)) != EOF);
/* /*
* Error out if we didn't get the whole declaration... * Error out if we didn't get the whole declaration...
@ -818,7 +912,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
*/ */
while (ch != '>' && ch != EOF) while (ch != '>' && ch != EOF)
ch = (*getc_cb)(p); ch = (*getc_cb)(p, &encoding);
/* /*
* Ascend into the parent and set the value type as needed... * Ascend into the parent and set the value type as needed...
@ -849,10 +943,10 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
} }
if (isspace(ch)) if (isspace(ch))
ch = mxml_parse_element(node, p, getc_cb); ch = mxml_parse_element(node, p, &encoding, getc_cb);
else if (ch == '/') else if (ch == '/')
{ {
if ((ch = (*getc_cb)(p)) != '>') if ((ch = (*getc_cb)(p, &encoding)) != '>')
{ {
mxml_error("Expected > but got '%c' instead for element <%s/>!", mxml_error("Expected > but got '%c' instead for element <%s/>!",
ch, buffer); ch, buffer);
@ -886,7 +980,7 @@ mxml_load_data(mxml_node_t *top, /* I - Top node */
* Add character entity to current buffer... * Add character entity to current buffer...
*/ */
if ((ch = mxml_get_entity(parent, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
@ -937,11 +1031,13 @@ error:
* 'mxml_parse_element()' - Parse an element for any attributes... * 'mxml_parse_element()' - Parse an element for any attributes...
*/ */
static int /* O - Terminating character */ static int /* O - Terminating character */
mxml_parse_element(mxml_node_t *node, /* I - Element node */ mxml_parse_element(mxml_node_t *node, /* I - Element node */
void *p, /* I - Data to read from */ void *p, /* I - Data to read from */
int (*getc_cb)(void *)) int *encoding,
/* I - Data callback */ /* IO - Encoding */
int (*getc_cb)(void *, int *))
/* I - Data callback */
{ {
int ch, /* Current character in file */ int ch, /* Current character in file */
quote; /* Quoting character */ quote; /* Quoting character */
@ -979,7 +1075,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Loop until we hit a >, /, ?, or EOF... * Loop until we hit a >, /, ?, or EOF...
*/ */
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, encoding)) != EOF)
{ {
#if DEBUG > 1 #if DEBUG > 1
fprintf(stderr, "parse_element: ch='%c'\n", ch); fprintf(stderr, "parse_element: ch='%c'\n", ch);
@ -1002,7 +1098,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Grab the > character and print an error if it isn't there... * Grab the > character and print an error if it isn't there...
*/ */
quote = (*getc_cb)(p); quote = (*getc_cb)(p, encoding);
if (quote != '>') if (quote != '>')
{ {
@ -1031,10 +1127,10 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
quote = ch; quote = ch;
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, encoding)) != EOF)
{ {
if (ch == '&') if (ch == '&')
if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &ptr, &name, &namesize)) if (mxml_add_char(ch, &ptr, &name, &namesize))
@ -1050,13 +1146,13 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Grab a normal, non-quoted name... * Grab a normal, non-quoted name...
*/ */
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (isspace(ch) || ch == '=' || ch == '/' || ch == '>' || ch == '?') if (isspace(ch) || ch == '=' || ch == '/' || ch == '>' || ch == '?')
break; break;
else else
{ {
if (ch == '&') if (ch == '&')
if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &ptr, &name, &namesize)) if (mxml_add_char(ch, &ptr, &name, &namesize))
@ -1072,7 +1168,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Read the attribute value... * Read the attribute value...
*/ */
if ((ch = (*getc_cb)(p)) == EOF) if ((ch = (*getc_cb)(p, encoding)) == EOF)
{ {
mxml_error("Missing value for attribute '%s' in element %s!", mxml_error("Missing value for attribute '%s' in element %s!",
name, node->value.element.name); name, node->value.element.name);
@ -1088,13 +1184,13 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
quote = ch; quote = ch;
ptr = value; ptr = value;
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (ch == quote) if (ch == quote)
break; break;
else else
{ {
if (ch == '&') if (ch == '&')
if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &ptr, &value, &valsize)) if (mxml_add_char(ch, &ptr, &value, &valsize))
@ -1112,13 +1208,13 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
value[0] = ch; value[0] = ch;
ptr = value + 1; ptr = value + 1;
while ((ch = (*getc_cb)(p)) != EOF) while ((ch = (*getc_cb)(p, encoding)) != EOF)
if (isspace(ch) || ch == '=' || ch == '/' || ch == '>') if (isspace(ch) || ch == '=' || ch == '/' || ch == '>')
break; break;
else else
{ {
if (ch == '&') if (ch == '&')
if ((ch = mxml_get_entity(node, p, getc_cb)) == EOF) if ((ch = mxml_get_entity(node, p, encoding, getc_cb)) == EOF)
goto error; goto error;
if (mxml_add_char(ch, &ptr, &value, &valsize)) if (mxml_add_char(ch, &ptr, &value, &valsize))
@ -1153,7 +1249,7 @@ mxml_parse_element(mxml_node_t *node, /* I - Element node */
* Grab the > character and print an error if it isn't there... * Grab the > character and print an error if it isn't there...
*/ */
quote = (*getc_cb)(p); quote = (*getc_cb)(p, encoding);
if (quote != '>') if (quote != '>')
{ {
@ -1194,8 +1290,9 @@ error:
* 'mxml_string_getc()' - Get a character from a string. * 'mxml_string_getc()' - Get a character from a string.
*/ */
static int /* O - Character or EOF */ static int /* O - Character or EOF */
mxml_string_getc(void *p) /* I - Pointer to file */ mxml_string_getc(void *p, /* I - Pointer to file */
int *encoding) /* IO - Encoding */
{ {
int ch; /* Character */ int ch; /* Character */
const char **s; /* Pointer to string pointer */ const char **s; /* Pointer to string pointer */
@ -1203,7 +1300,7 @@ mxml_string_getc(void *p) /* I - Pointer to file */
s = (const char **)p; s = (const char **)p;
if ((ch = *s[0]) != 0) if ((ch = *s[0] & 255) != 0 || *encoding == ENCODE_UTF16LE)
{ {
/* /*
* Got character; convert UTF-8 to integer and return... * Got character; convert UTF-8 to integer and return...
@ -1211,62 +1308,163 @@ mxml_string_getc(void *p) /* I - Pointer to file */
(*s)++; (*s)++;
if (!(ch & 0x80)) switch (*encoding)
return (ch);
else if ((ch & 0xe0) == 0xc0)
{ {
/* case ENCODE_UTF8 :
* Two-byte value... if (!(ch & 0x80))
*/ return (ch);
else if (ch == 0xfe)
{
/*
* UTF-16 big-endian BOM?
*/
if ((*s[0] & 0xc0) != 0x80) if ((*s[0] & 255) != 0xff)
return (EOF); return (EOF);
ch = ((ch & 0x1f) << 6) | (*s[0] & 0x3f); *encoding = ENCODE_UTF16BE;
(*s)++;
(*s)++; return (mxml_string_getc(p, encoding));
}
else if (ch == 0xff)
{
/*
* UTF-16 little-endian BOM?
*/
return (ch); if ((*s[0] & 255) != 0xfe)
} return (EOF);
else if ((ch & 0xf0) == 0xe0)
{
/*
* Three-byte value...
*/
if ((*s[0] & 0xc0) != 0x80 || *encoding = ENCODE_UTF16LE;
(*s[1] & 0xc0) != 0x80) (*s)++;
return (EOF);
ch = ((((ch & 0x0f) << 6) | (*s[0] & 0x3f)) << 6) | (*s[1] & 0x3f); return (mxml_string_getc(p, encoding));
}
else if ((ch & 0xe0) == 0xc0)
{
/*
* Two-byte value...
*/
(*s) += 2; if ((*s[0] & 0xc0) != 0x80)
return (EOF);
return (ch); ch = ((ch & 0x1f) << 6) | (*s[0] & 0x3f);
}
else if ((ch & 0xf8) == 0xf0)
{
/*
* Four-byte value...
*/
if ((*s[0] & 0xc0) != 0x80 || (*s)++;
(*s[1] & 0xc0) != 0x80 ||
(*s[2] & 0xc0) != 0x80) return (ch);
return (EOF); }
else if ((ch & 0xf0) == 0xe0)
{
/*
* Three-byte value...
*/
if ((*s[0] & 0xc0) != 0x80 ||
(*s[1] & 0xc0) != 0x80)
return (EOF);
ch = ((((ch & 0x0f) << 6) | (*s[0] & 0x3f)) << 6) | (*s[1] & 0x3f);
(*s) += 2;
return (ch);
}
else if ((ch & 0xf8) == 0xf0)
{
/*
* Four-byte value...
*/
if ((*s[0] & 0xc0) != 0x80 ||
(*s[1] & 0xc0) != 0x80 ||
(*s[2] & 0xc0) != 0x80)
return (EOF);
ch = ((((((ch & 0x07) << 6) | (*s[0] & 0x3f)) << 6) |
(*s[1] & 0x3f)) << 6) | (*s[2] & 0x3f);
(*s) += 3;
return (ch);
}
else
return (EOF);
case ENCODE_UTF16BE :
/*
* Read UTF-16 big-endian char...
*/
ch = (ch << 8) | (*s[0] & 255);
(*s) ++;
if (ch >= 0xd800 && ch <= 0xdbff)
{
/*
* Multi-word UTF-16 char...
*/
int lch; /* Lower word */
if (!*s[0])
return (EOF);
lch = ((*s[0] & 255) << 8) | (*s[1] & 255);
(*s) += 2;
if (ch < 0xdc00 || ch >= 0xdfff)
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
return (ch);
case ENCODE_UTF16LE :
/*
* Read UTF-16 little-endian char...
*/
ch = ch | ((*s[0] & 255) << 8);
if (!ch)
{
(*s) --;
return (EOF);
}
(*s) ++;
if (ch >= 0xd800 && ch <= 0xdbff)
{
/*
* Multi-word UTF-16 char...
*/
int lch; /* Lower word */
ch = ((((((ch & 0x07) << 6) | (*s[0] & 0x3f)) << 6) |
(*s[1] & 0x3f)) << 6) | (*s[2] & 0x3f);
(*s) += 3; if (!*s[1])
return (EOF);
return (ch); lch = ((*s[1] & 255) << 8) | (*s[0] & 255);
(*s) += 2;
if (ch < 0xdc00 || ch >= 0xdfff)
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
return (ch);
} }
else
return (EOF);
} }
else
return (EOF); return (EOF);
} }
@ -1734,5 +1932,5 @@ mxml_write_ws(mxml_node_t *node, /* I - Current node */
/* /*
* End of "$Id: mxml-file.c,v 1.30 2004/05/02 16:04:40 mike Exp $". * End of "$Id: mxml-file.c,v 1.31 2004/05/16 21:54:47 mike Exp $".
*/ */

Loading…
Cancel
Save