mxml/mxml-file.c

2895 lines
67 KiB
C
Raw Normal View History

2024-02-27 20:04:27 +00:00
//
// File loading code for Mini-XML, a small XML file parsing library.
//
// https://www.msweet.org/mxml
//
// Copyright © 2003-2024 by Michael R Sweet.
//
// Licensed under Apache License v2.0. See the file "LICENSE" for more
// information.
//
2003-06-03 19:46:29 +00:00
#ifndef _WIN32
# include <unistd.h>
2024-02-27 20:04:27 +00:00
#endif // !_WIN32
2009-02-05 06:06:11 +00:00
#include "mxml-private.h"
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
//
// Character encoding...
//
2004-05-16 21:54:47 +00:00
2024-02-27 20:04:27 +00:00
// Values stored in the "*encoding" state that the getc callbacks switch on...
#define ENCODE_UTF8 0 // UTF-8
#define ENCODE_UTF16BE 1 // UTF-16 Big-Endian
#define ENCODE_UTF16LE 2 // UTF-16 Little-Endian
2004-05-16 21:54:47 +00:00
2024-02-27 20:04:27 +00:00
//
// Macro to test for a bad XML character...
//
// Evaluates to non-zero for any value below space (including negative values)
// other than newline, carriage return, and tab.  The argument is expanded
// several times, so only pass expressions without side effects.
#define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')
2024-02-27 20:04:27 +00:00
//
// Types and structures...
//
// Character source: returns the next character or EOF; the int pointer is the
// ENCODE_xxx state, which the callback may update.
typedef int (*_mxml_getc_cb_t)(void *, int *);
// Character sink: writes one character; returns 0 on success and -1 on error.
typedef int (*_mxml_putc_cb_t)(int, void *);
2024-02-27 20:04:27 +00:00
// Buffer used for both reading from and writing to a file descriptor.  When
// reading, "end" marks the end of the data returned by the last read; when
// writing, "end" marks the end of the "buffer" array and "current" marks the
// end of the pending (unwritten) data.
typedef struct _mxml_fdbuf_s            // File descriptor buffer
{
  int           fd;                     // File descriptor
  unsigned char *current;               // Current position in buffer
  unsigned char *end;                   // End of buffer
  unsigned char buffer[8192];           // Character buffer
} _mxml_fdbuf_t;
2024-02-27 20:04:27 +00:00
//
// Local functions...
//
2003-06-03 19:46:29 +00:00
// String buffer growth...
static int mxml_add_char(int ch, char **ptr, char **buffer, int *bufsize);
// Buffered file descriptor I/O...
static int mxml_fd_getc(void *p, int *encoding);
static int mxml_fd_putc(int ch, void *p);
static int mxml_fd_read(_mxml_fdbuf_t *buf);
static int mxml_fd_write(_mxml_fdbuf_t *buf);
// stdio FILE I/O...
static int mxml_file_getc(void *p, int *encoding);
static int mxml_file_putc(int ch, void *p);
// Entity decoding; reads characters through the supplied getc callback...
static int mxml_get_entity(mxml_node_t *parent, void *p, int *encoding, _mxml_getc_cb_t getc_cb, int *line);
// Return 1 when "ch" is a space, tab, carriage return, or newline and 0
// otherwise; unlike isspace() the result does not depend on the locale.
static inline int mxml_isspace(int ch)
{
  switch (ch)
  {
    case ' ' :
    case '\t' :
    case '\r' :
    case '\n' :
        return (1);

    default :
        return (0);
  }
}
// Common loader behind the public mxmlLoad*/mxmlSAXLoad* entry points...
static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p, mxml_load_cb_t cb, _mxml_getc_cb_t getc_cb, mxml_sax_cb_t sax_cb, void *sax_data);
static int mxml_parse_element(mxml_node_t *node, void *p, int *encoding, _mxml_getc_cb_t getc_cb, int *line);
2004-05-16 21:54:47 +00:00
// String input; the load functions pass the address of a string pointer as "p"...
static int mxml_string_getc(void *p, int *encoding);
2003-06-19 04:25:12 +00:00
// String output; the save function passes a two-element pointer array
// (current position, end of buffer) as "p"...
static int mxml_string_putc(int ch, void *p);
// Serialization helpers; all output goes through the supplied putc callback...
static int mxml_write_name(const char *s, void *p, _mxml_putc_cb_t putc_cb);
static int mxml_write_node(mxml_node_t *node, void *p, mxml_save_cb_t cb, int col, _mxml_putc_cb_t putc_cb, _mxml_global_t *global);
static int mxml_write_string(const char *s, void *p, _mxml_putc_cb_t putc_cb);
static int mxml_write_ws(mxml_node_t *node, void *p, mxml_save_cb_t cb, int ws, int col, _mxml_putc_cb_t putc_cb);
2024-02-27 20:04:27 +00:00
//
// 'mxmlLoadFd()' - Load a file descriptor into an XML node tree.
//
// The nodes in the specified file are added to the specified top node.
// If no top node is provided, the XML file MUST be well-formed with a
// single parent node like <?xml> for the entire file. The callback
// function returns the value type that should be used for child nodes.
// The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`,
// `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for
// loading child (data) nodes of the specified type.
//
// Note: The most common programming error when using the Mini-XML library is
// to load an XML file using the `MXML_TEXT_CALLBACK`, which returns inline
// text as a series of whitespace-delimited words, instead of using the
// `MXML_OPAQUE_CALLBACK` which returns the inline text as a single string
// (including whitespace).
//
mxml_node_t *                           // O - First node or `NULL` if the file could not be read.
mxmlLoadFd(mxml_node_t    *top,         // I - Top node
           int            fd,           // I - File descriptor to read from
           mxml_load_cb_t cb)           // I - Callback function or constant
{
  _mxml_fdbuf_t buf;                    // File descriptor buffer


  // Start with an empty read buffer (current == end) so that the first
  // character request fills it from "fd"...
  buf.fd      = fd;
  buf.current = buf.buffer;
  buf.end     = buf.buffer;

  // Read the XML data without a SAX callback...
  return (mxml_load_data(top, &buf, cb, mxml_fd_getc, MXML_NO_CALLBACK, NULL));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlLoadFile()' - Load a file into an XML node tree.
//
// The nodes in the specified file are added to the specified top node.
// If no top node is provided, the XML file MUST be well-formed with a
// single parent node like <?xml> for the entire file. The callback
// function returns the value type that should be used for child nodes.
// The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`,
// `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for
// loading child (data) nodes of the specified type.
//
// Note: The most common programming error when using the Mini-XML library is
// to load an XML file using the `MXML_TEXT_CALLBACK`, which returns inline
// text as a series of whitespace-delimited words, instead of using the
// `MXML_OPAQUE_CALLBACK` which returns the inline text as a single string
// (including whitespace).
//
mxml_node_t *                           // O - First node or `NULL` if the file could not be read.
mxmlLoadFile(mxml_node_t    *top,       // I - Top node
             FILE           *fp,        // I - File to read from
             mxml_load_cb_t cb)         // I - Callback function or constant
{
  // Read the XML data through the stdio character source, without a SAX
  // callback...
  return (mxml_load_data(top, fp, cb, mxml_file_getc, MXML_NO_CALLBACK, NULL));
}
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
//
// 'mxmlLoadString()' - Load a string into an XML node tree.
//
// The nodes in the specified string are added to the specified top node.
// If no top node is provided, the XML string MUST be well-formed with a
// single parent node like <?xml> for the entire string. The callback
// function returns the value type that should be used for child nodes.
// The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`,
// `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for
// loading child (data) nodes of the specified type.
//
// Note: The most common programming error when using the Mini-XML library is
// to load an XML file using the `MXML_TEXT_CALLBACK`, which returns inline
// text as a series of whitespace-delimited words, instead of using the
// `MXML_OPAQUE_CALLBACK` which returns the inline text as a single string
// (including whitespace).
//
mxml_node_t *                           // O - First node or `NULL` if the string has errors.
mxmlLoadString(mxml_node_t    *top,     // I - Top node
               const char     *s,       // I - String to load
               mxml_load_cb_t cb)       // I - Callback function or constant
{
  // Read the XML data.  The address of the local "s" is passed rather than
  // "s" itself - presumably so that mxml_string_getc() can advance the
  // pointer as it consumes characters...
  return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, MXML_NO_CALLBACK, NULL));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSaveAllocString()' - Save an XML tree to an allocated string.
//
// This function returns a pointer to a string containing the textual
// representation of the XML node tree. The string should be freed
// using `free()` when you are done with it. `NULL` is returned if the node
// would produce an empty string or if the string cannot be allocated.
//
// The callback argument specifies a function that returns a whitespace
// string or `NULL` before and after each element. If `MXML_NO_CALLBACK`
// is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes
// with leading whitespace and before attribute names inside opening
// element tags.
//
char * // O - Allocated string or `NULL`
mxmlSaveAllocString(
2024-02-27 20:04:27 +00:00
mxml_node_t *node, // I - Node to write
mxml_save_cb_t cb) // I - Whitespace callback or `MXML_NO_CALLBACK`
2003-07-22 10:29:19 +00:00
{
2024-02-27 20:04:27 +00:00
int bytes; // Required bytes
char buffer[8192]; // Temporary buffer
char *s; // Allocated string
2003-07-22 10:29:19 +00:00
2024-02-27 20:04:27 +00:00
// Write the node to the temporary buffer...
2003-07-22 10:29:19 +00:00
bytes = mxmlSaveString(node, buffer, sizeof(buffer), cb);
if (bytes <= 0)
return (NULL);
if (bytes < (int)(sizeof(buffer) - 1))
{
2024-02-27 20:04:27 +00:00
// Node fit inside the buffer, so just duplicate that string and return...
2003-07-22 10:29:19 +00:00
return (strdup(buffer));
}
2024-02-27 20:04:27 +00:00
// Allocate a buffer of the required size and save the node to the new buffer...
2003-07-22 10:29:19 +00:00
if ((s = malloc(bytes + 1)) == NULL)
return (NULL);
mxmlSaveString(node, s, bytes + 1, cb);
2024-02-27 20:04:27 +00:00
// Return the allocated string...
2003-07-22 10:29:19 +00:00
return (s);
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSaveFd()' - Save an XML tree to a file descriptor.
//
// The callback argument specifies a function that returns a whitespace
// string or NULL before and after each element. If `MXML_NO_CALLBACK`
// is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes
// with leading whitespace and before attribute names inside opening
// element tags.
//
int                                     // O - 0 on success, -1 on error.
mxmlSaveFd(mxml_node_t    *node,        // I - Node to write
           int            fd,           // I - File descriptor to write to
           mxml_save_cb_t cb)           // I - Whitespace callback or `MXML_NO_CALLBACK`
{
  int            col;                   // Final column
  _mxml_fdbuf_t  buf;                   // File descriptor buffer
  _mxml_global_t *global = _mxml_global();
                                        // Global data


  // Initialize the file descriptor buffer for writing: nothing pending and
  // "end" at the end of the storage...
  buf.fd      = fd;
  buf.current = buf.buffer;
  buf.end     = buf.buffer + sizeof(buf.buffer);

  // Write the node...
  col = mxml_write_node(node, &buf, cb, 0, mxml_fd_putc, global);

  if (col < 0)
    return (-1);

  // Finish the last line when the output did not end at column 0...
  if (col > 0 && mxml_fd_putc('\n', &buf) < 0)
    return (-1);

  // Flush and return...
  return (mxml_fd_write(&buf));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSaveFile()' - Save an XML tree to a file.
//
// The callback argument specifies a function that returns a whitespace
// string or NULL before and after each element. If `MXML_NO_CALLBACK`
// is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes
// with leading whitespace and before attribute names inside opening
// element tags.
//
int                                     // O - 0 on success, -1 on error.
mxmlSaveFile(mxml_node_t    *node,      // I - Node to write
             FILE           *fp,        // I - File to write to
             mxml_save_cb_t cb)         // I - Whitespace callback or `MXML_NO_CALLBACK`
{
  int            col;                   // Final column
  _mxml_global_t *global = _mxml_global();
                                        // Global data


  // Write the node...
  col = mxml_write_node(node, fp, cb, 0, mxml_file_putc, global);

  if (col < 0)
    return (-1);

  // Finish the last line when the output did not end at column 0...
  if (col > 0 && putc('\n', fp) < 0)
    return (-1);

  // Return 0 (success); the stream is neither flushed nor closed here...
  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSaveString()' - Save an XML node tree to a string.
//
// This function returns the total number of bytes that would be
// required for the string but only copies (bufsize - 1) characters
// into the specified buffer.
//
// The callback argument specifies a function that returns a whitespace
// string or NULL before and after each element. If `MXML_NO_CALLBACK`
// is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes
// with leading whitespace and before attribute names inside opening
// element tags.
//
int                                     // O - Size of string
mxmlSaveString(mxml_node_t    *node,    // I - Node to write
               char           *buffer,  // I - String buffer
               int            bufsize,  // I - Size of string buffer
               mxml_save_cb_t cb)       // I - Whitespace callback or `MXML_NO_CALLBACK`
{
  int            col;                   // Final column
  char           *ptr[2];               // Pointers for putc_cb: [0] = current position, [1] = end of buffer
  _mxml_global_t *global = _mxml_global();
                                        // Global data


  // Write the node...
  ptr[0] = buffer;
  ptr[1] = buffer + bufsize;

  col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global);

  if (col < 0)
    return (-1);

  if (col > 0)
    mxml_string_putc('\n', ptr);

  // Nul-terminate the buffer: at the current position when it is still
  // inside the buffer, otherwise in the last byte (if there is one)...
  if (ptr[0] < ptr[1])
    ptr[0][0] = '\0';
  else if (bufsize > 0)
    buffer[bufsize - 1] = '\0';

  // Return the number of characters, measured from the current position
  // (which can lie past the end of the buffer)...
  return ((int)(ptr[0] - buffer));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSAXLoadFd()' - Load a file descriptor into an XML node tree
// using a SAX callback.
//
// The nodes in the specified file are added to the specified top node.
// If no top node is provided, the XML file MUST be well-formed with a
// single parent node like <?xml> for the entire file. The callback
// function returns the value type that should be used for child nodes.
// The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`,
// `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for
// loading child nodes of the specified type.
//
// The SAX callback must call @link mxmlRetain@ for any nodes that need to
// be kept for later use. Otherwise, nodes are deleted when the parent
// node is closed or after each data, comment, CDATA, or directive node.
//
mxml_node_t *                           // O - First node or `NULL` if the file could not be read.
mxmlSAXLoadFd(mxml_node_t    *top,      // I - Top node
              int            fd,        // I - File descriptor to read from
              mxml_load_cb_t cb,        // I - Callback function or constant
              mxml_sax_cb_t  sax_cb,    // I - SAX callback or `MXML_NO_CALLBACK`
              void           *sax_data) // I - SAX user data
{
  _mxml_fdbuf_t buf;                    // File descriptor buffer


  // Start with an empty read buffer (current == end) so that the first
  // character request fills it from "fd"...
  buf.fd      = fd;
  buf.current = buf.buffer;
  buf.end     = buf.buffer;

  // Read the XML data, forwarding the SAX callback and its user data...
  return (mxml_load_data(top, &buf, cb, mxml_fd_getc, sax_cb, sax_data));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSAXLoadFile()' - Load a file into an XML node tree
// using a SAX callback.
//
// The nodes in the specified file are added to the specified top node.
// If no top node is provided, the XML file MUST be well-formed with a
// single parent node like <?xml> for the entire file. The callback
// function returns the value type that should be used for child nodes.
// The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`,
// `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for
// loading child nodes of the specified type.
//
// The SAX callback must call @link mxmlRetain@ for any nodes that need to
// be kept for later use. Otherwise, nodes are deleted when the parent
// node is closed or after each data, comment, CDATA, or directive node.
//
mxml_node_t *                           // O - First node or `NULL` if the file could not be read.
mxmlSAXLoadFile(
    mxml_node_t    *top,                // I - Top node
    FILE           *fp,                 // I - File to read from
    mxml_load_cb_t cb,                  // I - Callback function or constant
    mxml_sax_cb_t  sax_cb,              // I - SAX callback or `MXML_NO_CALLBACK`
    void           *sax_data)           // I - SAX user data
{
  // Read the XML data through the stdio character source, forwarding the
  // SAX callback and its user data...
  return (mxml_load_data(top, fp, cb, mxml_file_getc, sax_cb, sax_data));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSAXLoadString()' - Load a string into an XML node tree
// using a SAX callback.
//
// The nodes in the specified string are added to the specified top node.
// If no top node is provided, the XML string MUST be well-formed with a
// single parent node like <?xml> for the entire string. The callback
// function returns the value type that should be used for child nodes.
// The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`,
// `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for
// loading child nodes of the specified type.
//
// The SAX callback must call @link mxmlRetain@ for any nodes that need to
// be kept for later use. Otherwise, nodes are deleted when the parent
// node is closed or after each data, comment, CDATA, or directive node.
//
mxml_node_t *                           // O - First node or `NULL` if the string has errors.
mxmlSAXLoadString(
    mxml_node_t    *top,                // I - Top node
    const char     *s,                  // I - String to load
    mxml_load_cb_t cb,                  // I - Callback function or constant
    mxml_sax_cb_t  sax_cb,              // I - SAX callback or `MXML_NO_CALLBACK`
    void           *sax_data)           // I - SAX user data
{
  // Read the XML data.  The address of the local "s" is passed rather than
  // "s" itself - presumably so that mxml_string_getc() can advance the
  // pointer as it consumes characters...
  return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, sax_cb, sax_data));
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSetCustomHandlers()' - Set the handling functions for custom data.
//
// The load function accepts a node pointer and a data string and must
// return 0 on success and non-zero on error.
//
// The save function accepts a node pointer and must return a malloc'd
// string on success and `NULL` on error.
//
//
void
mxmlSetCustomHandlers(
2024-02-27 20:04:27 +00:00
mxml_custom_load_cb_t load, // I - Load function
mxml_custom_save_cb_t save) // I - Save function
{
_mxml_global_t *global = _mxml_global();
2024-02-27 20:04:27 +00:00
// Global data
global->custom_load_cb = load;
global->custom_save_cb = save;
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSetErrorCallback()' - Set the error message callback.
//
void
2024-02-27 20:04:27 +00:00
mxmlSetErrorCallback(mxml_error_cb_t cb)// I - Error callback function
{
_mxml_global_t *global = _mxml_global();
2024-02-27 20:04:27 +00:00
// Global data
global->error_cb = cb;
}
2024-02-27 20:04:27 +00:00
//
// 'mxmlSetWrapMargin()' - Set the wrap margin when saving XML data.
//
// Wrapping is disabled when "column" is 0.
//
void
2024-02-27 20:04:27 +00:00
mxmlSetWrapMargin(int column) // I - Column for wrapping, 0 to disable wrapping
{
_mxml_global_t *global = _mxml_global();
2024-02-27 20:04:27 +00:00
// Global data
global->wrap = column;
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_add_char()' - Add a character to a buffer, expanding as needed.
//
2024-02-27 20:04:27 +00:00
static int                              // O  - 0 on success, -1 on error
mxml_add_char(int  ch,                  // I  - Character to add
              char **bufptr,            // IO - Current position in buffer
              char **buffer,            // IO - Current buffer
              int  *bufsize)            // IO - Current buffer size
{
  char   *newbuffer;                    // New buffer value
  int    newsize;                       // New buffer size
  size_t offset;                        // Offset of current position in buffer


  // Always keep room for the longest (4-byte) UTF-8 sequence...
  if (*bufptr >= (*buffer + *bufsize - 4))
  {
    // Increase the size of the buffer: double small buffers, then grow in
    // 1k steps...
    if (*bufsize < 1024)
      newsize = *bufsize * 2;
    else
      newsize = *bufsize + 1024;

    // Remember the write offset before realloc() can invalidate the old
    // buffer pointer...
    offset = (size_t)(*bufptr - *buffer);

    if ((newbuffer = realloc(*buffer, (size_t)newsize)) == NULL)
    {
      // The old buffer is still valid here, so leave "*buffer", "*bufptr",
      // and "*bufsize" describing it...
      mxml_error("Unable to expand string buffer to %d bytes.", newsize);

      return (-1);
    }

    *bufptr  = newbuffer + offset;
    *buffer  = newbuffer;
    *bufsize = newsize;
  }

  if (ch < 0x80)
  {
    // Single byte ASCII...
    *(*bufptr)++ = (char)ch;
  }
  else if (ch < 0x800)
  {
    // Two-byte UTF-8...
    *(*bufptr)++ = (char)(0xc0 | (ch >> 6));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }
  else if (ch < 0x10000)
  {
    // Three-byte UTF-8...
    *(*bufptr)++ = (char)(0xe0 | (ch >> 12));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 6) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }
  else
  {
    // Four-byte UTF-8...
    *(*bufptr)++ = (char)(0xf0 | (ch >> 18));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 12) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 6) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }

  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_fd_getc()' - Read a character from a file descriptor.
//
2024-02-27 20:04:27 +00:00
static int // O - Character or EOF
mxml_fd_getc(void *p, // I - File descriptor buffer
int *encoding) // IO - Encoding
{
2024-02-27 20:04:27 +00:00
_mxml_fdbuf_t *buf; // File descriptor buffer
int ch, // Current character
temp; // Temporary character
2024-02-27 20:04:27 +00:00
// Grab the next character in the buffer...
buf = (_mxml_fdbuf_t *)p;
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
ch = *(buf->current)++;
switch (*encoding)
{
case ENCODE_UTF8 :
2024-02-27 20:04:27 +00:00
// Got a UTF-8 character; convert UTF-8 to Unicode and return...
if (!(ch & 0x80))
{
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
return (ch);
}
else if (ch == 0xfe)
{
2024-02-27 20:04:27 +00:00
// UTF-16 big-endian BOM?
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
ch = *(buf->current)++;
if (ch != 0xff)
return (EOF);
*encoding = ENCODE_UTF16BE;
return (mxml_fd_getc(p, encoding));
}
else if (ch == 0xff)
{
2024-02-27 20:04:27 +00:00
// UTF-16 little-endian BOM?
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
ch = *(buf->current)++;
if (ch != 0xfe)
return (EOF);
*encoding = ENCODE_UTF16LE;
return (mxml_fd_getc(p, encoding));
}
else if ((ch & 0xe0) == 0xc0)
{
2024-02-27 20:04:27 +00:00
// Two-byte value...
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
if ((temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
if (ch < 0x80)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
}
else if ((ch & 0xf0) == 0xe0)
{
2024-02-27 20:04:27 +00:00
// Three-byte value...
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
if ((temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
if ((temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if (ch < 0x800)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2024-02-27 20:04:27 +00:00
// Ignore (strip) Byte Order Mark (BOM)...
2009-05-17 05:20:52 +00:00
if (ch == 0xfeff)
return (mxml_fd_getc(p, encoding));
}
else if ((ch & 0xf8) == 0xf0)
{
2024-02-27 20:04:27 +00:00
// Four-byte value...
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
if ((temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x07) << 6) | (temp & 0x3f);
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
if ((temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
if ((temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if (ch < 0x10000)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
}
else
2024-02-27 20:04:27 +00:00
{
return (EOF);
2024-02-27 20:04:27 +00:00
}
break;
case ENCODE_UTF16BE :
2024-02-27 20:04:27 +00:00
// Read UTF-16 big-endian char...
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
ch = (ch << 8) | temp;
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
else if (ch >= 0xd800 && ch <= 0xdbff)
{
2024-02-27 20:04:27 +00:00
// Multi-word UTF-16 char...
int lch;
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
lch = *(buf->current)++;
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
lch = (lch << 8) | temp;
if (lch < 0xdc00 || lch >= 0xdfff)
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
break;
case ENCODE_UTF16LE :
2024-02-27 20:04:27 +00:00
// Read UTF-16 little-endian char...
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
ch |= (temp << 8);
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
else if (ch >= 0xd800 && ch <= 0xdbff)
{
2024-02-27 20:04:27 +00:00
// Multi-word UTF-16 char...
int lch;
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
lch = *(buf->current)++;
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_read(buf) < 0)
return (EOF);
2024-02-27 20:04:27 +00:00
}
temp = *(buf->current)++;
lch |= (temp << 8);
if (lch < 0xdc00 || lch >= 0xdfff)
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
break;
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
return (ch);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_fd_putc()' - Write a character to a file descriptor.
//
2024-02-27 20:04:27 +00:00
static int // O - 0 on success, -1 on error
mxml_fd_putc(int ch, // I - Character
void *p) // I - File descriptor buffer
{
2024-02-27 20:04:27 +00:00
_mxml_fdbuf_t *buf; // File descriptor buffer
2024-02-27 20:04:27 +00:00
// Flush the write buffer as needed...
buf = (_mxml_fdbuf_t *)p;
if (buf->current >= buf->end)
2024-02-27 20:04:27 +00:00
{
if (mxml_fd_write(buf) < 0)
return (-1);
2024-02-27 20:04:27 +00:00
}
*(buf->current)++ = ch;
2024-02-27 20:04:27 +00:00
// Return successfully...
return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_fd_read()' - Read a buffer of data from a file descriptor.
//
2024-02-27 20:04:27 +00:00
static int                              // O - 0 on success, -1 on error
mxml_fd_read(_mxml_fdbuf_t *buf)        // I - File descriptor buffer
{
  int bytes;                            // Bytes read...


  // Range check input...
  if (!buf)
    return (-1);

  // Read from the file descriptor, retrying when the call was interrupted
  // or no data was available yet...
  while ((bytes = (int)read(buf->fd, buf->buffer, sizeof(buf->buffer))) < 0)
  {
#ifdef EINTR
    if (errno != EAGAIN && errno != EINTR)
#else
    if (errno != EAGAIN)
#endif // EINTR
      return (-1);
  }

  // End-of-file is reported the same way as an error...
  if (bytes == 0)
    return (-1);

  // Update the pointers and return success...
  buf->current = buf->buffer;
  buf->end     = buf->buffer + bytes;

  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_fd_write()' - Write a buffer of data to a file descriptor.
//
2024-02-27 20:04:27 +00:00
static int                              // O - 0 on success, -1 on error
mxml_fd_write(_mxml_fdbuf_t *buf)       // I - File descriptor buffer
{
  int           bytes;                  // Bytes written
  unsigned char *ptr;                   // Pointer into buffer


  // Range check...
  if (!buf)
    return (-1);

  // Return 0 if there is nothing to write...
  if (buf->current == buf->buffer)
    return (0);

  // Loop until we have written everything; write() may accept fewer bytes
  // than requested...
  for (ptr = buf->buffer; ptr < buf->current; ptr += bytes)
  {
    if ((bytes = (int)write(buf->fd, ptr, buf->current - ptr)) < 0)
    {
      // Retry interrupted/temporarily unavailable writes the same way
      // mxml_fd_read() retries reads; anything else is a hard error...
#ifdef EINTR
      if (errno != EAGAIN && errno != EINTR)
#else
      if (errno != EAGAIN)
#endif // EINTR
        return (-1);

      // Nothing was written this time around...
      bytes = 0;
    }
  }

  // All done, reset pointers and return success...
  buf->current = buf->buffer;

  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_file_getc()' - Get a character from a file.
//
2024-02-27 20:04:27 +00:00
static int // O - Character or EOF
mxml_file_getc(void *p, // I - Pointer to file
int *encoding) // IO - Encoding
{
2024-02-27 20:04:27 +00:00
int ch, // Character from file
temp; // Temporary character
FILE *fp; // Pointer to file
2024-02-27 20:04:27 +00:00
// Read a character from the file and see if it is EOF or ASCII...
fp = (FILE *)p;
ch = getc(fp);
2004-05-16 21:54:47 +00:00
if (ch == EOF)
return (EOF);
2004-05-16 21:54:47 +00:00
switch (*encoding)
{
2004-05-16 21:54:47 +00:00
case ENCODE_UTF8 :
2024-02-27 20:04:27 +00:00
// Got a UTF-8 character; convert UTF-8 to Unicode and return...
2004-05-16 21:54:47 +00:00
if (!(ch & 0x80))
{
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2004-05-16 21:54:47 +00:00
return (ch);
}
else if (ch == 0xfe)
2004-05-16 21:54:47 +00:00
{
2024-02-27 20:04:27 +00:00
// UTF-16 big-endian BOM?
2004-05-16 21:54:47 +00:00
ch = getc(fp);
if (ch != 0xff)
return (EOF);
*encoding = ENCODE_UTF16BE;
2004-05-16 21:54:47 +00:00
return (mxml_file_getc(p, encoding));
}
else if (ch == 0xff)
{
2024-02-27 20:04:27 +00:00
// UTF-16 little-endian BOM?
2004-05-16 21:54:47 +00:00
ch = getc(fp);
if (ch != 0xfe)
return (EOF);
2004-05-16 21:54:47 +00:00
*encoding = ENCODE_UTF16LE;
2004-05-16 21:54:47 +00:00
return (mxml_file_getc(p, encoding));
}
else if ((ch & 0xe0) == 0xc0)
{
2024-02-27 20:04:27 +00:00
// Two-byte value...
2004-05-16 21:54:47 +00:00
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
if (ch < 0x80)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2004-05-16 21:54:47 +00:00
}
else if ((ch & 0xf0) == 0xe0)
{
2024-02-27 20:04:27 +00:00
// Three-byte value...
2004-05-16 21:54:47 +00:00
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x0f) << 6) | (temp & 0x3f);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if (ch < 0x800)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2024-02-27 20:04:27 +00:00
// Ignore (strip) Byte Order Mark (BOM)...
2009-05-17 05:20:52 +00:00
if (ch == 0xfeff)
return (mxml_file_getc(p, encoding));
2004-05-16 21:54:47 +00:00
}
else if ((ch & 0xf8) == 0xf0)
{
2024-02-27 20:04:27 +00:00
// Four-byte value...
2004-05-16 21:54:47 +00:00
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = ((ch & 0x07) << 6) | (temp & 0x3f);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
return (EOF);
ch = (ch << 6) | (temp & 0x3f);
if (ch < 0x10000)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2004-05-16 21:54:47 +00:00
}
else
2024-02-27 20:04:27 +00:00
{
2004-05-16 21:54:47 +00:00
return (EOF);
2024-02-27 20:04:27 +00:00
}
2004-05-16 21:54:47 +00:00
break;
case ENCODE_UTF16BE :
2024-02-27 20:04:27 +00:00
// Read UTF-16 big-endian char...
2004-05-16 21:54:47 +00:00
ch = (ch << 8) | getc(fp);
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
else if (ch >= 0xd800 && ch <= 0xdbff)
2004-05-16 21:54:47 +00:00
{
2024-02-27 20:04:27 +00:00
// Multi-word UTF-16 char...
int lch = getc(fp);
lch = (lch << 8) | getc(fp);
if (lch < 0xdc00 || lch >= 0xdfff)
2004-05-16 21:54:47 +00:00
return (EOF);
2004-05-16 21:54:47 +00:00
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
break;
2004-05-16 21:54:47 +00:00
case ENCODE_UTF16LE :
2024-02-27 20:04:27 +00:00
// Read UTF-16 little-endian char...
2004-05-16 21:54:47 +00:00
ch |= (getc(fp) << 8);
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
else if (ch >= 0xd800 && ch <= 0xdbff)
2004-05-16 21:54:47 +00:00
{
2024-02-27 20:04:27 +00:00
// Multi-word UTF-16 char...
int lch = getc(fp);
lch |= (getc(fp) << 8);
2004-05-16 21:54:47 +00:00
if (lch < 0xdc00 || lch >= 0xdfff)
2004-05-16 21:54:47 +00:00
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
break;
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
return (ch);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_file_putc()' - Write a character to a file.
//
// "p" is the output stream, passed as an untyped pointer and cast to "FILE *"
// before the character is written with putc().
//

static int				// O - 0 on success, -1 on failure
mxml_file_putc(int  ch,			// I - Character to write
               void *p)			// I - Pointer to file
{
  FILE	*fp = (FILE *)p;		// Output stream


  if (putc(ch, fp) == EOF)
    return (-1);

  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_get_entity()' - Get the character corresponding to an entity...
//
2024-02-27 20:04:27 +00:00
static int // O - Character value or EOF on error
mxml_get_entity(mxml_node_t *parent, // I - Parent node
void *p, // I - Pointer to source
int *encoding, // IO - Character encoding
int (*getc_cb)(void *, int *),
2024-02-27 20:04:27 +00:00
// I - Get character function
int *line) // IO - Current line number
{
2024-02-27 20:04:27 +00:00
int ch; // Current character
char entity[64], // Entity string
*entptr; // Pointer into entity
entptr = entity;
while ((ch = (*getc_cb)(p, encoding)) != EOF)
{
if (ch > 126 || (!isalnum(ch) && ch != '#'))
2024-02-27 20:04:27 +00:00
{
break;
2024-02-27 20:04:27 +00:00
}
else if (entptr < (entity + sizeof(entity) - 1))
2024-02-27 20:04:27 +00:00
{
*entptr++ = ch;
2024-02-27 20:04:27 +00:00
}
else
{
mxml_error("Entity name too long under parent <%s> on line %d.", parent ? parent->value.element.name : "null", *line);
break;
}
}
*entptr = '\0';
if (ch != ';')
{
mxml_error("Character entity '%s' not terminated under parent <%s> on line %d.", entity, parent ? parent->value.element.name : "null", *line);
if (ch == '\n')
(*line)++;
return (EOF);
}
if (entity[0] == '#')
{
if (entity[1] == 'x')
ch = (int)strtol(entity + 2, NULL, 16);
else
ch = (int)strtol(entity + 1, NULL, 10);
}
else if ((ch = mxmlEntityGetValue(entity)) < 0)
2024-02-27 20:04:27 +00:00
{
mxml_error("Entity name '%s;' not supported under parent <%s> on line %d.", entity, parent ? parent->value.element.name : "null", *line);
2024-02-27 20:04:27 +00:00
}
if (mxml_bad_char(ch))
{
mxml_error("Bad control character 0x%02x under parent <%s> on line %d not allowed by XML standard.", ch, parent ? parent->value.element.name : "null", *line);
return (EOF);
}
return (ch);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_load_data()' - Load data into an XML node tree.
//
2024-02-27 20:04:27 +00:00
static mxml_node_t * // O - First node or NULL if the file could not be read.
mxml_load_data(
2024-02-27 20:04:27 +00:00
mxml_node_t *top, // I - Top node
void *p, // I - Pointer to data
mxml_load_cb_t cb, // I - Callback function or MXML_NO_CALLBACK
_mxml_getc_cb_t getc_cb, // I - Read function
mxml_sax_cb_t sax_cb, // I - SAX callback or MXML_NO_CALLBACK
void *sax_data) // I - SAX user data
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
mxml_node_t *node = NULL, // Current node
*first = NULL, // First node added
*parent = NULL; // Current parent node
int line = 1, // Current line number
ch; // Character from file
bool whitespace; // Whitespace seen?
2024-02-27 20:04:27 +00:00
char *buffer, // String buffer
*bufptr; // Pointer into buffer
int bufsize; // Size of buffer
mxml_type_t type; // Current node type
int encoding; // Character encoding
_mxml_global_t *global = _mxml_global();
2024-02-27 20:04:27 +00:00
// Global data
static const char * const types[] = // Type strings...
{
"MXML_TYPE_CDATA", // CDATA
"MXML_TYPE_COMMENT", // Comment
"MXML_TYPE_DECLARATION",// Declaration
"MXML_TYPE_DIRECTIVE",// Processing instruction/directive
2024-02-27 20:04:27 +00:00
"MXML_TYPE_ELEMENT", // XML element with attributes
"MXML_TYPE_INTEGER", // Integer value
"MXML_TYPE_OPAQUE", // Opaque string
"MXML_TYPE_REAL", // Real value
"MXML_TYPE_TEXT", // Text fragment
"MXML_TYPE_CUSTOM" // Custom data
};
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
// Read elements and other nodes from the file...
if ((buffer = malloc(64)) == NULL)
{
mxml_error("Unable to allocate string buffer.");
return (NULL);
}
bufsize = 64;
2003-06-03 19:46:29 +00:00
bufptr = buffer;
parent = top;
first = NULL;
whitespace = false;
2004-05-16 21:54:47 +00:00
encoding = ENCODE_UTF8;
2003-06-03 19:46:29 +00:00
if (cb && parent)
type = (*cb)(parent);
else if (parent)
2024-02-27 20:04:27 +00:00
type = MXML_TYPE_TEXT;
else
2024-02-27 20:04:27 +00:00
type = MXML_TYPE_IGNORE;
2003-06-03 19:46:29 +00:00
if ((ch = (*getc_cb)(p, &encoding)) == EOF)
{
free(buffer);
return (NULL);
}
else if (ch != '<' && !top)
{
free(buffer);
mxml_error("XML does not start with '<' (saw '%c').", ch);
return (NULL);
}
do
2003-06-03 19:46:29 +00:00
{
if ((ch == '<' || (mxml_isspace(ch) && type != MXML_TYPE_OPAQUE && type != MXML_TYPE_CUSTOM)) && bufptr > buffer)
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
// Add a new value node...
2003-06-03 19:46:29 +00:00
*bufptr = '\0';
switch (type)
{
2024-02-27 20:04:27 +00:00
case MXML_TYPE_INTEGER :
node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0));
2003-06-03 19:46:29 +00:00
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_OPAQUE :
2003-06-03 19:46:29 +00:00
node = mxmlNewOpaque(parent, buffer);
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_REAL :
node = mxmlNewReal(parent, strtod(buffer, &bufptr));
2003-06-03 19:46:29 +00:00
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_TEXT :
2003-06-03 19:46:29 +00:00
node = mxmlNewText(parent, whitespace, buffer);
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_CUSTOM :
if (global->custom_load_cb)
{
2024-02-27 20:04:27 +00:00
// Use the callback to fill in the custom data...
node = mxmlNewCustom(parent, NULL, NULL);
if ((*global->custom_load_cb)(node, buffer))
{
mxml_error("Bad custom value '%s' in parent <%s> on line %d.", buffer, parent ? parent->value.element.name : "null", line);
mxmlDelete(node);
node = NULL;
}
break;
}
2024-02-27 20:04:27 +00:00
default : // Ignore...
2003-06-03 19:46:29 +00:00
node = NULL;
break;
}
2003-06-03 19:46:29 +00:00
if (*bufptr)
{
2024-02-27 20:04:27 +00:00
// Bad integer/real number value...
mxml_error("Bad %s value '%s' in parent <%s> on line %d.", type == MXML_TYPE_INTEGER ? "integer" : "real", buffer, parent ? parent->value.element.name : "null", line);
break;
}
bufptr = buffer;
2024-02-27 20:04:27 +00:00
whitespace = mxml_isspace(ch) && type == MXML_TYPE_TEXT;
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
if (!node && type != MXML_TYPE_IGNORE)
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Unable to add value node of type %s to parent <%s> on line %d.", types[type], parent ? parent->value.element.name : "null", line);
goto error;
2003-06-03 19:46:29 +00:00
}
if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_DATA, sax_data);
if (!mxmlRelease(node))
node = NULL;
}
if (!first && node)
first = node;
2003-06-03 19:46:29 +00:00
}
2024-02-27 20:04:27 +00:00
else if (mxml_isspace(ch) && type == MXML_TYPE_TEXT)
{
whitespace = true;
2024-02-27 20:04:27 +00:00
}
if (ch == '\n')
line ++;
2024-02-27 20:04:27 +00:00
// Add lone whitespace node if we have an element and existing whitespace...
if (ch == '<' && whitespace && type == MXML_TYPE_TEXT)
{
if (parent)
{
node = mxmlNewText(parent, whitespace, "");
if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_DATA, sax_data);
if (!mxmlRelease(node))
node = NULL;
}
if (!first && node)
first = node;
}
whitespace = false;
}
2003-06-03 19:46:29 +00:00
if (ch == '<')
{
2024-02-27 20:04:27 +00:00
// Start of open/close tag...
2003-06-03 19:46:29 +00:00
bufptr = buffer;
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{
if (mxml_isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
2024-02-27 20:04:27 +00:00
{
2003-06-03 19:46:29 +00:00
break;
2024-02-27 20:04:27 +00:00
}
2007-04-18 02:45:47 +00:00
else if (ch == '<')
{
mxml_error("Bare < in element.");
2007-04-18 02:45:47 +00:00
goto error;
}
else if (ch == '&')
{
if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb, &line)) == EOF)
goto error;
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
}
else if (ch < '0' && ch != '!' && ch != '-' && ch != '.' && ch != '/')
2024-02-27 20:04:27 +00:00
{
goto error;
2024-02-27 20:04:27 +00:00
}
else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
2024-02-27 20:04:27 +00:00
{
goto error;
2024-02-27 20:04:27 +00:00
}
else if (((bufptr - buffer) == 1 && buffer[0] == '?') || ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) || ((bufptr - buffer) == 8 && !strncmp(buffer, "![CDATA[", 8)))
{
break;
2024-02-27 20:04:27 +00:00
}
if (ch == '\n')
line ++;
}
2003-06-03 19:46:29 +00:00
*bufptr = '\0';
if (!strcmp(buffer, "!--"))
2003-06-04 01:23:21 +00:00
{
2024-02-27 20:04:27 +00:00
// Gather rest of comment...
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, &encoding)) != EOF)
2003-06-04 01:23:21 +00:00
{
2024-02-27 20:04:27 +00:00
if (ch == '>' && bufptr > (buffer + 4) && bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-')
break;
else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
if (ch == '\n')
line ++;
}
2024-02-27 20:04:27 +00:00
// Error out if we didn't get the whole comment...
if (ch != '>')
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Early EOF in comment node on line %d.", line);
goto error;
}
2024-02-27 20:04:27 +00:00
// Otherwise add this as an element under the current parent...
bufptr[-2] = '\0';
if (!parent && first)
{
2024-02-27 20:04:27 +00:00
// There can only be one root element!
mxml_error("<%s--> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line);
goto error;
}
if ((node = mxmlNewComment(parent, buffer + 3)) == NULL)
{
2024-02-27 20:04:27 +00:00
// Just print error for now...
mxml_error("Unable to add comment node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line);
break;
}
if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_COMMENT, sax_data);
if (!mxmlRelease(node))
node = NULL;
}
if (node && !first)
first = node;
}
else if (!strcmp(buffer, "![CDATA["))
{
2024-02-27 20:04:27 +00:00
// Gather CDATA section...
while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{
if (ch == '>' && !strncmp(bufptr - 2, "]]", 2))
{
2024-02-27 20:04:27 +00:00
// Drop terminator from CDATA string...
bufptr[-2] = '\0';
break;
}
else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
2024-02-27 20:04:27 +00:00
{
goto error;
2024-02-27 20:04:27 +00:00
}
if (ch == '\n')
line ++;
}
2003-06-04 01:23:21 +00:00
2024-02-27 20:04:27 +00:00
// Error out if we didn't get the whole comment...
if (ch != '>')
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Early EOF in CDATA node on line %d.", line);
goto error;
}
2024-02-27 20:04:27 +00:00
// Otherwise add this as an element under the current parent...
bufptr[-2] = '\0';
if (!parent && first)
{
2024-02-27 20:04:27 +00:00
// There can only be one root element!
mxml_error("<%s]]> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line);
goto error;
}
if ((node = mxmlNewCDATA(parent, buffer + 8)) == NULL)
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Unable to add CDATA node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line);
goto error;
}
if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_CDATA, sax_data);
if (!mxmlRelease(node))
node = NULL;
}
if (node && !first)
first = node;
}
else if (buffer[0] == '?')
{
2024-02-27 20:04:27 +00:00
// Gather rest of processing instruction...
while ((ch = (*getc_cb)(p, &encoding)) != EOF)
{
if (ch == '>' && bufptr > buffer && bufptr[-1] == '?')
break;
else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
if (ch == '\n')
line ++;
}
2024-02-27 20:04:27 +00:00
// Error out if we didn't get the whole processing instruction...
if (ch != '>')
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Early EOF in processing instruction node on line %d.", line);
goto error;
}
2024-02-27 20:04:27 +00:00
// Otherwise add this as an element under the current parent...
bufptr[-1] = '\0';
if (!parent && first)
{
2024-02-27 20:04:27 +00:00
// There can only be one root element!
mxml_error("<%s?> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line);
goto error;
}
if ((node = mxmlNewDirective(parent, buffer + 1)) == NULL)
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Unable to add processing instruction node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line);
goto error;
}
if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_DIRECTIVE, sax_data);
if (strncmp(node->value.directive, "xml ", 4) && !mxmlRelease(node))
node = NULL;
}
if (node)
{
if (!first)
first = node;
if (!parent)
{
parent = node;
if (cb)
type = (*cb)(parent);
else
2024-02-27 20:04:27 +00:00
type = MXML_TYPE_TEXT;
}
}
}
else if (buffer[0] == '!')
{
2024-02-27 20:04:27 +00:00
// Gather rest of declaration...
do
{
if (ch == '>')
2024-02-27 20:04:27 +00:00
{
break;
2024-02-27 20:04:27 +00:00
}
else
{
if (ch == '&')
{
if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb, &line)) == EOF)
goto error;
}
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
2003-06-04 01:23:21 +00:00
}
if (ch == '\n')
line ++;
2003-06-04 01:23:21 +00:00
}
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, &encoding)) != EOF);
2003-06-04 01:23:21 +00:00
2024-02-27 20:04:27 +00:00
// Error out if we didn't get the whole declaration...
if (ch != '>')
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Early EOF in declaration node on line %d.", line);
goto error;
}
2024-02-27 20:04:27 +00:00
// Otherwise add this as an element under the current parent...
*bufptr = '\0';
if (!parent && first)
{
2024-02-27 20:04:27 +00:00
// There can only be one root element!
mxml_error("<%s> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line);
goto error;
}
if ((node = mxmlNewDeclaration(parent, buffer + 1)) == NULL)
{
2024-02-27 20:04:27 +00:00
// Print error and return...
mxml_error("Unable to add declaration node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line);
goto error;
}
if (sax_cb)
{
(*sax_cb)(node, MXML_SAX_EVENT_DECLARATION, sax_data);
if (!mxmlRelease(node))
node = NULL;
}
if (node)
{
if (!first)
first = node;
if (!parent)
{
parent = node;
if (cb)
type = (*cb)(parent);
else
2024-02-27 20:04:27 +00:00
type = MXML_TYPE_TEXT;
}
}
2003-06-04 01:23:21 +00:00
}
else if (buffer[0] == '/')
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
// Handle close tag...
2003-06-03 19:46:29 +00:00
if (!parent || strcmp(buffer + 1, parent->value.element.name))
{
2024-02-27 20:04:27 +00:00
// Close tag doesn't match tree; print an error for now...
mxml_error("Mismatched close tag <%s> under parent <%s> on line %d.", buffer, parent ? parent->value.element.name : "(null)", line);
goto error;
2003-06-03 19:46:29 +00:00
}
2024-02-27 20:04:27 +00:00
// Keep reading until we see >...
2003-06-03 19:46:29 +00:00
while (ch != '>' && ch != EOF)
2004-05-16 21:54:47 +00:00
ch = (*getc_cb)(p, &encoding);
2003-06-03 19:46:29 +00:00
node = parent;
parent = parent->parent;
if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_ELEMENT_CLOSE, sax_data);
if (!mxmlRelease(node))
{
if (first == node)
first = NULL;
node = NULL;
}
}
2024-02-27 20:04:27 +00:00
// Ascend into the parent and set the value type as needed...
if (cb && parent)
2003-06-03 19:46:29 +00:00
type = (*cb)(parent);
}
else
{
2024-02-27 20:04:27 +00:00
// Handle open tag...
if (!parent && first)
{
2024-02-27 20:04:27 +00:00
// There can only be one root element!
mxml_error("<%s> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line);
goto error;
}
if ((node = mxmlNewElement(parent, buffer)) == NULL)
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
// Just print error for now...
mxml_error("Unable to add element node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line);
goto error;
2003-06-03 19:46:29 +00:00
}
if (mxml_isspace(ch))
2007-09-09 08:11:25 +00:00
{
if ((ch = mxml_parse_element(node, p, &encoding, getc_cb, &line)) == EOF)
2007-09-09 08:11:25 +00:00
goto error;
}
2003-06-03 19:46:29 +00:00
else if (ch == '/')
{
2004-05-16 21:54:47 +00:00
if ((ch = (*getc_cb)(p, &encoding)) != '>')
2003-06-03 19:46:29 +00:00
{
mxml_error("Expected > but got '%c' instead for element <%s/> on line %d.", ch, buffer, line);
mxmlDelete(node);
node = NULL;
goto error;
2003-06-03 19:46:29 +00:00
}
ch = '/';
}
if (sax_cb)
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_ELEMENT_OPEN, sax_data);
if (!first)
first = node;
2003-06-03 19:46:29 +00:00
if (ch == EOF)
break;
if (ch != '/')
{
2024-02-27 20:04:27 +00:00
// Descend into this node, setting the value type as needed...
2003-06-03 19:46:29 +00:00
parent = node;
if (cb && parent)
2003-06-03 19:46:29 +00:00
type = (*cb)(parent);
else
2024-02-27 20:04:27 +00:00
type = MXML_TYPE_TEXT;
2003-06-03 19:46:29 +00:00
}
else if (sax_cb)
{
2024-02-27 20:04:27 +00:00
(*sax_cb)(node, MXML_SAX_EVENT_ELEMENT_CLOSE, sax_data);
if (!mxmlRelease(node))
{
if (first == node)
first = NULL;
node = NULL;
}
}
2003-06-03 19:46:29 +00:00
}
bufptr = buffer;
2003-06-03 19:46:29 +00:00
}
else if (ch == '&')
{
2024-02-27 20:04:27 +00:00
// Add character entity to current buffer...
if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb, &line)) == EOF)
goto error;
2003-06-03 19:46:29 +00:00
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
2003-06-03 19:46:29 +00:00
}
2024-02-27 20:04:27 +00:00
else if (type == MXML_TYPE_OPAQUE || type == MXML_TYPE_CUSTOM || !mxml_isspace(ch))
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
// Add character to current buffer...
if (mxml_add_char(ch, &bufptr, &buffer, &bufsize))
goto error;
2003-06-03 19:46:29 +00:00
}
}
while ((ch = (*getc_cb)(p, &encoding)) != EOF);
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
// Free the string buffer - we don't need it anymore...
free(buffer);
2024-02-27 20:04:27 +00:00
// Find the top element and return it...
2003-06-03 19:46:29 +00:00
if (parent)
{
node = parent;
while (parent != top && parent->parent)
2003-06-03 19:46:29 +00:00
parent = parent->parent;
if (node != parent)
{
mxml_error("Missing close tag </%s> under parent <%s> on line %d.", node->value.element.name, node->parent ? node->parent->value.element.name : "(null)", line);
mxmlDelete(first);
return (NULL);
}
2003-06-03 19:46:29 +00:00
}
if (parent)
return (parent);
else
return (first);
2024-02-27 20:04:27 +00:00
// Common error return...
error:
mxmlDelete(first);
free(buffer);
return (NULL);
2003-06-03 19:46:29 +00:00
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_parse_element()' - Parse an element for any attributes...
//
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
static int // O - Terminating character
mxml_parse_element(
2024-02-27 20:04:27 +00:00
mxml_node_t *node, // I - Element node
void *p, // I - Data to read from
int *encoding, // IO - Encoding
_mxml_getc_cb_t getc_cb, // I - Data callback
int *line) // IO - Current line number
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
int ch, // Current character in file
quote; // Quoting character
char *name, // Attribute name
*value, // Attribute value
*ptr; // Pointer into name/value
int namesize, // Size of name string
valsize; // Size of value string
2003-12-19 02:56:11 +00:00
2024-02-27 20:04:27 +00:00
// Initialize the name and value buffers...
if ((name = malloc(64)) == NULL)
{
mxml_error("Unable to allocate memory for name.");
return (EOF);
}
namesize = 64;
if ((value = malloc(64)) == NULL)
{
free(name);
mxml_error("Unable to allocate memory for value.");
return (EOF);
}
valsize = 64;
2024-02-27 20:04:27 +00:00
// Loop until we hit a >, /, ?, or EOF...
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, encoding)) != EOF)
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
MXML_DEBUG("parse_element: ch='%c'\n", ch);
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
// Skip leading whitespace...
if (mxml_isspace(ch))
{
if (ch == '\n')
(*line)++;
2003-06-03 19:46:29 +00:00
continue;
}
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
// Stop at /, ?, or >...
2003-06-03 19:46:29 +00:00
if (ch == '/' || ch == '?')
{
2024-02-27 20:04:27 +00:00
// Grab the > character and print an error if it isn't there...
2004-05-16 21:54:47 +00:00
quote = (*getc_cb)(p, encoding);
2003-06-03 19:46:29 +00:00
if (quote != '>')
{
mxml_error("Expected '>' after '%c' for element %s, but got '%c' on line %d.", ch, node->value.element.name, quote, *line);
2007-04-18 02:45:47 +00:00
goto error;
2003-06-03 19:46:29 +00:00
}
break;
}
2007-04-18 02:45:47 +00:00
else if (ch == '<')
{
mxml_error("Bare < in element %s on line %d.", node->value.element.name, *line);
2007-04-18 02:45:47 +00:00
goto error;
}
2003-06-03 19:46:29 +00:00
else if (ch == '>')
2024-02-27 20:04:27 +00:00
{
2003-06-03 19:46:29 +00:00
break;
2024-02-27 20:04:27 +00:00
}
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
// Read the attribute name...
ptr = name;
if (mxml_add_char(ch, &ptr, &name, &namesize))
goto error;
2003-06-03 19:46:29 +00:00
if (ch == '\"' || ch == '\'')
{
2024-02-27 20:04:27 +00:00
// Name is in quotes, so get a quoted string...
quote = ch;
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, encoding)) != EOF)
2003-06-03 19:46:29 +00:00
{
if (ch == '&')
{
if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF)
goto error;
}
else if (ch == '\n')
2024-02-27 20:04:27 +00:00
{
(*line)++;
2024-02-27 20:04:27 +00:00
}
if (mxml_add_char(ch, &ptr, &name, &namesize))
goto error;
if (ch == quote)
break;
2003-06-03 19:46:29 +00:00
}
}
else
{
2024-02-27 20:04:27 +00:00
// Grab an normal, non-quoted name...
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, encoding)) != EOF)
{
2024-02-27 20:04:27 +00:00
if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>' || ch == '?')
{
if (ch == '\n')
(*line)++;
break;
}
else
{
if (ch == '&')
{
if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF)
goto error;
}
if (mxml_add_char(ch, &ptr, &name, &namesize))
goto error;
}
}
}
2003-06-03 19:46:29 +00:00
*ptr = '\0';
if (mxmlElementGetAttr(node, name))
{
mxml_error("Duplicate attribute '%s' in element %s on line %d.", name, node->value.element.name, *line);
goto error;
}
while (ch != EOF && mxml_isspace(ch))
{
ch = (*getc_cb)(p, encoding);
if (ch == '\n')
(*line)++;
}
2003-06-03 19:46:29 +00:00
if (ch == '=')
{
2024-02-27 20:04:27 +00:00
// Read the attribute value...
while ((ch = (*getc_cb)(p, encoding)) != EOF && mxml_isspace(ch))
{
if (ch == '\n')
(*line)++;
}
if (ch == EOF)
2003-06-03 19:46:29 +00:00
{
mxml_error("Missing value for attribute '%s' in element %s on line %d.", name, node->value.element.name, *line);
2007-09-09 08:22:12 +00:00
goto error;
2003-06-03 19:46:29 +00:00
}
if (ch == '\'' || ch == '\"')
{
2024-02-27 20:04:27 +00:00
// Read quoted value...
2003-06-03 19:46:29 +00:00
quote = ch;
ptr = value;
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, encoding)) != EOF)
{
2003-06-03 19:46:29 +00:00
if (ch == quote)
{
2003-06-03 19:46:29 +00:00
break;
}
else
2003-06-03 19:46:29 +00:00
{
if (ch == '&')
{
if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF)
goto error;
}
else if (ch == '\n')
2024-02-27 20:04:27 +00:00
{
(*line)++;
2024-02-27 20:04:27 +00:00
}
if (mxml_add_char(ch, &ptr, &value, &valsize))
goto error;
2003-06-03 19:46:29 +00:00
}
}
2003-06-03 19:46:29 +00:00
*ptr = '\0';
}
else
{
2024-02-27 20:04:27 +00:00
// Read unquoted value...
ptr = value;
if (mxml_add_char(ch, &ptr, &value, &valsize))
goto error;
2003-06-03 19:46:29 +00:00
2004-05-16 21:54:47 +00:00
while ((ch = (*getc_cb)(p, encoding)) != EOF)
{
if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>')
{
if (ch == '\n')
(*line)++;
2003-06-03 19:46:29 +00:00
break;
}
else
2003-06-03 19:46:29 +00:00
{
if (ch == '&')
{
if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF)
goto error;
}
if (mxml_add_char(ch, &ptr, &value, &valsize))
goto error;
2003-06-03 19:46:29 +00:00
}
}
2003-06-03 19:46:29 +00:00
*ptr = '\0';
}
2024-02-27 20:04:27 +00:00
// Set the attribute with the given string value...
mxmlElementSetAttr(node, name, value);
2003-06-03 19:46:29 +00:00
}
else
{
mxml_error("Missing value for attribute '%s' in element %s on line %d.", name, node->value.element.name, *line);
2007-09-09 08:22:12 +00:00
goto error;
}
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
// Check the end character...
if (ch == '/' || ch == '?')
{
2024-02-27 20:04:27 +00:00
// Grab the > character and print an error if it isn't there...
2004-05-16 21:54:47 +00:00
quote = (*getc_cb)(p, encoding);
if (quote != '>')
{
mxml_error("Expected '>' after '%c' for element %s, but got '%c' on line %d.", ch, node->value.element.name, quote, *line);
ch = EOF;
}
break;
}
else if (ch == '>')
break;
2003-06-03 19:46:29 +00:00
}
2024-02-27 20:04:27 +00:00
// Free the name and value buffers and return...
free(name);
free(value);
2003-06-03 19:46:29 +00:00
return (ch);
2024-02-27 20:04:27 +00:00
// Common error return point...
error:
free(name);
free(value);
return (EOF);
2003-06-03 19:46:29 +00:00
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_string_getc()' - Get a character from a string.
//
2024-02-27 20:04:27 +00:00
static int // O - Character or EOF
mxml_string_getc(void *p, // I - Pointer to file
int *encoding) // IO - Encoding
{
2024-02-27 20:04:27 +00:00
int ch; // Character
const char **s; // Pointer to string pointer
s = (const char **)p;
if ((ch = (*s)[0] & 255) != 0 || *encoding == ENCODE_UTF16LE)
{
2024-02-27 20:04:27 +00:00
// Got character; convert UTF-8 to integer and return...
(*s)++;
2004-05-16 21:54:47 +00:00
switch (*encoding)
{
2004-05-16 21:54:47 +00:00
case ENCODE_UTF8 :
if (!(ch & 0x80))
{
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
2004-05-16 21:54:47 +00:00
return (ch);
}
2004-05-16 21:54:47 +00:00
else if (ch == 0xfe)
{
2024-02-27 20:04:27 +00:00
// UTF-16 big-endian BOM?
if (((*s)[0] & 255) != 0xff)
2004-05-16 21:54:47 +00:00
return (EOF);
2004-05-16 21:54:47 +00:00
*encoding = ENCODE_UTF16BE;
(*s)++;
2004-05-16 21:54:47 +00:00
return (mxml_string_getc(p, encoding));
}
else if (ch == 0xff)
{
2024-02-27 20:04:27 +00:00
// UTF-16 little-endian BOM?
if (((*s)[0] & 255) != 0xfe)
2004-05-16 21:54:47 +00:00
return (EOF);
2004-05-16 21:54:47 +00:00
*encoding = ENCODE_UTF16LE;
(*s)++;
2004-05-16 21:54:47 +00:00
return (mxml_string_getc(p, encoding));
}
else if ((ch & 0xe0) == 0xc0)
{
2024-02-27 20:04:27 +00:00
// Two-byte value...
if (((*s)[0] & 0xc0) != 0x80)
2004-05-16 21:54:47 +00:00
return (EOF);
ch = ((ch & 0x1f) << 6) | ((*s)[0] & 0x3f);
2004-05-16 21:54:47 +00:00
(*s)++;
if (ch < 0x80)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2004-05-16 21:54:47 +00:00
return (ch);
}
else if ((ch & 0xf0) == 0xe0)
{
2024-02-27 20:04:27 +00:00
// Three-byte value...
if (((*s)[0] & 0xc0) != 0x80 || ((*s)[1] & 0xc0) != 0x80)
2004-05-16 21:54:47 +00:00
return (EOF);
ch = ((((ch & 0x0f) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f);
2004-05-16 21:54:47 +00:00
(*s) += 2;
if (ch < 0x800)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2024-02-27 20:04:27 +00:00
// Ignore (strip) Byte Order Mark (BOM)...
2009-05-17 05:20:52 +00:00
if (ch == 0xfeff)
return (mxml_string_getc(p, encoding));
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2004-05-16 21:54:47 +00:00
return (ch);
}
else if ((ch & 0xf8) == 0xf0)
{
2024-02-27 20:04:27 +00:00
// Four-byte value...
if (((*s)[0] & 0xc0) != 0x80 || ((*s)[1] & 0xc0) != 0x80 || ((*s)[2] & 0xc0) != 0x80)
2004-05-16 21:54:47 +00:00
return (EOF);
2024-02-27 20:04:27 +00:00
ch = ((((((ch & 0x07) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f)) << 6) | ((*s)[2] & 0x3f);
2004-05-16 21:54:47 +00:00
(*s) += 3;
if (ch < 0x10000)
{
mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch);
return (EOF);
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2004-05-16 21:54:47 +00:00
return (ch);
}
else
2024-02-27 20:04:27 +00:00
{
2004-05-16 21:54:47 +00:00
return (EOF);
2024-02-27 20:04:27 +00:00
}
2004-05-16 21:54:47 +00:00
case ENCODE_UTF16BE :
2024-02-27 20:04:27 +00:00
// Read UTF-16 big-endian char...
ch = (ch << 8) | ((*s)[0] & 255);
2004-05-16 21:54:47 +00:00
(*s) ++;
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
else if (ch >= 0xd800 && ch <= 0xdbff)
2004-05-16 21:54:47 +00:00
{
2024-02-27 20:04:27 +00:00
// Multi-word UTF-16 char...
int lch; // Lower word
2004-05-16 21:54:47 +00:00
if (!(*s)[0])
2004-05-16 21:54:47 +00:00
return (EOF);
lch = (((*s)[0] & 255) << 8) | ((*s)[1] & 255);
2004-05-16 21:54:47 +00:00
(*s) += 2;
if (lch < 0xdc00 || lch >= 0xdfff)
2004-05-16 21:54:47 +00:00
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2004-05-16 21:54:47 +00:00
return (ch);
case ENCODE_UTF16LE :
2024-02-27 20:04:27 +00:00
// Read UTF-16 little-endian char...
ch = ch | (((*s)[0] & 255) << 8);
2004-05-16 21:54:47 +00:00
if (!ch)
{
(*s) --;
return (EOF);
}
(*s) ++;
if (mxml_bad_char(ch))
{
2024-02-27 20:04:27 +00:00
mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch);
return (EOF);
}
else if (ch >= 0xd800 && ch <= 0xdbff)
2004-05-16 21:54:47 +00:00
{
2024-02-27 20:04:27 +00:00
// Multi-word UTF-16 char...
int lch; // Lower word
if (!(*s)[1])
2004-05-16 21:54:47 +00:00
return (EOF);
lch = (((*s)[1] & 255) << 8) | ((*s)[0] & 255);
2004-05-16 21:54:47 +00:00
(*s) += 2;
if (lch < 0xdc00 || lch >= 0xdfff)
2004-05-16 21:54:47 +00:00
return (EOF);
ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
}
2024-02-27 20:04:27 +00:00
MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
2004-05-16 21:54:47 +00:00
return (ch);
}
}
2004-05-16 21:54:47 +00:00
return (EOF);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_string_putc()' - Write a character to a string.
//
// "p" is an array of two "char *" values: [0] is the current write position
// and [1] is the limit.  The character is stored only while the position is
// below the limit, but the position is advanced on every call - presumably so
// that the caller can tell how much space the full output needed.
//

static int				// O - 0 on success, -1 on failure
mxml_string_putc(int  ch,		// I - Character to write
                 void *p)		// I - Pointer to string pointers
{
  char	**ptrs = (char **)p;		// Position and limit pointers
  char	*dest = ptrs[0];		// Where this character goes


  // Always advance the position, even when nothing can be stored...
  ptrs[0] = dest + 1;

  if (dest < ptrs[1])
    *dest = ch;

  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_write_name()' - Write a name string.
//
// A name that starts with a single or double quote is written as a quoted
// string: characters up to the matching quote (or the end of the string) are
// emitted, any character for which mxmlEntityGetName() returns a name is
// written as "&name;", and the closing quote is always written.  Any other
// name is copied through unchanged.
//

static int				// O - 0 on success, -1 on failure
mxml_write_name(const char *s,		// I - Name to write
                void       *p,		// I - Write pointer
                int        (*putc_cb)(int, void *))
					// I - Write callback
{
  char		delim;			// Opening quote character
  const char	*entname;		// Entity name for current character


  if (*s != '\"' && *s != '\'')
  {
    // Plain name - copy it through as-is...
    for (; *s; s ++)
    {
      if ((*putc_cb)(*s, p) < 0)
        return (-1);
    }

    return (0);
  }

  // Quoted name - emit the opening quote...
  delim = *s;

  if ((*putc_cb)(delim, p) < 0)
    return (-1);

  // ...then the contents, escaping where needed...
  for (s ++; *s && *s != delim; s ++)
  {
    entname = mxmlEntityGetName(*s);

    if (!entname)
    {
      if ((*putc_cb)(*s, p) < 0)
        return (-1);

      continue;
    }

    if ((*putc_cb)('&', p) < 0)
      return (-1);

    for (; *entname; entname ++)
    {
      if ((*putc_cb)(*entname, p) < 0)
        return (-1);
    }

    if ((*putc_cb)(';', p) < 0)
      return (-1);
  }

  // ...and finally the closing quote.
  if ((*putc_cb)(delim, p) < 0)
    return (-1);

  return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_write_node()' - Save an XML node to a file.
//
2024-02-27 20:04:27 +00:00
static int // O - Column or -1 on error
mxml_write_node(mxml_node_t *node, // I - Node to write
void *p, // I - File to write to
mxml_save_cb_t cb, // I - Whitespace callback
int col, // I - Current column
_mxml_putc_cb_t putc_cb,// I - Output callback
_mxml_global_t *global)// I - Global data
{
2024-02-27 20:04:27 +00:00
mxml_node_t *current, // Current node
*next; // Next node
int i, // Looping var
width; // Width of attr + value
_mxml_attr_t *attr; // Current attribute
char s[255]; // Temporary string
const char *ptr; // Pointer into string
2024-02-27 20:04:27 +00:00
// Loop through this node and all of its children...
for (current = node; current; current = next)
{
2024-02-27 20:04:27 +00:00
// Print the node value...
switch (current->type)
{
case MXML_TYPE_CDATA :
col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
if ((*putc_cb)('<', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('!', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('[', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('C', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('D', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('A', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('T', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('A', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('[', p) < 0)
return (-1);
else
col ++;
for (ptr = current->value.cdata; *ptr; ptr ++)
{
if ((*putc_cb)(*ptr, p) < 0)
return (-1);
else if (*ptr == '\n')
col = 0;
else
col ++;
}
if ((*putc_cb)(']', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)(']', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('>', p) < 0)
return (-1);
else
col ++;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
break;
case MXML_TYPE_COMMENT :
col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
if ((*putc_cb)('<', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('!', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('-', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('-', p) < 0)
return (-1);
else
col ++;
for (ptr = current->value.comment; *ptr; ptr ++)
{
if ((*putc_cb)(*ptr, p) < 0)
return (-1);
else if (*ptr == '\n')
col = 0;
else
col ++;
}
if ((*putc_cb)('-', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('-', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('>', p) < 0)
return (-1);
else
col ++;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
break;
case MXML_TYPE_DECLARATION :
col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
if ((*putc_cb)('<', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('!', p) < 0)
return (-1);
else
col ++;
for (ptr = current->value.declaration; *ptr; ptr ++)
{
if ((*putc_cb)(*ptr, p) < 0)
return (-1);
else if (*ptr == '\n')
col = 0;
else
col ++;
}
if ((*putc_cb)('>', p) < 0)
return (-1);
else
col ++;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
break;
case MXML_TYPE_DIRECTIVE :
col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
if ((*putc_cb)('<', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('?', p) < 0)
return (-1);
else
col ++;
for (ptr = current->value.directive; *ptr; ptr ++)
{
if ((*putc_cb)(*ptr, p) < 0)
return (-1);
else if (*ptr == '\n')
col = 0;
else
col ++;
}
if ((*putc_cb)('?', p) < 0)
return (-1);
else
col ++;
if ((*putc_cb)('>', p) < 0)
return (-1);
else
col ++;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
break;
case MXML_TYPE_ELEMENT :
col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb);
if ((*putc_cb)('<', p) < 0)
return (-1);
else if (mxml_write_name(current->value.element.name, p, putc_cb) < 0)
return (-1);
col += strlen(current->value.element.name) + 1;
2024-02-27 20:04:27 +00:00
for (i = current->value.element.num_attrs, attr = current->value.element.attrs; i > 0; i --, attr ++)
{
width = (int)strlen(attr->name);
if (attr->value)
width += strlen(attr->value) + 3;
if (global->wrap > 0 && (col + width) > global->wrap)
{
if ((*putc_cb)('\n', p) < 0)
return (-1);
col = 0;
}
else
{
if ((*putc_cb)(' ', p) < 0)
return (-1);
col ++;
}
if (mxml_write_name(attr->name, p, putc_cb) < 0)
return (-1);
if (attr->value)
{
if ((*putc_cb)('=', p) < 0)
return (-1);
if ((*putc_cb)('\"', p) < 0)
return (-1);
if (mxml_write_string(attr->value, p, putc_cb) < 0)
return (-1);
if ((*putc_cb)('\"', p) < 0)
return (-1);
}
col += width;
}
if (current->child)
{
2024-02-27 20:04:27 +00:00
// Write children...
if ((*putc_cb)('>', p) < 0)
2003-06-19 04:25:12 +00:00
return (-1);
else
col ++;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
}
else
{
if ((*putc_cb)('/', p) < 0)
return (-1);
if ((*putc_cb)('>', p) < 0)
return (-1);
col += 2;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb);
}
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_INTEGER :
if (current->prev)
{
if (global->wrap > 0 && col > global->wrap)
{
if ((*putc_cb)('\n', p) < 0)
return (-1);
col = 0;
}
else if ((*putc_cb)(' ', p) < 0)
2024-02-27 20:04:27 +00:00
{
return (-1);
2024-02-27 20:04:27 +00:00
}
else
2024-02-27 20:04:27 +00:00
{
col ++;
2024-02-27 20:04:27 +00:00
}
}
snprintf(s, sizeof(s), "%ld", current->value.integer);
if (mxml_write_string(s, p, putc_cb) < 0)
return (-1);
col += strlen(s);
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_OPAQUE :
if (mxml_write_string(current->value.opaque, p, putc_cb) < 0)
return (-1);
col += strlen(current->value.opaque);
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_REAL :
if (current->prev)
{
if (global->wrap > 0 && col > global->wrap)
{
if ((*putc_cb)('\n', p) < 0)
return (-1);
col = 0;
}
else if ((*putc_cb)(' ', p) < 0)
2024-02-27 20:04:27 +00:00
{
2003-07-20 13:19:08 +00:00
return (-1);
2024-02-27 20:04:27 +00:00
}
else
2024-02-27 20:04:27 +00:00
{
col ++;
2024-02-27 20:04:27 +00:00
}
}
2024-02-27 20:04:27 +00:00
// TODO: Provide locale-neutral formatting/scanning code for REAL
snprintf(s, sizeof(s), "%f", current->value.real);
if (mxml_write_string(s, p, putc_cb) < 0)
return (-1);
col += strlen(s);
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_TEXT :
if (current->value.text.whitespace && col > 0)
{
if (global->wrap > 0 && col > global->wrap)
{
if ((*putc_cb)('\n', p) < 0)
return (-1);
col = 0;
}
else if ((*putc_cb)(' ', p) < 0)
2024-02-27 20:04:27 +00:00
{
return (-1);
2024-02-27 20:04:27 +00:00
}
else
2024-02-27 20:04:27 +00:00
{
col ++;
2024-02-27 20:04:27 +00:00
}
}
if (mxml_write_string(current->value.text.string, p, putc_cb) < 0)
return (-1);
col += strlen(current->value.text.string);
break;
2024-02-27 20:04:27 +00:00
case MXML_TYPE_CUSTOM :
if (global->custom_save_cb)
{
2024-02-27 20:04:27 +00:00
char *data; // Custom data string
const char *newline; // Last newline in string
if ((data = (*global->custom_save_cb)(current)) == NULL)
return (-1);
if (mxml_write_string(data, p, putc_cb) < 0)
return (-1);
if ((newline = strrchr(data, '\n')) == NULL)
col += strlen(data);
else
col = (int)strlen(newline);
free(data);
break;
}
2024-02-27 20:04:27 +00:00
default : // Should never happen
return (-1);
}
2024-02-27 20:04:27 +00:00
// Figure out the next node...
if ((next = current->child) == NULL)
{
if (current == node)
{
2024-02-27 20:04:27 +00:00
// Don't traverse to sibling node if we are at the "root" node...
next = NULL;
}
else
{
2024-02-27 20:04:27 +00:00
// Try the next sibling, and continue traversing upwards as needed...
while ((next = current->next) == NULL)
{
if (current == node || !current->parent)
break;
// Declarations and directives have no end tags...
current = current->parent;
if (current->type == MXML_TYPE_ELEMENT)
{
col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_CLOSE, col, putc_cb);
if ((*putc_cb)('<', p) < 0)
return (-1);
if ((*putc_cb)('/', p) < 0)
return (-1);
if (mxml_write_string(current->value.element.name, p, putc_cb) < 0)
return (-1);
if ((*putc_cb)('>', p) < 0)
return (-1);
col += strlen(current->value.element.name) + 3;
col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_CLOSE, col, putc_cb);
}
if (current == node)
break;
}
}
}
}
return (col);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_write_string()' - Write a string, escaping & and < as needed.
//
2003-06-03 19:46:29 +00:00
2024-02-27 20:04:27 +00:00
static int // O - 0 on success, -1 on failure
mxml_write_string(
2024-02-27 20:04:27 +00:00
const char *s, // I - String to write
void *p, // I - Write pointer
_mxml_putc_cb_t putc_cb) // I - Write callback
2003-06-03 19:46:29 +00:00
{
2024-02-27 20:04:27 +00:00
const char *name; // Entity name, if any
2003-06-19 04:25:12 +00:00
2003-06-03 19:46:29 +00:00
while (*s)
{
if ((name = mxmlEntityGetName(*s)) != NULL)
2003-06-03 19:46:29 +00:00
{
2003-06-19 04:25:12 +00:00
if ((*putc_cb)('&', p) < 0)
return (-1);
while (*name)
{
if ((*putc_cb)(*name, p) < 0)
2003-06-19 04:25:12 +00:00
return (-1);
name ++;
2003-06-19 04:25:12 +00:00
}
if ((*putc_cb)(';', p) < 0)
return (-1);
}
2003-06-19 04:25:12 +00:00
else if ((*putc_cb)(*s, p) < 0)
2024-02-27 20:04:27 +00:00
{
2003-06-03 19:46:29 +00:00
return (-1);
2024-02-27 20:04:27 +00:00
}
2003-06-03 19:46:29 +00:00
s ++;
}
return (0);
}
2024-02-27 20:04:27 +00:00
//
// 'mxml_write_ws()' - Do whitespace callback...
//
2024-02-27 20:04:27 +00:00
static int // O - New column
mxml_write_ws(mxml_node_t *node, // I - Current node
void *p, // I - Write pointer
mxml_save_cb_t cb, // I - Callback function
int ws, // I - Where value
int col, // I - Current column
_mxml_putc_cb_t putc_cb) // I - Write callback
{
2024-02-27 20:04:27 +00:00
const char *s; // Whitespace string
if (cb && (s = (*cb)(node, ws)) != NULL)
{
while (*s)
{
if ((*putc_cb)(*s, p) < 0)
2024-02-27 20:04:27 +00:00
{
return (-1);
2024-02-27 20:04:27 +00:00
}
else if (*s == '\n')
2024-02-27 20:04:27 +00:00
{
col = 0;
2024-02-27 20:04:27 +00:00
}
else if (*s == '\t')
{
col += MXML_TAB;
col = col - (col % MXML_TAB);
}
else
2024-02-27 20:04:27 +00:00
{
col ++;
2024-02-27 20:04:27 +00:00
}
s ++;
}
}
return (col);
}