// // File loading code for Mini-XML, a small XML file parsing library. // // https://www.msweet.org/mxml // // Copyright © 2003-2024 by Michael R Sweet. // // Licensed under Apache License v2.0. See the file "LICENSE" for more // information. // #ifndef _WIN32 # include #endif // !_WIN32 #include "mxml-private.h" // // Character encoding... // #define ENCODE_UTF8 0 // UTF-8 #define ENCODE_UTF16BE 1 // UTF-16 Big-Endian #define ENCODE_UTF16LE 2 // UTF-16 Little-Endian // // Macro to test for a bad XML character... // #define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t') // // Types and structures... // typedef int (*_mxml_getc_cb_t)(void *, int *); typedef int (*_mxml_putc_cb_t)(int, void *); typedef struct _mxml_fdbuf_s // File descriptor buffer { int fd; // File descriptor unsigned char *current, // Current position in buffer *end, // End of buffer buffer[8192]; // Character buffer } _mxml_fdbuf_t; // // Local functions... // static int mxml_add_char(int ch, char **ptr, char **buffer, int *bufsize); static int mxml_fd_getc(void *p, int *encoding); static int mxml_fd_putc(int ch, void *p); static int mxml_fd_read(_mxml_fdbuf_t *buf); static int mxml_fd_write(_mxml_fdbuf_t *buf); static int mxml_file_getc(void *p, int *encoding); static int mxml_file_putc(int ch, void *p); static int mxml_get_entity(mxml_node_t *parent, void *p, int *encoding, _mxml_getc_cb_t getc_cb, int *line); static inline int mxml_isspace(int ch) { return (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'); } static mxml_node_t *mxml_load_data(mxml_node_t *top, void *p, mxml_load_cb_t cb, _mxml_getc_cb_t getc_cb, mxml_sax_cb_t sax_cb, void *sax_data); static int mxml_parse_element(mxml_node_t *node, void *p, int *encoding, _mxml_getc_cb_t getc_cb, int *line); static int mxml_string_getc(void *p, int *encoding); static int mxml_string_putc(int ch, void *p); static int mxml_write_name(const char *s, void *p, _mxml_putc_cb_t putc_cb); static int mxml_write_node(mxml_node_t *node, void *p, mxml_save_cb_t cb, int col, _mxml_putc_cb_t putc_cb, _mxml_global_t *global); static int mxml_write_string(const char *s, void *p, _mxml_putc_cb_t putc_cb); static int mxml_write_ws(mxml_node_t *node, void *p, mxml_save_cb_t cb, int ws, int col, _mxml_putc_cb_t putc_cb); // // 'mxmlLoadFd()' - Load a file descriptor into an XML node tree. // // The nodes in the specified file are added to the specified top node. // If no top node is provided, the XML file MUST be well-formed with a // single parent node like for the entire file. The callback // function returns the value type that should be used for child nodes. // The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`, // `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for // loading child (data) nodes of the specified type. // // Note: The most common programming error when using the Mini-XML library is // to load an XML file using the `MXML_TEXT_CALLBACK`, which returns inline // text as a series of whitespace-delimited words, instead of using the // `MXML_OPAQUE_CALLBACK` which returns the inline text as a single string // (including whitespace). // mxml_node_t * // O - First node or `NULL` if the file could not be read. mxmlLoadFd(mxml_node_t *top, // I - Top node int fd, // I - File descriptor to read from mxml_load_cb_t cb) // I - Callback function or constant { _mxml_fdbuf_t buf; // File descriptor buffer // Initialize the file descriptor buffer... buf.fd = fd; buf.current = buf.buffer; buf.end = buf.buffer; // Read the XML data... return (mxml_load_data(top, &buf, cb, mxml_fd_getc, MXML_NO_CALLBACK, NULL)); } // // 'mxmlLoadFile()' - Load a file into an XML node tree. // // The nodes in the specified file are added to the specified top node. // If no top node is provided, the XML file MUST be well-formed with a // single parent node like for the entire file. The callback // function returns the value type that should be used for child nodes. // The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`, // `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for // loading child (data) nodes of the specified type. // // Note: The most common programming error when using the Mini-XML library is // to load an XML file using the `MXML_TEXT_CALLBACK`, which returns inline // text as a series of whitespace-delimited words, instead of using the // `MXML_OPAQUE_CALLBACK` which returns the inline text as a single string // (including whitespace). // mxml_node_t * // O - First node or `NULL` if the file could not be read. mxmlLoadFile(mxml_node_t *top, // I - Top node FILE *fp, // I - File to read from mxml_load_cb_t cb) // I - Callback function or constant { // Read the XML data... return (mxml_load_data(top, fp, cb, mxml_file_getc, MXML_NO_CALLBACK, NULL)); } // // 'mxmlLoadString()' - Load a string into an XML node tree. // // The nodes in the specified string are added to the specified top node. // If no top node is provided, the XML string MUST be well-formed with a // single parent node like for the entire string. The callback // function returns the value type that should be used for child nodes. // The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`, // `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for // loading child (data) nodes of the specified type. // // Note: The most common programming error when using the Mini-XML library is // to load an XML file using the `MXML_TEXT_CALLBACK`, which returns inline // text as a series of whitespace-delimited words, instead of using the // `MXML_OPAQUE_CALLBACK` which returns the inline text as a single string // (including whitespace). // mxml_node_t * // O - First node or `NULL` if the string has errors. mxmlLoadString(mxml_node_t *top, // I - Top node const char *s, // I - String to load mxml_load_cb_t cb) // I - Callback function or constant { // Read the XML data... return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, MXML_NO_CALLBACK, NULL)); } // // 'mxmlSaveAllocString()' - Save an XML tree to an allocated string. // // This function returns a pointer to a string containing the textual // representation of the XML node tree. The string should be freed // using `free()` when you are done with it. `NULL` is returned if the node // would produce an empty string or if the string cannot be allocated. // // The callback argument specifies a function that returns a whitespace // string or `NULL` before and after each element. If `MXML_NO_CALLBACK` // is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes // with leading whitespace and before attribute names inside opening // element tags. // char * // O - Allocated string or `NULL` mxmlSaveAllocString( mxml_node_t *node, // I - Node to write mxml_save_cb_t cb) // I - Whitespace callback or `MXML_NO_CALLBACK` { int bytes; // Required bytes char buffer[8192]; // Temporary buffer char *s; // Allocated string // Write the node to the temporary buffer... bytes = mxmlSaveString(node, buffer, sizeof(buffer), cb); if (bytes <= 0) return (NULL); if (bytes < (int)(sizeof(buffer) - 1)) { // Node fit inside the buffer, so just duplicate that string and return... return (strdup(buffer)); } // Allocate a buffer of the required size and save the node to the new buffer... if ((s = malloc(bytes + 1)) == NULL) return (NULL); mxmlSaveString(node, s, bytes + 1, cb); // Return the allocated string... return (s); } // // 'mxmlSaveFd()' - Save an XML tree to a file descriptor. // // The callback argument specifies a function that returns a whitespace // string or NULL before and after each element. If `MXML_NO_CALLBACK` // is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes // with leading whitespace and before attribute names inside opening // element tags. // bool // O - `true` on success, `false` on error. mxmlSaveFd(mxml_node_t *node, // I - Node to write int fd, // I - File descriptor to write to mxml_save_cb_t cb) // I - Whitespace callback or `MXML_NO_CALLBACK` { int col; // Final column _mxml_fdbuf_t buf; // File descriptor buffer _mxml_global_t *global = _mxml_global(); // Global data // Initialize the file descriptor buffer... buf.fd = fd; buf.current = buf.buffer; buf.end = buf.buffer + sizeof(buf.buffer); // Write the node... if ((col = mxml_write_node(node, &buf, cb, 0, mxml_fd_putc, global)) < 0) return (-1); if (col > 0) { if (mxml_fd_putc('\n', &buf) < 0) return (-1); } // Flush and return... return (mxml_fd_write(&buf)); } // // 'mxmlSaveFile()' - Save an XML tree to a file. // // The callback argument specifies a function that returns a whitespace // string or NULL before and after each element. If `MXML_NO_CALLBACK` // is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes // with leading whitespace and before attribute names inside opening // element tags. // bool // O - `true` on success, `false` on error. mxmlSaveFile(mxml_node_t *node, // I - Node to write FILE *fp, // I - File to write to mxml_save_cb_t cb) // I - Whitespace callback or `MXML_NO_CALLBACK` { int col; // Final column _mxml_global_t *global = _mxml_global(); // Global data // Write the node... if ((col = mxml_write_node(node, fp, cb, 0, mxml_file_putc, global)) < 0) return (-1); if (col > 0) { if (putc('\n', fp) < 0) return (-1); } // Return 0 (success)... return (0); } // // 'mxmlSaveString()' - Save an XML node tree to a string. // // This function returns the total number of bytes that would be // required for the string but only copies (bufsize - 1) characters // into the specified buffer. // // The callback argument specifies a function that returns a whitespace // string or NULL before and after each element. If `MXML_NO_CALLBACK` // is specified, whitespace will only be added before `MXML_TYPE_TEXT` nodes // with leading whitespace and before attribute names inside opening // element tags. // size_t // O - Size of string mxmlSaveString(mxml_node_t *node, // I - Node to write char *buffer, // I - String buffer size_t bufsize, // I - Size of string buffer mxml_save_cb_t cb) // I - Whitespace callback or `MXML_NO_CALLBACK` { int col; // Final column char *ptr[2]; // Pointers for putc_cb _mxml_global_t *global = _mxml_global(); // Global data // Write the node... ptr[0] = buffer; ptr[1] = buffer + bufsize; if ((col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global)) < 0) return (-1); if (col > 0) mxml_string_putc('\n', ptr); // Nul-terminate the buffer... if (ptr[0] >= ptr[1]) { if (bufsize > 0) buffer[bufsize - 1] = '\0'; } else { ptr[0][0] = '\0'; } // Return the number of characters... return ((size_t)(ptr[0] - buffer)); } // // 'mxmlSAXLoadFd()' - Load a file descriptor into an XML node tree // using a SAX callback. // // The nodes in the specified file are added to the specified top node. // If no top node is provided, the XML file MUST be well-formed with a // single parent node like for the entire file. The callback // function returns the value type that should be used for child nodes. // The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`, // `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for // loading child nodes of the specified type. // // The SAX callback must call @link mxmlRetain@ for any nodes that need to // be kept for later use. Otherwise, nodes are deleted when the parent // node is closed or after each data, comment, CDATA, or directive node. // mxml_node_t * // O - First node or `NULL` if the file could not be read. mxmlSAXLoadFd(mxml_node_t *top, // I - Top node int fd, // I - File descriptor to read from mxml_load_cb_t cb, // I - Callback function or constant mxml_sax_cb_t sax_cb, // I - SAX callback or `MXML_NO_CALLBACK` void *sax_data) // I - SAX user data { _mxml_fdbuf_t buf; // File descriptor buffer // Initialize the file descriptor buffer... buf.fd = fd; buf.current = buf.buffer; buf.end = buf.buffer; // Read the XML data... return (mxml_load_data(top, &buf, cb, mxml_fd_getc, sax_cb, sax_data)); } // // 'mxmlSAXLoadFile()' - Load a file into an XML node tree // using a SAX callback. // // The nodes in the specified file are added to the specified top node. // If no top node is provided, the XML file MUST be well-formed with a // single parent node like for the entire file. The callback // function returns the value type that should be used for child nodes. // The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`, // `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for // loading child nodes of the specified type. // // The SAX callback must call @link mxmlRetain@ for any nodes that need to // be kept for later use. Otherwise, nodes are deleted when the parent // node is closed or after each data, comment, CDATA, or directive node. // mxml_node_t * // O - First node or `NULL` if the file could not be read. mxmlSAXLoadFile( mxml_node_t *top, // I - Top node FILE *fp, // I - File to read from mxml_load_cb_t cb, // I - Callback function or constant mxml_sax_cb_t sax_cb, // I - SAX callback or `MXML_NO_CALLBACK` void *sax_data) // I - SAX user data { // Read the XML data... return (mxml_load_data(top, fp, cb, mxml_file_getc, sax_cb, sax_data)); } // // 'mxmlSAXLoadString()' - Load a string into an XML node tree // using a SAX callback. // // The nodes in the specified string are added to the specified top node. // If no top node is provided, the XML string MUST be well-formed with a // single parent node like for the entire string. The callback // function returns the value type that should be used for child nodes. // The constants `MXML_INTEGER_CALLBACK`, `MXML_TYPE_OPAQUE_CALLBACK`, // `MXML_REAL_CALLBACK`, and `MXML_TYPE_TEXT_CALLBACK` are defined for // loading child nodes of the specified type. // // The SAX callback must call @link mxmlRetain@ for any nodes that need to // be kept for later use. Otherwise, nodes are deleted when the parent // node is closed or after each data, comment, CDATA, or directive node. // mxml_node_t * // O - First node or `NULL` if the string has errors. mxmlSAXLoadString( mxml_node_t *top, // I - Top node const char *s, // I - String to load mxml_load_cb_t cb, // I - Callback function or constant mxml_sax_cb_t sax_cb, // I - SAX callback or `MXML_NO_CALLBACK` void *sax_data) // I - SAX user data { // Read the XML data... return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, sax_cb, sax_data)); } // // 'mxmlSetCustomHandlers()' - Set the handling functions for custom data. // // The load function accepts a node pointer and a data string and must // return 0 on success and non-zero on error. // // The save function accepts a node pointer and must return a malloc'd // string on success and `NULL` on error. // // void mxmlSetCustomHandlers( mxml_custom_load_cb_t load, // I - Load function mxml_custom_save_cb_t save) // I - Save function { _mxml_global_t *global = _mxml_global(); // Global data global->custom_load_cb = load; global->custom_save_cb = save; } // // 'mxmlSetErrorCallback()' - Set the error message callback. // void mxmlSetErrorCallback(mxml_error_cb_t cb)// I - Error callback function { _mxml_global_t *global = _mxml_global(); // Global data global->error_cb = cb; } // // 'mxmlSetWrapMargin()' - Set the wrap margin when saving XML data. // // Wrapping is disabled when "column" is 0. // void mxmlSetWrapMargin(int column) // I - Column for wrapping, 0 to disable wrapping { _mxml_global_t *global = _mxml_global(); // Global data global->wrap = column; } // // 'mxml_add_char()' - Add a character to a buffer, expanding as needed. // static int // O - 0 on success, -1 on error mxml_add_char(int ch, // I - Character to add char **bufptr, // IO - Current position in buffer char **buffer, // IO - Current buffer int *bufsize) // IO - Current buffer size { char *newbuffer; // New buffer value if (*bufptr >= (*buffer + *bufsize - 4)) { // Increase the size of the buffer... if (*bufsize < 1024) (*bufsize) *= 2; else (*bufsize) += 1024; if ((newbuffer = realloc(*buffer, *bufsize)) == NULL) { mxml_error("Unable to expand string buffer to %d bytes.", *bufsize); return (-1); } *bufptr = newbuffer + (*bufptr - *buffer); *buffer = newbuffer; } if (ch < 0x80) { // Single byte ASCII... *(*bufptr)++ = ch; } else if (ch < 0x800) { // Two-byte UTF-8... *(*bufptr)++ = 0xc0 | (ch >> 6); *(*bufptr)++ = 0x80 | (ch & 0x3f); } else if (ch < 0x10000) { // Three-byte UTF-8... *(*bufptr)++ = 0xe0 | (ch >> 12); *(*bufptr)++ = 0x80 | ((ch >> 6) & 0x3f); *(*bufptr)++ = 0x80 | (ch & 0x3f); } else { // Four-byte UTF-8... *(*bufptr)++ = 0xf0 | (ch >> 18); *(*bufptr)++ = 0x80 | ((ch >> 12) & 0x3f); *(*bufptr)++ = 0x80 | ((ch >> 6) & 0x3f); *(*bufptr)++ = 0x80 | (ch & 0x3f); } return (0); } // // 'mxml_fd_getc()' - Read a character from a file descriptor. // static int // O - Character or EOF mxml_fd_getc(void *p, // I - File descriptor buffer int *encoding) // IO - Encoding { _mxml_fdbuf_t *buf; // File descriptor buffer int ch, // Current character temp; // Temporary character // Grab the next character in the buffer... buf = (_mxml_fdbuf_t *)p; if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } ch = *(buf->current)++; switch (*encoding) { case ENCODE_UTF8 : // Got a UTF-8 character; convert UTF-8 to Unicode and return... if (!(ch & 0x80)) { MXML_DEBUG("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } return (ch); } else if (ch == 0xfe) { // UTF-16 big-endian BOM? if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } ch = *(buf->current)++; if (ch != 0xff) return (EOF); *encoding = ENCODE_UTF16BE; return (mxml_fd_getc(p, encoding)); } else if (ch == 0xff) { // UTF-16 little-endian BOM? if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } ch = *(buf->current)++; if (ch != 0xfe) return (EOF); *encoding = ENCODE_UTF16LE; return (mxml_fd_getc(p, encoding)); } else if ((ch & 0xe0) == 0xc0) { // Two-byte value... if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; if ((temp & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x1f) << 6) | (temp & 0x3f); if (ch < 0x80) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } } else if ((ch & 0xf0) == 0xe0) { // Three-byte value... if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; if ((temp & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x0f) << 6) | (temp & 0x3f); if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; if ((temp & 0xc0) != 0x80) return (EOF); ch = (ch << 6) | (temp & 0x3f); if (ch < 0x800) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } // Ignore (strip) Byte Order Mark (BOM)... if (ch == 0xfeff) return (mxml_fd_getc(p, encoding)); } else if ((ch & 0xf8) == 0xf0) { // Four-byte value... if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; if ((temp & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x07) << 6) | (temp & 0x3f); if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; if ((temp & 0xc0) != 0x80) return (EOF); ch = (ch << 6) | (temp & 0x3f); if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; if ((temp & 0xc0) != 0x80) return (EOF); ch = (ch << 6) | (temp & 0x3f); if (ch < 0x10000) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } } else { return (EOF); } break; case ENCODE_UTF16BE : // Read UTF-16 big-endian char... if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; ch = (ch << 8) | temp; if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } else if (ch >= 0xd800 && ch <= 0xdbff) { // Multi-word UTF-16 char... int lch; if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } lch = *(buf->current)++; if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; lch = (lch << 8) | temp; if (lch < 0xdc00 || lch >= 0xdfff) return (EOF); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; } break; case ENCODE_UTF16LE : // Read UTF-16 little-endian char... if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; ch |= (temp << 8); if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } else if (ch >= 0xd800 && ch <= 0xdbff) { // Multi-word UTF-16 char... int lch; if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } lch = *(buf->current)++; if (buf->current >= buf->end) { if (mxml_fd_read(buf) < 0) return (EOF); } temp = *(buf->current)++; lch |= (temp << 8); if (lch < 0xdc00 || lch >= 0xdfff) return (EOF); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; } break; } MXML_DEBUG("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } // // 'mxml_fd_putc()' - Write a character to a file descriptor. // static int // O - 0 on success, -1 on error mxml_fd_putc(int ch, // I - Character void *p) // I - File descriptor buffer { _mxml_fdbuf_t *buf; // File descriptor buffer // Flush the write buffer as needed... buf = (_mxml_fdbuf_t *)p; if (buf->current >= buf->end) { if (mxml_fd_write(buf) < 0) return (-1); } *(buf->current)++ = ch; // Return successfully... return (0); } // // 'mxml_fd_read()' - Read a buffer of data from a file descriptor. // static int // O - 0 on success, -1 on error mxml_fd_read(_mxml_fdbuf_t *buf) // I - File descriptor buffer { int bytes; // Bytes read... // Range check input... if (!buf) return (-1); // Read from the file descriptor... while ((bytes = (int)read(buf->fd, buf->buffer, sizeof(buf->buffer))) < 0) { #ifdef EINTR if (errno != EAGAIN && errno != EINTR) #else if (errno != EAGAIN) #endif // EINTR return (-1); } if (bytes == 0) return (-1); // Update the pointers and return success... buf->current = buf->buffer; buf->end = buf->buffer + bytes; return (0); } // // 'mxml_fd_write()' - Write a buffer of data to a file descriptor. // static int // O - 0 on success, -1 on error mxml_fd_write(_mxml_fdbuf_t *buf) // I - File descriptor buffer { int bytes; // Bytes written unsigned char *ptr; // Pointer into buffer // Range check... if (!buf) return (-1); // Return 0 if there is nothing to write... if (buf->current == buf->buffer) return (0); // Loop until we have written everything... for (ptr = buf->buffer; ptr < buf->current; ptr += bytes) { if ((bytes = (int)write(buf->fd, ptr, buf->current - ptr)) < 0) return (-1); } // All done, reset pointers and return success... buf->current = buf->buffer; return (0); } // // 'mxml_file_getc()' - Get a character from a file. // static int // O - Character or EOF mxml_file_getc(void *p, // I - Pointer to file int *encoding) // IO - Encoding { int ch, // Character from file temp; // Temporary character FILE *fp; // Pointer to file // Read a character from the file and see if it is EOF or ASCII... fp = (FILE *)p; ch = getc(fp); if (ch == EOF) return (EOF); switch (*encoding) { case ENCODE_UTF8 : // Got a UTF-8 character; convert UTF-8 to Unicode and return... if (!(ch & 0x80)) { if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } MXML_DEBUG("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } else if (ch == 0xfe) { // UTF-16 big-endian BOM? ch = getc(fp); if (ch != 0xff) return (EOF); *encoding = ENCODE_UTF16BE; return (mxml_file_getc(p, encoding)); } else if (ch == 0xff) { // UTF-16 little-endian BOM? ch = getc(fp); if (ch != 0xfe) return (EOF); *encoding = ENCODE_UTF16LE; return (mxml_file_getc(p, encoding)); } else if ((ch & 0xe0) == 0xc0) { // Two-byte value... if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x1f) << 6) | (temp & 0x3f); if (ch < 0x80) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } } else if ((ch & 0xf0) == 0xe0) { // Three-byte value... if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x0f) << 6) | (temp & 0x3f); if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (EOF); ch = (ch << 6) | (temp & 0x3f); if (ch < 0x800) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } // Ignore (strip) Byte Order Mark (BOM)... if (ch == 0xfeff) return (mxml_file_getc(p, encoding)); } else if ((ch & 0xf8) == 0xf0) { // Four-byte value... if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x07) << 6) | (temp & 0x3f); if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (EOF); ch = (ch << 6) | (temp & 0x3f); if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80) return (EOF); ch = (ch << 6) | (temp & 0x3f); if (ch < 0x10000) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } } else { return (EOF); } break; case ENCODE_UTF16BE : // Read UTF-16 big-endian char... ch = (ch << 8) | getc(fp); if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } else if (ch >= 0xd800 && ch <= 0xdbff) { // Multi-word UTF-16 char... int lch = getc(fp); lch = (lch << 8) | getc(fp); if (lch < 0xdc00 || lch >= 0xdfff) return (EOF); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; } break; case ENCODE_UTF16LE : // Read UTF-16 little-endian char... ch |= (getc(fp) << 8); if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } else if (ch >= 0xd800 && ch <= 0xdbff) { // Multi-word UTF-16 char... int lch = getc(fp); lch |= (getc(fp) << 8); if (lch < 0xdc00 || lch >= 0xdfff) return (EOF); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; } break; } MXML_DEBUG("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } // // 'mxml_file_putc()' - Write a character to a file. // static int // O - 0 on success, -1 on failure mxml_file_putc(int ch, // I - Character to write void *p) // I - Pointer to file { return (putc(ch, (FILE *)p) == EOF ? -1 : 0); } // // 'mxml_get_entity()' - Get the character corresponding to an entity... // static int // O - Character value or EOF on error mxml_get_entity(mxml_node_t *parent, // I - Parent node void *p, // I - Pointer to source int *encoding, // IO - Character encoding int (*getc_cb)(void *, int *), // I - Get character function int *line) // IO - Current line number { int ch; // Current character char entity[64], // Entity string *entptr; // Pointer into entity entptr = entity; while ((ch = (*getc_cb)(p, encoding)) != EOF) { if (ch > 126 || (!isalnum(ch) && ch != '#')) { break; } else if (entptr < (entity + sizeof(entity) - 1)) { *entptr++ = ch; } else { mxml_error("Entity name too long under parent <%s> on line %d.", parent ? parent->value.element.name : "null", *line); break; } } *entptr = '\0'; if (ch != ';') { mxml_error("Character entity '%s' not terminated under parent <%s> on line %d.", entity, parent ? parent->value.element.name : "null", *line); if (ch == '\n') (*line)++; return (EOF); } if (entity[0] == '#') { if (entity[1] == 'x') ch = (int)strtol(entity + 2, NULL, 16); else ch = (int)strtol(entity + 1, NULL, 10); } else if ((ch = mxmlEntityGetValue(entity)) < 0) { mxml_error("Entity name '%s;' not supported under parent <%s> on line %d.", entity, parent ? parent->value.element.name : "null", *line); } if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x under parent <%s> on line %d not allowed by XML standard.", ch, parent ? parent->value.element.name : "null", *line); return (EOF); } return (ch); } // // 'mxml_load_data()' - Load data into an XML node tree. // static mxml_node_t * // O - First node or NULL if the file could not be read. mxml_load_data( mxml_node_t *top, // I - Top node void *p, // I - Pointer to data mxml_load_cb_t cb, // I - Callback function or MXML_NO_CALLBACK _mxml_getc_cb_t getc_cb, // I - Read function mxml_sax_cb_t sax_cb, // I - SAX callback or MXML_NO_CALLBACK void *sax_data) // I - SAX user data { mxml_node_t *node = NULL, // Current node *first = NULL, // First node added *parent = NULL; // Current parent node int line = 1, // Current line number ch; // Character from file bool whitespace; // Whitespace seen? char *buffer, // String buffer *bufptr; // Pointer into buffer int bufsize; // Size of buffer mxml_type_t type; // Current node type int encoding; // Character encoding _mxml_global_t *global = _mxml_global(); // Global data static const char * const types[] = // Type strings... { "MXML_TYPE_CDATA", // CDATA "MXML_TYPE_COMMENT", // Comment "MXML_TYPE_DECLARATION",// Declaration "MXML_TYPE_DIRECTIVE",// Processing instruction/directive "MXML_TYPE_ELEMENT", // XML element with attributes "MXML_TYPE_INTEGER", // Integer value "MXML_TYPE_OPAQUE", // Opaque string "MXML_TYPE_REAL", // Real value "MXML_TYPE_TEXT", // Text fragment "MXML_TYPE_CUSTOM" // Custom data }; // Read elements and other nodes from the file... if ((buffer = malloc(64)) == NULL) { mxml_error("Unable to allocate string buffer."); return (NULL); } bufsize = 64; bufptr = buffer; parent = top; first = NULL; whitespace = false; encoding = ENCODE_UTF8; if (cb && parent) type = (*cb)(parent); else if (parent) type = MXML_TYPE_TEXT; else type = MXML_TYPE_IGNORE; if ((ch = (*getc_cb)(p, &encoding)) == EOF) { free(buffer); return (NULL); } else if (ch != '<' && !top) { free(buffer); mxml_error("XML does not start with '<' (saw '%c').", ch); return (NULL); } do { if ((ch == '<' || (mxml_isspace(ch) && type != MXML_TYPE_OPAQUE && type != MXML_TYPE_CUSTOM)) && bufptr > buffer) { // Add a new value node... *bufptr = '\0'; switch (type) { case MXML_TYPE_INTEGER : node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0)); break; case MXML_TYPE_OPAQUE : node = mxmlNewOpaque(parent, buffer); break; case MXML_TYPE_REAL : node = mxmlNewReal(parent, strtod(buffer, &bufptr)); break; case MXML_TYPE_TEXT : node = mxmlNewText(parent, whitespace, buffer); break; case MXML_TYPE_CUSTOM : if (global->custom_load_cb) { // Use the callback to fill in the custom data... node = mxmlNewCustom(parent, NULL, NULL); if (!(*global->custom_load_cb)(node, buffer)) { mxml_error("Bad custom value '%s' in parent <%s> on line %d.", buffer, parent ? parent->value.element.name : "null", line); mxmlDelete(node); node = NULL; } break; } default : // Ignore... node = NULL; break; } if (*bufptr) { // Bad integer/real number value... mxml_error("Bad %s value '%s' in parent <%s> on line %d.", type == MXML_TYPE_INTEGER ? "integer" : "real", buffer, parent ? parent->value.element.name : "null", line); break; } bufptr = buffer; whitespace = mxml_isspace(ch) && type == MXML_TYPE_TEXT; if (!node && type != MXML_TYPE_IGNORE) { // Print error and return... mxml_error("Unable to add value node of type %s to parent <%s> on line %d.", types[type], parent ? parent->value.element.name : "null", line); goto error; } if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_DATA, sax_data)) goto error; if (!mxmlRelease(node)) node = NULL; } if (!first && node) first = node; } else if (mxml_isspace(ch) && type == MXML_TYPE_TEXT) { whitespace = true; } if (ch == '\n') line ++; // Add lone whitespace node if we have an element and existing whitespace... if (ch == '<' && whitespace && type == MXML_TYPE_TEXT) { if (parent) { node = mxmlNewText(parent, whitespace, ""); if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_DATA, sax_data)) goto error; if (!mxmlRelease(node)) node = NULL; } if (!first && node) first = node; } whitespace = false; } if (ch == '<') { // Start of open/close tag... bufptr = buffer; while ((ch = (*getc_cb)(p, &encoding)) != EOF) { if (mxml_isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer)) { break; } else if (ch == '<') { mxml_error("Bare < in element."); goto error; } else if (ch == '&') { if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb, &line)) == EOF) goto error; if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) goto error; } else if (ch < '0' && ch != '!' && ch != '-' && ch != '.' && ch != '/') { goto error; } else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) { goto error; } else if (((bufptr - buffer) == 1 && buffer[0] == '?') || ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3)) || ((bufptr - buffer) == 8 && !strncmp(buffer, "![CDATA[", 8))) { break; } if (ch == '\n') line ++; } *bufptr = '\0'; if (!strcmp(buffer, "!--")) { // Gather rest of comment... while ((ch = (*getc_cb)(p, &encoding)) != EOF) { if (ch == '>' && bufptr > (buffer + 4) && bufptr[-3] != '-' && bufptr[-2] == '-' && bufptr[-1] == '-') break; else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) goto error; if (ch == '\n') line ++; } // Error out if we didn't get the whole comment... if (ch != '>') { // Print error and return... mxml_error("Early EOF in comment node on line %d.", line); goto error; } // Otherwise add this as an element under the current parent... bufptr[-2] = '\0'; if (!parent && first) { // There can only be one root element! mxml_error("<%s--> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line); goto error; } if ((node = mxmlNewComment(parent, buffer + 3)) == NULL) { // Just print error for now... mxml_error("Unable to add comment node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line); break; } if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_COMMENT, sax_data)) goto error; if (!mxmlRelease(node)) node = NULL; } if (node && !first) first = node; } else if (!strcmp(buffer, "![CDATA[")) { // Gather CDATA section... while ((ch = (*getc_cb)(p, &encoding)) != EOF) { if (ch == '>' && !strncmp(bufptr - 2, "]]", 2)) { // Drop terminator from CDATA string... bufptr[-2] = '\0'; break; } else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) { goto error; } if (ch == '\n') line ++; } // Error out if we didn't get the whole comment... if (ch != '>') { // Print error and return... mxml_error("Early EOF in CDATA node on line %d.", line); goto error; } // Otherwise add this as an element under the current parent... bufptr[-2] = '\0'; if (!parent && first) { // There can only be one root element! mxml_error("<%s]]> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line); goto error; } if ((node = mxmlNewCDATA(parent, buffer + 8)) == NULL) { // Print error and return... mxml_error("Unable to add CDATA node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line); goto error; } if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_CDATA, sax_data)) goto error; if (!mxmlRelease(node)) node = NULL; } if (node && !first) first = node; } else if (buffer[0] == '?') { // Gather rest of processing instruction... while ((ch = (*getc_cb)(p, &encoding)) != EOF) { if (ch == '>' && bufptr > buffer && bufptr[-1] == '?') break; else if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) goto error; if (ch == '\n') line ++; } // Error out if we didn't get the whole processing instruction... if (ch != '>') { // Print error and return... mxml_error("Early EOF in processing instruction node on line %d.", line); goto error; } // Otherwise add this as an element under the current parent... bufptr[-1] = '\0'; if (!parent && first) { // There can only be one root element! mxml_error("<%s?> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line); goto error; } if ((node = mxmlNewDirective(parent, buffer + 1)) == NULL) { // Print error and return... mxml_error("Unable to add processing instruction node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line); goto error; } if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_DIRECTIVE, sax_data)) goto error; if (strncmp(node->value.directive, "xml ", 4) && !mxmlRelease(node)) node = NULL; } if (node) { if (!first) first = node; if (!parent) { parent = node; if (cb) type = (*cb)(parent); else type = MXML_TYPE_TEXT; } } } else if (buffer[0] == '!') { // Gather rest of declaration... do { if (ch == '>') { break; } else { if (ch == '&') { if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb, &line)) == EOF) goto error; } if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) goto error; } if (ch == '\n') line ++; } while ((ch = (*getc_cb)(p, &encoding)) != EOF); // Error out if we didn't get the whole declaration... if (ch != '>') { // Print error and return... mxml_error("Early EOF in declaration node on line %d.", line); goto error; } // Otherwise add this as an element under the current parent... *bufptr = '\0'; if (!parent && first) { // There can only be one root element! mxml_error("<%s> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line); goto error; } if ((node = mxmlNewDeclaration(parent, buffer + 1)) == NULL) { // Print error and return... mxml_error("Unable to add declaration node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line); goto error; } if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_DECLARATION, sax_data)) goto error; if (!mxmlRelease(node)) node = NULL; } if (node) { if (!first) first = node; if (!parent) { parent = node; if (cb) type = (*cb)(parent); else type = MXML_TYPE_TEXT; } } } else if (buffer[0] == '/') { // Handle close tag... if (!parent || strcmp(buffer + 1, parent->value.element.name)) { // Close tag doesn't match tree; print an error for now... mxml_error("Mismatched close tag <%s> under parent <%s> on line %d.", buffer, parent ? parent->value.element.name : "(null)", line); goto error; } // Keep reading until we see >... while (ch != '>' && ch != EOF) ch = (*getc_cb)(p, &encoding); node = parent; parent = parent->parent; if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_ELEMENT_CLOSE, sax_data)) goto error; if (!mxmlRelease(node)) { if (first == node) first = NULL; node = NULL; } } // Ascend into the parent and set the value type as needed... if (cb && parent) type = (*cb)(parent); } else { // Handle open tag... if (!parent && first) { // There can only be one root element! mxml_error("<%s> cannot be a second root node after <%s> on line %d.", buffer, first->value.element.name, line); goto error; } if ((node = mxmlNewElement(parent, buffer)) == NULL) { // Just print error for now... mxml_error("Unable to add element node to parent <%s> on line %d.", parent ? parent->value.element.name : "null", line); goto error; } if (mxml_isspace(ch)) { if ((ch = mxml_parse_element(node, p, &encoding, getc_cb, &line)) == EOF) goto error; } else if (ch == '/') { if ((ch = (*getc_cb)(p, &encoding)) != '>') { mxml_error("Expected > but got '%c' instead for element <%s/> on line %d.", ch, buffer, line); mxmlDelete(node); node = NULL; goto error; } ch = '/'; } if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_ELEMENT_OPEN, sax_data)) goto error; } if (!first) first = node; if (ch == EOF) break; if (ch != '/') { // Descend into this node, setting the value type as needed... parent = node; if (cb && parent) type = (*cb)(parent); else type = MXML_TYPE_TEXT; } else if (sax_cb) { if (!(*sax_cb)(node, MXML_SAX_EVENT_ELEMENT_CLOSE, sax_data)) goto error; if (!mxmlRelease(node)) { if (first == node) first = NULL; node = NULL; } } } bufptr = buffer; } else if (ch == '&') { // Add character entity to current buffer... if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb, &line)) == EOF) goto error; if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) goto error; } else if (type == MXML_TYPE_OPAQUE || type == MXML_TYPE_CUSTOM || !mxml_isspace(ch)) { // Add character to current buffer... if (mxml_add_char(ch, &bufptr, &buffer, &bufsize)) goto error; } } while ((ch = (*getc_cb)(p, &encoding)) != EOF); // Free the string buffer - we don't need it anymore... free(buffer); // Find the top element and return it... if (parent) { node = parent; while (parent != top && parent->parent) parent = parent->parent; if (node != parent) { mxml_error("Missing close tag under parent <%s> on line %d.", node->value.element.name, node->parent ? node->parent->value.element.name : "(null)", line); mxmlDelete(first); return (NULL); } } if (parent) return (parent); else return (first); // Common error return... error: mxmlDelete(first); free(buffer); return (NULL); } // // 'mxml_parse_element()' - Parse an element for any attributes... // static int // O - Terminating character mxml_parse_element( mxml_node_t *node, // I - Element node void *p, // I - Data to read from int *encoding, // IO - Encoding _mxml_getc_cb_t getc_cb, // I - Data callback int *line) // IO - Current line number { int ch, // Current character in file quote; // Quoting character char *name, // Attribute name *value, // Attribute value *ptr; // Pointer into name/value int namesize, // Size of name string valsize; // Size of value string // Initialize the name and value buffers... if ((name = malloc(64)) == NULL) { mxml_error("Unable to allocate memory for name."); return (EOF); } namesize = 64; if ((value = malloc(64)) == NULL) { free(name); mxml_error("Unable to allocate memory for value."); return (EOF); } valsize = 64; // Loop until we hit a >, /, ?, or EOF... while ((ch = (*getc_cb)(p, encoding)) != EOF) { MXML_DEBUG("parse_element: ch='%c'\n", ch); // Skip leading whitespace... if (mxml_isspace(ch)) { if (ch == '\n') (*line)++; continue; } // Stop at /, ?, or >... if (ch == '/' || ch == '?') { // Grab the > character and print an error if it isn't there... quote = (*getc_cb)(p, encoding); if (quote != '>') { mxml_error("Expected '>' after '%c' for element %s, but got '%c' on line %d.", ch, node->value.element.name, quote, *line); goto error; } break; } else if (ch == '<') { mxml_error("Bare < in element %s on line %d.", node->value.element.name, *line); goto error; } else if (ch == '>') { break; } // Read the attribute name... ptr = name; if (mxml_add_char(ch, &ptr, &name, &namesize)) goto error; if (ch == '\"' || ch == '\'') { // Name is in quotes, so get a quoted string... quote = ch; while ((ch = (*getc_cb)(p, encoding)) != EOF) { if (ch == '&') { if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF) goto error; } else if (ch == '\n') { (*line)++; } if (mxml_add_char(ch, &ptr, &name, &namesize)) goto error; if (ch == quote) break; } } else { // Grab an normal, non-quoted name... while ((ch = (*getc_cb)(p, encoding)) != EOF) { if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>' || ch == '?') { if (ch == '\n') (*line)++; break; } else { if (ch == '&') { if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF) goto error; } if (mxml_add_char(ch, &ptr, &name, &namesize)) goto error; } } } *ptr = '\0'; if (mxmlElementGetAttr(node, name)) { mxml_error("Duplicate attribute '%s' in element %s on line %d.", name, node->value.element.name, *line); goto error; } while (ch != EOF && mxml_isspace(ch)) { ch = (*getc_cb)(p, encoding); if (ch == '\n') (*line)++; } if (ch == '=') { // Read the attribute value... while ((ch = (*getc_cb)(p, encoding)) != EOF && mxml_isspace(ch)) { if (ch == '\n') (*line)++; } if (ch == EOF) { mxml_error("Missing value for attribute '%s' in element %s on line %d.", name, node->value.element.name, *line); goto error; } if (ch == '\'' || ch == '\"') { // Read quoted value... quote = ch; ptr = value; while ((ch = (*getc_cb)(p, encoding)) != EOF) { if (ch == quote) { break; } else { if (ch == '&') { if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF) goto error; } else if (ch == '\n') { (*line)++; } if (mxml_add_char(ch, &ptr, &value, &valsize)) goto error; } } *ptr = '\0'; } else { // Read unquoted value... ptr = value; if (mxml_add_char(ch, &ptr, &value, &valsize)) goto error; while ((ch = (*getc_cb)(p, encoding)) != EOF) { if (mxml_isspace(ch) || ch == '=' || ch == '/' || ch == '>') { if (ch == '\n') (*line)++; break; } else { if (ch == '&') { if ((ch = mxml_get_entity(node, p, encoding, getc_cb, line)) == EOF) goto error; } if (mxml_add_char(ch, &ptr, &value, &valsize)) goto error; } } *ptr = '\0'; } // Set the attribute with the given string value... mxmlElementSetAttr(node, name, value); } else { mxml_error("Missing value for attribute '%s' in element %s on line %d.", name, node->value.element.name, *line); goto error; } // Check the end character... if (ch == '/' || ch == '?') { // Grab the > character and print an error if it isn't there... quote = (*getc_cb)(p, encoding); if (quote != '>') { mxml_error("Expected '>' after '%c' for element %s, but got '%c' on line %d.", ch, node->value.element.name, quote, *line); ch = EOF; } break; } else if (ch == '>') break; } // Free the name and value buffers and return... free(name); free(value); return (ch); // Common error return point... error: free(name); free(value); return (EOF); } // // 'mxml_string_getc()' - Get a character from a string. // static int // O - Character or EOF mxml_string_getc(void *p, // I - Pointer to file int *encoding) // IO - Encoding { int ch; // Character const char **s; // Pointer to string pointer s = (const char **)p; if ((ch = (*s)[0] & 255) != 0 || *encoding == ENCODE_UTF16LE) { // Got character; convert UTF-8 to integer and return... (*s)++; switch (*encoding) { case ENCODE_UTF8 : if (!(ch & 0x80)) { MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } return (ch); } else if (ch == 0xfe) { // UTF-16 big-endian BOM? if (((*s)[0] & 255) != 0xff) return (EOF); *encoding = ENCODE_UTF16BE; (*s)++; return (mxml_string_getc(p, encoding)); } else if (ch == 0xff) { // UTF-16 little-endian BOM? if (((*s)[0] & 255) != 0xfe) return (EOF); *encoding = ENCODE_UTF16LE; (*s)++; return (mxml_string_getc(p, encoding)); } else if ((ch & 0xe0) == 0xc0) { // Two-byte value... if (((*s)[0] & 0xc0) != 0x80) return (EOF); ch = ((ch & 0x1f) << 6) | ((*s)[0] & 0x3f); (*s)++; if (ch < 0x80) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } else if ((ch & 0xf0) == 0xe0) { // Three-byte value... if (((*s)[0] & 0xc0) != 0x80 || ((*s)[1] & 0xc0) != 0x80) return (EOF); ch = ((((ch & 0x0f) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f); (*s) += 2; if (ch < 0x800) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } // Ignore (strip) Byte Order Mark (BOM)... if (ch == 0xfeff) return (mxml_string_getc(p, encoding)); MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } else if ((ch & 0xf8) == 0xf0) { // Four-byte value... if (((*s)[0] & 0xc0) != 0x80 || ((*s)[1] & 0xc0) != 0x80 || ((*s)[2] & 0xc0) != 0x80) return (EOF); ch = ((((((ch & 0x07) << 6) | ((*s)[0] & 0x3f)) << 6) | ((*s)[1] & 0x3f)) << 6) | ((*s)[2] & 0x3f); (*s) += 3; if (ch < 0x10000) { mxml_error("Invalid UTF-8 sequence for character 0x%04x.", ch); return (EOF); } MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } else { return (EOF); } case ENCODE_UTF16BE : // Read UTF-16 big-endian char... ch = (ch << 8) | ((*s)[0] & 255); (*s) ++; if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } else if (ch >= 0xd800 && ch <= 0xdbff) { // Multi-word UTF-16 char... int lch; // Lower word if (!(*s)[0]) return (EOF); lch = (((*s)[0] & 255) << 8) | ((*s)[1] & 255); (*s) += 2; if (lch < 0xdc00 || lch >= 0xdfff) return (EOF); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; } MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); case ENCODE_UTF16LE : // Read UTF-16 little-endian char... ch = ch | (((*s)[0] & 255) << 8); if (!ch) { (*s) --; return (EOF); } (*s) ++; if (mxml_bad_char(ch)) { mxml_error("Bad control character 0x%02x not allowed by XML standard.", ch); return (EOF); } else if (ch >= 0xd800 && ch <= 0xdbff) { // Multi-word UTF-16 char... int lch; // Lower word if (!(*s)[1]) return (EOF); lch = (((*s)[1] & 255) << 8) | ((*s)[0] & 255); (*s) += 2; if (lch < 0xdc00 || lch >= 0xdfff) return (EOF); ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000; } MXML_DEBUG("mxml_string_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch); return (ch); } } return (EOF); } // // 'mxml_string_putc()' - Write a character to a string. // static int // O - 0 on success, -1 on failure mxml_string_putc(int ch, // I - Character to write void *p) // I - Pointer to string pointers { char **pp; // Pointer to string pointers pp = (char **)p; if (pp[0] < pp[1]) pp[0][0] = ch; pp[0] ++; return (0); } // // 'mxml_write_name()' - Write a name string. // static int // O - 0 on success, -1 on failure mxml_write_name(const char *s, // I - Name to write void *p, // I - Write pointer int (*putc_cb)(int, void *)) // I - Write callback { char quote; // Quote character const char *name; // Entity name if (*s == '\"' || *s == '\'') { // Write a quoted name string... if ((*putc_cb)(*s, p) < 0) return (-1); quote = *s++; while (*s && *s != quote) { if ((name = mxmlEntityGetName(*s)) != NULL) { if ((*putc_cb)('&', p) < 0) return (-1); while (*name) { if ((*putc_cb)(*name, p) < 0) return (-1); name ++; } if ((*putc_cb)(';', p) < 0) return (-1); } else if ((*putc_cb)(*s, p) < 0) { return (-1); } s ++; } // Write the end quote... if ((*putc_cb)(quote, p) < 0) return (-1); } else { // Write a non-quoted name string... while (*s) { if ((*putc_cb)(*s, p) < 0) return (-1); s ++; } } return (0); } // // 'mxml_write_node()' - Save an XML node to a file. // static int // O - Column or -1 on error mxml_write_node(mxml_node_t *node, // I - Node to write void *p, // I - File to write to mxml_save_cb_t cb, // I - Whitespace callback int col, // I - Current column _mxml_putc_cb_t putc_cb,// I - Output callback _mxml_global_t *global)// I - Global data { mxml_node_t *current, // Current node *next; // Next node int i, // Looping var width; // Width of attr + value _mxml_attr_t *attr; // Current attribute char s[255]; // Temporary string const char *ptr; // Pointer into string // Loop through this node and all of its children... for (current = node; current; current = next) { // Print the node value... switch (current->type) { case MXML_TYPE_CDATA : col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb); if ((*putc_cb)('<', p) < 0) return (-1); else col ++; if ((*putc_cb)('!', p) < 0) return (-1); else col ++; if ((*putc_cb)('[', p) < 0) return (-1); else col ++; if ((*putc_cb)('C', p) < 0) return (-1); else col ++; if ((*putc_cb)('D', p) < 0) return (-1); else col ++; if ((*putc_cb)('A', p) < 0) return (-1); else col ++; if ((*putc_cb)('T', p) < 0) return (-1); else col ++; if ((*putc_cb)('A', p) < 0) return (-1); else col ++; if ((*putc_cb)('[', p) < 0) return (-1); else col ++; for (ptr = current->value.cdata; *ptr; ptr ++) { if ((*putc_cb)(*ptr, p) < 0) return (-1); else if (*ptr == '\n') col = 0; else col ++; } if ((*putc_cb)(']', p) < 0) return (-1); else col ++; if ((*putc_cb)(']', p) < 0) return (-1); else col ++; if ((*putc_cb)('>', p) < 0) return (-1); else col ++; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); break; case MXML_TYPE_COMMENT : col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb); if ((*putc_cb)('<', p) < 0) return (-1); else col ++; if ((*putc_cb)('!', p) < 0) return (-1); else col ++; if ((*putc_cb)('-', p) < 0) return (-1); else col ++; if ((*putc_cb)('-', p) < 0) return (-1); else col ++; for (ptr = current->value.comment; *ptr; ptr ++) { if ((*putc_cb)(*ptr, p) < 0) return (-1); else if (*ptr == '\n') col = 0; else col ++; } if ((*putc_cb)('-', p) < 0) return (-1); else col ++; if ((*putc_cb)('-', p) < 0) return (-1); else col ++; if ((*putc_cb)('>', p) < 0) return (-1); else col ++; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); break; case MXML_TYPE_DECLARATION : col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb); if ((*putc_cb)('<', p) < 0) return (-1); else col ++; if ((*putc_cb)('!', p) < 0) return (-1); else col ++; for (ptr = current->value.declaration; *ptr; ptr ++) { if ((*putc_cb)(*ptr, p) < 0) return (-1); else if (*ptr == '\n') col = 0; else col ++; } if ((*putc_cb)('>', p) < 0) return (-1); else col ++; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); break; case MXML_TYPE_DIRECTIVE : col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb); if ((*putc_cb)('<', p) < 0) return (-1); else col ++; if ((*putc_cb)('?', p) < 0) return (-1); else col ++; for (ptr = current->value.directive; *ptr; ptr ++) { if ((*putc_cb)(*ptr, p) < 0) return (-1); else if (*ptr == '\n') col = 0; else col ++; } if ((*putc_cb)('?', p) < 0) return (-1); else col ++; if ((*putc_cb)('>', p) < 0) return (-1); else col ++; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); break; case MXML_TYPE_ELEMENT : col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_OPEN, col, putc_cb); if ((*putc_cb)('<', p) < 0) return (-1); else if (mxml_write_name(current->value.element.name, p, putc_cb) < 0) return (-1); col += strlen(current->value.element.name) + 1; for (i = current->value.element.num_attrs, attr = current->value.element.attrs; i > 0; i --, attr ++) { width = (int)strlen(attr->name); if (attr->value) width += strlen(attr->value) + 3; if (global->wrap > 0 && (col + width) > global->wrap) { if ((*putc_cb)('\n', p) < 0) return (-1); col = 0; } else { if ((*putc_cb)(' ', p) < 0) return (-1); col ++; } if (mxml_write_name(attr->name, p, putc_cb) < 0) return (-1); if (attr->value) { if ((*putc_cb)('=', p) < 0) return (-1); if ((*putc_cb)('\"', p) < 0) return (-1); if (mxml_write_string(attr->value, p, putc_cb) < 0) return (-1); if ((*putc_cb)('\"', p) < 0) return (-1); } col += width; } if (current->child) { // Write children... if ((*putc_cb)('>', p) < 0) return (-1); else col ++; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); } else { if ((*putc_cb)('/', p) < 0) return (-1); if ((*putc_cb)('>', p) < 0) return (-1); col += 2; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_OPEN, col, putc_cb); } break; case MXML_TYPE_INTEGER : if (current->prev) { if (global->wrap > 0 && col > global->wrap) { if ((*putc_cb)('\n', p) < 0) return (-1); col = 0; } else if ((*putc_cb)(' ', p) < 0) { return (-1); } else { col ++; } } snprintf(s, sizeof(s), "%ld", current->value.integer); if (mxml_write_string(s, p, putc_cb) < 0) return (-1); col += strlen(s); break; case MXML_TYPE_OPAQUE : if (mxml_write_string(current->value.opaque, p, putc_cb) < 0) return (-1); col += strlen(current->value.opaque); break; case MXML_TYPE_REAL : if (current->prev) { if (global->wrap > 0 && col > global->wrap) { if ((*putc_cb)('\n', p) < 0) return (-1); col = 0; } else if ((*putc_cb)(' ', p) < 0) { return (-1); } else { col ++; } } // TODO: Provide locale-neutral formatting/scanning code for REAL snprintf(s, sizeof(s), "%f", current->value.real); if (mxml_write_string(s, p, putc_cb) < 0) return (-1); col += strlen(s); break; case MXML_TYPE_TEXT : if (current->value.text.whitespace && col > 0) { if (global->wrap > 0 && col > global->wrap) { if ((*putc_cb)('\n', p) < 0) return (-1); col = 0; } else if ((*putc_cb)(' ', p) < 0) { return (-1); } else { col ++; } } if (mxml_write_string(current->value.text.string, p, putc_cb) < 0) return (-1); col += strlen(current->value.text.string); break; case MXML_TYPE_CUSTOM : if (global->custom_save_cb) { char *data; // Custom data string const char *newline; // Last newline in string if ((data = (*global->custom_save_cb)(current)) == NULL) return (-1); if (mxml_write_string(data, p, putc_cb) < 0) return (-1); if ((newline = strrchr(data, '\n')) == NULL) col += strlen(data); else col = (int)strlen(newline); free(data); break; } default : // Should never happen return (-1); } // Figure out the next node... if ((next = current->child) == NULL) { if (current == node) { // Don't traverse to sibling node if we are at the "root" node... next = NULL; } else { // Try the next sibling, and continue traversing upwards as needed... while ((next = current->next) == NULL) { if (current == node || !current->parent) break; // Declarations and directives have no end tags... current = current->parent; if (current->type == MXML_TYPE_ELEMENT) { col = mxml_write_ws(current, p, cb, MXML_WS_BEFORE_CLOSE, col, putc_cb); if ((*putc_cb)('<', p) < 0) return (-1); if ((*putc_cb)('/', p) < 0) return (-1); if (mxml_write_string(current->value.element.name, p, putc_cb) < 0) return (-1); if ((*putc_cb)('>', p) < 0) return (-1); col += strlen(current->value.element.name) + 3; col = mxml_write_ws(current, p, cb, MXML_WS_AFTER_CLOSE, col, putc_cb); } if (current == node) break; } } } } return (col); } // // 'mxml_write_string()' - Write a string, escaping & and < as needed. // static int // O - 0 on success, -1 on failure mxml_write_string( const char *s, // I - String to write void *p, // I - Write pointer _mxml_putc_cb_t putc_cb) // I - Write callback { const char *name; // Entity name, if any while (*s) { if ((name = mxmlEntityGetName(*s)) != NULL) { if ((*putc_cb)('&', p) < 0) return (-1); while (*name) { if ((*putc_cb)(*name, p) < 0) return (-1); name ++; } if ((*putc_cb)(';', p) < 0) return (-1); } else if ((*putc_cb)(*s, p) < 0) { return (-1); } s ++; } return (0); } // // 'mxml_write_ws()' - Do whitespace callback... // static int // O - New column mxml_write_ws(mxml_node_t *node, // I - Current node void *p, // I - Write pointer mxml_save_cb_t cb, // I - Callback function int ws, // I - Where value int col, // I - Current column _mxml_putc_cb_t putc_cb) // I - Write callback { const char *s; // Whitespace string if (cb && (s = (*cb)(node, ws)) != NULL) { while (*s) { if ((*putc_cb)(*s, p) < 0) { return (-1); } else if (*s == '\n') { col = 0; } else if (*s == '\t') { col += MXML_TAB; col = col - (col % MXML_TAB); } else { col ++; } s ++; } } return (col); }