You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
mxml/mxml-file.c

1003 lines
20 KiB

/*
* "$Id: mxml-file.c,v 1.9 2003/06/04 23:20:31 mike Exp $"
*
* File loading code for mini-XML, a small XML-like file parsing library.
*
* Copyright 2003 by Michael Sweet.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Contents:
*
* mxmlLoadFile() - Load a file into an XML node tree.
* mxmlSaveFile() - Save an XML tree to a file.
* mxml_parse_element() - Parse an element for any attributes...
* mxml_write_node() - Save an XML node to a file.
* mxml_write_string() - Write a string, escaping & and < as needed.
* mxml_write_ws() - Do whitespace callback...
*/
/*
* Include necessary headers...
*/
#include "mxml.h"
/*
* Local functions...
*/
/* Parse the attributes of an element; returns the terminating character. */
static int mxml_parse_element(mxml_node_t *node, FILE *fp);
/* Write a node, its children and its following siblings; returns the column or -1. */
static int mxml_write_node(mxml_node_t *node, FILE *fp,
int (*cb)(mxml_node_t *, int), int col);
/* Write a string with entity escaping; returns 0 on success, -1 on failure. */
static int mxml_write_string(const char *s, FILE *fp);
/* Write the whitespace character chosen by the callback; returns the new column or -1. */
static int mxml_write_ws(mxml_node_t *node, FILE *fp,
int (*cb)(mxml_node_t *, int), int ws, int col);
/*
 * 'mxmlLoadFile()' - Load a file into an XML node tree.
 *
 * Characters are read from "fp" until EOF or the first error.  Data found
 * between tags is turned into value nodes whose type is chosen by calling
 * "cb" with the current parent element; when "cb" or the parent is NULL at
 * the start, MXML_TEXT is used.  Errors are reported on stderr and stop
 * parsing; nodes created before the error are left in the tree.
 *
 * The return value is found by walking up from the element that was still
 * open when parsing stopped until a node directly below "top" (or a node
 * without a parent) is reached.  If parsing stopped at "top" itself, "top"
 * is returned.
 *
 * NOTE(review): when "top" is NULL and every tag has been closed, no element
 * is open at the end and NULL is returned even though nodes were created.
 */

mxml_node_t *                           /* O - First node */
mxmlLoadFile(mxml_node_t *top,          /* I - Top node */
             FILE        *fp,           /* I - File to read from */
             mxml_type_t (*cb)(mxml_node_t *))
                                        /* I - Callback function */
{
  mxml_node_t   *node,                  /* Current node */
                *parent;                /* Current parent node */
  int           ch,                     /* Character from file */
                whitespace;             /* Non-zero if whitespace seen */
  char          buffer[16384],          /* String buffer */
                *bufptr;                /* Pointer into buffer */
  mxml_type_t   type;                   /* Current node type */


 /*
  * Read elements and other nodes from the file...
  */

  bufptr     = buffer;
  parent     = top;
  whitespace = 0;

  if (cb && parent)
    type = (*cb)(parent);
  else
    type = MXML_TEXT;

  while ((ch = getc(fp)) != EOF)
  {
    if ((ch == '<' || (isspace(ch) && type != MXML_OPAQUE)) && bufptr > buffer)
    {
     /*
      * Add a new value node...
      */

      *bufptr = '\0';

      switch (type)
      {
        case MXML_INTEGER :
            /* strtol() leaves bufptr at the first unparsed character */
            node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0));
            break;

        case MXML_OPAQUE :
            node = mxmlNewOpaque(parent, buffer);
            break;

        case MXML_REAL :
            /* strtod() leaves bufptr at the first unparsed character */
            node = mxmlNewReal(parent, strtod(buffer, &bufptr));
            break;

        case MXML_TEXT :
            node = mxmlNewText(parent, whitespace, buffer);
            break;

        default : /* Should never happen... */
            node = NULL;
            break;
      }

      if (*bufptr)
      {
       /*
        * Bad integer/real number value...
        */

        fprintf(stderr, "Bad %s value '%s' in parent <%s>!\n",
                type == MXML_INTEGER ? "integer" : "real", buffer,
                parent ? parent->value.element.name : "null");
        break;
      }

      bufptr     = buffer;
      whitespace = isspace(ch) && type == MXML_TEXT;

      if (!node)
      {
       /*
        * Just print error for now...
        */

        fprintf(stderr, "Unable to add value node of type %d to parent <%s>!\n",
                type, parent ? parent->value.element.name : "null");
        break;
      }
    }
    else if (isspace(ch) && type == MXML_TEXT)
      whitespace = 1;

   /*
    * Add lone whitespace node if we are starting a new element and have
    * existing whitespace...
    */

    if (ch == '<' && whitespace && type == MXML_TEXT)
    {
     /*
      * Peek at the next character and only do this if we are starting
      * an open tag...
      */

      ch = getc(fp);
      ungetc(ch, fp);

      if (ch != '/')
      {
        mxmlNewText(parent, whitespace, "");
        whitespace = 0;
      }

      ch = '<';
    }

    if (ch == '<')
    {
     /*
      * Start of open/close tag...
      */

      bufptr = buffer;

      while ((ch = getc(fp)) != EOF)
        if (isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
          break;
        else if (bufptr < (buffer + sizeof(buffer) - 1))
        {
          *bufptr++ = ch;

          /* Stop right after "!--" so the comment text is gathered below */
          if ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3))
            break;
        }

      *bufptr = '\0';

      if (!strcmp(buffer, "!--"))
      {
       /*
        * Gather rest of comment...
        */

        while ((ch = getc(fp)) != EOF)
        {
          if (ch == '>' && bufptr > (buffer + 4) &&
              !strncmp(bufptr - 2, "--", 2))
            break;
          else if (bufptr < (buffer + sizeof(buffer) - 1))
            *bufptr++ = ch;
          else
          {
            fprintf(stderr, "Comment too long in file under parent <%s>!\n",
                    parent ? parent->value.element.name : "null");
            break;
          }
        }

       /*
        * Error out if we didn't get the whole comment...
        */

        if (ch != '>')
          break;

       /*
        * Otherwise add this as an element under the current parent...
        */

        *bufptr = '\0';

        if (!mxmlNewElement(parent, buffer))
        {
         /*
          * Just print error for now...
          */

          fprintf(stderr, "Unable to add comment node to parent <%s>!\n",
                  parent ? parent->value.element.name : "null");
          break;
        }
      }
      else if (buffer[0] == '!')
      {
       /*
        * Gather rest of declaration...
        */

        do
        {
          if (ch == '>')
            break;
          else if (bufptr < (buffer + sizeof(buffer) - 1))
            *bufptr++ = ch;
          else
          {
            fprintf(stderr, "Declaration too long in file under parent <%s>!\n",
                    parent ? parent->value.element.name : "null");
            break;
          }
        }
        while ((ch = getc(fp)) != EOF);

       /*
        * Error out if we didn't get the whole declaration...
        */

        if (ch != '>')
          break;

       /*
        * Otherwise add this as an element under the current parent...
        */

        *bufptr = '\0';

        node = mxmlNewElement(parent, buffer);
        if (!node)
        {
         /*
          * Just print error for now...
          */

          fprintf(stderr, "Unable to add declaration node to parent <%s>!\n",
                  parent ? parent->value.element.name : "null");
          break;
        }

       /*
        * Descend into this node, setting the value type as needed...
        */

        parent = node;

        if (cb && parent)
          type = (*cb)(parent);
      }
      else if (buffer[0] == '/')
      {
       /*
        * Handle close tag...
        */

        if (!parent || strcmp(buffer + 1, parent->value.element.name))
        {
         /*
          * Close tag doesn't match tree; print an error for now.  The
          * parent may be NULL here, so guard the name lookup...
          */

          fprintf(stderr, "Mismatched close tag <%s> under parent <%s>!\n",
                  buffer, parent ? parent->value.element.name : "null");
          break;
        }

       /*
        * Keep reading until we see >...
        */

        while (ch != '>' && ch != EOF)
          ch = getc(fp);

       /*
        * Ascend into the parent and set the value type as needed...
        */

        parent = parent->parent;

        if (cb && parent)
          type = (*cb)(parent);
      }
      else
      {
       /*
        * Handle open tag...
        */

        node = mxmlNewElement(parent, buffer);

        if (!node)
        {
         /*
          * Just print error for now...
          */

          fprintf(stderr, "Unable to add element node to parent <%s>!\n",
                  parent ? parent->value.element.name : "null");
          break;
        }

        if (isspace(ch))
          ch = mxml_parse_element(node, fp);
        else if (ch == '/')
        {
          if ((ch = getc(fp)) != '>')
          {
            fprintf(stderr, "Expected > but got '%c' instead for element <%s/>!\n",
                    ch, buffer);
            break;
          }

          ch = '/';
        }

        if (ch == EOF)
          break;

        if (ch != '/')
        {
         /*
          * Descend into this node, setting the value type as needed...
          */

          parent = node;

          if (cb && parent)
            type = (*cb)(parent);
        }
      }

      bufptr = buffer;
    }
    else if (ch == '&')
    {
     /*
      * Add character entity to current buffer...  Currently we only
      * support &lt;, &amp;, &gt;, &nbsp;, &quot;, &#nnn;, and &#xXXXX;...
      */

      char      entity[64],             /* Entity string */
                *entptr;                /* Pointer into entity */


      entity[0] = ch;
      entptr    = entity + 1;

      while ((ch = getc(fp)) != EOF)
        if (!isalnum(ch) && ch != '#')
          break;
        else if (entptr < (entity + sizeof(entity) - 1))
          *entptr++ = ch;
        else
        {
          fprintf(stderr, "Entity name too long under parent <%s>!\n",
                  parent ? parent->value.element.name : "null");
          break;
        }

      *entptr = '\0';

      if (ch != ';')
      {
        fprintf(stderr, "Entity name \"%s\" not terminated under parent <%s>!\n",
                entity, parent ? parent->value.element.name : "null");
        break;
      }

      if (entity[1] == '#')
      {
        if (entity[2] == 'x')
          ch = strtol(entity + 3, NULL, 16);
        else
          ch = strtol(entity + 2, NULL, 10);
      }
      else if (!strcmp(entity, "&amp"))
        ch = '&';
      else if (!strcmp(entity, "&gt"))
        ch = '>';
      else if (!strcmp(entity, "&lt"))
        ch = '<';
      else if (!strcmp(entity, "&nbsp"))
        ch = 0xa0;
      else if (!strcmp(entity, "&quot"))
        ch = '\"';
      else
      {
        fprintf(stderr, "Entity name \"%s;\" not supported under parent <%s>!\n",
                entity, parent ? parent->value.element.name : "null");
        break;
      }

      if (ch < 128)
      {
       /*
        * Plain ASCII doesn't need special encoding...
        */

        if (bufptr < (buffer + sizeof(buffer) - 1))
          *bufptr++ = ch;
        else
        {
          fprintf(stderr, "String too long in file under parent <%s>!\n",
                  parent ? parent->value.element.name : "null");
          break;
        }
      }
      else
      {
       /*
        * Use UTF-8 encoding for the Unicode char...
        */

        if (bufptr < (buffer + sizeof(buffer) - 5))
        {
          if (ch < 2048)
          {
            *bufptr++ = 0xc0 | (ch >> 6);
            *bufptr++ = 0x80 | (ch & 63);
          }
          else if (ch < 65536)
          {
            *bufptr++ = 0xe0 | (ch >> 12);
            *bufptr++ = 0x80 | ((ch >> 6) & 63);
            *bufptr++ = 0x80 | (ch & 63);
          }
          else
          {
            *bufptr++ = 0xf0 | (ch >> 18);
            *bufptr++ = 0x80 | ((ch >> 12) & 63);
            *bufptr++ = 0x80 | ((ch >> 6) & 63);
            *bufptr++ = 0x80 | (ch & 63);
          }
        }
        else
        {
          fprintf(stderr, "String too long in file under parent <%s>!\n",
                  parent ? parent->value.element.name : "null");
          break;
        }
      }
    }
    else if (type == MXML_OPAQUE || !isspace(ch))
    {
     /*
      * Add character to current buffer...
      */

      if (bufptr < (buffer + sizeof(buffer) - 1))
        *bufptr++ = ch;
      else
      {
        fprintf(stderr, "String too long in file under parent <%s>!\n",
                parent ? parent->value.element.name : "null");
        break;
      }
    }
  }

 /*
  * Find the top element and return it.  Stop at "top" itself and at any
  * node without a parent so the walk can never step through a NULL
  * pointer (parsing may end at "top", or a close tag may have moved the
  * current parent above it)...
  */

  if (parent)
  {
    while (parent != top && parent->parent && parent->parent != top)
      parent = parent->parent;
  }

  return (parent);
}
/*
 * 'mxmlSaveFile()' - Save an XML tree to a file.
 *
 * The node is written starting at column 0.  If the output does not end at
 * column 0, a newline is appended so the file ends with a complete line.
 */

int                                     /* O - 0 on success, -1 on error */
mxmlSaveFile(mxml_node_t *node,         /* I - Node to write */
             FILE        *fp,           /* I - File to write to */
             int         (*cb)(mxml_node_t *, int))
                                        /* I - Whitespace callback */
{
  int   last_col;                       /* Column after the last node */


  last_col = mxml_write_node(node, fp, cb, 0);

  /* A negative column means the writer reported an error */
  if (last_col < 0)
    return (-1);

  /* Terminate a partially filled last line */
  if (last_col > 0 && putc('\n', fp) < 0)
    return (-1);

  return (0);
}
/*
 * 'mxml_parse_element()' - Parse an element for any attributes...
 *
 * Reads "name", "name=value", "name='value'" and "name=\"value\"" items
 * from the file and stores each one on the element with
 * mxmlElementSetAttr().  An attribute without "=" gets an empty value.
 * Parsing stops at ">", "/>", "?>" or EOF.  Names and values are limited
 * to 255 characters; longer ones are reported on stderr and abort parsing.
 *
 * The return value is '>', '/' or '?' for the terminator that was seen, or
 * EOF on end-of-file or error.
 */

static int                              /* O - Terminating character */
mxml_parse_element(mxml_node_t *node,   /* I - Element node */
                   FILE        *fp)     /* I - File to read from */
{
  int   c,                              /* Most recent character read */
        next,                           /* Character after '/' or '?' */
        delim;                          /* Quote character around a value */
  char  attrname[256],                  /* Attribute name */
        attrvalue[256],                 /* Attribute value */
        *out,                           /* Write position in name/value */
        *limit;                         /* Last usable position (for nul) */


  for (;;)
  {
    c = getc(fp);

    if (c == EOF)
      break;

    /* Whitespace between attributes is ignored */
    if (isspace(c))
      continue;

    /* A plain '>' ends the open tag */
    if (c == '>')
      break;

    /* '/' and '?' must be followed immediately by '>' */
    if (c == '/' || c == '?')
    {
      next = getc(fp);

      if (next != '>')
      {
        fprintf(stderr, "Expected '>' after '%c' for element %s, but got '%c'!\n",
                c, node->value.element.name, next);
        c = EOF;
      }

      break;
    }

    /* Collect the attribute name; its first character is already in c */
    attrname[0] = c;
    out         = attrname + 1;
    limit       = attrname + sizeof(attrname) - 1;

    for (c = getc(fp); c != EOF; c = getc(fp))
    {
      if (isspace(c) || c == '=' || c == '/' || c == '>' || c == '?')
        break;

      if (out >= limit)
      {
        fprintf(stderr, "Attribute name too long for element %s!\n",
                node->value.element.name);
        return (EOF);
      }

      *out++ = c;
    }

    *out = '\0';

    if (c != '=')
    {
      /* No "=", so the attribute has an empty value */
      attrvalue[0] = '\0';
    }
    else
    {
      /* The character after "=" decides between quoted and unquoted */
      c = getc(fp);

      if (c == EOF)
      {
        fprintf(stderr, "Missing value for attribute '%s' in element %s!\n",
                attrname, node->value.element.name);
        return (EOF);
      }

      limit = attrvalue + sizeof(attrvalue) - 1;

      if (c == '\'' || c == '\"')
      {
        /* Quoted value: everything up to the matching quote */
        delim = c;
        out   = attrvalue;

        for (c = getc(fp); c != EOF && c != delim; c = getc(fp))
        {
          if (out >= limit)
          {
            fprintf(stderr, "Attribute value too long for attribute '%s' in element %s!\n",
                    attrname, node->value.element.name);
            return (EOF);
          }

          *out++ = c;
        }
      }
      else
      {
        /* Unquoted value: c is already its first character */
        attrvalue[0] = c;
        out          = attrvalue + 1;

        for (c = getc(fp); c != EOF; c = getc(fp))
        {
          if (isspace(c) || c == '=' || c == '/' || c == '>')
            break;

          if (out >= limit)
          {
            fprintf(stderr, "Attribute value too long for attribute '%s' in element %s!\n",
                    attrname, node->value.element.name);
            return (EOF);
          }

          *out++ = c;
        }
      }

      *out = '\0';
    }

    /* Push a tag terminator back so the next pass of the loop handles it */
    if (c == '/' || c == '>' || c == '?')
      ungetc(c, fp);

    mxmlElementSetAttr(node, attrname, attrvalue);
  }

  return (c);
}
/*
 * 'mxml_write_node()' - Save an XML node to a file.
 *
 * Writes "node" and every sibling reachable through node->next, recursing
 * into the children of element nodes.  "col" is the current output column
 * and is used to wrap attributes and values once MXML_WRAP is exceeded.
 * The whitespace callback "cb" (may be NULL) is consulted through
 * mxml_write_ws() before/after open and close tags.
 *
 * Any failed write, including one reported by mxml_write_ws(), makes the
 * function return -1 immediately.
 */

static int                              /* O - Column or -1 on error */
mxml_write_node(mxml_node_t *node,      /* I - Node to write */
                FILE        *fp,        /* I - File to write to */
                int         (*cb)(mxml_node_t *, int),
                                        /* I - Whitespace callback */
                int         col)        /* I - Current column */
{
  int           i;                      /* Looping var */
  int           n;                      /* Chars written */
  mxml_attr_t   *attr;                  /* Current attribute */


  while (node != NULL)
  {
   /*
    * Print the node value...
    */

    switch (node->type)
    {
      case MXML_ELEMENT :
          if ((col = mxml_write_ws(node, fp, cb, MXML_WS_BEFORE_OPEN, col)) < 0)
            return (-1);

          if ((n = fprintf(fp, "<%s", node->value.element.name)) < 0)
            return (-1);

          col += n;

          for (i = node->value.element.num_attrs, attr = node->value.element.attrs;
               i > 0;
               i --, attr ++)
          {
            /* +3 accounts for the '=' and the two quotes */
            if ((col + strlen(attr->name) + strlen(attr->value) + 3) > MXML_WRAP)
            {
              if (putc('\n', fp) < 0)
                return (-1);

              col = 0;
            }
            else
            {
              if (putc(' ', fp) < 0)
                return (-1);

              col ++;
            }

            if ((n = fprintf(fp, "%s=\"%s\"", attr->name, attr->value)) < 0)
              return (-1);

            col += n;
          }

          if (node->child)
          {
           /*
            * The ? and ! elements are special-cases and have no end tags...
            */

            if (node->value.element.name[0] == '?')
            {
              if (fputs("?>\n", fp) < 0)
                return (-1);

              col = 0;
            }
            else if (putc('>', fp) < 0)
              return (-1);
            else
              col ++;

            if ((col = mxml_write_ws(node, fp, cb, MXML_WS_AFTER_OPEN, col)) < 0)
              return (-1);

            if ((col = mxml_write_node(node->child, fp, cb, col)) < 0)
              return (-1);

            if (node->value.element.name[0] != '?' &&
                node->value.element.name[0] != '!')
            {
              if ((col = mxml_write_ws(node, fp, cb, MXML_WS_BEFORE_CLOSE, col)) < 0)
                return (-1);

              if ((n = fprintf(fp, "</%s>", node->value.element.name)) < 0)
                return (-1);

              col += n;

              if ((col = mxml_write_ws(node, fp, cb, MXML_WS_AFTER_CLOSE, col)) < 0)
                return (-1);
            }
          }
          else if (node->value.element.name[0] == '!')
          {
            /* Childless "!" elements are closed with a bare '>' */
            if (putc('>', fp) < 0)
              return (-1);
            else
              col ++;

            if ((col = mxml_write_ws(node, fp, cb, MXML_WS_AFTER_OPEN, col)) < 0)
              return (-1);
          }
          else if (fputs("/>", fp) < 0)
            return (-1);
          else
          {
            col += 2;

            if ((col = mxml_write_ws(node, fp, cb, MXML_WS_AFTER_OPEN, col)) < 0)
              return (-1);
          }
          break;

      case MXML_INTEGER :
          /* Separate from the previous sibling with a space or line break */
          if (node->prev)
          {
            if (col > MXML_WRAP)
            {
              if (putc('\n', fp) < 0)
                return (-1);

              col = 0;
            }
            else if (putc(' ', fp) < 0)
              return (-1);
            else
              col ++;
          }

          if ((n = fprintf(fp, "%d", node->value.integer)) < 0)
            return (-1);

          col += n;
          break;

      case MXML_OPAQUE :
          if (mxml_write_string(node->value.opaque, fp) < 0)
            return (-1);

          col += strlen(node->value.opaque);
          break;

      case MXML_REAL :
          /* Separate from the previous sibling with a space or line break */
          if (node->prev)
          {
            if (col > MXML_WRAP)
            {
              if (putc('\n', fp) < 0)
                return (-1);

              col = 0;
            }
            else if (putc(' ', fp) < 0)
              return (-1);
            else
              col ++;
          }

          if ((n = fprintf(fp, "%f", node->value.real)) < 0)
            return (-1);

          col += n;
          break;

      case MXML_TEXT :
          /* Leading whitespace is only written when not at column 0 */
          if (node->value.text.whitespace && col > 0)
          {
            if (col > MXML_WRAP)
            {
              if (putc('\n', fp) < 0)
                return (-1);

              col = 0;
            }
            else if (putc(' ', fp) < 0)
              return (-1);
            else
              col ++;
          }

          if (mxml_write_string(node->value.text.string, fp) < 0)
            return (-1);

          col += strlen(node->value.text.string);
          break;
    }

   /*
    * Next node...
    */

    node = node->next;
  }

  return (col);
}
/*
 * 'mxml_write_string()' - Write a string, escaping special characters.
 *
 * '&', '<', '>' and '"' are written as "&amp;", "&lt;", "&gt;" and
 * "&quot;".  Bytes with the high bit set are treated as UTF-8: a lead byte
 * followed by the right number of continuation bytes (2-, 3- or 4-byte
 * sequences) is decoded and written as "&#xHHHH;", or as "&nbsp;" for
 * U+00A0.  A high-bit byte that does not start such a sequence is written
 * on its own as "&#xHH;", so a truncated sequence at the end of the string
 * is never read past the terminating nul.  All other characters are
 * written unchanged.
 */

static int                              /* O - 0 on success, -1 on failure */
mxml_write_string(const char *s,        /* I - String to write */
                  FILE       *fp)       /* I - File to write to */
{
  int   ch;                             /* Unicode character */
  int   extra;                          /* Continuation bytes expected */
  int   i;                              /* Looping var */


  while (*s)
  {
    if (*s == '&')
    {
      if (fputs("&amp;", fp) < 0)
        return (-1);
    }
    else if (*s == '<')
    {
      if (fputs("&lt;", fp) < 0)
        return (-1);
    }
    else if (*s == '>')
    {
      if (fputs("&gt;", fp) < 0)
        return (-1);
    }
    else if (*s == '\"')
    {
      if (fputs("&quot;", fp) < 0)
        return (-1);
    }
    else if (*s & 128)
    {
     /*
      * Convert UTF-8 to Unicode constant...
      */

      ch = *s & 255;

      if ((ch & 0xe0) == 0xc0)
        extra = 1;
      else if ((ch & 0xf0) == 0xe0)
        extra = 2;
      else if ((ch & 0xf8) == 0xf0)
        extra = 3;
      else
        extra = 0;

     /*
      * Only decode when every continuation byte is present (10xxxxxx);
      * the nul terminator fails this check, which stops the scan before
      * the end of the string...
      */

      for (i = 1; i <= extra; i ++)
        if ((s[i] & 0xc0) != 0x80)
          break;

      if (extra > 0 && i > extra)
      {
        /* Payload mask of the lead byte: 0x1f, 0x0f or 0x07 */
        ch &= 0x3f >> extra;

        for (i = 1; i <= extra; i ++)
          ch = (ch << 6) | (s[i] & 0x3f);

        s += extra;
      }

      if (ch == 0xa0)
      {
       /*
        * Handle non-breaking space as-is...
        */

        if (fputs("&nbsp;", fp) < 0)
          return (-1);
      }
      else if (fprintf(fp, "&#x%x;", ch) < 0)
        return (-1);
    }
    else if (putc(*s, fp) < 0)
      return (-1);

    s ++;
  }

  return (0);
}
/*
 * 'mxml_write_ws()' - Do whitespace callback...
 *
 * Asks the callback which character to write at position "ws" for "node".
 * Nothing is written when there is no callback or it returns 0.  A newline
 * resets the column to 0, a tab advances it to the next multiple of
 * MXML_TAB, and any other character advances it by one.
 */

static int                              /* O - New column, -1 if the write fails */
mxml_write_ws(mxml_node_t *node,        /* I - Current node */
              FILE        *fp,          /* I - File to write to */
              int         (*cb)(mxml_node_t *, int),
                                        /* I - Callback function */
              int         ws,           /* I - Where value */
              int         col)          /* I - Current column */
{
  int   wschar;                         /* Character chosen by the callback */


  if (!cb)
    return (col);

  wschar = (*cb)(node, ws);

  if (wschar == 0)
    return (col);

  if (putc(wschar, fp) < 0)
    return (-1);

  switch (wschar)
  {
    case '\n' :
        return (0);

    case '\t' :
        /* Round up to the next tab stop */
        col += MXML_TAB;
        return (col - (col % MXML_TAB));

    default :
        return (col + 1);
  }
}
/*
* End of "$Id: mxml-file.c,v 1.9 2003/06/04 23:20:31 mike Exp $".
*/