Commit 438ded65 authored by Michael R Sweet

Whitespace handling issues.

Unicode output issues.

Comment/declaration handling fixes.

Add mxmldoc to build.
parent bfc6a08c
README - 06/03/2003
README - 06/04/2003
-------------------
......@@ -9,6 +9,17 @@ CHANGES IN Mini-XML 0.93
character entities.
- mxmlSaveFile() now uses newlines as whitespace
when valid to do so.
- mxmlFindElement() now also takes attribute name and
attribute value string arguments to limit the search
to specific elements with attributes and/or values.
NULL pointers can be used as "wildcards".
- Added uninstall target to makefile, and auto-reconfig
if Makefile.in or configure.in are changed.
- mxmlFindElement(), mxmlWalkNext(), and mxmlWalkPrev()
now all provide "descend" arguments to control whether
they descend into child nodes in the tree.
- Fixed some whitespace issues in mxmlLoadFile().
- Fixed Unicode output issues in mxmlSaveFile().
CHANGES IN Mini-XML 0.92
......
#
# "$Id: Makefile.in,v 1.3 2003/06/04 00:25:59 mike Exp $"
# "$Id: Makefile.in,v 1.4 2003/06/04 16:30:39 mike Exp $"
#
# Makefile for mini-XML, a small XML-like file parsing library.
#
......@@ -53,15 +53,15 @@ libdir = @libdir@
#
LIBOBJS = mxml-attr.o mxml-file.o mxml-node.o mxml-search.o
OBJS = testmxml.o $(LIBOBJS)
TARGETS = libmxml.a testmxml
OBJS = mxmldoc.o testmxml.o $(LIBOBJS)
TARGETS = libmxml.a mxmldoc testmxml
#
# Make everything...
#
all: $(TARGETS)
all: Makefile configure $(TARGETS)
#
......@@ -86,6 +86,37 @@ install: $(TARGETS)
cp mxml.h $(includedir)
#
# Uninstall everything...
#
uninstall:
rm -f $(libdir)/libmxml.a
rm -f $(includedir)/mxml.h
#
# autoconf stuff...
#
Makefile: configure Makefile.in
if test -f config.status; then \
./config.status --recheck; \
./config.status; \
else \
./configure; \
fi
configure: configure.in
autoconf
if test -f config.status; then \
./config.status --recheck; \
./config.status; \
else \
./configure; \
fi
#
# libmxml.a
#
......@@ -98,6 +129,16 @@ libmxml.a: $(LIBOBJS)
$(LIBOBJS): mxml.h
#
# mxmldoc
#
mxmldoc: libmxml.a mxmldoc.o
$(CC) $(LDFLAGS) -o $@ mxmldoc.o libmxml.a
mxmldoc.o: mxml.h
#
# testmxml
#
......@@ -116,9 +157,14 @@ testmxml: libmxml.a testmxml.o
testmxml.o: mxml.h
#
# All object files depend on the makefile...
#
$(OBJS): Makefile
#
# End of "$Id: Makefile.in,v 1.3 2003/06/04 00:25:59 mike Exp $".
# End of "$Id: Makefile.in,v 1.4 2003/06/04 16:30:39 mike Exp $".
#
README - 06/03/2003
README - 06/04/2003
-------------------
INTRODUCTION
This README file describes the Mini-XML library version
0.91.
0.93.
Mini-XML is a small XML parsing library that you can use to
read XML and XML-like data files in your application without
......@@ -92,15 +92,39 @@ DOCUMENTATION
You can find a named element/node using the
"mxmlFindElement()" function:
mxml_node_t *node = mxmlFindElement(tree, tree, "name");
mxml_node_t *node = mxmlFindElement(tree, tree, "name", "attr",
"value", MXML_DESCEND);
The "name", "attr", and "value" arguments can be passed as
NULL to act as wildcards, e.g.:
/* Find the first "a" element */
node = mxmlFindElement(tree, tree, "a", NULL, NULL, MXML_DESCEND);
/* Find the first "a" element with "href" attribute */
node = mxmlFindElement(tree, tree, "a", "href", NULL, MXML_DESCEND);
/* Find the first "a" element with "href" to a URL */
node = mxmlFindElement(tree, tree, "a", "href",
"http://www.easysw.com/~mike/mxml/",
MXML_DESCEND);
/* Find the first element with a "src" attribute */
node = mxmlFindElement(tree, tree, NULL, "src", NULL, MXML_DESCEND);
/* Find the first element with a "src" = "foo.jpg" */
node = mxmlFindElement(tree, tree, NULL, "src", "foo.jpg",
MXML_DESCEND);
You can also iterate with the same function:
mxml_node_t *node;
for (node = mxmlFindElement(tree, tree, "name");
for (node = mxmlFindElement(tree, tree, "name", NULL, NULL,
MXML_DESCEND);
node != NULL;
node = mxmlFindElement(node, tree, "name"))
node = mxmlFindElement(node, tree, "name", NULL, NULL,
MXML_DESCEND))
{
... do something ...
}
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<HTML>
<HEAD>
<TITLE>Mini-XML Home Page</TITLE>
<STYLE><!--
H1, H2, H3, P { font-family: sans-serif; text-align: justify; }
H1.title, P.title { font-family: sans-serif; text-align: center; }
TT, PRE, PRE A:link, PRE A:visited { font-weight: bold; color: #7f0000; }
--></STYLE>
</HEAD>
<BODY>
<P CLASS="title" ALIGN="CENTER">[&nbsp;<A
HREF="../index.html">Back to Home Page</A>&nbsp;]</P>
<H1 CLASS="title" ALIGN="CENTER">Mini-XML Home Page</H1>
<P CLASS="title" ALIGN="CENTER">Current Release: v0.93 [&nbsp;<A
HREF="mxml-0.93.tar.gz">Download Source (.tar.gz 40k)</A> |
<A HREF="CHANGES">View Change Log</A>&nbsp;]</P>
<H2>Introduction</H2>
<P>Mini-XML is a small XML parsing library that you can use to
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>Mini-XML Home Page</title>
<style type="text/css"><!--
h1, h2, h3, p { font-family: sans-serif; text-align: justify; }
h1.title, p.title { font-family: sans-serif; text-align: center; }
tt, pre, pre a:link, pre a:visited, tt a:link, tt a:visited { font-weight: bold; color: #7f0000; }
--></style>
</head>
<body>
<p class="title" align="center">[&nbsp;<a
href="../index.html">Back to Home Page</a>&nbsp;]</p>
<h1 class="title" align="center">Mini-XML Home Page</h1>
<p class="title" align="center">Current Release: v0.93 [&nbsp;<a
href="mxml-0.93.tar.gz">Download Source (.tar.gz 40k)</a> |
<a href="CHANGES">View Change Log</a>&nbsp;]</p>
<h2>Introduction</h2>
<p>Mini-XML is a small XML parsing library that you can use to
read XML and XML-like data files in your application without
requiring large non-standard libraries. Mini-XML only requires
an ANSI C compatible compiler (GCC works, as do most vendors'
ANSI C compilers) and a "make" program.</P>
ANSI C compilers) and a "make" program.</p>
<P>Mini-XML was created to support the basic hierarchy provided
<p>Mini-XML was created to support the basic hierarchy provided
by XML and some simple data types, but doesn't do validation or
other types of processing on the data.</P>
other types of processing on the data.</p>
<H2>Building Mini-XML</H2>
<h2>Building Mini-XML</h2>
<P>Mini-XML comes with an autoconf-based configure script; just
type the following command to get things going:</P>
<p>Mini-XML comes with an autoconf-based configure script; just
type the following command to get things going:</p>
<PRE>
<pre>
./configure
</PRE>
</pre>
<P>The default install prefix is /usr/local, which can be
overridden using the --prefix option:</P>
<p>The default install prefix is /usr/local, which can be
overridden using the --prefix option:</p>
<PRE>
<pre>
./configure --prefix=/foo
</PRE>
</pre>
<P>Once you have configured the software, type "make" to do the
<p>Once you have configured the software, type "make" to do the
build and then run the test program to verify that things are
working, as follows:</P>
working, as follows:</p>
<PRE>
<pre>
make
./testmxml test.xml
</PRE>
</pre>
<H2>Installing Mini-XML</H2>
<h2>Installing Mini-XML</h2>
<P>The "install" target will install Mini-XML in the lib and
include directories:</P>
<p>The "install" target will install Mini-XML in the lib and
include directories:</p>
<PRE>
<pre>
make install
</PRE>
</pre>
<P>Once you have installed it, use the "-lmxml" option to link
your application against it.</P>
<p>Once you have installed it, use the "-lmxml" option to link
your application against it.</p>
<H2>Documentation</H2>
<h2>Documentation</h2>
<P>The documentation is currently just in this page. At some
<p>The documentation is currently only on this page. At some
point I'll probably do some proper documentation, but for now
just read here and look at the <A
HREF="testmxml.c"><TT>testmxml.c</TT></A> source file for an
just read here and look at the <tt><a
href="testmxml.c">testmxml.c</a></tt> source file for an
example of reading and printing the contents of an XML file to
stdout.</P>
stdout.</p>
<P>Mini-XML provides a single header file which you include:</P>
<p>Mini-XML provides a single header file which you include:</p>
<PRE>
<A HREF="mxml.h">#include &lt;mxml.h></A>
</PRE>
<pre>
<a href="mxml.h">#include &lt;mxml.h&gt;</a>
</pre>
<P>Nodes are defined by the <TT>mxml_node_t</TT> structure; the
<TT>type</TT> member defines the node type (element, integer,
<p>Nodes are defined by the <tt>mxml_node_t</tt> structure; the
<tt>type</tt> member defines the node type (element, integer,
opaque, real, or text) which determines which value you want to
look at in the <TT>value</TT> union. New nodes can be created
using the <TT>mxmlNewElement()</TT>, <TT>mxmlNewInteger()</TT>,
<TT>mxmlNewOpaque()</TT>, <TT>mxmlNewReal()</TT>, and
<TT>mxmlNewText()</TT> functions. Only elements can have child
nodes, and the top node must be an element, usually "?xml".</P>
look at in the <tt>value</tt> union. New nodes can be created
using the <tt>mxmlNewElement()</tt>, <tt>mxmlNewInteger()</tt>,
<tt>mxmlNewOpaque()</tt>, <tt>mxmlNewReal()</tt>, and
<tt>mxmlNewText()</tt> functions. Only elements can have child
nodes, and the top node must be an element, usually "?xml".</p>
<P>You load an XML file using the <TT>mxmlLoadFile()</TT> function:</P>
<p>You load an XML file using the <tt>mxmlLoadFile()</tt> function:</p>
<PRE>
<pre>
FILE *fp;
mxml_node_t *tree;
fp = fopen("filename.xml", "r");
tree = mxmlLoadFile(NULL, fp, MXML_NO_CALLBACK);
fclose(fp);
</PRE>
</pre>
<P>Similarly, you save an XML file using the <TT>mxmlSaveFile()</TT>
function:</P>
<p>Similarly, you save an XML file using the <tt>mxmlSaveFile()</tt>
function:</p>
<PRE>
<pre>
FILE *fp;
mxml_node_t *tree;
fp = fopen("filename.xml", "w");
mxmlSaveFile(tree, fp);
fclose(fp);
</PRE>
</pre>
<P>You can find a named element/node using the
<TT>mxmlFindElement()</TT> function:</P>
<p>You can find a named element/node using the
<tt>mxmlFindElement()</tt> function:</p>
<PRE>
mxml_node_t *node = mxmlFindElement(tree, tree, "name");
</PRE>
<pre>
mxml_node_t *node = mxmlFindElement(tree, tree, "name", "attr",
"value", MXML_DESCEND);
</pre>
<P>You can also iterate with the same function:
<p>The <tt>name</tt>, <tt>attr</tt>, and <tt>value</tt>
arguments can be passed as <tt>NULL</tt> to act as wildcards,
e.g.:</p>
<PRE>
<pre>
/* Find the first "a" element */
node = mxmlFindElement(tree, tree, "a", NULL, NULL, MXML_DESCEND);
/* Find the first "a" element with "href" attribute */
node = mxmlFindElement(tree, tree, "a", "href", NULL, MXML_DESCEND);
/* Find the first "a" element with "href" to a URL */
node = mxmlFindElement(tree, tree, "a", "href",
"http://www.easysw.com/~mike/mxml/", MXML_DESCEND);
/* Find the first element with a "src" attribute */
node = mxmlFindElement(tree, tree, NULL, "src", NULL, MXML_DESCEND);
/* Find the first element with a "src" = "foo.jpg" */
node = mxmlFindElement(tree, tree, NULL, "src", "foo.jpg", MXML_DESCEND);
</pre>
<p>You can also iterate with the same function:</p>
<pre>
mxml_node_t *node;
for (node = mxmlFindElement(tree, tree, "name");
for (node = mxmlFindElement(tree, tree, "name", NULL, NULL, MXML_DESCEND);
node != NULL;
node = mxmlFindElement(node, tree, "name"))
node = mxmlFindElement(node, tree, "name", NULL, NULL, MXML_DESCEND))
{
... do something ...
}
</PRE>
</pre>
<P>Finally, once you are done with the XML data, use the
<TT>mxmlDelete()</TT> function to recursively free the memory
that is used for a particular node or the entire tree:</P>
<p>Finally, once you are done with the XML data, use the
<tt>mxmlDelete()</tt> function to recursively free the memory
that is used for a particular node or the entire tree:</p>
<PRE>
<pre>
mxmlDelete(tree);
</PRE>
</pre>
<H2>Getting Help and Reporting Problems</H2>
<h2>Getting Help and Reporting Problems</h2>
<P>You can email me at "mxml <I>at</I> easysw <I>dot</I> com" to
<p>You can email me at "mxml <i>at</i> easysw <i>dot</i> com" to
report problems and/or ask for help. Just don't expect an
instant response, as I get a <I>lot</I> of email...</P>
instant response, as I get a <i>lot</i> of email...</p>
<H2>Legal Stuff</H2>
<h2>Legal Stuff</h2>
<P>The Mini-XML library is Copyright 2003 by Michael Sweet.</P>
<p>The Mini-XML library is Copyright 2003 by Michael Sweet.</p>
<P>This library is free software; you can redistribute it
<p>This library is free software; you can redistribute it
and/or modify it under the terms of the GNU Library General
Public License as published by the Free Software Foundation;
either version 2 of the License, or (at your option) any
later version.</P>
later version.</p>
<P>This library is distributed in the hope that it will be
<p>This library is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Library General Public License for
more details.</P>
more details.</p>
<P>You should have received a copy of the GNU Library General
<p>You should have received a copy of the GNU Library General
Public License along with this library; if not, write to the
Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA
02139, USA.</P>
02139, USA.</p>
</BODY>
</HTML>
</body>
</html>
/*
* "$Id: mxml-file.c,v 1.5 2003/06/04 02:34:29 mike Exp $"
* "$Id: mxml-file.c,v 1.6 2003/06/04 16:30:40 mike Exp $"
*
* File loading code for mini-XML, a small XML-like file parsing library.
*
......@@ -131,6 +131,32 @@ mxmlLoadFile(mxml_node_t *top, /* I - Top node */
break;
}
}
else if (isspace(ch) && type == MXML_TEXT)
whitespace = 1;
/*
* Add lone whitespace node if we are starting a new element and have
* existing whitespace...
*/
if (ch == '<' && whitespace && type == MXML_TEXT)
{
/*
* Peek at the next character and only do this if we are starting
* an open tag...
*/
ch = getc(fp);
ungetc(ch, fp);
if (ch != '/')
{
mxmlNewText(parent, whitespace, "");
whitespace = 0;
}
ch = '<';
}
if (ch == '<')
{
......@@ -144,40 +170,114 @@ mxmlLoadFile(mxml_node_t *top, /* I - Top node */
if (isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
break;
else if (bufptr < (buffer + sizeof(buffer) - 1))
{
*bufptr++ = ch;
if ((bufptr - buffer) == 3 && !strncmp(buffer, "!--", 3))
break;
}
*bufptr = '\0';
bufptr = buffer;
if (!strcmp(buffer, "!--"))
{
/*
* Skip comment...
* Gather rest of comment...
*/
buffer[3] = '\0';
while ((ch = getc(fp)) != EOF)
{
*bufptr++ = ch;
if ((bufptr - buffer) == 3)
if (ch == '>' && bufptr > (buffer + 4) &&
!strncmp(bufptr - 2, "--", 2))
break;
else if (bufptr < (buffer + sizeof(buffer) - 1))
*bufptr++ = ch;
else
{
if (!strcmp(buffer, "-->"))
break;
fprintf(stderr, "Comment too long in file under parent <%s>!\n",
parent ? parent->value.element.name : "null");
break;
}
}
buffer[0] = buffer[1];
buffer[1] = buffer[2];
bufptr --;
/*
* Error out if we didn't get the whole comment...
*/
if (ch != '>')
break;
/*
* Otherwise add this as an element under the current parent...
*/
*bufptr = '\0';
if (!mxmlNewElement(parent, buffer))
{
/*
* Just print error for now...
*/
fprintf(stderr, "Unable to add comment node to parent <%s>!\n",
parent ? parent->value.element.name : "null");
break;
}
}
else if (buffer[0] == '!')
{
/*
* Gather rest of declaration...
*/
do
{
if (ch == '>')
break;
else if (bufptr < (buffer + sizeof(buffer) - 1))
*bufptr++ = ch;
else
{
fprintf(stderr, "Declaration too long in file under parent <%s>!\n",
parent ? parent->value.element.name : "null");
break;
}
}
while ((ch = getc(fp)) != EOF);
bufptr = buffer;
/*
* Error out if we didn't get the whole declaration...
*/
if (ch == EOF)
if (ch != '>')
break;
else
continue;
/*
* Otherwise add this as an element under the current parent...
*/
*bufptr = '\0';
node = mxmlNewElement(parent, buffer);
if (!node)
{
/*
* Just print error for now...
*/
fprintf(stderr, "Unable to add declaration node to parent <%s>!\n",
parent ? parent->value.element.name : "null");
break;
}
/*
* Descend into this node, setting the value type as needed...
*/
parent = node;
if (cb)
type = (*cb)(parent);
}
else if (buffer[0] == '/')
{
......@@ -260,6 +360,8 @@ mxmlLoadFile(mxml_node_t *top, /* I - Top node */
type = (*cb)(parent);
}
}
bufptr = buffer;
}
else if (ch == '&')
{
......@@ -635,7 +737,7 @@ mxml_write_node(mxml_node_t *node, /* I - Node to write */
if (node->child)
{
/*
* The ?xml element is a special-case and has no end tag...
* The ? and ! elements are special-cases and have no end tags...
*/
if (node->value.element.name[0] == '?')
......@@ -653,7 +755,8 @@ mxml_write_node(mxml_node_t *node, /* I - Node to write */
if ((col = mxml_write_node(node->child, fp, col)) < 0)
return (-1);
if (node->value.element.name[0] != '?')
if (node->value.element.name[0] != '?' &&
node->value.element.name[0] != '!')
{
if ((n = fprintf(fp, "</%s>", node->value.element.name)) < 0)
return (-1);
......@@ -661,6 +764,13 @@ mxml_write_node(mxml_node_t *node, /* I - Node to write */
col += n;
}
}
else if (node->value.element.name[0] == '!')
{
if (putc('>', fp) < 0)
return (-1);
else
col ++;
}
else if (fputs("/>", fp) < 0)
return (-1);
else
......@@ -772,6 +882,45 @@ mxml_write_string(const char *s, /* I - String to write */
if (fputs("&lt;", fp) < 0)
return (-1);
}
else if (*s == '>')
{
if (fputs("&gt;", fp) < 0)
return (-1);
}
else if (*s & 128)
{
 /*
  * Convert UTF-8 to Unicode constant...
  *
  * Only 2- and 3-byte sequences are decoded here; any other high-bit
  * byte is written out as a numeric reference to the byte value itself.
  */

  int ch;				/* Unicode character */

  ch = *s & 255;

  if ((ch & 0xe0) == 0xc0 && (s[1] & 0xc0) == 0x80)
  {
   /*
    * Two-byte sequence: 110xxxxx 10xxxxxx.  The continuation byte is
    * validated first so that a truncated sequence never advances "s"
    * past the string terminator.
    */

    ch = ((ch & 0x1f) << 6) | (s[1] & 0x3f);
    s ++;
  }
  else if ((ch & 0xf0) == 0xe0 &&
           (s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80)
  {
   /*
    * Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.  The lead byte
    * contributes its low 4 bits, so it must be masked (&), not
    * multiplied, by 0x0f.
    */

    ch = ((((ch & 0x0f) << 6) | (s[1] & 0x3f)) << 6) | (s[2] & 0x3f);
    s += 2;
  }

  if (ch == 0xa0)
  {
   /*
    * Handle non-breaking space as-is...
    */

    if (fputs("&nbsp;", fp) < 0)
      return (-1);
  }
  else if (fprintf(fp, "&#x%x;", ch) < 0)
    return (-1);
}
else if (putc(*s, fp) < 0)
return (-1);
......@@ -784,5 +933,5 @@ mxml_write_string(const char *s, /* I - String to write */
/*
* End of "$Id: mxml-file.c,v 1.5 2003/06/04 02:34:29 mike Exp $".
* End of "$Id: mxml-file.c,v 1.6 2003/06/04 16:30:40 mike Exp $".
*/
/*
* "$Id: mxml-search.c,v 1.2 2003/06/03 20:40:01 mike Exp $"
* "$Id: mxml-search.c,v 1.3 2003/06/04 16:30:40 mike Exp $"
*
* Search/navigation functions for mini-XML, a small XML-like file
* parsing library.
......@@ -21,7 +21,6 @@
* mxmlFindElement() - Find the named element.
* mxmlWalkNext() - Walk to the next logical node in the tree.
* mxmlWalkPrev() - Walk to the previous logical node in the tree.
* mxml_walk_next() - Walk to the next logical node in the tree.
*/
/*
......@@ -31,14 +30,6 @@
#include "mxml.h"
/*
* Local functions...
*/
mxml_node_t *mxml_walk_next(mxml_node_t *node, mxml_node_t *top,
int descend);
/*
* 'mxmlFindElement()' - Find the named element.
*/
......@@ -46,13 +37,26 @@ mxml_node_t *mxml_walk_next(mxml_node_t *node, mxml_node_t *top,
mxml_node_t * /* O - Element node or NULL */
mxmlFindElement(mxml_node_t *node, /* I - Current node */
mxml_node_t *top, /* I - Top node */
const char *name) /* I - Element name */
const char *name, /* I - Element name or NULL for any */
const char *attr, /* I - Attribute name, or NULL for none */
const char *value, /* I - Attribute value, or NULL for any */
int descend) /* I - Descend into tree? */
{
const char *temp; /* Current attribute value */