/*
 * "$Id$"
 *
 * File loading code for Mini-XML, a small XML-like file parsing library.
 *
 * Copyright 2003-2016 by Michael R Sweet.
 *
 * These coded instructions, statements, and computer programs are the
 * property of Michael R Sweet and are protected by Federal copyright
 * law.  Distribution and use rights are outlined in the file "COPYING"
 * which should have been included with this file.  If this file is
 * missing or damaged, see the license at:
 *
 *     http://www.msweet.org/projects.php/Mini-XML
 */

/*
 * Include necessary headers...
 */

Michael R Sweet's avatar
Michael R Sweet committed
21
#ifndef WIN32
22
#  include <unistd.h>
Michael R Sweet's avatar
Michael R Sweet committed
23 24
#endif /* !WIN32 */
#include "mxml-private.h"
Michael R Sweet's avatar
Michael R Sweet committed
25 26


Michael R Sweet's avatar
Michael R Sweet committed
27 28 29 30 31 32 33 34 35
/*
 * Character encoding...
 *
 * These values are kept in the "encoding" state that is passed to the
 * getc callbacks.  The UTF-8 reader switches that state to one of the
 * UTF-16 values when it sees the matching byte order mark.
 */

#define ENCODE_UTF8	0		/* UTF-8 */
#define ENCODE_UTF16BE	1		/* UTF-16 Big-Endian */
#define ENCODE_UTF16LE	2		/* UTF-16 Little-Endian */


Michael R Sweet's avatar
Michael R Sweet committed
36 37 38 39 40 41 42
/*
 * Macro to test for a bad XML character...
 *
 * True for any value below the space character other than newline,
 * carriage return, and tab.  The argument is evaluated more than once,
 * so it must not have side effects.
 */

#define mxml_bad_char(ch) ((ch) < ' ' && (ch) != '\n' && (ch) != '\r' && (ch) != '\t')


43
/*
 * Types and structures...
 */

typedef int (*_mxml_getc_cb_t)(void *, int *);
					/**** Read-a-character callback ****/
typedef int (*_mxml_putc_cb_t)(int, void *);
					/**** Write-a-character callback ****/

/*
 * Buffer wrapped around a file descriptor.  When reading, "end" marks the
 * end of the data obtained by the last read; when writing, it marks the
 * end of the buffer storage.
 */

typedef struct _mxml_fdbuf_s		/**** File descriptor buffer ****/
{
  int		fd;			/* File descriptor */
  unsigned char	*current,		/* Current position in buffer */
		*end,			/* End of buffer */
		buffer[8192];		/* Character buffer */
} _mxml_fdbuf_t;
57 58


Michael R Sweet's avatar
Michael R Sweet committed
59 60 61 62
/*
 * Local functions...
 */

63 64
static int		mxml_add_char(int ch, char **ptr, char **buffer,
			              int *bufsize);
65 66
static int		mxml_fd_getc(void *p, int *encoding);
static int		mxml_fd_putc(int ch, void *p);
67 68
static int		mxml_fd_read(_mxml_fdbuf_t *buf);
static int		mxml_fd_write(_mxml_fdbuf_t *buf);
69 70
static int		mxml_file_getc(void *p, int *encoding);
static int		mxml_file_putc(int ch, void *p);
71
static int		mxml_get_entity(mxml_node_t *parent, void *p,
Michael R Sweet's avatar
Michael R Sweet committed
72
			                int *encoding,
73
					_mxml_getc_cb_t getc_cb);
74 75 76
/*
 * 'mxml_isspace()' - Test for an XML whitespace character.
 *
 * Returns non-zero only for space, tab, carriage return, and newline.
 */

static inline int	mxml_isspace(int ch)
			{
			  return (ch == ' ' || ch == '\t' || ch == '\r' ||
			          ch == '\n');
			}
79
static mxml_node_t	*mxml_load_data(mxml_node_t *top, void *p,
80 81 82
			                mxml_load_cb_t cb,
			                _mxml_getc_cb_t getc_cb,
                                        mxml_sax_cb_t sax_cb, void *sax_data);
83
static int		mxml_parse_element(mxml_node_t *node, void *p,
Michael R Sweet's avatar
Michael R Sweet committed
84
			                   int *encoding,
85
					   _mxml_getc_cb_t getc_cb);
Michael R Sweet's avatar
Michael R Sweet committed
86
static int		mxml_string_getc(void *p, int *encoding);
87
static int		mxml_string_putc(int ch, void *p);
88
static int		mxml_write_name(const char *s, void *p,
89
					_mxml_putc_cb_t putc_cb);
90
static int		mxml_write_node(mxml_node_t *node, void *p,
91
			                mxml_save_cb_t cb, int col,
92 93
					_mxml_putc_cb_t putc_cb,
					_mxml_global_t *global);
94
static int		mxml_write_string(const char *s, void *p,
95
					  _mxml_putc_cb_t putc_cb);
96
static int		mxml_write_ws(mxml_node_t *node, void *p,
97 98
			              mxml_save_cb_t cb, int ws,
				      int col, _mxml_putc_cb_t putc_cb);
99 100


101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
/*
 * 'mxmlLoadFd()' - Load a file descriptor into an XML node tree.
 *
 * The nodes in the specified file are added to the specified top node.
 * If no top node is provided, the XML file MUST be well-formed with a
 * single parent node like <?xml> for the entire file. The callback
 * function returns the value type that should be used for child nodes.
 * If MXML_NO_CALLBACK is specified then all child nodes will be either
 * MXML_ELEMENT or MXML_TEXT nodes.
 *
 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
 * child nodes of the specified type.
 */

mxml_node_t *				/* O - First node or NULL if the file could not be read. */
117 118 119
mxmlLoadFd(mxml_node_t    *top,		/* I - Top node */
           int            fd,		/* I - File descriptor to read from */
           mxml_load_cb_t cb)		/* I - Callback function or MXML_NO_CALLBACK */
120
{
121
  _mxml_fdbuf_t	buf;			/* File descriptor buffer */
122 123 124 125 126 127 128 129 130 131 132 133 134 135


 /*
  * Initialize the file descriptor buffer...
  */

  buf.fd      = fd;
  buf.current = buf.buffer;
  buf.end     = buf.buffer;

 /*
  * Read the XML data...
  */

136
  return (mxml_load_data(top, &buf, cb, mxml_fd_getc, MXML_NO_CALLBACK, NULL));
137 138 139
}


140
/*
141
 * 'mxmlLoadFile()' - Load a file into an XML node tree.
142
 *
143 144 145
 * The nodes in the specified file are added to the specified top node.
 * If no top node is provided, the XML file MUST be well-formed with a
 * single parent node like <?xml> for the entire file. The callback
146 147 148
 * function returns the value type that should be used for child nodes.
 * If MXML_NO_CALLBACK is specified then all child nodes will be either
 * MXML_ELEMENT or MXML_TEXT nodes.
149 150 151 152
 *
 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
 * child nodes of the specified type.
153 154
 */

155
mxml_node_t *				/* O - First node or NULL if the file could not be read. */
156 157 158
mxmlLoadFile(mxml_node_t    *top,	/* I - Top node */
             FILE           *fp,	/* I - File to read from */
             mxml_load_cb_t cb)		/* I - Callback function or MXML_NO_CALLBACK */
159
{
160 161 162 163
 /*
  * Read the XML data...
  */

164
  return (mxml_load_data(top, fp, cb, mxml_file_getc, MXML_NO_CALLBACK, NULL));
165
}
Michael R Sweet's avatar
Michael R Sweet committed
166 167 168


/*
169
 * 'mxmlLoadString()' - Load a string into an XML node tree.
170
 *
171 172 173
 * The nodes in the specified string are added to the specified top node.
 * If no top node is provided, the XML string MUST be well-formed with a
 * single parent node like <?xml> for the entire string. The callback
174 175 176
 * function returns the value type that should be used for child nodes.
 * If MXML_NO_CALLBACK is specified then all child nodes will be either
 * MXML_ELEMENT or MXML_TEXT nodes.
177 178 179 180
 *
 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
 * child nodes of the specified type.
Michael R Sweet's avatar
Michael R Sweet committed
181 182
 */

183
mxml_node_t *				/* O - First node or NULL if the string has errors. */
184 185 186
mxmlLoadString(mxml_node_t    *top,	/* I - Top node */
               const char     *s,	/* I - String to load */
               mxml_load_cb_t cb)	/* I - Callback function or MXML_NO_CALLBACK */
187
{
188 189 190 191
 /*
  * Read the XML data...
  */

Michael R Sweet's avatar
Michael R Sweet committed
192
  return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, MXML_NO_CALLBACK,
193
                         NULL));
194 195 196
}


197
/*
198
 * 'mxmlSaveAllocString()' - Save an XML tree to an allocated string.
199 200 201 202 203 204
 *
 * This function returns a pointer to a string containing the textual
 * representation of the XML node tree.  The string should be freed
 * using the free() function when you are done with it.  NULL is returned
 * if the node would produce an empty string or if the string cannot be
 * allocated.
205 206 207 208 209 210
 *
 * The callback argument specifies a function that returns a whitespace
 * string or NULL before and after each element. If MXML_NO_CALLBACK
 * is specified, whitespace will only be added before MXML_TEXT nodes
 * with leading whitespace and before attribute names inside opening
 * element tags.
211 212 213
 */

char *					/* O - Allocated string or NULL */
214 215 216
mxmlSaveAllocString(
    mxml_node_t    *node,		/* I - Node to write */
    mxml_save_cb_t cb)			/* I - Whitespace callback or MXML_NO_CALLBACK */
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
{
  int	bytes;				/* Required bytes */
  char	buffer[8192];			/* Temporary buffer */
  char	*s;				/* Allocated string */


 /*
  * Write the node to the temporary buffer...
  */

  bytes = mxmlSaveString(node, buffer, sizeof(buffer), cb);

  if (bytes <= 0)
    return (NULL);

  if (bytes < (int)(sizeof(buffer) - 1))
  {
   /*
    * Node fit inside the buffer, so just duplicate that string and
    * return...
    */

    return (strdup(buffer));
  }

 /*
  * Allocate a buffer of the required size and save the node to the
  * new buffer...
  */

  if ((s = malloc(bytes + 1)) == NULL)
    return (NULL);

  mxmlSaveString(node, s, bytes + 1, cb);

 /*
  * Return the allocated string...
  */

  return (s);
}


260 261 262 263 264 265 266 267 268 269 270
/*
 * 'mxmlSaveFd()' - Save an XML tree to a file descriptor.
 *
 * The callback argument specifies a function that returns a whitespace
 * string or NULL before and after each element. If MXML_NO_CALLBACK
 * is specified, whitespace will only be added before MXML_TEXT nodes
 * with leading whitespace and before attribute names inside opening
 * element tags.
 */

int					/* O - 0 on success, -1 on error. */
271 272 273
mxmlSaveFd(mxml_node_t    *node,	/* I - Node to write */
           int            fd,		/* I - File descriptor to write to */
	   mxml_save_cb_t cb)		/* I - Whitespace callback or MXML_NO_CALLBACK */
274 275
{
  int		col;			/* Final column */
276
  _mxml_fdbuf_t	buf;			/* File descriptor buffer */
277 278
  _mxml_global_t *global = _mxml_global();
					/* Global data */
279 280 281 282 283 284 285 286


 /*
  * Initialize the file descriptor buffer...
  */

  buf.fd      = fd;
  buf.current = buf.buffer;
287
  buf.end     = buf.buffer + sizeof(buf.buffer);
288 289 290 291 292

 /*
  * Write the node...
  */

293
  if ((col = mxml_write_node(node, &buf, cb, 0, mxml_fd_putc, global)) < 0)
294 295 296 297 298 299 300 301 302 303 304 305 306 307
    return (-1);

  if (col > 0)
    if (mxml_fd_putc('\n', &buf) < 0)
      return (-1);

 /*
  * Flush and return...
  */

  return (mxml_fd_write(&buf));
}


308 309 310 311
/*
 * 'mxmlSaveFile()' - Save an XML tree to a file.
 *
 * The callback argument specifies a function that returns a whitespace
312
 * string or NULL before and after each element. If MXML_NO_CALLBACK
313 314 315 316 317 318
 * is specified, whitespace will only be added before MXML_TEXT nodes
 * with leading whitespace and before attribute names inside opening
 * element tags.
 */

int					/* O - 0 on success, -1 on error. */
319 320 321
mxmlSaveFile(mxml_node_t    *node,	/* I - Node to write */
             FILE           *fp,	/* I - File to write to */
	     mxml_save_cb_t cb)		/* I - Whitespace callback or MXML_NO_CALLBACK */
322 323
{
  int	col;				/* Final column */
324 325
  _mxml_global_t *global = _mxml_global();
					/* Global data */
326 327 328 329 330 331


 /*
  * Write the node...
  */

332
  if ((col = mxml_write_node(node, fp, cb, 0, mxml_file_putc, global)) < 0)
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
    return (-1);

  if (col > 0)
    if (putc('\n', fp) < 0)
      return (-1);

 /*
  * Return 0 (success)...
  */

  return (0);
}


/*
 * 'mxmlSaveString()' - Save an XML node tree to a string.
 *
 * This function returns the total number of bytes that would be
 * required for the string but only copies (bufsize - 1) characters
 * into the specified buffer.
353 354 355 356 357 358
 *
 * The callback argument specifies a function that returns a whitespace
 * string or NULL before and after each element. If MXML_NO_CALLBACK
 * is specified, whitespace will only be added before MXML_TEXT nodes
 * with leading whitespace and before attribute names inside opening
 * element tags.
359 360 361
 */

int					/* O - Size of string */
362 363 364 365
mxmlSaveString(mxml_node_t    *node,	/* I - Node to write */
               char           *buffer,	/* I - String buffer */
               int            bufsize,	/* I - Size of string buffer */
               mxml_save_cb_t cb)	/* I - Whitespace callback or MXML_NO_CALLBACK */
366
{
367 368
  int	col;				/* Final column */
  char	*ptr[2];			/* Pointers for putc_cb */
369 370
  _mxml_global_t *global = _mxml_global();
					/* Global data */
371 372 373 374 375 376 377 378 379


 /*
  * Write the node...
  */

  ptr[0] = buffer;
  ptr[1] = buffer + bufsize;

380
  if ((col = mxml_write_node(node, ptr, cb, 0, mxml_string_putc, global)) < 0)
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
    return (-1);

  if (col > 0)
    mxml_string_putc('\n', ptr);

 /*
  * Nul-terminate the buffer...
  */

  if (ptr[0] >= ptr[1])
    buffer[bufsize - 1] = '\0';
  else
    ptr[0][0] = '\0';

 /*
  * Return the number of characters...
  */

399
  return (ptr[0] - buffer);
400 401 402
}


403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
/*
 * 'mxmlSAXLoadFd()' - Load a file descriptor into an XML node tree
 *                     using a SAX callback.
 *
 * Nodes read from the file descriptor are added to the given top node.
 * Without a top node, the XML file MUST be well-formed with a single
 * parent node (like <?xml>) for the entire file.
 *
 * The load callback returns the value type to use for child nodes; with
 * MXML_NO_CALLBACK all child nodes are either MXML_ELEMENT or MXML_TEXT
 * nodes.  The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
 * child nodes of the specified type.
 *
 * The SAX callback must call mxmlRetain() for any nodes that need to
 * be kept for later use. Otherwise, nodes are deleted when the parent
 * node is closed or after each data, comment, CDATA, or directive node.
 *
 * @since Mini-XML 2.3@
 */

mxml_node_t *				/* O - First node or NULL if the file could not be read. */
mxmlSAXLoadFd(mxml_node_t    *top,	/* I - Top node */
              int            fd,	/* I - File descriptor to read from */
              mxml_load_cb_t cb,	/* I - Callback function or MXML_NO_CALLBACK */
              mxml_sax_cb_t  sax_cb,	/* I - SAX callback or MXML_NO_CALLBACK */
              void           *sax_data)	/* I - SAX user data */
{
  _mxml_fdbuf_t	reader;			/* Read buffer around "fd" */
  mxml_node_t	*tree;			/* Loaded node tree */


 /*
  * Begin with an empty buffer so the first character comes from a read...
  */

  reader.fd  = fd;
  reader.end = reader.current = reader.buffer;

 /*
  * Parse the data, reporting to the SAX callback...
  */

  tree = mxml_load_data(top, &reader, cb, mxml_fd_getc, sax_cb, sax_data);

  return (tree);
}


/*
 * 'mxmlSAXLoadFile()' - Load a file into an XML node tree
 *                       using a SAX callback.
 *
 * Nodes read from the file are added to the given top node.  Without a
 * top node, the XML file MUST be well-formed with a single parent node
 * (like <?xml>) for the entire file.
 *
 * The load callback returns the value type to use for child nodes; with
 * MXML_NO_CALLBACK all child nodes are either MXML_ELEMENT or MXML_TEXT
 * nodes.  The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
 * child nodes of the specified type.
 *
 * The SAX callback must call mxmlRetain() for any nodes that need to
 * be kept for later use. Otherwise, nodes are deleted when the parent
 * node is closed or after each data, comment, CDATA, or directive node.
 *
 * @since Mini-XML 2.3@
 */

mxml_node_t *				/* O - First node or NULL if the file could not be read. */
mxmlSAXLoadFile(
    mxml_node_t    *top,		/* I - Top node */
    FILE           *fp,			/* I - File to read from */
    mxml_load_cb_t cb,			/* I - Callback function or MXML_NO_CALLBACK */
    mxml_sax_cb_t  sax_cb,		/* I - SAX callback or MXML_NO_CALLBACK */
    void           *sax_data)		/* I - SAX user data */
{
  mxml_node_t	*tree;			/* Loaded node tree */


 /*
  * Parse the stream with the stdio reader, reporting to the SAX callback...
  */

  tree = mxml_load_data(top, fp, cb, mxml_file_getc, sax_cb, sax_data);

  return (tree);
}


/*
 * 'mxmlSAXLoadString()' - Load a string into an XML node tree
 *                         using a SAX callback.
 *
 * The nodes in the specified string are added to the specified top node.
 * If no top node is provided, the XML string MUST be well-formed with a
 * single parent node like <?xml> for the entire string. The callback
 * function returns the value type that should be used for child nodes.
 * If MXML_NO_CALLBACK is specified then all child nodes will be either
 * MXML_ELEMENT or MXML_TEXT nodes.
 *
 * The constants MXML_INTEGER_CALLBACK, MXML_OPAQUE_CALLBACK,
 * MXML_REAL_CALLBACK, and MXML_TEXT_CALLBACK are defined for loading
 * child nodes of the specified type.
 *
 * The SAX callback must call mxmlRetain() for any nodes that need to
 * be kept for later use. Otherwise, nodes are deleted when the parent
 * node is closed or after each data, comment, CDATA, or directive node.
 *
 * @since Mini-XML 2.3@
 */

mxml_node_t *				/* O - First node or NULL if the string has errors. */
mxmlSAXLoadString(
    mxml_node_t    *top,		/* I - Top node */
    const char     *s,			/* I - String to load */
    mxml_load_cb_t cb,			/* I - Callback function or MXML_NO_CALLBACK */
    mxml_sax_cb_t  sax_cb,		/* I - SAX callback or MXML_NO_CALLBACK */
    void           *sax_data)		/* I - SAX user data */
{
 /*
  * Read the XML data.  The address of the (local) string pointer is
  * handed to the reader rather than the string itself...
  */

  return (mxml_load_data(top, (void *)&s, cb, mxml_string_getc, sax_cb, sax_data));
}


527 528 529 530 531 532 533 534
/*
 * 'mxmlSetCustomHandlers()' - Set the handling functions for custom data.
 *
 * The load function accepts a node pointer and a data string and must
 * return 0 on success and non-zero on error.
 *
 * The save function accepts a node pointer and must return a malloc'd
 * string on success and NULL on error.
535
 *
536 537 538
 */

void
539 540 541
mxmlSetCustomHandlers(
    mxml_custom_load_cb_t load,		/* I - Load function */
    mxml_custom_save_cb_t save)		/* I - Save function */
542
{
543 544 545 546 547 548
  _mxml_global_t *global = _mxml_global();
					/* Global data */


  global->custom_load_cb = load;
  global->custom_save_cb = save;
549 550 551
}


Michael R Sweet's avatar
Michael R Sweet committed
552 553 554 555 556
/*
 * 'mxmlSetErrorCallback()' - Set the error message callback.
 */

void
557
mxmlSetErrorCallback(mxml_error_cb_t cb)/* I - Error callback function */
Michael R Sweet's avatar
Michael R Sweet committed
558
{
559 560 561 562 563
  _mxml_global_t *global = _mxml_global();
					/* Global data */


  global->error_cb = cb;
Michael R Sweet's avatar
Michael R Sweet committed
564 565 566
}


567
/*
568
 * 'mxmlSetWrapMargin()' - Set the wrap margin when saving XML data.
569
 *
570
 * Wrapping is disabled when "column" is 0.
571 572
 *
 * @since Mini-XML 2.3@
573 574 575
 */

void
576
mxmlSetWrapMargin(int column)		/* I - Column for wrapping, 0 to disable wrapping */
577
{
578 579 580 581
  _mxml_global_t *global = _mxml_global();
					/* Global data */


582
  global->wrap = column;
583 584 585
}


586 587 588 589 590 591 592 593 594 595 596 597 598
/*
 * 'mxml_add_char()' - Add a character to a buffer, expanding as needed.
 *
 * The character is stored as 1 to 4 bytes of UTF-8.  The buffer is grown
 * whenever fewer than 4 bytes remain before its end, so the longest
 * encoding always fits.
 *
 * If the buffer cannot be expanded it is freed, and both "*buffer" and
 * "*bufptr" are set to NULL so that the caller is not left holding
 * pointers to freed memory.
 */

static int				/* O  - 0 on success, -1 on error */
mxml_add_char(int  ch,			/* I  - Character to add */
              char **bufptr,		/* IO - Current position in buffer */
	      char **buffer,		/* IO - Current buffer */
	      int  *bufsize)		/* IO - Current buffer size */
{
  char		*newbuffer;		/* New buffer value */
  size_t	offset;			/* Offset of current position in buffer */


  if (*bufptr >= (*buffer + *bufsize - 4))
  {
   /*
    * Increase the size of the buffer: double small buffers, then grow in
    * 1k steps.  The current offset is saved first because the old buffer
    * pointer must not be used once realloc() has released it...
    */

    offset = (size_t)(*bufptr - *buffer);

    if (*bufsize < 1024)
      (*bufsize) *= 2;
    else
      (*bufsize) += 1024;

    if ((newbuffer = realloc(*buffer, *bufsize)) == NULL)
    {
      free(*buffer);

      *buffer = NULL;
      *bufptr = NULL;

      mxml_error("Unable to expand string buffer to %d bytes!", *bufsize);

      return (-1);
    }

    *bufptr = newbuffer + offset;
    *buffer = newbuffer;
  }

  if (ch < 0x80)
  {
   /*
    * Single byte ASCII...
    */

    *(*bufptr)++ = (char)ch;
  }
  else if (ch < 0x800)
  {
   /*
    * Two-byte UTF-8...
    */

    *(*bufptr)++ = (char)(0xc0 | (ch >> 6));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }
  else if (ch < 0x10000)
  {
   /*
    * Three-byte UTF-8...
    */

    *(*bufptr)++ = (char)(0xe0 | (ch >> 12));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 6) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }
  else
  {
   /*
    * Four-byte UTF-8...
    */

    *(*bufptr)++ = (char)(0xf0 | (ch >> 18));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 12) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | ((ch >> 6) & 0x3f));
    *(*bufptr)++ = (char)(0x80 | (ch & 0x3f));
  }

  return (0);
}


666
/*
667
 * 'mxml_fd_getc()' - Read a character from a file descriptor.
668 669
 */

670 671 672
static int				/* O  - Character or EOF */
mxml_fd_getc(void *p,			/* I  - File descriptor buffer */
             int  *encoding)		/* IO - Encoding */
673
{
674
  _mxml_fdbuf_t	*buf;			/* File descriptor buffer */
675 676
  int		ch,			/* Current character */
		temp;			/* Temporary character */
677 678


679 680 681
 /*
  * Grab the next character in the buffer...
  */
682

683
  buf = (_mxml_fdbuf_t *)p;
684

685 686 687
  if (buf->current >= buf->end)
    if (mxml_fd_read(buf) < 0)
      return (EOF);
688

689 690 691
  ch = *(buf->current)++;

  switch (*encoding)
692
  {
693 694 695 696 697 698
    case ENCODE_UTF8 :
       /*
	* Got a UTF-8 character; convert UTF-8 to Unicode and return...
	*/

	if (!(ch & 0x80))
Michael R Sweet's avatar
Michael R Sweet committed
699 700 701 702 703 704 705 706 707 708 709 710
	{
#if DEBUG > 1
          printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
#endif /* DEBUG > 1 */

	  if (mxml_bad_char(ch))
	  {
	    mxml_error("Bad control character 0x%02x not allowed by XML standard!",
        	       ch);
	    return (EOF);
	  }

711
	  return (ch);
Michael R Sweet's avatar
Michael R Sweet committed
712 713
        }
	else if (ch == 0xfe)
714 715 716 717 718 719 720 721 722 723
	{
	 /*
	  * UTF-16 big-endian BOM?
	  */

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  ch = *(buf->current)++;
724

725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
	  if (ch != 0xff)
	    return (EOF);

	  *encoding = ENCODE_UTF16BE;

	  return (mxml_fd_getc(p, encoding));
	}
	else if (ch == 0xff)
	{
	 /*
	  * UTF-16 little-endian BOM?
	  */

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  ch = *(buf->current)++;
743

744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766
	  if (ch != 0xfe)
	    return (EOF);

	  *encoding = ENCODE_UTF16LE;

	  return (mxml_fd_getc(p, encoding));
	}
	else if ((ch & 0xe0) == 0xc0)
	{
	 /*
	  * Two-byte value...
	  */

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  if ((temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
767 768

	  if (ch < 0x80)
769 770
	  {
	    mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
771
	    return (EOF);
772
	  }
773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800
	}
	else if ((ch & 0xf0) == 0xe0)
	{
	 /*
	  * Three-byte value...
	  */

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  if ((temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = ((ch & 0x0f) << 6) | (temp & 0x3f);

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  if ((temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = (ch << 6) | (temp & 0x3f);
801 802

	  if (ch < 0x800)
803 804 805 806 807
	  {
	    mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
	    return (EOF);
	  }

808 809 810 811 812 813
         /*
	  * Ignore (strip) Byte Order Mark (BOM)...
	  */

	  if (ch == 0xfeff)
	    return (mxml_fd_getc(p, encoding));
814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
	}
	else if ((ch & 0xf8) == 0xf0)
	{
	 /*
	  * Four-byte value...
	  */

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  if ((temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = ((ch & 0x07) << 6) | (temp & 0x3f);

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  if ((temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = (ch << 6) | (temp & 0x3f);

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  if ((temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = (ch << 6) | (temp & 0x3f);
853 854

	  if (ch < 0x10000)
855 856
	  {
	    mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
857
	    return (EOF);
858
	  }
859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876
	}
	else
	  return (EOF);
	break;

    case ENCODE_UTF16BE :
       /*
        * Read UTF-16 big-endian char...
	*/

	if (buf->current >= buf->end)
	  if (mxml_fd_read(buf) < 0)
	    return (EOF);

	temp = *(buf->current)++;

	ch = (ch << 8) | temp;

Michael R Sweet's avatar
Michael R Sweet committed
877 878 879 880 881 882 883
	if (mxml_bad_char(ch))
	{
	  mxml_error("Bad control character 0x%02x not allowed by XML standard!",
        	     ch);
	  return (EOF);
	}
        else if (ch >= 0xd800 && ch <= 0xdbff)
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924
	{
	 /*
	  * Multi-word UTF-16 char...
	  */

          int lch;

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  lch = *(buf->current)++;

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  lch = (lch << 8) | temp;

          if (lch < 0xdc00 || lch >= 0xdfff)
	    return (EOF);

          ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
	}
	break;

    case ENCODE_UTF16LE :
       /*
        * Read UTF-16 little-endian char...
	*/

	if (buf->current >= buf->end)
	  if (mxml_fd_read(buf) < 0)
	    return (EOF);

	temp = *(buf->current)++;

	ch |= (temp << 8);

Michael R Sweet's avatar
Michael R Sweet committed
925 926 927 928 929 930 931
        if (mxml_bad_char(ch))
	{
	  mxml_error("Bad control character 0x%02x not allowed by XML standard!",
        	     ch);
	  return (EOF);
	}
        else if (ch >= 0xd800 && ch <= 0xdbff)
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958
	{
	 /*
	  * Multi-word UTF-16 char...
	  */

          int lch;

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  lch = *(buf->current)++;

	  if (buf->current >= buf->end)
	    if (mxml_fd_read(buf) < 0)
	      return (EOF);

	  temp = *(buf->current)++;

	  lch |= (temp << 8);

          if (lch < 0xdc00 || lch >= 0xdfff)
	    return (EOF);

          ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
	}
	break;
959 960
  }

Michael R Sweet's avatar
Michael R Sweet committed
961 962 963 964
#if DEBUG > 1
  printf("mxml_fd_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
#endif /* DEBUG > 1 */

965 966 967 968 969 970 971 972 973 974 975 976
  return (ch);
}


/*
 * 'mxml_fd_putc()' - Write a character to a file descriptor.
 */

static int				/* O - 0 on success, -1 on error */
mxml_fd_putc(int  ch,			/* I - Character */
             void *p)			/* I - File descriptor buffer */
{
977
  _mxml_fdbuf_t	*buf;			/* File descriptor buffer */
978 979 980


 /*
981
  * Flush the write buffer as needed...
982 983
  */

984
  buf = (_mxml_fdbuf_t *)p;
985 986 987 988 989

  if (buf->current >= buf->end)
    if (mxml_fd_write(buf) < 0)
      return (-1);

990
  *(buf->current)++ = ch;
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004

 /*
  * Return successfully...
  */

  return (0);
}


/*
 * 'mxml_fd_read()' - Read a buffer of data from a file descriptor.
 */

static int				/* O - 0 on success, -1 on error */
1005
mxml_fd_read(_mxml_fdbuf_t *buf)		/* I - File descriptor buffer */
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021
{
  int	bytes;				/* Bytes read... */


 /*
  * Range check input...
  */

  if (!buf)
    return (-1);

 /*
  * Read from the file descriptor...
  */

  while ((bytes = read(buf->fd, buf->buffer, sizeof(buf->buffer))) < 0)
1022
#ifdef EINTR
1023
    if (errno != EAGAIN && errno != EINTR)
1024 1025 1026
#else
    if (errno != EAGAIN)
#endif /* EINTR */
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
      return (-1);

  if (bytes == 0)
    return (-1);

 /*
  * Update the pointers and return success...
  */

  buf->current = buf->buffer;
  buf->end     = buf->buffer + bytes;

  return (0);
}


/*
 * 'mxml_fd_write()' - Write a buffer of data to a file descriptor.
 */

static int				/* O - 0 on success, -1 on error */
1048
mxml_fd_write(_mxml_fdbuf_t *buf)	/* I - File descriptor buffer */
1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
{
  int		bytes;			/* Bytes written */
  unsigned char	*ptr;			/* Pointer into buffer */


 /*
  * Range check...
  */

  if (!buf)
    return (-1);

 /*
  * Return 0 if there is nothing to write...
  */

  if (buf->current == buf->buffer)
    return (0);

 /*
  * Loop until we have written everything...
  */

  for (ptr = buf->buffer; ptr < buf->current; ptr += bytes)
    if ((bytes = write(buf->fd, ptr, buf->current - ptr)) < 0)
      return (-1);

 /*
  * All done, reset pointers and return success...
  */

  buf->current = buf->buffer;

  return (0);
1083 1084 1085
}


1086 1087 1088 1089
/*
 * 'mxml_file_getc()' - Get a character from a file.
 */

Michael R Sweet's avatar
Michael R Sweet committed
1090 1091 1092
static int				/* O  - Character or EOF */
mxml_file_getc(void *p,			/* I  - Pointer to file */
               int  *encoding)		/* IO - Encoding */
1093
{
1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
  int	ch,				/* Character from file */
	temp;				/* Temporary character */
  FILE	*fp;				/* Pointer to file */


 /*
  * Read a character from the file and see if it is EOF or ASCII...
  */

  fp = (FILE *)p;
  ch = getc(fp);

Michael R Sweet's avatar
Michael R Sweet committed
1106 1107
  if (ch == EOF)
    return (EOF);
1108

Michael R Sweet's avatar
Michael R Sweet committed
1109
  switch (*encoding)
1110
  {
Michael R Sweet's avatar
Michael R Sweet committed
1111 1112 1113 1114
    case ENCODE_UTF8 :
       /*
	* Got a UTF-8 character; convert UTF-8 to Unicode and return...
	*/
1115

Michael R Sweet's avatar
Michael R Sweet committed
1116
	if (!(ch & 0x80))
1117
	{
Michael R Sweet's avatar
Michael R Sweet committed
1118 1119 1120 1121 1122 1123 1124
	  if (mxml_bad_char(ch))
	  {
	    mxml_error("Bad control character 0x%02x not allowed by XML standard!",
        	       ch);
	    return (EOF);
	  }

1125 1126 1127 1128
#if DEBUG > 1
          printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
#endif /* DEBUG > 1 */

Michael R Sweet's avatar
Michael R Sweet committed
1129
	  return (ch);
1130 1131
        }
	else if (ch == 0xfe)
Michael R Sweet's avatar
Michael R Sweet committed
1132 1133 1134 1135
	{
	 /*
	  * UTF-16 big-endian BOM?
	  */
1136

Michael R Sweet's avatar
Michael R Sweet committed
1137 1138 1139 1140 1141
          ch = getc(fp);
	  if (ch != 0xff)
	    return (EOF);

	  *encoding = ENCODE_UTF16BE;
1142

Michael R Sweet's avatar
Michael R Sweet committed
1143 1144 1145 1146 1147 1148 1149
	  return (mxml_file_getc(p, encoding));
	}
	else if (ch == 0xff)
	{
	 /*
	  * UTF-16 little-endian BOM?
	  */
1150

Michael R Sweet's avatar
Michael R Sweet committed
1151 1152 1153
          ch = getc(fp);
	  if (ch != 0xfe)
	    return (EOF);
1154

Michael R Sweet's avatar
Michael R Sweet committed
1155
	  *encoding = ENCODE_UTF16LE;
1156

Michael R Sweet's avatar
Michael R Sweet committed
1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168
	  return (mxml_file_getc(p, encoding));
	}
	else if ((ch & 0xe0) == 0xc0)
	{
	 /*
	  * Two-byte value...
	  */

	  if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = ((ch & 0x1f) << 6) | (temp & 0x3f);
1169 1170

	  if (ch < 0x80)
1171 1172
	  {
	    mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
1173
	    return (EOF);
1174
	  }
Michael R Sweet's avatar
Michael R Sweet committed
1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190
	}
	else if ((ch & 0xf0) == 0xe0)
	{
	 /*
	  * Three-byte value...
	  */

	  if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = ((ch & 0x0f) << 6) | (temp & 0x3f);

	  if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = (ch << 6) | (temp & 0x3f);
1191 1192

	  if (ch < 0x800)
1193 1194 1195 1196 1197
	  {
	    mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
	    return (EOF);
	  }

1198 1199 1200 1201 1202 1203
         /*
	  * Ignore (strip) Byte Order Mark (BOM)...
	  */

	  if (ch == 0xfeff)
	    return (mxml_file_getc(p, encoding));
Michael R Sweet's avatar
Michael R Sweet committed
1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
	}
	else if ((ch & 0xf8) == 0xf0)
	{
	 /*
	  * Four-byte value...
	  */

	  if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = ((ch & 0x07) << 6) | (temp & 0x3f);

	  if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = (ch << 6) | (temp & 0x3f);

	  if ((temp = getc(fp)) == EOF || (temp & 0xc0) != 0x80)
	    return (EOF);

	  ch = (ch << 6) | (temp & 0x3f);
1225 1226

	  if (ch < 0x10000)
1227 1228
	  {
	    mxml_error("Invalid UTF-8 sequence for character 0x%04x!", ch);
1229
	    return (EOF);
1230
	  }
Michael R Sweet's avatar
Michael R Sweet committed
1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241
	}
	else
	  return (EOF);
	break;

    case ENCODE_UTF16BE :
       /*
        * Read UTF-16 big-endian char...
	*/

	ch = (ch << 8) | getc(fp);
1242

Michael R Sweet's avatar
Michael R Sweet committed
1243 1244 1245 1246 1247 1248 1249
	if (mxml_bad_char(ch))
	{
	  mxml_error("Bad control character 0x%02x not allowed by XML standard!",
        	     ch);
	  return (EOF);
	}
        else if (ch >= 0xd800 && ch <= 0xdbff)
Michael R Sweet's avatar
Michael R Sweet committed
1250 1251 1252 1253
	{
	 /*
	  * Multi-word UTF-16 char...
	  */
1254

1255 1256
          int lch = getc(fp);
          lch = (lch << 8) | getc(fp);
1257

1258
          if (lch < 0xdc00 || lch >= 0xdfff)
Michael R Sweet's avatar
Michael R Sweet committed
1259
	    return (EOF);
1260

Michael R Sweet's avatar
Michael R Sweet committed
1261 1262 1263
          ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
	}
	break;
1264

Michael R Sweet's avatar
Michael R Sweet committed
1265 1266 1267 1268
    case ENCODE_UTF16LE :
       /*
        * Read UTF-16 little-endian char...
	*/
1269

Michael R Sweet's avatar
Michael R Sweet committed
1270 1271
	ch |= (getc(fp) << 8);

Michael R Sweet's avatar
Michael R Sweet committed
1272 1273 1274 1275 1276 1277 1278
        if (mxml_bad_char(ch))
	{
	  mxml_error("Bad control character 0x%02x not allowed by XML standard!",
        	     ch);
	  return (EOF);
	}
        else if (ch >= 0xd800 && ch <= 0xdbff)
Michael R Sweet's avatar
Michael R Sweet committed
1279 1280 1281 1282 1283
	{
	 /*
	  * Multi-word UTF-16 char...
	  */

1284 1285
          int lch = getc(fp);
          lch |= (getc(fp) << 8);
Michael R Sweet's avatar
Michael R Sweet committed
1286

1287
          if (lch < 0xdc00 || lch >= 0xdfff)
Michael R Sweet's avatar
Michael R Sweet committed
1288 1289 1290 1291 1292
	    return (EOF);

          ch = (((ch & 0x3ff) << 10) | (lch & 0x3ff)) + 0x10000;
	}
	break;
1293 1294
  }

1295 1296 1297 1298
#if DEBUG > 1
  printf("mxml_file_getc: %c (0x%04x)\n", ch < ' ' ? '.' : ch, ch);
#endif /* DEBUG > 1 */

1299
  return (ch);
1300 1301 1302
}


1303 1304 1305 1306 1307 1308 1309 1310
/*
 * 'mxml_file_putc()' - Write a character to a file.
 *
 * Thin adapter that maps putc()'s EOF failure indication onto the
 * 0/-1 convention used by the put-character callbacks.
 */

static int				/* O - 0 on success, -1 on failure */
mxml_file_putc(int  ch,			/* I - Character to write */
               void *p)			/* I - Pointer to file */
{
  FILE	*fp = (FILE *)p;		/* Output stream */


  if (putc(ch, fp) == EOF)
    return (-1);

  return (0);
}


1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353
/*
 * 'mxml_get_entity()' - Get the character corresponding to an entity...
 *
 * Called after a '&' has been consumed.  Reads the entity body (letters,
 * digits, and '#') through "getc_cb" up to the terminating ';' and maps it
 * to a character: "#NNN" is parsed as decimal, "#xHHH" as hexadecimal, and
 * anything else is looked up with mxmlEntityGetValue().
 *
 * EOF is returned (after reporting through mxml_error()) when the entity is
 * not terminated by ';', is an unknown name, is a numeric reference outside
 * 0..0x10FFFF, or names a control character rejected by mxml_bad_char().
 */

static int				/* O  - Character value or EOF on error */
mxml_get_entity(mxml_node_t *parent,	/* I  - Parent node */
		void        *p,		/* I  - Pointer to source */
		int         *encoding,	/* IO - Character encoding */
                int         (*getc_cb)(void *, int *))
					/* I  - Get character function */
{
  int		ch;			/* Current character */
  long		value;			/* Numeric entity value */
  char		entity[64],		/* Entity string */
		*entptr;		/* Pointer into entity */
  const char	*pname;			/* Parent element name for messages */


  pname  = parent ? parent->value.element.name : "null";
  entptr = entity;

 /*
  * Collect the entity body; stop at the first character that cannot be
  * part of one (normally the ';') or when the buffer is full...
  */

  while ((ch = (*getc_cb)(p, encoding)) != EOF)
    if (ch > 126 || (!isalnum(ch) && ch != '#'))
      break;
    else if (entptr < (entity + sizeof(entity) - 1))
      *entptr++ = (char)ch;
    else
    {
      mxml_error("Entity name too long under parent <%s>!", pname);
      break;
    }

  *entptr = '\0';

  if (ch != ';')
  {
    mxml_error("Character entity \"%s\" not terminated under parent <%s>!",
	       entity, pname);
    return (EOF);
  }

  if (entity[0] == '#')
  {
   /*
    * Numeric reference; keep the full "long" result so that an oversized
    * value cannot be truncated into a different, valid-looking character...
    */

    if (entity[1] == 'x')
      value = strtol(entity + 2, NULL, 16);
    else
      value = strtol(entity + 1, NULL, 10);

    if (value < 0 || value > 0x10ffff)
    {
      mxml_error("Character entity \"%s;\" out of range under parent <%s>!",
		 entity, pname);
      return (EOF);
    }

    ch = (int)value;
  }
  else if ((ch = mxmlEntityGetValue(entity)) < 0)
  {
   /*
    * Unknown name; return now so the negative lookup result is not also
    * reported as a bad control character below...
    */

    mxml_error("Entity name \"%s;\" not supported under parent <%s>!",
	       entity, pname);
    return (EOF);
  }

  if (mxml_bad_char(ch))
  {
    mxml_error("Bad control character 0x%02x under parent <%s> not allowed by XML standard!",
               ch, pname);
    return (EOF);
  }

  return (ch);
}


1376 1377 1378 1379 1380
/*
 * 'mxml_load_data()' - Load data into an XML node tree.
 */

static mxml_node_t *			/* O - First node or NULL if the file could not be read. */
1381 1382 1383 1384 1385 1386 1387
mxml_load_data(
    mxml_node_t     *top,		/* I - Top node */
    void            *p,			/* I - Pointer to data */
    mxml_load_cb_t  cb,			/* I - Callback function or MXML_NO_CALLBACK */
    _mxml_getc_cb_t getc_cb,		/* I - Read function */
    mxml_sax_cb_t   sax_cb,		/* I - SAX callback or MXML_NO_CALLBACK */
    void            *sax_data)		/* I - SAX user data */
Michael R Sweet's avatar
Michael R Sweet committed
1388 1389
{
  mxml_node_t	*node,			/* Current node */
1390
		*first,			/* First node added */
Michael R Sweet's avatar
Michael R Sweet committed
1391 1392 1393
		*parent;		/* Current parent node */
  int		ch,			/* Character from file */
		whitespace;		/* Non-zero if whitespace seen */
1394
  char		*buffer,		/* String buffer */
Michael R Sweet's avatar
Michael R Sweet committed
1395
		*bufptr;		/* Pointer into buffer */
1396
  int		bufsize;		/* Size of buffer */
Michael R Sweet's avatar
Michael R Sweet committed
1397
  mxml_type_t	type;			/* Current node type */
Michael R Sweet's avatar
Michael R Sweet committed
1398
  int		encoding;		/* Character encoding */
1399 1400
  _mxml_global_t *global = _mxml_global();
					/* Global data */
1401 1402 1403 1404 1405 1406 1407 1408 1409
  static const char * const types[] =	/* Type strings... */
		{
		  "MXML_ELEMENT",	/* XML element with attributes */
		  "MXML_INTEGER",	/* Integer value */
		  "MXML_OPAQUE",	/* Opaque string */
		  "MXML_REAL",		/* Real value */
		  "MXML_TEXT",		/* Text fragment */
		  "MXML_CUSTOM"		/* Custom data */
		};
Michael R Sweet's avatar
Michael R Sweet committed
1410 1411 1412 1413 1414 1415


 /*
  * Read elements and other nodes from the file...
  */

1416 1417
  if ((buffer = malloc(64)) == NULL)
  {
Michael R Sweet's avatar
Michael R Sweet committed
1418
    mxml_error("Unable to allocate string buffer!");
1419 1420 1421 1422
    return (NULL);
  }

  bufsize    = 64;
Michael R Sweet's avatar
Michael R Sweet committed
1423 1424
  bufptr     = buffer;
  parent     = top;
1425
  first      = NULL;
Michael R Sweet's avatar
Michael R Sweet committed
1426
  whitespace = 0;
Michael R Sweet's avatar
Michael R Sweet committed
1427
  encoding   = ENCODE_UTF8;
Michael R Sweet's avatar
Michael R Sweet committed
1428 1429 1430

  if (cb && parent)
    type = (*cb)(parent);
1431
  else if (parent)
Michael R Sweet's avatar
Michael R Sweet committed
1432
    type = MXML_TEXT;
1433 1434
  else
    type = MXML_IGNORE;
Michael R Sweet's avatar
Michael R Sweet committed
1435

Michael R Sweet's avatar
Michael R Sweet committed
1436
  while ((ch = (*getc_cb)(p, &encoding)) != EOF)
Michael R Sweet's avatar
Michael R Sweet committed
1437
  {
1438
    if ((ch == '<' ||
1439
         (mxml_isspace(ch) && type != MXML_OPAQUE && type != MXML_CUSTOM)) &&
1440
        bufptr > buffer)
Michael R Sweet's avatar
Michael R Sweet committed
1441 1442 1443 1444 1445 1446 1447 1448 1449 1450
    {
     /*
      * Add a new value node...
      */

      *bufptr = '\0';

      switch (type)
      {
	case MXML_INTEGER :
1451
            node = mxmlNewInteger(parent, strtol(buffer, &bufptr, 0));
Michael R Sweet's avatar
Michael R Sweet committed
1452 1453 1454 1455 1456 1457 1458
	    break;

	case MXML_OPAQUE :
            node = mxmlNewOpaque(parent, buffer);
	    break;

	case MXML_REAL :
1459
            node = mxmlNewReal(parent, strtod(buffer, &bufptr));
Michael R Sweet's avatar
Michael R Sweet committed
1460 1461 1462 1463 1464 1465
	    break;

	case MXML_TEXT :
            node = mxmlNewText(parent, whitespace, buffer);
	    break;

1466
	case MXML_CUSTOM :
1467
	    if (global->custom_load_cb)
1468 1469 1470 1471 1472 1473 1474
	    {
	     /*
	      * Use the callback to fill in the custom data...
	      */

              node = mxmlNewCustom(parent, NULL, NULL);

1475
	      if ((*global->custom_load_cb)(node, buffer))
1476 1477 1478 1479 1480 1481 1482 1483 1484
	      {
	        mxml_error("Bad custom value '%s' in parent <%s>!",
		           buffer, parent ? parent->value.element.name : "null");
		mxmlDelete(node);
		node = NULL;
	      }
	      break;
	    }

1485
        default : /* Ignore... */
Michael R Sweet's avatar
Michael R Sweet committed
1486 1487
	    node = NULL;
	    break;
1488
      }
Michael R Sweet's avatar
Michael R Sweet committed
1489

1490 1491 1492 1493 1494 1495
      if (*bufptr)
      {
       /*
        * Bad integer/real number value...
	*/

Michael R Sweet's avatar
Michael R Sweet committed
1496 1497 1498
        mxml_error("Bad %s value '%s' in parent <%s>!",
	           type == MXML_INTEGER ? "integer" : "real", buffer,
		   parent ? parent->value.element.name : "null");
1499 1500 1501 1502
	break;
      }

      bufptr     = buffer;
1503
      whitespace = mxml_isspace(ch) && type == MXML_TEXT;
Michael R Sweet's avatar
Michael R Sweet committed
1504

1505
      if (!node && type != MXML_IGNORE)
Michael R Sweet's avatar
Michael R Sweet committed
1506 1507
      {
       /*
1508
	* Print error and return...
Michael R Sweet's avatar
Michael R Sweet committed
1509 1510
	*/

1511 1512 1513
	mxml_error("Unable to add value node of type %s to parent <%s>!",
	           types[type], parent ? parent->value.element.name : "null");
	goto error;
Michael R Sweet's avatar
Michael R Sweet committed
1514
      }
1515

1516 1517 1518 1519 1520 1521 1522 1523
      if (sax_cb)
      {
        (*sax_cb)(node, MXML_SAX_DATA, sax_data);

        if (!mxmlRelease(node))
          node = NULL;
      }

1524
      if (!first && node)
1525
        first = node;
Michael R Sweet's avatar
Michael R Sweet committed
1526
    }
1527
    else if (mxml_isspace(ch) && type == MXML_TEXT)
1528 1529 1530
      whitespace = 1;

   /*
1531 1532
    * Add lone whitespace node if we have an element and existing
    * whitespace...
1533 1534 1535 1536
    */

    if (ch == '<' && whitespace && type == MXML_TEXT)
    {
1537
      if (parent)
1538
      {
1539
	node = mxmlNewText(parent, whitespace, "");
1540

1541 1542 1543
	if (sax_cb)
	{
	  (*sax_cb)(node, MXML_SAX_DATA, sax_data);
1544

1545 1546 1547 1548 1549 1550 1551
	  if (!mxmlRelease(node))
	    node = NULL;
	}

	if (!first && node)
	  first = node;
      }
1552

1553
      whitespace = 0;
1554
    }
Michael R Sweet's avatar
Michael R Sweet committed
1555 1556 1557 1558 1559 1560 1561 1562 1563

    if (ch == '<')
    {
     /*
      * Start of open/close tag...
      */

      bufptr = buffer;

Michael R Sweet's avatar
Michael R Sweet committed
1564
      while ((ch = (*getc_cb)(p, &encoding)) != EOF)
1565
        if (mxml_isspace(ch) || ch == '>' || (ch == '/' && bufptr > buffer))
Michael R Sweet's avatar
Michael R Sweet committed
1566
	  break;
1567 1568 1569 1570 1571
	else if (ch == '<')
	{
	  mxml_error("Bare < in element!");
	  goto error;
	}
1572
	else if (ch == '&')
1573
	{
Michael R Sweet's avatar
Michael R Sweet committed
1574
	  if ((ch = mxml_get_entity(parent, p, &encoding, getc_cb)) == EOF)
1575