parser.c revision f8e3db0445a1bc8cfe3f77326b07ec161482caa2
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 *            implemented on top of the SAX interfaces
4 *
5 * References:
6 *   The XML specification:
7 *     http://www.w3.org/TR/REC-xml
8 *   Original 1.0 version:
9 *     http://www.w3.org/TR/1998/REC-xml-19980210
10 *   XML second edition working draft
11 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <limits.h>
44#include <string.h>
45#include <stdarg.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/threads.h>
48#include <libxml/globals.h>
49#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
58#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
61#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
65#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
83#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
86
87#include "buf.h"
88#include "enc.h"
89
/*
 * Forward declaration of the generic fatal-error reporter; its definition
 * appears further down in this file, after its first use.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

/*
 * Forward declaration only; the definition is not part of this section
 * of the file.
 */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                  const xmlChar *base, xmlParserCtxtPtr pctx);
96
97/************************************************************************
98 *									*
99 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
100 *									*
101 ************************************************************************/
102
/*
 * XML_PARSER_BIG_ENTITY: xmlParserEntityCheck() accepts without further
 *    checking any non-zero replacement size smaller than this value.
 */
#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): not referenced in this part of the file - confirm usage. */
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125                     xmlEntityPtr ent)
126{
127    size_t consumed = 0;
128
129    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130        return (0);
131    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132        return (1);
133    if (size != 0) {
134        /*
135         * Do the check based on the replacement size of the entity
136         */
137        if (size < XML_PARSER_BIG_ENTITY)
138	    return(0);
139
140        /*
141         * A limit on the amount of text data reasonably used
142         */
143        if (ctxt->input != NULL) {
144            consumed = ctxt->input->consumed +
145                (ctxt->input->cur - ctxt->input->base);
146        }
147        consumed += ctxt->sizeentities;
148
149        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
150	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
151            return (0);
152    } else if (ent != NULL) {
153        /*
154         * use the number of parsed entities in the replacement
155         */
156        size = ent->checked;
157
158        /*
159         * The amount of data parsed counting entities size only once
160         */
161        if (ctxt->input != NULL) {
162            consumed = ctxt->input->consumed +
163                (ctxt->input->cur - ctxt->input->base);
164        }
165        consumed += ctxt->sizeentities;
166
167        /*
168         * Check the density of entities for the amount of data
169	 * knowing an entity reference will take at least 3 bytes
170         */
171        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
172            return (0);
173    } else {
174        /*
175         * strange we got no data for checking just return
176         */
177        return (0);
178    }
179
180    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
181    return (1);
182}
183
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 *
 * The variable has external linkage (it is not declared static).
 */
unsigned int xmlParserMaxDepth = 256;
193
194
195
/*
 * NOTE(review): the four macros below are not referenced in this part of
 * the file; their users are presumably further down - confirm before
 * changing any value.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
211
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 *
 * Both the strings and the array slots are const: this is a read-only
 * lookup table and must never be modified at run time.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
221
222
/*
 * Forward declarations of file-local (static) helpers. None of them is
 * defined in this part of the file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
248
249/************************************************************************
250 *									*
251 *		Some factorized error routines				*
252 *									*
253 ************************************************************************/
254
255/**
256 * xmlErrAttributeDup:
257 * @ctxt:  an XML parser context
258 * @prefix:  the attribute prefix
259 * @localname:  the attribute localname
260 *
261 * Handle a redefinition of attribute error
262 */
263static void
264xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
265                   const xmlChar * localname)
266{
267    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
268        (ctxt->instate == XML_PARSER_EOF))
269	return;
270    if (ctxt != NULL)
271	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
272
273    if (prefix == NULL)
274        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
275                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
276                        (const char *) localname, NULL, NULL, 0, 0,
277                        "Attribute %s redefined\n", localname);
278    else
279        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
280                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
281                        (const char *) prefix, (const char *) localname,
282                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
283                        localname);
284    if (ctxt != NULL) {
285	ctxt->wellFormed = 0;
286	if (ctxt->recovery == 0)
287	    ctxt->disableSAX = 1;
288    }
289}
290
291/**
292 * xmlFatalErr:
293 * @ctxt:  an XML parser context
294 * @error:  the error number
295 * @extra:  extra information string
296 *
297 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
298 */
299static void
300xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
301{
302    const char *errmsg;
303    char errstr[129] = "";
304
305    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
306        (ctxt->instate == XML_PARSER_EOF))
307	return;
308    switch (error) {
309        case XML_ERR_INVALID_HEX_CHARREF:
310            errmsg = "CharRef: invalid hexadecimal value";
311            break;
312        case XML_ERR_INVALID_DEC_CHARREF:
313            errmsg = "CharRef: invalid decimal value";
314            break;
315        case XML_ERR_INVALID_CHARREF:
316            errmsg = "CharRef: invalid value";
317            break;
318        case XML_ERR_INTERNAL_ERROR:
319            errmsg = "internal error";
320            break;
321        case XML_ERR_PEREF_AT_EOF:
322            errmsg = "PEReference at end of document";
323            break;
324        case XML_ERR_PEREF_IN_PROLOG:
325            errmsg = "PEReference in prolog";
326            break;
327        case XML_ERR_PEREF_IN_EPILOG:
328            errmsg = "PEReference in epilog";
329            break;
330        case XML_ERR_PEREF_NO_NAME:
331            errmsg = "PEReference: no name";
332            break;
333        case XML_ERR_PEREF_SEMICOL_MISSING:
334            errmsg = "PEReference: expecting ';'";
335            break;
336        case XML_ERR_ENTITY_LOOP:
337            errmsg = "Detected an entity reference loop";
338            break;
339        case XML_ERR_ENTITY_NOT_STARTED:
340            errmsg = "EntityValue: \" or ' expected";
341            break;
342        case XML_ERR_ENTITY_PE_INTERNAL:
343            errmsg = "PEReferences forbidden in internal subset";
344            break;
345        case XML_ERR_ENTITY_NOT_FINISHED:
346            errmsg = "EntityValue: \" or ' expected";
347            break;
348        case XML_ERR_ATTRIBUTE_NOT_STARTED:
349            errmsg = "AttValue: \" or ' expected";
350            break;
351        case XML_ERR_LT_IN_ATTRIBUTE:
352            errmsg = "Unescaped '<' not allowed in attributes values";
353            break;
354        case XML_ERR_LITERAL_NOT_STARTED:
355            errmsg = "SystemLiteral \" or ' expected";
356            break;
357        case XML_ERR_LITERAL_NOT_FINISHED:
358            errmsg = "Unfinished System or Public ID \" or ' expected";
359            break;
360        case XML_ERR_MISPLACED_CDATA_END:
361            errmsg = "Sequence ']]>' not allowed in content";
362            break;
363        case XML_ERR_URI_REQUIRED:
364            errmsg = "SYSTEM or PUBLIC, the URI is missing";
365            break;
366        case XML_ERR_PUBID_REQUIRED:
367            errmsg = "PUBLIC, the Public Identifier is missing";
368            break;
369        case XML_ERR_HYPHEN_IN_COMMENT:
370            errmsg = "Comment must not contain '--' (double-hyphen)";
371            break;
372        case XML_ERR_PI_NOT_STARTED:
373            errmsg = "xmlParsePI : no target name";
374            break;
375        case XML_ERR_RESERVED_XML_NAME:
376            errmsg = "Invalid PI name";
377            break;
378        case XML_ERR_NOTATION_NOT_STARTED:
379            errmsg = "NOTATION: Name expected here";
380            break;
381        case XML_ERR_NOTATION_NOT_FINISHED:
382            errmsg = "'>' required to close NOTATION declaration";
383            break;
384        case XML_ERR_VALUE_REQUIRED:
385            errmsg = "Entity value required";
386            break;
387        case XML_ERR_URI_FRAGMENT:
388            errmsg = "Fragment not allowed";
389            break;
390        case XML_ERR_ATTLIST_NOT_STARTED:
391            errmsg = "'(' required to start ATTLIST enumeration";
392            break;
393        case XML_ERR_NMTOKEN_REQUIRED:
394            errmsg = "NmToken expected in ATTLIST enumeration";
395            break;
396        case XML_ERR_ATTLIST_NOT_FINISHED:
397            errmsg = "')' required to finish ATTLIST enumeration";
398            break;
399        case XML_ERR_MIXED_NOT_STARTED:
400            errmsg = "MixedContentDecl : '|' or ')*' expected";
401            break;
402        case XML_ERR_PCDATA_REQUIRED:
403            errmsg = "MixedContentDecl : '#PCDATA' expected";
404            break;
405        case XML_ERR_ELEMCONTENT_NOT_STARTED:
406            errmsg = "ContentDecl : Name or '(' expected";
407            break;
408        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
409            errmsg = "ContentDecl : ',' '|' or ')' expected";
410            break;
411        case XML_ERR_PEREF_IN_INT_SUBSET:
412            errmsg =
413                "PEReference: forbidden within markup decl in internal subset";
414            break;
415        case XML_ERR_GT_REQUIRED:
416            errmsg = "expected '>'";
417            break;
418        case XML_ERR_CONDSEC_INVALID:
419            errmsg = "XML conditional section '[' expected";
420            break;
421        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
422            errmsg = "Content error in the external subset";
423            break;
424        case XML_ERR_CONDSEC_INVALID_KEYWORD:
425            errmsg =
426                "conditional section INCLUDE or IGNORE keyword expected";
427            break;
428        case XML_ERR_CONDSEC_NOT_FINISHED:
429            errmsg = "XML conditional section not closed";
430            break;
431        case XML_ERR_XMLDECL_NOT_STARTED:
432            errmsg = "Text declaration '<?xml' required";
433            break;
434        case XML_ERR_XMLDECL_NOT_FINISHED:
435            errmsg = "parsing XML declaration: '?>' expected";
436            break;
437        case XML_ERR_EXT_ENTITY_STANDALONE:
438            errmsg = "external parsed entities cannot be standalone";
439            break;
440        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
441            errmsg = "EntityRef: expecting ';'";
442            break;
443        case XML_ERR_DOCTYPE_NOT_FINISHED:
444            errmsg = "DOCTYPE improperly terminated";
445            break;
446        case XML_ERR_LTSLASH_REQUIRED:
447            errmsg = "EndTag: '</' not found";
448            break;
449        case XML_ERR_EQUAL_REQUIRED:
450            errmsg = "expected '='";
451            break;
452        case XML_ERR_STRING_NOT_CLOSED:
453            errmsg = "String not closed expecting \" or '";
454            break;
455        case XML_ERR_STRING_NOT_STARTED:
456            errmsg = "String not started expecting ' or \"";
457            break;
458        case XML_ERR_ENCODING_NAME:
459            errmsg = "Invalid XML encoding name";
460            break;
461        case XML_ERR_STANDALONE_VALUE:
462            errmsg = "standalone accepts only 'yes' or 'no'";
463            break;
464        case XML_ERR_DOCUMENT_EMPTY:
465            errmsg = "Document is empty";
466            break;
467        case XML_ERR_DOCUMENT_END:
468            errmsg = "Extra content at the end of the document";
469            break;
470        case XML_ERR_NOT_WELL_BALANCED:
471            errmsg = "chunk is not well balanced";
472            break;
473        case XML_ERR_EXTRA_CONTENT:
474            errmsg = "extra content at the end of well balanced chunk";
475            break;
476        case XML_ERR_VERSION_MISSING:
477            errmsg = "Malformed declaration expecting version";
478            break;
479        case XML_ERR_NAME_TOO_LONG:
480            errmsg = "Name too long use XML_PARSE_HUGE option";
481            break;
482#if 0
483        case:
484            errmsg = "";
485            break;
486#endif
487        default:
488            errmsg = "Unregistered error message";
489    }
490    if (info == NULL)
491        snprintf(errstr, 128, "%s\n", errmsg);
492    else
493        snprintf(errstr, 128, "%s: %%s\n", errmsg);
494    if (ctxt != NULL)
495	ctxt->errNo = error;
496    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497                    XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
498                    info);
499    if (ctxt != NULL) {
500	ctxt->wellFormed = 0;
501	if (ctxt->recovery == 0)
502	    ctxt->disableSAX = 1;
503    }
504}
505
506/**
507 * xmlFatalErrMsg:
508 * @ctxt:  an XML parser context
509 * @error:  the error number
510 * @msg:  the error message
511 *
512 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
513 */
514static void
515xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516               const char *msg)
517{
518    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
519        (ctxt->instate == XML_PARSER_EOF))
520	return;
521    if (ctxt != NULL)
522	ctxt->errNo = error;
523    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
524                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
525    if (ctxt != NULL) {
526	ctxt->wellFormed = 0;
527	if (ctxt->recovery == 0)
528	    ctxt->disableSAX = 1;
529    }
530}
531
532/**
533 * xmlWarningMsg:
534 * @ctxt:  an XML parser context
535 * @error:  the error number
536 * @msg:  the error message
537 * @str1:  extra data
538 * @str2:  extra data
539 *
540 * Handle a warning.
541 */
542static void
543xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
544              const char *msg, const xmlChar *str1, const xmlChar *str2)
545{
546    xmlStructuredErrorFunc schannel = NULL;
547
548    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549        (ctxt->instate == XML_PARSER_EOF))
550	return;
551    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
552        (ctxt->sax->initialized == XML_SAX2_MAGIC))
553        schannel = ctxt->sax->serror;
554    if (ctxt != NULL) {
555        __xmlRaiseError(schannel,
556                    (ctxt->sax) ? ctxt->sax->warning : NULL,
557                    ctxt->userData,
558                    ctxt, NULL, XML_FROM_PARSER, error,
559                    XML_ERR_WARNING, NULL, 0,
560		    (const char *) str1, (const char *) str2, NULL, 0, 0,
561		    msg, (const char *) str1, (const char *) str2);
562    } else {
563        __xmlRaiseError(schannel, NULL, NULL,
564                    ctxt, NULL, XML_FROM_PARSER, error,
565                    XML_ERR_WARNING, NULL, 0,
566		    (const char *) str1, (const char *) str2, NULL, 0, 0,
567		    msg, (const char *) str1, (const char *) str2);
568    }
569}
570
571/**
572 * xmlValidityError:
573 * @ctxt:  an XML parser context
574 * @error:  the error number
575 * @msg:  the error message
576 * @str1:  extra data
577 *
578 * Handle a validity error.
579 */
580static void
581xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
582              const char *msg, const xmlChar *str1, const xmlChar *str2)
583{
584    xmlStructuredErrorFunc schannel = NULL;
585
586    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
587        (ctxt->instate == XML_PARSER_EOF))
588	return;
589    if (ctxt != NULL) {
590	ctxt->errNo = error;
591	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
592	    schannel = ctxt->sax->serror;
593    }
594    if (ctxt != NULL) {
595        __xmlRaiseError(schannel,
596                    ctxt->vctxt.error, ctxt->vctxt.userData,
597                    ctxt, NULL, XML_FROM_DTD, error,
598                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
599		    (const char *) str2, NULL, 0, 0,
600		    msg, (const char *) str1, (const char *) str2);
601	ctxt->valid = 0;
602    } else {
603        __xmlRaiseError(schannel, NULL, NULL,
604                    ctxt, NULL, XML_FROM_DTD, error,
605                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
606		    (const char *) str2, NULL, 0, 0,
607		    msg, (const char *) str1, (const char *) str2);
608    }
609}
610
611/**
612 * xmlFatalErrMsgInt:
613 * @ctxt:  an XML parser context
614 * @error:  the error number
615 * @msg:  the error message
616 * @val:  an integer value
617 *
618 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
619 */
620static void
621xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
622                  const char *msg, int val)
623{
624    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
625        (ctxt->instate == XML_PARSER_EOF))
626	return;
627    if (ctxt != NULL)
628	ctxt->errNo = error;
629    __xmlRaiseError(NULL, NULL, NULL,
630                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
631                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
632    if (ctxt != NULL) {
633	ctxt->wellFormed = 0;
634	if (ctxt->recovery == 0)
635	    ctxt->disableSAX = 1;
636    }
637}
638
639/**
640 * xmlFatalErrMsgStrIntStr:
641 * @ctxt:  an XML parser context
642 * @error:  the error number
643 * @msg:  the error message
644 * @str1:  an string info
645 * @val:  an integer value
646 * @str2:  an string info
647 *
648 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
649 */
650static void
651xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
652                  const char *msg, const xmlChar *str1, int val,
653		  const xmlChar *str2)
654{
655    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
656        (ctxt->instate == XML_PARSER_EOF))
657	return;
658    if (ctxt != NULL)
659	ctxt->errNo = error;
660    __xmlRaiseError(NULL, NULL, NULL,
661                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
662                    NULL, 0, (const char *) str1, (const char *) str2,
663		    NULL, val, 0, msg, str1, val, str2);
664    if (ctxt != NULL) {
665	ctxt->wellFormed = 0;
666	if (ctxt->recovery == 0)
667	    ctxt->disableSAX = 1;
668    }
669}
670
671/**
672 * xmlFatalErrMsgStr:
673 * @ctxt:  an XML parser context
674 * @error:  the error number
675 * @msg:  the error message
676 * @val:  a string value
677 *
678 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
679 */
680static void
681xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
682                  const char *msg, const xmlChar * val)
683{
684    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
685        (ctxt->instate == XML_PARSER_EOF))
686	return;
687    if (ctxt != NULL)
688	ctxt->errNo = error;
689    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
690                    XML_FROM_PARSER, error, XML_ERR_FATAL,
691                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
692                    val);
693    if (ctxt != NULL) {
694	ctxt->wellFormed = 0;
695	if (ctxt->recovery == 0)
696	    ctxt->disableSAX = 1;
697    }
698}
699
700/**
701 * xmlErrMsgStr:
702 * @ctxt:  an XML parser context
703 * @error:  the error number
704 * @msg:  the error message
705 * @val:  a string value
706 *
707 * Handle a non fatal parser error
708 */
709static void
710xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711                  const char *msg, const xmlChar * val)
712{
713    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714        (ctxt->instate == XML_PARSER_EOF))
715	return;
716    if (ctxt != NULL)
717	ctxt->errNo = error;
718    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
719                    XML_FROM_PARSER, error, XML_ERR_ERROR,
720                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
721                    val);
722}
723
724/**
725 * xmlNsErr:
726 * @ctxt:  an XML parser context
727 * @error:  the error number
728 * @msg:  the message
729 * @info1:  extra information string
730 * @info2:  extra information string
731 *
732 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
733 */
734static void
735xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
736         const char *msg,
737         const xmlChar * info1, const xmlChar * info2,
738         const xmlChar * info3)
739{
740    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
741        (ctxt->instate == XML_PARSER_EOF))
742	return;
743    if (ctxt != NULL)
744	ctxt->errNo = error;
745    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
747                    (const char *) info2, (const char *) info3, 0, 0, msg,
748                    info1, info2, info3);
749    if (ctxt != NULL)
750	ctxt->nsWellFormed = 0;
751}
752
753/**
754 * xmlNsWarn
755 * @ctxt:  an XML parser context
756 * @error:  the error number
757 * @msg:  the message
758 * @info1:  extra information string
759 * @info2:  extra information string
760 *
761 * Handle a namespace warning error
762 */
763static void
764xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
765         const char *msg,
766         const xmlChar * info1, const xmlChar * info2,
767         const xmlChar * info3)
768{
769    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
770        (ctxt->instate == XML_PARSER_EOF))
771	return;
772    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
773                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
774                    (const char *) info2, (const char *) info3, 0, 0, msg,
775                    info1, info2, info3);
776}
777
778/************************************************************************
779 *									*
780 *		Library wide options					*
781 *									*
782 ************************************************************************/
783
784/**
785  * xmlHasFeature:
786  * @feature: the feature to be examined
787  *
788  * Examines if the library has been compiled with a given feature.
789  *
790  * Returns a non-zero value if the feature exist, otherwise zero.
791  * Returns zero (0) if the feature does not exist or an unknown
792  * unknown feature is requested, non-zero otherwise.
793  */
794int
795xmlHasFeature(xmlFeature feature)
796{
797    switch (feature) {
798	case XML_WITH_THREAD:
799#ifdef LIBXML_THREAD_ENABLED
800	    return(1);
801#else
802	    return(0);
803#endif
804        case XML_WITH_TREE:
805#ifdef LIBXML_TREE_ENABLED
806            return(1);
807#else
808            return(0);
809#endif
810        case XML_WITH_OUTPUT:
811#ifdef LIBXML_OUTPUT_ENABLED
812            return(1);
813#else
814            return(0);
815#endif
816        case XML_WITH_PUSH:
817#ifdef LIBXML_PUSH_ENABLED
818            return(1);
819#else
820            return(0);
821#endif
822        case XML_WITH_READER:
823#ifdef LIBXML_READER_ENABLED
824            return(1);
825#else
826            return(0);
827#endif
828        case XML_WITH_PATTERN:
829#ifdef LIBXML_PATTERN_ENABLED
830            return(1);
831#else
832            return(0);
833#endif
834        case XML_WITH_WRITER:
835#ifdef LIBXML_WRITER_ENABLED
836            return(1);
837#else
838            return(0);
839#endif
840        case XML_WITH_SAX1:
841#ifdef LIBXML_SAX1_ENABLED
842            return(1);
843#else
844            return(0);
845#endif
846        case XML_WITH_FTP:
847#ifdef LIBXML_FTP_ENABLED
848            return(1);
849#else
850            return(0);
851#endif
852        case XML_WITH_HTTP:
853#ifdef LIBXML_HTTP_ENABLED
854            return(1);
855#else
856            return(0);
857#endif
858        case XML_WITH_VALID:
859#ifdef LIBXML_VALID_ENABLED
860            return(1);
861#else
862            return(0);
863#endif
864        case XML_WITH_HTML:
865#ifdef LIBXML_HTML_ENABLED
866            return(1);
867#else
868            return(0);
869#endif
870        case XML_WITH_LEGACY:
871#ifdef LIBXML_LEGACY_ENABLED
872            return(1);
873#else
874            return(0);
875#endif
876        case XML_WITH_C14N:
877#ifdef LIBXML_C14N_ENABLED
878            return(1);
879#else
880            return(0);
881#endif
882        case XML_WITH_CATALOG:
883#ifdef LIBXML_CATALOG_ENABLED
884            return(1);
885#else
886            return(0);
887#endif
888        case XML_WITH_XPATH:
889#ifdef LIBXML_XPATH_ENABLED
890            return(1);
891#else
892            return(0);
893#endif
894        case XML_WITH_XPTR:
895#ifdef LIBXML_XPTR_ENABLED
896            return(1);
897#else
898            return(0);
899#endif
900        case XML_WITH_XINCLUDE:
901#ifdef LIBXML_XINCLUDE_ENABLED
902            return(1);
903#else
904            return(0);
905#endif
906        case XML_WITH_ICONV:
907#ifdef LIBXML_ICONV_ENABLED
908            return(1);
909#else
910            return(0);
911#endif
912        case XML_WITH_ISO8859X:
913#ifdef LIBXML_ISO8859X_ENABLED
914            return(1);
915#else
916            return(0);
917#endif
918        case XML_WITH_UNICODE:
919#ifdef LIBXML_UNICODE_ENABLED
920            return(1);
921#else
922            return(0);
923#endif
924        case XML_WITH_REGEXP:
925#ifdef LIBXML_REGEXP_ENABLED
926            return(1);
927#else
928            return(0);
929#endif
930        case XML_WITH_AUTOMATA:
931#ifdef LIBXML_AUTOMATA_ENABLED
932            return(1);
933#else
934            return(0);
935#endif
936        case XML_WITH_EXPR:
937#ifdef LIBXML_EXPR_ENABLED
938            return(1);
939#else
940            return(0);
941#endif
942        case XML_WITH_SCHEMAS:
943#ifdef LIBXML_SCHEMAS_ENABLED
944            return(1);
945#else
946            return(0);
947#endif
948        case XML_WITH_SCHEMATRON:
949#ifdef LIBXML_SCHEMATRON_ENABLED
950            return(1);
951#else
952            return(0);
953#endif
954        case XML_WITH_MODULES:
955#ifdef LIBXML_MODULES_ENABLED
956            return(1);
957#else
958            return(0);
959#endif
960        case XML_WITH_DEBUG:
961#ifdef LIBXML_DEBUG_ENABLED
962            return(1);
963#else
964            return(0);
965#endif
966        case XML_WITH_DEBUG_MEM:
967#ifdef DEBUG_MEMORY_LOCATION
968            return(1);
969#else
970            return(0);
971#endif
972        case XML_WITH_DEBUG_RUN:
973#ifdef LIBXML_DEBUG_RUNTIME
974            return(1);
975#else
976            return(0);
977#endif
978        case XML_WITH_ZLIB:
979#ifdef LIBXML_ZLIB_ENABLED
980            return(1);
981#else
982            return(0);
983#endif
984        case XML_WITH_LZMA:
985#ifdef LIBXML_LZMA_ENABLED
986            return(1);
987#else
988            return(0);
989#endif
990        case XML_WITH_ICU:
991#ifdef LIBXML_ICU_ENABLED
992            return(1);
993#else
994            return(0);
995#endif
996        default:
997	    break;
998     }
999     return(0);
1000}
1001
1002/************************************************************************
1003 *									*
1004 *		SAX2 defaulted attributes handling			*
1005 *									*
1006 ************************************************************************/
1007
1008/**
1009 * xmlDetectSAX2:
1010 * @ctxt:  an XML parser context
1011 *
1012 * Do the SAX2 detection and specific intialization
1013 */
1014static void
1015xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1016    if (ctxt == NULL) return;
1017#ifdef LIBXML_SAX1_ENABLED
1018    if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1019        ((ctxt->sax->startElementNs != NULL) ||
1020         (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1021#else
1022    ctxt->sax2 = 1;
1023#endif /* LIBXML_SAX1_ENABLED */
1024
1025    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1026    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1027    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1028    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1029		(ctxt->str_xml_ns == NULL)) {
1030        xmlErrMemory(ctxt, NULL);
1031    }
1032}
1033
/*
 * Set of defaulted attributes attached to one element.
 *
 * The structure is over-allocated by xmlAddDefAttrs(): @values is indexed
 * well past its declared 5 slots, each attribute using 5 consecutive
 * entries starting at values[5 * index]:
 *   +0 local name, +1 prefix, +2 value, +3 end of value (value + length),
 *   +4 "external" marker or NULL.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array, counted in attributes */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1041
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 *         if an argument is NULL or if the normalization was done in place
 *         (@src == @dst) and did not change the string.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* keep @dst untouched so the start of the result can be returned */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
	if (*src == 0x20) {
	    /* collapse the run; emit one space unless it is trailing */
	    while (*src == 0x20) src++;
	    if (*src != 0)
		*out++ = 0x20;
	} else {
	    *out++ = *src++;
	}
    }
    *out = 0;
    /*
     * Both cursors can only meet at the end when working in place and
     * no character was dropped, i.e. no conversion was needed.
     */
    if (out == src)
       return(NULL);
    return(dst);
}
1080
1081/**
1082 * xmlAttrNormalizeSpace2:
1083 * @src: the source string
1084 *
1085 * Normalize the space in non CDATA attribute values, a slightly more complex
1086 * front end to avoid allocation problems when running on attribute values
1087 * coming from the input.
1088 *
1089 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1090 *         is needed.
1091 */
1092static const xmlChar *
1093xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1094{
1095    int i;
1096    int remove_head = 0;
1097    int need_realloc = 0;
1098    const xmlChar *cur;
1099
1100    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1101        return(NULL);
1102    i = *len;
1103    if (i <= 0)
1104        return(NULL);
1105
1106    cur = src;
1107    while (*cur == 0x20) {
1108        cur++;
1109	remove_head++;
1110    }
1111    while (*cur != 0) {
1112	if (*cur == 0x20) {
1113	    cur++;
1114	    if ((*cur == 0x20) || (*cur == 0)) {
1115	        need_realloc = 1;
1116		break;
1117	    }
1118	} else
1119	    cur++;
1120    }
1121    if (need_realloc) {
1122        xmlChar *ret;
1123
1124	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1125	if (ret == NULL) {
1126	    xmlErrMemory(ctxt, NULL);
1127	    return(NULL);
1128	}
1129	xmlAttrNormalizeSpace(ret, ret);
1130	*len = (int) strlen((const char *)ret);
1131        return(ret);
1132    } else if (remove_head) {
1133        *len -= remove_head;
1134        memmove(src, src + remove_head, 1 + *len);
1135	return(src);
1136    }
1137    return(NULL);
1138}
1139
1140/**
1141 * xmlAddDefAttrs:
1142 * @ctxt:  an XML parser context
1143 * @fullname:  the element fullname
1144 * @fullattr:  the attribute fullname
1145 * @value:  the attribute value
1146 *
1147 * Add a defaulted attribute for an element
1148 */
1149static void
1150xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1151               const xmlChar *fullname,
1152               const xmlChar *fullattr,
1153               const xmlChar *value) {
1154    xmlDefAttrsPtr defaults;
1155    int len;
1156    const xmlChar *name;
1157    const xmlChar *prefix;
1158
1159    /*
1160     * Allows to detect attribute redefinitions
1161     */
1162    if (ctxt->attsSpecial != NULL) {
1163        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1164	    return;
1165    }
1166
1167    if (ctxt->attsDefault == NULL) {
1168        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1169	if (ctxt->attsDefault == NULL)
1170	    goto mem_error;
1171    }
1172
1173    /*
1174     * split the element name into prefix:localname , the string found
1175     * are within the DTD and then not associated to namespace names.
1176     */
1177    name = xmlSplitQName3(fullname, &len);
1178    if (name == NULL) {
1179        name = xmlDictLookup(ctxt->dict, fullname, -1);
1180	prefix = NULL;
1181    } else {
1182        name = xmlDictLookup(ctxt->dict, name, -1);
1183	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1184    }
1185
1186    /*
1187     * make sure there is some storage
1188     */
1189    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1190    if (defaults == NULL) {
1191        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1192	                   (4 * 5) * sizeof(const xmlChar *));
1193	if (defaults == NULL)
1194	    goto mem_error;
1195	defaults->nbAttrs = 0;
1196	defaults->maxAttrs = 4;
1197	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1198	                        defaults, NULL) < 0) {
1199	    xmlFree(defaults);
1200	    goto mem_error;
1201	}
1202    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1203        xmlDefAttrsPtr temp;
1204
1205        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1206		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1207	if (temp == NULL)
1208	    goto mem_error;
1209	defaults = temp;
1210	defaults->maxAttrs *= 2;
1211	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1212	                        defaults, NULL) < 0) {
1213	    xmlFree(defaults);
1214	    goto mem_error;
1215	}
1216    }
1217
1218    /*
1219     * Split the element name into prefix:localname , the string found
1220     * are within the DTD and hen not associated to namespace names.
1221     */
1222    name = xmlSplitQName3(fullattr, &len);
1223    if (name == NULL) {
1224        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1225	prefix = NULL;
1226    } else {
1227        name = xmlDictLookup(ctxt->dict, name, -1);
1228	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1229    }
1230
1231    defaults->values[5 * defaults->nbAttrs] = name;
1232    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1233    /* intern the string and precompute the end */
1234    len = xmlStrlen(value);
1235    value = xmlDictLookup(ctxt->dict, value, len);
1236    defaults->values[5 * defaults->nbAttrs + 2] = value;
1237    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1238    if (ctxt->external)
1239        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1240    else
1241        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1242    defaults->nbAttrs++;
1243
1244    return;
1245
1246mem_error:
1247    xmlErrMemory(ctxt, NULL);
1248    return;
1249}
1250
1251/**
1252 * xmlAddSpecialAttr:
1253 * @ctxt:  an XML parser context
1254 * @fullname:  the element fullname
1255 * @fullattr:  the attribute fullname
1256 * @type:  the attribute type
1257 *
1258 * Register this attribute type
1259 */
1260static void
1261xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1262		  const xmlChar *fullname,
1263		  const xmlChar *fullattr,
1264		  int type)
1265{
1266    if (ctxt->attsSpecial == NULL) {
1267        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1268	if (ctxt->attsSpecial == NULL)
1269	    goto mem_error;
1270    }
1271
1272    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1273        return;
1274
1275    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1276                     (void *) (long) type);
1277    return;
1278
1279mem_error:
1280    xmlErrMemory(ctxt, NULL);
1281    return;
1282}
1283
1284/**
1285 * xmlCleanSpecialAttrCallback:
1286 *
1287 * Removes CDATA attributes from the special attribute table
1288 */
1289static void
1290xmlCleanSpecialAttrCallback(void *payload, void *data,
1291                            const xmlChar *fullname, const xmlChar *fullattr,
1292                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1293    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1294
1295    if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1296        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1297    }
1298}
1299
1300/**
1301 * xmlCleanSpecialAttr:
1302 * @ctxt:  an XML parser context
1303 *
1304 * Trim the list of attributes defined to remove all those of type
1305 * CDATA as they are not special. This call should be done when finishing
1306 * to parse the DTD and before starting to parse the document root.
1307 */
1308static void
1309xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1310{
1311    if (ctxt->attsSpecial == NULL)
1312        return;
1313
1314    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1315
1316    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1317        xmlHashFree(ctxt->attsSpecial, NULL);
1318        ctxt->attsSpecial = NULL;
1319    }
1320    return;
1321}
1322
/*
 * xmlLangSkipAlpha:
 * @cur:  pointer into a zero terminated string
 *
 * Returns a pointer to the first character at or after @cur which is not
 * an ASCII letter ([a-z] | [A-Z]).
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *cur)
{
    while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
           ((cur[0] >= 'a') && (cur[0] <= 'z')))
        cur++;
    return(cur);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC reference the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway.
 * Only alphabetic variants are recognized, at most one of them is checked
 * and whatever follows a variant and a '-' is accepted unchecked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        nxt = xmlLangSkipAlpha(cur);
        /* productions [36] and [37] require at least one letter */
        return((nxt > cur) && (nxt[0] == 0));
    }
    nxt = xmlLangSkipAlpha(cur);
    if (nxt - cur >= 4) {
        /*
         * Reserved (4 letters) or registered (5 to 8 letters) language
         * subtag, nothing may follow it here
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    nxt = xmlLangSkipAlpha(nxt);

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /* nxt[0] is a digit, a UN M.49 region needs exactly two more */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1516
1517/************************************************************************
1518 *									*
1519 *		Parser stacks related functions and macros		*
1520 *									*
1521 ************************************************************************/
1522
/* forward declaration, the function is static and defined elsewhere in this file */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1525
1526#ifdef SAX2
1527/**
1528 * nsPush:
1529 * @ctxt:  an XML parser context
1530 * @prefix:  the namespace prefix or NULL
1531 * @URL:  the namespace name
1532 *
1533 * Pushes a new parser namespace on top of the ns stack
1534 *
1535 * Returns -1 in case of error, -2 if the namespace should be discarded
1536 *	   and the index in the stack otherwise.
1537 */
1538static int
1539nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1540{
1541    if (ctxt->options & XML_PARSE_NSCLEAN) {
1542        int i;
1543	for (i = 0;i < ctxt->nsNr;i += 2) {
1544	    if (ctxt->nsTab[i] == prefix) {
1545		/* in scope */
1546	        if (ctxt->nsTab[i + 1] == URL)
1547		    return(-2);
1548		/* out of scope keep it */
1549		break;
1550	    }
1551	}
1552    }
1553    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1554	ctxt->nsMax = 10;
1555	ctxt->nsNr = 0;
1556	ctxt->nsTab = (const xmlChar **)
1557	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1558	if (ctxt->nsTab == NULL) {
1559	    xmlErrMemory(ctxt, NULL);
1560	    ctxt->nsMax = 0;
1561            return (-1);
1562	}
1563    } else if (ctxt->nsNr >= ctxt->nsMax) {
1564        const xmlChar ** tmp;
1565        ctxt->nsMax *= 2;
1566        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1567				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1568        if (tmp == NULL) {
1569            xmlErrMemory(ctxt, NULL);
1570	    ctxt->nsMax /= 2;
1571            return (-1);
1572        }
1573	ctxt->nsTab = tmp;
1574    }
1575    ctxt->nsTab[ctxt->nsNr++] = prefix;
1576    ctxt->nsTab[ctxt->nsNr++] = URL;
1577    return (ctxt->nsNr);
1578}
1579/**
1580 * nsPop:
1581 * @ctxt: an XML parser context
1582 * @nr:  the number to pop
1583 *
1584 * Pops the top @nr parser prefix/namespace from the ns stack
1585 *
1586 * Returns the number of namespaces removed
1587 */
1588static int
1589nsPop(xmlParserCtxtPtr ctxt, int nr)
1590{
1591    int i;
1592
1593    if (ctxt->nsTab == NULL) return(0);
1594    if (ctxt->nsNr < nr) {
1595        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1596        nr = ctxt->nsNr;
1597    }
1598    if (ctxt->nsNr <= 0)
1599        return (0);
1600
1601    for (i = 0;i < nr;i++) {
1602         ctxt->nsNr--;
1603	 ctxt->nsTab[ctxt->nsNr] = NULL;
1604    }
1605    return(nr);
1606}
1607#endif
1608
1609static int
1610xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1611    const xmlChar **atts;
1612    int *attallocs;
1613    int maxatts;
1614
1615    if (ctxt->atts == NULL) {
1616	maxatts = 55; /* allow for 10 attrs by default */
1617	atts = (const xmlChar **)
1618	       xmlMalloc(maxatts * sizeof(xmlChar *));
1619	if (atts == NULL) goto mem_error;
1620	ctxt->atts = atts;
1621	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1622	if (attallocs == NULL) goto mem_error;
1623	ctxt->attallocs = attallocs;
1624	ctxt->maxatts = maxatts;
1625    } else if (nr + 5 > ctxt->maxatts) {
1626	maxatts = (nr + 5) * 2;
1627	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1628				     maxatts * sizeof(const xmlChar *));
1629	if (atts == NULL) goto mem_error;
1630	ctxt->atts = atts;
1631	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1632	                             (maxatts / 5) * sizeof(int));
1633	if (attallocs == NULL) goto mem_error;
1634	ctxt->attallocs = attallocs;
1635	ctxt->maxatts = maxatts;
1636    }
1637    return(ctxt->maxatts);
1638mem_error:
1639    xmlErrMemory(ctxt, NULL);
1640    return(-1);
1641}
1642
1643/**
1644 * inputPush:
1645 * @ctxt:  an XML parser context
1646 * @value:  the parser input
1647 *
1648 * Pushes a new parser input on top of the input stack
1649 *
1650 * Returns -1 in case of error, the index in the stack otherwise
1651 */
1652int
1653inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1654{
1655    if ((ctxt == NULL) || (value == NULL))
1656        return(-1);
1657    if (ctxt->inputNr >= ctxt->inputMax) {
1658        ctxt->inputMax *= 2;
1659        ctxt->inputTab =
1660            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1661                                             ctxt->inputMax *
1662                                             sizeof(ctxt->inputTab[0]));
1663        if (ctxt->inputTab == NULL) {
1664            xmlErrMemory(ctxt, NULL);
1665	    xmlFreeInputStream(value);
1666	    ctxt->inputMax /= 2;
1667	    value = NULL;
1668            return (-1);
1669        }
1670    }
1671    ctxt->inputTab[ctxt->inputNr] = value;
1672    ctxt->input = value;
1673    return (ctxt->inputNr++);
1674}
1675/**
1676 * inputPop:
1677 * @ctxt: an XML parser context
1678 *
1679 * Pops the top parser input from the input stack
1680 *
1681 * Returns the input just removed
1682 */
1683xmlParserInputPtr
1684inputPop(xmlParserCtxtPtr ctxt)
1685{
1686    xmlParserInputPtr ret;
1687
1688    if (ctxt == NULL)
1689        return(NULL);
1690    if (ctxt->inputNr <= 0)
1691        return (NULL);
1692    ctxt->inputNr--;
1693    if (ctxt->inputNr > 0)
1694        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1695    else
1696        ctxt->input = NULL;
1697    ret = ctxt->inputTab[ctxt->inputNr];
1698    ctxt->inputTab[ctxt->inputNr] = NULL;
1699    return (ret);
1700}
1701/**
1702 * nodePush:
1703 * @ctxt:  an XML parser context
1704 * @value:  the element node
1705 *
1706 * Pushes a new element node on top of the node stack
1707 *
1708 * Returns -1 in case of error, the index in the stack otherwise
1709 */
1710int
1711nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1712{
1713    if (ctxt == NULL) return(0);
1714    if (ctxt->nodeNr >= ctxt->nodeMax) {
1715        xmlNodePtr *tmp;
1716
1717	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1718                                      ctxt->nodeMax * 2 *
1719                                      sizeof(ctxt->nodeTab[0]));
1720        if (tmp == NULL) {
1721            xmlErrMemory(ctxt, NULL);
1722            return (-1);
1723        }
1724        ctxt->nodeTab = tmp;
1725	ctxt->nodeMax *= 2;
1726    }
1727    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1728        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1729	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1730		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1731			  xmlParserMaxDepth);
1732	ctxt->instate = XML_PARSER_EOF;
1733	return(-1);
1734    }
1735    ctxt->nodeTab[ctxt->nodeNr] = value;
1736    ctxt->node = value;
1737    return (ctxt->nodeNr++);
1738}
1739
1740/**
1741 * nodePop:
1742 * @ctxt: an XML parser context
1743 *
1744 * Pops the top element node from the node stack
1745 *
1746 * Returns the node just removed
1747 */
1748xmlNodePtr
1749nodePop(xmlParserCtxtPtr ctxt)
1750{
1751    xmlNodePtr ret;
1752
1753    if (ctxt == NULL) return(NULL);
1754    if (ctxt->nodeNr <= 0)
1755        return (NULL);
1756    ctxt->nodeNr--;
1757    if (ctxt->nodeNr > 0)
1758        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1759    else
1760        ctxt->node = NULL;
1761    ret = ctxt->nodeTab[ctxt->nodeNr];
1762    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1763    return (ret);
1764}
1765
1766#ifdef LIBXML_PUSH_ENABLED
1767/**
1768 * nameNsPush:
1769 * @ctxt:  an XML parser context
1770 * @value:  the element name
1771 * @prefix:  the element prefix
1772 * @URI:  the element namespace name
1773 *
1774 * Pushes a new element name/prefix/URL on top of the name stack
1775 *
1776 * Returns -1 in case of error, the index in the stack otherwise
1777 */
1778static int
1779nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1780           const xmlChar *prefix, const xmlChar *URI, int nsNr)
1781{
1782    if (ctxt->nameNr >= ctxt->nameMax) {
1783        const xmlChar * *tmp;
1784        void **tmp2;
1785        ctxt->nameMax *= 2;
1786        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1787                                    ctxt->nameMax *
1788                                    sizeof(ctxt->nameTab[0]));
1789        if (tmp == NULL) {
1790	    ctxt->nameMax /= 2;
1791	    goto mem_error;
1792        }
1793	ctxt->nameTab = tmp;
1794        tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1795                                    ctxt->nameMax * 3 *
1796                                    sizeof(ctxt->pushTab[0]));
1797        if (tmp2 == NULL) {
1798	    ctxt->nameMax /= 2;
1799	    goto mem_error;
1800        }
1801	ctxt->pushTab = tmp2;
1802    }
1803    ctxt->nameTab[ctxt->nameNr] = value;
1804    ctxt->name = value;
1805    ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1806    ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1807    ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1808    return (ctxt->nameNr++);
1809mem_error:
1810    xmlErrMemory(ctxt, NULL);
1811    return (-1);
1812}
1813/**
1814 * nameNsPop:
1815 * @ctxt: an XML parser context
1816 *
1817 * Pops the top element/prefix/URI name from the name stack
1818 *
1819 * Returns the name just removed
1820 */
1821static const xmlChar *
1822nameNsPop(xmlParserCtxtPtr ctxt)
1823{
1824    const xmlChar *ret;
1825
1826    if (ctxt->nameNr <= 0)
1827        return (NULL);
1828    ctxt->nameNr--;
1829    if (ctxt->nameNr > 0)
1830        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1831    else
1832        ctxt->name = NULL;
1833    ret = ctxt->nameTab[ctxt->nameNr];
1834    ctxt->nameTab[ctxt->nameNr] = NULL;
1835    return (ret);
1836}
1837#endif /* LIBXML_PUSH_ENABLED */
1838
1839/**
1840 * namePush:
1841 * @ctxt:  an XML parser context
1842 * @value:  the element name
1843 *
1844 * Pushes a new element name on top of the name stack
1845 *
1846 * Returns -1 in case of error, the index in the stack otherwise
1847 */
1848int
1849namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1850{
1851    if (ctxt == NULL) return (-1);
1852
1853    if (ctxt->nameNr >= ctxt->nameMax) {
1854        const xmlChar * *tmp;
1855        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1856                                    ctxt->nameMax * 2 *
1857                                    sizeof(ctxt->nameTab[0]));
1858        if (tmp == NULL) {
1859	    goto mem_error;
1860        }
1861	ctxt->nameTab = tmp;
1862        ctxt->nameMax *= 2;
1863    }
1864    ctxt->nameTab[ctxt->nameNr] = value;
1865    ctxt->name = value;
1866    return (ctxt->nameNr++);
1867mem_error:
1868    xmlErrMemory(ctxt, NULL);
1869    return (-1);
1870}
1871/**
1872 * namePop:
1873 * @ctxt: an XML parser context
1874 *
1875 * Pops the top element name from the name stack
1876 *
1877 * Returns the name just removed
1878 */
1879const xmlChar *
1880namePop(xmlParserCtxtPtr ctxt)
1881{
1882    const xmlChar *ret;
1883
1884    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1885        return (NULL);
1886    ctxt->nameNr--;
1887    if (ctxt->nameNr > 0)
1888        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1889    else
1890        ctxt->name = NULL;
1891    ret = ctxt->nameTab[ctxt->nameNr];
1892    ctxt->nameTab[ctxt->nameNr] = NULL;
1893    return (ret);
1894}
1895
1896static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1897    if (ctxt->spaceNr >= ctxt->spaceMax) {
1898        int *tmp;
1899
1900	ctxt->spaceMax *= 2;
1901        tmp = (int *) xmlRealloc(ctxt->spaceTab,
1902	                         ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1903        if (tmp == NULL) {
1904	    xmlErrMemory(ctxt, NULL);
1905	    ctxt->spaceMax /=2;
1906	    return(-1);
1907	}
1908	ctxt->spaceTab = tmp;
1909    }
1910    ctxt->spaceTab[ctxt->spaceNr] = val;
1911    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1912    return(ctxt->spaceNr++);
1913}
1914
1915static int spacePop(xmlParserCtxtPtr ctxt) {
1916    int ret;
1917    if (ctxt->spaceNr <= 0) return(0);
1918    ctxt->spaceNr--;
1919    if (ctxt->spaceNr > 0)
1920	ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1921    else
1922        ctxt->space = &ctxt->spaceTab[0];
1923    ret = ctxt->spaceTab[ctxt->spaceNr];
1924    ctxt->spaceTab[ctxt->spaceNr] = -1;
1925    return(ret);
1926}
1927
1928/*
1929 * Macros for accessing the content. Those should be used only by the parser,
1930 * and not exported.
1931 *
1932 * Dirty macros, i.e. one often need to make assumption on the context to
1933 * use them
1934 *
1935 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1936 *           To be used with extreme caution since operations consuming
1937 *           characters may move the input buffer to a different location !
1938 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
1939 *           This should be used internally by the parser
1940 *           only to compare to ASCII values otherwise it would break when
1941 *           running with UTF-8 encoding.
1942 *   RAW     same as CUR but in the input buffer, bypass any token
1943 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1946 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1947 *           strings without newlines within the parser.
1948 *   NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1949 *           defined char within the parser.
1950 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1951 *
1952 *   NEXT    Skip to the next character, this does the proper decoding
1953 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
1954 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1955 *   CUR_CHAR(l) returns the current unicode character (int), set l
1956 *           to the number of xmlChars used for the encoding [0-5].
1957 *   CUR_SCHAR  same but operate on a string instead of the context
1958 *   COPY_BUF  copy the current unicode char to the target buffer, increment
1959 *            the index
1960 *   GROW, SHRINK  handling of input buffers
1961 */
1962
/*
 * Raw accessors on the current input. All of them expect a variable named
 * ctxt in scope whose input pointer is valid. As defined here RAW and CUR
 * expand to the same expression, the byte under the input cursor.
 * NXT(val) reads the byte val positions after the cursor without any
 * bound check and CUR_PTR is the cursor itself.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
1967
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are c1 ... cn.
 * The bytes are read as unsigned char and compared left to right, the
 * comparison stops at the first mismatch so nothing is read past it.
 * @s is parenthesized before the cast so that an expression argument
 * (pointer arithmetic on a wider type, conditional, ...) is converted as
 * a whole; it is evaluated once per byte compared and therefore must not
 * have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
1985
/*
 * SKIP(val): advance the input cursor, the column and the character
 * counter by `val`, without looking at the skipped bytes (so it must not
 * be used across newlines). Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and when the cursor sits on a 0 byte the
 * input is grown, or popped if it cannot grow.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if (*ctxt->input->cur == 0) {					\
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)		\
            xmlPopInput(ctxt);						\
    }									\
  } while (0)
1993
/*
 * SKIPL(val): skip `val` xmlChars one at a time, keeping the line and
 * column counters accurate when a newline is crossed, and bumping the
 * character counter for each of them. Afterwards a '%' under the cursor is
 * handed to xmlParserHandlePEReference(), and when the cursor sits on a
 * 0 byte the input is grown, or popped if it cannot grow.
 *
 * `val` is parenthesized in the loop test so that any expression can be
 * passed; it is re-evaluated on every iteration and therefore must not
 * have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2008
2009#define SHRINK if ((ctxt->progressive == 0) &&				\
2010		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2011		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2012	xmlSHRINK (ctxt);
2013
2014static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2015    xmlParserInputShrink(ctxt->input);
2016    if ((*ctxt->input->cur == 0) &&
2017        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2018	    xmlPopInput(ctxt);
2019  }
2020
2021#define GROW if ((ctxt->progressive == 0) &&				\
2022		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2023	xmlGROW (ctxt);
2024
2025static void xmlGROW (xmlParserCtxtPtr ctxt) {
2026    if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2027         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2028        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2029        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2030        ctxt->instate = XML_PARSER_EOF;
2031    }
2032    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2033    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2034        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2035	    xmlPopInput(ctxt);
2036}
2037
/* SKIP_BLANKS: skip the blanks under the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and character
 * counters but never the line counter. When the cursor lands on a 0 byte
 * the input is grown by INPUT_CHUNK.
 * The expansion is a brace-enclosed block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0) {					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
	}								\
    }
2049
/*
 * NEXTL(l): step over the current character, which is `l` xmlChars long.
 * A newline under the cursor starts a new line (column reset to 1), any
 * other character advances the column by one. Afterwards a '%' under the
 * cursor is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
  } while (0)
2057
/*
 * CUR_CHAR(l): current character of the parser input, as returned by
 * xmlCurrentChar(); `l` receives the length reported by that function.
 * CUR_SCHAR(s, l): same, but reading from the string `s` through
 * xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append the character `v` to buffer `b` at index
 * `i` and advance `i`. When `l` is 1 the value is stored as a single
 * xmlChar, otherwise it is encoded with xmlCopyCharMultiByte() and `i`
 * grows by the value that function returns.
 *
 * All arguments are parenthesized in the expansion so that expressions
 * can be passed; `b` and `i` are expanded more than once and must not
 * have side effects. Must be followed by a semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2064
2065/**
2066 * xmlSkipBlankChars:
2067 * @ctxt:  the XML parser context
2068 *
2069 * skip all blanks character found at that point in the input streams.
2070 * It pops up finished entities in the process if allowable at that point.
2071 *
2072 * Returns the number of space chars skipped
2073 */
2074
2075int
2076xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2077    int res = 0;
2078
2079    /*
2080     * It's Okay to use CUR/NEXT here since all the blanks are on
2081     * the ASCII range.
2082     */
2083    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2084	const xmlChar *cur;
2085	/*
2086	 * if we are in the document content, go really fast
2087	 */
2088	cur = ctxt->input->cur;
2089	while (IS_BLANK_CH(*cur)) {
2090	    if (*cur == '\n') {
2091		ctxt->input->line++; ctxt->input->col = 1;
2092	    }
2093	    cur++;
2094	    res++;
2095	    if (*cur == 0) {
2096		ctxt->input->cur = cur;
2097		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2098		cur = ctxt->input->cur;
2099	    }
2100	}
2101	ctxt->input->cur = cur;
2102    } else {
2103	int cur;
2104	do {
2105	    cur = CUR;
2106	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2107		NEXT;
2108		cur = CUR;
2109		res++;
2110	    }
2111	    while ((cur == 0) && (ctxt->inputNr > 1) &&
2112		   (ctxt->instate != XML_PARSER_COMMENT)) {
2113		xmlPopInput(ctxt);
2114		cur = CUR;
2115	    }
2116	    /*
2117	     * Need to handle support of entities branching here
2118	     */
2119	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2120	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2121    }
2122    return(res);
2123}
2124
2125/************************************************************************
2126 *									*
2127 *		Commodity functions to handle entities			*
2128 *									*
2129 ************************************************************************/
2130
2131/**
2132 * xmlPopInput:
2133 * @ctxt:  an XML parser context
2134 *
2135 * xmlPopInput: the current input pointed by ctxt->input came to an end
2136 *          pop it and return the next char.
2137 *
2138 * Returns the current xmlChar in the parser context
2139 */
2140xmlChar
2141xmlPopInput(xmlParserCtxtPtr ctxt) {
2142    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2143    if (xmlParserDebugEntities)
2144	xmlGenericError(xmlGenericErrorContext,
2145		"Popping input %d\n", ctxt->inputNr);
2146    xmlFreeInputStream(inputPop(ctxt));
2147    if ((*ctxt->input->cur == 0) &&
2148        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2149	    return(xmlPopInput(ctxt));
2150    return(CUR);
2151}
2152
2153/**
2154 * xmlPushInput:
2155 * @ctxt:  an XML parser context
2156 * @input:  an XML parser input fragment (entity, XML fragment ...).
2157 *
2158 * xmlPushInput: switch to a new input stream which is stacked on top
2159 *               of the previous one(s).
2160 * Returns -1 in case of error or the index in the input stack
2161 */
2162int
2163xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2164    int ret;
2165    if (input == NULL) return(-1);
2166
2167    if (xmlParserDebugEntities) {
2168	if ((ctxt->input != NULL) && (ctxt->input->filename))
2169	    xmlGenericError(xmlGenericErrorContext,
2170		    "%s(%d): ", ctxt->input->filename,
2171		    ctxt->input->line);
2172	xmlGenericError(xmlGenericErrorContext,
2173		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2174    }
2175    ret = inputPush(ctxt, input);
2176    if (ctxt->instate == XML_PARSER_EOF)
2177        return(-1);
2178    GROW;
2179    return(ret);
2180}
2181
2182/**
2183 * xmlParseCharRef:
2184 * @ctxt:  an XML parser context
2185 *
2186 * parse Reference declarations
2187 *
2188 * [66] CharRef ::= '&#' [0-9]+ ';' |
2189 *                  '&#x' [0-9a-fA-F]+ ';'
2190 *
2191 * [ WFC: Legal Character ]
2192 * Characters referred to using character references must match the
2193 * production for Char.
2194 *
2195 * Returns the value parsed (as an int), 0 in case of error
2196 */
2197int
2198xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2199    unsigned int val = 0;
2200    int count = 0;
2201    unsigned int outofrange = 0;
2202
2203    /*
2204     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2205     */
2206    if ((RAW == '&') && (NXT(1) == '#') &&
2207        (NXT(2) == 'x')) {
2208	SKIP(3);
2209	GROW;
2210	while (RAW != ';') { /* loop blocked by count */
2211	    if (count++ > 20) {
2212		count = 0;
2213		GROW;
2214                if (ctxt->instate == XML_PARSER_EOF)
2215                    return(0);
2216	    }
2217	    if ((RAW >= '0') && (RAW <= '9'))
2218	        val = val * 16 + (CUR - '0');
2219	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2220	        val = val * 16 + (CUR - 'a') + 10;
2221	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2222	        val = val * 16 + (CUR - 'A') + 10;
2223	    else {
2224		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2225		val = 0;
2226		break;
2227	    }
2228	    if (val > 0x10FFFF)
2229	        outofrange = val;
2230
2231	    NEXT;
2232	    count++;
2233	}
2234	if (RAW == ';') {
2235	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2236	    ctxt->input->col++;
2237	    ctxt->nbChars ++;
2238	    ctxt->input->cur++;
2239	}
2240    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2241	SKIP(2);
2242	GROW;
2243	while (RAW != ';') { /* loop blocked by count */
2244	    if (count++ > 20) {
2245		count = 0;
2246		GROW;
2247                if (ctxt->instate == XML_PARSER_EOF)
2248                    return(0);
2249	    }
2250	    if ((RAW >= '0') && (RAW <= '9'))
2251	        val = val * 10 + (CUR - '0');
2252	    else {
2253		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2254		val = 0;
2255		break;
2256	    }
2257	    if (val > 0x10FFFF)
2258	        outofrange = val;
2259
2260	    NEXT;
2261	    count++;
2262	}
2263	if (RAW == ';') {
2264	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2265	    ctxt->input->col++;
2266	    ctxt->nbChars ++;
2267	    ctxt->input->cur++;
2268	}
2269    } else {
2270        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2271    }
2272
2273    /*
2274     * [ WFC: Legal Character ]
2275     * Characters referred to using character references must match the
2276     * production for Char.
2277     */
2278    if ((IS_CHAR(val) && (outofrange == 0))) {
2279        return(val);
2280    } else {
2281        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2282                          "xmlParseCharRef: invalid xmlChar value %d\n",
2283	                  val);
2284    }
2285    return(0);
2286}
2287
2288/**
2289 * xmlParseStringCharRef:
2290 * @ctxt:  an XML parser context
2291 * @str:  a pointer to an index in the string
2292 *
2293 * parse Reference declarations, variant parsing from a string rather
2294 * than an an input flow.
2295 *
2296 * [66] CharRef ::= '&#' [0-9]+ ';' |
2297 *                  '&#x' [0-9a-fA-F]+ ';'
2298 *
2299 * [ WFC: Legal Character ]
2300 * Characters referred to using character references must match the
2301 * production for Char.
2302 *
2303 * Returns the value parsed (as an int), 0 in case of error, str will be
2304 *         updated to the current value of the index
2305 */
2306static int
2307xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2308    const xmlChar *ptr;
2309    xmlChar cur;
2310    unsigned int val = 0;
2311    unsigned int outofrange = 0;
2312
2313    if ((str == NULL) || (*str == NULL)) return(0);
2314    ptr = *str;
2315    cur = *ptr;
2316    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2317	ptr += 3;
2318	cur = *ptr;
2319	while (cur != ';') { /* Non input consuming loop */
2320	    if ((cur >= '0') && (cur <= '9'))
2321	        val = val * 16 + (cur - '0');
2322	    else if ((cur >= 'a') && (cur <= 'f'))
2323	        val = val * 16 + (cur - 'a') + 10;
2324	    else if ((cur >= 'A') && (cur <= 'F'))
2325	        val = val * 16 + (cur - 'A') + 10;
2326	    else {
2327		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2328		val = 0;
2329		break;
2330	    }
2331	    if (val > 0x10FFFF)
2332	        outofrange = val;
2333
2334	    ptr++;
2335	    cur = *ptr;
2336	}
2337	if (cur == ';')
2338	    ptr++;
2339    } else if  ((cur == '&') && (ptr[1] == '#')){
2340	ptr += 2;
2341	cur = *ptr;
2342	while (cur != ';') { /* Non input consuming loops */
2343	    if ((cur >= '0') && (cur <= '9'))
2344	        val = val * 10 + (cur - '0');
2345	    else {
2346		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2347		val = 0;
2348		break;
2349	    }
2350	    if (val > 0x10FFFF)
2351	        outofrange = val;
2352
2353	    ptr++;
2354	    cur = *ptr;
2355	}
2356	if (cur == ';')
2357	    ptr++;
2358    } else {
2359	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2360	return(0);
2361    }
2362    *str = ptr;
2363
2364    /*
2365     * [ WFC: Legal Character ]
2366     * Characters referred to using character references must match the
2367     * production for Char.
2368     */
2369    if ((IS_CHAR(val) && (outofrange == 0))) {
2370        return(val);
2371    } else {
2372        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2373			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2374			  val);
2375    }
2376    return(0);
2377}
2378
2379/**
2380 * xmlNewBlanksWrapperInputStream:
2381 * @ctxt:  an XML parser context
2382 * @entity:  an Entity pointer
2383 *
2384 * Create a new input stream for wrapping
2385 * blanks around a PEReference
2386 *
2387 * Returns the new input stream or NULL
2388 */
2389
2390static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2391
2392static xmlParserInputPtr
2393xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2394    xmlParserInputPtr input;
2395    xmlChar *buffer;
2396    size_t length;
2397    if (entity == NULL) {
2398	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2399	            "xmlNewBlanksWrapperInputStream entity\n");
2400	return(NULL);
2401    }
2402    if (xmlParserDebugEntities)
2403	xmlGenericError(xmlGenericErrorContext,
2404		"new blanks wrapper for entity: %s\n", entity->name);
2405    input = xmlNewInputStream(ctxt);
2406    if (input == NULL) {
2407	return(NULL);
2408    }
2409    length = xmlStrlen(entity->name) + 5;
2410    buffer = xmlMallocAtomic(length);
2411    if (buffer == NULL) {
2412	xmlErrMemory(ctxt, NULL);
2413        xmlFree(input);
2414	return(NULL);
2415    }
2416    buffer [0] = ' ';
2417    buffer [1] = '%';
2418    buffer [length-3] = ';';
2419    buffer [length-2] = ' ';
2420    buffer [length-1] = 0;
2421    memcpy(buffer + 2, entity->name, length - 5);
2422    input->free = deallocblankswrapper;
2423    input->base = buffer;
2424    input->cur = buffer;
2425    input->length = length;
2426    input->end = &buffer[length];
2427    return(input);
2428}
2429
2430/**
2431 * xmlParserHandlePEReference:
2432 * @ctxt:  the parser context
2433 *
2434 * [69] PEReference ::= '%' Name ';'
2435 *
2436 * [ WFC: No Recursion ]
2437 * A parsed entity must not contain a recursive
2438 * reference to itself, either directly or indirectly.
2439 *
2440 * [ WFC: Entity Declared ]
2441 * In a document without any DTD, a document with only an internal DTD
2442 * subset which contains no parameter entity references, or a document
2443 * with "standalone='yes'", ...  ... The declaration of a parameter
2444 * entity must precede any reference to it...
2445 *
2446 * [ VC: Entity Declared ]
2447 * In a document with an external subset or external parameter entities
2448 * with "standalone='no'", ...  ... The declaration of a parameter entity
2449 * must precede any reference to it...
2450 *
2451 * [ WFC: In DTD ]
2452 * Parameter-entity references may only appear in the DTD.
2453 * NOTE: misleading but this is handled.
2454 *
2455 * A PEReference may have been detected in the current input stream
2456 * the handling is done accordingly to
2457 *      http://www.w3.org/TR/REC-xml#entproc
2458 * i.e.
2459 *   - Included in literal in entity values
2460 *   - Included as Parameter Entity reference within DTDs
2461 */
2462void
2463xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2464    const xmlChar *name;
2465    xmlEntityPtr entity = NULL;
2466    xmlParserInputPtr input;
2467
2468    if (RAW != '%') return;
2469    switch(ctxt->instate) {
2470	case XML_PARSER_CDATA_SECTION:
2471	    return;
2472        case XML_PARSER_COMMENT:
2473	    return;
2474	case XML_PARSER_START_TAG:
2475	    return;
2476	case XML_PARSER_END_TAG:
2477	    return;
2478        case XML_PARSER_EOF:
2479	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2480	    return;
2481        case XML_PARSER_PROLOG:
2482	case XML_PARSER_START:
2483	case XML_PARSER_MISC:
2484	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2485	    return;
2486	case XML_PARSER_ENTITY_DECL:
2487        case XML_PARSER_CONTENT:
2488        case XML_PARSER_ATTRIBUTE_VALUE:
2489        case XML_PARSER_PI:
2490	case XML_PARSER_SYSTEM_LITERAL:
2491	case XML_PARSER_PUBLIC_LITERAL:
2492	    /* we just ignore it there */
2493	    return;
2494        case XML_PARSER_EPILOG:
2495	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2496	    return;
2497	case XML_PARSER_ENTITY_VALUE:
2498	    /*
2499	     * NOTE: in the case of entity values, we don't do the
2500	     *       substitution here since we need the literal
2501	     *       entity value to be able to save the internal
2502	     *       subset of the document.
2503	     *       This will be handled by xmlStringDecodeEntities
2504	     */
2505	    return;
2506        case XML_PARSER_DTD:
2507	    /*
2508	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2509	     * In the internal DTD subset, parameter-entity references
2510	     * can occur only where markup declarations can occur, not
2511	     * within markup declarations.
2512	     * In that case this is handled in xmlParseMarkupDecl
2513	     */
2514	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2515		return;
2516	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2517		return;
2518            break;
2519        case XML_PARSER_IGNORE:
2520            return;
2521    }
2522
2523    NEXT;
2524    name = xmlParseName(ctxt);
2525    if (xmlParserDebugEntities)
2526	xmlGenericError(xmlGenericErrorContext,
2527		"PEReference: %s\n", name);
2528    if (name == NULL) {
2529	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2530    } else {
2531	if (RAW == ';') {
2532	    NEXT;
2533	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2534		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2535	    if (entity == NULL) {
2536
2537		/*
2538		 * [ WFC: Entity Declared ]
2539		 * In a document without any DTD, a document with only an
2540		 * internal DTD subset which contains no parameter entity
2541		 * references, or a document with "standalone='yes'", ...
2542		 * ... The declaration of a parameter entity must precede
2543		 * any reference to it...
2544		 */
2545		if ((ctxt->standalone == 1) ||
2546		    ((ctxt->hasExternalSubset == 0) &&
2547		     (ctxt->hasPErefs == 0))) {
2548		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2549			 "PEReference: %%%s; not found\n", name);
2550	        } else {
2551		    /*
2552		     * [ VC: Entity Declared ]
2553		     * In a document with an external subset or external
2554		     * parameter entities with "standalone='no'", ...
2555		     * ... The declaration of a parameter entity must precede
2556		     * any reference to it...
2557		     */
2558		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2559		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2560			                 "PEReference: %%%s; not found\n",
2561				         name, NULL);
2562		    } else
2563		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2564			              "PEReference: %%%s; not found\n",
2565				      name, NULL);
2566		    ctxt->valid = 0;
2567		}
2568	    } else if (ctxt->input->free != deallocblankswrapper) {
2569		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2570		    if (xmlPushInput(ctxt, input) < 0)
2571		        return;
2572	    } else {
2573	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2574		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2575		    xmlChar start[4];
2576		    xmlCharEncoding enc;
2577
2578		    /*
2579		     * handle the extra spaces added before and after
2580		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2581		     * this is done independently.
2582		     */
2583		    input = xmlNewEntityInputStream(ctxt, entity);
2584		    if (xmlPushInput(ctxt, input) < 0)
2585		        return;
2586
2587		    /*
2588		     * Get the 4 first bytes and decode the charset
2589		     * if enc != XML_CHAR_ENCODING_NONE
2590		     * plug some encoding conversion routines.
2591		     * Note that, since we may have some non-UTF8
2592		     * encoding (like UTF16, bug 135229), the 'length'
2593		     * is not known, but we can calculate based upon
2594		     * the amount of data in the buffer.
2595		     */
2596		    GROW
2597                    if (ctxt->instate == XML_PARSER_EOF)
2598                        return;
2599		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2600			start[0] = RAW;
2601			start[1] = NXT(1);
2602			start[2] = NXT(2);
2603			start[3] = NXT(3);
2604			enc = xmlDetectCharEncoding(start, 4);
2605			if (enc != XML_CHAR_ENCODING_NONE) {
2606			    xmlSwitchEncoding(ctxt, enc);
2607			}
2608		    }
2609
2610		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2611			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2612			(IS_BLANK_CH(NXT(5)))) {
2613			xmlParseTextDecl(ctxt);
2614		    }
2615		} else {
2616		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2617			     "PEReference: %s is not a parameter entity\n",
2618				      name);
2619		}
2620	    }
2621	} else {
2622	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2623	}
2624    }
2625}
2626
/*
 * Macro used to grow the current buffer: the new size is twice the
 * current one plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures, it is
 * jumped to when the new size overflows or when the reallocation fails
 * (buffer and buffer##_size are left untouched in both cases).
 * n is parenthesized in the expansion so any expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2641
2642/**
2643 * xmlStringLenDecodeEntities:
2644 * @ctxt:  the parser context
2645 * @str:  the input string
2646 * @len: the string length
2647 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2648 * @end:  an end marker xmlChar, 0 if none
2649 * @end2:  an end marker xmlChar, 0 if none
2650 * @end3:  an end marker xmlChar, 0 if none
2651 *
2652 * Takes a entity string content and process to do the adequate substitutions.
2653 *
2654 * [67] Reference ::= EntityRef | CharRef
2655 *
2656 * [69] PEReference ::= '%' Name ';'
2657 *
2658 * Returns A newly allocated string with the substitution done. The caller
2659 *      must deallocate it !
2660 */
2661xmlChar *
2662xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2663		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2664    xmlChar *buffer = NULL;
2665    size_t buffer_size = 0;
2666    size_t nbchars = 0;
2667
2668    xmlChar *current = NULL;
2669    xmlChar *rep = NULL;
2670    const xmlChar *last;
2671    xmlEntityPtr ent;
2672    int c,l;
2673
2674    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2675	return(NULL);
2676    last = str + len;
2677
2678    if (((ctxt->depth > 40) &&
2679         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2680	(ctxt->depth > 1024)) {
2681	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2682	return(NULL);
2683    }
2684
2685    /*
2686     * allocate a translation buffer.
2687     */
2688    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2689    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2690    if (buffer == NULL) goto mem_error;
2691
2692    /*
2693     * OK loop until we reach one of the ending char or a size limit.
2694     * we are operating on already parsed values.
2695     */
2696    if (str < last)
2697	c = CUR_SCHAR(str, l);
2698    else
2699        c = 0;
2700    while ((c != 0) && (c != end) && /* non input consuming loop */
2701	   (c != end2) && (c != end3)) {
2702
2703	if (c == 0) break;
2704        if ((c == '&') && (str[1] == '#')) {
2705	    int val = xmlParseStringCharRef(ctxt, &str);
2706	    if (val != 0) {
2707		COPY_BUF(0,buffer,nbchars,val);
2708	    }
2709	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2710	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2711	    }
2712	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2713	    if (xmlParserDebugEntities)
2714		xmlGenericError(xmlGenericErrorContext,
2715			"String decoding Entity Reference: %.30s\n",
2716			str);
2717	    ent = xmlParseStringEntityRef(ctxt, &str);
2718	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2719	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2720	        goto int_error;
2721	    if (ent != NULL)
2722	        ctxt->nbentities += ent->checked;
2723	    if ((ent != NULL) &&
2724		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2725		if (ent->content != NULL) {
2726		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2727		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2728			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2729		    }
2730		} else {
2731		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2732			    "predefined entity has no content\n");
2733		}
2734	    } else if ((ent != NULL) && (ent->content != NULL)) {
2735		ctxt->depth++;
2736		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2737			                      0, 0, 0);
2738		ctxt->depth--;
2739
2740		if (rep != NULL) {
2741		    current = rep;
2742		    while (*current != 0) { /* non input consuming loop */
2743			buffer[nbchars++] = *current++;
2744			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2746				goto int_error;
2747			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2748			}
2749		    }
2750		    xmlFree(rep);
2751		    rep = NULL;
2752		}
2753	    } else if (ent != NULL) {
2754		int i = xmlStrlen(ent->name);
2755		const xmlChar *cur = ent->name;
2756
2757		buffer[nbchars++] = '&';
2758		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2759		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2760		}
2761		for (;i > 0;i--)
2762		    buffer[nbchars++] = *cur++;
2763		buffer[nbchars++] = ';';
2764	    }
2765	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2766	    if (xmlParserDebugEntities)
2767		xmlGenericError(xmlGenericErrorContext,
2768			"String decoding PE Reference: %.30s\n", str);
2769	    ent = xmlParseStringPEReference(ctxt, &str);
2770	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2771	        goto int_error;
2772	    if (ent != NULL)
2773	        ctxt->nbentities += ent->checked;
2774	    if (ent != NULL) {
2775                if (ent->content == NULL) {
2776		    xmlLoadEntityContent(ctxt, ent);
2777		}
2778		ctxt->depth++;
2779		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2780			                      0, 0, 0);
2781		ctxt->depth--;
2782		if (rep != NULL) {
2783		    current = rep;
2784		    while (*current != 0) { /* non input consuming loop */
2785			buffer[nbchars++] = *current++;
2786			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2788			        goto int_error;
2789			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790			}
2791		    }
2792		    xmlFree(rep);
2793		    rep = NULL;
2794		}
2795	    }
2796	} else {
2797	    COPY_BUF(l,buffer,nbchars,c);
2798	    str += l;
2799	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2800	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2801	    }
2802	}
2803	if (str < last)
2804	    c = CUR_SCHAR(str, l);
2805	else
2806	    c = 0;
2807    }
2808    buffer[nbchars] = 0;
2809    return(buffer);
2810
2811mem_error:
2812    xmlErrMemory(ctxt, NULL);
2813int_error:
2814    if (rep != NULL)
2815        xmlFree(rep);
2816    if (buffer != NULL)
2817        xmlFree(buffer);
2818    return(NULL);
2819}
2820
2821/**
2822 * xmlStringDecodeEntities:
2823 * @ctxt:  the parser context
2824 * @str:  the input string
2825 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826 * @end:  an end marker xmlChar, 0 if none
2827 * @end2:  an end marker xmlChar, 0 if none
2828 * @end3:  an end marker xmlChar, 0 if none
2829 *
2830 * Takes a entity string content and process to do the adequate substitutions.
2831 *
2832 * [67] Reference ::= EntityRef | CharRef
2833 *
2834 * [69] PEReference ::= '%' Name ';'
2835 *
2836 * Returns A newly allocated string with the substitution done. The caller
2837 *      must deallocate it !
2838 */
2839xmlChar *
2840xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2841		        xmlChar end, xmlChar  end2, xmlChar end3) {
2842    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2843    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2844           end, end2, end3));
2845}
2846
2847/************************************************************************
2848 *									*
2849 *		Commodity functions, cleanup needed ?			*
2850 *									*
2851 ************************************************************************/
2852
2853/**
2854 * areBlanks:
2855 * @ctxt:  an XML parser context
2856 * @str:  a xmlChar *
2857 * @len:  the size of @str
2858 * @blank_chars: we know the chars are blanks
2859 *
2860 * Is this a sequence of blank chars that one can ignore ?
2861 *
2862 * Returns 1 if ignorable 0 otherwise.
2863 */
2864
2865static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2866                     int blank_chars) {
2867    int i, ret;
2868    xmlNodePtr lastChild;
2869
2870    /*
2871     * Don't spend time trying to differentiate them, the same callback is
2872     * used !
2873     */
2874    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2875	return(0);
2876
2877    /*
2878     * Check for xml:space value.
2879     */
2880    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2881        (*(ctxt->space) == -2))
2882	return(0);
2883
2884    /*
2885     * Check that the string is made of blanks
2886     */
2887    if (blank_chars == 0) {
2888	for (i = 0;i < len;i++)
2889	    if (!(IS_BLANK_CH(str[i]))) return(0);
2890    }
2891
2892    /*
2893     * Look if the element is mixed content in the DTD if available
2894     */
2895    if (ctxt->node == NULL) return(0);
2896    if (ctxt->myDoc != NULL) {
2897	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2898        if (ret == 0) return(1);
2899        if (ret == 1) return(0);
2900    }
2901
2902    /*
2903     * Otherwise, heuristic :-\
2904     */
2905    if ((RAW != '<') && (RAW != 0xD)) return(0);
2906    if ((ctxt->node->children == NULL) &&
2907	(RAW == '<') && (NXT(1) == '/')) return(0);
2908
2909    lastChild = xmlGetLastChild(ctxt->node);
2910    if (lastChild == NULL) {
2911        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2912            (ctxt->node->content != NULL)) return(0);
2913    } else if (xmlNodeIsText(lastChild))
2914        return(0);
2915    else if ((ctxt->node->children != NULL) &&
2916             (xmlNodeIsText(ctxt->node->children)))
2917        return(0);
2918    return(1);
2919}
2920
2921/************************************************************************
2922 *									*
2923 *		Extra stuff for namespace support			*
2924 *	Relates to http://www.w3.org/TR/WD-xml-names			*
2925 *									*
2926 ************************************************************************/
2927
2928/**
2929 * xmlSplitQName:
2930 * @ctxt:  an XML parser context
 * @name:  an UTF8 encoded XML qualified name string
2932 * @prefix:  a xmlChar **
2933 *
2934 * parse an UTF8 encoded XML qualified name string
2935 *
2936 * [NS 5] QName ::= (Prefix ':')? LocalPart
2937 *
2938 * [NS 6] Prefix ::= NCName
2939 *
2940 * [NS 7] LocalPart ::= NCName
2941 *
2942 * Returns the local part, and prefix is updated
2943 *   to get the Prefix if any.
2944 */
2945
2946xmlChar *
2947xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2948    xmlChar buf[XML_MAX_NAMELEN + 5];
2949    xmlChar *buffer = NULL;
2950    int len = 0;
2951    int max = XML_MAX_NAMELEN;
2952    xmlChar *ret = NULL;
2953    const xmlChar *cur = name;
2954    int c;
2955
2956    if (prefix == NULL) return(NULL);
2957    *prefix = NULL;
2958
2959    if (cur == NULL) return(NULL);
2960
2961#ifndef XML_XML_NAMESPACE
2962    /* xml: prefix is not really a namespace */
2963    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2964        (cur[2] == 'l') && (cur[3] == ':'))
2965	return(xmlStrdup(name));
2966#endif
2967
2968    /* nasty but well=formed */
2969    if (cur[0] == ':')
2970	return(xmlStrdup(name));
2971
2972    c = *cur++;
2973    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2974	buf[len++] = c;
2975	c = *cur++;
2976    }
2977    if (len >= max) {
2978	/*
2979	 * Okay someone managed to make a huge name, so he's ready to pay
2980	 * for the processing speed.
2981	 */
2982	max = len * 2;
2983
2984	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2985	if (buffer == NULL) {
2986	    xmlErrMemory(ctxt, NULL);
2987	    return(NULL);
2988	}
2989	memcpy(buffer, buf, len);
2990	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2991	    if (len + 10 > max) {
2992	        xmlChar *tmp;
2993
2994		max *= 2;
2995		tmp = (xmlChar *) xmlRealloc(buffer,
2996						max * sizeof(xmlChar));
2997		if (tmp == NULL) {
2998		    xmlFree(buffer);
2999		    xmlErrMemory(ctxt, NULL);
3000		    return(NULL);
3001		}
3002		buffer = tmp;
3003	    }
3004	    buffer[len++] = c;
3005	    c = *cur++;
3006	}
3007	buffer[len] = 0;
3008    }
3009
3010    if ((c == ':') && (*cur == 0)) {
3011        if (buffer != NULL)
3012	    xmlFree(buffer);
3013	*prefix = NULL;
3014	return(xmlStrdup(name));
3015    }
3016
3017    if (buffer == NULL)
3018	ret = xmlStrndup(buf, len);
3019    else {
3020	ret = buffer;
3021	buffer = NULL;
3022	max = XML_MAX_NAMELEN;
3023    }
3024
3025
3026    if (c == ':') {
3027	c = *cur;
3028        *prefix = ret;
3029	if (c == 0) {
3030	    return(xmlStrndup(BAD_CAST "", 0));
3031	}
3032	len = 0;
3033
3034	/*
3035	 * Check that the first character is proper to start
3036	 * a new name
3037	 */
3038	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3039	      ((c >= 0x41) && (c <= 0x5A)) ||
3040	      (c == '_') || (c == ':'))) {
3041	    int l;
3042	    int first = CUR_SCHAR(cur, l);
3043
3044	    if (!IS_LETTER(first) && (first != '_')) {
3045		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3046			    "Name %s is not XML Namespace compliant\n",
3047				  name);
3048	    }
3049	}
3050	cur++;
3051
3052	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3053	    buf[len++] = c;
3054	    c = *cur++;
3055	}
3056	if (len >= max) {
3057	    /*
3058	     * Okay someone managed to make a huge name, so he's ready to pay
3059	     * for the processing speed.
3060	     */
3061	    max = len * 2;
3062
3063	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3064	    if (buffer == NULL) {
3065	        xmlErrMemory(ctxt, NULL);
3066		return(NULL);
3067	    }
3068	    memcpy(buffer, buf, len);
3069	    while (c != 0) { /* tested bigname2.xml */
3070		if (len + 10 > max) {
3071		    xmlChar *tmp;
3072
3073		    max *= 2;
3074		    tmp = (xmlChar *) xmlRealloc(buffer,
3075						    max * sizeof(xmlChar));
3076		    if (tmp == NULL) {
3077			xmlErrMemory(ctxt, NULL);
3078			xmlFree(buffer);
3079			return(NULL);
3080		    }
3081		    buffer = tmp;
3082		}
3083		buffer[len++] = c;
3084		c = *cur++;
3085	    }
3086	    buffer[len] = 0;
3087	}
3088
3089	if (buffer == NULL)
3090	    ret = xmlStrndup(buf, len);
3091	else {
3092	    ret = buffer;
3093	}
3094    }
3095
3096    return(ret);
3097}
3098
3099/************************************************************************
3100 *									*
3101 *			The parser itself				*
3102 *	Relates to http://www.w3.org/TR/REC-xml				*
3103 *									*
3104 ************************************************************************/
3105
3106/************************************************************************
3107 *									*
3108 *	Routines to parse Name, NCName and NmToken			*
3109 *									*
3110 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines below, only compiled in
 * when DEBUG is defined.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3119
3120/*
3121 * The two following functions are related to the change of accepted
3122 * characters for Name and NmToken in the Revision 5 of XML-1.0
3123 * They correspond to the modified production [4] and the new production [4a]
3124 * changes in that revision. Also note that the macros used for the
3125 * productions Letter, Digit, CombiningChar and Extender are not needed
3126 * anymore.
3127 * We still keep compatibility to pre-revision5 parsing semantic if the
3128 * new XML_PARSE_OLD10 option is given to the parser.
3129 */
3130static int
3131xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3132    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3133        /*
3134	 * Use the new checks of production [4] [4a] amd [5] of the
3135	 * Update 5 of XML-1.0
3136	 */
3137	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3138	    (((c >= 'a') && (c <= 'z')) ||
3139	     ((c >= 'A') && (c <= 'Z')) ||
3140	     (c == '_') || (c == ':') ||
3141	     ((c >= 0xC0) && (c <= 0xD6)) ||
3142	     ((c >= 0xD8) && (c <= 0xF6)) ||
3143	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3144	     ((c >= 0x370) && (c <= 0x37D)) ||
3145	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3146	     ((c >= 0x200C) && (c <= 0x200D)) ||
3147	     ((c >= 0x2070) && (c <= 0x218F)) ||
3148	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3149	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3150	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3151	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3152	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3153	    return(1);
3154    } else {
3155        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3156	    return(1);
3157    }
3158    return(0);
3159}
3160
3161static int
3162xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3163    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3164        /*
3165	 * Use the new checks of production [4] [4a] amd [5] of the
3166	 * Update 5 of XML-1.0
3167	 */
3168	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3169	    (((c >= 'a') && (c <= 'z')) ||
3170	     ((c >= 'A') && (c <= 'Z')) ||
3171	     ((c >= '0') && (c <= '9')) || /* !start */
3172	     (c == '_') || (c == ':') ||
3173	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3174	     ((c >= 0xC0) && (c <= 0xD6)) ||
3175	     ((c >= 0xD8) && (c <= 0xF6)) ||
3176	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3177	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3178	     ((c >= 0x370) && (c <= 0x37D)) ||
3179	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3180	     ((c >= 0x200C) && (c <= 0x200D)) ||
3181	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3182	     ((c >= 0x2070) && (c <= 0x218F)) ||
3183	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3184	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3185	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3186	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3187	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3188	     return(1);
3189    } else {
3190        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3191            (c == '.') || (c == '-') ||
3192	    (c == '_') || (c == ':') ||
3193	    (IS_COMBINING(c)) ||
3194	    (IS_EXTENDER(c)))
3195	    return(1);
3196    }
3197    return(0);
3198}
3199
3200static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3201                                          int *len, int *alloc, int normalize);
3202
3203static const xmlChar *
3204xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3205    int len = 0, l;
3206    int c;
3207    int count = 0;
3208
3209#ifdef DEBUG
3210    nbParseNameComplex++;
3211#endif
3212
3213    /*
3214     * Handler for more complex cases
3215     */
3216    GROW;
3217    if (ctxt->instate == XML_PARSER_EOF)
3218        return(NULL);
3219    c = CUR_CHAR(l);
3220    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3221        /*
3222	 * Use the new checks of production [4] [4a] amd [5] of the
3223	 * Update 5 of XML-1.0
3224	 */
3225	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3226	    (!(((c >= 'a') && (c <= 'z')) ||
3227	       ((c >= 'A') && (c <= 'Z')) ||
3228	       (c == '_') || (c == ':') ||
3229	       ((c >= 0xC0) && (c <= 0xD6)) ||
3230	       ((c >= 0xD8) && (c <= 0xF6)) ||
3231	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3232	       ((c >= 0x370) && (c <= 0x37D)) ||
3233	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3234	       ((c >= 0x200C) && (c <= 0x200D)) ||
3235	       ((c >= 0x2070) && (c <= 0x218F)) ||
3236	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3237	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3238	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3239	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3240	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3241	    return(NULL);
3242	}
3243	len += l;
3244	NEXTL(l);
3245	c = CUR_CHAR(l);
3246	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3247	       (((c >= 'a') && (c <= 'z')) ||
3248	        ((c >= 'A') && (c <= 'Z')) ||
3249	        ((c >= '0') && (c <= '9')) || /* !start */
3250	        (c == '_') || (c == ':') ||
3251	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3252	        ((c >= 0xC0) && (c <= 0xD6)) ||
3253	        ((c >= 0xD8) && (c <= 0xF6)) ||
3254	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3255	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3256	        ((c >= 0x370) && (c <= 0x37D)) ||
3257	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258	        ((c >= 0x200C) && (c <= 0x200D)) ||
3259	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3260	        ((c >= 0x2070) && (c <= 0x218F)) ||
3261	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3262	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3263	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3264	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3265	        ((c >= 0x10000) && (c <= 0xEFFFF))
3266		)) {
3267	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3268		count = 0;
3269		GROW;
3270                if (ctxt->instate == XML_PARSER_EOF)
3271                    return(NULL);
3272	    }
3273	    len += l;
3274	    NEXTL(l);
3275	    c = CUR_CHAR(l);
3276	}
3277    } else {
3278	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3279	    (!IS_LETTER(c) && (c != '_') &&
3280	     (c != ':'))) {
3281	    return(NULL);
3282	}
3283	len += l;
3284	NEXTL(l);
3285	c = CUR_CHAR(l);
3286
3287	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3288	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3289		(c == '.') || (c == '-') ||
3290		(c == '_') || (c == ':') ||
3291		(IS_COMBINING(c)) ||
3292		(IS_EXTENDER(c)))) {
3293	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3294		count = 0;
3295		GROW;
3296                if (ctxt->instate == XML_PARSER_EOF)
3297                    return(NULL);
3298	    }
3299	    len += l;
3300	    NEXTL(l);
3301	    c = CUR_CHAR(l);
3302	    if (c == 0) {
3303		count = 0;
3304		GROW;
3305                if (ctxt->instate == XML_PARSER_EOF)
3306                    return(NULL);
3307		c = CUR_CHAR(l);
3308	    }
3309	}
3310    }
3311    if ((len > XML_MAX_NAME_LENGTH) &&
3312        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3313        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3314        return(NULL);
3315    }
3316    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3317        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3318    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3319}
3320
3321/**
3322 * xmlParseName:
3323 * @ctxt:  an XML parser context
3324 *
3325 * parse an XML name.
3326 *
3327 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3328 *                  CombiningChar | Extender
3329 *
3330 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3331 *
3332 * [6] Names ::= Name (#x20 Name)*
3333 *
3334 * Returns the Name parsed or NULL
3335 */
3336
3337const xmlChar *
3338xmlParseName(xmlParserCtxtPtr ctxt) {
3339    const xmlChar *in;
3340    const xmlChar *ret;
3341    int count = 0;
3342
3343    GROW;
3344
3345#ifdef DEBUG
3346    nbParseName++;
3347#endif
3348
3349    /*
3350     * Accelerator for simple ASCII names
3351     */
3352    in = ctxt->input->cur;
3353    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3354	((*in >= 0x41) && (*in <= 0x5A)) ||
3355	(*in == '_') || (*in == ':')) {
3356	in++;
3357	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3358	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3359	       ((*in >= 0x30) && (*in <= 0x39)) ||
3360	       (*in == '_') || (*in == '-') ||
3361	       (*in == ':') || (*in == '.'))
3362	    in++;
3363	if ((*in > 0) && (*in < 0x80)) {
3364	    count = in - ctxt->input->cur;
3365            if ((count > XML_MAX_NAME_LENGTH) &&
3366                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3367                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3368                return(NULL);
3369            }
3370	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3371	    ctxt->input->cur = in;
3372	    ctxt->nbChars += count;
3373	    ctxt->input->col += count;
3374	    if (ret == NULL)
3375	        xmlErrMemory(ctxt, NULL);
3376	    return(ret);
3377	}
3378    }
3379    /* accelerator for special cases */
3380    return(xmlParseNameComplex(ctxt));
3381}
3382
3383static const xmlChar *
3384xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3385    int len = 0, l;
3386    int c;
3387    int count = 0;
3388
3389#ifdef DEBUG
3390    nbParseNCNameComplex++;
3391#endif
3392
3393    /*
3394     * Handler for more complex cases
3395     */
3396    GROW;
3397    c = CUR_CHAR(l);
3398    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3399	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3400	return(NULL);
3401    }
3402
3403    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3404	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3405	if (count++ > XML_PARSER_CHUNK_SIZE) {
3406            if ((len > XML_MAX_NAME_LENGTH) &&
3407                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3408                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3409                return(NULL);
3410            }
3411	    count = 0;
3412	    GROW;
3413            if (ctxt->instate == XML_PARSER_EOF)
3414                return(NULL);
3415	}
3416	len += l;
3417	NEXTL(l);
3418	c = CUR_CHAR(l);
3419	if (c == 0) {
3420	    count = 0;
3421	    GROW;
3422            if (ctxt->instate == XML_PARSER_EOF)
3423                return(NULL);
3424	    c = CUR_CHAR(l);
3425	}
3426    }
3427    if ((len > XML_MAX_NAME_LENGTH) &&
3428        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3429        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3430        return(NULL);
3431    }
3432    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3433}
3434
3435/**
3436 * xmlParseNCName:
3437 * @ctxt:  an XML parser context
3438 * @len:  lenght of the string parsed
3439 *
3440 * parse an XML name.
3441 *
3442 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3443 *                      CombiningChar | Extender
3444 *
3445 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3446 *
3447 * Returns the Name parsed or NULL
3448 */
3449
3450static const xmlChar *
3451xmlParseNCName(xmlParserCtxtPtr ctxt) {
3452    const xmlChar *in;
3453    const xmlChar *ret;
3454    int count = 0;
3455
3456#ifdef DEBUG
3457    nbParseNCName++;
3458#endif
3459
3460    /*
3461     * Accelerator for simple ASCII names
3462     */
3463    in = ctxt->input->cur;
3464    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3465	((*in >= 0x41) && (*in <= 0x5A)) ||
3466	(*in == '_')) {
3467	in++;
3468	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3469	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3470	       ((*in >= 0x30) && (*in <= 0x39)) ||
3471	       (*in == '_') || (*in == '-') ||
3472	       (*in == '.'))
3473	    in++;
3474	if ((*in > 0) && (*in < 0x80)) {
3475	    count = in - ctxt->input->cur;
3476            if ((count > XML_MAX_NAME_LENGTH) &&
3477                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3478                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3479                return(NULL);
3480            }
3481	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3482	    ctxt->input->cur = in;
3483	    ctxt->nbChars += count;
3484	    ctxt->input->col += count;
3485	    if (ret == NULL) {
3486	        xmlErrMemory(ctxt, NULL);
3487	    }
3488	    return(ret);
3489	}
3490    }
3491    return(xmlParseNCNameComplex(ctxt));
3492}
3493
3494/**
3495 * xmlParseNameAndCompare:
3496 * @ctxt:  an XML parser context
3497 *
3498 * parse an XML name and compares for match
3499 * (specialized for endtag parsing)
3500 *
3501 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3502 * and the name for mismatch
3503 */
3504
3505static const xmlChar *
3506xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3507    register const xmlChar *cmp = other;
3508    register const xmlChar *in;
3509    const xmlChar *ret;
3510
3511    GROW;
3512    if (ctxt->instate == XML_PARSER_EOF)
3513        return(NULL);
3514
3515    in = ctxt->input->cur;
3516    while (*in != 0 && *in == *cmp) {
3517	++in;
3518	++cmp;
3519	ctxt->input->col++;
3520    }
3521    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3522	/* success */
3523	ctxt->input->cur = in;
3524	return (const xmlChar*) 1;
3525    }
3526    /* failure (or end of input buffer), check with full function */
3527    ret = xmlParseName (ctxt);
3528    /* strings coming from the dictionnary direct compare possible */
3529    if (ret == other) {
3530	return (const xmlChar*) 1;
3531    }
3532    return ret;
3533}
3534
3535/**
3536 * xmlParseStringName:
3537 * @ctxt:  an XML parser context
3538 * @str:  a pointer to the string pointer (IN/OUT)
3539 *
3540 * parse an XML name.
3541 *
3542 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3543 *                  CombiningChar | Extender
3544 *
3545 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3546 *
3547 * [6] Names ::= Name (#x20 Name)*
3548 *
3549 * Returns the Name parsed or NULL. The @str pointer
3550 * is updated to the current location in the string.
3551 */
3552
3553static xmlChar *
3554xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3555    xmlChar buf[XML_MAX_NAMELEN + 5];
3556    const xmlChar *cur = *str;
3557    int len = 0, l;
3558    int c;
3559
3560#ifdef DEBUG
3561    nbParseStringName++;
3562#endif
3563
3564    c = CUR_SCHAR(cur, l);
3565    if (!xmlIsNameStartChar(ctxt, c)) {
3566	return(NULL);
3567    }
3568
3569    COPY_BUF(l,buf,len,c);
3570    cur += l;
3571    c = CUR_SCHAR(cur, l);
3572    while (xmlIsNameChar(ctxt, c)) {
3573	COPY_BUF(l,buf,len,c);
3574	cur += l;
3575	c = CUR_SCHAR(cur, l);
3576	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3577	    /*
3578	     * Okay someone managed to make a huge name, so he's ready to pay
3579	     * for the processing speed.
3580	     */
3581	    xmlChar *buffer;
3582	    int max = len * 2;
3583
3584	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3585	    if (buffer == NULL) {
3586	        xmlErrMemory(ctxt, NULL);
3587		return(NULL);
3588	    }
3589	    memcpy(buffer, buf, len);
3590	    while (xmlIsNameChar(ctxt, c)) {
3591		if (len + 10 > max) {
3592		    xmlChar *tmp;
3593
3594                    if ((len > XML_MAX_NAME_LENGTH) &&
3595                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3596                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3597			xmlFree(buffer);
3598                        return(NULL);
3599                    }
3600		    max *= 2;
3601		    tmp = (xmlChar *) xmlRealloc(buffer,
3602			                            max * sizeof(xmlChar));
3603		    if (tmp == NULL) {
3604			xmlErrMemory(ctxt, NULL);
3605			xmlFree(buffer);
3606			return(NULL);
3607		    }
3608		    buffer = tmp;
3609		}
3610		COPY_BUF(l,buffer,len,c);
3611		cur += l;
3612		c = CUR_SCHAR(cur, l);
3613	    }
3614	    buffer[len] = 0;
3615	    *str = cur;
3616	    return(buffer);
3617	}
3618    }
3619    if ((len > XML_MAX_NAME_LENGTH) &&
3620        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3621        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3622        return(NULL);
3623    }
3624    *str = cur;
3625    return(xmlStrndup(buf, len));
3626}
3627
3628/**
3629 * xmlParseNmtoken:
3630 * @ctxt:  an XML parser context
3631 *
3632 * parse an XML Nmtoken.
3633 *
3634 * [7] Nmtoken ::= (NameChar)+
3635 *
3636 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3637 *
3638 * Returns the Nmtoken parsed or NULL
3639 */
3640
3641xmlChar *
3642xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3643    xmlChar buf[XML_MAX_NAMELEN + 5];
3644    int len = 0, l;
3645    int c;
3646    int count = 0;
3647
3648#ifdef DEBUG
3649    nbParseNmToken++;
3650#endif
3651
3652    GROW;
3653    if (ctxt->instate == XML_PARSER_EOF)
3654        return(NULL);
3655    c = CUR_CHAR(l);
3656
3657    while (xmlIsNameChar(ctxt, c)) {
3658	if (count++ > XML_PARSER_CHUNK_SIZE) {
3659	    count = 0;
3660	    GROW;
3661	}
3662	COPY_BUF(l,buf,len,c);
3663	NEXTL(l);
3664	c = CUR_CHAR(l);
3665	if (c == 0) {
3666	    count = 0;
3667	    GROW;
3668	    if (ctxt->instate == XML_PARSER_EOF)
3669		return(NULL);
3670            c = CUR_CHAR(l);
3671	}
3672	if (len >= XML_MAX_NAMELEN) {
3673	    /*
3674	     * Okay someone managed to make a huge token, so he's ready to pay
3675	     * for the processing speed.
3676	     */
3677	    xmlChar *buffer;
3678	    int max = len * 2;
3679
3680	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3681	    if (buffer == NULL) {
3682	        xmlErrMemory(ctxt, NULL);
3683		return(NULL);
3684	    }
3685	    memcpy(buffer, buf, len);
3686	    while (xmlIsNameChar(ctxt, c)) {
3687		if (count++ > XML_PARSER_CHUNK_SIZE) {
3688		    count = 0;
3689		    GROW;
3690                    if (ctxt->instate == XML_PARSER_EOF) {
3691                        xmlFree(buffer);
3692                        return(NULL);
3693                    }
3694		}
3695		if (len + 10 > max) {
3696		    xmlChar *tmp;
3697
3698                    if ((max > XML_MAX_NAME_LENGTH) &&
3699                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3700                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3701                        xmlFree(buffer);
3702                        return(NULL);
3703                    }
3704		    max *= 2;
3705		    tmp = (xmlChar *) xmlRealloc(buffer,
3706			                            max * sizeof(xmlChar));
3707		    if (tmp == NULL) {
3708			xmlErrMemory(ctxt, NULL);
3709			xmlFree(buffer);
3710			return(NULL);
3711		    }
3712		    buffer = tmp;
3713		}
3714		COPY_BUF(l,buffer,len,c);
3715		NEXTL(l);
3716		c = CUR_CHAR(l);
3717	    }
3718	    buffer[len] = 0;
3719	    return(buffer);
3720	}
3721    }
3722    if (len == 0)
3723        return(NULL);
3724    if ((len > XML_MAX_NAME_LENGTH) &&
3725        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3726        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3727        return(NULL);
3728    }
3729    return(xmlStrndup(buf, len));
3730}
3731
3732/**
3733 * xmlParseEntityValue:
3734 * @ctxt:  an XML parser context
3735 * @orig:  if non-NULL store a copy of the original entity value
3736 *
3737 * parse a value for ENTITY declarations
3738 *
3739 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3740 *	               "'" ([^%&'] | PEReference | Reference)* "'"
3741 *
3742 * Returns the EntityValue parsed with reference substituted or NULL
3743 */
3744
3745xmlChar *
3746xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3747    xmlChar *buf = NULL;
3748    int len = 0;
3749    int size = XML_PARSER_BUFFER_SIZE;
3750    int c, l;
3751    xmlChar stop;
3752    xmlChar *ret = NULL;
3753    const xmlChar *cur = NULL;
3754    xmlParserInputPtr input;
3755
3756    if (RAW == '"') stop = '"';
3757    else if (RAW == '\'') stop = '\'';
3758    else {
3759	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3760	return(NULL);
3761    }
3762    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3763    if (buf == NULL) {
3764	xmlErrMemory(ctxt, NULL);
3765	return(NULL);
3766    }
3767
3768    /*
3769     * The content of the entity definition is copied in a buffer.
3770     */
3771
3772    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3773    input = ctxt->input;
3774    GROW;
3775    if (ctxt->instate == XML_PARSER_EOF) {
3776        xmlFree(buf);
3777        return(NULL);
3778    }
3779    NEXT;
3780    c = CUR_CHAR(l);
3781    /*
3782     * NOTE: 4.4.5 Included in Literal
3783     * When a parameter entity reference appears in a literal entity
3784     * value, ... a single or double quote character in the replacement
3785     * text is always treated as a normal data character and will not
3786     * terminate the literal.
3787     * In practice it means we stop the loop only when back at parsing
3788     * the initial entity and the quote is found
3789     */
3790    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3791	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3792	if (len + 5 >= size) {
3793	    xmlChar *tmp;
3794
3795	    size *= 2;
3796	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3797	    if (tmp == NULL) {
3798		xmlErrMemory(ctxt, NULL);
3799		xmlFree(buf);
3800		return(NULL);
3801	    }
3802	    buf = tmp;
3803	}
3804	COPY_BUF(l,buf,len,c);
3805	NEXTL(l);
3806	/*
3807	 * Pop-up of finished entities.
3808	 */
3809	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3810	    xmlPopInput(ctxt);
3811
3812	GROW;
3813	c = CUR_CHAR(l);
3814	if (c == 0) {
3815	    GROW;
3816	    c = CUR_CHAR(l);
3817	}
3818    }
3819    buf[len] = 0;
3820    if (ctxt->instate == XML_PARSER_EOF) {
3821        xmlFree(buf);
3822        return(NULL);
3823    }
3824
3825    /*
3826     * Raise problem w.r.t. '&' and '%' being used in non-entities
3827     * reference constructs. Note Charref will be handled in
3828     * xmlStringDecodeEntities()
3829     */
3830    cur = buf;
3831    while (*cur != 0) { /* non input consuming */
3832	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3833	    xmlChar *name;
3834	    xmlChar tmp = *cur;
3835
3836	    cur++;
3837	    name = xmlParseStringName(ctxt, &cur);
3838            if ((name == NULL) || (*cur != ';')) {
3839		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3840	    "EntityValue: '%c' forbidden except for entities references\n",
3841	                          tmp);
3842	    }
3843	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3844		(ctxt->inputNr == 1)) {
3845		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3846	    }
3847	    if (name != NULL)
3848		xmlFree(name);
3849	    if (*cur == 0)
3850	        break;
3851	}
3852	cur++;
3853    }
3854
3855    /*
3856     * Then PEReference entities are substituted.
3857     */
3858    if (c != stop) {
3859	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3860	xmlFree(buf);
3861    } else {
3862	NEXT;
3863	/*
3864	 * NOTE: 4.4.7 Bypassed
3865	 * When a general entity reference appears in the EntityValue in
3866	 * an entity declaration, it is bypassed and left as is.
3867	 * so XML_SUBSTITUTE_REF is not set here.
3868	 */
3869	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3870				      0, 0, 0);
3871	if (orig != NULL)
3872	    *orig = buf;
3873	else
3874	    xmlFree(buf);
3875    }
3876
3877    return(ret);
3878}
3879
3880/**
3881 * xmlParseAttValueComplex:
3882 * @ctxt:  an XML parser context
3883 * @len:   the resulting attribute len
3884 * @normalize:  wether to apply the inner normalization
3885 *
3886 * parse a value for an attribute, this is the fallback function
3887 * of xmlParseAttValue() when the attribute parsing requires handling
3888 * of non-ASCII characters, or normalization compaction.
3889 *
3890 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3891 */
3892static xmlChar *
3893xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3894    xmlChar limit = 0;
3895    xmlChar *buf = NULL;
3896    xmlChar *rep = NULL;
3897    size_t len = 0;
3898    size_t buf_size = 0;
3899    int c, l, in_space = 0;
3900    xmlChar *current = NULL;
3901    xmlEntityPtr ent;
3902
3903    if (NXT(0) == '"') {
3904	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905	limit = '"';
3906        NEXT;
3907    } else if (NXT(0) == '\'') {
3908	limit = '\'';
3909	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910        NEXT;
3911    } else {
3912	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3913	return(NULL);
3914    }
3915
3916    /*
3917     * allocate a translation buffer.
3918     */
3919    buf_size = XML_PARSER_BUFFER_SIZE;
3920    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3921    if (buf == NULL) goto mem_error;
3922
3923    /*
3924     * OK loop until we reach one of the ending char or a size limit.
3925     */
3926    c = CUR_CHAR(l);
3927    while (((NXT(0) != limit) && /* checked */
3928            (IS_CHAR(c)) && (c != '<')) &&
3929            (ctxt->instate != XML_PARSER_EOF)) {
3930        /*
3931         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3932         * special option is given
3933         */
3934        if ((len > XML_MAX_TEXT_LENGTH) &&
3935            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3936            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3937                           "AttValue lenght too long\n");
3938            goto mem_error;
3939        }
3940	if (c == 0) break;
3941	if (c == '&') {
3942	    in_space = 0;
3943	    if (NXT(1) == '#') {
3944		int val = xmlParseCharRef(ctxt);
3945
3946		if (val == '&') {
3947		    if (ctxt->replaceEntities) {
3948			if (len + 10 > buf_size) {
3949			    growBuffer(buf, 10);
3950			}
3951			buf[len++] = '&';
3952		    } else {
3953			/*
3954			 * The reparsing will be done in xmlStringGetNodeList()
3955			 * called by the attribute() function in SAX.c
3956			 */
3957			if (len + 10 > buf_size) {
3958			    growBuffer(buf, 10);
3959			}
3960			buf[len++] = '&';
3961			buf[len++] = '#';
3962			buf[len++] = '3';
3963			buf[len++] = '8';
3964			buf[len++] = ';';
3965		    }
3966		} else if (val != 0) {
3967		    if (len + 10 > buf_size) {
3968			growBuffer(buf, 10);
3969		    }
3970		    len += xmlCopyChar(0, &buf[len], val);
3971		}
3972	    } else {
3973		ent = xmlParseEntityRef(ctxt);
3974		ctxt->nbentities++;
3975		if (ent != NULL)
3976		    ctxt->nbentities += ent->owner;
3977		if ((ent != NULL) &&
3978		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3979		    if (len + 10 > buf_size) {
3980			growBuffer(buf, 10);
3981		    }
3982		    if ((ctxt->replaceEntities == 0) &&
3983		        (ent->content[0] == '&')) {
3984			buf[len++] = '&';
3985			buf[len++] = '#';
3986			buf[len++] = '3';
3987			buf[len++] = '8';
3988			buf[len++] = ';';
3989		    } else {
3990			buf[len++] = ent->content[0];
3991		    }
3992		} else if ((ent != NULL) &&
3993		           (ctxt->replaceEntities != 0)) {
3994		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3995			rep = xmlStringDecodeEntities(ctxt, ent->content,
3996						      XML_SUBSTITUTE_REF,
3997						      0, 0, 0);
3998			if (rep != NULL) {
3999			    current = rep;
4000			    while (*current != 0) { /* non input consuming */
4001                                if ((*current == 0xD) || (*current == 0xA) ||
4002                                    (*current == 0x9)) {
4003                                    buf[len++] = 0x20;
4004                                    current++;
4005                                } else
4006                                    buf[len++] = *current++;
4007				if (len + 10 > buf_size) {
4008				    growBuffer(buf, 10);
4009				}
4010			    }
4011			    xmlFree(rep);
4012			    rep = NULL;
4013			}
4014		    } else {
4015			if (len + 10 > buf_size) {
4016			    growBuffer(buf, 10);
4017			}
4018			if (ent->content != NULL)
4019			    buf[len++] = ent->content[0];
4020		    }
4021		} else if (ent != NULL) {
4022		    int i = xmlStrlen(ent->name);
4023		    const xmlChar *cur = ent->name;
4024
4025		    /*
4026		     * This may look absurd but is needed to detect
4027		     * entities problems
4028		     */
4029		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4030			(ent->content != NULL)) {
4031			rep = xmlStringDecodeEntities(ctxt, ent->content,
4032						  XML_SUBSTITUTE_REF, 0, 0, 0);
4033			if (rep != NULL) {
4034			    xmlFree(rep);
4035			    rep = NULL;
4036			}
4037		    }
4038
4039		    /*
4040		     * Just output the reference
4041		     */
4042		    buf[len++] = '&';
4043		    while (len + i + 10 > buf_size) {
4044			growBuffer(buf, i + 10);
4045		    }
4046		    for (;i > 0;i--)
4047			buf[len++] = *cur++;
4048		    buf[len++] = ';';
4049		}
4050	    }
4051	} else {
4052	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4053	        if ((len != 0) || (!normalize)) {
4054		    if ((!normalize) || (!in_space)) {
4055			COPY_BUF(l,buf,len,0x20);
4056			while (len + 10 > buf_size) {
4057			    growBuffer(buf, 10);
4058			}
4059		    }
4060		    in_space = 1;
4061		}
4062	    } else {
4063	        in_space = 0;
4064		COPY_BUF(l,buf,len,c);
4065		if (len + 10 > buf_size) {
4066		    growBuffer(buf, 10);
4067		}
4068	    }
4069	    NEXTL(l);
4070	}
4071	GROW;
4072	c = CUR_CHAR(l);
4073    }
4074    if (ctxt->instate == XML_PARSER_EOF)
4075        goto error;
4076
4077    if ((in_space) && (normalize)) {
4078        while (buf[len - 1] == 0x20) len--;
4079    }
4080    buf[len] = 0;
4081    if (RAW == '<') {
4082	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4083    } else if (RAW != limit) {
4084	if ((c != 0) && (!IS_CHAR(c))) {
4085	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4086			   "invalid character in attribute value\n");
4087	} else {
4088	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4089			   "AttValue: ' expected\n");
4090        }
4091    } else
4092	NEXT;
4093
4094    /*
4095     * There we potentially risk an overflow, don't allow attribute value of
4096     * lenght more than INT_MAX it is a very reasonnable assumption !
4097     */
4098    if (len >= INT_MAX) {
4099        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4100                       "AttValue lenght too long\n");
4101        goto mem_error;
4102    }
4103
4104    if (attlen != NULL) *attlen = (int) len;
4105    return(buf);
4106
4107mem_error:
4108    xmlErrMemory(ctxt, NULL);
4109error:
4110    if (buf != NULL)
4111        xmlFree(buf);
4112    if (rep != NULL)
4113        xmlFree(rep);
4114    return(NULL);
4115}
4116
4117/**
4118 * xmlParseAttValue:
4119 * @ctxt:  an XML parser context
4120 *
4121 * parse a value for an attribute
4122 * Note: the parser won't do substitution of entities here, this
4123 * will be handled later in xmlStringGetNodeList
4124 *
4125 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4126 *                   "'" ([^<&'] | Reference)* "'"
4127 *
4128 * 3.3.3 Attribute-Value Normalization:
4129 * Before the value of an attribute is passed to the application or
4130 * checked for validity, the XML processor must normalize it as follows:
4131 * - a character reference is processed by appending the referenced
4132 *   character to the attribute value
4133 * - an entity reference is processed by recursively processing the
4134 *   replacement text of the entity
4135 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4136 *   appending #x20 to the normalized value, except that only a single
4137 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4138 *   parsed entity or the literal entity value of an internal parsed entity
4139 * - other characters are processed by appending them to the normalized value
4140 * If the declared value is not CDATA, then the XML processor must further
4141 * process the normalized attribute value by discarding any leading and
4142 * trailing space (#x20) characters, and by replacing sequences of space
4143 * (#x20) characters by a single space (#x20) character.
4144 * All attributes for which no declaration has been read should be treated
4145 * by a non-validating parser as if declared CDATA.
4146 *
4147 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4148 */
4149
4150
4151xmlChar *
4152xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4153    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4154    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4155}
4156
4157/**
4158 * xmlParseSystemLiteral:
4159 * @ctxt:  an XML parser context
4160 *
4161 * parse an XML Literal
4162 *
4163 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4164 *
4165 * Returns the SystemLiteral parsed or NULL
4166 */
4167
4168xmlChar *
4169xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4170    xmlChar *buf = NULL;
4171    int len = 0;
4172    int size = XML_PARSER_BUFFER_SIZE;
4173    int cur, l;
4174    xmlChar stop;
4175    int state = ctxt->instate;
4176    int count = 0;
4177
4178    SHRINK;
4179    if (RAW == '"') {
4180        NEXT;
4181	stop = '"';
4182    } else if (RAW == '\'') {
4183        NEXT;
4184	stop = '\'';
4185    } else {
4186	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4187	return(NULL);
4188    }
4189
4190    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4191    if (buf == NULL) {
4192        xmlErrMemory(ctxt, NULL);
4193	return(NULL);
4194    }
4195    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4196    cur = CUR_CHAR(l);
4197    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4198	if (len + 5 >= size) {
4199	    xmlChar *tmp;
4200
4201            if ((size > XML_MAX_NAME_LENGTH) &&
4202                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4203                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4204                xmlFree(buf);
4205		ctxt->instate = (xmlParserInputState) state;
4206                return(NULL);
4207            }
4208	    size *= 2;
4209	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4210	    if (tmp == NULL) {
4211	        xmlFree(buf);
4212		xmlErrMemory(ctxt, NULL);
4213		ctxt->instate = (xmlParserInputState) state;
4214		return(NULL);
4215	    }
4216	    buf = tmp;
4217	}
4218	count++;
4219	if (count > 50) {
4220	    GROW;
4221	    count = 0;
4222            if (ctxt->instate == XML_PARSER_EOF) {
4223	        xmlFree(buf);
4224		return(NULL);
4225            }
4226	}
4227	COPY_BUF(l,buf,len,cur);
4228	NEXTL(l);
4229	cur = CUR_CHAR(l);
4230	if (cur == 0) {
4231	    GROW;
4232	    SHRINK;
4233	    cur = CUR_CHAR(l);
4234	}
4235    }
4236    buf[len] = 0;
4237    ctxt->instate = (xmlParserInputState) state;
4238    if (!IS_CHAR(cur)) {
4239	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4240    } else {
4241	NEXT;
4242    }
4243    return(buf);
4244}
4245
4246/**
4247 * xmlParsePubidLiteral:
4248 * @ctxt:  an XML parser context
4249 *
4250 * parse an XML public literal
4251 *
4252 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4253 *
4254 * Returns the PubidLiteral parsed or NULL.
4255 */
4256
4257xmlChar *
4258xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4259    xmlChar *buf = NULL;
4260    int len = 0;
4261    int size = XML_PARSER_BUFFER_SIZE;
4262    xmlChar cur;
4263    xmlChar stop;
4264    int count = 0;
4265    xmlParserInputState oldstate = ctxt->instate;
4266
4267    SHRINK;
4268    if (RAW == '"') {
4269        NEXT;
4270	stop = '"';
4271    } else if (RAW == '\'') {
4272        NEXT;
4273	stop = '\'';
4274    } else {
4275	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4276	return(NULL);
4277    }
4278    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4279    if (buf == NULL) {
4280	xmlErrMemory(ctxt, NULL);
4281	return(NULL);
4282    }
4283    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4284    cur = CUR;
4285    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4286	if (len + 1 >= size) {
4287	    xmlChar *tmp;
4288
4289            if ((size > XML_MAX_NAME_LENGTH) &&
4290                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4291                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4292                xmlFree(buf);
4293                return(NULL);
4294            }
4295	    size *= 2;
4296	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4297	    if (tmp == NULL) {
4298		xmlErrMemory(ctxt, NULL);
4299		xmlFree(buf);
4300		return(NULL);
4301	    }
4302	    buf = tmp;
4303	}
4304	buf[len++] = cur;
4305	count++;
4306	if (count > 50) {
4307	    GROW;
4308	    count = 0;
4309            if (ctxt->instate == XML_PARSER_EOF) {
4310		xmlFree(buf);
4311		return(NULL);
4312            }
4313	}
4314	NEXT;
4315	cur = CUR;
4316	if (cur == 0) {
4317	    GROW;
4318	    SHRINK;
4319	    cur = CUR;
4320	}
4321    }
4322    buf[len] = 0;
4323    if (cur != stop) {
4324	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4325    } else {
4326	NEXT;
4327    }
4328    ctxt->instate = oldstate;
4329    return(buf);
4330}
4331
/*
 * Forward declaration: xmlParseCharData() hands over to this routine,
 * which is defined after it.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by an input byte. A non-zero entry (the byte value
 * itself) lets the scanning loop of xmlParseCharData() step over the byte;
 * a zero entry makes that loop stop. Zero entries are the control bytes
 * other than 0x09, '&' (0x26), '<' (0x3C), ']' (0x5D) and every byte
 * from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 kept; LF (0xA) and CR (0xD) are zero */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & (0x26) is zero */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < (0x3C) is zero */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] (0x5D) is zero */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii: all zero from here on */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4371
4372/**
4373 * xmlParseCharData:
4374 * @ctxt:  an XML parser context
4375 * @cdata:  int indicating whether we are within a CDATA section
4376 *
4377 * parse a CharData section.
4378 * if we are within a CDATA section ']]>' marks an end of section.
4379 *
4380 * The right angle bracket (>) may be represented using the string "&gt;",
4381 * and must, for compatibility, be escaped using "&gt;" or a character
4382 * reference when it appears in the string "]]>" in content, when that
4383 * string is not marking the end of a CDATA section.
4384 *
4385 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4386 */
4387
4388void
4389xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4390    const xmlChar *in;
4391    int nbchar = 0;
4392    int line = ctxt->input->line;
4393    int col = ctxt->input->col;
4394    int ccol;
4395
4396    SHRINK;
4397    GROW;
4398    /*
4399     * Accelerated common case where input don't need to be
4400     * modified before passing it to the handler.
4401     */
4402    if (!cdata) {
4403	in = ctxt->input->cur;
4404	do {
4405get_more_space:
4406	    while (*in == 0x20) { in++; ctxt->input->col++; }
4407	    if (*in == 0xA) {
4408		do {
4409		    ctxt->input->line++; ctxt->input->col = 1;
4410		    in++;
4411		} while (*in == 0xA);
4412		goto get_more_space;
4413	    }
4414	    if (*in == '<') {
4415		nbchar = in - ctxt->input->cur;
4416		if (nbchar > 0) {
4417		    const xmlChar *tmp = ctxt->input->cur;
4418		    ctxt->input->cur = in;
4419
4420		    if ((ctxt->sax != NULL) &&
4421		        (ctxt->sax->ignorableWhitespace !=
4422		         ctxt->sax->characters)) {
4423			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4424			    if (ctxt->sax->ignorableWhitespace != NULL)
4425				ctxt->sax->ignorableWhitespace(ctxt->userData,
4426						       tmp, nbchar);
4427			} else {
4428			    if (ctxt->sax->characters != NULL)
4429				ctxt->sax->characters(ctxt->userData,
4430						      tmp, nbchar);
4431			    if (*ctxt->space == -1)
4432			        *ctxt->space = -2;
4433			}
4434		    } else if ((ctxt->sax != NULL) &&
4435		               (ctxt->sax->characters != NULL)) {
4436			ctxt->sax->characters(ctxt->userData,
4437					      tmp, nbchar);
4438		    }
4439		}
4440		return;
4441	    }
4442
4443get_more:
4444            ccol = ctxt->input->col;
4445	    while (test_char_data[*in]) {
4446		in++;
4447		ccol++;
4448	    }
4449	    ctxt->input->col = ccol;
4450	    if (*in == 0xA) {
4451		do {
4452		    ctxt->input->line++; ctxt->input->col = 1;
4453		    in++;
4454		} while (*in == 0xA);
4455		goto get_more;
4456	    }
4457	    if (*in == ']') {
4458		if ((in[1] == ']') && (in[2] == '>')) {
4459		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4460		    ctxt->input->cur = in;
4461		    return;
4462		}
4463		in++;
4464		ctxt->input->col++;
4465		goto get_more;
4466	    }
4467	    nbchar = in - ctxt->input->cur;
4468	    if (nbchar > 0) {
4469		if ((ctxt->sax != NULL) &&
4470		    (ctxt->sax->ignorableWhitespace !=
4471		     ctxt->sax->characters) &&
4472		    (IS_BLANK_CH(*ctxt->input->cur))) {
4473		    const xmlChar *tmp = ctxt->input->cur;
4474		    ctxt->input->cur = in;
4475
4476		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4477		        if (ctxt->sax->ignorableWhitespace != NULL)
4478			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4479							   tmp, nbchar);
4480		    } else {
4481		        if (ctxt->sax->characters != NULL)
4482			    ctxt->sax->characters(ctxt->userData,
4483						  tmp, nbchar);
4484			if (*ctxt->space == -1)
4485			    *ctxt->space = -2;
4486		    }
4487                    line = ctxt->input->line;
4488                    col = ctxt->input->col;
4489		} else if (ctxt->sax != NULL) {
4490		    if (ctxt->sax->characters != NULL)
4491			ctxt->sax->characters(ctxt->userData,
4492					      ctxt->input->cur, nbchar);
4493                    line = ctxt->input->line;
4494                    col = ctxt->input->col;
4495		}
4496                /* something really bad happened in the SAX callback */
4497                if (ctxt->instate != XML_PARSER_CONTENT)
4498                    return;
4499	    }
4500	    ctxt->input->cur = in;
4501	    if (*in == 0xD) {
4502		in++;
4503		if (*in == 0xA) {
4504		    ctxt->input->cur = in;
4505		    in++;
4506		    ctxt->input->line++; ctxt->input->col = 1;
4507		    continue; /* while */
4508		}
4509		in--;
4510	    }
4511	    if (*in == '<') {
4512		return;
4513	    }
4514	    if (*in == '&') {
4515		return;
4516	    }
4517	    SHRINK;
4518	    GROW;
4519            if (ctxt->instate == XML_PARSER_EOF)
4520		return;
4521	    in = ctxt->input->cur;
4522	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4523	nbchar = 0;
4524    }
4525    ctxt->input->line = line;
4526    ctxt->input->col = col;
4527    xmlParseCharDataComplex(ctxt, cdata);
4528}
4529
4530/**
4531 * xmlParseCharDataComplex:
4532 * @ctxt:  an XML parser context
4533 * @cdata:  int indicating whether we are within a CDATA section
4534 *
4535 * parse a CharData section.this is the fallback function
4536 * of xmlParseCharData() when the parsing requires handling
4537 * of non-ASCII characters.
4538 */
4539static void
4540xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4541    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4542    int nbchar = 0;
4543    int cur, l;
4544    int count = 0;
4545
4546    SHRINK;
4547    GROW;
4548    cur = CUR_CHAR(l);
4549    while ((cur != '<') && /* checked */
4550           (cur != '&') &&
4551	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4552	if ((cur == ']') && (NXT(1) == ']') &&
4553	    (NXT(2) == '>')) {
4554	    if (cdata) break;
4555	    else {
4556		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4557	    }
4558	}
4559	COPY_BUF(l,buf,nbchar,cur);
4560	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4561	    buf[nbchar] = 0;
4562
4563	    /*
4564	     * OK the segment is to be consumed as chars.
4565	     */
4566	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4567		if (areBlanks(ctxt, buf, nbchar, 0)) {
4568		    if (ctxt->sax->ignorableWhitespace != NULL)
4569			ctxt->sax->ignorableWhitespace(ctxt->userData,
4570			                               buf, nbchar);
4571		} else {
4572		    if (ctxt->sax->characters != NULL)
4573			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4574		    if ((ctxt->sax->characters !=
4575		         ctxt->sax->ignorableWhitespace) &&
4576			(*ctxt->space == -1))
4577			*ctxt->space = -2;
4578		}
4579	    }
4580	    nbchar = 0;
4581            /* something really bad happened in the SAX callback */
4582            if (ctxt->instate != XML_PARSER_CONTENT)
4583                return;
4584	}
4585	count++;
4586	if (count > 50) {
4587	    GROW;
4588	    count = 0;
4589            if (ctxt->instate == XML_PARSER_EOF)
4590		return;
4591	}
4592	NEXTL(l);
4593	cur = CUR_CHAR(l);
4594    }
4595    if (nbchar != 0) {
4596        buf[nbchar] = 0;
4597	/*
4598	 * OK the segment is to be consumed as chars.
4599	 */
4600	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4601	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4602		if (ctxt->sax->ignorableWhitespace != NULL)
4603		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4604	    } else {
4605		if (ctxt->sax->characters != NULL)
4606		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4607		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4608		    (*ctxt->space == -1))
4609		    *ctxt->space = -2;
4610	    }
4611	}
4612    }
4613    if ((cur != 0) && (!IS_CHAR(cur))) {
4614	/* Generate the error and skip the offending character */
4615        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616                          "PCDATA invalid Char value %d\n",
4617	                  cur);
4618	NEXTL(l);
4619    }
4620}
4621
4622/**
4623 * xmlParseExternalID:
4624 * @ctxt:  an XML parser context
4625 * @publicID:  a xmlChar** receiving PubidLiteral
4626 * @strict: indicate whether we should restrict parsing to only
4627 *          production [75], see NOTE below
4628 *
4629 * Parse an External ID or a Public ID
4630 *
4631 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4632 *       'PUBLIC' S PubidLiteral S SystemLiteral
4633 *
4634 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4635 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4636 *
4637 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4638 *
4639 * Returns the function returns SystemLiteral and in the second
4640 *                case publicID receives PubidLiteral, is strict is off
4641 *                it is possible to return NULL and have publicID set.
4642 */
4643
4644xmlChar *
4645xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4646    xmlChar *URI = NULL;
4647
4648    SHRINK;
4649
4650    *publicID = NULL;
4651    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4652        SKIP(6);
4653	if (!IS_BLANK_CH(CUR)) {
4654	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4655	                   "Space required after 'SYSTEM'\n");
4656	}
4657        SKIP_BLANKS;
4658	URI = xmlParseSystemLiteral(ctxt);
4659	if (URI == NULL) {
4660	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4661        }
4662    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4663        SKIP(6);
4664	if (!IS_BLANK_CH(CUR)) {
4665	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4666		    "Space required after 'PUBLIC'\n");
4667	}
4668        SKIP_BLANKS;
4669	*publicID = xmlParsePubidLiteral(ctxt);
4670	if (*publicID == NULL) {
4671	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4672	}
4673	if (strict) {
4674	    /*
4675	     * We don't handle [83] so "S SystemLiteral" is required.
4676	     */
4677	    if (!IS_BLANK_CH(CUR)) {
4678		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4679			"Space required after the Public Identifier\n");
4680	    }
4681	} else {
4682	    /*
4683	     * We handle [83] so we return immediately, if
4684	     * "S SystemLiteral" is not detected. From a purely parsing
4685	     * point of view that's a nice mess.
4686	     */
4687	    const xmlChar *ptr;
4688	    GROW;
4689
4690	    ptr = CUR_PTR;
4691	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4692
4693	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4694	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4695	}
4696        SKIP_BLANKS;
4697	URI = xmlParseSystemLiteral(ctxt);
4698	if (URI == NULL) {
4699	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4700        }
4701    }
4702    return(URI);
4703}
4704
4705/**
4706 * xmlParseCommentComplex:
4707 * @ctxt:  an XML parser context
4708 * @buf:  the already parsed part of the buffer
4709 * @len:  number of bytes filles in the buffer
4710 * @size:  allocated size of the buffer
4711 *
4712 * Skip an XML (SGML) comment <!-- .... -->
4713 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4714 *  must not occur within comments. "
4715 * This is the slow routine in case the accelerator for ascii didn't work
4716 *
4717 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4718 */
4719static void
4720xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4721                       size_t len, size_t size) {
4722    int q, ql;
4723    int r, rl;
4724    int cur, l;
4725    size_t count = 0;
4726    int inputid;
4727
4728    inputid = ctxt->input->id;
4729
4730    if (buf == NULL) {
4731        len = 0;
4732	size = XML_PARSER_BUFFER_SIZE;
4733	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4734	if (buf == NULL) {
4735	    xmlErrMemory(ctxt, NULL);
4736	    return;
4737	}
4738    }
4739    GROW;	/* Assure there's enough input data */
4740    q = CUR_CHAR(ql);
4741    if (q == 0)
4742        goto not_terminated;
4743    if (!IS_CHAR(q)) {
4744        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4745                          "xmlParseComment: invalid xmlChar value %d\n",
4746	                  q);
4747	xmlFree (buf);
4748	return;
4749    }
4750    NEXTL(ql);
4751    r = CUR_CHAR(rl);
4752    if (r == 0)
4753        goto not_terminated;
4754    if (!IS_CHAR(r)) {
4755        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4756                          "xmlParseComment: invalid xmlChar value %d\n",
4757	                  q);
4758	xmlFree (buf);
4759	return;
4760    }
4761    NEXTL(rl);
4762    cur = CUR_CHAR(l);
4763    if (cur == 0)
4764        goto not_terminated;
4765    while (IS_CHAR(cur) && /* checked */
4766           ((cur != '>') ||
4767	    (r != '-') || (q != '-'))) {
4768	if ((r == '-') && (q == '-')) {
4769	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4770	}
4771        if ((len > XML_MAX_TEXT_LENGTH) &&
4772            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4773            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4774                         "Comment too big found", NULL);
4775            xmlFree (buf);
4776            return;
4777        }
4778	if (len + 5 >= size) {
4779	    xmlChar *new_buf;
4780            size_t new_size;
4781
4782	    new_size = size * 2;
4783	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4784	    if (new_buf == NULL) {
4785		xmlFree (buf);
4786		xmlErrMemory(ctxt, NULL);
4787		return;
4788	    }
4789	    buf = new_buf;
4790            size = new_size;
4791	}
4792	COPY_BUF(ql,buf,len,q);
4793	q = r;
4794	ql = rl;
4795	r = cur;
4796	rl = l;
4797
4798	count++;
4799	if (count > 50) {
4800	    GROW;
4801	    count = 0;
4802            if (ctxt->instate == XML_PARSER_EOF) {
4803		xmlFree(buf);
4804		return;
4805            }
4806	}
4807	NEXTL(l);
4808	cur = CUR_CHAR(l);
4809	if (cur == 0) {
4810	    SHRINK;
4811	    GROW;
4812	    cur = CUR_CHAR(l);
4813	}
4814    }
4815    buf[len] = 0;
4816    if (cur == 0) {
4817	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4818	                     "Comment not terminated \n<!--%.50s\n", buf);
4819    } else if (!IS_CHAR(cur)) {
4820        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4821                          "xmlParseComment: invalid xmlChar value %d\n",
4822	                  cur);
4823    } else {
4824	if (inputid != ctxt->input->id) {
4825	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4826		"Comment doesn't start and stop in the same entity\n");
4827	}
4828        NEXT;
4829	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4830	    (!ctxt->disableSAX))
4831	    ctxt->sax->comment(ctxt->userData, buf);
4832    }
4833    xmlFree(buf);
4834    return;
4835not_terminated:
4836    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4837			 "Comment not terminated\n", NULL);
4838    xmlFree(buf);
4839    return;
4840}
4841
4842/**
4843 * xmlParseComment:
4844 * @ctxt:  an XML parser context
4845 *
4846 * Skip an XML (SGML) comment <!-- .... -->
4847 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4848 *  must not occur within comments. "
4849 *
4850 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4851 */
4852void
4853xmlParseComment(xmlParserCtxtPtr ctxt) {
4854    xmlChar *buf = NULL;
4855    size_t size = XML_PARSER_BUFFER_SIZE;
4856    size_t len = 0;
4857    xmlParserInputState state;
4858    const xmlChar *in;
4859    size_t nbchar = 0;
4860    int ccol;
4861    int inputid;
4862
4863    /*
4864     * Check that there is a comment right here.
4865     */
4866    if ((RAW != '<') || (NXT(1) != '!') ||
4867        (NXT(2) != '-') || (NXT(3) != '-')) return;
4868    state = ctxt->instate;
4869    ctxt->instate = XML_PARSER_COMMENT;
4870    inputid = ctxt->input->id;
4871    SKIP(4);
4872    SHRINK;
4873    GROW;
4874
4875    /*
4876     * Accelerated common case where input don't need to be
4877     * modified before passing it to the handler.
4878     */
4879    in = ctxt->input->cur;
4880    do {
4881	if (*in == 0xA) {
4882	    do {
4883		ctxt->input->line++; ctxt->input->col = 1;
4884		in++;
4885	    } while (*in == 0xA);
4886	}
4887get_more:
4888        ccol = ctxt->input->col;
4889	while (((*in > '-') && (*in <= 0x7F)) ||
4890	       ((*in >= 0x20) && (*in < '-')) ||
4891	       (*in == 0x09)) {
4892		    in++;
4893		    ccol++;
4894	}
4895	ctxt->input->col = ccol;
4896	if (*in == 0xA) {
4897	    do {
4898		ctxt->input->line++; ctxt->input->col = 1;
4899		in++;
4900	    } while (*in == 0xA);
4901	    goto get_more;
4902	}
4903	nbchar = in - ctxt->input->cur;
4904	/*
4905	 * save current set of data
4906	 */
4907	if (nbchar > 0) {
4908	    if ((ctxt->sax != NULL) &&
4909		(ctxt->sax->comment != NULL)) {
4910		if (buf == NULL) {
4911		    if ((*in == '-') && (in[1] == '-'))
4912		        size = nbchar + 1;
4913		    else
4914		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4915		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4916		    if (buf == NULL) {
4917		        xmlErrMemory(ctxt, NULL);
4918			ctxt->instate = state;
4919			return;
4920		    }
4921		    len = 0;
4922		} else if (len + nbchar + 1 >= size) {
4923		    xmlChar *new_buf;
4924		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4925		    new_buf = (xmlChar *) xmlRealloc(buf,
4926		                                     size * sizeof(xmlChar));
4927		    if (new_buf == NULL) {
4928		        xmlFree (buf);
4929			xmlErrMemory(ctxt, NULL);
4930			ctxt->instate = state;
4931			return;
4932		    }
4933		    buf = new_buf;
4934		}
4935		memcpy(&buf[len], ctxt->input->cur, nbchar);
4936		len += nbchar;
4937		buf[len] = 0;
4938	    }
4939	}
4940        if ((len > XML_MAX_TEXT_LENGTH) &&
4941            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4942            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943                         "Comment too big found", NULL);
4944            xmlFree (buf);
4945            return;
4946        }
4947	ctxt->input->cur = in;
4948	if (*in == 0xA) {
4949	    in++;
4950	    ctxt->input->line++; ctxt->input->col = 1;
4951	}
4952	if (*in == 0xD) {
4953	    in++;
4954	    if (*in == 0xA) {
4955		ctxt->input->cur = in;
4956		in++;
4957		ctxt->input->line++; ctxt->input->col = 1;
4958		continue; /* while */
4959	    }
4960	    in--;
4961	}
4962	SHRINK;
4963	GROW;
4964        if (ctxt->instate == XML_PARSER_EOF) {
4965            xmlFree(buf);
4966            return;
4967        }
4968	in = ctxt->input->cur;
4969	if (*in == '-') {
4970	    if (in[1] == '-') {
4971	        if (in[2] == '>') {
4972		    if (ctxt->input->id != inputid) {
4973			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4974			"comment doesn't start and stop in the same entity\n");
4975		    }
4976		    SKIP(3);
4977		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4978		        (!ctxt->disableSAX)) {
4979			if (buf != NULL)
4980			    ctxt->sax->comment(ctxt->userData, buf);
4981			else
4982			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4983		    }
4984		    if (buf != NULL)
4985		        xmlFree(buf);
4986		    ctxt->instate = state;
4987		    return;
4988		}
4989		if (buf != NULL) {
4990		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4991		                      "Double hyphen within comment: "
4992                                      "<!--%.50s\n",
4993				      buf);
4994		} else
4995		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4996		                      "Double hyphen within comment\n", NULL);
4997		in++;
4998		ctxt->input->col++;
4999	    }
5000	    in++;
5001	    ctxt->input->col++;
5002	    goto get_more;
5003	}
5004    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5005    xmlParseCommentComplex(ctxt, buf, len, size);
5006    ctxt->instate = state;
5007    return;
5008}
5009
5010
5011/**
5012 * xmlParsePITarget:
5013 * @ctxt:  an XML parser context
5014 *
5015 * parse the name of a PI
5016 *
5017 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5018 *
5019 * Returns the PITarget name or NULL
5020 */
5021
5022const xmlChar *
5023xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5024    const xmlChar *name;
5025
5026    name = xmlParseName(ctxt);
5027    if ((name != NULL) &&
5028        ((name[0] == 'x') || (name[0] == 'X')) &&
5029        ((name[1] == 'm') || (name[1] == 'M')) &&
5030        ((name[2] == 'l') || (name[2] == 'L'))) {
5031	int i;
5032	if ((name[0] == 'x') && (name[1] == 'm') &&
5033	    (name[2] == 'l') && (name[3] == 0)) {
5034	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5035		 "XML declaration allowed only at the start of the document\n");
5036	    return(name);
5037	} else if (name[3] == 0) {
5038	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5039	    return(name);
5040	}
5041	for (i = 0;;i++) {
5042	    if (xmlW3CPIs[i] == NULL) break;
5043	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5044	        return(name);
5045	}
5046	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5047		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5048		      NULL, NULL);
5049    }
5050    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5051	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5052		 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5053    }
5054    return(name);
5055}
5056
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The accepted content is, with optional blanks between the parts, the
 * keyword "catalog", an '=' sign and a URL enclosed in single or double
 * quotes, with nothing but blanks after the closing quote.
 *
 * On success the URL is registered in ctxt->catalogs through
 * xmlCatalogAddLocal() so it can be used for resolutions needed further
 * down in the document.  Any syntax error, including a missing '=',
 * raises an XML_WAR_CATALOG_PI warning and leaves ctxt->catalogs unchanged.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* A missing '=' is a syntax error like any other: report it. */
    if (*tmp != '=')
        goto error;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;

    /* The value must be quoted; remember which quote opened it. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5118
5119/**
5120 * xmlParsePI:
5121 * @ctxt:  an XML parser context
5122 *
5123 * parse an XML Processing Instruction.
5124 *
5125 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5126 *
5127 * The processing is transfered to SAX once parsed.
5128 */
5129
5130void
5131xmlParsePI(xmlParserCtxtPtr ctxt) {
5132    xmlChar *buf = NULL;
5133    size_t len = 0;
5134    size_t size = XML_PARSER_BUFFER_SIZE;
5135    int cur, l;
5136    const xmlChar *target;
5137    xmlParserInputState state;
5138    int count = 0;
5139
5140    if ((RAW == '<') && (NXT(1) == '?')) {
5141	xmlParserInputPtr input = ctxt->input;
5142	state = ctxt->instate;
5143        ctxt->instate = XML_PARSER_PI;
5144	/*
5145	 * this is a Processing Instruction.
5146	 */
5147	SKIP(2);
5148	SHRINK;
5149
5150	/*
5151	 * Parse the target name and check for special support like
5152	 * namespace.
5153	 */
5154        target = xmlParsePITarget(ctxt);
5155	if (target != NULL) {
5156	    if ((RAW == '?') && (NXT(1) == '>')) {
5157		if (input != ctxt->input) {
5158		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5159	    "PI declaration doesn't start and stop in the same entity\n");
5160		}
5161		SKIP(2);
5162
5163		/*
5164		 * SAX: PI detected.
5165		 */
5166		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5167		    (ctxt->sax->processingInstruction != NULL))
5168		    ctxt->sax->processingInstruction(ctxt->userData,
5169		                                     target, NULL);
5170		if (ctxt->instate != XML_PARSER_EOF)
5171		    ctxt->instate = state;
5172		return;
5173	    }
5174	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5175	    if (buf == NULL) {
5176		xmlErrMemory(ctxt, NULL);
5177		ctxt->instate = state;
5178		return;
5179	    }
5180	    cur = CUR;
5181	    if (!IS_BLANK(cur)) {
5182		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5183			  "ParsePI: PI %s space expected\n", target);
5184	    }
5185            SKIP_BLANKS;
5186	    cur = CUR_CHAR(l);
5187	    while (IS_CHAR(cur) && /* checked */
5188		   ((cur != '?') || (NXT(1) != '>'))) {
5189		if (len + 5 >= size) {
5190		    xmlChar *tmp;
5191                    size_t new_size = size * 2;
5192		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5193		    if (tmp == NULL) {
5194			xmlErrMemory(ctxt, NULL);
5195			xmlFree(buf);
5196			ctxt->instate = state;
5197			return;
5198		    }
5199		    buf = tmp;
5200                    size = new_size;
5201		}
5202		count++;
5203		if (count > 50) {
5204		    GROW;
5205                    if (ctxt->instate == XML_PARSER_EOF) {
5206                        xmlFree(buf);
5207                        return;
5208                    }
5209		    count = 0;
5210                    if ((len > XML_MAX_TEXT_LENGTH) &&
5211                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5212                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5213                                          "PI %s too big found", target);
5214                        xmlFree(buf);
5215                        ctxt->instate = state;
5216                        return;
5217                    }
5218		}
5219		COPY_BUF(l,buf,len,cur);
5220		NEXTL(l);
5221		cur = CUR_CHAR(l);
5222		if (cur == 0) {
5223		    SHRINK;
5224		    GROW;
5225		    cur = CUR_CHAR(l);
5226		}
5227	    }
5228            if ((len > XML_MAX_TEXT_LENGTH) &&
5229                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5230                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5231                                  "PI %s too big found", target);
5232                xmlFree(buf);
5233                ctxt->instate = state;
5234                return;
5235            }
5236	    buf[len] = 0;
5237	    if (cur != '?') {
5238		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5239		      "ParsePI: PI %s never end ...\n", target);
5240	    } else {
5241		if (input != ctxt->input) {
5242		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5243	    "PI declaration doesn't start and stop in the same entity\n");
5244		}
5245		SKIP(2);
5246
5247#ifdef LIBXML_CATALOG_ENABLED
5248		if (((state == XML_PARSER_MISC) ||
5249	             (state == XML_PARSER_START)) &&
5250		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5251		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5252		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5253			(allow == XML_CATA_ALLOW_ALL))
5254			xmlParseCatalogPI(ctxt, buf);
5255		}
5256#endif
5257
5258
5259		/*
5260		 * SAX: PI detected.
5261		 */
5262		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5263		    (ctxt->sax->processingInstruction != NULL))
5264		    ctxt->sax->processingInstruction(ctxt->userData,
5265		                                     target, buf);
5266	    }
5267	    xmlFree(buf);
5268	} else {
5269	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5270	}
5271	if (ctxt->instate != XML_PARSER_EOF)
5272	    ctxt->instate = state;
5273    }
5274}
5275
5276/**
5277 * xmlParseNotationDecl:
5278 * @ctxt:  an XML parser context
5279 *
5280 * parse a notation declaration
5281 *
5282 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5283 *
5284 * Hence there is actually 3 choices:
5285 *     'PUBLIC' S PubidLiteral
5286 *     'PUBLIC' S PubidLiteral S SystemLiteral
5287 * and 'SYSTEM' S SystemLiteral
5288 *
5289 * See the NOTE on xmlParseExternalID().
5290 */
5291
5292void
5293xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5294    const xmlChar *name;
5295    xmlChar *Pubid;
5296    xmlChar *Systemid;
5297
5298    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5299	xmlParserInputPtr input = ctxt->input;
5300	SHRINK;
5301	SKIP(10);
5302	if (!IS_BLANK_CH(CUR)) {
5303	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5304			   "Space required after '<!NOTATION'\n");
5305	    return;
5306	}
5307	SKIP_BLANKS;
5308
5309        name = xmlParseName(ctxt);
5310	if (name == NULL) {
5311	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5312	    return;
5313	}
5314	if (!IS_BLANK_CH(CUR)) {
5315	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5316		     "Space required after the NOTATION name'\n");
5317	    return;
5318	}
5319	if (xmlStrchr(name, ':') != NULL) {
5320	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5321		     "colon are forbidden from notation names '%s'\n",
5322		     name, NULL, NULL);
5323	}
5324	SKIP_BLANKS;
5325
5326	/*
5327	 * Parse the IDs.
5328	 */
5329	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5330	SKIP_BLANKS;
5331
5332	if (RAW == '>') {
5333	    if (input != ctxt->input) {
5334		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5335	"Notation declaration doesn't start and stop in the same entity\n");
5336	    }
5337	    NEXT;
5338	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5339		(ctxt->sax->notationDecl != NULL))
5340		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5341	} else {
5342	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5343	}
5344	if (Systemid != NULL) xmlFree(Systemid);
5345	if (Pubid != NULL) xmlFree(Pubid);
5346    }
5347}
5348
5349/**
5350 * xmlParseEntityDecl:
5351 * @ctxt:  an XML parser context
5352 *
5353 * parse <!ENTITY declarations
5354 *
5355 * [70] EntityDecl ::= GEDecl | PEDecl
5356 *
5357 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5358 *
5359 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5360 *
5361 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5362 *
5363 * [74] PEDef ::= EntityValue | ExternalID
5364 *
5365 * [76] NDataDecl ::= S 'NDATA' S Name
5366 *
5367 * [ VC: Notation Declared ]
5368 * The Name must match the declared name of a notation.
5369 */
5370
5371void
5372xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5373    const xmlChar *name = NULL;
5374    xmlChar *value = NULL;
5375    xmlChar *URI = NULL, *literal = NULL;
5376    const xmlChar *ndata = NULL;
5377    int isParameter = 0;
5378    xmlChar *orig = NULL;
5379    int skipped;
5380
5381    /* GROW; done in the caller */
5382    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5383	xmlParserInputPtr input = ctxt->input;
5384	SHRINK;
5385	SKIP(8);
5386	skipped = SKIP_BLANKS;
5387	if (skipped == 0) {
5388	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5389			   "Space required after '<!ENTITY'\n");
5390	}
5391
5392	if (RAW == '%') {
5393	    NEXT;
5394	    skipped = SKIP_BLANKS;
5395	    if (skipped == 0) {
5396		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397			       "Space required after '%'\n");
5398	    }
5399	    isParameter = 1;
5400	}
5401
5402        name = xmlParseName(ctxt);
5403	if (name == NULL) {
5404	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5405	                   "xmlParseEntityDecl: no name\n");
5406            return;
5407	}
5408	if (xmlStrchr(name, ':') != NULL) {
5409	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5410		     "colon are forbidden from entities names '%s'\n",
5411		     name, NULL, NULL);
5412	}
5413        skipped = SKIP_BLANKS;
5414	if (skipped == 0) {
5415	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416			   "Space required after the entity name\n");
5417	}
5418
5419	ctxt->instate = XML_PARSER_ENTITY_DECL;
5420	/*
5421	 * handle the various case of definitions...
5422	 */
5423	if (isParameter) {
5424	    if ((RAW == '"') || (RAW == '\'')) {
5425	        value = xmlParseEntityValue(ctxt, &orig);
5426		if (value) {
5427		    if ((ctxt->sax != NULL) &&
5428			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5429			ctxt->sax->entityDecl(ctxt->userData, name,
5430		                    XML_INTERNAL_PARAMETER_ENTITY,
5431				    NULL, NULL, value);
5432		}
5433	    } else {
5434	        URI = xmlParseExternalID(ctxt, &literal, 1);
5435		if ((URI == NULL) && (literal == NULL)) {
5436		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5437		}
5438		if (URI) {
5439		    xmlURIPtr uri;
5440
5441		    uri = xmlParseURI((const char *) URI);
5442		    if (uri == NULL) {
5443		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5444				     "Invalid URI: %s\n", URI);
5445			/*
5446			 * This really ought to be a well formedness error
5447			 * but the XML Core WG decided otherwise c.f. issue
5448			 * E26 of the XML erratas.
5449			 */
5450		    } else {
5451			if (uri->fragment != NULL) {
5452			    /*
5453			     * Okay this is foolish to block those but not
5454			     * invalid URIs.
5455			     */
5456			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5457			} else {
5458			    if ((ctxt->sax != NULL) &&
5459				(!ctxt->disableSAX) &&
5460				(ctxt->sax->entityDecl != NULL))
5461				ctxt->sax->entityDecl(ctxt->userData, name,
5462					    XML_EXTERNAL_PARAMETER_ENTITY,
5463					    literal, URI, NULL);
5464			}
5465			xmlFreeURI(uri);
5466		    }
5467		}
5468	    }
5469	} else {
5470	    if ((RAW == '"') || (RAW == '\'')) {
5471	        value = xmlParseEntityValue(ctxt, &orig);
5472		if ((ctxt->sax != NULL) &&
5473		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5474		    ctxt->sax->entityDecl(ctxt->userData, name,
5475				XML_INTERNAL_GENERAL_ENTITY,
5476				NULL, NULL, value);
5477		/*
5478		 * For expat compatibility in SAX mode.
5479		 */
5480		if ((ctxt->myDoc == NULL) ||
5481		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5482		    if (ctxt->myDoc == NULL) {
5483			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5484			if (ctxt->myDoc == NULL) {
5485			    xmlErrMemory(ctxt, "New Doc failed");
5486			    return;
5487			}
5488			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5489		    }
5490		    if (ctxt->myDoc->intSubset == NULL)
5491			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5492					    BAD_CAST "fake", NULL, NULL);
5493
5494		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5495			              NULL, NULL, value);
5496		}
5497	    } else {
5498	        URI = xmlParseExternalID(ctxt, &literal, 1);
5499		if ((URI == NULL) && (literal == NULL)) {
5500		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5501		}
5502		if (URI) {
5503		    xmlURIPtr uri;
5504
5505		    uri = xmlParseURI((const char *)URI);
5506		    if (uri == NULL) {
5507		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5508				     "Invalid URI: %s\n", URI);
5509			/*
5510			 * This really ought to be a well formedness error
5511			 * but the XML Core WG decided otherwise c.f. issue
5512			 * E26 of the XML erratas.
5513			 */
5514		    } else {
5515			if (uri->fragment != NULL) {
5516			    /*
5517			     * Okay this is foolish to block those but not
5518			     * invalid URIs.
5519			     */
5520			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5521			}
5522			xmlFreeURI(uri);
5523		    }
5524		}
5525		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5526		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5527				   "Space required before 'NDATA'\n");
5528		}
5529		SKIP_BLANKS;
5530		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5531		    SKIP(5);
5532		    if (!IS_BLANK_CH(CUR)) {
5533			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5534				       "Space required after 'NDATA'\n");
5535		    }
5536		    SKIP_BLANKS;
5537		    ndata = xmlParseName(ctxt);
5538		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5539		        (ctxt->sax->unparsedEntityDecl != NULL))
5540			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5541				    literal, URI, ndata);
5542		} else {
5543		    if ((ctxt->sax != NULL) &&
5544		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5545			ctxt->sax->entityDecl(ctxt->userData, name,
5546				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5547				    literal, URI, NULL);
5548		    /*
5549		     * For expat compatibility in SAX mode.
5550		     * assuming the entity repalcement was asked for
5551		     */
5552		    if ((ctxt->replaceEntities != 0) &&
5553			((ctxt->myDoc == NULL) ||
5554			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5555			if (ctxt->myDoc == NULL) {
5556			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5557			    if (ctxt->myDoc == NULL) {
5558			        xmlErrMemory(ctxt, "New Doc failed");
5559				return;
5560			    }
5561			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5562			}
5563
5564			if (ctxt->myDoc->intSubset == NULL)
5565			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5566						BAD_CAST "fake", NULL, NULL);
5567			xmlSAX2EntityDecl(ctxt, name,
5568				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5569				          literal, URI, NULL);
5570		    }
5571		}
5572	    }
5573	}
5574	SKIP_BLANKS;
5575	if (RAW != '>') {
5576	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5577	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5578	} else {
5579	    if (input != ctxt->input) {
5580		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5581	"Entity declaration doesn't start and stop in the same entity\n");
5582	    }
5583	    NEXT;
5584	}
5585	if (orig != NULL) {
5586	    /*
5587	     * Ugly mechanism to save the raw entity value.
5588	     */
5589	    xmlEntityPtr cur = NULL;
5590
5591	    if (isParameter) {
5592	        if ((ctxt->sax != NULL) &&
5593		    (ctxt->sax->getParameterEntity != NULL))
5594		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5595	    } else {
5596	        if ((ctxt->sax != NULL) &&
5597		    (ctxt->sax->getEntity != NULL))
5598		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5599		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5600		    cur = xmlSAX2GetEntity(ctxt, name);
5601		}
5602	    }
5603            if (cur != NULL) {
5604	        if (cur->orig != NULL)
5605		    xmlFree(orig);
5606		else
5607		    cur->orig = orig;
5608	    } else
5609		xmlFree(orig);
5610	}
5611	if (value != NULL) xmlFree(value);
5612	if (URI != NULL) xmlFree(URI);
5613	if (literal != NULL) xmlFree(literal);
5614    }
5615}
5616
5617/**
5618 * xmlParseDefaultDecl:
5619 * @ctxt:  an XML parser context
5620 * @value:  Receive a possible fixed default value for the attribute
5621 *
5622 * Parse an attribute default declaration
5623 *
5624 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5625 *
5626 * [ VC: Required Attribute ]
5627 * if the default declaration is the keyword #REQUIRED, then the
5628 * attribute must be specified for all elements of the type in the
5629 * attribute-list declaration.
5630 *
5631 * [ VC: Attribute Default Legal ]
5632 * The declared default value must meet the lexical constraints of
5633 * the declared attribute type c.f. xmlValidateAttributeDecl()
5634 *
5635 * [ VC: Fixed Attribute Default ]
5636 * if an attribute has a default value declared with the #FIXED
5637 * keyword, instances of that attribute must match the default value.
5638 *
5639 * [ WFC: No < in Attribute Values ]
5640 * handled in xmlParseAttValue()
5641 *
5642 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5643 *          or XML_ATTRIBUTE_FIXED.
5644 */
5645
5646int
5647xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5648    int val;
5649    xmlChar *ret;
5650
5651    *value = NULL;
5652    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5653	SKIP(9);
5654	return(XML_ATTRIBUTE_REQUIRED);
5655    }
5656    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5657	SKIP(8);
5658	return(XML_ATTRIBUTE_IMPLIED);
5659    }
5660    val = XML_ATTRIBUTE_NONE;
5661    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5662	SKIP(6);
5663	val = XML_ATTRIBUTE_FIXED;
5664	if (!IS_BLANK_CH(CUR)) {
5665	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5666			   "Space required after '#FIXED'\n");
5667	}
5668	SKIP_BLANKS;
5669    }
5670    ret = xmlParseAttValue(ctxt);
5671    ctxt->instate = XML_PARSER_DTD;
5672    if (ret == NULL) {
5673	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5674		       "Attribute default value declaration error\n");
5675    } else
5676        *value = ret;
5677    return(val);
5678}
5679
5680/**
5681 * xmlParseNotationType:
5682 * @ctxt:  an XML parser context
5683 *
5684 * parse an Notation attribute type.
5685 *
5686 * Note: the leading 'NOTATION' S part has already being parsed...
5687 *
5688 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5689 *
5690 * [ VC: Notation Attributes ]
5691 * Values of this type must match one of the notation names included
5692 * in the declaration; all notation names in the declaration must be declared.
5693 *
5694 * Returns: the notation attribute tree built while parsing
5695 */
5696
5697xmlEnumerationPtr
5698xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5699    const xmlChar *name;
5700    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5701
5702    if (RAW != '(') {
5703	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5704	return(NULL);
5705    }
5706    SHRINK;
5707    do {
5708        NEXT;
5709	SKIP_BLANKS;
5710        name = xmlParseName(ctxt);
5711	if (name == NULL) {
5712	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713			   "Name expected in NOTATION declaration\n");
5714            xmlFreeEnumeration(ret);
5715	    return(NULL);
5716	}
5717	tmp = ret;
5718	while (tmp != NULL) {
5719	    if (xmlStrEqual(name, tmp->name)) {
5720		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5721	  "standalone: attribute notation value token %s duplicated\n",
5722				 name, NULL);
5723		if (!xmlDictOwns(ctxt->dict, name))
5724		    xmlFree((xmlChar *) name);
5725		break;
5726	    }
5727	    tmp = tmp->next;
5728	}
5729	if (tmp == NULL) {
5730	    cur = xmlCreateEnumeration(name);
5731	    if (cur == NULL) {
5732                xmlFreeEnumeration(ret);
5733                return(NULL);
5734            }
5735	    if (last == NULL) ret = last = cur;
5736	    else {
5737		last->next = cur;
5738		last = cur;
5739	    }
5740	}
5741	SKIP_BLANKS;
5742    } while (RAW == '|');
5743    if (RAW != ')') {
5744	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5745        xmlFreeEnumeration(ret);
5746	return(NULL);
5747    }
5748    NEXT;
5749    return(ret);
5750}
5751
5752/**
5753 * xmlParseEnumerationType:
5754 * @ctxt:  an XML parser context
5755 *
5756 * parse an Enumeration attribute type.
5757 *
5758 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5759 *
5760 * [ VC: Enumeration ]
5761 * Values of this type must match one of the Nmtoken tokens in
5762 * the declaration
5763 *
5764 * Returns: the enumeration attribute tree built while parsing
5765 */
5766
5767xmlEnumerationPtr
5768xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5769    xmlChar *name;
5770    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5771
5772    if (RAW != '(') {
5773	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5774	return(NULL);
5775    }
5776    SHRINK;
5777    do {
5778        NEXT;
5779	SKIP_BLANKS;
5780        name = xmlParseNmtoken(ctxt);
5781	if (name == NULL) {
5782	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5783	    return(ret);
5784	}
5785	tmp = ret;
5786	while (tmp != NULL) {
5787	    if (xmlStrEqual(name, tmp->name)) {
5788		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5789	  "standalone: attribute enumeration value token %s duplicated\n",
5790				 name, NULL);
5791		if (!xmlDictOwns(ctxt->dict, name))
5792		    xmlFree(name);
5793		break;
5794	    }
5795	    tmp = tmp->next;
5796	}
5797	if (tmp == NULL) {
5798	    cur = xmlCreateEnumeration(name);
5799	    if (!xmlDictOwns(ctxt->dict, name))
5800		xmlFree(name);
5801	    if (cur == NULL) {
5802                xmlFreeEnumeration(ret);
5803                return(NULL);
5804            }
5805	    if (last == NULL) ret = last = cur;
5806	    else {
5807		last->next = cur;
5808		last = cur;
5809	    }
5810	}
5811	SKIP_BLANKS;
5812    } while (RAW == '|');
5813    if (RAW != ')') {
5814	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5815	return(ret);
5816    }
5817    NEXT;
5818    return(ret);
5819}
5820
5821/**
5822 * xmlParseEnumeratedType:
5823 * @ctxt:  an XML parser context
5824 * @tree:  the enumeration tree built while parsing
5825 *
5826 * parse an Enumerated attribute type.
5827 *
5828 * [57] EnumeratedType ::= NotationType | Enumeration
5829 *
5830 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5831 *
5832 *
5833 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5834 */
5835
5836int
5837xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5838    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5839	SKIP(8);
5840	if (!IS_BLANK_CH(CUR)) {
5841	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842			   "Space required after 'NOTATION'\n");
5843	    return(0);
5844	}
5845        SKIP_BLANKS;
5846	*tree = xmlParseNotationType(ctxt);
5847	if (*tree == NULL) return(0);
5848	return(XML_ATTRIBUTE_NOTATION);
5849    }
5850    *tree = xmlParseEnumerationType(ctxt);
5851    if (*tree == NULL) return(0);
5852    return(XML_ATTRIBUTE_ENUMERATION);
5853}
5854
5855/**
5856 * xmlParseAttributeType:
5857 * @ctxt:  an XML parser context
5858 * @tree:  the enumeration tree built while parsing
5859 *
5860 * parse the Attribute list def for an element
5861 *
5862 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5863 *
5864 * [55] StringType ::= 'CDATA'
5865 *
5866 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5867 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5868 *
5869 * Validity constraints for attribute values syntax are checked in
5870 * xmlValidateAttributeValue()
5871 *
5872 * [ VC: ID ]
5873 * Values of type ID must match the Name production. A name must not
5874 * appear more than once in an XML document as a value of this type;
5875 * i.e., ID values must uniquely identify the elements which bear them.
5876 *
5877 * [ VC: One ID per Element Type ]
5878 * No element type may have more than one ID attribute specified.
5879 *
5880 * [ VC: ID Attribute Default ]
5881 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5882 *
5883 * [ VC: IDREF ]
5884 * Values of type IDREF must match the Name production, and values
5885 * of type IDREFS must match Names; each IDREF Name must match the value
5886 * of an ID attribute on some element in the XML document; i.e. IDREF
5887 * values must match the value of some ID attribute.
5888 *
5889 * [ VC: Entity Name ]
5890 * Values of type ENTITY must match the Name production, values
5891 * of type ENTITIES must match Names; each Entity Name must match the
5892 * name of an unparsed entity declared in the DTD.
5893 *
5894 * [ VC: Name Token ]
5895 * Values of type NMTOKEN must match the Nmtoken production; values
5896 * of type NMTOKENS must match Nmtokens.
5897 *
5898 * Returns the attribute type
5899 */
5900int
5901xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5902    SHRINK;
5903    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5904	SKIP(5);
5905	return(XML_ATTRIBUTE_CDATA);
5906     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5907	SKIP(6);
5908	return(XML_ATTRIBUTE_IDREFS);
5909     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5910	SKIP(5);
5911	return(XML_ATTRIBUTE_IDREF);
5912     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5913        SKIP(2);
5914	return(XML_ATTRIBUTE_ID);
5915     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5916	SKIP(6);
5917	return(XML_ATTRIBUTE_ENTITY);
5918     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5919	SKIP(8);
5920	return(XML_ATTRIBUTE_ENTITIES);
5921     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5922	SKIP(8);
5923	return(XML_ATTRIBUTE_NMTOKENS);
5924     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5925	SKIP(7);
5926	return(XML_ATTRIBUTE_NMTOKEN);
5927     }
5928     return(xmlParseEnumeratedType(ctxt, tree));
5929}
5930
5931/**
5932 * xmlParseAttributeListDecl:
5933 * @ctxt:  an XML parser context
5934 *
5935 * : parse the Attribute list def for an element
5936 *
5937 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5938 *
5939 * [53] AttDef ::= S Name S AttType S DefaultDecl
5940 *
5941 */
5942void
5943xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5944    const xmlChar *elemName;
5945    const xmlChar *attrName;
5946    xmlEnumerationPtr tree;
5947
5948    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5949	xmlParserInputPtr input = ctxt->input;
5950
5951	SKIP(9);
5952	if (!IS_BLANK_CH(CUR)) {
5953	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5954		                 "Space required after '<!ATTLIST'\n");
5955	}
5956        SKIP_BLANKS;
5957        elemName = xmlParseName(ctxt);
5958	if (elemName == NULL) {
5959	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5960			   "ATTLIST: no name for Element\n");
5961	    return;
5962	}
5963	SKIP_BLANKS;
5964	GROW;
5965	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5966	    const xmlChar *check = CUR_PTR;
5967	    int type;
5968	    int def;
5969	    xmlChar *defaultValue = NULL;
5970
5971	    GROW;
5972            tree = NULL;
5973	    attrName = xmlParseName(ctxt);
5974	    if (attrName == NULL) {
5975		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5976			       "ATTLIST: no name for Attribute\n");
5977		break;
5978	    }
5979	    GROW;
5980	    if (!IS_BLANK_CH(CUR)) {
5981		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982		        "Space required after the attribute name\n");
5983		break;
5984	    }
5985	    SKIP_BLANKS;
5986
5987	    type = xmlParseAttributeType(ctxt, &tree);
5988	    if (type <= 0) {
5989	        break;
5990	    }
5991
5992	    GROW;
5993	    if (!IS_BLANK_CH(CUR)) {
5994		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5995			       "Space required after the attribute type\n");
5996	        if (tree != NULL)
5997		    xmlFreeEnumeration(tree);
5998		break;
5999	    }
6000	    SKIP_BLANKS;
6001
6002	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6003	    if (def <= 0) {
6004                if (defaultValue != NULL)
6005		    xmlFree(defaultValue);
6006	        if (tree != NULL)
6007		    xmlFreeEnumeration(tree);
6008	        break;
6009	    }
6010	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6011	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6012
6013	    GROW;
6014            if (RAW != '>') {
6015		if (!IS_BLANK_CH(CUR)) {
6016		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6017			"Space required after the attribute default value\n");
6018		    if (defaultValue != NULL)
6019			xmlFree(defaultValue);
6020		    if (tree != NULL)
6021			xmlFreeEnumeration(tree);
6022		    break;
6023		}
6024		SKIP_BLANKS;
6025	    }
6026	    if (check == CUR_PTR) {
6027		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6028		            "in xmlParseAttributeListDecl\n");
6029		if (defaultValue != NULL)
6030		    xmlFree(defaultValue);
6031	        if (tree != NULL)
6032		    xmlFreeEnumeration(tree);
6033		break;
6034	    }
6035	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6036		(ctxt->sax->attributeDecl != NULL))
6037		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6038	                        type, def, defaultValue, tree);
6039	    else if (tree != NULL)
6040		xmlFreeEnumeration(tree);
6041
6042	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6043	        (def != XML_ATTRIBUTE_IMPLIED) &&
6044		(def != XML_ATTRIBUTE_REQUIRED)) {
6045		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6046	    }
6047	    if (ctxt->sax2) {
6048		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6049	    }
6050	    if (defaultValue != NULL)
6051	        xmlFree(defaultValue);
6052	    GROW;
6053	}
6054	if (RAW == '>') {
6055	    if (input != ctxt->input) {
6056		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6057    "Attribute list declaration doesn't start and stop in the same entity\n",
6058                                 NULL, NULL);
6059	    }
6060	    NEXT;
6061	}
6062    }
6063}
6064
6065/**
6066 * xmlParseElementMixedContentDecl:
6067 * @ctxt:  an XML parser context
6068 * @inputchk:  the input used for the current entity, needed for boundary checks
6069 *
6070 * parse the declaration for a Mixed Element content
6071 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6072 *
6073 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6074 *                '(' S? '#PCDATA' S? ')'
6075 *
6076 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6077 *
6078 * [ VC: No Duplicate Types ]
6079 * The same name must not appear more than once in a single
6080 * mixed-content declaration.
6081 *
6082 * returns: the list of the xmlElementContentPtr describing the element choices
6083 */
6084xmlElementContentPtr
6085xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6086    xmlElementContentPtr ret = NULL, cur = NULL, n;
6087    const xmlChar *elem = NULL;
6088
6089    GROW;
6090    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6091	SKIP(7);
6092	SKIP_BLANKS;
6093	SHRINK;
6094	if (RAW == ')') {
6095	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6096		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6097"Element content declaration doesn't start and stop in the same entity\n",
6098                                 NULL, NULL);
6099	    }
6100	    NEXT;
6101	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6102	    if (ret == NULL)
6103	        return(NULL);
6104	    if (RAW == '*') {
6105		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6106		NEXT;
6107	    }
6108	    return(ret);
6109	}
6110	if ((RAW == '(') || (RAW == '|')) {
6111	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6112	    if (ret == NULL) return(NULL);
6113	}
6114	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6115	    NEXT;
6116	    if (elem == NULL) {
6117	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6118		if (ret == NULL) return(NULL);
6119		ret->c1 = cur;
6120		if (cur != NULL)
6121		    cur->parent = ret;
6122		cur = ret;
6123	    } else {
6124	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6125		if (n == NULL) return(NULL);
6126		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6127		if (n->c1 != NULL)
6128		    n->c1->parent = n;
6129	        cur->c2 = n;
6130		if (n != NULL)
6131		    n->parent = cur;
6132		cur = n;
6133	    }
6134	    SKIP_BLANKS;
6135	    elem = xmlParseName(ctxt);
6136	    if (elem == NULL) {
6137		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6138			"xmlParseElementMixedContentDecl : Name expected\n");
6139		xmlFreeDocElementContent(ctxt->myDoc, cur);
6140		return(NULL);
6141	    }
6142	    SKIP_BLANKS;
6143	    GROW;
6144	}
6145	if ((RAW == ')') && (NXT(1) == '*')) {
6146	    if (elem != NULL) {
6147		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6148		                               XML_ELEMENT_CONTENT_ELEMENT);
6149		if (cur->c2 != NULL)
6150		    cur->c2->parent = cur;
6151            }
6152            if (ret != NULL)
6153                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6154	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6155		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6156"Element content declaration doesn't start and stop in the same entity\n",
6157				 NULL, NULL);
6158	    }
6159	    SKIP(2);
6160	} else {
6161	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6162	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6163	    return(NULL);
6164	}
6165
6166    } else {
6167	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6168    }
6169    return(ret);
6170}
6171
6172/**
6173 * xmlParseElementChildrenContentDeclPriv:
6174 * @ctxt:  an XML parser context
6175 * @inputchk:  the input used for the current entity, needed for boundary checks
6176 * @depth: the level of recursion
6177 *
6178 * parse the declaration for a Mixed Element content
6179 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180 *
6181 *
6182 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6183 *
6184 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6185 *
6186 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6187 *
6188 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6189 *
6190 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6191 * TODO Parameter-entity replacement text must be properly nested
6192 *	with parenthesized groups. That is to say, if either of the
6193 *	opening or closing parentheses in a choice, seq, or Mixed
6194 *	construct is contained in the replacement text for a parameter
6195 *	entity, both must be contained in the same replacement text. For
6196 *	interoperability, if a parameter-entity reference appears in a
6197 *	choice, seq, or Mixed construct, its replacement text should not
6198 *	be empty, and neither the first nor last non-blank character of
6199 *	the replacement text should be a connector (| or ,).
6200 *
6201 * Returns the tree of xmlElementContentPtr describing the element
6202 *          hierarchy.
6203 */
6204static xmlElementContentPtr
6205xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6206                                       int depth) {
6207    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6208    const xmlChar *elem;
6209    xmlChar type = 0;
6210
6211    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6212        (depth >  2048)) {
6213        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6214"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6215                          depth);
6216	return(NULL);
6217    }
6218    SKIP_BLANKS;
6219    GROW;
6220    if (RAW == '(') {
6221	int inputid = ctxt->input->id;
6222
6223        /* Recurse on first child */
6224	NEXT;
6225	SKIP_BLANKS;
6226        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6227                                                           depth + 1);
6228	SKIP_BLANKS;
6229	GROW;
6230    } else {
6231	elem = xmlParseName(ctxt);
6232	if (elem == NULL) {
6233	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6234	    return(NULL);
6235	}
6236        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6237	if (cur == NULL) {
6238	    xmlErrMemory(ctxt, NULL);
6239	    return(NULL);
6240	}
6241	GROW;
6242	if (RAW == '?') {
6243	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6244	    NEXT;
6245	} else if (RAW == '*') {
6246	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6247	    NEXT;
6248	} else if (RAW == '+') {
6249	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6250	    NEXT;
6251	} else {
6252	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6253	}
6254	GROW;
6255    }
6256    SKIP_BLANKS;
6257    SHRINK;
6258    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6259        /*
6260	 * Each loop we parse one separator and one element.
6261	 */
6262        if (RAW == ',') {
6263	    if (type == 0) type = CUR;
6264
6265	    /*
6266	     * Detect "Name | Name , Name" error
6267	     */
6268	    else if (type != CUR) {
6269		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6270		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6271		                  type);
6272		if ((last != NULL) && (last != ret))
6273		    xmlFreeDocElementContent(ctxt->myDoc, last);
6274		if (ret != NULL)
6275		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6276		return(NULL);
6277	    }
6278	    NEXT;
6279
6280	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6281	    if (op == NULL) {
6282		if ((last != NULL) && (last != ret))
6283		    xmlFreeDocElementContent(ctxt->myDoc, last);
6284	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6285		return(NULL);
6286	    }
6287	    if (last == NULL) {
6288		op->c1 = ret;
6289		if (ret != NULL)
6290		    ret->parent = op;
6291		ret = cur = op;
6292	    } else {
6293	        cur->c2 = op;
6294		if (op != NULL)
6295		    op->parent = cur;
6296		op->c1 = last;
6297		if (last != NULL)
6298		    last->parent = op;
6299		cur =op;
6300		last = NULL;
6301	    }
6302	} else if (RAW == '|') {
6303	    if (type == 0) type = CUR;
6304
6305	    /*
6306	     * Detect "Name , Name | Name" error
6307	     */
6308	    else if (type != CUR) {
6309		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6310		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6311				  type);
6312		if ((last != NULL) && (last != ret))
6313		    xmlFreeDocElementContent(ctxt->myDoc, last);
6314		if (ret != NULL)
6315		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6316		return(NULL);
6317	    }
6318	    NEXT;
6319
6320	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6321	    if (op == NULL) {
6322		if ((last != NULL) && (last != ret))
6323		    xmlFreeDocElementContent(ctxt->myDoc, last);
6324		if (ret != NULL)
6325		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6326		return(NULL);
6327	    }
6328	    if (last == NULL) {
6329		op->c1 = ret;
6330		if (ret != NULL)
6331		    ret->parent = op;
6332		ret = cur = op;
6333	    } else {
6334	        cur->c2 = op;
6335		if (op != NULL)
6336		    op->parent = cur;
6337		op->c1 = last;
6338		if (last != NULL)
6339		    last->parent = op;
6340		cur =op;
6341		last = NULL;
6342	    }
6343	} else {
6344	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6345	    if ((last != NULL) && (last != ret))
6346	        xmlFreeDocElementContent(ctxt->myDoc, last);
6347	    if (ret != NULL)
6348		xmlFreeDocElementContent(ctxt->myDoc, ret);
6349	    return(NULL);
6350	}
6351	GROW;
6352	SKIP_BLANKS;
6353	GROW;
6354	if (RAW == '(') {
6355	    int inputid = ctxt->input->id;
6356	    /* Recurse on second child */
6357	    NEXT;
6358	    SKIP_BLANKS;
6359	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6360                                                          depth + 1);
6361	    SKIP_BLANKS;
6362	} else {
6363	    elem = xmlParseName(ctxt);
6364	    if (elem == NULL) {
6365		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6366		if (ret != NULL)
6367		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6368		return(NULL);
6369	    }
6370	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6371	    if (last == NULL) {
6372		if (ret != NULL)
6373		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6374		return(NULL);
6375	    }
6376	    if (RAW == '?') {
6377		last->ocur = XML_ELEMENT_CONTENT_OPT;
6378		NEXT;
6379	    } else if (RAW == '*') {
6380		last->ocur = XML_ELEMENT_CONTENT_MULT;
6381		NEXT;
6382	    } else if (RAW == '+') {
6383		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6384		NEXT;
6385	    } else {
6386		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6387	    }
6388	}
6389	SKIP_BLANKS;
6390	GROW;
6391    }
6392    if ((cur != NULL) && (last != NULL)) {
6393        cur->c2 = last;
6394	if (last != NULL)
6395	    last->parent = cur;
6396    }
6397    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6398	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6399"Element content declaration doesn't start and stop in the same entity\n",
6400			 NULL, NULL);
6401    }
6402    NEXT;
6403    if (RAW == '?') {
6404	if (ret != NULL) {
6405	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6406	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6407	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6408	    else
6409	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6410	}
6411	NEXT;
6412    } else if (RAW == '*') {
6413	if (ret != NULL) {
6414	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6415	    cur = ret;
6416	    /*
6417	     * Some normalization:
6418	     * (a | b* | c?)* == (a | b | c)*
6419	     */
6420	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6421		if ((cur->c1 != NULL) &&
6422	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6423		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6424		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6425		if ((cur->c2 != NULL) &&
6426	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6427		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6428		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6429		cur = cur->c2;
6430	    }
6431	}
6432	NEXT;
6433    } else if (RAW == '+') {
6434	if (ret != NULL) {
6435	    int found = 0;
6436
6437	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6438	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440	    else
6441	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6442	    /*
6443	     * Some normalization:
6444	     * (a | b*)+ == (a | b)*
6445	     * (a | b?)+ == (a | b)*
6446	     */
6447	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6448		if ((cur->c1 != NULL) &&
6449	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6450		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6451		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6452		    found = 1;
6453		}
6454		if ((cur->c2 != NULL) &&
6455	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6456		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6457		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6458		    found = 1;
6459		}
6460		cur = cur->c2;
6461	    }
6462	    if (found)
6463		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6464	}
6465	NEXT;
6466    }
6467    return(ret);
6468}
6469
6470/**
6471 * xmlParseElementChildrenContentDecl:
6472 * @ctxt:  an XML parser context
6473 * @inputchk:  the input used for the current entity, needed for boundary checks
6474 *
6475 * parse the declaration for a Mixed Element content
6476 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6477 *
6478 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6479 *
6480 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6481 *
6482 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6483 *
6484 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6485 *
6486 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6487 * TODO Parameter-entity replacement text must be properly nested
6488 *	with parenthesized groups. That is to say, if either of the
6489 *	opening or closing parentheses in a choice, seq, or Mixed
6490 *	construct is contained in the replacement text for a parameter
6491 *	entity, both must be contained in the same replacement text. For
6492 *	interoperability, if a parameter-entity reference appears in a
6493 *	choice, seq, or Mixed construct, its replacement text should not
6494 *	be empty, and neither the first nor last non-blank character of
6495 *	the replacement text should be a connector (| or ,).
6496 *
6497 * Returns the tree of xmlElementContentPtr describing the element
6498 *          hierarchy.
6499 */
6500xmlElementContentPtr
6501xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6502    /* stub left for API/ABI compat */
6503    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6504}
6505
6506/**
6507 * xmlParseElementContentDecl:
6508 * @ctxt:  an XML parser context
6509 * @name:  the name of the element being defined.
6510 * @result:  the Element Content pointer will be stored here if any
6511 *
6512 * parse the declaration for an Element content either Mixed or Children,
6513 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6514 *
6515 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6516 *
6517 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6518 */
6519
6520int
6521xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6522                           xmlElementContentPtr *result) {
6523
6524    xmlElementContentPtr tree = NULL;
6525    int inputid = ctxt->input->id;
6526    int res;
6527
6528    *result = NULL;
6529
6530    if (RAW != '(') {
6531	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6532		"xmlParseElementContentDecl : %s '(' expected\n", name);
6533	return(-1);
6534    }
6535    NEXT;
6536    GROW;
6537    if (ctxt->instate == XML_PARSER_EOF)
6538        return(-1);
6539    SKIP_BLANKS;
6540    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6541        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6542	res = XML_ELEMENT_TYPE_MIXED;
6543    } else {
6544        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6545	res = XML_ELEMENT_TYPE_ELEMENT;
6546    }
6547    SKIP_BLANKS;
6548    *result = tree;
6549    return(res);
6550}
6551
6552/**
6553 * xmlParseElementDecl:
6554 * @ctxt:  an XML parser context
6555 *
6556 * parse an Element declaration.
6557 *
6558 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6559 *
6560 * [ VC: Unique Element Type Declaration ]
6561 * No element type may be declared more than once
6562 *
6563 * Returns the type of the element, or -1 in case of error
6564 */
6565int
6566xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6567    const xmlChar *name;
6568    int ret = -1;
6569    xmlElementContentPtr content  = NULL;
6570
6571    /* GROW; done in the caller */
6572    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6573	xmlParserInputPtr input = ctxt->input;
6574
6575	SKIP(9);
6576	if (!IS_BLANK_CH(CUR)) {
6577	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6578		           "Space required after 'ELEMENT'\n");
6579	}
6580        SKIP_BLANKS;
6581        name = xmlParseName(ctxt);
6582	if (name == NULL) {
6583	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6584			   "xmlParseElementDecl: no name for Element\n");
6585	    return(-1);
6586	}
6587	while ((RAW == 0) && (ctxt->inputNr > 1))
6588	    xmlPopInput(ctxt);
6589	if (!IS_BLANK_CH(CUR)) {
6590	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6591			   "Space required after the element name\n");
6592	}
6593        SKIP_BLANKS;
6594	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6595	    SKIP(5);
6596	    /*
6597	     * Element must always be empty.
6598	     */
6599	    ret = XML_ELEMENT_TYPE_EMPTY;
6600	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6601	           (NXT(2) == 'Y')) {
6602	    SKIP(3);
6603	    /*
6604	     * Element is a generic container.
6605	     */
6606	    ret = XML_ELEMENT_TYPE_ANY;
6607	} else if (RAW == '(') {
6608	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6609	} else {
6610	    /*
6611	     * [ WFC: PEs in Internal Subset ] error handling.
6612	     */
6613	    if ((RAW == '%') && (ctxt->external == 0) &&
6614	        (ctxt->inputNr == 1)) {
6615		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6616	  "PEReference: forbidden within markup decl in internal subset\n");
6617	    } else {
6618		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6619		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6620            }
6621	    return(-1);
6622	}
6623
6624	SKIP_BLANKS;
6625	/*
6626	 * Pop-up of finished entities.
6627	 */
6628	while ((RAW == 0) && (ctxt->inputNr > 1))
6629	    xmlPopInput(ctxt);
6630	SKIP_BLANKS;
6631
6632	if (RAW != '>') {
6633	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6634	    if (content != NULL) {
6635		xmlFreeDocElementContent(ctxt->myDoc, content);
6636	    }
6637	} else {
6638	    if (input != ctxt->input) {
6639		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6640    "Element declaration doesn't start and stop in the same entity\n");
6641	    }
6642
6643	    NEXT;
6644	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6645		(ctxt->sax->elementDecl != NULL)) {
6646		if (content != NULL)
6647		    content->parent = NULL;
6648	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6649		                       content);
6650		if ((content != NULL) && (content->parent == NULL)) {
6651		    /*
6652		     * this is a trick: if xmlAddElementDecl is called,
6653		     * instead of copying the full tree it is plugged directly
6654		     * if called from the parser. Avoid duplicating the
6655		     * interfaces or change the API/ABI
6656		     */
6657		    xmlFreeDocElementContent(ctxt->myDoc, content);
6658		}
6659	    } else if (content != NULL) {
6660		xmlFreeDocElementContent(ctxt->myDoc, content);
6661	    }
6662	}
6663    }
6664    return(ret);
6665}
6666
6667/**
6668 * xmlParseConditionalSections
6669 * @ctxt:  an XML parser context
6670 *
6671 * [61] conditionalSect ::= includeSect | ignoreSect
6672 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6673 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6674 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6675 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6676 */
6677
6678static void
6679xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6680    int id = ctxt->input->id;
6681
6682    SKIP(3);
6683    SKIP_BLANKS;
6684    if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6685	SKIP(7);
6686	SKIP_BLANKS;
6687	if (RAW != '[') {
6688	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6689	} else {
6690	    if (ctxt->input->id != id) {
6691		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6692	    "All markup of the conditional section is not in the same entity\n",
6693				     NULL, NULL);
6694	    }
6695	    NEXT;
6696	}
6697	if (xmlParserDebugEntities) {
6698	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6699		xmlGenericError(xmlGenericErrorContext,
6700			"%s(%d): ", ctxt->input->filename,
6701			ctxt->input->line);
6702	    xmlGenericError(xmlGenericErrorContext,
6703		    "Entering INCLUDE Conditional Section\n");
6704	}
6705
6706	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6707	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6708	    const xmlChar *check = CUR_PTR;
6709	    unsigned int cons = ctxt->input->consumed;
6710
6711	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712		xmlParseConditionalSections(ctxt);
6713	    } else if (IS_BLANK_CH(CUR)) {
6714		NEXT;
6715	    } else if (RAW == '%') {
6716		xmlParsePEReference(ctxt);
6717	    } else
6718		xmlParseMarkupDecl(ctxt);
6719
6720	    /*
6721	     * Pop-up of finished entities.
6722	     */
6723	    while ((RAW == 0) && (ctxt->inputNr > 1))
6724		xmlPopInput(ctxt);
6725
6726	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6727		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6728		break;
6729	    }
6730	}
6731	if (xmlParserDebugEntities) {
6732	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6733		xmlGenericError(xmlGenericErrorContext,
6734			"%s(%d): ", ctxt->input->filename,
6735			ctxt->input->line);
6736	    xmlGenericError(xmlGenericErrorContext,
6737		    "Leaving INCLUDE Conditional Section\n");
6738	}
6739
6740    } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6741	int state;
6742	xmlParserInputState instate;
6743	int depth = 0;
6744
6745	SKIP(6);
6746	SKIP_BLANKS;
6747	if (RAW != '[') {
6748	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6749	} else {
6750	    if (ctxt->input->id != id) {
6751		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752	    "All markup of the conditional section is not in the same entity\n",
6753				     NULL, NULL);
6754	    }
6755	    NEXT;
6756	}
6757	if (xmlParserDebugEntities) {
6758	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6759		xmlGenericError(xmlGenericErrorContext,
6760			"%s(%d): ", ctxt->input->filename,
6761			ctxt->input->line);
6762	    xmlGenericError(xmlGenericErrorContext,
6763		    "Entering IGNORE Conditional Section\n");
6764	}
6765
6766	/*
6767	 * Parse up to the end of the conditional section
6768	 * But disable SAX event generating DTD building in the meantime
6769	 */
6770	state = ctxt->disableSAX;
6771	instate = ctxt->instate;
6772	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6773	ctxt->instate = XML_PARSER_IGNORE;
6774
6775	while (((depth >= 0) && (RAW != 0)) &&
6776               (ctxt->instate != XML_PARSER_EOF)) {
6777	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6778	    depth++;
6779	    SKIP(3);
6780	    continue;
6781	  }
6782	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6783	    if (--depth >= 0) SKIP(3);
6784	    continue;
6785	  }
6786	  NEXT;
6787	  continue;
6788	}
6789
6790	ctxt->disableSAX = state;
6791	ctxt->instate = instate;
6792
6793	if (xmlParserDebugEntities) {
6794	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6795		xmlGenericError(xmlGenericErrorContext,
6796			"%s(%d): ", ctxt->input->filename,
6797			ctxt->input->line);
6798	    xmlGenericError(xmlGenericErrorContext,
6799		    "Leaving IGNORE Conditional Section\n");
6800	}
6801
6802    } else {
6803	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6804    }
6805
6806    if (RAW == 0)
6807        SHRINK;
6808
6809    if (RAW == 0) {
6810	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6811    } else {
6812	if (ctxt->input->id != id) {
6813	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6814	"All markup of the conditional section is not in the same entity\n",
6815				 NULL, NULL);
6816	}
6817        SKIP(3);
6818    }
6819}
6820
6821/**
6822 * xmlParseMarkupDecl:
6823 * @ctxt:  an XML parser context
6824 *
6825 * parse Markup declarations
6826 *
6827 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6828 *                     NotationDecl | PI | Comment
6829 *
6830 * [ VC: Proper Declaration/PE Nesting ]
6831 * Parameter-entity replacement text must be properly nested with
6832 * markup declarations. That is to say, if either the first character
6833 * or the last character of a markup declaration (markupdecl above) is
6834 * contained in the replacement text for a parameter-entity reference,
6835 * both must be contained in the same replacement text.
6836 *
6837 * [ WFC: PEs in Internal Subset ]
6838 * In the internal DTD subset, parameter-entity references can occur
6839 * only where markup declarations can occur, not within markup declarations.
6840 * (This does not apply to references that occur in external parameter
6841 * entities or to the external subset.)
6842 */
6843void
6844xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6845    GROW;
6846    if (CUR == '<') {
6847        if (NXT(1) == '!') {
6848	    switch (NXT(2)) {
6849	        case 'E':
6850		    if (NXT(3) == 'L')
6851			xmlParseElementDecl(ctxt);
6852		    else if (NXT(3) == 'N')
6853			xmlParseEntityDecl(ctxt);
6854		    break;
6855	        case 'A':
6856		    xmlParseAttributeListDecl(ctxt);
6857		    break;
6858	        case 'N':
6859		    xmlParseNotationDecl(ctxt);
6860		    break;
6861	        case '-':
6862		    xmlParseComment(ctxt);
6863		    break;
6864		default:
6865		    /* there is an error but it will be detected later */
6866		    break;
6867	    }
6868	} else if (NXT(1) == '?') {
6869	    xmlParsePI(ctxt);
6870	}
6871    }
6872    /*
6873     * This is only for internal subset. On external entities,
6874     * the replacement is done before parsing stage
6875     */
6876    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6877	xmlParsePEReference(ctxt);
6878
6879    /*
6880     * Conditional sections are allowed from entities included
6881     * by PE References in the internal subset.
6882     */
6883    if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6884        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6885	    xmlParseConditionalSections(ctxt);
6886	}
6887    }
6888
6889    ctxt->instate = XML_PARSER_DTD;
6890}
6891
6892/**
6893 * xmlParseTextDecl:
6894 * @ctxt:  an XML parser context
6895 *
6896 * parse an XML declaration header for external entities
6897 *
6898 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6899 */
6900
6901void
6902xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6903    xmlChar *version;
6904    const xmlChar *encoding;
6905
6906    /*
6907     * We know that '<?xml' is here.
6908     */
6909    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6910	SKIP(5);
6911    } else {
6912	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6913	return;
6914    }
6915
6916    if (!IS_BLANK_CH(CUR)) {
6917	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6918		       "Space needed after '<?xml'\n");
6919    }
6920    SKIP_BLANKS;
6921
6922    /*
6923     * We may have the VersionInfo here.
6924     */
6925    version = xmlParseVersionInfo(ctxt);
6926    if (version == NULL)
6927	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6928    else {
6929	if (!IS_BLANK_CH(CUR)) {
6930	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6931		           "Space needed here\n");
6932	}
6933    }
6934    ctxt->input->version = version;
6935
6936    /*
6937     * We must have the encoding declaration
6938     */
6939    encoding = xmlParseEncodingDecl(ctxt);
6940    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6941	/*
6942	 * The XML REC instructs us to stop parsing right here
6943	 */
6944        return;
6945    }
6946    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6947	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6948		       "Missing encoding in text declaration\n");
6949    }
6950
6951    SKIP_BLANKS;
6952    if ((RAW == '?') && (NXT(1) == '>')) {
6953        SKIP(2);
6954    } else if (RAW == '>') {
6955        /* Deprecated old WD ... */
6956	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6957	NEXT;
6958    } else {
6959	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6960	MOVETO_ENDTAG(CUR_PTR);
6961	NEXT;
6962    }
6963}
6964
6965/**
6966 * xmlParseExternalSubset:
6967 * @ctxt:  an XML parser context
6968 * @ExternalID: the external identifier
6969 * @SystemID: the system identifier (or URL)
6970 *
6971 * parse Markup declarations from an external subset
6972 *
6973 * [30] extSubset ::= textDecl? extSubsetDecl
6974 *
6975 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6976 */
6977void
6978xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6979                       const xmlChar *SystemID) {
6980    xmlDetectSAX2(ctxt);
6981    GROW;
6982
6983    if ((ctxt->encoding == NULL) &&
6984        (ctxt->input->end - ctxt->input->cur >= 4)) {
6985        xmlChar start[4];
6986	xmlCharEncoding enc;
6987
6988	start[0] = RAW;
6989	start[1] = NXT(1);
6990	start[2] = NXT(2);
6991	start[3] = NXT(3);
6992	enc = xmlDetectCharEncoding(start, 4);
6993	if (enc != XML_CHAR_ENCODING_NONE)
6994	    xmlSwitchEncoding(ctxt, enc);
6995    }
6996
6997    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6998	xmlParseTextDecl(ctxt);
6999	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7000	    /*
7001	     * The XML REC instructs us to stop parsing right here
7002	     */
7003	    ctxt->instate = XML_PARSER_EOF;
7004	    return;
7005	}
7006    }
7007    if (ctxt->myDoc == NULL) {
7008        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7009	if (ctxt->myDoc == NULL) {
7010	    xmlErrMemory(ctxt, "New Doc failed");
7011	    return;
7012	}
7013	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7014    }
7015    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7016        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7017
7018    ctxt->instate = XML_PARSER_DTD;
7019    ctxt->external = 1;
7020    while (((RAW == '<') && (NXT(1) == '?')) ||
7021           ((RAW == '<') && (NXT(1) == '!')) ||
7022	   (RAW == '%') || IS_BLANK_CH(CUR)) {
7023	const xmlChar *check = CUR_PTR;
7024	unsigned int cons = ctxt->input->consumed;
7025
7026	GROW;
7027        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7028	    xmlParseConditionalSections(ctxt);
7029	} else if (IS_BLANK_CH(CUR)) {
7030	    NEXT;
7031	} else if (RAW == '%') {
7032            xmlParsePEReference(ctxt);
7033	} else
7034	    xmlParseMarkupDecl(ctxt);
7035
7036	/*
7037	 * Pop-up of finished entities.
7038	 */
7039	while ((RAW == 0) && (ctxt->inputNr > 1))
7040	    xmlPopInput(ctxt);
7041
7042	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7043	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7044	    break;
7045	}
7046    }
7047
7048    if (RAW != 0) {
7049	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7050    }
7051
7052}
7053
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse and handle a reference found in content. The input must be
 * positioned on the '&', otherwise the function returns immediately.
 *
 * For a CharRef the character is delivered through the characters()
 * SAX callback, or through reference() when the parser buffers are not
 * UTF-8 and the value does not fit on 8 bits.
 *
 * For an EntityRef, predefined entities are delivered through
 * characters(). Other entities get their content parsed on the first
 * reference; then, depending on ctxt->replaceEntities and on the SAX
 * callbacks available, either reference() is called or the entity
 * children are attached (directly or as a copy) under ctxt->node.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
	int i = 0;
	xmlChar out[10];
	/*
	 * Third character of the reference, sampled before the call to
	 * xmlParseCharRef(); used below to tell "&#x...;" from "&#...;".
	 */
	int hex = NXT(2);
	int value = xmlParseCharRef(ctxt);

	/* presumably xmlParseCharRef() returns 0 on error - TODO confirm */
	if (value == 0)
	    return;
	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
	    /*
	     * So we are using non-UTF-8 buffers
	     * Check that the char fit on 8bits, if not
	     * generate a CharRef.
	     */
	    if (value <= 0xFF) {
		out[0] = value;
		out[1] = 0;
		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
		    (!ctxt->disableSAX))
		    ctxt->sax->characters(ctxt->userData, out, 1);
	    } else {
		/*
		 * Rebuild the reference name in the notation (hex or
		 * decimal) used by the document and report it through
		 * the reference() callback.
		 */
		if ((hex == 'x') || (hex == 'X'))
		    snprintf((char *)out, sizeof(out), "#x%X", value);
		else
		    snprintf((char *)out, sizeof(out), "#%d", value);
		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
		    (!ctxt->disableSAX))
		    ctxt->sax->reference(ctxt->userData, out);
	    }
	} else {
	    /*
	     * Just encode the value in UTF-8
	     */
	    COPY_BUF(0 ,out, i, value);
	    out[i] = 0;
	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
		(!ctxt->disableSAX))
		ctxt->sax->characters(ctxt->userData, out, i);
	}
	return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
	return;
    /*
     * Remember the value of ent->checked before the first-reference
     * processing below possibly updates it.
     */
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
	val = ent->content;
	if (val == NULL) return;
	/*
	 * inline the entity.
	 */
	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
	    (!ctxt->disableSAX))
	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
	return;
    }

    /*
     * The first reference to the entity triggers a parsing phase
     * where ent->children is filled with the result of the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if ((ent->checked == 0) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
	/* reference counter before parsing the entity content */
	unsigned long oldnbent = ctxt->nbentities;

	/*
	 * This is a bit hackish but this seems the best
	 * way to make sure both SAX and DOM entity support
	 * behaves okay.
	 */
	void *user_data;
	if (ctxt->userData == ctxt)
	    user_data = NULL;
	else
	    user_data = ctxt->userData;

	/*
	 * Check that this entity is well formed
	 * 4.3.2: An internal general parsed entity is well-formed
	 * if its replacement text matches the production labeled
	 * content.
	 */
	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
	    ctxt->depth++;
	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
	                                              user_data, &list);
	    ctxt->depth--;

	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
	    ctxt->depth++;
	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
	                                   user_data, ctxt->depth, ent->URI,
					   ent->ExternalID, &list);
	    ctxt->depth--;
	} else {
	    ret = XML_ERR_ENTITY_PE_INTERNAL;
	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
			 "invalid entity type found\n", NULL);
	}

	/*
	 * Store the number of entity references counted while parsing
	 * this entity content and do the checks
	 */
	ent->checked = ctxt->nbentities - oldnbent;
	if (ret == XML_ERR_ENTITY_LOOP) {
	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	    xmlFreeNodeList(list);
	    return;
	}
	if (xmlParserEntityCheck(ctxt, 0, ent)) {
	    xmlFreeNodeList(list);
	    return;
	}

	if ((ret == XML_ERR_OK) && (list != NULL)) {
	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
		(ent->children == NULL)) {
		/* the parsed nodes become the entity children */
		ent->children = list;
		if (ctxt->replaceEntities) {
		    /*
		     * Prune it directly in the generated document
		     * except for single text nodes.
		     */
		    if (((list->type == XML_TEXT_NODE) &&
			 (list->next == NULL)) ||
			(ctxt->parseMode == XML_PARSE_READER)) {
			/*
			 * Single text node or reader mode: the entity
			 * keeps the list (owner = 1); list is reset so
			 * that the code further down sees no pending
			 * first-occurrence list.
			 */
			list->parent = (xmlNodePtr) ent;
			list = NULL;
			ent->owner = 1;
		    } else {
			/*
			 * The nodes are reparented to the current node
			 * and document; owner = 0 records that the
			 * entity does not own them.
			 */
			ent->owner = 0;
			while (list != NULL) {
			    list->parent = (xmlNodePtr) ctxt->node;
			    list->doc = ctxt->myDoc;
			    if (list->next == NULL)
				ent->last = list;
			    list = list->next;
			}
			/* the walk left list NULL, point it back to the head */
			list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
			  xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
		    }
		} else {
		    /*
		     * No substitution: the entity owns the nodes, which
		     * are attached to the entity and to its document.
		     */
		    ent->owner = 1;
		    while (list != NULL) {
			list->parent = (xmlNodePtr) ent;
			xmlSetTreeDoc(list, ent->doc);
			if (list->next == NULL)
			    ent->last = list;
			list = list->next;
		    }
		}
	    } else {
		/* unexpected entity type or children already set: drop */
		xmlFreeNodeList(list);
		list = NULL;
	    }
	} else if ((ret != XML_ERR_OK) &&
		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
		     "Entity '%s' failed to parse\n", ent->name);
	} else if (list != NULL) {
	    xmlFreeNodeList(list);
	    list = NULL;
	}
	/*
	 * Make sure the entity is flagged as checked even when no
	 * nested reference was counted.
	 */
	if (ent->checked == 0)
	    ent->checked = 1;
    } else if (ent->checked != 1) {
	/*
	 * Content not parsed here: add the count recorded in
	 * ent->checked to the running total of entity references.
	 */
	ctxt->nbentities += ent->checked;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
	/*
	 * Probably running in SAX mode and the callbacks don't
	 * build the entity content. So unless the content was just
	 * parsed above for the first check (was_checked == 0), go
	 * through the entity content again to generate the callbacks
	 * associated to the entity
	 */
	if (was_checked != 0) {
	    void *user_data;
	    /*
	     * This is a bit hackish but this seems the best
	     * way to make sure both SAX and DOM entity support
	     * behaves okay.
	     */
	    if (ctxt->userData == ctxt)
		user_data = NULL;
	    else
		user_data = ctxt->userData;

	    /* same parsing calls as above, but no node list is requested */
	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
		ctxt->depth++;
		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
				   ent->content, user_data, NULL);
		ctxt->depth--;
	    } else if (ent->etype ==
		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
		ctxt->depth++;
		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
			   ctxt->sax, user_data, ctxt->depth,
			   ent->URI, ent->ExternalID, NULL);
		ctxt->depth--;
	    } else {
		ret = XML_ERR_ENTITY_PE_INTERNAL;
		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
			     "invalid entity type found\n", NULL);
	    }
	    if (ret == XML_ERR_ENTITY_LOOP) {
		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
		return;
	    }
	}
	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
	    /*
	     * Entity reference callback comes second, it's somewhat
	     * superfluous but a compatibility to historical behaviour
	     */
	    ctxt->sax->reference(ctxt->userData, ent->name);
	}
	return;
    }

    /*
     * From here on the entity has children. When entities are not
     * substituted, simply report the reference through SAX.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
	/*
	 * Create a node.
	 */
	ctxt->sax->reference(ctxt->userData, ent->name);
	return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
	/*
	 * There is a problem on the handling of _private for entities
	 * (bug 155816): Should we copy the content of the field from
	 * the entity (possibly overwriting some value set by the user
	 * when a copy is created), should we leave it alone, or should
	 * we try to take care of different situations?  The problem
	 * is exacerbated by the usage of this field by the xmlReader.
	 * To fix this bug, we look at _private on the created node
	 * and, if it's NULL, we copy in whatever was in the entity.
	 * If it's not NULL we leave it alone.  This is somewhat of a
	 * hack - maybe we should have further tests to determine
	 * what to do.
	 */
	if ((ctxt->node != NULL) && (ent->children != NULL)) {
	    /*
	     * Seems we are generating the DOM content, do
	     * a simple tree copy for all references except the first
	     * In the first occurrence list contains the replacement.
	     */
	    if (((list == NULL) && (ent->owner == 0)) ||
		(ctxt->parseMode == XML_PARSE_READER)) {
		xmlNodePtr nw = NULL, cur, firstChild = NULL;

		/*
		 * when operating on a reader, the entities definitions
		 * are always owning the entities subtree.
		if (ctxt->parseMode == XML_PARSE_READER)
		    ent->owner = 1;
		 */

		/*
		 * Copy each entity child, up to ent->last, and add the
		 * copy under the current node.
		 */
		cur = ent->children;
		while (cur != NULL) {
		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
		    if (nw != NULL) {
			if (nw->_private == NULL)
			    nw->_private = cur->_private;
			if (firstChild == NULL){
			    firstChild = nw;
			}
			nw = xmlAddChild(ctxt->node, nw);
		    }
		    if (cur == ent->last) {
			/*
			 * needed to detect some strange empty
			 * node cases in the reader tests
			 */
			if ((ctxt->parseMode == XML_PARSE_READER) &&
			    (nw != NULL) &&
			    (nw->type == XML_ELEMENT_NODE) &&
			    (nw->children == NULL))
			    nw->extra = 1;

			break;
		    }
		    cur = cur->next;
		}
#ifdef LIBXML_LEGACY_ENABLED
		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
		  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
		xmlNodePtr nw = NULL, cur, next, last,
			   firstChild = NULL;
		/*
		 * Copy the entity child list and make it the new
		 * entity child list. The goal is to make sure any
		 * ID or REF referenced will be the one from the
		 * document content and not the entity copy.
		 */
		cur = ent->children;
		ent->children = NULL;
		last = ent->last;
		ent->last = NULL;
		while (cur != NULL) {
		    /* detach cur from its siblings before moving it */
		    next = cur->next;
		    cur->next = NULL;
		    cur->parent = NULL;
		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
		    if (nw != NULL) {
			if (nw->_private == NULL)
			    nw->_private = cur->_private;
			if (firstChild == NULL){
			    firstChild = cur;
			}
			/*
			 * The copy goes to the entity, the original
			 * node goes into the document tree.
			 */
			xmlAddChild((xmlNodePtr) ent, nw);
			xmlAddChild(ctxt->node, cur);
		    }
		    if (cur == last)
			break;
		    cur = next;
		}
		if (ent->owner == 0)
		    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
		  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
	    } else {
		const xmlChar *nbktext;

		/*
		 * the name change is to avoid coalescing of the
		 * node with a possible previous text one which
		 * would make ent->children a dangling pointer
		 */
		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
					-1);
		if (ent->children->type == XML_TEXT_NODE)
		    ent->children->name = nbktext;
		if ((ent->last != ent->children) &&
		    (ent->last->type == XML_TEXT_NODE))
		    ent->last->name = nbktext;
		xmlAddChildList(ctxt->node, ent->children);
	    }

	    /*
	     * This is to avoid a nasty side effect, see
	     * characters() in SAX.c
	     */
	    ctxt->nodemem = 0;
	    ctxt->nodelen = 0;
	    return;
	}
    }
}
7464
7465/**
7466 * xmlParseEntityRef:
7467 * @ctxt:  an XML parser context
7468 *
7469 * parse ENTITY references declarations
7470 *
7471 * [68] EntityRef ::= '&' Name ';'
7472 *
7473 * [ WFC: Entity Declared ]
7474 * In a document without any DTD, a document with only an internal DTD
7475 * subset which contains no parameter entity references, or a document
7476 * with "standalone='yes'", the Name given in the entity reference
7477 * must match that in an entity declaration, except that well-formed
7478 * documents need not declare any of the following entities: amp, lt,
7479 * gt, apos, quot.  The declaration of a parameter entity must precede
7480 * any reference to it.  Similarly, the declaration of a general entity
7481 * must precede any reference to it which appears in a default value in an
7482 * attribute-list declaration. Note that if entities are declared in the
7483 * external subset or in external parameter entities, a non-validating
7484 * processor is not obligated to read and process their declarations;
7485 * for such documents, the rule that an entity must be declared is a
7486 * well-formedness constraint only if standalone='yes'.
7487 *
7488 * [ WFC: Parsed Entity ]
7489 * An entity reference must not contain the name of an unparsed entity
7490 *
7491 * Returns the xmlEntityPtr if found, or NULL otherwise.
7492 */
7493xmlEntityPtr
7494xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7495    const xmlChar *name;
7496    xmlEntityPtr ent = NULL;
7497
7498    GROW;
7499    if (ctxt->instate == XML_PARSER_EOF)
7500        return(NULL);
7501
7502    if (RAW != '&')
7503        return(NULL);
7504    NEXT;
7505    name = xmlParseName(ctxt);
7506    if (name == NULL) {
7507	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7508		       "xmlParseEntityRef: no name\n");
7509        return(NULL);
7510    }
7511    if (RAW != ';') {
7512	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7513	return(NULL);
7514    }
7515    NEXT;
7516
7517    /*
7518     * Predefined entites override any extra definition
7519     */
7520    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7521        ent = xmlGetPredefinedEntity(name);
7522        if (ent != NULL)
7523            return(ent);
7524    }
7525
7526    /*
7527     * Increate the number of entity references parsed
7528     */
7529    ctxt->nbentities++;
7530
7531    /*
7532     * Ask first SAX for entity resolution, otherwise try the
7533     * entities which may have stored in the parser context.
7534     */
7535    if (ctxt->sax != NULL) {
7536	if (ctxt->sax->getEntity != NULL)
7537	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7538	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7539	    (ctxt->options & XML_PARSE_OLDSAX))
7540	    ent = xmlGetPredefinedEntity(name);
7541	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7542	    (ctxt->userData==ctxt)) {
7543	    ent = xmlSAX2GetEntity(ctxt, name);
7544	}
7545    }
7546    /*
7547     * [ WFC: Entity Declared ]
7548     * In a document without any DTD, a document with only an
7549     * internal DTD subset which contains no parameter entity
7550     * references, or a document with "standalone='yes'", the
7551     * Name given in the entity reference must match that in an
7552     * entity declaration, except that well-formed documents
7553     * need not declare any of the following entities: amp, lt,
7554     * gt, apos, quot.
7555     * The declaration of a parameter entity must precede any
7556     * reference to it.
7557     * Similarly, the declaration of a general entity must
7558     * precede any reference to it which appears in a default
7559     * value in an attribute-list declaration. Note that if
7560     * entities are declared in the external subset or in
7561     * external parameter entities, a non-validating processor
7562     * is not obligated to read and process their declarations;
7563     * for such documents, the rule that an entity must be
7564     * declared is a well-formedness constraint only if
7565     * standalone='yes'.
7566     */
7567    if (ent == NULL) {
7568	if ((ctxt->standalone == 1) ||
7569	    ((ctxt->hasExternalSubset == 0) &&
7570	     (ctxt->hasPErefs == 0))) {
7571	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7572		     "Entity '%s' not defined\n", name);
7573	} else {
7574	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7575		     "Entity '%s' not defined\n", name);
7576	    if ((ctxt->inSubset == 0) &&
7577		(ctxt->sax != NULL) &&
7578		(ctxt->sax->reference != NULL)) {
7579		ctxt->sax->reference(ctxt->userData, name);
7580	    }
7581	}
7582	ctxt->valid = 0;
7583    }
7584
7585    /*
7586     * [ WFC: Parsed Entity ]
7587     * An entity reference must not contain the name of an
7588     * unparsed entity
7589     */
7590    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7591	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7592		 "Entity reference to unparsed entity %s\n", name);
7593    }
7594
7595    /*
7596     * [ WFC: No External Entity References ]
7597     * Attribute values cannot contain direct or indirect
7598     * entity references to external entities.
7599     */
7600    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7601	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7602	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7603	     "Attribute references external entity '%s'\n", name);
7604    }
7605    /*
7606     * [ WFC: No < in Attribute Values ]
7607     * The replacement text of any entity referred to directly or
7608     * indirectly in an attribute value (other than "&lt;") must
7609     * not contain a <.
7610     */
7611    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7612	     (ent != NULL) && (ent->content != NULL) &&
7613	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7614	     (xmlStrchr(ent->content, '<'))) {
7615	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7616    "'<' in entity '%s' is not allowed in attributes values\n", name);
7617    }
7618
7619    /*
7620     * Internal check, no parameter entities here ...
7621     */
7622    else {
7623	switch (ent->etype) {
7624	    case XML_INTERNAL_PARAMETER_ENTITY:
7625	    case XML_EXTERNAL_PARAMETER_ENTITY:
7626	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7627	     "Attempt to reference the parameter entity '%s'\n",
7628			      name);
7629	    break;
7630	    default:
7631	    break;
7632	}
7633    }
7634
7635    /*
7636     * [ WFC: No Recursion ]
7637     * A parsed entity must not contain a recursive reference
7638     * to itself, either directly or indirectly.
7639     * Done somewhere else
7640     */
7641    return(ent);
7642}
7643
7644/**
7645 * xmlParseStringEntityRef:
7646 * @ctxt:  an XML parser context
7647 * @str:  a pointer to an index in the string
7648 *
7649 * parse ENTITY references declarations, but this version parses it from
7650 * a string value.
7651 *
7652 * [68] EntityRef ::= '&' Name ';'
7653 *
7654 * [ WFC: Entity Declared ]
7655 * In a document without any DTD, a document with only an internal DTD
7656 * subset which contains no parameter entity references, or a document
7657 * with "standalone='yes'", the Name given in the entity reference
7658 * must match that in an entity declaration, except that well-formed
7659 * documents need not declare any of the following entities: amp, lt,
7660 * gt, apos, quot.  The declaration of a parameter entity must precede
7661 * any reference to it.  Similarly, the declaration of a general entity
7662 * must precede any reference to it which appears in a default value in an
7663 * attribute-list declaration. Note that if entities are declared in the
7664 * external subset or in external parameter entities, a non-validating
7665 * processor is not obligated to read and process their declarations;
7666 * for such documents, the rule that an entity must be declared is a
7667 * well-formedness constraint only if standalone='yes'.
7668 *
7669 * [ WFC: Parsed Entity ]
7670 * An entity reference must not contain the name of an unparsed entity
7671 *
7672 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7673 * is updated to the current location in the string.
7674 */
7675static xmlEntityPtr
7676xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7677    xmlChar *name;
7678    const xmlChar *ptr;
7679    xmlChar cur;
7680    xmlEntityPtr ent = NULL;
7681
7682    if ((str == NULL) || (*str == NULL))
7683        return(NULL);
7684    ptr = *str;
7685    cur = *ptr;
7686    if (cur != '&')
7687	return(NULL);
7688
7689    ptr++;
7690    name = xmlParseStringName(ctxt, &ptr);
7691    if (name == NULL) {
7692	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7693		       "xmlParseStringEntityRef: no name\n");
7694	*str = ptr;
7695	return(NULL);
7696    }
7697    if (*ptr != ';') {
7698	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7699        xmlFree(name);
7700	*str = ptr;
7701	return(NULL);
7702    }
7703    ptr++;
7704
7705
7706    /*
7707     * Predefined entites override any extra definition
7708     */
7709    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7710        ent = xmlGetPredefinedEntity(name);
7711        if (ent != NULL) {
7712            xmlFree(name);
7713            *str = ptr;
7714            return(ent);
7715        }
7716    }
7717
7718    /*
7719     * Increate the number of entity references parsed
7720     */
7721    ctxt->nbentities++;
7722
7723    /*
7724     * Ask first SAX for entity resolution, otherwise try the
7725     * entities which may have stored in the parser context.
7726     */
7727    if (ctxt->sax != NULL) {
7728	if (ctxt->sax->getEntity != NULL)
7729	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7730	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7731	    ent = xmlGetPredefinedEntity(name);
7732	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7733	    ent = xmlSAX2GetEntity(ctxt, name);
7734	}
7735    }
7736
7737    /*
7738     * [ WFC: Entity Declared ]
7739     * In a document without any DTD, a document with only an
7740     * internal DTD subset which contains no parameter entity
7741     * references, or a document with "standalone='yes'", the
7742     * Name given in the entity reference must match that in an
7743     * entity declaration, except that well-formed documents
7744     * need not declare any of the following entities: amp, lt,
7745     * gt, apos, quot.
7746     * The declaration of a parameter entity must precede any
7747     * reference to it.
7748     * Similarly, the declaration of a general entity must
7749     * precede any reference to it which appears in a default
7750     * value in an attribute-list declaration. Note that if
7751     * entities are declared in the external subset or in
7752     * external parameter entities, a non-validating processor
7753     * is not obligated to read and process their declarations;
7754     * for such documents, the rule that an entity must be
7755     * declared is a well-formedness constraint only if
7756     * standalone='yes'.
7757     */
7758    if (ent == NULL) {
7759	if ((ctxt->standalone == 1) ||
7760	    ((ctxt->hasExternalSubset == 0) &&
7761	     (ctxt->hasPErefs == 0))) {
7762	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7763		     "Entity '%s' not defined\n", name);
7764	} else {
7765	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7766			  "Entity '%s' not defined\n",
7767			  name);
7768	}
7769	/* TODO ? check regressions ctxt->valid = 0; */
7770    }
7771
7772    /*
7773     * [ WFC: Parsed Entity ]
7774     * An entity reference must not contain the name of an
7775     * unparsed entity
7776     */
7777    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7778	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7779		 "Entity reference to unparsed entity %s\n", name);
7780    }
7781
7782    /*
7783     * [ WFC: No External Entity References ]
7784     * Attribute values cannot contain direct or indirect
7785     * entity references to external entities.
7786     */
7787    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7788	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7789	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7790	 "Attribute references external entity '%s'\n", name);
7791    }
7792    /*
7793     * [ WFC: No < in Attribute Values ]
7794     * The replacement text of any entity referred to directly or
7795     * indirectly in an attribute value (other than "&lt;") must
7796     * not contain a <.
7797     */
7798    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7799	     (ent != NULL) && (ent->content != NULL) &&
7800	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7801	     (xmlStrchr(ent->content, '<'))) {
7802	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7803     "'<' in entity '%s' is not allowed in attributes values\n",
7804			  name);
7805    }
7806
7807    /*
7808     * Internal check, no parameter entities here ...
7809     */
7810    else {
7811	switch (ent->etype) {
7812	    case XML_INTERNAL_PARAMETER_ENTITY:
7813	    case XML_EXTERNAL_PARAMETER_ENTITY:
7814		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7815	     "Attempt to reference the parameter entity '%s'\n",
7816				  name);
7817	    break;
7818	    default:
7819	    break;
7820	}
7821    }
7822
7823    /*
7824     * [ WFC: No Recursion ]
7825     * A parsed entity must not contain a recursive reference
7826     * to itself, either directly or indirectly.
7827     * Done somewhere else
7828     */
7829
7830    xmlFree(name);
7831    *str = ptr;
7832    return(ent);
7833}
7834
7835/**
7836 * xmlParsePEReference:
7837 * @ctxt:  an XML parser context
7838 *
7839 * parse PEReference declarations
7840 * The entity content is handled directly by pushing it's content as
7841 * a new input stream.
7842 *
7843 * [69] PEReference ::= '%' Name ';'
7844 *
7845 * [ WFC: No Recursion ]
7846 * A parsed entity must not contain a recursive
7847 * reference to itself, either directly or indirectly.
7848 *
7849 * [ WFC: Entity Declared ]
7850 * In a document without any DTD, a document with only an internal DTD
7851 * subset which contains no parameter entity references, or a document
7852 * with "standalone='yes'", ...  ... The declaration of a parameter
7853 * entity must precede any reference to it...
7854 *
7855 * [ VC: Entity Declared ]
7856 * In a document with an external subset or external parameter entities
7857 * with "standalone='no'", ...  ... The declaration of a parameter entity
7858 * must precede any reference to it...
7859 *
7860 * [ WFC: In DTD ]
7861 * Parameter-entity references may only appear in the DTD.
7862 * NOTE: misleading but this is handled.
7863 */
7864void
7865xmlParsePEReference(xmlParserCtxtPtr ctxt)
7866{
7867    const xmlChar *name;
7868    xmlEntityPtr entity = NULL;
7869    xmlParserInputPtr input;
7870
7871    if (RAW != '%')
7872        return;
7873    NEXT;
7874    name = xmlParseName(ctxt);
7875    if (name == NULL) {
7876	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7877		       "xmlParsePEReference: no name\n");
7878	return;
7879    }
7880    if (RAW != ';') {
7881	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7882        return;
7883    }
7884
7885    NEXT;
7886
7887    /*
7888     * Increate the number of entity references parsed
7889     */
7890    ctxt->nbentities++;
7891
7892    /*
7893     * Request the entity from SAX
7894     */
7895    if ((ctxt->sax != NULL) &&
7896	(ctxt->sax->getParameterEntity != NULL))
7897	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7898					       name);
7899    if (entity == NULL) {
7900	/*
7901	 * [ WFC: Entity Declared ]
7902	 * In a document without any DTD, a document with only an
7903	 * internal DTD subset which contains no parameter entity
7904	 * references, or a document with "standalone='yes'", ...
7905	 * ... The declaration of a parameter entity must precede
7906	 * any reference to it...
7907	 */
7908	if ((ctxt->standalone == 1) ||
7909	    ((ctxt->hasExternalSubset == 0) &&
7910	     (ctxt->hasPErefs == 0))) {
7911	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7912			      "PEReference: %%%s; not found\n",
7913			      name);
7914	} else {
7915	    /*
7916	     * [ VC: Entity Declared ]
7917	     * In a document with an external subset or external
7918	     * parameter entities with "standalone='no'", ...
7919	     * ... The declaration of a parameter entity must
7920	     * precede any reference to it...
7921	     */
7922	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7923			  "PEReference: %%%s; not found\n",
7924			  name, NULL);
7925	    ctxt->valid = 0;
7926	}
7927    } else {
7928	/*
7929	 * Internal checking in case the entity quest barfed
7930	 */
7931	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7932	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7933	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7934		  "Internal: %%%s; is not a parameter entity\n",
7935			  name, NULL);
7936	} else if (ctxt->input->free != deallocblankswrapper) {
7937	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7938	    if (xmlPushInput(ctxt, input) < 0)
7939		return;
7940	} else {
7941	    /*
7942	     * TODO !!!
7943	     * handle the extra spaces added before and after
7944	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
7945	     */
7946	    input = xmlNewEntityInputStream(ctxt, entity);
7947	    if (xmlPushInput(ctxt, input) < 0)
7948		return;
7949	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7950		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7951		(IS_BLANK_CH(NXT(5)))) {
7952		xmlParseTextDecl(ctxt);
7953		if (ctxt->errNo ==
7954		    XML_ERR_UNSUPPORTED_ENCODING) {
7955		    /*
7956		     * The XML REC instructs us to stop parsing
7957		     * right here
7958		     */
7959		    ctxt->instate = XML_PARSER_EOF;
7960		    return;
7961		}
7962	    }
7963	}
7964    }
7965    ctxt->hasPErefs = 1;
7966}
7967
7968/**
7969 * xmlLoadEntityContent:
7970 * @ctxt:  an XML parser context
7971 * @entity: an unloaded system entity
7972 *
7973 * Load the original content of the given system entity from the
7974 * ExternalID/SystemID given. This is to be used for Included in Literal
7975 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7976 *
7977 * Returns 0 in case of success and -1 in case of failure
7978 */
7979static int
7980xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7981    xmlParserInputPtr input;
7982    xmlBufferPtr buf;
7983    int l, c;
7984    int count = 0;
7985
7986    if ((ctxt == NULL) || (entity == NULL) ||
7987        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7988	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7989	(entity->content != NULL)) {
7990	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7991	            "xmlLoadEntityContent parameter error");
7992        return(-1);
7993    }
7994
7995    if (xmlParserDebugEntities)
7996	xmlGenericError(xmlGenericErrorContext,
7997		"Reading %s entity content input\n", entity->name);
7998
7999    buf = xmlBufferCreate();
8000    if (buf == NULL) {
8001	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8002	            "xmlLoadEntityContent parameter error");
8003        return(-1);
8004    }
8005
8006    input = xmlNewEntityInputStream(ctxt, entity);
8007    if (input == NULL) {
8008	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8009	            "xmlLoadEntityContent input error");
8010	xmlBufferFree(buf);
8011        return(-1);
8012    }
8013
8014    /*
8015     * Push the entity as the current input, read char by char
8016     * saving to the buffer until the end of the entity or an error
8017     */
8018    if (xmlPushInput(ctxt, input) < 0) {
8019        xmlBufferFree(buf);
8020	return(-1);
8021    }
8022
8023    GROW;
8024    c = CUR_CHAR(l);
8025    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8026           (IS_CHAR(c))) {
8027        xmlBufferAdd(buf, ctxt->input->cur, l);
8028	if (count++ > XML_PARSER_CHUNK_SIZE) {
8029	    count = 0;
8030	    GROW;
8031            if (ctxt->instate == XML_PARSER_EOF) {
8032                xmlBufferFree(buf);
8033                return(-1);
8034            }
8035	}
8036	NEXTL(l);
8037	c = CUR_CHAR(l);
8038	if (c == 0) {
8039	    count = 0;
8040	    GROW;
8041            if (ctxt->instate == XML_PARSER_EOF) {
8042                xmlBufferFree(buf);
8043                return(-1);
8044            }
8045	    c = CUR_CHAR(l);
8046	}
8047    }
8048
8049    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8050        xmlPopInput(ctxt);
8051    } else if (!IS_CHAR(c)) {
8052        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8053                          "xmlLoadEntityContent: invalid char value %d\n",
8054	                  c);
8055	xmlBufferFree(buf);
8056	return(-1);
8057    }
8058    entity->content = buf->content;
8059    buf->content = NULL;
8060    xmlBufferFree(buf);
8061
8062    return(0);
8063}
8064
8065/**
8066 * xmlParseStringPEReference:
8067 * @ctxt:  an XML parser context
8068 * @str:  a pointer to an index in the string
8069 *
8070 * parse PEReference declarations
8071 *
8072 * [69] PEReference ::= '%' Name ';'
8073 *
8074 * [ WFC: No Recursion ]
8075 * A parsed entity must not contain a recursive
8076 * reference to itself, either directly or indirectly.
8077 *
8078 * [ WFC: Entity Declared ]
8079 * In a document without any DTD, a document with only an internal DTD
8080 * subset which contains no parameter entity references, or a document
8081 * with "standalone='yes'", ...  ... The declaration of a parameter
8082 * entity must precede any reference to it...
8083 *
8084 * [ VC: Entity Declared ]
8085 * In a document with an external subset or external parameter entities
8086 * with "standalone='no'", ...  ... The declaration of a parameter entity
8087 * must precede any reference to it...
8088 *
8089 * [ WFC: In DTD ]
8090 * Parameter-entity references may only appear in the DTD.
8091 * NOTE: misleading but this is handled.
8092 *
8093 * Returns the string of the entity content.
8094 *         str is updated to the current value of the index
8095 */
8096static xmlEntityPtr
8097xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8098    const xmlChar *ptr;
8099    xmlChar cur;
8100    xmlChar *name;
8101    xmlEntityPtr entity = NULL;
8102
8103    if ((str == NULL) || (*str == NULL)) return(NULL);
8104    ptr = *str;
8105    cur = *ptr;
8106    if (cur != '%')
8107        return(NULL);
8108    ptr++;
8109    name = xmlParseStringName(ctxt, &ptr);
8110    if (name == NULL) {
8111	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8112		       "xmlParseStringPEReference: no name\n");
8113	*str = ptr;
8114	return(NULL);
8115    }
8116    cur = *ptr;
8117    if (cur != ';') {
8118	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8119	xmlFree(name);
8120	*str = ptr;
8121	return(NULL);
8122    }
8123    ptr++;
8124
8125    /*
8126     * Increate the number of entity references parsed
8127     */
8128    ctxt->nbentities++;
8129
8130    /*
8131     * Request the entity from SAX
8132     */
8133    if ((ctxt->sax != NULL) &&
8134	(ctxt->sax->getParameterEntity != NULL))
8135	entity = ctxt->sax->getParameterEntity(ctxt->userData,
8136					       name);
8137    if (entity == NULL) {
8138	/*
8139	 * [ WFC: Entity Declared ]
8140	 * In a document without any DTD, a document with only an
8141	 * internal DTD subset which contains no parameter entity
8142	 * references, or a document with "standalone='yes'", ...
8143	 * ... The declaration of a parameter entity must precede
8144	 * any reference to it...
8145	 */
8146	if ((ctxt->standalone == 1) ||
8147	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8148	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8149		 "PEReference: %%%s; not found\n", name);
8150	} else {
8151	    /*
8152	     * [ VC: Entity Declared ]
8153	     * In a document with an external subset or external
8154	     * parameter entities with "standalone='no'", ...
8155	     * ... The declaration of a parameter entity must
8156	     * precede any reference to it...
8157	     */
8158	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8159			  "PEReference: %%%s; not found\n",
8160			  name, NULL);
8161	    ctxt->valid = 0;
8162	}
8163    } else {
8164	/*
8165	 * Internal checking in case the entity quest barfed
8166	 */
8167	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8168	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8169	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8170			  "%%%s; is not a parameter entity\n",
8171			  name, NULL);
8172	}
8173    }
8174    ctxt->hasPErefs = 1;
8175    xmlFree(name);
8176    *str = ptr;
8177    return(entity);
8178}
8179
8180/**
8181 * xmlParseDocTypeDecl:
8182 * @ctxt:  an XML parser context
8183 *
8184 * parse a DOCTYPE declaration
8185 *
8186 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8187 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8188 *
8189 * [ VC: Root Element Type ]
8190 * The Name in the document type declaration must match the element
8191 * type of the root element.
8192 */
8193
8194void
8195xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8196    const xmlChar *name = NULL;
8197    xmlChar *ExternalID = NULL;
8198    xmlChar *URI = NULL;
8199
8200    /*
8201     * We know that '<!DOCTYPE' has been detected.
8202     */
8203    SKIP(9);
8204
8205    SKIP_BLANKS;
8206
8207    /*
8208     * Parse the DOCTYPE name.
8209     */
8210    name = xmlParseName(ctxt);
8211    if (name == NULL) {
8212	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8213		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8214    }
8215    ctxt->intSubName = name;
8216
8217    SKIP_BLANKS;
8218
8219    /*
8220     * Check for SystemID and ExternalID
8221     */
8222    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8223
8224    if ((URI != NULL) || (ExternalID != NULL)) {
8225        ctxt->hasExternalSubset = 1;
8226    }
8227    ctxt->extSubURI = URI;
8228    ctxt->extSubSystem = ExternalID;
8229
8230    SKIP_BLANKS;
8231
8232    /*
8233     * Create and update the internal subset.
8234     */
8235    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8236	(!ctxt->disableSAX))
8237	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8238
8239    /*
8240     * Is there any internal subset declarations ?
8241     * they are handled separately in xmlParseInternalSubset()
8242     */
8243    if (RAW == '[')
8244	return;
8245
8246    /*
8247     * We should be at the end of the DOCTYPE declaration.
8248     */
8249    if (RAW != '>') {
8250	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8251    }
8252    NEXT;
8253}
8254
8255/**
8256 * xmlParseInternalSubset:
8257 * @ctxt:  an XML parser context
8258 *
8259 * parse the internal subset declaration
8260 *
8261 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8262 */
8263
8264static void
8265xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8266    /*
8267     * Is there any DTD definition ?
8268     */
8269    if (RAW == '[') {
8270        ctxt->instate = XML_PARSER_DTD;
8271        NEXT;
8272	/*
8273	 * Parse the succession of Markup declarations and
8274	 * PEReferences.
8275	 * Subsequence (markupdecl | PEReference | S)*
8276	 */
8277	while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8278	    const xmlChar *check = CUR_PTR;
8279	    unsigned int cons = ctxt->input->consumed;
8280
8281	    SKIP_BLANKS;
8282	    xmlParseMarkupDecl(ctxt);
8283	    xmlParsePEReference(ctxt);
8284
8285	    /*
8286	     * Pop-up of finished entities.
8287	     */
8288	    while ((RAW == 0) && (ctxt->inputNr > 1))
8289		xmlPopInput(ctxt);
8290
8291	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8292		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8293	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8294		break;
8295	    }
8296	}
8297	if (RAW == ']') {
8298	    NEXT;
8299	    SKIP_BLANKS;
8300	}
8301    }
8302
8303    /*
8304     * We should be at the end of the DOCTYPE declaration.
8305     */
8306    if (RAW != '>') {
8307	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8308    }
8309    NEXT;
8310}
8311
8312#ifdef LIBXML_SAX1_ENABLED
8313/**
8314 * xmlParseAttribute:
8315 * @ctxt:  an XML parser context
8316 * @value:  a xmlChar ** used to store the value of the attribute
8317 *
8318 * parse an attribute
8319 *
8320 * [41] Attribute ::= Name Eq AttValue
8321 *
8322 * [ WFC: No External Entity References ]
8323 * Attribute values cannot contain direct or indirect entity references
8324 * to external entities.
8325 *
8326 * [ WFC: No < in Attribute Values ]
8327 * The replacement text of any entity referred to directly or indirectly in
8328 * an attribute value (other than "&lt;") must not contain a <.
8329 *
8330 * [ VC: Attribute Value Type ]
8331 * The attribute must have been declared; the value must be of the type
8332 * declared for it.
8333 *
8334 * [25] Eq ::= S? '=' S?
8335 *
8336 * With namespace:
8337 *
8338 * [NS 11] Attribute ::= QName Eq AttValue
8339 *
8340 * Also the case QName == xmlns:??? is handled independently as a namespace
8341 * definition.
8342 *
8343 * Returns the attribute name, and the value in *value.
8344 */
8345
8346const xmlChar *
8347xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8348    const xmlChar *name;
8349    xmlChar *val;
8350
8351    *value = NULL;
8352    GROW;
8353    name = xmlParseName(ctxt);
8354    if (name == NULL) {
8355	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8356	               "error parsing attribute name\n");
8357        return(NULL);
8358    }
8359
8360    /*
8361     * read the value
8362     */
8363    SKIP_BLANKS;
8364    if (RAW == '=') {
8365        NEXT;
8366	SKIP_BLANKS;
8367	val = xmlParseAttValue(ctxt);
8368	ctxt->instate = XML_PARSER_CONTENT;
8369    } else {
8370	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8371	       "Specification mandate value for attribute %s\n", name);
8372	return(NULL);
8373    }
8374
8375    /*
8376     * Check that xml:lang conforms to the specification
8377     * No more registered as an error, just generate a warning now
8378     * since this was deprecated in XML second edition
8379     */
8380    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8381	if (!xmlCheckLanguageID(val)) {
8382	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8383		          "Malformed value for xml:lang : %s\n",
8384			  val, NULL);
8385	}
8386    }
8387
8388    /*
8389     * Check that xml:space conforms to the specification
8390     */
8391    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8392	if (xmlStrEqual(val, BAD_CAST "default"))
8393	    *(ctxt->space) = 0;
8394	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8395	    *(ctxt->space) = 1;
8396	else {
8397		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8398"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8399                                 val, NULL);
8400	}
8401    }
8402
8403    *value = val;
8404    return(name);
8405}
8406
8407/**
8408 * xmlParseStartTag:
8409 * @ctxt:  an XML parser context
8410 *
8411 * parse a start of tag either for rule element or
8412 * EmptyElement. In both case we don't parse the tag closing chars.
8413 *
8414 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8415 *
8416 * [ WFC: Unique Att Spec ]
8417 * No attribute name may appear more than once in the same start-tag or
8418 * empty-element tag.
8419 *
8420 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8421 *
8422 * [ WFC: Unique Att Spec ]
8423 * No attribute name may appear more than once in the same start-tag or
8424 * empty-element tag.
8425 *
8426 * With namespace:
8427 *
8428 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8429 *
8430 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8431 *
8432 * Returns the element name parsed
8433 */
8434
8435const xmlChar *
8436xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8437    const xmlChar *name;
8438    const xmlChar *attname;
8439    xmlChar *attvalue;
8440    const xmlChar **atts = ctxt->atts;
8441    int nbatts = 0;
8442    int maxatts = ctxt->maxatts;
8443    int i;
8444
8445    if (RAW != '<') return(NULL);
8446    NEXT1;
8447
8448    name = xmlParseName(ctxt);
8449    if (name == NULL) {
8450	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8451	     "xmlParseStartTag: invalid element name\n");
8452        return(NULL);
8453    }
8454
8455    /*
8456     * Now parse the attributes, it ends up with the ending
8457     *
8458     * (S Attribute)* S?
8459     */
8460    SKIP_BLANKS;
8461    GROW;
8462
8463    while (((RAW != '>') &&
8464	   ((RAW != '/') || (NXT(1) != '>')) &&
8465	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8466	const xmlChar *q = CUR_PTR;
8467	unsigned int cons = ctxt->input->consumed;
8468
8469	attname = xmlParseAttribute(ctxt, &attvalue);
8470        if ((attname != NULL) && (attvalue != NULL)) {
8471	    /*
8472	     * [ WFC: Unique Att Spec ]
8473	     * No attribute name may appear more than once in the same
8474	     * start-tag or empty-element tag.
8475	     */
8476	    for (i = 0; i < nbatts;i += 2) {
8477	        if (xmlStrEqual(atts[i], attname)) {
8478		    xmlErrAttributeDup(ctxt, NULL, attname);
8479		    xmlFree(attvalue);
8480		    goto failed;
8481		}
8482	    }
8483	    /*
8484	     * Add the pair to atts
8485	     */
8486	    if (atts == NULL) {
8487	        maxatts = 22; /* allow for 10 attrs by default */
8488	        atts = (const xmlChar **)
8489		       xmlMalloc(maxatts * sizeof(xmlChar *));
8490		if (atts == NULL) {
8491		    xmlErrMemory(ctxt, NULL);
8492		    if (attvalue != NULL)
8493			xmlFree(attvalue);
8494		    goto failed;
8495		}
8496		ctxt->atts = atts;
8497		ctxt->maxatts = maxatts;
8498	    } else if (nbatts + 4 > maxatts) {
8499	        const xmlChar **n;
8500
8501	        maxatts *= 2;
8502	        n = (const xmlChar **) xmlRealloc((void *) atts,
8503					     maxatts * sizeof(const xmlChar *));
8504		if (n == NULL) {
8505		    xmlErrMemory(ctxt, NULL);
8506		    if (attvalue != NULL)
8507			xmlFree(attvalue);
8508		    goto failed;
8509		}
8510		atts = n;
8511		ctxt->atts = atts;
8512		ctxt->maxatts = maxatts;
8513	    }
8514	    atts[nbatts++] = attname;
8515	    atts[nbatts++] = attvalue;
8516	    atts[nbatts] = NULL;
8517	    atts[nbatts + 1] = NULL;
8518	} else {
8519	    if (attvalue != NULL)
8520		xmlFree(attvalue);
8521	}
8522
8523failed:
8524
8525	GROW
8526	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8527	    break;
8528	if (!IS_BLANK_CH(RAW)) {
8529	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8530			   "attributes construct error\n");
8531	}
8532	SKIP_BLANKS;
8533        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534            (attname == NULL) && (attvalue == NULL)) {
8535	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536			   "xmlParseStartTag: problem parsing attributes\n");
8537	    break;
8538	}
8539	SHRINK;
8540        GROW;
8541    }
8542
8543    /*
8544     * SAX: Start of Element !
8545     */
8546    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8547	(!ctxt->disableSAX)) {
8548	if (nbatts > 0)
8549	    ctxt->sax->startElement(ctxt->userData, name, atts);
8550	else
8551	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8552    }
8553
8554    if (atts != NULL) {
8555        /* Free only the content strings */
8556        for (i = 1;i < nbatts;i+=2)
8557	    if (atts[i] != NULL)
8558	       xmlFree((xmlChar *) atts[i]);
8559    }
8560    return(name);
8561}
8562
8563/**
8564 * xmlParseEndTag1:
8565 * @ctxt:  an XML parser context
8566 * @line:  line of the start tag
8567 * @nsNr:  number of namespaces on the start tag
8568 *
8569 * parse an end of tag
8570 *
8571 * [42] ETag ::= '</' Name S? '>'
8572 *
8573 * With namespace
8574 *
8575 * [NS 9] ETag ::= '</' QName S? '>'
8576 */
8577
8578static void
8579xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8580    const xmlChar *name;
8581
8582    GROW;
8583    if ((RAW != '<') || (NXT(1) != '/')) {
8584	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8585		       "xmlParseEndTag: '</' not found\n");
8586	return;
8587    }
8588    SKIP(2);
8589
8590    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8591
8592    /*
8593     * We should definitely be at the ending "S? '>'" part
8594     */
8595    GROW;
8596    SKIP_BLANKS;
8597    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8598	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8599    } else
8600	NEXT1;
8601
8602    /*
8603     * [ WFC: Element Type Match ]
8604     * The Name in an element's end-tag must match the element type in the
8605     * start-tag.
8606     *
8607     */
8608    if (name != (xmlChar*)1) {
8609        if (name == NULL) name = BAD_CAST "unparseable";
8610        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8611		     "Opening and ending tag mismatch: %s line %d and %s\n",
8612		                ctxt->name, line, name);
8613    }
8614
8615    /*
8616     * SAX: End of Tag
8617     */
8618    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619	(!ctxt->disableSAX))
8620        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8621
8622    namePop(ctxt);
8623    spacePop(ctxt);
8624    return;
8625}
8626
8627/**
8628 * xmlParseEndTag:
8629 * @ctxt:  an XML parser context
8630 *
8631 * parse an end of tag
8632 *
8633 * [42] ETag ::= '</' Name S? '>'
8634 *
8635 * With namespace
8636 *
8637 * [NS 9] ETag ::= '</' QName S? '>'
8638 */
8639
8640void
8641xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8642    xmlParseEndTag1(ctxt, 0);
8643}
8644#endif /* LIBXML_SAX1_ENABLED */
8645
8646/************************************************************************
8647 *									*
8648 *		      SAX 2 specific operations				*
8649 *									*
8650 ************************************************************************/
8651
8652/*
8653 * xmlGetNamespace:
8654 * @ctxt:  an XML parser context
8655 * @prefix:  the prefix to lookup
8656 *
8657 * Lookup the namespace name for the @prefix (which ca be NULL)
8658 * The prefix must come from the @ctxt->dict dictionnary
8659 *
8660 * Returns the namespace name or NULL if not bound
8661 */
8662static const xmlChar *
8663xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664    int i;
8665
8666    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8667    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8668        if (ctxt->nsTab[i] == prefix) {
8669	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670	        return(NULL);
8671	    return(ctxt->nsTab[i + 1]);
8672	}
8673    return(NULL);
8674}
8675
8676/**
8677 * xmlParseQName:
8678 * @ctxt:  an XML parser context
8679 * @prefix:  pointer to store the prefix part
8680 *
8681 * parse an XML Namespace QName
8682 *
8683 * [6]  QName  ::= (Prefix ':')? LocalPart
8684 * [7]  Prefix  ::= NCName
8685 * [8]  LocalPart  ::= NCName
8686 *
8687 * Returns the Name parsed or NULL
8688 */
8689
8690static const xmlChar *
8691xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692    const xmlChar *l, *p;
8693
8694    GROW;
8695
8696    l = xmlParseNCName(ctxt);
8697    if (l == NULL) {
8698        if (CUR == ':') {
8699	    l = xmlParseName(ctxt);
8700	    if (l != NULL) {
8701	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8702		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8703		*prefix = NULL;
8704		return(l);
8705	    }
8706	}
8707        return(NULL);
8708    }
8709    if (CUR == ':') {
8710        NEXT;
8711	p = l;
8712	l = xmlParseNCName(ctxt);
8713	if (l == NULL) {
8714	    xmlChar *tmp;
8715
8716            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8718	    l = xmlParseNmtoken(ctxt);
8719	    if (l == NULL)
8720		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721	    else {
8722		tmp = xmlBuildQName(l, p, NULL, 0);
8723		xmlFree((char *)l);
8724	    }
8725	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8726	    if (tmp != NULL) xmlFree(tmp);
8727	    *prefix = NULL;
8728	    return(p);
8729	}
8730	if (CUR == ':') {
8731	    xmlChar *tmp;
8732
8733            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8735	    NEXT;
8736	    tmp = (xmlChar *) xmlParseName(ctxt);
8737	    if (tmp != NULL) {
8738	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8739		l = xmlDictLookup(ctxt->dict, tmp, -1);
8740		if (tmp != NULL) xmlFree(tmp);
8741		*prefix = p;
8742		return(l);
8743	    }
8744	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8746	    if (tmp != NULL) xmlFree(tmp);
8747	    *prefix = p;
8748	    return(l);
8749	}
8750	*prefix = p;
8751    } else
8752        *prefix = NULL;
8753    return(l);
8754}
8755
8756/**
8757 * xmlParseQNameAndCompare:
8758 * @ctxt:  an XML parser context
8759 * @name:  the localname
8760 * @prefix:  the prefix, if any.
8761 *
8762 * parse an XML name and compares for match
8763 * (specialized for endtag parsing)
8764 *
8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766 * and the name for mismatch
8767 */
8768
8769static const xmlChar *
8770xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771                        xmlChar const *prefix) {
8772    const xmlChar *cmp;
8773    const xmlChar *in;
8774    const xmlChar *ret;
8775    const xmlChar *prefix2;
8776
8777    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778
8779    GROW;
8780    in = ctxt->input->cur;
8781
8782    cmp = prefix;
8783    while (*in != 0 && *in == *cmp) {
8784	++in;
8785	++cmp;
8786    }
8787    if ((*cmp == 0) && (*in == ':')) {
8788        in++;
8789	cmp = name;
8790	while (*in != 0 && *in == *cmp) {
8791	    ++in;
8792	    ++cmp;
8793	}
8794	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8795	    /* success */
8796	    ctxt->input->cur = in;
8797	    return((const xmlChar*) 1);
8798	}
8799    }
8800    /*
8801     * all strings coms from the dictionary, equality can be done directly
8802     */
8803    ret = xmlParseQName (ctxt, &prefix2);
8804    if ((ret == name) && (prefix == prefix2))
8805	return((const xmlChar*) 1);
8806    return ret;
8807}
8808
8809/**
8810 * xmlParseAttValueInternal:
8811 * @ctxt:  an XML parser context
8812 * @len:  attribute len result
8813 * @alloc:  whether the attribute was reallocated as a new string
8814 * @normalize:  if 1 then further non-CDATA normalization must be done
8815 *
8816 * parse a value for an attribute.
8817 * NOTE: if no normalization is needed, the routine will return pointers
8818 *       directly from the data buffer.
8819 *
8820 * 3.3.3 Attribute-Value Normalization:
8821 * Before the value of an attribute is passed to the application or
8822 * checked for validity, the XML processor must normalize it as follows:
8823 * - a character reference is processed by appending the referenced
8824 *   character to the attribute value
8825 * - an entity reference is processed by recursively processing the
8826 *   replacement text of the entity
8827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828 *   appending #x20 to the normalized value, except that only a single
8829 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8830 *   parsed entity or the literal entity value of an internal parsed entity
8831 * - other characters are processed by appending them to the normalized value
8832 * If the declared value is not CDATA, then the XML processor must further
8833 * process the normalized attribute value by discarding any leading and
8834 * trailing space (#x20) characters, and by replacing sequences of space
8835 * (#x20) characters by a single space (#x20) character.
8836 * All attributes for which no declaration has been read should be treated
8837 * by a non-validating parser as if declared CDATA.
8838 *
8839 * Returns the AttValue parsed or NULL. The value has to be freed by the
8840 *     caller if it was copied, this can be detected by val[*len] == 0.
8841 */
8842
8843static xmlChar *
8844xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845                         int normalize)
8846{
8847    xmlChar limit = 0;
8848    const xmlChar *in = NULL, *start, *end, *last;
8849    xmlChar *ret = NULL;
8850
8851    GROW;
8852    in = (xmlChar *) CUR_PTR;
8853    if (*in != '"' && *in != '\'') {
8854        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8855        return (NULL);
8856    }
8857    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8858
8859    /*
8860     * try to handle in this routine the most common case where no
8861     * allocation of a new string is required and where content is
8862     * pure ASCII.
8863     */
8864    limit = *in++;
8865    end = ctxt->input->end;
8866    start = in;
8867    if (in >= end) {
8868        const xmlChar *oldbase = ctxt->input->base;
8869	GROW;
8870	if (oldbase != ctxt->input->base) {
8871	    long delta = ctxt->input->base - oldbase;
8872	    start = start + delta;
8873	    in = in + delta;
8874	}
8875	end = ctxt->input->end;
8876    }
8877    if (normalize) {
8878        /*
8879	 * Skip any leading spaces
8880	 */
8881	while ((in < end) && (*in != limit) &&
8882	       ((*in == 0x20) || (*in == 0x9) ||
8883	        (*in == 0xA) || (*in == 0xD))) {
8884	    in++;
8885	    start = in;
8886	    if (in >= end) {
8887		const xmlChar *oldbase = ctxt->input->base;
8888		GROW;
8889                if (ctxt->instate == XML_PARSER_EOF)
8890                    return(NULL);
8891		if (oldbase != ctxt->input->base) {
8892		    long delta = ctxt->input->base - oldbase;
8893		    start = start + delta;
8894		    in = in + delta;
8895		}
8896		end = ctxt->input->end;
8897                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8898                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8899                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8900                                   "AttValue lenght too long\n");
8901                    return(NULL);
8902                }
8903	    }
8904	}
8905	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8906	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8907	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8908	    if (in >= end) {
8909		const xmlChar *oldbase = ctxt->input->base;
8910		GROW;
8911                if (ctxt->instate == XML_PARSER_EOF)
8912                    return(NULL);
8913		if (oldbase != ctxt->input->base) {
8914		    long delta = ctxt->input->base - oldbase;
8915		    start = start + delta;
8916		    in = in + delta;
8917		}
8918		end = ctxt->input->end;
8919                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8920                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8921                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8922                                   "AttValue lenght too long\n");
8923                    return(NULL);
8924                }
8925	    }
8926	}
8927	last = in;
8928	/*
8929	 * skip the trailing blanks
8930	 */
8931	while ((last[-1] == 0x20) && (last > start)) last--;
8932	while ((in < end) && (*in != limit) &&
8933	       ((*in == 0x20) || (*in == 0x9) ||
8934	        (*in == 0xA) || (*in == 0xD))) {
8935	    in++;
8936	    if (in >= end) {
8937		const xmlChar *oldbase = ctxt->input->base;
8938		GROW;
8939                if (ctxt->instate == XML_PARSER_EOF)
8940                    return(NULL);
8941		if (oldbase != ctxt->input->base) {
8942		    long delta = ctxt->input->base - oldbase;
8943		    start = start + delta;
8944		    in = in + delta;
8945		    last = last + delta;
8946		}
8947		end = ctxt->input->end;
8948                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8949                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8950                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8951                                   "AttValue lenght too long\n");
8952                    return(NULL);
8953                }
8954	    }
8955	}
8956        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8957            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8958            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8959                           "AttValue lenght too long\n");
8960            return(NULL);
8961        }
8962	if (*in != limit) goto need_complex;
8963    } else {
8964	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8965	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8966	    in++;
8967	    if (in >= end) {
8968		const xmlChar *oldbase = ctxt->input->base;
8969		GROW;
8970                if (ctxt->instate == XML_PARSER_EOF)
8971                    return(NULL);
8972		if (oldbase != ctxt->input->base) {
8973		    long delta = ctxt->input->base - oldbase;
8974		    start = start + delta;
8975		    in = in + delta;
8976		}
8977		end = ctxt->input->end;
8978                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8981                                   "AttValue lenght too long\n");
8982                    return(NULL);
8983                }
8984	    }
8985	}
8986	last = in;
8987        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8988            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8989            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8990                           "AttValue lenght too long\n");
8991            return(NULL);
8992        }
8993	if (*in != limit) goto need_complex;
8994    }
8995    in++;
8996    if (len != NULL) {
8997        *len = last - start;
8998        ret = (xmlChar *) start;
8999    } else {
9000        if (alloc) *alloc = 1;
9001        ret = xmlStrndup(start, last - start);
9002    }
9003    CUR_PTR = in;
9004    if (alloc) *alloc = 0;
9005    return ret;
9006need_complex:
9007    if (alloc) *alloc = 1;
9008    return xmlParseAttValueComplex(ctxt, len, normalize);
9009}
9010
9011/**
9012 * xmlParseAttribute2:
9013 * @ctxt:  an XML parser context
9014 * @pref:  the element prefix
9015 * @elem:  the element name
9016 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9017 * @value:  a xmlChar ** used to store the value of the attribute
9018 * @len:  an int * to save the length of the attribute
9019 * @alloc:  an int * to indicate if the attribute was allocated
9020 *
9021 * parse an attribute in the new SAX2 framework.
9022 *
9023 * Returns the attribute name, and the value in *value, .
9024 */
9025
9026static const xmlChar *
9027xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9028                   const xmlChar * pref, const xmlChar * elem,
9029                   const xmlChar ** prefix, xmlChar ** value,
9030                   int *len, int *alloc)
9031{
9032    const xmlChar *name;
9033    xmlChar *val, *internal_val = NULL;
9034    int normalize = 0;
9035
9036    *value = NULL;
9037    GROW;
9038    name = xmlParseQName(ctxt, prefix);
9039    if (name == NULL) {
9040        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9041                       "error parsing attribute name\n");
9042        return (NULL);
9043    }
9044
9045    /*
9046     * get the type if needed
9047     */
9048    if (ctxt->attsSpecial != NULL) {
9049        int type;
9050
9051        type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9052                                            pref, elem, *prefix, name);
9053        if (type != 0)
9054            normalize = 1;
9055    }
9056
9057    /*
9058     * read the value
9059     */
9060    SKIP_BLANKS;
9061    if (RAW == '=') {
9062        NEXT;
9063        SKIP_BLANKS;
9064        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9065	if (normalize) {
9066	    /*
9067	     * Sometimes a second normalisation pass for spaces is needed
9068	     * but that only happens if charrefs or entities refernces
9069	     * have been used in the attribute value, i.e. the attribute
9070	     * value have been extracted in an allocated string already.
9071	     */
9072	    if (*alloc) {
9073	        const xmlChar *val2;
9074
9075	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9076		if ((val2 != NULL) && (val2 != val)) {
9077		    xmlFree(val);
9078		    val = (xmlChar *) val2;
9079		}
9080	    }
9081	}
9082        ctxt->instate = XML_PARSER_CONTENT;
9083    } else {
9084        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9085                          "Specification mandate value for attribute %s\n",
9086                          name);
9087        return (NULL);
9088    }
9089
9090    if (*prefix == ctxt->str_xml) {
9091        /*
9092         * Check that xml:lang conforms to the specification
9093         * No more registered as an error, just generate a warning now
9094         * since this was deprecated in XML second edition
9095         */
9096        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9097            internal_val = xmlStrndup(val, *len);
9098            if (!xmlCheckLanguageID(internal_val)) {
9099                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9100                              "Malformed value for xml:lang : %s\n",
9101                              internal_val, NULL);
9102            }
9103        }
9104
9105        /*
9106         * Check that xml:space conforms to the specification
9107         */
9108        if (xmlStrEqual(name, BAD_CAST "space")) {
9109            internal_val = xmlStrndup(val, *len);
9110            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9111                *(ctxt->space) = 0;
9112            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9113                *(ctxt->space) = 1;
9114            else {
9115                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9116                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9117                              internal_val, NULL);
9118            }
9119        }
9120        if (internal_val) {
9121            xmlFree(internal_val);
9122        }
9123    }
9124
9125    *value = val;
9126    return (name);
9127}
9128/**
9129 * xmlParseStartTag2:
9130 * @ctxt:  an XML parser context
9131 *
9132 * parse a start of tag either for rule element or
9133 * EmptyElement. In both case we don't parse the tag closing chars.
9134 * This routine is called when running SAX2 parsing
9135 *
9136 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9137 *
9138 * [ WFC: Unique Att Spec ]
9139 * No attribute name may appear more than once in the same start-tag or
9140 * empty-element tag.
9141 *
9142 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9143 *
9144 * [ WFC: Unique Att Spec ]
9145 * No attribute name may appear more than once in the same start-tag or
9146 * empty-element tag.
9147 *
9148 * With namespace:
9149 *
9150 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9151 *
9152 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9153 *
9154 * Returns the element name parsed
9155 */
9156
9157static const xmlChar *
9158xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9159                  const xmlChar **URI, int *tlen) {
9160    const xmlChar *localname;
9161    const xmlChar *prefix;
9162    const xmlChar *attname;
9163    const xmlChar *aprefix;
9164    const xmlChar *nsname;
9165    xmlChar *attvalue;
9166    const xmlChar **atts = ctxt->atts;
9167    int maxatts = ctxt->maxatts;
9168    int nratts, nbatts, nbdef;
9169    int i, j, nbNs, attval, oldline, oldcol;
9170    const xmlChar *base;
9171    unsigned long cur;
9172    int nsNr = ctxt->nsNr;
9173
9174    if (RAW != '<') return(NULL);
9175    NEXT1;
9176
9177    /*
9178     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9179     *       point since the attribute values may be stored as pointers to
9180     *       the buffer and calling SHRINK would destroy them !
9181     *       The Shrinking is only possible once the full set of attribute
9182     *       callbacks have been done.
9183     */
9184reparse:
9185    SHRINK;
9186    base = ctxt->input->base;
9187    cur = ctxt->input->cur - ctxt->input->base;
9188    oldline = ctxt->input->line;
9189    oldcol = ctxt->input->col;
9190    nbatts = 0;
9191    nratts = 0;
9192    nbdef = 0;
9193    nbNs = 0;
9194    attval = 0;
9195    /* Forget any namespaces added during an earlier parse of this element. */
9196    ctxt->nsNr = nsNr;
9197
9198    localname = xmlParseQName(ctxt, &prefix);
9199    if (localname == NULL) {
9200	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9201		       "StartTag: invalid element name\n");
9202        return(NULL);
9203    }
9204    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9205
9206    /*
9207     * Now parse the attributes, it ends up with the ending
9208     *
9209     * (S Attribute)* S?
9210     */
9211    SKIP_BLANKS;
9212    GROW;
9213    if (ctxt->input->base != base) goto base_changed;
9214
9215    while (((RAW != '>') &&
9216	   ((RAW != '/') || (NXT(1) != '>')) &&
9217	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9218	const xmlChar *q = CUR_PTR;
9219	unsigned int cons = ctxt->input->consumed;
9220	int len = -1, alloc = 0;
9221
9222	attname = xmlParseAttribute2(ctxt, prefix, localname,
9223	                             &aprefix, &attvalue, &len, &alloc);
9224	if (ctxt->input->base != base) {
9225	    if ((attvalue != NULL) && (alloc != 0))
9226	        xmlFree(attvalue);
9227	    attvalue = NULL;
9228	    goto base_changed;
9229	}
9230        if ((attname != NULL) && (attvalue != NULL)) {
9231	    if (len < 0) len = xmlStrlen(attvalue);
9232            if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9233	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9234		xmlURIPtr uri;
9235
9236                if (*URL != 0) {
9237		    uri = xmlParseURI((const char *) URL);
9238		    if (uri == NULL) {
9239			xmlNsErr(ctxt, XML_WAR_NS_URI,
9240			         "xmlns: '%s' is not a valid URI\n",
9241					   URL, NULL, NULL);
9242		    } else {
9243			if (uri->scheme == NULL) {
9244			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9245				      "xmlns: URI %s is not absolute\n",
9246				      URL, NULL, NULL);
9247			}
9248			xmlFreeURI(uri);
9249		    }
9250		    if (URL == ctxt->str_xml_ns) {
9251			if (attname != ctxt->str_xml) {
9252			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9253			 "xml namespace URI cannot be the default namespace\n",
9254				     NULL, NULL, NULL);
9255			}
9256			goto skip_default_ns;
9257		    }
9258		    if ((len == 29) &&
9259			(xmlStrEqual(URL,
9260				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9261			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9262			     "reuse of the xmlns namespace name is forbidden\n",
9263				 NULL, NULL, NULL);
9264			goto skip_default_ns;
9265		    }
9266		}
9267		/*
9268		 * check that it's not a defined namespace
9269		 */
9270		for (j = 1;j <= nbNs;j++)
9271		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9272			break;
9273		if (j <= nbNs)
9274		    xmlErrAttributeDup(ctxt, NULL, attname);
9275		else
9276		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9277skip_default_ns:
9278		if (alloc != 0) xmlFree(attvalue);
9279		SKIP_BLANKS;
9280		continue;
9281	    }
9282            if (aprefix == ctxt->str_xmlns) {
9283	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9284		xmlURIPtr uri;
9285
9286                if (attname == ctxt->str_xml) {
9287		    if (URL != ctxt->str_xml_ns) {
9288		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9289			         "xml namespace prefix mapped to wrong URI\n",
9290			         NULL, NULL, NULL);
9291		    }
9292		    /*
9293		     * Do not keep a namespace definition node
9294		     */
9295		    goto skip_ns;
9296		}
9297                if (URL == ctxt->str_xml_ns) {
9298		    if (attname != ctxt->str_xml) {
9299		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9300			         "xml namespace URI mapped to wrong prefix\n",
9301			         NULL, NULL, NULL);
9302		    }
9303		    goto skip_ns;
9304		}
9305                if (attname == ctxt->str_xmlns) {
9306		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9307			     "redefinition of the xmlns prefix is forbidden\n",
9308			     NULL, NULL, NULL);
9309		    goto skip_ns;
9310		}
9311		if ((len == 29) &&
9312		    (xmlStrEqual(URL,
9313		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9314		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315			     "reuse of the xmlns namespace name is forbidden\n",
9316			     NULL, NULL, NULL);
9317		    goto skip_ns;
9318		}
9319		if ((URL == NULL) || (URL[0] == 0)) {
9320		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321		             "xmlns:%s: Empty XML namespace is not allowed\n",
9322			          attname, NULL, NULL);
9323		    goto skip_ns;
9324		} else {
9325		    uri = xmlParseURI((const char *) URL);
9326		    if (uri == NULL) {
9327			xmlNsErr(ctxt, XML_WAR_NS_URI,
9328			     "xmlns:%s: '%s' is not a valid URI\n",
9329					   attname, URL, NULL);
9330		    } else {
9331			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9332			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9333				      "xmlns:%s: URI %s is not absolute\n",
9334				      attname, URL, NULL);
9335			}
9336			xmlFreeURI(uri);
9337		    }
9338		}
9339
9340		/*
9341		 * check that it's not a defined namespace
9342		 */
9343		for (j = 1;j <= nbNs;j++)
9344		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9345			break;
9346		if (j <= nbNs)
9347		    xmlErrAttributeDup(ctxt, aprefix, attname);
9348		else
9349		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9350skip_ns:
9351		if (alloc != 0) xmlFree(attvalue);
9352		SKIP_BLANKS;
9353		if (ctxt->input->base != base) goto base_changed;
9354		continue;
9355	    }
9356
9357	    /*
9358	     * Add the pair to atts
9359	     */
9360	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9361	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9362		    if (attvalue[len] == 0)
9363			xmlFree(attvalue);
9364		    goto failed;
9365		}
9366	        maxatts = ctxt->maxatts;
9367		atts = ctxt->atts;
9368	    }
9369	    ctxt->attallocs[nratts++] = alloc;
9370	    atts[nbatts++] = attname;
9371	    atts[nbatts++] = aprefix;
9372	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9373	    atts[nbatts++] = attvalue;
9374	    attvalue += len;
9375	    atts[nbatts++] = attvalue;
9376	    /*
9377	     * tag if some deallocation is needed
9378	     */
9379	    if (alloc != 0) attval = 1;
9380	} else {
9381	    if ((attvalue != NULL) && (attvalue[len] == 0))
9382		xmlFree(attvalue);
9383	}
9384
9385failed:
9386
9387	GROW
9388        if (ctxt->instate == XML_PARSER_EOF)
9389            break;
9390	if (ctxt->input->base != base) goto base_changed;
9391	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9392	    break;
9393	if (!IS_BLANK_CH(RAW)) {
9394	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9395			   "attributes construct error\n");
9396	    break;
9397	}
9398	SKIP_BLANKS;
9399        if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9400            (attname == NULL) && (attvalue == NULL)) {
9401	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9402	         "xmlParseStartTag: problem parsing attributes\n");
9403	    break;
9404	}
9405        GROW;
9406	if (ctxt->input->base != base) goto base_changed;
9407    }
9408
9409    /*
9410     * The attributes defaulting
9411     */
9412    if (ctxt->attsDefault != NULL) {
9413        xmlDefAttrsPtr defaults;
9414
9415	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9416	if (defaults != NULL) {
9417	    for (i = 0;i < defaults->nbAttrs;i++) {
9418	        attname = defaults->values[5 * i];
9419		aprefix = defaults->values[5 * i + 1];
9420
9421                /*
9422		 * special work for namespaces defaulted defs
9423		 */
9424		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9425		    /*
9426		     * check that it's not a defined namespace
9427		     */
9428		    for (j = 1;j <= nbNs;j++)
9429		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9430			    break;
9431	            if (j <= nbNs) continue;
9432
9433		    nsname = xmlGetNamespace(ctxt, NULL);
9434		    if (nsname != defaults->values[5 * i + 2]) {
9435			if (nsPush(ctxt, NULL,
9436			           defaults->values[5 * i + 2]) > 0)
9437			    nbNs++;
9438		    }
9439		} else if (aprefix == ctxt->str_xmlns) {
9440		    /*
9441		     * check that it's not a defined namespace
9442		     */
9443		    for (j = 1;j <= nbNs;j++)
9444		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9445			    break;
9446	            if (j <= nbNs) continue;
9447
9448		    nsname = xmlGetNamespace(ctxt, attname);
9449		    if (nsname != defaults->values[2]) {
9450			if (nsPush(ctxt, attname,
9451			           defaults->values[5 * i + 2]) > 0)
9452			    nbNs++;
9453		    }
9454		} else {
9455		    /*
9456		     * check that it's not a defined attribute
9457		     */
9458		    for (j = 0;j < nbatts;j+=5) {
9459			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9460			    break;
9461		    }
9462		    if (j < nbatts) continue;
9463
9464		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9465			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9466			    return(NULL);
9467			}
9468			maxatts = ctxt->maxatts;
9469			atts = ctxt->atts;
9470		    }
9471		    atts[nbatts++] = attname;
9472		    atts[nbatts++] = aprefix;
9473		    if (aprefix == NULL)
9474			atts[nbatts++] = NULL;
9475		    else
9476		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9477		    atts[nbatts++] = defaults->values[5 * i + 2];
9478		    atts[nbatts++] = defaults->values[5 * i + 3];
9479		    if ((ctxt->standalone == 1) &&
9480		        (defaults->values[5 * i + 4] != NULL)) {
9481			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9482	  "standalone: attribute %s on %s defaulted from external subset\n",
9483	                                 attname, localname);
9484		    }
9485		    nbdef++;
9486		}
9487	    }
9488	}
9489    }
9490
9491    /*
9492     * The attributes checkings
9493     */
9494    for (i = 0; i < nbatts;i += 5) {
9495        /*
9496	* The default namespace does not apply to attribute names.
9497	*/
9498	if (atts[i + 1] != NULL) {
9499	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9500	    if (nsname == NULL) {
9501		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9502		    "Namespace prefix %s for %s on %s is not defined\n",
9503		    atts[i + 1], atts[i], localname);
9504	    }
9505	    atts[i + 2] = nsname;
9506	} else
9507	    nsname = NULL;
9508	/*
9509	 * [ WFC: Unique Att Spec ]
9510	 * No attribute name may appear more than once in the same
9511	 * start-tag or empty-element tag.
9512	 * As extended by the Namespace in XML REC.
9513	 */
9514        for (j = 0; j < i;j += 5) {
9515	    if (atts[i] == atts[j]) {
9516	        if (atts[i+1] == atts[j+1]) {
9517		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9518		    break;
9519		}
9520		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9521		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9522			     "Namespaced Attribute %s in '%s' redefined\n",
9523			     atts[i], nsname, NULL);
9524		    break;
9525		}
9526	    }
9527	}
9528    }
9529
9530    nsname = xmlGetNamespace(ctxt, prefix);
9531    if ((prefix != NULL) && (nsname == NULL)) {
9532	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9533	         "Namespace prefix %s on %s is not defined\n",
9534		 prefix, localname, NULL);
9535    }
9536    *pref = prefix;
9537    *URI = nsname;
9538
9539    /*
9540     * SAX: Start of Element !
9541     */
9542    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9543	(!ctxt->disableSAX)) {
9544	if (nbNs > 0)
9545	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9546			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9547			  nbatts / 5, nbdef, atts);
9548	else
9549	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9550	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9551    }
9552
9553    /*
9554     * Free up attribute allocated strings if needed
9555     */
9556    if (attval != 0) {
9557	for (i = 3,j = 0; j < nratts;i += 5,j++)
9558	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9559	        xmlFree((xmlChar *) atts[i]);
9560    }
9561
9562    return(localname);
9563
9564base_changed:
9565    /*
9566     * the attribute strings are valid iif the base didn't changed
9567     */
9568    if (attval != 0) {
9569	for (i = 3,j = 0; j < nratts;i += 5,j++)
9570	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9571	        xmlFree((xmlChar *) atts[i]);
9572    }
9573    ctxt->input->cur = ctxt->input->base + cur;
9574    ctxt->input->line = oldline;
9575    ctxt->input->col = oldcol;
9576    if (ctxt->wellFormed == 1) {
9577	goto reparse;
9578    }
9579    return(NULL);
9580}
9581
9582/**
9583 * xmlParseEndTag2:
9584 * @ctxt:  an XML parser context
9585 * @line:  line of the start tag
9586 * @nsNr:  number of namespaces on the start tag
9587 *
9588 * parse an end of tag
9589 *
9590 * [42] ETag ::= '</' Name S? '>'
9591 *
9592 * With namespace
9593 *
9594 * [NS 9] ETag ::= '</' QName S? '>'
9595 */
9596
9597static void
9598xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9599                const xmlChar *URI, int line, int nsNr, int tlen) {
9600    const xmlChar *name;
9601
9602    GROW;
9603    if ((RAW != '<') || (NXT(1) != '/')) {
9604	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9605	return;
9606    }
9607    SKIP(2);
9608
9609    if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9610        if (ctxt->input->cur[tlen] == '>') {
9611	    ctxt->input->cur += tlen + 1;
9612	    goto done;
9613	}
9614	ctxt->input->cur += tlen;
9615	name = (xmlChar*)1;
9616    } else {
9617	if (prefix == NULL)
9618	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9619	else
9620	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9621    }
9622
9623    /*
9624     * We should definitely be at the ending "S? '>'" part
9625     */
9626    GROW;
9627    if (ctxt->instate == XML_PARSER_EOF)
9628        return;
9629    SKIP_BLANKS;
9630    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9631	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9632    } else
9633	NEXT1;
9634
9635    /*
9636     * [ WFC: Element Type Match ]
9637     * The Name in an element's end-tag must match the element type in the
9638     * start-tag.
9639     *
9640     */
9641    if (name != (xmlChar*)1) {
9642        if (name == NULL) name = BAD_CAST "unparseable";
9643        if ((line == 0) && (ctxt->node != NULL))
9644            line = ctxt->node->line;
9645        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9646		     "Opening and ending tag mismatch: %s line %d and %s\n",
9647		                ctxt->name, line, name);
9648    }
9649
9650    /*
9651     * SAX: End of Tag
9652     */
9653done:
9654    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9655	(!ctxt->disableSAX))
9656	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9657
9658    spacePop(ctxt);
9659    if (nsNr != 0)
9660	nsPop(ctxt, nsNr);
9661    return;
9662}
9663
9664/**
9665 * xmlParseCDSect:
9666 * @ctxt:  an XML parser context
9667 *
9668 * Parse escaped pure raw content.
9669 *
9670 * [18] CDSect ::= CDStart CData CDEnd
9671 *
9672 * [19] CDStart ::= '<![CDATA['
9673 *
9674 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9675 *
9676 * [21] CDEnd ::= ']]>'
9677 */
9678void
9679xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9680    xmlChar *buf = NULL;
9681    int len = 0;
9682    int size = XML_PARSER_BUFFER_SIZE;
9683    int r, rl;
9684    int	s, sl;
9685    int cur, l;
9686    int count = 0;
9687
9688    /* Check 2.6.0 was NXT(0) not RAW */
9689    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9690	SKIP(9);
9691    } else
9692        return;
9693
9694    ctxt->instate = XML_PARSER_CDATA_SECTION;
9695    r = CUR_CHAR(rl);
9696    if (!IS_CHAR(r)) {
9697	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9698	ctxt->instate = XML_PARSER_CONTENT;
9699        return;
9700    }
9701    NEXTL(rl);
9702    s = CUR_CHAR(sl);
9703    if (!IS_CHAR(s)) {
9704	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9705	ctxt->instate = XML_PARSER_CONTENT;
9706        return;
9707    }
9708    NEXTL(sl);
9709    cur = CUR_CHAR(l);
9710    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9711    if (buf == NULL) {
9712	xmlErrMemory(ctxt, NULL);
9713	return;
9714    }
9715    while (IS_CHAR(cur) &&
9716           ((r != ']') || (s != ']') || (cur != '>'))) {
9717	if (len + 5 >= size) {
9718	    xmlChar *tmp;
9719
9720            if ((size > XML_MAX_TEXT_LENGTH) &&
9721                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9722                xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9723                             "CData section too big found", NULL);
9724                xmlFree (buf);
9725                return;
9726            }
9727	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9728	    if (tmp == NULL) {
9729	        xmlFree(buf);
9730		xmlErrMemory(ctxt, NULL);
9731		return;
9732	    }
9733	    buf = tmp;
9734	    size *= 2;
9735	}
9736	COPY_BUF(rl,buf,len,r);
9737	r = s;
9738	rl = sl;
9739	s = cur;
9740	sl = l;
9741	count++;
9742	if (count > 50) {
9743	    GROW;
9744            if (ctxt->instate == XML_PARSER_EOF) {
9745		xmlFree(buf);
9746		return;
9747            }
9748	    count = 0;
9749	}
9750	NEXTL(l);
9751	cur = CUR_CHAR(l);
9752    }
9753    buf[len] = 0;
9754    ctxt->instate = XML_PARSER_CONTENT;
9755    if (cur != '>') {
9756	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9757	                     "CData section not finished\n%.50s\n", buf);
9758	xmlFree(buf);
9759        return;
9760    }
9761    NEXTL(l);
9762
9763    /*
9764     * OK the buffer is to be consumed as cdata.
9765     */
9766    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9767	if (ctxt->sax->cdataBlock != NULL)
9768	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9769	else if (ctxt->sax->characters != NULL)
9770	    ctxt->sax->characters(ctxt->userData, buf, len);
9771    }
9772    xmlFree(buf);
9773}
9774
9775/**
9776 * xmlParseContent:
9777 * @ctxt:  an XML parser context
9778 *
9779 * Parse a content:
9780 *
9781 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9782 */
9783
9784void
9785xmlParseContent(xmlParserCtxtPtr ctxt) {
9786    GROW;
9787    while ((RAW != 0) &&
9788	   ((RAW != '<') || (NXT(1) != '/')) &&
9789	   (ctxt->instate != XML_PARSER_EOF)) {
9790	const xmlChar *test = CUR_PTR;
9791	unsigned int cons = ctxt->input->consumed;
9792	const xmlChar *cur = ctxt->input->cur;
9793
9794	/*
9795	 * First case : a Processing Instruction.
9796	 */
9797	if ((*cur == '<') && (cur[1] == '?')) {
9798	    xmlParsePI(ctxt);
9799	}
9800
9801	/*
9802	 * Second case : a CDSection
9803	 */
9804	/* 2.6.0 test was *cur not RAW */
9805	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9806	    xmlParseCDSect(ctxt);
9807	}
9808
9809	/*
9810	 * Third case :  a comment
9811	 */
9812	else if ((*cur == '<') && (NXT(1) == '!') &&
9813		 (NXT(2) == '-') && (NXT(3) == '-')) {
9814	    xmlParseComment(ctxt);
9815	    ctxt->instate = XML_PARSER_CONTENT;
9816	}
9817
9818	/*
9819	 * Fourth case :  a sub-element.
9820	 */
9821	else if (*cur == '<') {
9822	    xmlParseElement(ctxt);
9823	}
9824
9825	/*
9826	 * Fifth case : a reference. If if has not been resolved,
9827	 *    parsing returns it's Name, create the node
9828	 */
9829
9830	else if (*cur == '&') {
9831	    xmlParseReference(ctxt);
9832	}
9833
9834	/*
9835	 * Last case, text. Note that References are handled directly.
9836	 */
9837	else {
9838	    xmlParseCharData(ctxt, 0);
9839	}
9840
9841	GROW;
9842	/*
9843	 * Pop-up of finished entities.
9844	 */
9845	while ((RAW == 0) && (ctxt->inputNr > 1))
9846	    xmlPopInput(ctxt);
9847	SHRINK;
9848
9849	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9850	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9851	                "detected an error in element content\n");
9852	    ctxt->instate = XML_PARSER_EOF;
9853            break;
9854	}
9855    }
9856}
9857
9858/**
9859 * xmlParseElement:
9860 * @ctxt:  an XML parser context
9861 *
9862 * parse an XML element, this is highly recursive
9863 *
9864 * [39] element ::= EmptyElemTag | STag content ETag
9865 *
9866 * [ WFC: Element Type Match ]
9867 * The Name in an element's end-tag must match the element type in the
9868 * start-tag.
9869 *
9870 */
9871
9872void
9873xmlParseElement(xmlParserCtxtPtr ctxt) {
9874    const xmlChar *name;
9875    const xmlChar *prefix = NULL;
9876    const xmlChar *URI = NULL;
9877    xmlParserNodeInfo node_info;
9878    int line, tlen = 0;
9879    xmlNodePtr ret;
9880    int nsNr = ctxt->nsNr;
9881
9882    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9883        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9884	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9885		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9886			  xmlParserMaxDepth);
9887	ctxt->instate = XML_PARSER_EOF;
9888	return;
9889    }
9890
9891    /* Capture start position */
9892    if (ctxt->record_info) {
9893        node_info.begin_pos = ctxt->input->consumed +
9894                          (CUR_PTR - ctxt->input->base);
9895	node_info.begin_line = ctxt->input->line;
9896    }
9897
9898    if (ctxt->spaceNr == 0)
9899	spacePush(ctxt, -1);
9900    else if (*ctxt->space == -2)
9901	spacePush(ctxt, -1);
9902    else
9903	spacePush(ctxt, *ctxt->space);
9904
9905    line = ctxt->input->line;
9906#ifdef LIBXML_SAX1_ENABLED
9907    if (ctxt->sax2)
9908#endif /* LIBXML_SAX1_ENABLED */
9909        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9910#ifdef LIBXML_SAX1_ENABLED
9911    else
9912	name = xmlParseStartTag(ctxt);
9913#endif /* LIBXML_SAX1_ENABLED */
9914    if (ctxt->instate == XML_PARSER_EOF)
9915	return;
9916    if (name == NULL) {
9917	spacePop(ctxt);
9918        return;
9919    }
9920    namePush(ctxt, name);
9921    ret = ctxt->node;
9922
9923#ifdef LIBXML_VALID_ENABLED
9924    /*
9925     * [ VC: Root Element Type ]
9926     * The Name in the document type declaration must match the element
9927     * type of the root element.
9928     */
9929    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9930        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9931        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9932#endif /* LIBXML_VALID_ENABLED */
9933
9934    /*
9935     * Check for an Empty Element.
9936     */
9937    if ((RAW == '/') && (NXT(1) == '>')) {
9938        SKIP(2);
9939	if (ctxt->sax2) {
9940	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9941		(!ctxt->disableSAX))
9942		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9943#ifdef LIBXML_SAX1_ENABLED
9944	} else {
9945	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9946		(!ctxt->disableSAX))
9947		ctxt->sax->endElement(ctxt->userData, name);
9948#endif /* LIBXML_SAX1_ENABLED */
9949	}
9950	namePop(ctxt);
9951	spacePop(ctxt);
9952	if (nsNr != ctxt->nsNr)
9953	    nsPop(ctxt, ctxt->nsNr - nsNr);
9954	if ( ret != NULL && ctxt->record_info ) {
9955	   node_info.end_pos = ctxt->input->consumed +
9956			      (CUR_PTR - ctxt->input->base);
9957	   node_info.end_line = ctxt->input->line;
9958	   node_info.node = ret;
9959	   xmlParserAddNodeInfo(ctxt, &node_info);
9960	}
9961	return;
9962    }
9963    if (RAW == '>') {
9964        NEXT1;
9965    } else {
9966        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9967		     "Couldn't find end of Start Tag %s line %d\n",
9968		                name, line, NULL);
9969
9970	/*
9971	 * end of parsing of this node.
9972	 */
9973	nodePop(ctxt);
9974	namePop(ctxt);
9975	spacePop(ctxt);
9976	if (nsNr != ctxt->nsNr)
9977	    nsPop(ctxt, ctxt->nsNr - nsNr);
9978
9979	/*
9980	 * Capture end position and add node
9981	 */
9982	if ( ret != NULL && ctxt->record_info ) {
9983	   node_info.end_pos = ctxt->input->consumed +
9984			      (CUR_PTR - ctxt->input->base);
9985	   node_info.end_line = ctxt->input->line;
9986	   node_info.node = ret;
9987	   xmlParserAddNodeInfo(ctxt, &node_info);
9988	}
9989	return;
9990    }
9991
9992    /*
9993     * Parse the content of the element:
9994     */
9995    xmlParseContent(ctxt);
9996    if (!IS_BYTE_CHAR(RAW)) {
9997        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9998	 "Premature end of data in tag %s line %d\n",
9999		                name, line, NULL);
10000
10001	/*
10002	 * end of parsing of this node.
10003	 */
10004	nodePop(ctxt);
10005	namePop(ctxt);
10006	spacePop(ctxt);
10007	if (nsNr != ctxt->nsNr)
10008	    nsPop(ctxt, ctxt->nsNr - nsNr);
10009	return;
10010    }
10011
10012    /*
10013     * parse the end of tag: '</' should be here.
10014     */
10015    if (ctxt->sax2) {
10016	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10017	namePop(ctxt);
10018    }
10019#ifdef LIBXML_SAX1_ENABLED
10020      else
10021	xmlParseEndTag1(ctxt, line);
10022#endif /* LIBXML_SAX1_ENABLED */
10023
10024    /*
10025     * Capture end position and add node
10026     */
10027    if ( ret != NULL && ctxt->record_info ) {
10028       node_info.end_pos = ctxt->input->consumed +
10029                          (CUR_PTR - ctxt->input->base);
10030       node_info.end_line = ctxt->input->line;
10031       node_info.node = ret;
10032       xmlParserAddNodeInfo(ctxt, &node_info);
10033    }
10034}
10035
10036/**
10037 * xmlParseVersionNum:
10038 * @ctxt:  an XML parser context
10039 *
10040 * parse the XML version value.
10041 *
10042 * [26] VersionNum ::= '1.' [0-9]+
10043 *
10044 * In practice allow [0-9].[0-9]+ at that level
10045 *
10046 * Returns the string giving the XML version number, or NULL
10047 */
10048xmlChar *
10049xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10050    xmlChar *buf = NULL;
10051    int len = 0;
10052    int size = 10;
10053    xmlChar cur;
10054
10055    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10056    if (buf == NULL) {
10057	xmlErrMemory(ctxt, NULL);
10058	return(NULL);
10059    }
10060    cur = CUR;
10061    if (!((cur >= '0') && (cur <= '9'))) {
10062	xmlFree(buf);
10063	return(NULL);
10064    }
10065    buf[len++] = cur;
10066    NEXT;
10067    cur=CUR;
10068    if (cur != '.') {
10069	xmlFree(buf);
10070	return(NULL);
10071    }
10072    buf[len++] = cur;
10073    NEXT;
10074    cur=CUR;
10075    while ((cur >= '0') && (cur <= '9')) {
10076	if (len + 1 >= size) {
10077	    xmlChar *tmp;
10078
10079	    size *= 2;
10080	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10081	    if (tmp == NULL) {
10082	        xmlFree(buf);
10083		xmlErrMemory(ctxt, NULL);
10084		return(NULL);
10085	    }
10086	    buf = tmp;
10087	}
10088	buf[len++] = cur;
10089	NEXT;
10090	cur=CUR;
10091    }
10092    buf[len] = 0;
10093    return(buf);
10094}
10095
10096/**
10097 * xmlParseVersionInfo:
10098 * @ctxt:  an XML parser context
10099 *
10100 * parse the XML version.
10101 *
10102 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10103 *
10104 * [25] Eq ::= S? '=' S?
10105 *
10106 * Returns the version string, e.g. "1.0"
10107 */
10108
10109xmlChar *
10110xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10111    xmlChar *version = NULL;
10112
10113    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10114	SKIP(7);
10115	SKIP_BLANKS;
10116	if (RAW != '=') {
10117	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10118	    return(NULL);
10119        }
10120	NEXT;
10121	SKIP_BLANKS;
10122	if (RAW == '"') {
10123	    NEXT;
10124	    version = xmlParseVersionNum(ctxt);
10125	    if (RAW != '"') {
10126		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10127	    } else
10128	        NEXT;
10129	} else if (RAW == '\''){
10130	    NEXT;
10131	    version = xmlParseVersionNum(ctxt);
10132	    if (RAW != '\'') {
10133		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10134	    } else
10135	        NEXT;
10136	} else {
10137	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10138	}
10139    }
10140    return(version);
10141}
10142
10143/**
10144 * xmlParseEncName:
10145 * @ctxt:  an XML parser context
10146 *
10147 * parse the XML encoding name
10148 *
10149 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10150 *
10151 * Returns the encoding name value or NULL
10152 */
10153xmlChar *
10154xmlParseEncName(xmlParserCtxtPtr ctxt) {
10155    xmlChar *buf = NULL;
10156    int len = 0;
10157    int size = 10;
10158    xmlChar cur;
10159
10160    cur = CUR;
10161    if (((cur >= 'a') && (cur <= 'z')) ||
10162        ((cur >= 'A') && (cur <= 'Z'))) {
10163	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10164	if (buf == NULL) {
10165	    xmlErrMemory(ctxt, NULL);
10166	    return(NULL);
10167	}
10168
10169	buf[len++] = cur;
10170	NEXT;
10171	cur = CUR;
10172	while (((cur >= 'a') && (cur <= 'z')) ||
10173	       ((cur >= 'A') && (cur <= 'Z')) ||
10174	       ((cur >= '0') && (cur <= '9')) ||
10175	       (cur == '.') || (cur == '_') ||
10176	       (cur == '-')) {
10177	    if (len + 1 >= size) {
10178	        xmlChar *tmp;
10179
10180		size *= 2;
10181		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10182		if (tmp == NULL) {
10183		    xmlErrMemory(ctxt, NULL);
10184		    xmlFree(buf);
10185		    return(NULL);
10186		}
10187		buf = tmp;
10188	    }
10189	    buf[len++] = cur;
10190	    NEXT;
10191	    cur = CUR;
10192	    if (cur == 0) {
10193	        SHRINK;
10194		GROW;
10195		cur = CUR;
10196	    }
10197        }
10198	buf[len] = 0;
10199    } else {
10200	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10201    }
10202    return(buf);
10203}
10204
10205/**
10206 * xmlParseEncodingDecl:
10207 * @ctxt:  an XML parser context
10208 *
10209 * parse the XML encoding declaration
10210 *
10211 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10212 *
10213 * this setups the conversion filters.
10214 *
10215 * Returns the encoding value or NULL
10216 */
10217
10218const xmlChar *
10219xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10220    xmlChar *encoding = NULL;
10221
10222    SKIP_BLANKS;
10223    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10224	SKIP(8);
10225	SKIP_BLANKS;
10226	if (RAW != '=') {
10227	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10228	    return(NULL);
10229        }
10230	NEXT;
10231	SKIP_BLANKS;
10232	if (RAW == '"') {
10233	    NEXT;
10234	    encoding = xmlParseEncName(ctxt);
10235	    if (RAW != '"') {
10236		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10237	    } else
10238	        NEXT;
10239	} else if (RAW == '\''){
10240	    NEXT;
10241	    encoding = xmlParseEncName(ctxt);
10242	    if (RAW != '\'') {
10243		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10244	    } else
10245	        NEXT;
10246	} else {
10247	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10248	}
10249
10250        /*
10251         * Non standard parsing, allowing the user to ignore encoding
10252         */
10253        if (ctxt->options & XML_PARSE_IGNORE_ENC)
10254            return(encoding);
10255
10256	/*
10257	 * UTF-16 encoding stwich has already taken place at this stage,
10258	 * more over the little-endian/big-endian selection is already done
10259	 */
10260        if ((encoding != NULL) &&
10261	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10262	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10263	    /*
10264	     * If no encoding was passed to the parser, that we are
10265	     * using UTF-16 and no decoder is present i.e. the
10266	     * document is apparently UTF-8 compatible, then raise an
10267	     * encoding mismatch fatal error
10268	     */
10269	    if ((ctxt->encoding == NULL) &&
10270	        (ctxt->input->buf != NULL) &&
10271	        (ctxt->input->buf->encoder == NULL)) {
10272		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10273		  "Document labelled UTF-16 but has UTF-8 content\n");
10274	    }
10275	    if (ctxt->encoding != NULL)
10276		xmlFree((xmlChar *) ctxt->encoding);
10277	    ctxt->encoding = encoding;
10278	}
10279	/*
10280	 * UTF-8 encoding is handled natively
10281	 */
10282        else if ((encoding != NULL) &&
10283	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10284	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10285	    if (ctxt->encoding != NULL)
10286		xmlFree((xmlChar *) ctxt->encoding);
10287	    ctxt->encoding = encoding;
10288	}
10289	else if (encoding != NULL) {
10290	    xmlCharEncodingHandlerPtr handler;
10291
10292	    if (ctxt->input->encoding != NULL)
10293		xmlFree((xmlChar *) ctxt->input->encoding);
10294	    ctxt->input->encoding = encoding;
10295
10296            handler = xmlFindCharEncodingHandler((const char *) encoding);
10297	    if (handler != NULL) {
10298		xmlSwitchToEncoding(ctxt, handler);
10299	    } else {
10300		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10301			"Unsupported encoding %s\n", encoding);
10302		return(NULL);
10303	    }
10304	}
10305    }
10306    return(encoding);
10307}
10308
10309/**
10310 * xmlParseSDDecl:
10311 * @ctxt:  an XML parser context
10312 *
10313 * parse the XML standalone declaration
10314 *
10315 * [32] SDDecl ::= S 'standalone' Eq
10316 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10317 *
10318 * [ VC: Standalone Document Declaration ]
10319 * TODO The standalone document declaration must have the value "no"
10320 * if any external markup declarations contain declarations of:
10321 *  - attributes with default values, if elements to which these
10322 *    attributes apply appear in the document without specifications
10323 *    of values for these attributes, or
10324 *  - entities (other than amp, lt, gt, apos, quot), if references
10325 *    to those entities appear in the document, or
10326 *  - attributes with values subject to normalization, where the
10327 *    attribute appears in the document with a value which will change
10328 *    as a result of normalization, or
10329 *  - element types with element content, if white space occurs directly
10330 *    within any instance of those types.
10331 *
10332 * Returns:
10333 *   1 if standalone="yes"
10334 *   0 if standalone="no"
10335 *  -2 if standalone attribute is missing or invalid
10336 *	  (A standalone value of -2 means that the XML declaration was found,
10337 *	   but no value was specified for the standalone attribute).
10338 */
10339
10340int
10341xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10342    int standalone = -2;
10343
10344    SKIP_BLANKS;
10345    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10346	SKIP(10);
10347        SKIP_BLANKS;
10348	if (RAW != '=') {
10349	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10350	    return(standalone);
10351        }
10352	NEXT;
10353	SKIP_BLANKS;
10354        if (RAW == '\''){
10355	    NEXT;
10356	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10357	        standalone = 0;
10358                SKIP(2);
10359	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10360	               (NXT(2) == 's')) {
10361	        standalone = 1;
10362		SKIP(3);
10363            } else {
10364		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10365	    }
10366	    if (RAW != '\'') {
10367		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10368	    } else
10369	        NEXT;
10370	} else if (RAW == '"'){
10371	    NEXT;
10372	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10373	        standalone = 0;
10374		SKIP(2);
10375	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10376	               (NXT(2) == 's')) {
10377	        standalone = 1;
10378                SKIP(3);
10379            } else {
10380		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10381	    }
10382	    if (RAW != '"') {
10383		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10384	    } else
10385	        NEXT;
10386	} else {
10387	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10388        }
10389    }
10390    return(standalone);
10391}
10392
10393/**
10394 * xmlParseXMLDecl:
10395 * @ctxt:  an XML parser context
10396 *
10397 * parse an XML declaration header
10398 *
10399 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10400 */
10401
10402void
10403xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10404    xmlChar *version;
10405
10406    /*
10407     * This value for standalone indicates that the document has an
10408     * XML declaration but it does not have a standalone attribute.
10409     * It will be overwritten later if a standalone attribute is found.
10410     */
10411    ctxt->input->standalone = -2;
10412
10413    /*
10414     * We know that '<?xml' is here.
10415     */
10416    SKIP(5);
10417
10418    if (!IS_BLANK_CH(RAW)) {
10419	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10420	               "Blank needed after '<?xml'\n");
10421    }
10422    SKIP_BLANKS;
10423
10424    /*
10425     * We must have the VersionInfo here.
10426     */
10427    version = xmlParseVersionInfo(ctxt);
10428    if (version == NULL) {
10429	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10430    } else {
10431	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10432	    /*
10433	     * Changed here for XML-1.0 5th edition
10434	     */
10435	    if (ctxt->options & XML_PARSE_OLD10) {
10436		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10437			          "Unsupported version '%s'\n",
10438			          version);
10439	    } else {
10440	        if ((version[0] == '1') && ((version[1] == '.'))) {
10441		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10442		                  "Unsupported version '%s'\n",
10443				  version, NULL);
10444		} else {
10445		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10446				      "Unsupported version '%s'\n",
10447				      version);
10448		}
10449	    }
10450	}
10451	if (ctxt->version != NULL)
10452	    xmlFree((void *) ctxt->version);
10453	ctxt->version = version;
10454    }
10455
10456    /*
10457     * We may have the encoding declaration
10458     */
10459    if (!IS_BLANK_CH(RAW)) {
10460        if ((RAW == '?') && (NXT(1) == '>')) {
10461	    SKIP(2);
10462	    return;
10463	}
10464	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10465    }
10466    xmlParseEncodingDecl(ctxt);
10467    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10468	/*
10469	 * The XML REC instructs us to stop parsing right here
10470	 */
10471        return;
10472    }
10473
10474    /*
10475     * We may have the standalone status.
10476     */
10477    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10478        if ((RAW == '?') && (NXT(1) == '>')) {
10479	    SKIP(2);
10480	    return;
10481	}
10482	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10483    }
10484
10485    /*
10486     * We can grow the input buffer freely at that point
10487     */
10488    GROW;
10489
10490    SKIP_BLANKS;
10491    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10492
10493    SKIP_BLANKS;
10494    if ((RAW == '?') && (NXT(1) == '>')) {
10495        SKIP(2);
10496    } else if (RAW == '>') {
10497        /* Deprecated old WD ... */
10498	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10499	NEXT;
10500    } else {
10501	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10502	MOVETO_ENDTAG(CUR_PTR);
10503	NEXT;
10504    }
10505}
10506
10507/**
10508 * xmlParseMisc:
10509 * @ctxt:  an XML parser context
10510 *
10511 * parse an XML Misc* optional field.
10512 *
10513 * [27] Misc ::= Comment | PI |  S
10514 */
10515
10516void
10517xmlParseMisc(xmlParserCtxtPtr ctxt) {
10518    while ((ctxt->instate != XML_PARSER_EOF) &&
10519           (((RAW == '<') && (NXT(1) == '?')) ||
10520            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10521            IS_BLANK_CH(CUR))) {
10522        if ((RAW == '<') && (NXT(1) == '?')) {
10523	    xmlParsePI(ctxt);
10524	} else if (IS_BLANK_CH(CUR)) {
10525	    NEXT;
10526	} else
10527	    xmlParseComment(ctxt);
10528    }
10529}
10530
10531/**
10532 * xmlParseDocument:
10533 * @ctxt:  an XML parser context
10534 *
10535 * parse an XML document (and build a tree if using the standard SAX
10536 * interface).
10537 *
10538 * [1] document ::= prolog element Misc*
10539 *
10540 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10541 *
10542 * Returns 0, -1 in case of error. the parser context is augmented
10543 *                as a result of the parsing.
10544 */
10545
10546int
10547xmlParseDocument(xmlParserCtxtPtr ctxt) {
10548    xmlChar start[4];
10549    xmlCharEncoding enc;
10550
10551    xmlInitParser();
10552
10553    if ((ctxt == NULL) || (ctxt->input == NULL))
10554        return(-1);
10555
10556    GROW;
10557
10558    /*
10559     * SAX: detecting the level.
10560     */
10561    xmlDetectSAX2(ctxt);
10562
10563    /*
10564     * SAX: beginning of the document processing.
10565     */
10566    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10567        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10568
10569    if ((ctxt->encoding == NULL) &&
10570        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10571	/*
10572	 * Get the 4 first bytes and decode the charset
10573	 * if enc != XML_CHAR_ENCODING_NONE
10574	 * plug some encoding conversion routines.
10575	 */
10576	start[0] = RAW;
10577	start[1] = NXT(1);
10578	start[2] = NXT(2);
10579	start[3] = NXT(3);
10580	enc = xmlDetectCharEncoding(&start[0], 4);
10581	if (enc != XML_CHAR_ENCODING_NONE) {
10582	    xmlSwitchEncoding(ctxt, enc);
10583	}
10584    }
10585
10586
10587    if (CUR == 0) {
10588	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10589    }
10590
10591    /*
10592     * Check for the XMLDecl in the Prolog.
10593     * do not GROW here to avoid the detected encoder to decode more
10594     * than just the first line, unless the amount of data is really
10595     * too small to hold "<?xml version="1.0" encoding="foo"
10596     */
10597    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10598       GROW;
10599    }
10600    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10601
10602	/*
10603	 * Note that we will switch encoding on the fly.
10604	 */
10605	xmlParseXMLDecl(ctxt);
10606	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10607	    /*
10608	     * The XML REC instructs us to stop parsing right here
10609	     */
10610	    return(-1);
10611	}
10612	ctxt->standalone = ctxt->input->standalone;
10613	SKIP_BLANKS;
10614    } else {
10615	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10616    }
10617    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10618        ctxt->sax->startDocument(ctxt->userData);
10619
10620    /*
10621     * The Misc part of the Prolog
10622     */
10623    GROW;
10624    xmlParseMisc(ctxt);
10625
10626    /*
10627     * Then possibly doc type declaration(s) and more Misc
10628     * (doctypedecl Misc*)?
10629     */
10630    GROW;
10631    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10632
10633	ctxt->inSubset = 1;
10634	xmlParseDocTypeDecl(ctxt);
10635	if (RAW == '[') {
10636	    ctxt->instate = XML_PARSER_DTD;
10637	    xmlParseInternalSubset(ctxt);
10638	}
10639
10640	/*
10641	 * Create and update the external subset.
10642	 */
10643	ctxt->inSubset = 2;
10644	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10645	    (!ctxt->disableSAX))
10646	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10647	                              ctxt->extSubSystem, ctxt->extSubURI);
10648	ctxt->inSubset = 0;
10649
10650        xmlCleanSpecialAttr(ctxt);
10651
10652	ctxt->instate = XML_PARSER_PROLOG;
10653	xmlParseMisc(ctxt);
10654    }
10655
10656    /*
10657     * Time to start parsing the tree itself
10658     */
10659    GROW;
10660    if (RAW != '<') {
10661	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10662		       "Start tag expected, '<' not found\n");
10663    } else {
10664	ctxt->instate = XML_PARSER_CONTENT;
10665	xmlParseElement(ctxt);
10666	ctxt->instate = XML_PARSER_EPILOG;
10667
10668
10669	/*
10670	 * The Misc part at the end
10671	 */
10672	xmlParseMisc(ctxt);
10673
10674	if (RAW != 0) {
10675	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10676	}
10677	ctxt->instate = XML_PARSER_EOF;
10678    }
10679
10680    /*
10681     * SAX: end of the document processing.
10682     */
10683    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10684        ctxt->sax->endDocument(ctxt->userData);
10685
10686    /*
10687     * Remove locally kept entity definitions if the tree was not built
10688     */
10689    if ((ctxt->myDoc != NULL) &&
10690	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10691	xmlFreeDoc(ctxt->myDoc);
10692	ctxt->myDoc = NULL;
10693    }
10694
10695    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10696        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10697	if (ctxt->valid)
10698	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10699	if (ctxt->nsWellFormed)
10700	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10701	if (ctxt->options & XML_PARSE_OLD10)
10702	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10703    }
10704    if (! ctxt->wellFormed) {
10705	ctxt->valid = 0;
10706	return(-1);
10707    }
10708    return(0);
10709}
10710
10711/**
10712 * xmlParseExtParsedEnt:
10713 * @ctxt:  an XML parser context
10714 *
10715 * parse a general parsed entity
10716 * An external general parsed entity is well-formed if it matches the
10717 * production labeled extParsedEnt.
10718 *
10719 * [78] extParsedEnt ::= TextDecl? content
10720 *
10721 * Returns 0, -1 in case of error. the parser context is augmented
10722 *                as a result of the parsing.
10723 */
10724
10725int
10726xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10727    xmlChar start[4];
10728    xmlCharEncoding enc;
10729
10730    if ((ctxt == NULL) || (ctxt->input == NULL))
10731        return(-1);
10732
10733    xmlDefaultSAXHandlerInit();
10734
10735    xmlDetectSAX2(ctxt);
10736
10737    GROW;
10738
10739    /*
10740     * SAX: beginning of the document processing.
10741     */
10742    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10743        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10744
10745    /*
10746     * Get the 4 first bytes and decode the charset
10747     * if enc != XML_CHAR_ENCODING_NONE
10748     * plug some encoding conversion routines.
10749     */
10750    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10751	start[0] = RAW;
10752	start[1] = NXT(1);
10753	start[2] = NXT(2);
10754	start[3] = NXT(3);
10755	enc = xmlDetectCharEncoding(start, 4);
10756	if (enc != XML_CHAR_ENCODING_NONE) {
10757	    xmlSwitchEncoding(ctxt, enc);
10758	}
10759    }
10760
10761
10762    if (CUR == 0) {
10763	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10764    }
10765
10766    /*
10767     * Check for the XMLDecl in the Prolog.
10768     */
10769    GROW;
10770    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10771
10772	/*
10773	 * Note that we will switch encoding on the fly.
10774	 */
10775	xmlParseXMLDecl(ctxt);
10776	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10777	    /*
10778	     * The XML REC instructs us to stop parsing right here
10779	     */
10780	    return(-1);
10781	}
10782	SKIP_BLANKS;
10783    } else {
10784	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10785    }
10786    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10787        ctxt->sax->startDocument(ctxt->userData);
10788
10789    /*
10790     * Doing validity checking on chunk doesn't make sense
10791     */
10792    ctxt->instate = XML_PARSER_CONTENT;
10793    ctxt->validate = 0;
10794    ctxt->loadsubset = 0;
10795    ctxt->depth = 0;
10796
10797    xmlParseContent(ctxt);
10798
10799    if ((RAW == '<') && (NXT(1) == '/')) {
10800	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10801    } else if (RAW != 0) {
10802	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10803    }
10804
10805    /*
10806     * SAX: end of the document processing.
10807     */
10808    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10809        ctxt->sax->endDocument(ctxt->userData);
10810
10811    if (! ctxt->wellFormed) return(-1);
10812    return(0);
10813}
10814
10815#ifdef LIBXML_PUSH_ENABLED
10816/************************************************************************
10817 *									*
10818 *		Progressive parsing interfaces				*
10819 *									*
10820 ************************************************************************/
10821
10822/**
10823 * xmlParseLookupSequence:
10824 * @ctxt:  an XML parser context
10825 * @first:  the first char to lookup
10826 * @next:  the next char to lookup or zero
10827 * @third:  the next char to lookup or zero
10828 *
10829 * Try to find if a sequence (first, next, third) or  just (first next) or
10830 * (first) is available in the input stream.
10831 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10832 * to avoid rescanning sequences of bytes, it DOES change the state of the
10833 * parser, do not use liberally.
10834 *
10835 * Returns the index to the current parsing point if the full sequence
10836 *      is available, -1 otherwise.
10837 */
10838static int
10839xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10840                       xmlChar next, xmlChar third) {
10841    int base, len;
10842    xmlParserInputPtr in;
10843    const xmlChar *buf;
10844
10845    in = ctxt->input;
10846    if (in == NULL) return(-1);
10847    base = in->cur - in->base;
10848    if (base < 0) return(-1);
10849    if (ctxt->checkIndex > base)
10850        base = ctxt->checkIndex;
10851    if (in->buf == NULL) {
10852	buf = in->base;
10853	len = in->length;
10854    } else {
10855	buf = xmlBufContent(in->buf->buffer);
10856	len = xmlBufUse(in->buf->buffer);
10857    }
10858    /* take into account the sequence length */
10859    if (third) len -= 2;
10860    else if (next) len --;
10861    for (;base < len;base++) {
10862        if (buf[base] == first) {
10863	    if (third != 0) {
10864		if ((buf[base + 1] != next) ||
10865		    (buf[base + 2] != third)) continue;
10866	    } else if (next != 0) {
10867		if (buf[base + 1] != next) continue;
10868	    }
10869	    ctxt->checkIndex = 0;
10870#ifdef DEBUG_PUSH
10871	    if (next == 0)
10872		xmlGenericError(xmlGenericErrorContext,
10873			"PP: lookup '%c' found at %d\n",
10874			first, base);
10875	    else if (third == 0)
10876		xmlGenericError(xmlGenericErrorContext,
10877			"PP: lookup '%c%c' found at %d\n",
10878			first, next, base);
10879	    else
10880		xmlGenericError(xmlGenericErrorContext,
10881			"PP: lookup '%c%c%c' found at %d\n",
10882			first, next, third, base);
10883#endif
10884	    return(base - (in->cur - in->base));
10885	}
10886    }
10887    ctxt->checkIndex = base;
10888#ifdef DEBUG_PUSH
10889    if (next == 0)
10890	xmlGenericError(xmlGenericErrorContext,
10891		"PP: lookup '%c' failed\n", first);
10892    else if (third == 0)
10893	xmlGenericError(xmlGenericErrorContext,
10894		"PP: lookup '%c%c' failed\n", first, next);
10895    else
10896	xmlGenericError(xmlGenericErrorContext,
10897		"PP: lookup '%c%c%c' failed\n", first, next, third);
10898#endif
10899    return(-1);
10900}
10901
10902/**
10903 * xmlParseGetLasts:
10904 * @ctxt:  an XML parser context
10905 * @lastlt:  pointer to store the last '<' from the input
10906 * @lastgt:  pointer to store the last '>' from the input
10907 *
10908 * Lookup the last < and > in the current chunk
10909 */
10910static void
10911xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10912                 const xmlChar **lastgt) {
10913    const xmlChar *tmp;
10914
10915    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10916	xmlGenericError(xmlGenericErrorContext,
10917		    "Internal error: xmlParseGetLasts\n");
10918	return;
10919    }
10920    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10921        tmp = ctxt->input->end;
10922	tmp--;
10923	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10924	if (tmp < ctxt->input->base) {
10925	    *lastlt = NULL;
10926	    *lastgt = NULL;
10927	} else {
10928	    *lastlt = tmp;
10929	    tmp++;
10930	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10931	        if (*tmp == '\'') {
10932		    tmp++;
10933		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10934		    if (tmp < ctxt->input->end) tmp++;
10935		} else if (*tmp == '"') {
10936		    tmp++;
10937		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10938		    if (tmp < ctxt->input->end) tmp++;
10939		} else
10940		    tmp++;
10941	    }
10942	    if (tmp < ctxt->input->end)
10943	        *lastgt = tmp;
10944	    else {
10945	        tmp = *lastlt;
10946		tmp--;
10947		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10948		if (tmp >= ctxt->input->base)
10949		    *lastgt = tmp;
10950		else
10951		    *lastgt = NULL;
10952	    }
10953	}
10954    } else {
10955        *lastlt = NULL;
10956	*lastgt = NULL;
10957    }
10958}
10959/**
10960 * xmlCheckCdataPush:
10961 * @cur: pointer to the bock of characters
10962 * @len: length of the block in bytes
10963 *
10964 * Check that the block of characters is okay as SCdata content [20]
10965 *
10966 * Returns the number of bytes to pass if okay, a negative index where an
10967 *         UTF-8 error occured otherwise
10968 */
10969static int
10970xmlCheckCdataPush(const xmlChar *utf, int len) {
10971    int ix;
10972    unsigned char c;
10973    int codepoint;
10974
10975    if ((utf == NULL) || (len <= 0))
10976        return(0);
10977
10978    for (ix = 0; ix < len;) {      /* string is 0-terminated */
10979        c = utf[ix];
10980        if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
10981	    if (c >= 0x20)
10982		ix++;
10983	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10984	        ix++;
10985	    else
10986	        return(-ix);
10987	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10988	    if (ix + 2 > len) return(ix);
10989	    if ((utf[ix+1] & 0xc0 ) != 0x80)
10990	        return(-ix);
10991	    codepoint = (utf[ix] & 0x1f) << 6;
10992	    codepoint |= utf[ix+1] & 0x3f;
10993	    if (!xmlIsCharQ(codepoint))
10994	        return(-ix);
10995	    ix += 2;
10996	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10997	    if (ix + 3 > len) return(ix);
10998	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10999	        ((utf[ix+2] & 0xc0) != 0x80))
11000		    return(-ix);
11001	    codepoint = (utf[ix] & 0xf) << 12;
11002	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11003	    codepoint |= utf[ix+2] & 0x3f;
11004	    if (!xmlIsCharQ(codepoint))
11005	        return(-ix);
11006	    ix += 3;
11007	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11008	    if (ix + 4 > len) return(ix);
11009	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11010	        ((utf[ix+2] & 0xc0) != 0x80) ||
11011		((utf[ix+3] & 0xc0) != 0x80))
11012		    return(-ix);
11013	    codepoint = (utf[ix] & 0x7) << 18;
11014	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11015	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11016	    codepoint |= utf[ix+3] & 0x3f;
11017	    if (!xmlIsCharQ(codepoint))
11018	        return(-ix);
11019	    ix += 4;
11020	} else				/* unknown encoding */
11021	    return(-ix);
11022      }
11023      return(ix);
11024}
11025
11026/**
11027 * xmlParseTryOrFinish:
11028 * @ctxt:  an XML parser context
11029 * @terminate:  last chunk indicator
11030 *
11031 * Try to progress on parsing
11032 *
11033 * Returns zero if no parsing was possible
11034 */
11035static int
11036xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11037    int ret = 0;
11038    int avail, tlen;
11039    xmlChar cur, next;
11040    const xmlChar *lastlt, *lastgt;
11041
11042    if (ctxt->input == NULL)
11043        return(0);
11044
11045#ifdef DEBUG_PUSH
11046    switch (ctxt->instate) {
11047	case XML_PARSER_EOF:
11048	    xmlGenericError(xmlGenericErrorContext,
11049		    "PP: try EOF\n"); break;
11050	case XML_PARSER_START:
11051	    xmlGenericError(xmlGenericErrorContext,
11052		    "PP: try START\n"); break;
11053	case XML_PARSER_MISC:
11054	    xmlGenericError(xmlGenericErrorContext,
11055		    "PP: try MISC\n");break;
11056	case XML_PARSER_COMMENT:
11057	    xmlGenericError(xmlGenericErrorContext,
11058		    "PP: try COMMENT\n");break;
11059	case XML_PARSER_PROLOG:
11060	    xmlGenericError(xmlGenericErrorContext,
11061		    "PP: try PROLOG\n");break;
11062	case XML_PARSER_START_TAG:
11063	    xmlGenericError(xmlGenericErrorContext,
11064		    "PP: try START_TAG\n");break;
11065	case XML_PARSER_CONTENT:
11066	    xmlGenericError(xmlGenericErrorContext,
11067		    "PP: try CONTENT\n");break;
11068	case XML_PARSER_CDATA_SECTION:
11069	    xmlGenericError(xmlGenericErrorContext,
11070		    "PP: try CDATA_SECTION\n");break;
11071	case XML_PARSER_END_TAG:
11072	    xmlGenericError(xmlGenericErrorContext,
11073		    "PP: try END_TAG\n");break;
11074	case XML_PARSER_ENTITY_DECL:
11075	    xmlGenericError(xmlGenericErrorContext,
11076		    "PP: try ENTITY_DECL\n");break;
11077	case XML_PARSER_ENTITY_VALUE:
11078	    xmlGenericError(xmlGenericErrorContext,
11079		    "PP: try ENTITY_VALUE\n");break;
11080	case XML_PARSER_ATTRIBUTE_VALUE:
11081	    xmlGenericError(xmlGenericErrorContext,
11082		    "PP: try ATTRIBUTE_VALUE\n");break;
11083	case XML_PARSER_DTD:
11084	    xmlGenericError(xmlGenericErrorContext,
11085		    "PP: try DTD\n");break;
11086	case XML_PARSER_EPILOG:
11087	    xmlGenericError(xmlGenericErrorContext,
11088		    "PP: try EPILOG\n");break;
11089	case XML_PARSER_PI:
11090	    xmlGenericError(xmlGenericErrorContext,
11091		    "PP: try PI\n");break;
11092        case XML_PARSER_IGNORE:
11093            xmlGenericError(xmlGenericErrorContext,
11094		    "PP: try IGNORE\n");break;
11095    }
11096#endif
11097
11098    if ((ctxt->input != NULL) &&
11099        (ctxt->input->cur - ctxt->input->base > 4096)) {
11100	xmlSHRINK(ctxt);
11101	ctxt->checkIndex = 0;
11102    }
11103    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11104
11105    while (1) {
11106	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11107	    return(0);
11108
11109
11110	/*
11111	 * Pop-up of finished entities.
11112	 */
11113	while ((RAW == 0) && (ctxt->inputNr > 1))
11114	    xmlPopInput(ctxt);
11115
11116	if (ctxt->input == NULL) break;
11117	if (ctxt->input->buf == NULL)
11118	    avail = ctxt->input->length -
11119	            (ctxt->input->cur - ctxt->input->base);
11120	else {
11121	    /*
11122	     * If we are operating on converted input, try to flush
11123	     * remainng chars to avoid them stalling in the non-converted
11124	     * buffer.
11125	     */
11126	    if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
11127                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11128                                                 ctxt->input);
11129		size_t current = ctxt->input->cur - ctxt->input->base;
11130
11131		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11132                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11133                                      base, current);
11134	    }
11135	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11136		    (ctxt->input->cur - ctxt->input->base);
11137	}
11138        if (avail < 1)
11139	    goto done;
11140        switch (ctxt->instate) {
11141            case XML_PARSER_EOF:
11142	        /*
11143		 * Document parsing is done !
11144		 */
11145	        goto done;
11146            case XML_PARSER_START:
11147		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11148		    xmlChar start[4];
11149		    xmlCharEncoding enc;
11150
11151		    /*
11152		     * Very first chars read from the document flow.
11153		     */
11154		    if (avail < 4)
11155			goto done;
11156
11157		    /*
11158		     * Get the 4 first bytes and decode the charset
11159		     * if enc != XML_CHAR_ENCODING_NONE
11160		     * plug some encoding conversion routines,
11161		     * else xmlSwitchEncoding will set to (default)
11162		     * UTF8.
11163		     */
11164		    start[0] = RAW;
11165		    start[1] = NXT(1);
11166		    start[2] = NXT(2);
11167		    start[3] = NXT(3);
11168		    enc = xmlDetectCharEncoding(start, 4);
11169		    xmlSwitchEncoding(ctxt, enc);
11170		    break;
11171		}
11172
11173		if (avail < 2)
11174		    goto done;
11175		cur = ctxt->input->cur[0];
11176		next = ctxt->input->cur[1];
11177		if (cur == 0) {
11178		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11179			ctxt->sax->setDocumentLocator(ctxt->userData,
11180						      &xmlDefaultSAXLocator);
11181		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11182		    ctxt->instate = XML_PARSER_EOF;
11183#ifdef DEBUG_PUSH
11184		    xmlGenericError(xmlGenericErrorContext,
11185			    "PP: entering EOF\n");
11186#endif
11187		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11188			ctxt->sax->endDocument(ctxt->userData);
11189		    goto done;
11190		}
11191	        if ((cur == '<') && (next == '?')) {
11192		    /* PI or XML decl */
11193		    if (avail < 5) return(ret);
11194		    if ((!terminate) &&
11195		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11196			return(ret);
11197		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11198			ctxt->sax->setDocumentLocator(ctxt->userData,
11199						      &xmlDefaultSAXLocator);
11200		    if ((ctxt->input->cur[2] == 'x') &&
11201			(ctxt->input->cur[3] == 'm') &&
11202			(ctxt->input->cur[4] == 'l') &&
11203			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11204			ret += 5;
11205#ifdef DEBUG_PUSH
11206			xmlGenericError(xmlGenericErrorContext,
11207				"PP: Parsing XML Decl\n");
11208#endif
11209			xmlParseXMLDecl(ctxt);
11210			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11211			    /*
11212			     * The XML REC instructs us to stop parsing right
11213			     * here
11214			     */
11215			    ctxt->instate = XML_PARSER_EOF;
11216			    return(0);
11217			}
11218			ctxt->standalone = ctxt->input->standalone;
11219			if ((ctxt->encoding == NULL) &&
11220			    (ctxt->input->encoding != NULL))
11221			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11222			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11223			    (!ctxt->disableSAX))
11224			    ctxt->sax->startDocument(ctxt->userData);
11225			ctxt->instate = XML_PARSER_MISC;
11226#ifdef DEBUG_PUSH
11227			xmlGenericError(xmlGenericErrorContext,
11228				"PP: entering MISC\n");
11229#endif
11230		    } else {
11231			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11232			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11233			    (!ctxt->disableSAX))
11234			    ctxt->sax->startDocument(ctxt->userData);
11235			ctxt->instate = XML_PARSER_MISC;
11236#ifdef DEBUG_PUSH
11237			xmlGenericError(xmlGenericErrorContext,
11238				"PP: entering MISC\n");
11239#endif
11240		    }
11241		} else {
11242		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11243			ctxt->sax->setDocumentLocator(ctxt->userData,
11244						      &xmlDefaultSAXLocator);
11245		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11246		    if (ctxt->version == NULL) {
11247		        xmlErrMemory(ctxt, NULL);
11248			break;
11249		    }
11250		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11251		        (!ctxt->disableSAX))
11252			ctxt->sax->startDocument(ctxt->userData);
11253		    ctxt->instate = XML_PARSER_MISC;
11254#ifdef DEBUG_PUSH
11255		    xmlGenericError(xmlGenericErrorContext,
11256			    "PP: entering MISC\n");
11257#endif
11258		}
11259		break;
11260            case XML_PARSER_START_TAG: {
11261	        const xmlChar *name;
11262		const xmlChar *prefix = NULL;
11263		const xmlChar *URI = NULL;
11264		int nsNr = ctxt->nsNr;
11265
11266		if ((avail < 2) && (ctxt->inputNr == 1))
11267		    goto done;
11268		cur = ctxt->input->cur[0];
11269	        if (cur != '<') {
11270		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11271		    ctxt->instate = XML_PARSER_EOF;
11272		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11273			ctxt->sax->endDocument(ctxt->userData);
11274		    goto done;
11275		}
11276		if (!terminate) {
11277		    if (ctxt->progressive) {
11278		        /* > can be found unescaped in attribute values */
11279		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11280			    goto done;
11281		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11282			goto done;
11283		    }
11284		}
11285		if (ctxt->spaceNr == 0)
11286		    spacePush(ctxt, -1);
11287		else if (*ctxt->space == -2)
11288		    spacePush(ctxt, -1);
11289		else
11290		    spacePush(ctxt, *ctxt->space);
11291#ifdef LIBXML_SAX1_ENABLED
11292		if (ctxt->sax2)
11293#endif /* LIBXML_SAX1_ENABLED */
11294		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11295#ifdef LIBXML_SAX1_ENABLED
11296		else
11297		    name = xmlParseStartTag(ctxt);
11298#endif /* LIBXML_SAX1_ENABLED */
11299		if (ctxt->instate == XML_PARSER_EOF)
11300		    goto done;
11301		if (name == NULL) {
11302		    spacePop(ctxt);
11303		    ctxt->instate = XML_PARSER_EOF;
11304		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11305			ctxt->sax->endDocument(ctxt->userData);
11306		    goto done;
11307		}
11308#ifdef LIBXML_VALID_ENABLED
11309		/*
11310		 * [ VC: Root Element Type ]
11311		 * The Name in the document type declaration must match
11312		 * the element type of the root element.
11313		 */
11314		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11315		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11316		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11317#endif /* LIBXML_VALID_ENABLED */
11318
11319		/*
11320		 * Check for an Empty Element.
11321		 */
11322		if ((RAW == '/') && (NXT(1) == '>')) {
11323		    SKIP(2);
11324
11325		    if (ctxt->sax2) {
11326			if ((ctxt->sax != NULL) &&
11327			    (ctxt->sax->endElementNs != NULL) &&
11328			    (!ctxt->disableSAX))
11329			    ctxt->sax->endElementNs(ctxt->userData, name,
11330			                            prefix, URI);
11331			if (ctxt->nsNr - nsNr > 0)
11332			    nsPop(ctxt, ctxt->nsNr - nsNr);
11333#ifdef LIBXML_SAX1_ENABLED
11334		    } else {
11335			if ((ctxt->sax != NULL) &&
11336			    (ctxt->sax->endElement != NULL) &&
11337			    (!ctxt->disableSAX))
11338			    ctxt->sax->endElement(ctxt->userData, name);
11339#endif /* LIBXML_SAX1_ENABLED */
11340		    }
11341		    spacePop(ctxt);
11342		    if (ctxt->nameNr == 0) {
11343			ctxt->instate = XML_PARSER_EPILOG;
11344		    } else {
11345			ctxt->instate = XML_PARSER_CONTENT;
11346		    }
11347                    ctxt->progressive = 1;
11348		    break;
11349		}
11350		if (RAW == '>') {
11351		    NEXT;
11352		} else {
11353		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11354					 "Couldn't find end of Start Tag %s\n",
11355					 name);
11356		    nodePop(ctxt);
11357		    spacePop(ctxt);
11358		}
11359		if (ctxt->sax2)
11360		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11361#ifdef LIBXML_SAX1_ENABLED
11362		else
11363		    namePush(ctxt, name);
11364#endif /* LIBXML_SAX1_ENABLED */
11365
11366		ctxt->instate = XML_PARSER_CONTENT;
11367                ctxt->progressive = 1;
11368                break;
11369	    }
11370            case XML_PARSER_CONTENT: {
11371		const xmlChar *test;
11372		unsigned int cons;
11373		if ((avail < 2) && (ctxt->inputNr == 1))
11374		    goto done;
11375		cur = ctxt->input->cur[0];
11376		next = ctxt->input->cur[1];
11377
11378		test = CUR_PTR;
11379	        cons = ctxt->input->consumed;
11380		if ((cur == '<') && (next == '/')) {
11381		    ctxt->instate = XML_PARSER_END_TAG;
11382		    break;
11383	        } else if ((cur == '<') && (next == '?')) {
11384		    if ((!terminate) &&
11385		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11386                        ctxt->progressive = XML_PARSER_PI;
11387			goto done;
11388                    }
11389		    xmlParsePI(ctxt);
11390		    ctxt->instate = XML_PARSER_CONTENT;
11391                    ctxt->progressive = 1;
11392		} else if ((cur == '<') && (next != '!')) {
11393		    ctxt->instate = XML_PARSER_START_TAG;
11394		    break;
11395		} else if ((cur == '<') && (next == '!') &&
11396		           (ctxt->input->cur[2] == '-') &&
11397			   (ctxt->input->cur[3] == '-')) {
11398		    int term;
11399
11400	            if (avail < 4)
11401		        goto done;
11402		    ctxt->input->cur += 4;
11403		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11404		    ctxt->input->cur -= 4;
11405		    if ((!terminate) && (term < 0)) {
11406                        ctxt->progressive = XML_PARSER_COMMENT;
11407			goto done;
11408                    }
11409		    xmlParseComment(ctxt);
11410		    ctxt->instate = XML_PARSER_CONTENT;
11411                    ctxt->progressive = 1;
11412		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11413		    (ctxt->input->cur[2] == '[') &&
11414		    (ctxt->input->cur[3] == 'C') &&
11415		    (ctxt->input->cur[4] == 'D') &&
11416		    (ctxt->input->cur[5] == 'A') &&
11417		    (ctxt->input->cur[6] == 'T') &&
11418		    (ctxt->input->cur[7] == 'A') &&
11419		    (ctxt->input->cur[8] == '[')) {
11420		    SKIP(9);
11421		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11422		    break;
11423		} else if ((cur == '<') && (next == '!') &&
11424		           (avail < 9)) {
11425		    goto done;
11426		} else if (cur == '&') {
11427		    if ((!terminate) &&
11428		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11429			goto done;
11430		    xmlParseReference(ctxt);
11431		} else {
11432		    /* TODO Avoid the extra copy, handle directly !!! */
11433		    /*
11434		     * Goal of the following test is:
11435		     *  - minimize calls to the SAX 'character' callback
11436		     *    when they are mergeable
11437		     *  - handle an problem for isBlank when we only parse
11438		     *    a sequence of blank chars and the next one is
11439		     *    not available to check against '<' presence.
11440		     *  - tries to homogenize the differences in SAX
11441		     *    callbacks between the push and pull versions
11442		     *    of the parser.
11443		     */
11444		    if ((ctxt->inputNr == 1) &&
11445		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11446			if (!terminate) {
11447			    if (ctxt->progressive) {
11448				if ((lastlt == NULL) ||
11449				    (ctxt->input->cur > lastlt))
11450				    goto done;
11451			    } else if (xmlParseLookupSequence(ctxt,
11452			                                      '<', 0, 0) < 0) {
11453				goto done;
11454			    }
11455			}
11456                    }
11457		    ctxt->checkIndex = 0;
11458		    xmlParseCharData(ctxt, 0);
11459		}
11460		/*
11461		 * Pop-up of finished entities.
11462		 */
11463		while ((RAW == 0) && (ctxt->inputNr > 1))
11464		    xmlPopInput(ctxt);
11465		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11466		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11467		                "detected an error in element content\n");
11468		    ctxt->instate = XML_PARSER_EOF;
11469		    break;
11470		}
11471		break;
11472	    }
11473            case XML_PARSER_END_TAG:
11474		if (avail < 2)
11475		    goto done;
11476		if (!terminate) {
11477		    if (ctxt->progressive) {
11478		        /* > can be found unescaped in attribute values */
11479		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11480			    goto done;
11481		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11482			goto done;
11483		    }
11484		}
11485		if (ctxt->sax2) {
11486		    xmlParseEndTag2(ctxt,
11487		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11488		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11489		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11490		    nameNsPop(ctxt);
11491		}
11492#ifdef LIBXML_SAX1_ENABLED
11493		  else
11494		    xmlParseEndTag1(ctxt, 0);
11495#endif /* LIBXML_SAX1_ENABLED */
11496		if (ctxt->instate == XML_PARSER_EOF) {
11497		    /* Nothing */
11498		} else if (ctxt->nameNr == 0) {
11499		    ctxt->instate = XML_PARSER_EPILOG;
11500		} else {
11501		    ctxt->instate = XML_PARSER_CONTENT;
11502		}
11503		break;
11504            case XML_PARSER_CDATA_SECTION: {
11505	        /*
11506		 * The Push mode need to have the SAX callback for
11507		 * cdataBlock merge back contiguous callbacks.
11508		 */
11509		int base;
11510
11511		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11512		if (base < 0) {
11513		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11514		        int tmp;
11515
11516			tmp = xmlCheckCdataPush(ctxt->input->cur,
11517			                        XML_PARSER_BIG_BUFFER_SIZE);
11518			if (tmp < 0) {
11519			    tmp = -tmp;
11520			    ctxt->input->cur += tmp;
11521			    goto encoding_error;
11522			}
11523			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11524			    if (ctxt->sax->cdataBlock != NULL)
11525				ctxt->sax->cdataBlock(ctxt->userData,
11526				                      ctxt->input->cur, tmp);
11527			    else if (ctxt->sax->characters != NULL)
11528				ctxt->sax->characters(ctxt->userData,
11529				                      ctxt->input->cur, tmp);
11530			}
11531			SKIPL(tmp);
11532			ctxt->checkIndex = 0;
11533		    }
11534		    goto done;
11535		} else {
11536		    int tmp;
11537
11538		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11539		    if ((tmp < 0) || (tmp != base)) {
11540			tmp = -tmp;
11541			ctxt->input->cur += tmp;
11542			goto encoding_error;
11543		    }
11544		    if ((ctxt->sax != NULL) && (base == 0) &&
11545		        (ctxt->sax->cdataBlock != NULL) &&
11546		        (!ctxt->disableSAX)) {
11547			/*
11548			 * Special case to provide identical behaviour
11549			 * between pull and push parsers on enpty CDATA
11550			 * sections
11551			 */
11552			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11553			     (!strncmp((const char *)&ctxt->input->cur[-9],
11554			               "<![CDATA[", 9)))
11555			     ctxt->sax->cdataBlock(ctxt->userData,
11556			                           BAD_CAST "", 0);
11557		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11558			(!ctxt->disableSAX)) {
11559			if (ctxt->sax->cdataBlock != NULL)
11560			    ctxt->sax->cdataBlock(ctxt->userData,
11561						  ctxt->input->cur, base);
11562			else if (ctxt->sax->characters != NULL)
11563			    ctxt->sax->characters(ctxt->userData,
11564						  ctxt->input->cur, base);
11565		    }
11566		    SKIPL(base + 3);
11567		    ctxt->checkIndex = 0;
11568		    ctxt->instate = XML_PARSER_CONTENT;
11569#ifdef DEBUG_PUSH
11570		    xmlGenericError(xmlGenericErrorContext,
11571			    "PP: entering CONTENT\n");
11572#endif
11573		}
11574		break;
11575	    }
11576            case XML_PARSER_MISC:
11577		SKIP_BLANKS;
11578		if (ctxt->input->buf == NULL)
11579		    avail = ctxt->input->length -
11580		            (ctxt->input->cur - ctxt->input->base);
11581		else
11582		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11583		            (ctxt->input->cur - ctxt->input->base);
11584		if (avail < 2)
11585		    goto done;
11586		cur = ctxt->input->cur[0];
11587		next = ctxt->input->cur[1];
11588	        if ((cur == '<') && (next == '?')) {
11589		    if ((!terminate) &&
11590		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11591                        ctxt->progressive = XML_PARSER_PI;
11592			goto done;
11593                    }
11594#ifdef DEBUG_PUSH
11595		    xmlGenericError(xmlGenericErrorContext,
11596			    "PP: Parsing PI\n");
11597#endif
11598		    xmlParsePI(ctxt);
11599		    ctxt->instate = XML_PARSER_MISC;
11600                    ctxt->progressive = 1;
11601		    ctxt->checkIndex = 0;
11602		} else if ((cur == '<') && (next == '!') &&
11603		    (ctxt->input->cur[2] == '-') &&
11604		    (ctxt->input->cur[3] == '-')) {
11605		    if ((!terminate) &&
11606		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11607                        ctxt->progressive = XML_PARSER_COMMENT;
11608			goto done;
11609                    }
11610#ifdef DEBUG_PUSH
11611		    xmlGenericError(xmlGenericErrorContext,
11612			    "PP: Parsing Comment\n");
11613#endif
11614		    xmlParseComment(ctxt);
11615		    ctxt->instate = XML_PARSER_MISC;
11616                    ctxt->progressive = 1;
11617		    ctxt->checkIndex = 0;
11618		} else if ((cur == '<') && (next == '!') &&
11619		    (ctxt->input->cur[2] == 'D') &&
11620		    (ctxt->input->cur[3] == 'O') &&
11621		    (ctxt->input->cur[4] == 'C') &&
11622		    (ctxt->input->cur[5] == 'T') &&
11623		    (ctxt->input->cur[6] == 'Y') &&
11624		    (ctxt->input->cur[7] == 'P') &&
11625		    (ctxt->input->cur[8] == 'E')) {
11626		    if ((!terminate) &&
11627		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11628                        ctxt->progressive = XML_PARSER_DTD;
11629			goto done;
11630                    }
11631#ifdef DEBUG_PUSH
11632		    xmlGenericError(xmlGenericErrorContext,
11633			    "PP: Parsing internal subset\n");
11634#endif
11635		    ctxt->inSubset = 1;
11636                    ctxt->progressive = 1;
11637		    ctxt->checkIndex = 0;
11638		    xmlParseDocTypeDecl(ctxt);
11639		    if (RAW == '[') {
11640			ctxt->instate = XML_PARSER_DTD;
11641#ifdef DEBUG_PUSH
11642			xmlGenericError(xmlGenericErrorContext,
11643				"PP: entering DTD\n");
11644#endif
11645		    } else {
11646			/*
11647			 * Create and update the external subset.
11648			 */
11649			ctxt->inSubset = 2;
11650			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11651			    (ctxt->sax->externalSubset != NULL))
11652			    ctxt->sax->externalSubset(ctxt->userData,
11653				    ctxt->intSubName, ctxt->extSubSystem,
11654				    ctxt->extSubURI);
11655			ctxt->inSubset = 0;
11656			xmlCleanSpecialAttr(ctxt);
11657			ctxt->instate = XML_PARSER_PROLOG;
11658#ifdef DEBUG_PUSH
11659			xmlGenericError(xmlGenericErrorContext,
11660				"PP: entering PROLOG\n");
11661#endif
11662		    }
11663		} else if ((cur == '<') && (next == '!') &&
11664		           (avail < 9)) {
11665		    goto done;
11666		} else {
11667		    ctxt->instate = XML_PARSER_START_TAG;
11668		    ctxt->progressive = XML_PARSER_START_TAG;
11669		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11670#ifdef DEBUG_PUSH
11671		    xmlGenericError(xmlGenericErrorContext,
11672			    "PP: entering START_TAG\n");
11673#endif
11674		}
11675		break;
11676            case XML_PARSER_PROLOG:
11677		SKIP_BLANKS;
11678		if (ctxt->input->buf == NULL)
11679		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11680		else
11681		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11682                            (ctxt->input->cur - ctxt->input->base);
11683		if (avail < 2)
11684		    goto done;
11685		cur = ctxt->input->cur[0];
11686		next = ctxt->input->cur[1];
11687	        if ((cur == '<') && (next == '?')) {
11688		    if ((!terminate) &&
11689		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11690                        ctxt->progressive = XML_PARSER_PI;
11691			goto done;
11692                    }
11693#ifdef DEBUG_PUSH
11694		    xmlGenericError(xmlGenericErrorContext,
11695			    "PP: Parsing PI\n");
11696#endif
11697		    xmlParsePI(ctxt);
11698		    ctxt->instate = XML_PARSER_PROLOG;
11699                    ctxt->progressive = 1;
11700		} else if ((cur == '<') && (next == '!') &&
11701		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11702		    if ((!terminate) &&
11703		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11704                        ctxt->progressive = XML_PARSER_COMMENT;
11705			goto done;
11706                    }
11707#ifdef DEBUG_PUSH
11708		    xmlGenericError(xmlGenericErrorContext,
11709			    "PP: Parsing Comment\n");
11710#endif
11711		    xmlParseComment(ctxt);
11712		    ctxt->instate = XML_PARSER_PROLOG;
11713                    ctxt->progressive = 1;
11714		} else if ((cur == '<') && (next == '!') &&
11715		           (avail < 4)) {
11716		    goto done;
11717		} else {
11718		    ctxt->instate = XML_PARSER_START_TAG;
11719		    if (ctxt->progressive == 0)
11720			ctxt->progressive = XML_PARSER_START_TAG;
11721		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11722#ifdef DEBUG_PUSH
11723		    xmlGenericError(xmlGenericErrorContext,
11724			    "PP: entering START_TAG\n");
11725#endif
11726		}
11727		break;
11728            case XML_PARSER_EPILOG:
11729		SKIP_BLANKS;
11730		if (ctxt->input->buf == NULL)
11731		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11732		else
11733		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11734                            (ctxt->input->cur - ctxt->input->base);
11735		if (avail < 2)
11736		    goto done;
11737		cur = ctxt->input->cur[0];
11738		next = ctxt->input->cur[1];
11739	        if ((cur == '<') && (next == '?')) {
11740		    if ((!terminate) &&
11741		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11742                        ctxt->progressive = XML_PARSER_PI;
11743			goto done;
11744                    }
11745#ifdef DEBUG_PUSH
11746		    xmlGenericError(xmlGenericErrorContext,
11747			    "PP: Parsing PI\n");
11748#endif
11749		    xmlParsePI(ctxt);
11750		    ctxt->instate = XML_PARSER_EPILOG;
11751                    ctxt->progressive = 1;
11752		} else if ((cur == '<') && (next == '!') &&
11753		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11754		    if ((!terminate) &&
11755		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11756                        ctxt->progressive = XML_PARSER_COMMENT;
11757			goto done;
11758                    }
11759#ifdef DEBUG_PUSH
11760		    xmlGenericError(xmlGenericErrorContext,
11761			    "PP: Parsing Comment\n");
11762#endif
11763		    xmlParseComment(ctxt);
11764		    ctxt->instate = XML_PARSER_EPILOG;
11765                    ctxt->progressive = 1;
11766		} else if ((cur == '<') && (next == '!') &&
11767		           (avail < 4)) {
11768		    goto done;
11769		} else {
11770		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11771		    ctxt->instate = XML_PARSER_EOF;
11772#ifdef DEBUG_PUSH
11773		    xmlGenericError(xmlGenericErrorContext,
11774			    "PP: entering EOF\n");
11775#endif
11776		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777			ctxt->sax->endDocument(ctxt->userData);
11778		    goto done;
11779		}
11780		break;
11781            case XML_PARSER_DTD: {
11782	        /*
11783		 * Sorry but progressive parsing of the internal subset
11784		 * is not expected to be supported. We first check that
11785		 * the full content of the internal subset is available and
11786		 * the parsing is launched only at that point.
11787		 * Internal subset ends up with "']' S? '>'" in an unescaped
11788		 * section and not in a ']]>' sequence which are conditional
11789		 * sections (whoever argued to keep that crap in XML deserve
11790		 * a place in hell !).
11791		 */
11792		int base, i;
11793		xmlChar *buf;
11794	        xmlChar quote = 0;
11795                size_t use;
11796
11797		base = ctxt->input->cur - ctxt->input->base;
11798		if (base < 0) return(0);
11799		if (ctxt->checkIndex > base)
11800		    base = ctxt->checkIndex;
11801		buf = xmlBufContent(ctxt->input->buf->buffer);
11802                use = xmlBufUse(ctxt->input->buf->buffer);
11803		for (;(unsigned int) base < use; base++) {
11804		    if (quote != 0) {
11805		        if (buf[base] == quote)
11806			    quote = 0;
11807			continue;
11808		    }
11809		    if ((quote == 0) && (buf[base] == '<')) {
11810		        int found  = 0;
11811			/* special handling of comments */
11812		        if (((unsigned int) base + 4 < use) &&
11813			    (buf[base + 1] == '!') &&
11814			    (buf[base + 2] == '-') &&
11815			    (buf[base + 3] == '-')) {
11816			    for (;(unsigned int) base + 3 < use; base++) {
11817				if ((buf[base] == '-') &&
11818				    (buf[base + 1] == '-') &&
11819				    (buf[base + 2] == '>')) {
11820				    found = 1;
11821				    base += 2;
11822				    break;
11823				}
11824		            }
11825			    if (!found) {
11826#if 0
11827			        fprintf(stderr, "unfinished comment\n");
11828#endif
11829			        break; /* for */
11830		            }
11831		            continue;
11832			}
11833		    }
11834		    if (buf[base] == '"') {
11835		        quote = '"';
11836			continue;
11837		    }
11838		    if (buf[base] == '\'') {
11839		        quote = '\'';
11840			continue;
11841		    }
11842		    if (buf[base] == ']') {
11843#if 0
11844		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11845			        buf[base + 1], buf[base + 2], buf[base + 3]);
11846#endif
11847		        if ((unsigned int) base +1 >= use)
11848			    break;
11849			if (buf[base + 1] == ']') {
11850			    /* conditional crap, skip both ']' ! */
11851			    base++;
11852			    continue;
11853			}
11854		        for (i = 1; (unsigned int) base + i < use; i++) {
11855			    if (buf[base + i] == '>') {
11856#if 0
11857			        fprintf(stderr, "found\n");
11858#endif
11859			        goto found_end_int_subset;
11860			    }
11861			    if (!IS_BLANK_CH(buf[base + i])) {
11862#if 0
11863			        fprintf(stderr, "not found\n");
11864#endif
11865			        goto not_end_of_int_subset;
11866			    }
11867			}
11868#if 0
11869			fprintf(stderr, "end of stream\n");
11870#endif
11871		        break;
11872
11873		    }
11874not_end_of_int_subset:
11875                    continue; /* for */
11876		}
11877		/*
11878		 * We didn't found the end of the Internal subset
11879		 */
11880                if (quote == 0)
11881                    ctxt->checkIndex = base;
11882                else
11883                    ctxt->checkIndex = 0;
11884#ifdef DEBUG_PUSH
11885		if (next == 0)
11886		    xmlGenericError(xmlGenericErrorContext,
11887			    "PP: lookup of int subset end filed\n");
11888#endif
11889	        goto done;
11890
11891found_end_int_subset:
11892                ctxt->checkIndex = 0;
11893		xmlParseInternalSubset(ctxt);
11894		ctxt->inSubset = 2;
11895		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11896		    (ctxt->sax->externalSubset != NULL))
11897		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11898			    ctxt->extSubSystem, ctxt->extSubURI);
11899		ctxt->inSubset = 0;
11900		xmlCleanSpecialAttr(ctxt);
11901		ctxt->instate = XML_PARSER_PROLOG;
11902		ctxt->checkIndex = 0;
11903#ifdef DEBUG_PUSH
11904		xmlGenericError(xmlGenericErrorContext,
11905			"PP: entering PROLOG\n");
11906#endif
11907                break;
11908	    }
11909            case XML_PARSER_COMMENT:
11910		xmlGenericError(xmlGenericErrorContext,
11911			"PP: internal error, state == COMMENT\n");
11912		ctxt->instate = XML_PARSER_CONTENT;
11913#ifdef DEBUG_PUSH
11914		xmlGenericError(xmlGenericErrorContext,
11915			"PP: entering CONTENT\n");
11916#endif
11917		break;
11918            case XML_PARSER_IGNORE:
11919		xmlGenericError(xmlGenericErrorContext,
11920			"PP: internal error, state == IGNORE");
11921	        ctxt->instate = XML_PARSER_DTD;
11922#ifdef DEBUG_PUSH
11923		xmlGenericError(xmlGenericErrorContext,
11924			"PP: entering DTD\n");
11925#endif
11926	        break;
11927            case XML_PARSER_PI:
11928		xmlGenericError(xmlGenericErrorContext,
11929			"PP: internal error, state == PI\n");
11930		ctxt->instate = XML_PARSER_CONTENT;
11931#ifdef DEBUG_PUSH
11932		xmlGenericError(xmlGenericErrorContext,
11933			"PP: entering CONTENT\n");
11934#endif
11935		break;
11936            case XML_PARSER_ENTITY_DECL:
11937		xmlGenericError(xmlGenericErrorContext,
11938			"PP: internal error, state == ENTITY_DECL\n");
11939		ctxt->instate = XML_PARSER_DTD;
11940#ifdef DEBUG_PUSH
11941		xmlGenericError(xmlGenericErrorContext,
11942			"PP: entering DTD\n");
11943#endif
11944		break;
11945            case XML_PARSER_ENTITY_VALUE:
11946		xmlGenericError(xmlGenericErrorContext,
11947			"PP: internal error, state == ENTITY_VALUE\n");
11948		ctxt->instate = XML_PARSER_CONTENT;
11949#ifdef DEBUG_PUSH
11950		xmlGenericError(xmlGenericErrorContext,
11951			"PP: entering DTD\n");
11952#endif
11953		break;
11954            case XML_PARSER_ATTRIBUTE_VALUE:
11955		xmlGenericError(xmlGenericErrorContext,
11956			"PP: internal error, state == ATTRIBUTE_VALUE\n");
11957		ctxt->instate = XML_PARSER_START_TAG;
11958#ifdef DEBUG_PUSH
11959		xmlGenericError(xmlGenericErrorContext,
11960			"PP: entering START_TAG\n");
11961#endif
11962		break;
11963            case XML_PARSER_SYSTEM_LITERAL:
11964		xmlGenericError(xmlGenericErrorContext,
11965			"PP: internal error, state == SYSTEM_LITERAL\n");
11966		ctxt->instate = XML_PARSER_START_TAG;
11967#ifdef DEBUG_PUSH
11968		xmlGenericError(xmlGenericErrorContext,
11969			"PP: entering START_TAG\n");
11970#endif
11971		break;
11972            case XML_PARSER_PUBLIC_LITERAL:
11973		xmlGenericError(xmlGenericErrorContext,
11974			"PP: internal error, state == PUBLIC_LITERAL\n");
11975		ctxt->instate = XML_PARSER_START_TAG;
11976#ifdef DEBUG_PUSH
11977		xmlGenericError(xmlGenericErrorContext,
11978			"PP: entering START_TAG\n");
11979#endif
11980		break;
11981	}
11982    }
11983done:
11984#ifdef DEBUG_PUSH
11985    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11986#endif
11987    return(ret);
11988encoding_error:
11989    {
11990        char buffer[150];
11991
11992	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11993			ctxt->input->cur[0], ctxt->input->cur[1],
11994			ctxt->input->cur[2], ctxt->input->cur[3]);
11995	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11996		     "Input is not proper UTF-8, indicate encoding !\n%s",
11997		     BAD_CAST buffer, NULL);
11998    }
11999    return(0);
12000}
12001
12002/**
12003 * xmlParseCheckTransition:
12004 * @ctxt:  an XML parser context
12005 * @chunk:  a char array
12006 * @size:  the size in byte of the chunk
12007 *
12008 * Check depending on the current parser state if the chunk given must be
12009 * processed immediately or one need more data to advance on parsing.
12010 *
12011 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12012 */
12013static int
12014xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12015    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12016        return(-1);
12017    if (ctxt->instate == XML_PARSER_START_TAG) {
12018        if (memchr(chunk, '>', size) != NULL)
12019            return(1);
12020        return(0);
12021    }
12022    if (ctxt->progressive == XML_PARSER_COMMENT) {
12023        if (memchr(chunk, '>', size) != NULL)
12024            return(1);
12025        return(0);
12026    }
12027    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12028        if (memchr(chunk, '>', size) != NULL)
12029            return(1);
12030        return(0);
12031    }
12032    if (ctxt->progressive == XML_PARSER_PI) {
12033        if (memchr(chunk, '>', size) != NULL)
12034            return(1);
12035        return(0);
12036    }
12037    if (ctxt->instate == XML_PARSER_END_TAG) {
12038        if (memchr(chunk, '>', size) != NULL)
12039            return(1);
12040        return(0);
12041    }
12042    if ((ctxt->progressive == XML_PARSER_DTD) ||
12043        (ctxt->instate == XML_PARSER_DTD)) {
12044        if (memchr(chunk, ']', size) != NULL)
12045            return(1);
12046        return(0);
12047    }
12048    return(1);
12049}
12050
12051/**
12052 * xmlParseChunk:
12053 * @ctxt:  an XML parser context
12054 * @chunk:  an char array
12055 * @size:  the size in byte of the chunk
12056 * @terminate:  last chunk indicator
12057 *
12058 * Parse a Chunk of memory
12059 *
12060 * Returns zero if no error, the xmlParserErrors otherwise.
12061 */
12062int
12063xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12064              int terminate) {
12065    int end_in_lf = 0;
12066    int remain = 0;
12067    size_t old_avail = 0;
12068    size_t avail = 0;
12069
12070    if (ctxt == NULL)
12071        return(XML_ERR_INTERNAL_ERROR);
12072    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12073        return(ctxt->errNo);
12074    if (ctxt->instate == XML_PARSER_EOF)
12075        return(-1);
12076    if (ctxt->instate == XML_PARSER_START)
12077        xmlDetectSAX2(ctxt);
12078    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12079        (chunk[size - 1] == '\r')) {
12080	end_in_lf = 1;
12081	size--;
12082    }
12083
12084xmldecl_done:
12085
12086    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12087        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12088	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12089	size_t cur = ctxt->input->cur - ctxt->input->base;
12090	int res;
12091
12092        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12093        /*
12094         * Specific handling if we autodetected an encoding, we should not
12095         * push more than the first line ... which depend on the encoding
12096         * And only push the rest once the final encoding was detected
12097         */
12098        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12099            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12100            unsigned int len = 45;
12101
12102            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12103                               BAD_CAST "UTF-16")) ||
12104                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12105                               BAD_CAST "UTF16")))
12106                len = 90;
12107            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12108                                    BAD_CAST "UCS-4")) ||
12109                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12110                                    BAD_CAST "UCS4")))
12111                len = 180;
12112
12113            if (ctxt->input->buf->rawconsumed < len)
12114                len -= ctxt->input->buf->rawconsumed;
12115
12116            /*
12117             * Change size for reading the initial declaration only
12118             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12119             * will blindly copy extra bytes from memory.
12120             */
12121            if ((unsigned int) size > len) {
12122                remain = size - len;
12123                size = len;
12124            } else {
12125                remain = 0;
12126            }
12127        }
12128	res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12129	if (res < 0) {
12130	    ctxt->errNo = XML_PARSER_EOF;
12131	    ctxt->disableSAX = 1;
12132	    return (XML_PARSER_EOF);
12133	}
12134        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12135#ifdef DEBUG_PUSH
12136	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12137#endif
12138
12139    } else if (ctxt->instate != XML_PARSER_EOF) {
12140	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12141	    xmlParserInputBufferPtr in = ctxt->input->buf;
12142	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12143		    (in->raw != NULL)) {
12144		int nbchars;
12145
12146		nbchars = xmlCharEncInput(in);
12147		if (nbchars < 0) {
12148		    /* TODO 2.6.0 */
12149		    xmlGenericError(xmlGenericErrorContext,
12150				    "xmlParseChunk: encoder error\n");
12151		    return(XML_ERR_INVALID_ENCODING);
12152		}
12153	    }
12154	}
12155    }
12156    if (remain != 0) {
12157        xmlParseTryOrFinish(ctxt, 0);
12158    } else {
12159        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12160            avail = xmlBufUse(ctxt->input->buf->buffer);
12161        /*
12162         * Depending on the current state it may not be such
12163         * a good idea to try parsing if there is nothing in the chunk
12164         * which would be worth doing a parser state transition and we
12165         * need to wait for more data
12166         */
12167        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12168            (old_avail == 0) || (avail == 0) ||
12169            (xmlParseCheckTransition(ctxt,
12170                       (const char *)&ctxt->input->base[old_avail],
12171                                     avail - old_avail)))
12172            xmlParseTryOrFinish(ctxt, terminate);
12173    }
12174    if ((ctxt->input != NULL) &&
12175         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12176         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12177        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12178        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12179        ctxt->instate = XML_PARSER_EOF;
12180    }
12181    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12182        return(ctxt->errNo);
12183
12184    if (remain != 0) {
12185        chunk += size;
12186        size = remain;
12187        remain = 0;
12188        goto xmldecl_done;
12189    }
12190    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12191        (ctxt->input->buf != NULL)) {
12192	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12193    }
12194    if (terminate) {
12195	/*
12196	 * Check for termination
12197	 */
12198	int cur_avail = 0;
12199
12200	if (ctxt->input != NULL) {
12201	    if (ctxt->input->buf == NULL)
12202		cur_avail = ctxt->input->length -
12203			    (ctxt->input->cur - ctxt->input->base);
12204	    else
12205		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12206			              (ctxt->input->cur - ctxt->input->base);
12207	}
12208
12209	if ((ctxt->instate != XML_PARSER_EOF) &&
12210	    (ctxt->instate != XML_PARSER_EPILOG)) {
12211	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12212	}
12213	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12214	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12215	}
12216	if (ctxt->instate != XML_PARSER_EOF) {
12217	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12218		ctxt->sax->endDocument(ctxt->userData);
12219	}
12220	ctxt->instate = XML_PARSER_EOF;
12221    }
12222    return((xmlParserErrors) ctxt->errNo);
12223}
12224
12225/************************************************************************
12226 *									*
12227 *		I/O front end functions to the parser			*
12228 *									*
12229 ************************************************************************/
12230
12231/**
12232 * xmlCreatePushParserCtxt:
12233 * @sax:  a SAX handler
12234 * @user_data:  The user data returned on SAX callbacks
12235 * @chunk:  a pointer to an array of chars
12236 * @size:  number of chars in the array
12237 * @filename:  an optional file name or URI
12238 *
12239 * Create a parser context for using the XML parser in push mode.
12240 * If @buffer and @size are non-NULL, the data is used to detect
12241 * the encoding.  The remaining characters will be parsed so they
12242 * don't need to be fed in again through xmlParseChunk.
12243 * To allow content encoding detection, @size should be >= 4
12244 * The value of @filename is used for fetching external entities
12245 * and error/warning reports.
12246 *
12247 * Returns the new parser context or NULL
12248 */
12249
12250xmlParserCtxtPtr
12251xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12252                        const char *chunk, int size, const char *filename) {
12253    xmlParserCtxtPtr ctxt;
12254    xmlParserInputPtr inputStream;
12255    xmlParserInputBufferPtr buf;
12256    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12257
12258    /*
12259     * plug some encoding conversion routines
12260     */
12261    if ((chunk != NULL) && (size >= 4))
12262	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12263
12264    buf = xmlAllocParserInputBuffer(enc);
12265    if (buf == NULL) return(NULL);
12266
12267    ctxt = xmlNewParserCtxt();
12268    if (ctxt == NULL) {
12269        xmlErrMemory(NULL, "creating parser: out of memory\n");
12270	xmlFreeParserInputBuffer(buf);
12271	return(NULL);
12272    }
12273    ctxt->dictNames = 1;
12274    ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12275    if (ctxt->pushTab == NULL) {
12276        xmlErrMemory(ctxt, NULL);
12277	xmlFreeParserInputBuffer(buf);
12278	xmlFreeParserCtxt(ctxt);
12279	return(NULL);
12280    }
12281    if (sax != NULL) {
12282#ifdef LIBXML_SAX1_ENABLED
12283	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12284#endif /* LIBXML_SAX1_ENABLED */
12285	    xmlFree(ctxt->sax);
12286	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12287	if (ctxt->sax == NULL) {
12288	    xmlErrMemory(ctxt, NULL);
12289	    xmlFreeParserInputBuffer(buf);
12290	    xmlFreeParserCtxt(ctxt);
12291	    return(NULL);
12292	}
12293	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12294	if (sax->initialized == XML_SAX2_MAGIC)
12295	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12296	else
12297	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12298	if (user_data != NULL)
12299	    ctxt->userData = user_data;
12300    }
12301    if (filename == NULL) {
12302	ctxt->directory = NULL;
12303    } else {
12304        ctxt->directory = xmlParserGetDirectory(filename);
12305    }
12306
12307    inputStream = xmlNewInputStream(ctxt);
12308    if (inputStream == NULL) {
12309	xmlFreeParserCtxt(ctxt);
12310	xmlFreeParserInputBuffer(buf);
12311	return(NULL);
12312    }
12313
12314    if (filename == NULL)
12315	inputStream->filename = NULL;
12316    else {
12317	inputStream->filename = (char *)
12318	    xmlCanonicPath((const xmlChar *) filename);
12319	if (inputStream->filename == NULL) {
12320	    xmlFreeParserCtxt(ctxt);
12321	    xmlFreeParserInputBuffer(buf);
12322	    return(NULL);
12323	}
12324    }
12325    inputStream->buf = buf;
12326    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12327    inputPush(ctxt, inputStream);
12328
12329    /*
12330     * If the caller didn't provide an initial 'chunk' for determining
12331     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12332     * that it can be automatically determined later
12333     */
12334    if ((size == 0) || (chunk == NULL)) {
12335	ctxt->charset = XML_CHAR_ENCODING_NONE;
12336    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12337	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12338	size_t cur = ctxt->input->cur - ctxt->input->base;
12339
12340	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12341
12342        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12343#ifdef DEBUG_PUSH
12344	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12345#endif
12346    }
12347
12348    if (enc != XML_CHAR_ENCODING_NONE) {
12349        xmlSwitchEncoding(ctxt, enc);
12350    }
12351
12352    return(ctxt);
12353}
12354#endif /* LIBXML_PUSH_ENABLED */
12355
12356/**
12357 * xmlStopParser:
12358 * @ctxt:  an XML parser context
12359 *
12360 * Blocks further parser processing
12361 */
12362void
12363xmlStopParser(xmlParserCtxtPtr ctxt) {
12364    if (ctxt == NULL)
12365        return;
12366    ctxt->instate = XML_PARSER_EOF;
12367    ctxt->disableSAX = 1;
12368    if (ctxt->input != NULL) {
12369	ctxt->input->cur = BAD_CAST"";
12370	ctxt->input->base = ctxt->input->cur;
12371    }
12372}
12373
12374/**
12375 * xmlCreateIOParserCtxt:
12376 * @sax:  a SAX handler
12377 * @user_data:  The user data returned on SAX callbacks
12378 * @ioread:  an I/O read function
12379 * @ioclose:  an I/O close function
12380 * @ioctx:  an I/O handler
12381 * @enc:  the charset encoding if known
12382 *
12383 * Create a parser context for using the XML parser with an existing
12384 * I/O stream
12385 *
12386 * Returns the new parser context or NULL
12387 */
12388xmlParserCtxtPtr
12389xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12390	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12391	void *ioctx, xmlCharEncoding enc) {
12392    xmlParserCtxtPtr ctxt;
12393    xmlParserInputPtr inputStream;
12394    xmlParserInputBufferPtr buf;
12395
12396    if (ioread == NULL) return(NULL);
12397
12398    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12399    if (buf == NULL) {
12400        if (ioclose != NULL)
12401            ioclose(ioctx);
12402        return (NULL);
12403    }
12404
12405    ctxt = xmlNewParserCtxt();
12406    if (ctxt == NULL) {
12407	xmlFreeParserInputBuffer(buf);
12408	return(NULL);
12409    }
12410    if (sax != NULL) {
12411#ifdef LIBXML_SAX1_ENABLED
12412	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12413#endif /* LIBXML_SAX1_ENABLED */
12414	    xmlFree(ctxt->sax);
12415	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12416	if (ctxt->sax == NULL) {
12417	    xmlErrMemory(ctxt, NULL);
12418	    xmlFreeParserCtxt(ctxt);
12419	    return(NULL);
12420	}
12421	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12422	if (sax->initialized == XML_SAX2_MAGIC)
12423	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12424	else
12425	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12426	if (user_data != NULL)
12427	    ctxt->userData = user_data;
12428    }
12429
12430    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12431    if (inputStream == NULL) {
12432	xmlFreeParserCtxt(ctxt);
12433	return(NULL);
12434    }
12435    inputPush(ctxt, inputStream);
12436
12437    return(ctxt);
12438}
12439
12440#ifdef LIBXML_VALID_ENABLED
12441/************************************************************************
12442 *									*
12443 *		Front ends when parsing a DTD				*
12444 *									*
12445 ************************************************************************/
12446
12447/**
12448 * xmlIOParseDTD:
12449 * @sax:  the SAX handler block or NULL
12450 * @input:  an Input Buffer
12451 * @enc:  the charset encoding if known
12452 *
12453 * Load and parse a DTD
12454 *
12455 * Returns the resulting xmlDtdPtr or NULL in case of error.
12456 * @input will be freed by the function in any case.
12457 */
12458
12459xmlDtdPtr
12460xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12461	      xmlCharEncoding enc) {
12462    xmlDtdPtr ret = NULL;
12463    xmlParserCtxtPtr ctxt;
12464    xmlParserInputPtr pinput = NULL;
12465    xmlChar start[4];
12466
12467    if (input == NULL)
12468	return(NULL);
12469
12470    ctxt = xmlNewParserCtxt();
12471    if (ctxt == NULL) {
12472        xmlFreeParserInputBuffer(input);
12473	return(NULL);
12474    }
12475
12476    /*
12477     * Set-up the SAX context
12478     */
12479    if (sax != NULL) {
12480	if (ctxt->sax != NULL)
12481	    xmlFree(ctxt->sax);
12482        ctxt->sax = sax;
12483        ctxt->userData = ctxt;
12484    }
12485    xmlDetectSAX2(ctxt);
12486
12487    /*
12488     * generate a parser input from the I/O handler
12489     */
12490
12491    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12492    if (pinput == NULL) {
12493        if (sax != NULL) ctxt->sax = NULL;
12494        xmlFreeParserInputBuffer(input);
12495	xmlFreeParserCtxt(ctxt);
12496	return(NULL);
12497    }
12498
12499    /*
12500     * plug some encoding conversion routines here.
12501     */
12502    if (xmlPushInput(ctxt, pinput) < 0) {
12503        if (sax != NULL) ctxt->sax = NULL;
12504	xmlFreeParserCtxt(ctxt);
12505	return(NULL);
12506    }
12507    if (enc != XML_CHAR_ENCODING_NONE) {
12508        xmlSwitchEncoding(ctxt, enc);
12509    }
12510
12511    pinput->filename = NULL;
12512    pinput->line = 1;
12513    pinput->col = 1;
12514    pinput->base = ctxt->input->cur;
12515    pinput->cur = ctxt->input->cur;
12516    pinput->free = NULL;
12517
12518    /*
12519     * let's parse that entity knowing it's an external subset.
12520     */
12521    ctxt->inSubset = 2;
12522    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12523    if (ctxt->myDoc == NULL) {
12524	xmlErrMemory(ctxt, "New Doc failed");
12525	return(NULL);
12526    }
12527    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12528    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12529	                               BAD_CAST "none", BAD_CAST "none");
12530
12531    if ((enc == XML_CHAR_ENCODING_NONE) &&
12532        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12533	/*
12534	 * Get the 4 first bytes and decode the charset
12535	 * if enc != XML_CHAR_ENCODING_NONE
12536	 * plug some encoding conversion routines.
12537	 */
12538	start[0] = RAW;
12539	start[1] = NXT(1);
12540	start[2] = NXT(2);
12541	start[3] = NXT(3);
12542	enc = xmlDetectCharEncoding(start, 4);
12543	if (enc != XML_CHAR_ENCODING_NONE) {
12544	    xmlSwitchEncoding(ctxt, enc);
12545	}
12546    }
12547
12548    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12549
12550    if (ctxt->myDoc != NULL) {
12551	if (ctxt->wellFormed) {
12552	    ret = ctxt->myDoc->extSubset;
12553	    ctxt->myDoc->extSubset = NULL;
12554	    if (ret != NULL) {
12555		xmlNodePtr tmp;
12556
12557		ret->doc = NULL;
12558		tmp = ret->children;
12559		while (tmp != NULL) {
12560		    tmp->doc = NULL;
12561		    tmp = tmp->next;
12562		}
12563	    }
12564	} else {
12565	    ret = NULL;
12566	}
12567        xmlFreeDoc(ctxt->myDoc);
12568        ctxt->myDoc = NULL;
12569    }
12570    if (sax != NULL) ctxt->sax = NULL;
12571    xmlFreeParserCtxt(ctxt);
12572
12573    return(ret);
12574}
12575
12576/**
12577 * xmlSAXParseDTD:
12578 * @sax:  the SAX handler block
12579 * @ExternalID:  a NAME* containing the External ID of the DTD
12580 * @SystemID:  a NAME* containing the URL to the DTD
12581 *
12582 * Load and parse an external subset.
12583 *
12584 * Returns the resulting xmlDtdPtr or NULL in case of error.
12585 */
12586
12587xmlDtdPtr
12588xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12589                          const xmlChar *SystemID) {
12590    xmlDtdPtr ret = NULL;
12591    xmlParserCtxtPtr ctxt;
12592    xmlParserInputPtr input = NULL;
12593    xmlCharEncoding enc;
12594    xmlChar* systemIdCanonic;
12595
12596    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12597
12598    ctxt = xmlNewParserCtxt();
12599    if (ctxt == NULL) {
12600	return(NULL);
12601    }
12602
12603    /*
12604     * Set-up the SAX context
12605     */
12606    if (sax != NULL) {
12607	if (ctxt->sax != NULL)
12608	    xmlFree(ctxt->sax);
12609        ctxt->sax = sax;
12610        ctxt->userData = ctxt;
12611    }
12612
12613    /*
12614     * Canonicalise the system ID
12615     */
12616    systemIdCanonic = xmlCanonicPath(SystemID);
12617    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12618	xmlFreeParserCtxt(ctxt);
12619	return(NULL);
12620    }
12621
12622    /*
12623     * Ask the Entity resolver to load the damn thing
12624     */
12625
12626    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12627	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12628	                                 systemIdCanonic);
12629    if (input == NULL) {
12630        if (sax != NULL) ctxt->sax = NULL;
12631	xmlFreeParserCtxt(ctxt);
12632	if (systemIdCanonic != NULL)
12633	    xmlFree(systemIdCanonic);
12634	return(NULL);
12635    }
12636
12637    /*
12638     * plug some encoding conversion routines here.
12639     */
12640    if (xmlPushInput(ctxt, input) < 0) {
12641        if (sax != NULL) ctxt->sax = NULL;
12642	xmlFreeParserCtxt(ctxt);
12643	if (systemIdCanonic != NULL)
12644	    xmlFree(systemIdCanonic);
12645	return(NULL);
12646    }
12647    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12648	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12649	xmlSwitchEncoding(ctxt, enc);
12650    }
12651
12652    if (input->filename == NULL)
12653	input->filename = (char *) systemIdCanonic;
12654    else
12655	xmlFree(systemIdCanonic);
12656    input->line = 1;
12657    input->col = 1;
12658    input->base = ctxt->input->cur;
12659    input->cur = ctxt->input->cur;
12660    input->free = NULL;
12661
12662    /*
12663     * let's parse that entity knowing it's an external subset.
12664     */
12665    ctxt->inSubset = 2;
12666    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12667    if (ctxt->myDoc == NULL) {
12668	xmlErrMemory(ctxt, "New Doc failed");
12669        if (sax != NULL) ctxt->sax = NULL;
12670	xmlFreeParserCtxt(ctxt);
12671	return(NULL);
12672    }
12673    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12674    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12675	                               ExternalID, SystemID);
12676    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12677
12678    if (ctxt->myDoc != NULL) {
12679	if (ctxt->wellFormed) {
12680	    ret = ctxt->myDoc->extSubset;
12681	    ctxt->myDoc->extSubset = NULL;
12682	    if (ret != NULL) {
12683		xmlNodePtr tmp;
12684
12685		ret->doc = NULL;
12686		tmp = ret->children;
12687		while (tmp != NULL) {
12688		    tmp->doc = NULL;
12689		    tmp = tmp->next;
12690		}
12691	    }
12692	} else {
12693	    ret = NULL;
12694	}
12695        xmlFreeDoc(ctxt->myDoc);
12696        ctxt->myDoc = NULL;
12697    }
12698    if (sax != NULL) ctxt->sax = NULL;
12699    xmlFreeParserCtxt(ctxt);
12700
12701    return(ret);
12702}
12703
12704
12705/**
12706 * xmlParseDTD:
12707 * @ExternalID:  a NAME* containing the External ID of the DTD
12708 * @SystemID:  a NAME* containing the URL to the DTD
12709 *
12710 * Load and parse an external subset.
12711 *
12712 * Returns the resulting xmlDtdPtr or NULL in case of error.
12713 */
12714
12715xmlDtdPtr
12716xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12717    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12718}
12719#endif /* LIBXML_VALID_ENABLED */
12720
12721/************************************************************************
12722 *									*
12723 *		Front ends when parsing an Entity			*
12724 *									*
12725 ************************************************************************/
12726
12727/**
12728 * xmlParseCtxtExternalEntity:
12729 * @ctx:  the existing parsing context
12730 * @URL:  the URL for the entity to load
12731 * @ID:  the System ID for the entity to load
12732 * @lst:  the return value for the set of parsed nodes
12733 *
12734 * Parse an external general entity within an existing parsing context
12735 * An external general parsed entity is well-formed if it matches the
12736 * production labeled extParsedEnt.
12737 *
12738 * [78] extParsedEnt ::= TextDecl? content
12739 *
12740 * Returns 0 if the entity is well formed, -1 in case of args problem and
12741 *    the parser error code otherwise
12742 */
12743
12744int
12745xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12746	               const xmlChar *ID, xmlNodePtr *lst) {
12747    xmlParserCtxtPtr ctxt;
12748    xmlDocPtr newDoc;
12749    xmlNodePtr newRoot;
12750    xmlSAXHandlerPtr oldsax = NULL;
12751    int ret = 0;
12752    xmlChar start[4];
12753    xmlCharEncoding enc;
12754
12755    if (ctx == NULL) return(-1);
12756
12757    if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12758        (ctx->depth > 1024)) {
12759	return(XML_ERR_ENTITY_LOOP);
12760    }
12761
12762    if (lst != NULL)
12763        *lst = NULL;
12764    if ((URL == NULL) && (ID == NULL))
12765	return(-1);
12766    if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12767	return(-1);
12768
12769    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12770    if (ctxt == NULL) {
12771	return(-1);
12772    }
12773
12774    oldsax = ctxt->sax;
12775    ctxt->sax = ctx->sax;
12776    xmlDetectSAX2(ctxt);
12777    newDoc = xmlNewDoc(BAD_CAST "1.0");
12778    if (newDoc == NULL) {
12779	xmlFreeParserCtxt(ctxt);
12780	return(-1);
12781    }
12782    newDoc->properties = XML_DOC_INTERNAL;
12783    if (ctx->myDoc->dict) {
12784	newDoc->dict = ctx->myDoc->dict;
12785	xmlDictReference(newDoc->dict);
12786    }
12787    if (ctx->myDoc != NULL) {
12788	newDoc->intSubset = ctx->myDoc->intSubset;
12789	newDoc->extSubset = ctx->myDoc->extSubset;
12790    }
12791    if (ctx->myDoc->URL != NULL) {
12792	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12793    }
12794    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12795    if (newRoot == NULL) {
12796	ctxt->sax = oldsax;
12797	xmlFreeParserCtxt(ctxt);
12798	newDoc->intSubset = NULL;
12799	newDoc->extSubset = NULL;
12800        xmlFreeDoc(newDoc);
12801	return(-1);
12802    }
12803    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12804    nodePush(ctxt, newDoc->children);
12805    if (ctx->myDoc == NULL) {
12806	ctxt->myDoc = newDoc;
12807    } else {
12808	ctxt->myDoc = ctx->myDoc;
12809	newDoc->children->doc = ctx->myDoc;
12810    }
12811
12812    /*
12813     * Get the 4 first bytes and decode the charset
12814     * if enc != XML_CHAR_ENCODING_NONE
12815     * plug some encoding conversion routines.
12816     */
12817    GROW
12818    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12819	start[0] = RAW;
12820	start[1] = NXT(1);
12821	start[2] = NXT(2);
12822	start[3] = NXT(3);
12823	enc = xmlDetectCharEncoding(start, 4);
12824	if (enc != XML_CHAR_ENCODING_NONE) {
12825	    xmlSwitchEncoding(ctxt, enc);
12826	}
12827    }
12828
12829    /*
12830     * Parse a possible text declaration first
12831     */
12832    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12833	xmlParseTextDecl(ctxt);
12834	/*
12835	 * An XML-1.0 document can't reference an entity not XML-1.0
12836	 */
12837	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12838	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12839	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12840	                   "Version mismatch between document and entity\n");
12841	}
12842    }
12843
12844    /*
12845     * If the user provided its own SAX callbacks then reuse the
12846     * useData callback field, otherwise the expected setup in a
12847     * DOM builder is to have userData == ctxt
12848     */
12849    if (ctx->userData == ctx)
12850        ctxt->userData = ctxt;
12851    else
12852        ctxt->userData = ctx->userData;
12853
12854    /*
12855     * Doing validity checking on chunk doesn't make sense
12856     */
12857    ctxt->instate = XML_PARSER_CONTENT;
12858    ctxt->validate = ctx->validate;
12859    ctxt->valid = ctx->valid;
12860    ctxt->loadsubset = ctx->loadsubset;
12861    ctxt->depth = ctx->depth + 1;
12862    ctxt->replaceEntities = ctx->replaceEntities;
12863    if (ctxt->validate) {
12864	ctxt->vctxt.error = ctx->vctxt.error;
12865	ctxt->vctxt.warning = ctx->vctxt.warning;
12866    } else {
12867	ctxt->vctxt.error = NULL;
12868	ctxt->vctxt.warning = NULL;
12869    }
12870    ctxt->vctxt.nodeTab = NULL;
12871    ctxt->vctxt.nodeNr = 0;
12872    ctxt->vctxt.nodeMax = 0;
12873    ctxt->vctxt.node = NULL;
12874    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12875    ctxt->dict = ctx->dict;
12876    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12877    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12878    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12879    ctxt->dictNames = ctx->dictNames;
12880    ctxt->attsDefault = ctx->attsDefault;
12881    ctxt->attsSpecial = ctx->attsSpecial;
12882    ctxt->linenumbers = ctx->linenumbers;
12883
12884    xmlParseContent(ctxt);
12885
12886    ctx->validate = ctxt->validate;
12887    ctx->valid = ctxt->valid;
12888    if ((RAW == '<') && (NXT(1) == '/')) {
12889	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12890    } else if (RAW != 0) {
12891	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12892    }
12893    if (ctxt->node != newDoc->children) {
12894	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12895    }
12896
12897    if (!ctxt->wellFormed) {
12898        if (ctxt->errNo == 0)
12899	    ret = 1;
12900	else
12901	    ret = ctxt->errNo;
12902    } else {
12903	if (lst != NULL) {
12904	    xmlNodePtr cur;
12905
12906	    /*
12907	     * Return the newly created nodeset after unlinking it from
12908	     * they pseudo parent.
12909	     */
12910	    cur = newDoc->children->children;
12911	    *lst = cur;
12912	    while (cur != NULL) {
12913		cur->parent = NULL;
12914		cur = cur->next;
12915	    }
12916            newDoc->children->children = NULL;
12917	}
12918	ret = 0;
12919    }
12920    ctxt->sax = oldsax;
12921    ctxt->dict = NULL;
12922    ctxt->attsDefault = NULL;
12923    ctxt->attsSpecial = NULL;
12924    xmlFreeParserCtxt(ctxt);
12925    newDoc->intSubset = NULL;
12926    newDoc->extSubset = NULL;
12927    xmlFreeDoc(newDoc);
12928
12929    return(ret);
12930}
12931
12932/**
12933 * xmlParseExternalEntityPrivate:
12934 * @doc:  the document the chunk pertains to
12935 * @oldctxt:  the previous parser context if available
12936 * @sax:  the SAX handler bloc (possibly NULL)
12937 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12938 * @depth:  Used for loop detection, use 0
12939 * @URL:  the URL for the entity to load
12940 * @ID:  the System ID for the entity to load
12941 * @list:  the return value for the set of parsed nodes
12942 *
12943 * Private version of xmlParseExternalEntity()
12944 *
12945 * Returns 0 if the entity is well formed, -1 in case of args problem and
12946 *    the parser error code otherwise
12947 */
12948
12949static xmlParserErrors
12950xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12951	              xmlSAXHandlerPtr sax,
12952		      void *user_data, int depth, const xmlChar *URL,
12953		      const xmlChar *ID, xmlNodePtr *list) {
12954    xmlParserCtxtPtr ctxt;
12955    xmlDocPtr newDoc;
12956    xmlNodePtr newRoot;
12957    xmlSAXHandlerPtr oldsax = NULL;
12958    xmlParserErrors ret = XML_ERR_OK;
12959    xmlChar start[4];
12960    xmlCharEncoding enc;
12961
12962    if (((depth > 40) &&
12963	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12964	(depth > 1024)) {
12965	return(XML_ERR_ENTITY_LOOP);
12966    }
12967
12968    if (list != NULL)
12969        *list = NULL;
12970    if ((URL == NULL) && (ID == NULL))
12971	return(XML_ERR_INTERNAL_ERROR);
12972    if (doc == NULL)
12973	return(XML_ERR_INTERNAL_ERROR);
12974
12975
12976    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12977    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12978    ctxt->userData = ctxt;
12979    if (oldctxt != NULL) {
12980	ctxt->_private = oldctxt->_private;
12981	ctxt->loadsubset = oldctxt->loadsubset;
12982	ctxt->validate = oldctxt->validate;
12983	ctxt->external = oldctxt->external;
12984	ctxt->record_info = oldctxt->record_info;
12985	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12986	ctxt->node_seq.length = oldctxt->node_seq.length;
12987	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12988    } else {
12989	/*
12990	 * Doing validity checking on chunk without context
12991	 * doesn't make sense
12992	 */
12993	ctxt->_private = NULL;
12994	ctxt->validate = 0;
12995	ctxt->external = 2;
12996	ctxt->loadsubset = 0;
12997    }
12998    if (sax != NULL) {
12999	oldsax = ctxt->sax;
13000        ctxt->sax = sax;
13001	if (user_data != NULL)
13002	    ctxt->userData = user_data;
13003    }
13004    xmlDetectSAX2(ctxt);
13005    newDoc = xmlNewDoc(BAD_CAST "1.0");
13006    if (newDoc == NULL) {
13007	ctxt->node_seq.maximum = 0;
13008	ctxt->node_seq.length = 0;
13009	ctxt->node_seq.buffer = NULL;
13010	xmlFreeParserCtxt(ctxt);
13011	return(XML_ERR_INTERNAL_ERROR);
13012    }
13013    newDoc->properties = XML_DOC_INTERNAL;
13014    newDoc->intSubset = doc->intSubset;
13015    newDoc->extSubset = doc->extSubset;
13016    newDoc->dict = doc->dict;
13017    xmlDictReference(newDoc->dict);
13018
13019    if (doc->URL != NULL) {
13020	newDoc->URL = xmlStrdup(doc->URL);
13021    }
13022    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13023    if (newRoot == NULL) {
13024	if (sax != NULL)
13025	    ctxt->sax = oldsax;
13026	ctxt->node_seq.maximum = 0;
13027	ctxt->node_seq.length = 0;
13028	ctxt->node_seq.buffer = NULL;
13029	xmlFreeParserCtxt(ctxt);
13030	newDoc->intSubset = NULL;
13031	newDoc->extSubset = NULL;
13032        xmlFreeDoc(newDoc);
13033	return(XML_ERR_INTERNAL_ERROR);
13034    }
13035    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13036    nodePush(ctxt, newDoc->children);
13037    ctxt->myDoc = doc;
13038    newRoot->doc = doc;
13039
13040    /*
13041     * Get the 4 first bytes and decode the charset
13042     * if enc != XML_CHAR_ENCODING_NONE
13043     * plug some encoding conversion routines.
13044     */
13045    GROW;
13046    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13047	start[0] = RAW;
13048	start[1] = NXT(1);
13049	start[2] = NXT(2);
13050	start[3] = NXT(3);
13051	enc = xmlDetectCharEncoding(start, 4);
13052	if (enc != XML_CHAR_ENCODING_NONE) {
13053	    xmlSwitchEncoding(ctxt, enc);
13054	}
13055    }
13056
13057    /*
13058     * Parse a possible text declaration first
13059     */
13060    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13061	xmlParseTextDecl(ctxt);
13062    }
13063
13064    ctxt->instate = XML_PARSER_CONTENT;
13065    ctxt->depth = depth;
13066
13067    xmlParseContent(ctxt);
13068
13069    if ((RAW == '<') && (NXT(1) == '/')) {
13070	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13071    } else if (RAW != 0) {
13072	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13073    }
13074    if (ctxt->node != newDoc->children) {
13075	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13076    }
13077
13078    if (!ctxt->wellFormed) {
13079        if (ctxt->errNo == 0)
13080	    ret = XML_ERR_INTERNAL_ERROR;
13081	else
13082	    ret = (xmlParserErrors)ctxt->errNo;
13083    } else {
13084	if (list != NULL) {
13085	    xmlNodePtr cur;
13086
13087	    /*
13088	     * Return the newly created nodeset after unlinking it from
13089	     * they pseudo parent.
13090	     */
13091	    cur = newDoc->children->children;
13092	    *list = cur;
13093	    while (cur != NULL) {
13094		cur->parent = NULL;
13095		cur = cur->next;
13096	    }
13097            newDoc->children->children = NULL;
13098	}
13099	ret = XML_ERR_OK;
13100    }
13101
13102    /*
13103     * Record in the parent context the number of entities replacement
13104     * done when parsing that reference.
13105     */
13106    if (oldctxt != NULL)
13107        oldctxt->nbentities += ctxt->nbentities;
13108
13109    /*
13110     * Also record the size of the entity parsed
13111     */
13112    if (ctxt->input != NULL) {
13113	oldctxt->sizeentities += ctxt->input->consumed;
13114	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13115    }
13116    /*
13117     * And record the last error if any
13118     */
13119    if (ctxt->lastError.code != XML_ERR_OK)
13120        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13121
13122    if (sax != NULL)
13123	ctxt->sax = oldsax;
13124    oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13125    oldctxt->node_seq.length = ctxt->node_seq.length;
13126    oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13127    ctxt->node_seq.maximum = 0;
13128    ctxt->node_seq.length = 0;
13129    ctxt->node_seq.buffer = NULL;
13130    xmlFreeParserCtxt(ctxt);
13131    newDoc->intSubset = NULL;
13132    newDoc->extSubset = NULL;
13133    xmlFreeDoc(newDoc);
13134
13135    return(ret);
13136}
13137
13138#ifdef LIBXML_SAX1_ENABLED
13139/**
13140 * xmlParseExternalEntity:
13141 * @doc:  the document the chunk pertains to
13142 * @sax:  the SAX handler bloc (possibly NULL)
13143 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13144 * @depth:  Used for loop detection, use 0
13145 * @URL:  the URL for the entity to load
13146 * @ID:  the System ID for the entity to load
13147 * @lst:  the return value for the set of parsed nodes
13148 *
13149 * Parse an external general entity
13150 * An external general parsed entity is well-formed if it matches the
13151 * production labeled extParsedEnt.
13152 *
13153 * [78] extParsedEnt ::= TextDecl? content
13154 *
13155 * Returns 0 if the entity is well formed, -1 in case of args problem and
13156 *    the parser error code otherwise
13157 */
13158
13159int
13160xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13161	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13162    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13163		                       ID, lst));
13164}
13165
13166/**
13167 * xmlParseBalancedChunkMemory:
13168 * @doc:  the document the chunk pertains to
13169 * @sax:  the SAX handler bloc (possibly NULL)
13170 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13171 * @depth:  Used for loop detection, use 0
13172 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13173 * @lst:  the return value for the set of parsed nodes
13174 *
13175 * Parse a well-balanced chunk of an XML document
13176 * called by the parser
13177 * The allowed sequence for the Well Balanced Chunk is the one defined by
13178 * the content production in the XML grammar:
13179 *
13180 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13181 *
13182 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13183 *    the parser error code otherwise
13184 */
13185
13186int
13187xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13188     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13189    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13190                                                depth, string, lst, 0 );
13191}
13192#endif /* LIBXML_SAX1_ENABLED */
13193
13194/**
13195 * xmlParseBalancedChunkMemoryInternal:
13196 * @oldctxt:  the existing parsing context
13197 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13198 * @user_data:  the user data field for the parser context
13199 * @lst:  the return value for the set of parsed nodes
13200 *
13201 *
13202 * Parse a well-balanced chunk of an XML document
13203 * called by the parser
13204 * The allowed sequence for the Well Balanced Chunk is the one defined by
13205 * the content production in the XML grammar:
13206 *
13207 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13208 *
13209 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13210 * error code otherwise
13211 *
13212 * In case recover is set to 1, the nodelist will not be empty even if
13213 * the parsed chunk is not well balanced.
13214 */
13215static xmlParserErrors
13216xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13217	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13218    xmlParserCtxtPtr ctxt;
13219    xmlDocPtr newDoc = NULL;
13220    xmlNodePtr newRoot;
13221    xmlSAXHandlerPtr oldsax = NULL;
13222    xmlNodePtr content = NULL;
13223    xmlNodePtr last = NULL;
13224    int size;
13225    xmlParserErrors ret = XML_ERR_OK;
13226#ifdef SAX2
13227    int i;
13228#endif
13229
13230    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13231        (oldctxt->depth >  1024)) {
13232	return(XML_ERR_ENTITY_LOOP);
13233    }
13234
13235
13236    if (lst != NULL)
13237        *lst = NULL;
13238    if (string == NULL)
13239        return(XML_ERR_INTERNAL_ERROR);
13240
13241    size = xmlStrlen(string);
13242
13243    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13244    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13245    if (user_data != NULL)
13246	ctxt->userData = user_data;
13247    else
13248	ctxt->userData = ctxt;
13249    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13250    ctxt->dict = oldctxt->dict;
13251    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13252    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13253    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13254
13255#ifdef SAX2
13256    /* propagate namespaces down the entity */
13257    for (i = 0;i < oldctxt->nsNr;i += 2) {
13258        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13259    }
13260#endif
13261
13262    oldsax = ctxt->sax;
13263    ctxt->sax = oldctxt->sax;
13264    xmlDetectSAX2(ctxt);
13265    ctxt->replaceEntities = oldctxt->replaceEntities;
13266    ctxt->options = oldctxt->options;
13267
13268    ctxt->_private = oldctxt->_private;
13269    if (oldctxt->myDoc == NULL) {
13270	newDoc = xmlNewDoc(BAD_CAST "1.0");
13271	if (newDoc == NULL) {
13272	    ctxt->sax = oldsax;
13273	    ctxt->dict = NULL;
13274	    xmlFreeParserCtxt(ctxt);
13275	    return(XML_ERR_INTERNAL_ERROR);
13276	}
13277	newDoc->properties = XML_DOC_INTERNAL;
13278	newDoc->dict = ctxt->dict;
13279	xmlDictReference(newDoc->dict);
13280	ctxt->myDoc = newDoc;
13281    } else {
13282	ctxt->myDoc = oldctxt->myDoc;
13283        content = ctxt->myDoc->children;
13284	last = ctxt->myDoc->last;
13285    }
13286    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13287    if (newRoot == NULL) {
13288	ctxt->sax = oldsax;
13289	ctxt->dict = NULL;
13290	xmlFreeParserCtxt(ctxt);
13291	if (newDoc != NULL) {
13292	    xmlFreeDoc(newDoc);
13293	}
13294	return(XML_ERR_INTERNAL_ERROR);
13295    }
13296    ctxt->myDoc->children = NULL;
13297    ctxt->myDoc->last = NULL;
13298    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13299    nodePush(ctxt, ctxt->myDoc->children);
13300    ctxt->instate = XML_PARSER_CONTENT;
13301    ctxt->depth = oldctxt->depth + 1;
13302
13303    ctxt->validate = 0;
13304    ctxt->loadsubset = oldctxt->loadsubset;
13305    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13306	/*
13307	 * ID/IDREF registration will be done in xmlValidateElement below
13308	 */
13309	ctxt->loadsubset |= XML_SKIP_IDS;
13310    }
13311    ctxt->dictNames = oldctxt->dictNames;
13312    ctxt->attsDefault = oldctxt->attsDefault;
13313    ctxt->attsSpecial = oldctxt->attsSpecial;
13314
13315    xmlParseContent(ctxt);
13316    if ((RAW == '<') && (NXT(1) == '/')) {
13317	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13318    } else if (RAW != 0) {
13319	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13320    }
13321    if (ctxt->node != ctxt->myDoc->children) {
13322	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13323    }
13324
13325    if (!ctxt->wellFormed) {
13326        if (ctxt->errNo == 0)
13327	    ret = XML_ERR_INTERNAL_ERROR;
13328	else
13329	    ret = (xmlParserErrors)ctxt->errNo;
13330    } else {
13331      ret = XML_ERR_OK;
13332    }
13333
13334    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13335	xmlNodePtr cur;
13336
13337	/*
13338	 * Return the newly created nodeset after unlinking it from
13339	 * they pseudo parent.
13340	 */
13341	cur = ctxt->myDoc->children->children;
13342	*lst = cur;
13343	while (cur != NULL) {
13344#ifdef LIBXML_VALID_ENABLED
13345	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13346		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13347		(cur->type == XML_ELEMENT_NODE)) {
13348		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13349			oldctxt->myDoc, cur);
13350	    }
13351#endif /* LIBXML_VALID_ENABLED */
13352	    cur->parent = NULL;
13353	    cur = cur->next;
13354	}
13355	ctxt->myDoc->children->children = NULL;
13356    }
13357    if (ctxt->myDoc != NULL) {
13358	xmlFreeNode(ctxt->myDoc->children);
13359        ctxt->myDoc->children = content;
13360        ctxt->myDoc->last = last;
13361    }
13362
13363    /*
13364     * Record in the parent context the number of entities replacement
13365     * done when parsing that reference.
13366     */
13367    if (oldctxt != NULL)
13368        oldctxt->nbentities += ctxt->nbentities;
13369
13370    /*
13371     * Also record the last error if any
13372     */
13373    if (ctxt->lastError.code != XML_ERR_OK)
13374        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13375
13376    ctxt->sax = oldsax;
13377    ctxt->dict = NULL;
13378    ctxt->attsDefault = NULL;
13379    ctxt->attsSpecial = NULL;
13380    xmlFreeParserCtxt(ctxt);
13381    if (newDoc != NULL) {
13382	xmlFreeDoc(newDoc);
13383    }
13384
13385    return(ret);
13386}
13387
13388/**
13389 * xmlParseInNodeContext:
13390 * @node:  the context node
13391 * @data:  the input string
13392 * @datalen:  the input string length in bytes
13393 * @options:  a combination of xmlParserOption
13394 * @lst:  the return value for the set of parsed nodes
13395 *
13396 * Parse a well-balanced chunk of an XML document
13397 * within the context (DTD, namespaces, etc ...) of the given node.
13398 *
13399 * The allowed sequence for the data is a Well Balanced Chunk defined by
13400 * the content production in the XML grammar:
13401 *
13402 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13403 *
13404 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13405 * error code otherwise
13406 */
13407xmlParserErrors
13408xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13409                      int options, xmlNodePtr *lst) {
13410#ifdef SAX2
13411    xmlParserCtxtPtr ctxt;
13412    xmlDocPtr doc = NULL;
13413    xmlNodePtr fake, cur;
13414    int nsnr = 0;
13415
13416    xmlParserErrors ret = XML_ERR_OK;
13417
13418    /*
13419     * check all input parameters, grab the document
13420     */
13421    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13422        return(XML_ERR_INTERNAL_ERROR);
13423    switch (node->type) {
13424        case XML_ELEMENT_NODE:
13425        case XML_ATTRIBUTE_NODE:
13426        case XML_TEXT_NODE:
13427        case XML_CDATA_SECTION_NODE:
13428        case XML_ENTITY_REF_NODE:
13429        case XML_PI_NODE:
13430        case XML_COMMENT_NODE:
13431        case XML_DOCUMENT_NODE:
13432        case XML_HTML_DOCUMENT_NODE:
13433	    break;
13434	default:
13435	    return(XML_ERR_INTERNAL_ERROR);
13436
13437    }
13438    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13439           (node->type != XML_DOCUMENT_NODE) &&
13440	   (node->type != XML_HTML_DOCUMENT_NODE))
13441	node = node->parent;
13442    if (node == NULL)
13443	return(XML_ERR_INTERNAL_ERROR);
13444    if (node->type == XML_ELEMENT_NODE)
13445	doc = node->doc;
13446    else
13447        doc = (xmlDocPtr) node;
13448    if (doc == NULL)
13449	return(XML_ERR_INTERNAL_ERROR);
13450
13451    /*
13452     * allocate a context and set-up everything not related to the
13453     * node position in the tree
13454     */
13455    if (doc->type == XML_DOCUMENT_NODE)
13456	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13457#ifdef LIBXML_HTML_ENABLED
13458    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13459	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13460        /*
13461         * When parsing in context, it makes no sense to add implied
13462         * elements like html/body/etc...
13463         */
13464        options |= HTML_PARSE_NOIMPLIED;
13465    }
13466#endif
13467    else
13468        return(XML_ERR_INTERNAL_ERROR);
13469
13470    if (ctxt == NULL)
13471        return(XML_ERR_NO_MEMORY);
13472
13473    /*
13474     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13475     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13476     * we must wait until the last moment to free the original one.
13477     */
13478    if (doc->dict != NULL) {
13479        if (ctxt->dict != NULL)
13480	    xmlDictFree(ctxt->dict);
13481	ctxt->dict = doc->dict;
13482    } else
13483        options |= XML_PARSE_NODICT;
13484
13485    if (doc->encoding != NULL) {
13486        xmlCharEncodingHandlerPtr hdlr;
13487
13488        if (ctxt->encoding != NULL)
13489	    xmlFree((xmlChar *) ctxt->encoding);
13490        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13491
13492        hdlr = xmlFindCharEncodingHandler(doc->encoding);
13493        if (hdlr != NULL) {
13494            xmlSwitchToEncoding(ctxt, hdlr);
13495	} else {
13496            return(XML_ERR_UNSUPPORTED_ENCODING);
13497        }
13498    }
13499
13500    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13501    xmlDetectSAX2(ctxt);
13502    ctxt->myDoc = doc;
13503
13504    fake = xmlNewComment(NULL);
13505    if (fake == NULL) {
13506        xmlFreeParserCtxt(ctxt);
13507	return(XML_ERR_NO_MEMORY);
13508    }
13509    xmlAddChild(node, fake);
13510
13511    if (node->type == XML_ELEMENT_NODE) {
13512	nodePush(ctxt, node);
13513	/*
13514	 * initialize the SAX2 namespaces stack
13515	 */
13516	cur = node;
13517	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13518	    xmlNsPtr ns = cur->nsDef;
13519	    const xmlChar *iprefix, *ihref;
13520
13521	    while (ns != NULL) {
13522		if (ctxt->dict) {
13523		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13524		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13525		} else {
13526		    iprefix = ns->prefix;
13527		    ihref = ns->href;
13528		}
13529
13530	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13531		    nsPush(ctxt, iprefix, ihref);
13532		    nsnr++;
13533		}
13534		ns = ns->next;
13535	    }
13536	    cur = cur->parent;
13537	}
13538	ctxt->instate = XML_PARSER_CONTENT;
13539    }
13540
13541    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13542	/*
13543	 * ID/IDREF registration will be done in xmlValidateElement below
13544	 */
13545	ctxt->loadsubset |= XML_SKIP_IDS;
13546    }
13547
13548#ifdef LIBXML_HTML_ENABLED
13549    if (doc->type == XML_HTML_DOCUMENT_NODE)
13550        __htmlParseContent(ctxt);
13551    else
13552#endif
13553	xmlParseContent(ctxt);
13554
13555    nsPop(ctxt, nsnr);
13556    if ((RAW == '<') && (NXT(1) == '/')) {
13557	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13558    } else if (RAW != 0) {
13559	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13560    }
13561    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13562	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13563	ctxt->wellFormed = 0;
13564    }
13565
13566    if (!ctxt->wellFormed) {
13567        if (ctxt->errNo == 0)
13568	    ret = XML_ERR_INTERNAL_ERROR;
13569	else
13570	    ret = (xmlParserErrors)ctxt->errNo;
13571    } else {
13572        ret = XML_ERR_OK;
13573    }
13574
13575    /*
13576     * Return the newly created nodeset after unlinking it from
13577     * the pseudo sibling.
13578     */
13579
13580    cur = fake->next;
13581    fake->next = NULL;
13582    node->last = fake;
13583
13584    if (cur != NULL) {
13585	cur->prev = NULL;
13586    }
13587
13588    *lst = cur;
13589
13590    while (cur != NULL) {
13591	cur->parent = NULL;
13592	cur = cur->next;
13593    }
13594
13595    xmlUnlinkNode(fake);
13596    xmlFreeNode(fake);
13597
13598
13599    if (ret != XML_ERR_OK) {
13600        xmlFreeNodeList(*lst);
13601	*lst = NULL;
13602    }
13603
13604    if (doc->dict != NULL)
13605        ctxt->dict = NULL;
13606    xmlFreeParserCtxt(ctxt);
13607
13608    return(ret);
13609#else /* !SAX2 */
13610    return(XML_ERR_INTERNAL_ERROR);
13611#endif
13612}
13613
13614#ifdef LIBXML_SAX1_ENABLED
13615/**
13616 * xmlParseBalancedChunkMemoryRecover:
13617 * @doc:  the document the chunk pertains to
13618 * @sax:  the SAX handler bloc (possibly NULL)
13619 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13620 * @depth:  Used for loop detection, use 0
13621 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13622 * @lst:  the return value for the set of parsed nodes
13623 * @recover: return nodes even if the data is broken (use 0)
13624 *
13625 *
13626 * Parse a well-balanced chunk of an XML document
13627 * called by the parser
13628 * The allowed sequence for the Well Balanced Chunk is the one defined by
13629 * the content production in the XML grammar:
13630 *
13631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13632 *
13633 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13634 *    the parser error code otherwise
13635 *
13636 * In case recover is set to 1, the nodelist will not be empty even if
13637 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13638 * some extent.
13639 */
13640int
13641xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13642     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13643     int recover) {
13644    xmlParserCtxtPtr ctxt;
13645    xmlDocPtr newDoc;
13646    xmlSAXHandlerPtr oldsax = NULL;
13647    xmlNodePtr content, newRoot;
13648    int size;
13649    int ret = 0;
13650
13651    if (depth > 40) {
13652	return(XML_ERR_ENTITY_LOOP);
13653    }
13654
13655
13656    if (lst != NULL)
13657        *lst = NULL;
13658    if (string == NULL)
13659        return(-1);
13660
13661    size = xmlStrlen(string);
13662
13663    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13664    if (ctxt == NULL) return(-1);
13665    ctxt->userData = ctxt;
13666    if (sax != NULL) {
13667	oldsax = ctxt->sax;
13668        ctxt->sax = sax;
13669	if (user_data != NULL)
13670	    ctxt->userData = user_data;
13671    }
13672    newDoc = xmlNewDoc(BAD_CAST "1.0");
13673    if (newDoc == NULL) {
13674	xmlFreeParserCtxt(ctxt);
13675	return(-1);
13676    }
13677    newDoc->properties = XML_DOC_INTERNAL;
13678    if ((doc != NULL) && (doc->dict != NULL)) {
13679        xmlDictFree(ctxt->dict);
13680	ctxt->dict = doc->dict;
13681	xmlDictReference(ctxt->dict);
13682	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13683	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13684	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13685	ctxt->dictNames = 1;
13686    } else {
13687	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13688    }
13689    if (doc != NULL) {
13690	newDoc->intSubset = doc->intSubset;
13691	newDoc->extSubset = doc->extSubset;
13692    }
13693    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13694    if (newRoot == NULL) {
13695	if (sax != NULL)
13696	    ctxt->sax = oldsax;
13697	xmlFreeParserCtxt(ctxt);
13698	newDoc->intSubset = NULL;
13699	newDoc->extSubset = NULL;
13700        xmlFreeDoc(newDoc);
13701	return(-1);
13702    }
13703    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13704    nodePush(ctxt, newRoot);
13705    if (doc == NULL) {
13706	ctxt->myDoc = newDoc;
13707    } else {
13708	ctxt->myDoc = newDoc;
13709	newDoc->children->doc = doc;
13710	/* Ensure that doc has XML spec namespace */
13711	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13712	newDoc->oldNs = doc->oldNs;
13713    }
13714    ctxt->instate = XML_PARSER_CONTENT;
13715    ctxt->depth = depth;
13716
13717    /*
13718     * Doing validity checking on chunk doesn't make sense
13719     */
13720    ctxt->validate = 0;
13721    ctxt->loadsubset = 0;
13722    xmlDetectSAX2(ctxt);
13723
13724    if ( doc != NULL ){
13725        content = doc->children;
13726        doc->children = NULL;
13727        xmlParseContent(ctxt);
13728        doc->children = content;
13729    }
13730    else {
13731        xmlParseContent(ctxt);
13732    }
13733    if ((RAW == '<') && (NXT(1) == '/')) {
13734	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13735    } else if (RAW != 0) {
13736	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13737    }
13738    if (ctxt->node != newDoc->children) {
13739	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13740    }
13741
13742    if (!ctxt->wellFormed) {
13743        if (ctxt->errNo == 0)
13744	    ret = 1;
13745	else
13746	    ret = ctxt->errNo;
13747    } else {
13748      ret = 0;
13749    }
13750
13751    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13752	xmlNodePtr cur;
13753
13754	/*
13755	 * Return the newly created nodeset after unlinking it from
13756	 * they pseudo parent.
13757	 */
13758	cur = newDoc->children->children;
13759	*lst = cur;
13760	while (cur != NULL) {
13761	    xmlSetTreeDoc(cur, doc);
13762	    cur->parent = NULL;
13763	    cur = cur->next;
13764	}
13765	newDoc->children->children = NULL;
13766    }
13767
13768    if (sax != NULL)
13769	ctxt->sax = oldsax;
13770    xmlFreeParserCtxt(ctxt);
13771    newDoc->intSubset = NULL;
13772    newDoc->extSubset = NULL;
13773    newDoc->oldNs = NULL;
13774    xmlFreeDoc(newDoc);
13775
13776    return(ret);
13777}
13778
13779/**
13780 * xmlSAXParseEntity:
13781 * @sax:  the SAX handler block
13782 * @filename:  the filename
13783 *
13784 * parse an XML external entity out of context and build a tree.
13785 * It use the given SAX function block to handle the parsing callback.
13786 * If sax is NULL, fallback to the default DOM tree building routines.
13787 *
13788 * [78] extParsedEnt ::= TextDecl? content
13789 *
13790 * This correspond to a "Well Balanced" chunk
13791 *
13792 * Returns the resulting document tree
13793 */
13794
13795xmlDocPtr
13796xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13797    xmlDocPtr ret;
13798    xmlParserCtxtPtr ctxt;
13799
13800    ctxt = xmlCreateFileParserCtxt(filename);
13801    if (ctxt == NULL) {
13802	return(NULL);
13803    }
13804    if (sax != NULL) {
13805	if (ctxt->sax != NULL)
13806	    xmlFree(ctxt->sax);
13807        ctxt->sax = sax;
13808        ctxt->userData = NULL;
13809    }
13810
13811    xmlParseExtParsedEnt(ctxt);
13812
13813    if (ctxt->wellFormed)
13814	ret = ctxt->myDoc;
13815    else {
13816        ret = NULL;
13817        xmlFreeDoc(ctxt->myDoc);
13818        ctxt->myDoc = NULL;
13819    }
13820    if (sax != NULL)
13821        ctxt->sax = NULL;
13822    xmlFreeParserCtxt(ctxt);
13823
13824    return(ret);
13825}
13826
13827/**
13828 * xmlParseEntity:
13829 * @filename:  the filename
13830 *
13831 * parse an XML external entity out of context and build a tree.
13832 *
13833 * [78] extParsedEnt ::= TextDecl? content
13834 *
13835 * This correspond to a "Well Balanced" chunk
13836 *
13837 * Returns the resulting document tree
13838 */
13839
13840xmlDocPtr
13841xmlParseEntity(const char *filename) {
13842    return(xmlSAXParseEntity(NULL, filename));
13843}
13844#endif /* LIBXML_SAX1_ENABLED */
13845
13846/**
13847 * xmlCreateEntityParserCtxtInternal:
13848 * @URL:  the entity URL
13849 * @ID:  the entity PUBLIC ID
13850 * @base:  a possible base for the target URI
13851 * @pctx:  parser context used to set options on new context
13852 *
13853 * Create a parser context for an external entity
13854 * Automatic support for ZLIB/Compress compressed document is provided
13855 * by default if found at compile-time.
13856 *
13857 * Returns the new parser context or NULL
13858 */
13859static xmlParserCtxtPtr
13860xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13861	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13862    xmlParserCtxtPtr ctxt;
13863    xmlParserInputPtr inputStream;
13864    char *directory = NULL;
13865    xmlChar *uri;
13866
13867    ctxt = xmlNewParserCtxt();
13868    if (ctxt == NULL) {
13869	return(NULL);
13870    }
13871
13872    if (pctx != NULL) {
13873        ctxt->options = pctx->options;
13874        ctxt->_private = pctx->_private;
13875    }
13876
13877    uri = xmlBuildURI(URL, base);
13878
13879    if (uri == NULL) {
13880	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13881	if (inputStream == NULL) {
13882	    xmlFreeParserCtxt(ctxt);
13883	    return(NULL);
13884	}
13885
13886	inputPush(ctxt, inputStream);
13887
13888	if ((ctxt->directory == NULL) && (directory == NULL))
13889	    directory = xmlParserGetDirectory((char *)URL);
13890	if ((ctxt->directory == NULL) && (directory != NULL))
13891	    ctxt->directory = directory;
13892    } else {
13893	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13894	if (inputStream == NULL) {
13895	    xmlFree(uri);
13896	    xmlFreeParserCtxt(ctxt);
13897	    return(NULL);
13898	}
13899
13900	inputPush(ctxt, inputStream);
13901
13902	if ((ctxt->directory == NULL) && (directory == NULL))
13903	    directory = xmlParserGetDirectory((char *)uri);
13904	if ((ctxt->directory == NULL) && (directory != NULL))
13905	    ctxt->directory = directory;
13906	xmlFree(uri);
13907    }
13908    return(ctxt);
13909}
13910
13911/**
13912 * xmlCreateEntityParserCtxt:
13913 * @URL:  the entity URL
13914 * @ID:  the entity PUBLIC ID
13915 * @base:  a possible base for the target URI
13916 *
13917 * Create a parser context for an external entity
13918 * Automatic support for ZLIB/Compress compressed document is provided
13919 * by default if found at compile-time.
13920 *
13921 * Returns the new parser context or NULL
13922 */
13923xmlParserCtxtPtr
13924xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13925	                  const xmlChar *base) {
13926    return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13927
13928}
13929
13930/************************************************************************
13931 *									*
13932 *		Front ends when parsing from a file			*
13933 *									*
13934 ************************************************************************/
13935
13936/**
13937 * xmlCreateURLParserCtxt:
13938 * @filename:  the filename or URL
13939 * @options:  a combination of xmlParserOption
13940 *
13941 * Create a parser context for a file or URL content.
13942 * Automatic support for ZLIB/Compress compressed document is provided
13943 * by default if found at compile-time and for file accesses
13944 *
13945 * Returns the new parser context or NULL
13946 */
13947xmlParserCtxtPtr
13948xmlCreateURLParserCtxt(const char *filename, int options)
13949{
13950    xmlParserCtxtPtr ctxt;
13951    xmlParserInputPtr inputStream;
13952    char *directory = NULL;
13953
13954    ctxt = xmlNewParserCtxt();
13955    if (ctxt == NULL) {
13956	xmlErrMemory(NULL, "cannot allocate parser context");
13957	return(NULL);
13958    }
13959
13960    if (options)
13961	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13962    ctxt->linenumbers = 1;
13963
13964    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13965    if (inputStream == NULL) {
13966	xmlFreeParserCtxt(ctxt);
13967	return(NULL);
13968    }
13969
13970    inputPush(ctxt, inputStream);
13971    if ((ctxt->directory == NULL) && (directory == NULL))
13972        directory = xmlParserGetDirectory(filename);
13973    if ((ctxt->directory == NULL) && (directory != NULL))
13974        ctxt->directory = directory;
13975
13976    return(ctxt);
13977}
13978
13979/**
13980 * xmlCreateFileParserCtxt:
13981 * @filename:  the filename
13982 *
13983 * Create a parser context for a file content.
13984 * Automatic support for ZLIB/Compress compressed document is provided
13985 * by default if found at compile-time.
13986 *
13987 * Returns the new parser context or NULL
13988 */
13989xmlParserCtxtPtr
13990xmlCreateFileParserCtxt(const char *filename)
13991{
13992    return(xmlCreateURLParserCtxt(filename, 0));
13993}
13994
13995#ifdef LIBXML_SAX1_ENABLED
13996/**
13997 * xmlSAXParseFileWithData:
13998 * @sax:  the SAX handler block
13999 * @filename:  the filename
14000 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14001 *             documents
14002 * @data:  the userdata
14003 *
14004 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14005 * compressed document is provided by default if found at compile-time.
14006 * It use the given SAX function block to handle the parsing callback.
14007 * If sax is NULL, fallback to the default DOM tree building routines.
14008 *
14009 * User data (void *) is stored within the parser context in the
14010 * context's _private member, so it is available nearly everywhere in libxml
14011 *
14012 * Returns the resulting document tree
14013 */
14014
14015xmlDocPtr
14016xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14017                        int recovery, void *data) {
14018    xmlDocPtr ret;
14019    xmlParserCtxtPtr ctxt;
14020
14021    xmlInitParser();
14022
14023    ctxt = xmlCreateFileParserCtxt(filename);
14024    if (ctxt == NULL) {
14025	return(NULL);
14026    }
14027    if (sax != NULL) {
14028	if (ctxt->sax != NULL)
14029	    xmlFree(ctxt->sax);
14030        ctxt->sax = sax;
14031    }
14032    xmlDetectSAX2(ctxt);
14033    if (data!=NULL) {
14034	ctxt->_private = data;
14035    }
14036
14037    if (ctxt->directory == NULL)
14038        ctxt->directory = xmlParserGetDirectory(filename);
14039
14040    ctxt->recovery = recovery;
14041
14042    xmlParseDocument(ctxt);
14043
14044    if ((ctxt->wellFormed) || recovery) {
14045        ret = ctxt->myDoc;
14046	if (ret != NULL) {
14047	    if (ctxt->input->buf->compressed > 0)
14048		ret->compression = 9;
14049	    else
14050		ret->compression = ctxt->input->buf->compressed;
14051	}
14052    }
14053    else {
14054       ret = NULL;
14055       xmlFreeDoc(ctxt->myDoc);
14056       ctxt->myDoc = NULL;
14057    }
14058    if (sax != NULL)
14059        ctxt->sax = NULL;
14060    xmlFreeParserCtxt(ctxt);
14061
14062    return(ret);
14063}
14064
14065/**
14066 * xmlSAXParseFile:
14067 * @sax:  the SAX handler block
14068 * @filename:  the filename
14069 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14070 *             documents
14071 *
14072 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14073 * compressed document is provided by default if found at compile-time.
14074 * It use the given SAX function block to handle the parsing callback.
14075 * If sax is NULL, fallback to the default DOM tree building routines.
14076 *
14077 * Returns the resulting document tree
14078 */
14079
14080xmlDocPtr
14081xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14082                          int recovery) {
14083    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14084}
14085
14086/**
14087 * xmlRecoverDoc:
14088 * @cur:  a pointer to an array of xmlChar
14089 *
14090 * parse an XML in-memory document and build a tree.
14091 * In the case the document is not Well Formed, a attempt to build a
14092 * tree is tried anyway
14093 *
14094 * Returns the resulting document tree or NULL in case of failure
14095 */
14096
14097xmlDocPtr
14098xmlRecoverDoc(const xmlChar *cur) {
14099    return(xmlSAXParseDoc(NULL, cur, 1));
14100}
14101
14102/**
14103 * xmlParseFile:
14104 * @filename:  the filename
14105 *
14106 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14107 * compressed document is provided by default if found at compile-time.
14108 *
14109 * Returns the resulting document tree if the file was wellformed,
14110 * NULL otherwise.
14111 */
14112
14113xmlDocPtr
14114xmlParseFile(const char *filename) {
14115    return(xmlSAXParseFile(NULL, filename, 0));
14116}
14117
14118/**
14119 * xmlRecoverFile:
14120 * @filename:  the filename
14121 *
14122 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14123 * compressed document is provided by default if found at compile-time.
14124 * In the case the document is not Well Formed, it attempts to build
14125 * a tree anyway
14126 *
14127 * Returns the resulting document tree or NULL in case of failure
14128 */
14129
14130xmlDocPtr
14131xmlRecoverFile(const char *filename) {
14132    return(xmlSAXParseFile(NULL, filename, 1));
14133}
14134
14135
14136/**
14137 * xmlSetupParserForBuffer:
14138 * @ctxt:  an XML parser context
14139 * @buffer:  a xmlChar * buffer
14140 * @filename:  a file name
14141 *
14142 * Setup the parser context to parse a new buffer; Clears any prior
14143 * contents from the parser context. The buffer parameter must not be
14144 * NULL, but the filename parameter can be
14145 */
14146void
14147xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14148                             const char* filename)
14149{
14150    xmlParserInputPtr input;
14151
14152    if ((ctxt == NULL) || (buffer == NULL))
14153        return;
14154
14155    input = xmlNewInputStream(ctxt);
14156    if (input == NULL) {
14157        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14158        xmlClearParserCtxt(ctxt);
14159        return;
14160    }
14161
14162    xmlClearParserCtxt(ctxt);
14163    if (filename != NULL)
14164        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14165    input->base = buffer;
14166    input->cur = buffer;
14167    input->end = &buffer[xmlStrlen(buffer)];
14168    inputPush(ctxt, input);
14169}
14170
14171/**
14172 * xmlSAXUserParseFile:
14173 * @sax:  a SAX handler
14174 * @user_data:  The user data returned on SAX callbacks
14175 * @filename:  a file name
14176 *
14177 * parse an XML file and call the given SAX handler routines.
14178 * Automatic support for ZLIB/Compress compressed document is provided
14179 *
14180 * Returns 0 in case of success or a error number otherwise
14181 */
14182int
14183xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14184                    const char *filename) {
14185    int ret = 0;
14186    xmlParserCtxtPtr ctxt;
14187
14188    ctxt = xmlCreateFileParserCtxt(filename);
14189    if (ctxt == NULL) return -1;
14190    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14191	xmlFree(ctxt->sax);
14192    ctxt->sax = sax;
14193    xmlDetectSAX2(ctxt);
14194
14195    if (user_data != NULL)
14196	ctxt->userData = user_data;
14197
14198    xmlParseDocument(ctxt);
14199
14200    if (ctxt->wellFormed)
14201	ret = 0;
14202    else {
14203        if (ctxt->errNo != 0)
14204	    ret = ctxt->errNo;
14205	else
14206	    ret = -1;
14207    }
14208    if (sax != NULL)
14209	ctxt->sax = NULL;
14210    if (ctxt->myDoc != NULL) {
14211        xmlFreeDoc(ctxt->myDoc);
14212	ctxt->myDoc = NULL;
14213    }
14214    xmlFreeParserCtxt(ctxt);
14215
14216    return ret;
14217}
14218#endif /* LIBXML_SAX1_ENABLED */
14219
14220/************************************************************************
14221 *									*
14222 *		Front ends when parsing from memory			*
14223 *									*
14224 ************************************************************************/
14225
14226/**
14227 * xmlCreateMemoryParserCtxt:
14228 * @buffer:  a pointer to a char array
14229 * @size:  the size of the array
14230 *
14231 * Create a parser context for an XML in-memory document.
14232 *
14233 * Returns the new parser context or NULL
14234 */
14235xmlParserCtxtPtr
14236xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14237    xmlParserCtxtPtr ctxt;
14238    xmlParserInputPtr input;
14239    xmlParserInputBufferPtr buf;
14240
14241    if (buffer == NULL)
14242	return(NULL);
14243    if (size <= 0)
14244	return(NULL);
14245
14246    ctxt = xmlNewParserCtxt();
14247    if (ctxt == NULL)
14248	return(NULL);
14249
14250    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14251    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14252    if (buf == NULL) {
14253	xmlFreeParserCtxt(ctxt);
14254	return(NULL);
14255    }
14256
14257    input = xmlNewInputStream(ctxt);
14258    if (input == NULL) {
14259	xmlFreeParserInputBuffer(buf);
14260	xmlFreeParserCtxt(ctxt);
14261	return(NULL);
14262    }
14263
14264    input->filename = NULL;
14265    input->buf = buf;
14266    xmlBufResetInput(input->buf->buffer, input);
14267
14268    inputPush(ctxt, input);
14269    return(ctxt);
14270}
14271
14272#ifdef LIBXML_SAX1_ENABLED
14273/**
14274 * xmlSAXParseMemoryWithData:
14275 * @sax:  the SAX handler block
14276 * @buffer:  an pointer to a char array
14277 * @size:  the size of the array
14278 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14279 *             documents
14280 * @data:  the userdata
14281 *
14282 * parse an XML in-memory block and use the given SAX function block
14283 * to handle the parsing callback. If sax is NULL, fallback to the default
14284 * DOM tree building routines.
14285 *
14286 * User data (void *) is stored within the parser context in the
14287 * context's _private member, so it is available nearly everywhere in libxml
14288 *
14289 * Returns the resulting document tree
14290 */
14291
14292xmlDocPtr
14293xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14294	          int size, int recovery, void *data) {
14295    xmlDocPtr ret;
14296    xmlParserCtxtPtr ctxt;
14297
14298    xmlInitParser();
14299
14300    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14301    if (ctxt == NULL) return(NULL);
14302    if (sax != NULL) {
14303	if (ctxt->sax != NULL)
14304	    xmlFree(ctxt->sax);
14305        ctxt->sax = sax;
14306    }
14307    xmlDetectSAX2(ctxt);
14308    if (data!=NULL) {
14309	ctxt->_private=data;
14310    }
14311
14312    ctxt->recovery = recovery;
14313
14314    xmlParseDocument(ctxt);
14315
14316    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14317    else {
14318       ret = NULL;
14319       xmlFreeDoc(ctxt->myDoc);
14320       ctxt->myDoc = NULL;
14321    }
14322    if (sax != NULL)
14323	ctxt->sax = NULL;
14324    xmlFreeParserCtxt(ctxt);
14325
14326    return(ret);
14327}
14328
14329/**
14330 * xmlSAXParseMemory:
14331 * @sax:  the SAX handler block
14332 * @buffer:  an pointer to a char array
14333 * @size:  the size of the array
14334 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14335 *             documents
14336 *
14337 * parse an XML in-memory block and use the given SAX function block
14338 * to handle the parsing callback. If sax is NULL, fallback to the default
14339 * DOM tree building routines.
14340 *
14341 * Returns the resulting document tree
14342 */
14343xmlDocPtr
14344xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14345	          int size, int recovery) {
14346    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14347}
14348
14349/**
14350 * xmlParseMemory:
14351 * @buffer:  an pointer to a char array
14352 * @size:  the size of the array
14353 *
14354 * parse an XML in-memory block and build a tree.
14355 *
14356 * Returns the resulting document tree
14357 */
14358
14359xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14360   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14361}
14362
14363/**
14364 * xmlRecoverMemory:
14365 * @buffer:  an pointer to a char array
14366 * @size:  the size of the array
14367 *
14368 * parse an XML in-memory block and build a tree.
14369 * In the case the document is not Well Formed, an attempt to
14370 * build a tree is tried anyway
14371 *
14372 * Returns the resulting document tree or NULL in case of error
14373 */
14374
14375xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14376   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14377}
14378
14379/**
14380 * xmlSAXUserParseMemory:
14381 * @sax:  a SAX handler
14382 * @user_data:  The user data returned on SAX callbacks
14383 * @buffer:  an in-memory XML document input
14384 * @size:  the length of the XML document in bytes
14385 *
14386 * A better SAX parsing routine.
14387 * parse an XML in-memory buffer and call the given SAX handler routines.
14388 *
14389 * Returns 0 in case of success or a error number otherwise
14390 */
14391int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14392			  const char *buffer, int size) {
14393    int ret = 0;
14394    xmlParserCtxtPtr ctxt;
14395
14396    xmlInitParser();
14397
14398    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14399    if (ctxt == NULL) return -1;
14400    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14401        xmlFree(ctxt->sax);
14402    ctxt->sax = sax;
14403    xmlDetectSAX2(ctxt);
14404
14405    if (user_data != NULL)
14406	ctxt->userData = user_data;
14407
14408    xmlParseDocument(ctxt);
14409
14410    if (ctxt->wellFormed)
14411	ret = 0;
14412    else {
14413        if (ctxt->errNo != 0)
14414	    ret = ctxt->errNo;
14415	else
14416	    ret = -1;
14417    }
14418    if (sax != NULL)
14419        ctxt->sax = NULL;
14420    if (ctxt->myDoc != NULL) {
14421        xmlFreeDoc(ctxt->myDoc);
14422	ctxt->myDoc = NULL;
14423    }
14424    xmlFreeParserCtxt(ctxt);
14425
14426    return ret;
14427}
14428#endif /* LIBXML_SAX1_ENABLED */
14429
14430/**
14431 * xmlCreateDocParserCtxt:
14432 * @cur:  a pointer to an array of xmlChar
14433 *
14434 * Creates a parser context for an XML in-memory document.
14435 *
14436 * Returns the new parser context or NULL
14437 */
14438xmlParserCtxtPtr
14439xmlCreateDocParserCtxt(const xmlChar *cur) {
14440    int len;
14441
14442    if (cur == NULL)
14443	return(NULL);
14444    len = xmlStrlen(cur);
14445    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14446}
14447
14448#ifdef LIBXML_SAX1_ENABLED
14449/**
14450 * xmlSAXParseDoc:
14451 * @sax:  the SAX handler block
14452 * @cur:  a pointer to an array of xmlChar
14453 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14454 *             documents
14455 *
14456 * parse an XML in-memory document and build a tree.
14457 * It use the given SAX function block to handle the parsing callback.
14458 * If sax is NULL, fallback to the default DOM tree building routines.
14459 *
14460 * Returns the resulting document tree
14461 */
14462
14463xmlDocPtr
14464xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14465    xmlDocPtr ret;
14466    xmlParserCtxtPtr ctxt;
14467    xmlSAXHandlerPtr oldsax = NULL;
14468
14469    if (cur == NULL) return(NULL);
14470
14471
14472    ctxt = xmlCreateDocParserCtxt(cur);
14473    if (ctxt == NULL) return(NULL);
14474    if (sax != NULL) {
14475        oldsax = ctxt->sax;
14476        ctxt->sax = sax;
14477        ctxt->userData = NULL;
14478    }
14479    xmlDetectSAX2(ctxt);
14480
14481    xmlParseDocument(ctxt);
14482    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14483    else {
14484       ret = NULL;
14485       xmlFreeDoc(ctxt->myDoc);
14486       ctxt->myDoc = NULL;
14487    }
14488    if (sax != NULL)
14489	ctxt->sax = oldsax;
14490    xmlFreeParserCtxt(ctxt);
14491
14492    return(ret);
14493}
14494
14495/**
14496 * xmlParseDoc:
14497 * @cur:  a pointer to an array of xmlChar
14498 *
14499 * parse an XML in-memory document and build a tree.
14500 *
14501 * Returns the resulting document tree
14502 */
14503
14504xmlDocPtr
14505xmlParseDoc(const xmlChar *cur) {
14506    return(xmlSAXParseDoc(NULL, cur, 0));
14507}
14508#endif /* LIBXML_SAX1_ENABLED */
14509
14510#ifdef LIBXML_LEGACY_ENABLED
14511/************************************************************************
14512 *									*
14513 *	Specific function to keep track of entities references		*
14514 *	and used by the XSLT debugger					*
14515 *									*
14516 ************************************************************************/
14517
/*
 * Callback invoked by xmlAddEntityReference(); it starts out as NULL and
 * is replaced through xmlSetEntityReferenceFunc().
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14519
14520/**
14521 * xmlAddEntityReference:
14522 * @ent : A valid entity
14523 * @firstNode : A valid first node for children of entity
14524 * @lastNode : A valid last node of children entity
14525 *
14526 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14527 */
14528static void
14529xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14530                      xmlNodePtr lastNode)
14531{
14532    if (xmlEntityRefFunc != NULL) {
14533        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14534    }
14535}
14536
14537
/**
 * xmlSetEntityReferenceFunc:
 * @func: A valid function
 *
 * Set the function to call back when an XML entity reference has been
 * made. The pointer is stored in the file-level xmlEntityRefFunc
 * variable, replacing any previous value.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
14549#endif /* LIBXML_LEGACY_ENABLED */
14550
14551/************************************************************************
14552 *									*
14553 *				Miscellaneous				*
14554 *									*
14555 ************************************************************************/
14556
14557#ifdef LIBXML_XPATH_ENABLED
14558#include <libxml/xpath.h>
14559#endif
14560
/* Declared here so xmlInitParser() can compare xmlGenericError against it. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 by xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14563
14564/**
14565 * xmlInitParser:
14566 *
14567 * Initialization function for the XML parser.
14568 * This is not reentrant. Call once before processing in case of
14569 * use in multithreaded programs.
14570 */
14571
14572void
14573xmlInitParser(void) {
14574    if (xmlParserInitialized != 0)
14575	return;
14576
14577#ifdef LIBXML_THREAD_ENABLED
14578    __xmlGlobalInitMutexLock();
14579    if (xmlParserInitialized == 0) {
14580#endif
14581	xmlInitThreads();
14582	xmlInitGlobals();
14583	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14584	    (xmlGenericError == NULL))
14585	    initGenericErrorDefaultFunc(NULL);
14586	xmlInitMemory();
14587        xmlInitializeDict();
14588	xmlInitCharEncodingHandlers();
14589	xmlDefaultSAXHandlerInit();
14590	xmlRegisterDefaultInputCallbacks();
14591#ifdef LIBXML_OUTPUT_ENABLED
14592	xmlRegisterDefaultOutputCallbacks();
14593#endif /* LIBXML_OUTPUT_ENABLED */
14594#ifdef LIBXML_HTML_ENABLED
14595	htmlInitAutoClose();
14596	htmlDefaultSAXHandlerInit();
14597#endif
14598#ifdef LIBXML_XPATH_ENABLED
14599	xmlXPathInit();
14600#endif
14601	xmlParserInitialized = 1;
14602#ifdef LIBXML_THREAD_ENABLED
14603    }
14604    __xmlGlobalInitMutexUnlock();
14605#endif
14606}
14607
14608/**
14609 * xmlCleanupParser:
14610 *
14611 * This function name is somewhat misleading. It does not clean up
14612 * parser state, it cleans up memory allocated by the library itself.
14613 * It is a cleanup function for the XML library. It tries to reclaim all
14614 * related global memory allocated for the library processing.
14615 * It doesn't deallocate any document related memory. One should
14616 * call xmlCleanupParser() only when the process has finished using
14617 * the library and all XML/HTML documents built with it.
14618 * See also xmlInitParser() which has the opposite function of preparing
14619 * the library for operations.
14620 *
14621 * WARNING: if your application is multithreaded or has plugin support
14622 *          calling this may crash the application if another thread or
14623 *          a plugin is still using libxml2. It's sometimes very hard to
14624 *          guess if libxml2 is in use in the application, some libraries
14625 *          or plugins may use it without notice. In case of doubt abstain
14626 *          from calling this function or do it just before calling exit()
14627 *          to avoid leak reports from valgrind !
14628 */
14629
14630void
14631xmlCleanupParser(void) {
14632    if (!xmlParserInitialized)
14633	return;
14634
14635    xmlCleanupCharEncodingHandlers();
14636#ifdef LIBXML_CATALOG_ENABLED
14637    xmlCatalogCleanup();
14638#endif
14639    xmlDictCleanup();
14640    xmlCleanupInputCallbacks();
14641#ifdef LIBXML_OUTPUT_ENABLED
14642    xmlCleanupOutputCallbacks();
14643#endif
14644#ifdef LIBXML_SCHEMAS_ENABLED
14645    xmlSchemaCleanupTypes();
14646    xmlRelaxNGCleanupTypes();
14647#endif
14648    xmlCleanupGlobals();
14649    xmlResetLastError();
14650    xmlCleanupThreads(); /* must be last if called not from the main thread */
14651    xmlCleanupMemory();
14652    xmlParserInitialized = 0;
14653}
14654
14655/************************************************************************
14656 *									*
14657 *	New set (2.6.0) of simpler and more flexible APIs		*
14658 *									*
14659 ************************************************************************/
14660
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; when it is NULL every non-NULL string is freed.
 * @str is evaluated more than once, so it must not have side effects.
 * The expansion is a single statement: use it as "DICT_FREE(x);".
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14672
14673/**
14674 * xmlCtxtReset:
14675 * @ctxt: an XML parser context
14676 *
14677 * Reset a parser context
14678 */
14679void
14680xmlCtxtReset(xmlParserCtxtPtr ctxt)
14681{
14682    xmlParserInputPtr input;
14683    xmlDictPtr dict;
14684
14685    if (ctxt == NULL)
14686        return;
14687
14688    dict = ctxt->dict;
14689
14690    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14691        xmlFreeInputStream(input);
14692    }
14693    ctxt->inputNr = 0;
14694    ctxt->input = NULL;
14695
14696    ctxt->spaceNr = 0;
14697    if (ctxt->spaceTab != NULL) {
14698	ctxt->spaceTab[0] = -1;
14699	ctxt->space = &ctxt->spaceTab[0];
14700    } else {
14701        ctxt->space = NULL;
14702    }
14703
14704
14705    ctxt->nodeNr = 0;
14706    ctxt->node = NULL;
14707
14708    ctxt->nameNr = 0;
14709    ctxt->name = NULL;
14710
14711    DICT_FREE(ctxt->version);
14712    ctxt->version = NULL;
14713    DICT_FREE(ctxt->encoding);
14714    ctxt->encoding = NULL;
14715    DICT_FREE(ctxt->directory);
14716    ctxt->directory = NULL;
14717    DICT_FREE(ctxt->extSubURI);
14718    ctxt->extSubURI = NULL;
14719    DICT_FREE(ctxt->extSubSystem);
14720    ctxt->extSubSystem = NULL;
14721    if (ctxt->myDoc != NULL)
14722        xmlFreeDoc(ctxt->myDoc);
14723    ctxt->myDoc = NULL;
14724
14725    ctxt->standalone = -1;
14726    ctxt->hasExternalSubset = 0;
14727    ctxt->hasPErefs = 0;
14728    ctxt->html = 0;
14729    ctxt->external = 0;
14730    ctxt->instate = XML_PARSER_START;
14731    ctxt->token = 0;
14732
14733    ctxt->wellFormed = 1;
14734    ctxt->nsWellFormed = 1;
14735    ctxt->disableSAX = 0;
14736    ctxt->valid = 1;
14737#if 0
14738    ctxt->vctxt.userData = ctxt;
14739    ctxt->vctxt.error = xmlParserValidityError;
14740    ctxt->vctxt.warning = xmlParserValidityWarning;
14741#endif
14742    ctxt->record_info = 0;
14743    ctxt->nbChars = 0;
14744    ctxt->checkIndex = 0;
14745    ctxt->inSubset = 0;
14746    ctxt->errNo = XML_ERR_OK;
14747    ctxt->depth = 0;
14748    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14749    ctxt->catalogs = NULL;
14750    ctxt->nbentities = 0;
14751    ctxt->sizeentities = 0;
14752    xmlInitNodeInfoSeq(&ctxt->node_seq);
14753
14754    if (ctxt->attsDefault != NULL) {
14755        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14756        ctxt->attsDefault = NULL;
14757    }
14758    if (ctxt->attsSpecial != NULL) {
14759        xmlHashFree(ctxt->attsSpecial, NULL);
14760        ctxt->attsSpecial = NULL;
14761    }
14762
14763#ifdef LIBXML_CATALOG_ENABLED
14764    if (ctxt->catalogs != NULL)
14765	xmlCatalogFreeLocal(ctxt->catalogs);
14766#endif
14767    if (ctxt->lastError.code != XML_ERR_OK)
14768        xmlResetError(&ctxt->lastError);
14769}
14770
14771/**
14772 * xmlCtxtResetPush:
14773 * @ctxt: an XML parser context
14774 * @chunk:  a pointer to an array of chars
14775 * @size:  number of chars in the array
14776 * @filename:  an optional file name or URI
14777 * @encoding:  the document encoding, or NULL
14778 *
14779 * Reset a push parser context
14780 *
14781 * Returns 0 in case of success and 1 in case of error
14782 */
14783int
14784xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14785                 int size, const char *filename, const char *encoding)
14786{
14787    xmlParserInputPtr inputStream;
14788    xmlParserInputBufferPtr buf;
14789    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14790
14791    if (ctxt == NULL)
14792        return(1);
14793
14794    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14795        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14796
14797    buf = xmlAllocParserInputBuffer(enc);
14798    if (buf == NULL)
14799        return(1);
14800
14801    if (ctxt == NULL) {
14802        xmlFreeParserInputBuffer(buf);
14803        return(1);
14804    }
14805
14806    xmlCtxtReset(ctxt);
14807
14808    if (ctxt->pushTab == NULL) {
14809        ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14810	                                    sizeof(xmlChar *));
14811        if (ctxt->pushTab == NULL) {
14812	    xmlErrMemory(ctxt, NULL);
14813            xmlFreeParserInputBuffer(buf);
14814            return(1);
14815        }
14816    }
14817
14818    if (filename == NULL) {
14819        ctxt->directory = NULL;
14820    } else {
14821        ctxt->directory = xmlParserGetDirectory(filename);
14822    }
14823
14824    inputStream = xmlNewInputStream(ctxt);
14825    if (inputStream == NULL) {
14826        xmlFreeParserInputBuffer(buf);
14827        return(1);
14828    }
14829
14830    if (filename == NULL)
14831        inputStream->filename = NULL;
14832    else
14833        inputStream->filename = (char *)
14834            xmlCanonicPath((const xmlChar *) filename);
14835    inputStream->buf = buf;
14836    xmlBufResetInput(buf->buffer, inputStream);
14837
14838    inputPush(ctxt, inputStream);
14839
14840    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14841        (ctxt->input->buf != NULL)) {
14842	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14843        size_t cur = ctxt->input->cur - ctxt->input->base;
14844
14845        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14846
14847        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14848#ifdef DEBUG_PUSH
14849        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14850#endif
14851    }
14852
14853    if (encoding != NULL) {
14854        xmlCharEncodingHandlerPtr hdlr;
14855
14856        if (ctxt->encoding != NULL)
14857	    xmlFree((xmlChar *) ctxt->encoding);
14858        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14859
14860        hdlr = xmlFindCharEncodingHandler(encoding);
14861        if (hdlr != NULL) {
14862            xmlSwitchToEncoding(ctxt, hdlr);
14863	} else {
14864	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14865			      "Unsupported encoding %s\n", BAD_CAST encoding);
14866        }
14867    } else if (enc != XML_CHAR_ENCODING_NONE) {
14868        xmlSwitchEncoding(ctxt, enc);
14869    }
14870
14871    return(0);
14872}
14873
14874
14875/**
14876 * xmlCtxtUseOptionsInternal:
14877 * @ctxt: an XML parser context
14878 * @options:  a combination of xmlParserOption
14879 * @encoding:  the user provided encoding to use
14880 *
14881 * Applies the options to the parser context
14882 *
14883 * Returns 0 in case of success, the set of unknown or unimplemented options
14884 *         in case of error.
14885 */
14886static int
14887xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14888{
14889    if (ctxt == NULL)
14890        return(-1);
14891    if (encoding != NULL) {
14892        if (ctxt->encoding != NULL)
14893	    xmlFree((xmlChar *) ctxt->encoding);
14894        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14895    }
14896    if (options & XML_PARSE_RECOVER) {
14897        ctxt->recovery = 1;
14898        options -= XML_PARSE_RECOVER;
14899	ctxt->options |= XML_PARSE_RECOVER;
14900    } else
14901        ctxt->recovery = 0;
14902    if (options & XML_PARSE_DTDLOAD) {
14903        ctxt->loadsubset = XML_DETECT_IDS;
14904        options -= XML_PARSE_DTDLOAD;
14905	ctxt->options |= XML_PARSE_DTDLOAD;
14906    } else
14907        ctxt->loadsubset = 0;
14908    if (options & XML_PARSE_DTDATTR) {
14909        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14910        options -= XML_PARSE_DTDATTR;
14911	ctxt->options |= XML_PARSE_DTDATTR;
14912    }
14913    if (options & XML_PARSE_NOENT) {
14914        ctxt->replaceEntities = 1;
14915        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14916        options -= XML_PARSE_NOENT;
14917	ctxt->options |= XML_PARSE_NOENT;
14918    } else
14919        ctxt->replaceEntities = 0;
14920    if (options & XML_PARSE_PEDANTIC) {
14921        ctxt->pedantic = 1;
14922        options -= XML_PARSE_PEDANTIC;
14923	ctxt->options |= XML_PARSE_PEDANTIC;
14924    } else
14925        ctxt->pedantic = 0;
14926    if (options & XML_PARSE_NOBLANKS) {
14927        ctxt->keepBlanks = 0;
14928        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14929        options -= XML_PARSE_NOBLANKS;
14930	ctxt->options |= XML_PARSE_NOBLANKS;
14931    } else
14932        ctxt->keepBlanks = 1;
14933    if (options & XML_PARSE_DTDVALID) {
14934        ctxt->validate = 1;
14935        if (options & XML_PARSE_NOWARNING)
14936            ctxt->vctxt.warning = NULL;
14937        if (options & XML_PARSE_NOERROR)
14938            ctxt->vctxt.error = NULL;
14939        options -= XML_PARSE_DTDVALID;
14940	ctxt->options |= XML_PARSE_DTDVALID;
14941    } else
14942        ctxt->validate = 0;
14943    if (options & XML_PARSE_NOWARNING) {
14944        ctxt->sax->warning = NULL;
14945        options -= XML_PARSE_NOWARNING;
14946    }
14947    if (options & XML_PARSE_NOERROR) {
14948        ctxt->sax->error = NULL;
14949        ctxt->sax->fatalError = NULL;
14950        options -= XML_PARSE_NOERROR;
14951    }
14952#ifdef LIBXML_SAX1_ENABLED
14953    if (options & XML_PARSE_SAX1) {
14954        ctxt->sax->startElement = xmlSAX2StartElement;
14955        ctxt->sax->endElement = xmlSAX2EndElement;
14956        ctxt->sax->startElementNs = NULL;
14957        ctxt->sax->endElementNs = NULL;
14958        ctxt->sax->initialized = 1;
14959        options -= XML_PARSE_SAX1;
14960	ctxt->options |= XML_PARSE_SAX1;
14961    }
14962#endif /* LIBXML_SAX1_ENABLED */
14963    if (options & XML_PARSE_NODICT) {
14964        ctxt->dictNames = 0;
14965        options -= XML_PARSE_NODICT;
14966	ctxt->options |= XML_PARSE_NODICT;
14967    } else {
14968        ctxt->dictNames = 1;
14969    }
14970    if (options & XML_PARSE_NOCDATA) {
14971        ctxt->sax->cdataBlock = NULL;
14972        options -= XML_PARSE_NOCDATA;
14973	ctxt->options |= XML_PARSE_NOCDATA;
14974    }
14975    if (options & XML_PARSE_NSCLEAN) {
14976	ctxt->options |= XML_PARSE_NSCLEAN;
14977        options -= XML_PARSE_NSCLEAN;
14978    }
14979    if (options & XML_PARSE_NONET) {
14980	ctxt->options |= XML_PARSE_NONET;
14981        options -= XML_PARSE_NONET;
14982    }
14983    if (options & XML_PARSE_COMPACT) {
14984	ctxt->options |= XML_PARSE_COMPACT;
14985        options -= XML_PARSE_COMPACT;
14986    }
14987    if (options & XML_PARSE_OLD10) {
14988	ctxt->options |= XML_PARSE_OLD10;
14989        options -= XML_PARSE_OLD10;
14990    }
14991    if (options & XML_PARSE_NOBASEFIX) {
14992	ctxt->options |= XML_PARSE_NOBASEFIX;
14993        options -= XML_PARSE_NOBASEFIX;
14994    }
14995    if (options & XML_PARSE_HUGE) {
14996	ctxt->options |= XML_PARSE_HUGE;
14997        options -= XML_PARSE_HUGE;
14998        if (ctxt->dict != NULL)
14999            xmlDictSetLimit(ctxt->dict, 0);
15000    }
15001    if (options & XML_PARSE_OLDSAX) {
15002	ctxt->options |= XML_PARSE_OLDSAX;
15003        options -= XML_PARSE_OLDSAX;
15004    }
15005    if (options & XML_PARSE_IGNORE_ENC) {
15006	ctxt->options |= XML_PARSE_IGNORE_ENC;
15007        options -= XML_PARSE_IGNORE_ENC;
15008    }
15009    if (options & XML_PARSE_BIG_LINES) {
15010	ctxt->options |= XML_PARSE_BIG_LINES;
15011        options -= XML_PARSE_BIG_LINES;
15012    }
15013    ctxt->linenumbers = 1;
15014    return (options);
15015}
15016
15017/**
15018 * xmlCtxtUseOptions:
15019 * @ctxt: an XML parser context
15020 * @options:  a combination of xmlParserOption
15021 *
15022 * Applies the options to the parser context
15023 *
15024 * Returns 0 in case of success, the set of unknown or unimplemented options
15025 *         in case of error.
15026 */
15027int
15028xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15029{
15030   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15031}
15032
15033/**
15034 * xmlDoRead:
15035 * @ctxt:  an XML parser context
15036 * @URL:  the base URL to use for the document
15037 * @encoding:  the document encoding, or NULL
15038 * @options:  a combination of xmlParserOption
15039 * @reuse:  keep the context for reuse
15040 *
15041 * Common front-end for the xmlRead functions
15042 *
15043 * Returns the resulting document tree or NULL
15044 */
15045static xmlDocPtr
15046xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15047          int options, int reuse)
15048{
15049    xmlDocPtr ret;
15050
15051    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15052    if (encoding != NULL) {
15053        xmlCharEncodingHandlerPtr hdlr;
15054
15055	hdlr = xmlFindCharEncodingHandler(encoding);
15056	if (hdlr != NULL)
15057	    xmlSwitchToEncoding(ctxt, hdlr);
15058    }
15059    if ((URL != NULL) && (ctxt->input != NULL) &&
15060        (ctxt->input->filename == NULL))
15061        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15062    xmlParseDocument(ctxt);
15063    if ((ctxt->wellFormed) || ctxt->recovery)
15064        ret = ctxt->myDoc;
15065    else {
15066        ret = NULL;
15067	if (ctxt->myDoc != NULL) {
15068	    xmlFreeDoc(ctxt->myDoc);
15069	}
15070    }
15071    ctxt->myDoc = NULL;
15072    if (!reuse) {
15073	xmlFreeParserCtxt(ctxt);
15074    }
15075
15076    return (ret);
15077}
15078
15079/**
15080 * xmlReadDoc:
15081 * @cur:  a pointer to a zero terminated string
15082 * @URL:  the base URL to use for the document
15083 * @encoding:  the document encoding, or NULL
15084 * @options:  a combination of xmlParserOption
15085 *
15086 * parse an XML in-memory document and build a tree.
15087 *
15088 * Returns the resulting document tree
15089 */
15090xmlDocPtr
15091xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15092{
15093    xmlParserCtxtPtr ctxt;
15094
15095    if (cur == NULL)
15096        return (NULL);
15097
15098    ctxt = xmlCreateDocParserCtxt(cur);
15099    if (ctxt == NULL)
15100        return (NULL);
15101    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15102}
15103
15104/**
15105 * xmlReadFile:
15106 * @filename:  a file or URL
15107 * @encoding:  the document encoding, or NULL
15108 * @options:  a combination of xmlParserOption
15109 *
15110 * parse an XML file from the filesystem or the network.
15111 *
15112 * Returns the resulting document tree
15113 */
15114xmlDocPtr
15115xmlReadFile(const char *filename, const char *encoding, int options)
15116{
15117    xmlParserCtxtPtr ctxt;
15118
15119    ctxt = xmlCreateURLParserCtxt(filename, options);
15120    if (ctxt == NULL)
15121        return (NULL);
15122    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15123}
15124
15125/**
15126 * xmlReadMemory:
15127 * @buffer:  a pointer to a char array
15128 * @size:  the size of the array
15129 * @URL:  the base URL to use for the document
15130 * @encoding:  the document encoding, or NULL
15131 * @options:  a combination of xmlParserOption
15132 *
15133 * parse an XML in-memory document and build a tree.
15134 *
15135 * Returns the resulting document tree
15136 */
15137xmlDocPtr
15138xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15139{
15140    xmlParserCtxtPtr ctxt;
15141
15142    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15143    if (ctxt == NULL)
15144        return (NULL);
15145    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146}
15147
15148/**
15149 * xmlReadFd:
15150 * @fd:  an open file descriptor
15151 * @URL:  the base URL to use for the document
15152 * @encoding:  the document encoding, or NULL
15153 * @options:  a combination of xmlParserOption
15154 *
15155 * parse an XML from a file descriptor and build a tree.
15156 * NOTE that the file descriptor will not be closed when the
15157 *      reader is closed or reset.
15158 *
15159 * Returns the resulting document tree
15160 */
15161xmlDocPtr
15162xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15163{
15164    xmlParserCtxtPtr ctxt;
15165    xmlParserInputBufferPtr input;
15166    xmlParserInputPtr stream;
15167
15168    if (fd < 0)
15169        return (NULL);
15170
15171    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15172    if (input == NULL)
15173        return (NULL);
15174    input->closecallback = NULL;
15175    ctxt = xmlNewParserCtxt();
15176    if (ctxt == NULL) {
15177        xmlFreeParserInputBuffer(input);
15178        return (NULL);
15179    }
15180    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15181    if (stream == NULL) {
15182        xmlFreeParserInputBuffer(input);
15183	xmlFreeParserCtxt(ctxt);
15184        return (NULL);
15185    }
15186    inputPush(ctxt, stream);
15187    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15188}
15189
15190/**
15191 * xmlReadIO:
15192 * @ioread:  an I/O read function
15193 * @ioclose:  an I/O close function
15194 * @ioctx:  an I/O handler
15195 * @URL:  the base URL to use for the document
15196 * @encoding:  the document encoding, or NULL
15197 * @options:  a combination of xmlParserOption
15198 *
15199 * parse an XML document from I/O functions and source and build a tree.
15200 *
15201 * Returns the resulting document tree
15202 */
15203xmlDocPtr
15204xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15205          void *ioctx, const char *URL, const char *encoding, int options)
15206{
15207    xmlParserCtxtPtr ctxt;
15208    xmlParserInputBufferPtr input;
15209    xmlParserInputPtr stream;
15210
15211    if (ioread == NULL)
15212        return (NULL);
15213
15214    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15215                                         XML_CHAR_ENCODING_NONE);
15216    if (input == NULL) {
15217        if (ioclose != NULL)
15218            ioclose(ioctx);
15219        return (NULL);
15220    }
15221    ctxt = xmlNewParserCtxt();
15222    if (ctxt == NULL) {
15223        xmlFreeParserInputBuffer(input);
15224        return (NULL);
15225    }
15226    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15227    if (stream == NULL) {
15228        xmlFreeParserInputBuffer(input);
15229	xmlFreeParserCtxt(ctxt);
15230        return (NULL);
15231    }
15232    inputPush(ctxt, stream);
15233    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15234}
15235
15236/**
15237 * xmlCtxtReadDoc:
15238 * @ctxt:  an XML parser context
15239 * @cur:  a pointer to a zero terminated string
15240 * @URL:  the base URL to use for the document
15241 * @encoding:  the document encoding, or NULL
15242 * @options:  a combination of xmlParserOption
15243 *
15244 * parse an XML in-memory document and build a tree.
15245 * This reuses the existing @ctxt parser context
15246 *
15247 * Returns the resulting document tree
15248 */
15249xmlDocPtr
15250xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15251               const char *URL, const char *encoding, int options)
15252{
15253    xmlParserInputPtr stream;
15254
15255    if (cur == NULL)
15256        return (NULL);
15257    if (ctxt == NULL)
15258        return (NULL);
15259
15260    xmlCtxtReset(ctxt);
15261
15262    stream = xmlNewStringInputStream(ctxt, cur);
15263    if (stream == NULL) {
15264        return (NULL);
15265    }
15266    inputPush(ctxt, stream);
15267    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15268}
15269
15270/**
15271 * xmlCtxtReadFile:
15272 * @ctxt:  an XML parser context
15273 * @filename:  a file or URL
15274 * @encoding:  the document encoding, or NULL
15275 * @options:  a combination of xmlParserOption
15276 *
15277 * parse an XML file from the filesystem or the network.
15278 * This reuses the existing @ctxt parser context
15279 *
15280 * Returns the resulting document tree
15281 */
15282xmlDocPtr
15283xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15284                const char *encoding, int options)
15285{
15286    xmlParserInputPtr stream;
15287
15288    if (filename == NULL)
15289        return (NULL);
15290    if (ctxt == NULL)
15291        return (NULL);
15292
15293    xmlCtxtReset(ctxt);
15294
15295    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15296    if (stream == NULL) {
15297        return (NULL);
15298    }
15299    inputPush(ctxt, stream);
15300    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15301}
15302
15303/**
15304 * xmlCtxtReadMemory:
15305 * @ctxt:  an XML parser context
15306 * @buffer:  a pointer to a char array
15307 * @size:  the size of the array
15308 * @URL:  the base URL to use for the document
15309 * @encoding:  the document encoding, or NULL
15310 * @options:  a combination of xmlParserOption
15311 *
15312 * parse an XML in-memory document and build a tree.
15313 * This reuses the existing @ctxt parser context
15314 *
15315 * Returns the resulting document tree
15316 */
15317xmlDocPtr
15318xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15319                  const char *URL, const char *encoding, int options)
15320{
15321    xmlParserInputBufferPtr input;
15322    xmlParserInputPtr stream;
15323
15324    if (ctxt == NULL)
15325        return (NULL);
15326    if (buffer == NULL)
15327        return (NULL);
15328
15329    xmlCtxtReset(ctxt);
15330
15331    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15332    if (input == NULL) {
15333	return(NULL);
15334    }
15335
15336    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15337    if (stream == NULL) {
15338	xmlFreeParserInputBuffer(input);
15339	return(NULL);
15340    }
15341
15342    inputPush(ctxt, stream);
15343    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15344}
15345
15346/**
15347 * xmlCtxtReadFd:
15348 * @ctxt:  an XML parser context
15349 * @fd:  an open file descriptor
15350 * @URL:  the base URL to use for the document
15351 * @encoding:  the document encoding, or NULL
15352 * @options:  a combination of xmlParserOption
15353 *
15354 * parse an XML from a file descriptor and build a tree.
15355 * This reuses the existing @ctxt parser context
15356 * NOTE that the file descriptor will not be closed when the
15357 *      reader is closed or reset.
15358 *
15359 * Returns the resulting document tree
15360 */
15361xmlDocPtr
15362xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15363              const char *URL, const char *encoding, int options)
15364{
15365    xmlParserInputBufferPtr input;
15366    xmlParserInputPtr stream;
15367
15368    if (fd < 0)
15369        return (NULL);
15370    if (ctxt == NULL)
15371        return (NULL);
15372
15373    xmlCtxtReset(ctxt);
15374
15375
15376    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15377    if (input == NULL)
15378        return (NULL);
15379    input->closecallback = NULL;
15380    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15381    if (stream == NULL) {
15382        xmlFreeParserInputBuffer(input);
15383        return (NULL);
15384    }
15385    inputPush(ctxt, stream);
15386    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15387}
15388
15389/**
15390 * xmlCtxtReadIO:
15391 * @ctxt:  an XML parser context
15392 * @ioread:  an I/O read function
15393 * @ioclose:  an I/O close function
15394 * @ioctx:  an I/O handler
15395 * @URL:  the base URL to use for the document
15396 * @encoding:  the document encoding, or NULL
15397 * @options:  a combination of xmlParserOption
15398 *
15399 * parse an XML document from I/O functions and source and build a tree.
15400 * This reuses the existing @ctxt parser context
15401 *
15402 * Returns the resulting document tree
15403 */
15404xmlDocPtr
15405xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15406              xmlInputCloseCallback ioclose, void *ioctx,
15407	      const char *URL,
15408              const char *encoding, int options)
15409{
15410    xmlParserInputBufferPtr input;
15411    xmlParserInputPtr stream;
15412
15413    if (ioread == NULL)
15414        return (NULL);
15415    if (ctxt == NULL)
15416        return (NULL);
15417
15418    xmlCtxtReset(ctxt);
15419
15420    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15421                                         XML_CHAR_ENCODING_NONE);
15422    if (input == NULL) {
15423        if (ioclose != NULL)
15424            ioclose(ioctx);
15425        return (NULL);
15426    }
15427    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15428    if (stream == NULL) {
15429        xmlFreeParserInputBuffer(input);
15430        return (NULL);
15431    }
15432    inputPush(ctxt, stream);
15433    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15434}
15435
15436#define bottom_parser
15437#include "elfgcchack.h"
15438