1/*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 *                     XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * daniel@veillard.com
8 */
9
10#define IN_LIBXML
11#include "libxml.h"
12
13#if defined(WIN32) && !defined (__CYGWIN__)
14#define XML_DIR_SEP '\\'
15#else
16#define XML_DIR_SEP '/'
17#endif
18
19#include <string.h>
20#ifdef HAVE_CTYPE_H
21#include <ctype.h>
22#endif
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_SYS_STAT_H
27#include <sys/stat.h>
28#endif
29#ifdef HAVE_FCNTL_H
30#include <fcntl.h>
31#endif
32#ifdef HAVE_UNISTD_H
33#include <unistd.h>
34#endif
35#ifdef HAVE_ZLIB_H
36#include <zlib.h>
37#endif
38
39#include <libxml/xmlmemory.h>
40#include <libxml/tree.h>
41#include <libxml/parser.h>
42#include <libxml/parserInternals.h>
43#include <libxml/valid.h>
44#include <libxml/entities.h>
45#include <libxml/xmlerror.h>
46#include <libxml/encoding.h>
47#include <libxml/valid.h>
48#include <libxml/xmlIO.h>
49#include <libxml/uri.h>
50#include <libxml/dict.h>
51#include <libxml/SAX.h>
52#ifdef LIBXML_CATALOG_ENABLED
53#include <libxml/catalog.h>
54#endif
55#include <libxml/globals.h>
56#include <libxml/chvalid.h>
57
58/*
59 * Various global defaults for parsing
60 */
61
62/**
63 * xmlCheckVersion:
64 * @version: the include version number
65 *
66 * check the compiled lib version against the include one.
67 * This can warn or immediately kill the application
68 */
69void
70xmlCheckVersion(int version) {
71    int myversion = (int) LIBXML_VERSION;
72
73    xmlInitParser();
74
75    if ((myversion / 10000) != (version / 10000)) {
76	xmlGenericError(xmlGenericErrorContext,
77		"Fatal: program compiled against libxml %d using libxml %d\n",
78		(version / 10000), (myversion / 10000));
79	fprintf(stderr,
80		"Fatal: program compiled against libxml %d using libxml %d\n",
81		(version / 10000), (myversion / 10000));
82    }
83    if ((myversion / 100) < (version / 100)) {
84	xmlGenericError(xmlGenericErrorContext,
85		"Warning: program compiled against libxml %d using older %d\n",
86		(version / 100), (myversion / 100));
87    }
88}
89
90
91/************************************************************************
92 *									*
93 * 		Some factorized error routines				*
94 *									*
95 ************************************************************************/
96
97
98/**
99 * xmlErrMemory:
100 * @ctxt:  an XML parser context
101 * @extra:  extra informations
102 *
103 * Handle a redefinition of attribute error
104 */
105void
106xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
107{
108    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
109        (ctxt->instate == XML_PARSER_EOF))
110	return;
111    if (ctxt != NULL) {
112        ctxt->errNo = XML_ERR_NO_MEMORY;
113        ctxt->instate = XML_PARSER_EOF;
114        ctxt->disableSAX = 1;
115    }
116    if (extra)
117        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
118                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
119                        NULL, NULL, 0, 0,
120                        "Memory allocation failed : %s\n", extra);
121    else
122        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
123                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
124                        NULL, NULL, 0, 0, "Memory allocation failed\n");
125}
126
127/**
128 * __xmlErrEncoding:
129 * @ctxt:  an XML parser context
130 * @xmlerr:  the error number
131 * @msg:  the error message
132 * @str1:  an string info
133 * @str2:  an string info
134 *
135 * Handle an encoding error
136 */
137void
138__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
139                 const char *msg, const xmlChar * str1, const xmlChar * str2)
140{
141    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
142        (ctxt->instate == XML_PARSER_EOF))
143	return;
144    if (ctxt != NULL)
145        ctxt->errNo = xmlerr;
146    __xmlRaiseError(NULL, NULL, NULL,
147                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
148                    NULL, 0, (const char *) str1, (const char *) str2,
149                    NULL, 0, 0, msg, str1, str2);
150    if (ctxt != NULL) {
151        ctxt->wellFormed = 0;
152        if (ctxt->recovery == 0)
153            ctxt->disableSAX = 1;
154    }
155}
156
157/**
158 * xmlErrInternal:
159 * @ctxt:  an XML parser context
160 * @msg:  the error message
161 * @str:  error informations
162 *
163 * Handle an internal error
164 */
165static void
166xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
167{
168    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
169        (ctxt->instate == XML_PARSER_EOF))
170	return;
171    if (ctxt != NULL)
172        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
173    __xmlRaiseError(NULL, NULL, NULL,
174                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
175                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
176                    0, 0, msg, str);
177    if (ctxt != NULL) {
178        ctxt->wellFormed = 0;
179        if (ctxt->recovery == 0)
180            ctxt->disableSAX = 1;
181    }
182}
183
184/**
185 * xmlErrEncodingInt:
186 * @ctxt:  an XML parser context
187 * @error:  the error number
188 * @msg:  the error message
189 * @val:  an integer value
190 *
191 * n encoding error
192 */
193static void
194xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
195                  const char *msg, int val)
196{
197    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
198        (ctxt->instate == XML_PARSER_EOF))
199	return;
200    if (ctxt != NULL)
201        ctxt->errNo = error;
202    __xmlRaiseError(NULL, NULL, NULL,
203                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
204                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
205    if (ctxt != NULL) {
206        ctxt->wellFormed = 0;
207        if (ctxt->recovery == 0)
208            ctxt->disableSAX = 1;
209    }
210}
211
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Test the character against the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if the character is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
225
226/************************************************************************
227 *									*
228 * 		Input handling functions for progressive parsing	*
229 *									*
230 ************************************************************************/
231
232/* #define DEBUG_INPUT */
233/* #define DEBUG_STACK */
234/* #define DEBUG_PUSH */
235
236
237/* we need to keep enough input to show errors in context */
238#define LINE_LEN        80
239
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined.
 * Reports through the generic error handler when the input base pointer
 * does not match the buffer content, or when the current pointer lies
 * outside [base, base + use], then dumps the buffer state.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int for %x truncates
     * them on platforms where pointers are wider than int. The pointer
     * difference is widened to long to match its conversion specifier.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
265
266
267/**
268 * xmlParserInputRead:
269 * @in:  an XML parser input
270 * @len:  an indicative size for the lookahead
271 *
272 * This function refresh the input for the parser. It doesn't try to
273 * preserve pointers to the input buffer, and discard already read data
274 *
275 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
276 * end of this entity
277 */
278int
279xmlParserInputRead(xmlParserInputPtr in, int len) {
280    int ret;
281    int used;
282    int indx;
283
284    if (in == NULL) return(-1);
285#ifdef DEBUG_INPUT
286    xmlGenericError(xmlGenericErrorContext, "Read\n");
287#endif
288    if (in->buf == NULL) return(-1);
289    if (in->base == NULL) return(-1);
290    if (in->cur == NULL) return(-1);
291    if (in->buf->buffer == NULL) return(-1);
292    if (in->buf->readcallback == NULL) return(-1);
293
294    CHECK_BUFFER(in);
295
296    used = in->cur - in->buf->buffer->content;
297    ret = xmlBufferShrink(in->buf->buffer, used);
298    if (ret > 0) {
299	in->cur -= ret;
300	in->consumed += ret;
301    }
302    ret = xmlParserInputBufferRead(in->buf, len);
303    if (in->base != in->buf->buffer->content) {
304        /*
305	 * the buffer has been reallocated
306	 */
307	indx = in->cur - in->base;
308	in->base = in->buf->buffer->content;
309	in->cur = &in->buf->buffer->content[indx];
310    }
311    in->end = &in->buf->buffer->content[in->buf->buffer->use];
312
313    CHECK_BUFFER(in);
314
315    return(ret);
316}
317
318/**
319 * xmlParserInputGrow:
320 * @in:  an XML parser input
321 * @len:  an indicative size for the lookahead
322 *
323 * This function increase the input for the parser. It tries to
324 * preserve pointers to the input buffer, and keep already read data
325 *
326 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
327 * end of this entity
328 */
329int
330xmlParserInputGrow(xmlParserInputPtr in, int len) {
331    int ret;
332    int indx;
333
334    if (in == NULL) return(-1);
335#ifdef DEBUG_INPUT
336    xmlGenericError(xmlGenericErrorContext, "Grow\n");
337#endif
338    if (in->buf == NULL) return(-1);
339    if (in->base == NULL) return(-1);
340    if (in->cur == NULL) return(-1);
341    if (in->buf->buffer == NULL) return(-1);
342
343    CHECK_BUFFER(in);
344
345    indx = in->cur - in->base;
346    if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
347
348	CHECK_BUFFER(in);
349
350        return(0);
351    }
352    if (in->buf->readcallback != NULL)
353	ret = xmlParserInputBufferGrow(in->buf, len);
354    else
355        return(0);
356
357    /*
358     * NOTE : in->base may be a "dangling" i.e. freed pointer in this
359     *        block, but we use it really as an integer to do some
360     *        pointer arithmetic. Insure will raise it as a bug but in
361     *        that specific case, that's not !
362     */
363    if (in->base != in->buf->buffer->content) {
364        /*
365	 * the buffer has been reallocated
366	 */
367	indx = in->cur - in->base;
368	in->base = in->buf->buffer->content;
369	in->cur = &in->buf->buffer->content[indx];
370    }
371    in->end = &in->buf->buffer->content[in->buf->buffer->use];
372
373    CHECK_BUFFER(in);
374
375    return(ret);
376}
377
378/**
379 * xmlParserInputShrink:
380 * @in:  an XML parser input
381 *
382 * This function removes used input for the parser.
383 */
384void
385xmlParserInputShrink(xmlParserInputPtr in) {
386    int used;
387    int ret;
388    int indx;
389
390#ifdef DEBUG_INPUT
391    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
392#endif
393    if (in == NULL) return;
394    if (in->buf == NULL) return;
395    if (in->base == NULL) return;
396    if (in->cur == NULL) return;
397    if (in->buf->buffer == NULL) return;
398
399    CHECK_BUFFER(in);
400
401    used = in->cur - in->buf->buffer->content;
402    /*
403     * Do not shrink on large buffers whose only a tiny fraction
404     * was consumed
405     */
406    if (used > INPUT_CHUNK) {
407	ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
408	if (ret > 0) {
409	    in->cur -= ret;
410	    in->consumed += ret;
411	}
412	in->end = &in->buf->buffer->content[in->buf->buffer->use];
413    }
414
415    CHECK_BUFFER(in);
416
417    if (in->buf->buffer->use > INPUT_CHUNK) {
418        return;
419    }
420    xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
421    if (in->base != in->buf->buffer->content) {
422        /*
423	 * the buffer has been reallocated
424	 */
425	indx = in->cur - in->base;
426	in->base = in->buf->buffer->content;
427	in->cur = &in->buf->buffer->content[indx];
428    }
429    in->end = &in->buf->buffer->content[in->buf->buffer->use];
430
431    CHECK_BUFFER(in);
432}
433
434/************************************************************************
435 *									*
436 * 		UTF8 character input and related functions		*
437 *									*
438 ************************************************************************/
439
440/**
441 * xmlNextChar:
442 * @ctxt:  the XML parser context
443 *
444 * Skip to the next char input char.
445 */
446
447void
448xmlNextChar(xmlParserCtxtPtr ctxt)
449{
450    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
451        (ctxt->input == NULL))
452        return;
453
454    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
455        if ((*ctxt->input->cur == 0) &&
456            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
457            (ctxt->instate != XML_PARSER_COMMENT)) {
458            /*
459             * If we are at the end of the current entity and
460             * the context allows it, we pop consumed entities
461             * automatically.
462             * the auto closing should be blocked in other cases
463             */
464            xmlPopInput(ctxt);
465        } else {
466            const unsigned char *cur;
467            unsigned char c;
468
469            /*
470             *   2.11 End-of-Line Handling
471             *   the literal two-character sequence "#xD#xA" or a standalone
472             *   literal #xD, an XML processor must pass to the application
473             *   the single character #xA.
474             */
475            if (*(ctxt->input->cur) == '\n') {
476                ctxt->input->line++; ctxt->input->col = 1;
477            } else
478                ctxt->input->col++;
479
480            /*
481             * We are supposed to handle UTF8, check it's valid
482             * From rfc2044: encoding of the Unicode values on UTF-8:
483             *
484             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
485             * 0000 0000-0000 007F   0xxxxxxx
486             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
487             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
488             *
489             * Check for the 0x110000 limit too
490             */
491            cur = ctxt->input->cur;
492
493            c = *cur;
494            if (c & 0x80) {
495	        if (c == 0xC0)
496		    goto encoding_error;
497                if (cur[1] == 0)
498                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
499                if ((cur[1] & 0xc0) != 0x80)
500                    goto encoding_error;
501                if ((c & 0xe0) == 0xe0) {
502                    unsigned int val;
503
504                    if (cur[2] == 0)
505                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
506                    if ((cur[2] & 0xc0) != 0x80)
507                        goto encoding_error;
508                    if ((c & 0xf0) == 0xf0) {
509                        if (cur[3] == 0)
510                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
511                        if (((c & 0xf8) != 0xf0) ||
512                            ((cur[3] & 0xc0) != 0x80))
513                            goto encoding_error;
514                        /* 4-byte code */
515                        ctxt->input->cur += 4;
516                        val = (cur[0] & 0x7) << 18;
517                        val |= (cur[1] & 0x3f) << 12;
518                        val |= (cur[2] & 0x3f) << 6;
519                        val |= cur[3] & 0x3f;
520                    } else {
521                        /* 3-byte code */
522                        ctxt->input->cur += 3;
523                        val = (cur[0] & 0xf) << 12;
524                        val |= (cur[1] & 0x3f) << 6;
525                        val |= cur[2] & 0x3f;
526                    }
527                    if (((val > 0xd7ff) && (val < 0xe000)) ||
528                        ((val > 0xfffd) && (val < 0x10000)) ||
529                        (val >= 0x110000)) {
530			xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
531					  "Char 0x%X out of allowed range\n",
532					  val);
533                    }
534                } else
535                    /* 2-byte code */
536                    ctxt->input->cur += 2;
537            } else
538                /* 1-byte code */
539                ctxt->input->cur++;
540
541            ctxt->nbChars++;
542            if (*ctxt->input->cur == 0)
543                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
544        }
545    } else {
546        /*
547         * Assume it's a fixed length encoding (1) with
548         * a compatible encoding for the ASCII set, since
549         * XML constructs only use < 128 chars
550         */
551
552        if (*(ctxt->input->cur) == '\n') {
553            ctxt->input->line++; ctxt->input->col = 1;
554        } else
555            ctxt->input->col++;
556        ctxt->input->cur++;
557        ctxt->nbChars++;
558        if (*ctxt->input->cur == 0)
559            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
560    }
561    if ((*ctxt->input->cur == '%') && (!ctxt->html))
562        xmlParserHandlePEReference(ctxt);
563    if ((*ctxt->input->cur == 0) &&
564        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
565        xmlPopInput(ctxt);
566    return;
567encoding_error:
568    /*
569     * If we detect an UTF8 error that probably mean that the
570     * input encoding didn't get properly advertised in the
571     * declaration header. Report the error and switch the encoding
572     * to ISO-Latin-1 (if you don't like this policy, just declare the
573     * encoding !)
574     */
575    if ((ctxt == NULL) || (ctxt->input == NULL) ||
576        (ctxt->input->end - ctxt->input->cur < 4)) {
577	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
578		     "Input is not proper UTF-8, indicate encoding !\n",
579		     NULL, NULL);
580    } else {
581        char buffer[150];
582
583	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
584			ctxt->input->cur[0], ctxt->input->cur[1],
585			ctxt->input->cur[2], ctxt->input->cur[3]);
586	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
587		     "Input is not proper UTF-8, indicate encoding !\n%s",
588		     BAD_CAST buffer, NULL);
589    }
590    ctxt->charset = XML_CHAR_ENCODING_8859_1;
591    ctxt->input->cur++;
592    return;
593}
594
595/**
596 * xmlCurrentChar:
597 * @ctxt:  the XML parser context
598 * @len:  pointer to the length of the char read
599 *
600 * The current char value, if using UTF-8 this may actually span multiple
601 * bytes in the input buffer. Implement the end of line normalization:
602 * 2.11 End-of-Line Handling
603 * Wherever an external parsed entity or the literal entity value
604 * of an internal parsed entity contains either the literal two-character
605 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
606 * must pass to the application the single character #xA.
607 * This behavior can conveniently be produced by normalizing all
608 * line breaks to #xA on input, before parsing.)
609 *
610 * Returns the current char value and its length
611 */
612
613int
614xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
615    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
616    if (ctxt->instate == XML_PARSER_EOF)
617	return(0);
618
619    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
620	    *len = 1;
621	    return((int) *ctxt->input->cur);
622    }
623    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
624	/*
625	 * We are supposed to handle UTF8, check it's valid
626	 * From rfc2044: encoding of the Unicode values on UTF-8:
627	 *
628	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
629	 * 0000 0000-0000 007F   0xxxxxxx
630	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
631	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
632	 *
633	 * Check for the 0x110000 limit too
634	 */
635	const unsigned char *cur = ctxt->input->cur;
636	unsigned char c;
637	unsigned int val;
638
639	c = *cur;
640	if (c & 0x80) {
641	    if (((c & 0x40) == 0) || (c == 0xC0))
642		goto encoding_error;
643	    if (cur[1] == 0)
644		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
645	    if ((cur[1] & 0xc0) != 0x80)
646		goto encoding_error;
647	    if ((c & 0xe0) == 0xe0) {
648		if (cur[2] == 0)
649		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
650		if ((cur[2] & 0xc0) != 0x80)
651		    goto encoding_error;
652		if ((c & 0xf0) == 0xf0) {
653		    if (cur[3] == 0)
654			xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
655		    if (((c & 0xf8) != 0xf0) ||
656			((cur[3] & 0xc0) != 0x80))
657			goto encoding_error;
658		    /* 4-byte code */
659		    *len = 4;
660		    val = (cur[0] & 0x7) << 18;
661		    val |= (cur[1] & 0x3f) << 12;
662		    val |= (cur[2] & 0x3f) << 6;
663		    val |= cur[3] & 0x3f;
664		    if (val < 0x10000)
665			goto encoding_error;
666		} else {
667		  /* 3-byte code */
668		    *len = 3;
669		    val = (cur[0] & 0xf) << 12;
670		    val |= (cur[1] & 0x3f) << 6;
671		    val |= cur[2] & 0x3f;
672		    if (val < 0x800)
673			goto encoding_error;
674		}
675	    } else {
676	      /* 2-byte code */
677		*len = 2;
678		val = (cur[0] & 0x1f) << 6;
679		val |= cur[1] & 0x3f;
680		if (val < 0x80)
681		    goto encoding_error;
682	    }
683	    if (!IS_CHAR(val)) {
684	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
685				  "Char 0x%X out of allowed range\n", val);
686	    }
687	    return(val);
688	} else {
689	    /* 1-byte code */
690	    *len = 1;
691	    if (*ctxt->input->cur == 0)
692		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
693	    if ((*ctxt->input->cur == 0) &&
694	        (ctxt->input->end > ctxt->input->cur)) {
695	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
696				  "Char 0x0 out of allowed range\n", 0);
697	    }
698	    if (*ctxt->input->cur == 0xD) {
699		if (ctxt->input->cur[1] == 0xA) {
700		    ctxt->nbChars++;
701		    ctxt->input->cur++;
702		}
703		return(0xA);
704	    }
705	    return((int) *ctxt->input->cur);
706	}
707    }
708    /*
709     * Assume it's a fixed length encoding (1) with
710     * a compatible encoding for the ASCII set, since
711     * XML constructs only use < 128 chars
712     */
713    *len = 1;
714    if (*ctxt->input->cur == 0xD) {
715	if (ctxt->input->cur[1] == 0xA) {
716	    ctxt->nbChars++;
717	    ctxt->input->cur++;
718	}
719	return(0xA);
720    }
721    return((int) *ctxt->input->cur);
722encoding_error:
723    /*
724     * An encoding problem may arise from a truncated input buffer
725     * splitting a character in the middle. In that case do not raise
726     * an error but return 0 to endicate an end of stream problem
727     */
728    if (ctxt->input->end - ctxt->input->cur < 4) {
729	*len = 0;
730	return(0);
731    }
732
733    /*
734     * If we detect an UTF8 error that probably mean that the
735     * input encoding didn't get properly advertised in the
736     * declaration header. Report the error and switch the encoding
737     * to ISO-Latin-1 (if you don't like this policy, just declare the
738     * encoding !)
739     */
740    {
741        char buffer[150];
742
743	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
744			ctxt->input->cur[0], ctxt->input->cur[1],
745			ctxt->input->cur[2], ctxt->input->cur[3]);
746	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
747		     "Input is not proper UTF-8, indicate encoding !\n%s",
748		     BAD_CAST buffer, NULL);
749    }
750    ctxt->charset = XML_CHAR_ENCODING_8859_1;
751    *len = 1;
752    return((int) *ctxt->input->cur);
753}
754
755/**
756 * xmlStringCurrentChar:
757 * @ctxt:  the XML parser context
758 * @cur:  pointer to the beginning of the char
759 * @len:  pointer to the length of the char read
760 *
761 * The current char value, if using UTF-8 this may actually span multiple
762 * bytes in the input buffer.
763 *
764 * Returns the current char value and its length
765 */
766
767int
768xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
769{
770    if ((len == NULL) || (cur == NULL)) return(0);
771    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
772        /*
773         * We are supposed to handle UTF8, check it's valid
774         * From rfc2044: encoding of the Unicode values on UTF-8:
775         *
776         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
777         * 0000 0000-0000 007F   0xxxxxxx
778         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
779         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
780         *
781         * Check for the 0x110000 limit too
782         */
783        unsigned char c;
784        unsigned int val;
785
786        c = *cur;
787        if (c & 0x80) {
788            if ((cur[1] & 0xc0) != 0x80)
789                goto encoding_error;
790            if ((c & 0xe0) == 0xe0) {
791
792                if ((cur[2] & 0xc0) != 0x80)
793                    goto encoding_error;
794                if ((c & 0xf0) == 0xf0) {
795                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
796                        goto encoding_error;
797                    /* 4-byte code */
798                    *len = 4;
799                    val = (cur[0] & 0x7) << 18;
800                    val |= (cur[1] & 0x3f) << 12;
801                    val |= (cur[2] & 0x3f) << 6;
802                    val |= cur[3] & 0x3f;
803                } else {
804                    /* 3-byte code */
805                    *len = 3;
806                    val = (cur[0] & 0xf) << 12;
807                    val |= (cur[1] & 0x3f) << 6;
808                    val |= cur[2] & 0x3f;
809                }
810            } else {
811                /* 2-byte code */
812                *len = 2;
813                val = (cur[0] & 0x1f) << 6;
814                val |= cur[1] & 0x3f;
815            }
816            if (!IS_CHAR(val)) {
817	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
818				  "Char 0x%X out of allowed range\n", val);
819            }
820            return (val);
821        } else {
822            /* 1-byte code */
823            *len = 1;
824            return ((int) *cur);
825        }
826    }
827    /*
828     * Assume it's a fixed length encoding (1) with
829     * a compatible encoding for the ASCII set, since
830     * XML constructs only use < 128 chars
831     */
832    *len = 1;
833    return ((int) *cur);
834encoding_error:
835
836    /*
837     * An encoding problem may arise from a truncated input buffer
838     * splitting a character in the middle. In that case do not raise
839     * an error but return 0 to endicate an end of stream problem
840     */
841    if ((ctxt == NULL) || (ctxt->input == NULL) ||
842        (ctxt->input->end - ctxt->input->cur < 4)) {
843	*len = 0;
844	return(0);
845    }
846    /*
847     * If we detect an UTF8 error that probably mean that the
848     * input encoding didn't get properly advertised in the
849     * declaration header. Report the error and switch the encoding
850     * to ISO-Latin-1 (if you don't like this policy, just declare the
851     * encoding !)
852     */
853    {
854        char buffer[150];
855
856	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
857			ctxt->input->cur[0], ctxt->input->cur[1],
858			ctxt->input->cur[2], ctxt->input->cur[3]);
859	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
860		     "Input is not proper UTF-8, indicate encoding !\n%s",
861		     BAD_CAST buffer, NULL);
862    }
863    *len = 1;
864    return ((int) *cur);
865}
866
867/**
868 * xmlCopyCharMultiByte:
869 * @out:  pointer to an array of xmlChar
870 * @val:  the char value
871 *
872 * append the char value in the array
873 *
874 * Returns the number of xmlChar written
875 */
876int
877xmlCopyCharMultiByte(xmlChar *out, int val) {
878    if (out == NULL) return(0);
879    /*
880     * We are supposed to handle UTF8, check it's valid
881     * From rfc2044: encoding of the Unicode values on UTF-8:
882     *
883     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
884     * 0000 0000-0000 007F   0xxxxxxx
885     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
886     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
887     */
888    if  (val >= 0x80) {
889	xmlChar *savedout = out;
890	int bits;
891	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
892	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
893	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
894	else {
895	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
896		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
897			      val);
898	    return(0);
899	}
900	for ( ; bits >= 0; bits-= 6)
901	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
902	return (out - savedout);
903    }
904    *out = (xmlChar) val;
905    return 1;
906}
907
908/**
909 * xmlCopyChar:
910 * @len:  Ignored, compatibility
911 * @out:  pointer to an array of xmlChar
912 * @val:  the char value
913 *
914 * append the char value in the array
915 *
916 * Returns the number of xmlChar written
917 */
918
919int
920xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
921    if (out == NULL) return(0);
922    /* the len parameter is ignored */
923    if  (val >= 0x80) {
924	return(xmlCopyCharMultiByte (out, val));
925    }
926    *out = (xmlChar) val;
927    return 1;
928}
929
930/************************************************************************
931 *									*
932 *		Commodity functions to switch encodings			*
933 *									*
934 ************************************************************************/
935
936/**
937 * xmlSwitchEncoding:
938 * @ctxt:  the parser context
939 * @enc:  the encoding value (number)
940 *
941 * change the input functions when discovering the character encoding
942 * of a given entity.
943 *
944 * Returns 0 in case of success, -1 otherwise
945 */
946int
947xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
948{
949    xmlCharEncodingHandlerPtr handler;
950
951    if (ctxt == NULL) return(-1);
952    switch (enc) {
953	case XML_CHAR_ENCODING_ERROR:
954	    __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
955	                   "encoding unknown\n", NULL, NULL);
956	    return(-1);
957	case XML_CHAR_ENCODING_NONE:
958	    /* let's assume it's UTF-8 without the XML decl */
959	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
960	    return(0);
961	case XML_CHAR_ENCODING_UTF8:
962	    /* default encoding, no conversion should be needed */
963	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
964
965	    /*
966	     * Errata on XML-1.0 June 20 2001
967	     * Specific handling of the Byte Order Mark for
968	     * UTF-8
969	     */
970	    if ((ctxt->input != NULL) &&
971		(ctxt->input->cur[0] == 0xEF) &&
972		(ctxt->input->cur[1] == 0xBB) &&
973		(ctxt->input->cur[2] == 0xBF)) {
974		ctxt->input->cur += 3;
975	    }
976	    return(0);
977    case XML_CHAR_ENCODING_UTF16LE:
978    case XML_CHAR_ENCODING_UTF16BE:
979        /*The raw input characters are encoded
980         *in UTF-16. As we expect this function
981         *to be called after xmlCharEncInFunc, we expect
982         *ctxt->input->cur to contain UTF-8 encoded characters.
983         *So the raw UTF16 Byte Order Mark
984         *has also been converted into
985         *an UTF-8 BOM. Let's skip that BOM.
986         */
987        if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
988            (ctxt->input->cur[0] == 0xEF) &&
989            (ctxt->input->cur[1] == 0xBB) &&
990            (ctxt->input->cur[2] == 0xBF)) {
991            ctxt->input->cur += 3;
992        }
993	break ;
994	default:
995	    break;
996    }
997    handler = xmlGetCharEncodingHandler(enc);
998    if (handler == NULL) {
999	/*
1000	 * Default handlers.
1001	 */
1002	switch (enc) {
1003	    case XML_CHAR_ENCODING_ASCII:
1004		/* default encoding, no conversion should be needed */
1005		ctxt->charset = XML_CHAR_ENCODING_UTF8;
1006		return(0);
1007	    case XML_CHAR_ENCODING_UTF16LE:
1008		break;
1009	    case XML_CHAR_ENCODING_UTF16BE:
1010		break;
1011	    case XML_CHAR_ENCODING_UCS4LE:
1012		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1013			       "encoding not supported %s\n",
1014			       BAD_CAST "USC4 little endian", NULL);
1015		break;
1016	    case XML_CHAR_ENCODING_UCS4BE:
1017		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1018			       "encoding not supported %s\n",
1019			       BAD_CAST "USC4 big endian", NULL);
1020		break;
1021	    case XML_CHAR_ENCODING_EBCDIC:
1022		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1023			       "encoding not supported %s\n",
1024			       BAD_CAST "EBCDIC", NULL);
1025		break;
1026	    case XML_CHAR_ENCODING_UCS4_2143:
1027		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1028			       "encoding not supported %s\n",
1029			       BAD_CAST "UCS4 2143", NULL);
1030		break;
1031	    case XML_CHAR_ENCODING_UCS4_3412:
1032		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1033			       "encoding not supported %s\n",
1034			       BAD_CAST "UCS4 3412", NULL);
1035		break;
1036	    case XML_CHAR_ENCODING_UCS2:
1037		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1038			       "encoding not supported %s\n",
1039			       BAD_CAST "UCS2", NULL);
1040		break;
1041	    case XML_CHAR_ENCODING_8859_1:
1042	    case XML_CHAR_ENCODING_8859_2:
1043	    case XML_CHAR_ENCODING_8859_3:
1044	    case XML_CHAR_ENCODING_8859_4:
1045	    case XML_CHAR_ENCODING_8859_5:
1046	    case XML_CHAR_ENCODING_8859_6:
1047	    case XML_CHAR_ENCODING_8859_7:
1048	    case XML_CHAR_ENCODING_8859_8:
1049	    case XML_CHAR_ENCODING_8859_9:
1050		/*
1051		 * We used to keep the internal content in the
1052		 * document encoding however this turns being unmaintainable
1053		 * So xmlGetCharEncodingHandler() will return non-null
1054		 * values for this now.
1055		 */
1056		if ((ctxt->inputNr == 1) &&
1057		    (ctxt->encoding == NULL) &&
1058		    (ctxt->input != NULL) &&
1059		    (ctxt->input->encoding != NULL)) {
1060		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1061		}
1062		ctxt->charset = enc;
1063		return(0);
1064	    case XML_CHAR_ENCODING_2022_JP:
1065		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1066			       "encoding not supported %s\n",
1067			       BAD_CAST "ISO-2022-JP", NULL);
1068		break;
1069	    case XML_CHAR_ENCODING_SHIFT_JIS:
1070		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1071			       "encoding not supported %s\n",
1072			       BAD_CAST "Shift_JIS", NULL);
1073		break;
1074	    case XML_CHAR_ENCODING_EUC_JP:
1075		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1076			       "encoding not supported %s\n",
1077			       BAD_CAST "EUC-JP", NULL);
1078		break;
1079	    default:
1080	        break;
1081	}
1082    }
1083    if (handler == NULL)
1084	return(-1);
1085    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1086    return(xmlSwitchToEncoding(ctxt, handler));
1087}
1088
1089/**
1090 * xmlSwitchInputEncoding:
1091 * @ctxt:  the parser context
1092 * @input:  the input stream
1093 * @handler:  the encoding handler
1094 *
1095 * change the input functions when discovering the character encoding
1096 * of a given entity.
1097 *
1098 * Returns 0 in case of success, -1 otherwise
1099 */
1100int
1101xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1102                       xmlCharEncodingHandlerPtr handler)
1103{
1104    int nbchars;
1105
1106    if (handler == NULL)
1107        return (-1);
1108    if (input == NULL)
1109        return (-1);
1110    if (input->buf != NULL) {
1111        if (input->buf->encoder != NULL) {
1112            /*
1113             * Check in case the auto encoding detetection triggered
1114             * in already.
1115             */
1116            if (input->buf->encoder == handler)
1117                return (0);
1118
1119            /*
1120             * "UTF-16" can be used for both LE and BE
1121             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1122             BAD_CAST "UTF-16", 6)) &&
1123             (!xmlStrncmp(BAD_CAST handler->name,
1124             BAD_CAST "UTF-16", 6))) {
1125             return(0);
1126             }
1127             */
1128
1129            /*
1130             * Note: this is a bit dangerous, but that's what it
1131             * takes to use nearly compatible signature for different
1132             * encodings.
1133             */
1134            xmlCharEncCloseFunc(input->buf->encoder);
1135            input->buf->encoder = handler;
1136            return (0);
1137        }
1138        input->buf->encoder = handler;
1139
1140        /*
1141         * Is there already some content down the pipe to convert ?
1142         */
1143        if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
1144            int processed;
1145	    unsigned int use;
1146
1147            /*
1148             * Specific handling of the Byte Order Mark for
1149             * UTF-16
1150             */
1151            if ((handler->name != NULL) &&
1152                (!strcmp(handler->name, "UTF-16LE") ||
1153                 !strcmp(handler->name, "UTF-16")) &&
1154                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1155                input->cur += 2;
1156            }
1157            if ((handler->name != NULL) &&
1158                (!strcmp(handler->name, "UTF-16BE")) &&
1159                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1160                input->cur += 2;
1161            }
1162            /*
1163             * Errata on XML-1.0 June 20 2001
1164             * Specific handling of the Byte Order Mark for
1165             * UTF-8
1166             */
1167            if ((handler->name != NULL) &&
1168                (!strcmp(handler->name, "UTF-8")) &&
1169                (input->cur[0] == 0xEF) &&
1170                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1171                input->cur += 3;
1172            }
1173
1174            /*
1175             * Shrink the current input buffer.
1176             * Move it as the raw buffer and create a new input buffer
1177             */
1178            processed = input->cur - input->base;
1179            xmlBufferShrink(input->buf->buffer, processed);
1180            input->buf->raw = input->buf->buffer;
1181            input->buf->buffer = xmlBufferCreate();
1182	    input->buf->rawconsumed = processed;
1183	    use = input->buf->raw->use;
1184
1185            if (ctxt->html) {
1186                /*
1187                 * convert as much as possible of the buffer
1188                 */
1189                nbchars = xmlCharEncInFunc(input->buf->encoder,
1190                                           input->buf->buffer,
1191                                           input->buf->raw);
1192            } else {
1193                /*
1194                 * convert just enough to get
1195                 * '<?xml version="1.0" encoding="xxx"?>'
1196                 * parsed with the autodetected encoding
1197                 * into the parser reading buffer.
1198                 */
1199                nbchars = xmlCharEncFirstLine(input->buf->encoder,
1200                                              input->buf->buffer,
1201                                              input->buf->raw);
1202            }
1203            if (nbchars < 0) {
1204                xmlErrInternal(ctxt,
1205                               "switching encoding: encoder error\n",
1206                               NULL);
1207                return (-1);
1208            }
1209	    input->buf->rawconsumed += use - input->buf->raw->use;
1210            input->base = input->cur = input->buf->buffer->content;
1211            input->end = &input->base[input->buf->buffer->use];
1212
1213        }
1214        return (0);
1215    } else if (input->length == 0) {
1216	/*
1217	 * When parsing a static memory array one must know the
1218	 * size to be able to convert the buffer.
1219	 */
1220	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1221	return (-1);
1222    }
1223    return (0);
1224}
1225
1226/**
1227 * xmlSwitchToEncoding:
1228 * @ctxt:  the parser context
1229 * @handler:  the encoding handler
1230 *
1231 * change the input functions when discovering the character encoding
1232 * of a given entity.
1233 *
1234 * Returns 0 in case of success, -1 otherwise
1235 */
1236int
1237xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1238{
1239    int ret = 0;
1240
1241    if (handler != NULL) {
1242        if (ctxt->input != NULL) {
1243	    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1244	} else {
1245	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1246	                   NULL);
1247	    return(-1);
1248	}
1249	/*
1250	 * The parsing is now done in UTF8 natively
1251	 */
1252	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1253    } else
1254	return(-1);
1255    return(ret);
1256}
1257
1258/************************************************************************
1259 *									*
1260 *	Commodity functions to handle entities processing		*
1261 *									*
1262 ************************************************************************/
1263
1264/**
1265 * xmlFreeInputStream:
1266 * @input:  an xmlParserInputPtr
1267 *
1268 * Free up an input stream.
1269 */
1270void
1271xmlFreeInputStream(xmlParserInputPtr input) {
1272    if (input == NULL) return;
1273
1274    if (input->filename != NULL) xmlFree((char *) input->filename);
1275    if (input->directory != NULL) xmlFree((char *) input->directory);
1276    if (input->encoding != NULL) xmlFree((char *) input->encoding);
1277    if (input->version != NULL) xmlFree((char *) input->version);
1278    if ((input->free != NULL) && (input->base != NULL))
1279        input->free((xmlChar *) input->base);
1280    if (input->buf != NULL)
1281        xmlFreeParserInputBuffer(input->buf);
1282    xmlFree(input);
1283}
1284
1285/**
1286 * xmlNewInputStream:
1287 * @ctxt:  an XML parser context
1288 *
1289 * Create a new input stream structure
1290 * Returns the new input stream or NULL
1291 */
1292xmlParserInputPtr
1293xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1294    xmlParserInputPtr input;
1295    static int id = 0;
1296
1297    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1298    if (input == NULL) {
1299        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1300	return(NULL);
1301    }
1302    memset(input, 0, sizeof(xmlParserInput));
1303    input->line = 1;
1304    input->col = 1;
1305    input->standalone = -1;
1306    /*
1307     * we don't care about thread reentrancy unicity for a single
1308     * parser context (and hence thread) is sufficient.
1309     */
1310    input->id = id++;
1311    return(input);
1312}
1313
1314/**
1315 * xmlNewIOInputStream:
1316 * @ctxt:  an XML parser context
1317 * @input:  an I/O Input
1318 * @enc:  the charset encoding if known
1319 *
1320 * Create a new input stream structure encapsulating the @input into
1321 * a stream suitable for the parser.
1322 *
1323 * Returns the new input stream or NULL
1324 */
1325xmlParserInputPtr
1326xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1327	            xmlCharEncoding enc) {
1328    xmlParserInputPtr inputStream;
1329
1330    if (input == NULL) return(NULL);
1331    if (xmlParserDebugEntities)
1332	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1333    inputStream = xmlNewInputStream(ctxt);
1334    if (inputStream == NULL) {
1335	return(NULL);
1336    }
1337    inputStream->filename = NULL;
1338    inputStream->buf = input;
1339    inputStream->base = inputStream->buf->buffer->content;
1340    inputStream->cur = inputStream->buf->buffer->content;
1341    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1342    if (enc != XML_CHAR_ENCODING_NONE) {
1343        xmlSwitchEncoding(ctxt, enc);
1344    }
1345
1346    return(inputStream);
1347}
1348
1349/**
1350 * xmlNewEntityInputStream:
1351 * @ctxt:  an XML parser context
1352 * @entity:  an Entity pointer
1353 *
1354 * Create a new input stream based on an xmlEntityPtr
1355 *
1356 * Returns the new input stream or NULL
1357 */
1358xmlParserInputPtr
1359xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1360    xmlParserInputPtr input;
1361
1362    if (entity == NULL) {
1363        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1364	               NULL);
1365	return(NULL);
1366    }
1367    if (xmlParserDebugEntities)
1368	xmlGenericError(xmlGenericErrorContext,
1369		"new input from entity: %s\n", entity->name);
1370    if (entity->content == NULL) {
1371	switch (entity->etype) {
1372            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1373	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1374		               entity->name);
1375                break;
1376            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1377            case XML_EXTERNAL_PARAMETER_ENTITY:
1378		return(xmlLoadExternalEntity((char *) entity->URI,
1379		       (char *) entity->ExternalID, ctxt));
1380            case XML_INTERNAL_GENERAL_ENTITY:
1381	        xmlErrInternal(ctxt,
1382		      "Internal entity %s without content !\n",
1383		               entity->name);
1384                break;
1385            case XML_INTERNAL_PARAMETER_ENTITY:
1386	        xmlErrInternal(ctxt,
1387		      "Internal parameter entity %s without content !\n",
1388		               entity->name);
1389                break;
1390            case XML_INTERNAL_PREDEFINED_ENTITY:
1391	        xmlErrInternal(ctxt,
1392		      "Predefined entity %s without content !\n",
1393		               entity->name);
1394                break;
1395	}
1396	return(NULL);
1397    }
1398    input = xmlNewInputStream(ctxt);
1399    if (input == NULL) {
1400	return(NULL);
1401    }
1402    if (entity->URI != NULL)
1403	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1404    input->base = entity->content;
1405    input->cur = entity->content;
1406    input->length = entity->length;
1407    input->end = &entity->content[input->length];
1408    return(input);
1409}
1410
1411/**
1412 * xmlNewStringInputStream:
1413 * @ctxt:  an XML parser context
1414 * @buffer:  an memory buffer
1415 *
1416 * Create a new input stream based on a memory buffer.
1417 * Returns the new input stream
1418 */
1419xmlParserInputPtr
1420xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1421    xmlParserInputPtr input;
1422
1423    if (buffer == NULL) {
1424        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1425	               NULL);
1426	return(NULL);
1427    }
1428    if (xmlParserDebugEntities)
1429	xmlGenericError(xmlGenericErrorContext,
1430		"new fixed input: %.30s\n", buffer);
1431    input = xmlNewInputStream(ctxt);
1432    if (input == NULL) {
1433        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1434	return(NULL);
1435    }
1436    input->base = buffer;
1437    input->cur = buffer;
1438    input->length = xmlStrlen(buffer);
1439    input->end = &buffer[input->length];
1440    return(input);
1441}
1442
1443/**
1444 * xmlNewInputFromFile:
1445 * @ctxt:  an XML parser context
1446 * @filename:  the filename to use as entity
1447 *
1448 * Create a new input stream based on a file or an URL.
1449 *
1450 * Returns the new input stream or NULL in case of error
1451 */
1452xmlParserInputPtr
1453xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1454    xmlParserInputBufferPtr buf;
1455    xmlParserInputPtr inputStream;
1456    char *directory = NULL;
1457    xmlChar *URI = NULL;
1458
1459    if (xmlParserDebugEntities)
1460	xmlGenericError(xmlGenericErrorContext,
1461		"new input from file: %s\n", filename);
1462    if (ctxt == NULL) return(NULL);
1463    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1464    if (buf == NULL) {
1465	if (filename == NULL)
1466	    __xmlLoaderErr(ctxt,
1467	                   "failed to load external entity: NULL filename \n",
1468			   NULL);
1469	else
1470	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1471			   (const char *) filename);
1472	return(NULL);
1473    }
1474
1475    inputStream = xmlNewInputStream(ctxt);
1476    if (inputStream == NULL)
1477	return(NULL);
1478
1479    inputStream->buf = buf;
1480    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1481    if (inputStream == NULL)
1482        return(NULL);
1483
1484    if (inputStream->filename == NULL)
1485	URI = xmlStrdup((xmlChar *) filename);
1486    else
1487	URI = xmlStrdup((xmlChar *) inputStream->filename);
1488    directory = xmlParserGetDirectory((const char *) URI);
1489    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1490    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1491    if (URI != NULL) xmlFree((char *) URI);
1492    inputStream->directory = directory;
1493
1494    inputStream->base = inputStream->buf->buffer->content;
1495    inputStream->cur = inputStream->buf->buffer->content;
1496    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1497    if ((ctxt->directory == NULL) && (directory != NULL))
1498        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1499    return(inputStream);
1500}
1501
1502/************************************************************************
1503 *									*
1504 *		Commodity functions to handle parser contexts		*
1505 *									*
1506 ************************************************************************/
1507
1508/**
1509 * xmlInitParserCtxt:
1510 * @ctxt:  an XML parser context
1511 *
1512 * Initialize a parser context
1513 *
1514 * Returns 0 in case of success and -1 in case of error
1515 */
1516
1517int
1518xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1519{
1520    xmlParserInputPtr input;
1521
1522    if(ctxt==NULL) {
1523        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1524        return(-1);
1525    }
1526
1527    xmlDefaultSAXHandlerInit();
1528
1529    if (ctxt->dict == NULL)
1530	ctxt->dict = xmlDictCreate();
1531    if (ctxt->dict == NULL) {
1532        xmlErrMemory(NULL, "cannot initialize parser context\n");
1533	return(-1);
1534    }
1535    if (ctxt->sax == NULL)
1536	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1537    if (ctxt->sax == NULL) {
1538        xmlErrMemory(NULL, "cannot initialize parser context\n");
1539	return(-1);
1540    }
1541    else
1542        xmlSAXVersion(ctxt->sax, 2);
1543
1544    ctxt->maxatts = 0;
1545    ctxt->atts = NULL;
1546    /* Allocate the Input stack */
1547    if (ctxt->inputTab == NULL) {
1548	ctxt->inputTab = (xmlParserInputPtr *)
1549		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
1550	ctxt->inputMax = 5;
1551    }
1552    if (ctxt->inputTab == NULL) {
1553        xmlErrMemory(NULL, "cannot initialize parser context\n");
1554	ctxt->inputNr = 0;
1555	ctxt->inputMax = 0;
1556	ctxt->input = NULL;
1557	return(-1);
1558    }
1559    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1560        xmlFreeInputStream(input);
1561    }
1562    ctxt->inputNr = 0;
1563    ctxt->input = NULL;
1564
1565    ctxt->version = NULL;
1566    ctxt->encoding = NULL;
1567    ctxt->standalone = -1;
1568    ctxt->hasExternalSubset = 0;
1569    ctxt->hasPErefs = 0;
1570    ctxt->html = 0;
1571    ctxt->external = 0;
1572    ctxt->instate = XML_PARSER_START;
1573    ctxt->token = 0;
1574    ctxt->directory = NULL;
1575
1576    /* Allocate the Node stack */
1577    if (ctxt->nodeTab == NULL) {
1578	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1579	ctxt->nodeMax = 10;
1580    }
1581    if (ctxt->nodeTab == NULL) {
1582        xmlErrMemory(NULL, "cannot initialize parser context\n");
1583	ctxt->nodeNr = 0;
1584	ctxt->nodeMax = 0;
1585	ctxt->node = NULL;
1586	ctxt->inputNr = 0;
1587	ctxt->inputMax = 0;
1588	ctxt->input = NULL;
1589	return(-1);
1590    }
1591    ctxt->nodeNr = 0;
1592    ctxt->node = NULL;
1593
1594    /* Allocate the Name stack */
1595    if (ctxt->nameTab == NULL) {
1596	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1597	ctxt->nameMax = 10;
1598    }
1599    if (ctxt->nameTab == NULL) {
1600        xmlErrMemory(NULL, "cannot initialize parser context\n");
1601	ctxt->nodeNr = 0;
1602	ctxt->nodeMax = 0;
1603	ctxt->node = NULL;
1604	ctxt->inputNr = 0;
1605	ctxt->inputMax = 0;
1606	ctxt->input = NULL;
1607	ctxt->nameNr = 0;
1608	ctxt->nameMax = 0;
1609	ctxt->name = NULL;
1610	return(-1);
1611    }
1612    ctxt->nameNr = 0;
1613    ctxt->name = NULL;
1614
1615    /* Allocate the space stack */
1616    if (ctxt->spaceTab == NULL) {
1617	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1618	ctxt->spaceMax = 10;
1619    }
1620    if (ctxt->spaceTab == NULL) {
1621        xmlErrMemory(NULL, "cannot initialize parser context\n");
1622	ctxt->nodeNr = 0;
1623	ctxt->nodeMax = 0;
1624	ctxt->node = NULL;
1625	ctxt->inputNr = 0;
1626	ctxt->inputMax = 0;
1627	ctxt->input = NULL;
1628	ctxt->nameNr = 0;
1629	ctxt->nameMax = 0;
1630	ctxt->name = NULL;
1631	ctxt->spaceNr = 0;
1632	ctxt->spaceMax = 0;
1633	ctxt->space = NULL;
1634	return(-1);
1635    }
1636    ctxt->spaceNr = 1;
1637    ctxt->spaceMax = 10;
1638    ctxt->spaceTab[0] = -1;
1639    ctxt->space = &ctxt->spaceTab[0];
1640    ctxt->userData = ctxt;
1641    ctxt->myDoc = NULL;
1642    ctxt->wellFormed = 1;
1643    ctxt->nsWellFormed = 1;
1644    ctxt->valid = 1;
1645    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1646    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1647    ctxt->pedantic = xmlPedanticParserDefaultValue;
1648    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1649    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1650    if (ctxt->keepBlanks == 0)
1651	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1652
1653    ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1654    ctxt->vctxt.userData = ctxt;
1655    ctxt->vctxt.error = xmlParserValidityError;
1656    ctxt->vctxt.warning = xmlParserValidityWarning;
1657    if (ctxt->validate) {
1658	if (xmlGetWarningsDefaultValue == 0)
1659	    ctxt->vctxt.warning = NULL;
1660	else
1661	    ctxt->vctxt.warning = xmlParserValidityWarning;
1662	ctxt->vctxt.nodeMax = 0;
1663    }
1664    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1665    ctxt->record_info = 0;
1666    ctxt->nbChars = 0;
1667    ctxt->checkIndex = 0;
1668    ctxt->inSubset = 0;
1669    ctxt->errNo = XML_ERR_OK;
1670    ctxt->depth = 0;
1671    ctxt->charset = XML_CHAR_ENCODING_UTF8;
1672    ctxt->catalogs = NULL;
1673    ctxt->nbentities = 0;
1674    xmlInitNodeInfoSeq(&ctxt->node_seq);
1675    return(0);
1676}
1677
1678/**
1679 * xmlFreeParserCtxt:
1680 * @ctxt:  an XML parser context
1681 *
1682 * Free all the memory used by a parser context. However the parsed
1683 * document in ctxt->myDoc is not freed.
1684 */
1685
1686void
1687xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1688{
1689    xmlParserInputPtr input;
1690
1691    if (ctxt == NULL) return;
1692
1693    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1694        xmlFreeInputStream(input);
1695    }
1696    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1697    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1698    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1699    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1700    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1701    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1702    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1703    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1704#ifdef LIBXML_SAX1_ENABLED
1705    if ((ctxt->sax != NULL) &&
1706        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1707#else
1708    if (ctxt->sax != NULL)
1709#endif /* LIBXML_SAX1_ENABLED */
1710        xmlFree(ctxt->sax);
1711    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1712    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1713    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1714    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1715    if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1716    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1717    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1718    if (ctxt->attsDefault != NULL)
1719        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1720    if (ctxt->attsSpecial != NULL)
1721        xmlHashFree(ctxt->attsSpecial, NULL);
1722    if (ctxt->freeElems != NULL) {
1723        xmlNodePtr cur, next;
1724
1725	cur = ctxt->freeElems;
1726	while (cur != NULL) {
1727	    next = cur->next;
1728	    xmlFree(cur);
1729	    cur = next;
1730	}
1731    }
1732    if (ctxt->freeAttrs != NULL) {
1733        xmlAttrPtr cur, next;
1734
1735	cur = ctxt->freeAttrs;
1736	while (cur != NULL) {
1737	    next = cur->next;
1738	    xmlFree(cur);
1739	    cur = next;
1740	}
1741    }
1742    /*
1743     * cleanup the error strings
1744     */
1745    if (ctxt->lastError.message != NULL)
1746        xmlFree(ctxt->lastError.message);
1747    if (ctxt->lastError.file != NULL)
1748        xmlFree(ctxt->lastError.file);
1749    if (ctxt->lastError.str1 != NULL)
1750        xmlFree(ctxt->lastError.str1);
1751    if (ctxt->lastError.str2 != NULL)
1752        xmlFree(ctxt->lastError.str2);
1753    if (ctxt->lastError.str3 != NULL)
1754        xmlFree(ctxt->lastError.str3);
1755
1756#ifdef LIBXML_CATALOG_ENABLED
1757    if (ctxt->catalogs != NULL)
1758	xmlCatalogFreeLocal(ctxt->catalogs);
1759#endif
1760    xmlFree(ctxt);
1761}
1762
1763/**
1764 * xmlNewParserCtxt:
1765 *
1766 * Allocate and initialize a new parser context.
1767 *
1768 * Returns the xmlParserCtxtPtr or NULL
1769 */
1770
1771xmlParserCtxtPtr
1772xmlNewParserCtxt(void)
1773{
1774    xmlParserCtxtPtr ctxt;
1775
1776    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1777    if (ctxt == NULL) {
1778	xmlErrMemory(NULL, "cannot allocate parser context\n");
1779	return(NULL);
1780    }
1781    memset(ctxt, 0, sizeof(xmlParserCtxt));
1782    if (xmlInitParserCtxt(ctxt) < 0) {
1783        xmlFreeParserCtxt(ctxt);
1784	return(NULL);
1785    }
1786    return(ctxt);
1787}
1788
1789/************************************************************************
1790 *									*
 *		Handling of node information				*
1792 *									*
1793 ************************************************************************/
1794
1795/**
1796 * xmlClearParserCtxt:
1797 * @ctxt:  an XML parser context
1798 *
1799 * Clear (release owned resources) and reinitialize a parser context
1800 */
1801
1802void
1803xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1804{
1805  if (ctxt==NULL)
1806    return;
1807  xmlClearNodeInfoSeq(&ctxt->node_seq);
1808  xmlCtxtReset(ctxt);
1809}
1810
1811
1812/**
1813 * xmlParserFindNodeInfo:
1814 * @ctx:  an XML parser context
1815 * @node:  an XML node within the tree
1816 *
1817 * Find the parser node info struct for a given node
1818 *
1819 * Returns an xmlParserNodeInfo block pointer or NULL
1820 */
1821const xmlParserNodeInfo *
1822xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1823{
1824    unsigned long pos;
1825
1826    if ((ctx == NULL) || (node == NULL))
1827        return (NULL);
1828    /* Find position where node should be at */
1829    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1830    if (pos < ctx->node_seq.length
1831        && ctx->node_seq.buffer[pos].node == node)
1832        return &ctx->node_seq.buffer[pos];
1833    else
1834        return NULL;
1835}
1836
1837
1838/**
1839 * xmlInitNodeInfoSeq:
1840 * @seq:  a node info sequence pointer
1841 *
1842 * -- Initialize (set to initial state) node info sequence
1843 */
1844void
1845xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1846{
1847    if (seq == NULL)
1848        return;
1849    seq->length = 0;
1850    seq->maximum = 0;
1851    seq->buffer = NULL;
1852}
1853
1854/**
1855 * xmlClearNodeInfoSeq:
1856 * @seq:  a node info sequence pointer
1857 *
1858 * -- Clear (release memory and reinitialize) node
1859 *   info sequence
1860 */
1861void
1862xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1863{
1864    if (seq == NULL)
1865        return;
1866    if (seq->buffer != NULL)
1867        xmlFree(seq->buffer);
1868    xmlInitNodeInfoSeq(seq);
1869}
1870
1871/**
1872 * xmlParserFindNodeInfoIndex:
1873 * @seq:  a node info sequence pointer
1874 * @node:  an XML node pointer
1875 *
1876 *
1877 * xmlParserFindNodeInfoIndex : Find the index that the info record for
1878 *   the given node is or should be at in a sorted sequence
1879 *
1880 * Returns a long indicating the position of the record
1881 */
1882unsigned long
1883xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1884                           const xmlNodePtr node)
1885{
1886    unsigned long upper, lower, middle;
1887    int found = 0;
1888
1889    if ((seq == NULL) || (node == NULL))
1890        return ((unsigned long) -1);
1891
1892    /* Do a binary search for the key */
1893    lower = 1;
1894    upper = seq->length;
1895    middle = 0;
1896    while (lower <= upper && !found) {
1897        middle = lower + (upper - lower) / 2;
1898        if (node == seq->buffer[middle - 1].node)
1899            found = 1;
1900        else if (node < seq->buffer[middle - 1].node)
1901            upper = middle - 1;
1902        else
1903            lower = middle + 1;
1904    }
1905
1906    /* Return position */
1907    if (middle == 0 || seq->buffer[middle - 1].node < node)
1908        return middle;
1909    else
1910        return middle - 1;
1911}
1912
1913
1914/**
1915 * xmlParserAddNodeInfo:
1916 * @ctxt:  an XML parser context
1917 * @info:  a node info sequence pointer
1918 *
1919 * Insert node info record into the sorted sequence
1920 */
1921void
1922xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1923                     const xmlParserNodeInfoPtr info)
1924{
1925    unsigned long pos;
1926
1927    if ((ctxt == NULL) || (info == NULL)) return;
1928
1929    /* Find pos and check to see if node is already in the sequence */
1930    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1931                                     info->node);
1932
1933    if ((pos < ctxt->node_seq.length) &&
1934        (ctxt->node_seq.buffer != NULL) &&
1935        (ctxt->node_seq.buffer[pos].node == info->node)) {
1936        ctxt->node_seq.buffer[pos] = *info;
1937    }
1938
1939    /* Otherwise, we need to add new node to buffer */
1940    else {
1941        if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
1942            xmlParserNodeInfo *tmp_buffer;
1943            unsigned int byte_size;
1944
1945            if (ctxt->node_seq.maximum == 0)
1946                ctxt->node_seq.maximum = 2;
1947            byte_size = (sizeof(*ctxt->node_seq.buffer) *
1948			(2 * ctxt->node_seq.maximum));
1949
1950            if (ctxt->node_seq.buffer == NULL)
1951                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
1952            else
1953                tmp_buffer =
1954                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
1955                                                     byte_size);
1956
1957            if (tmp_buffer == NULL) {
1958		xmlErrMemory(ctxt, "failed to allocate buffer\n");
1959                return;
1960            }
1961            ctxt->node_seq.buffer = tmp_buffer;
1962            ctxt->node_seq.maximum *= 2;
1963        }
1964
1965        /* If position is not at end, move elements out of the way */
1966        if (pos != ctxt->node_seq.length) {
1967            unsigned long i;
1968
1969            for (i = ctxt->node_seq.length; i > pos; i--)
1970                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1971        }
1972
1973        /* Copy element and increase length */
1974        ctxt->node_seq.buffer[pos] = *info;
1975        ctxt->node_seq.length++;
1976    }
1977}
1978
1979/************************************************************************
1980 *									*
 *		Default settings					*
1982 *									*
1983 ************************************************************************/
1984/**
1985 * xmlPedanticParserDefault:
1986 * @val:  int 0 or 1
1987 *
1988 * Set and return the previous value for enabling pedantic warnings.
1989 *
1990 * Returns the last value for 0 for no substitution, 1 for substitution.
1991 */
1992
1993int
1994xmlPedanticParserDefault(int val) {
1995    int old = xmlPedanticParserDefaultValue;
1996
1997    xmlPedanticParserDefaultValue = val;
1998    return(old);
1999}
2000
2001/**
2002 * xmlLineNumbersDefault:
2003 * @val:  int 0 or 1
2004 *
2005 * Set and return the previous value for enabling line numbers in elements
2006 * contents. This may break on old application and is turned off by default.
2007 *
2008 * Returns the last value for 0 for no substitution, 1 for substitution.
2009 */
2010
2011int
2012xmlLineNumbersDefault(int val) {
2013    int old = xmlLineNumbersDefaultValue;
2014
2015    xmlLineNumbersDefaultValue = val;
2016    return(old);
2017}
2018
2019/**
2020 * xmlSubstituteEntitiesDefault:
2021 * @val:  int 0 or 1
2022 *
2023 * Set and return the previous value for default entity support.
2024 * Initially the parser always keep entity references instead of substituting
2025 * entity values in the output. This function has to be used to change the
2026 * default parser behavior
2027 * SAX::substituteEntities() has to be used for changing that on a file by
2028 * file basis.
2029 *
2030 * Returns the last value for 0 for no substitution, 1 for substitution.
2031 */
2032
2033int
2034xmlSubstituteEntitiesDefault(int val) {
2035    int old = xmlSubstituteEntitiesDefaultValue;
2036
2037    xmlSubstituteEntitiesDefaultValue = val;
2038    return(old);
2039}
2040
2041/**
2042 * xmlKeepBlanksDefault:
2043 * @val:  int 0 or 1
2044 *
2045 * Set and return the previous value for default blanks text nodes support.
2046 * The 1.x version of the parser used an heuristic to try to detect
2047 * ignorable white spaces. As a result the SAX callback was generating
2048 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2049 * using the DOM output text nodes containing those blanks were not generated.
2050 * The 2.x and later version will switch to the XML standard way and
2051 * ignorableWhitespace() are only generated when running the parser in
2052 * validating mode and when the current element doesn't allow CDATA or
2053 * mixed content.
2054 * This function is provided as a way to force the standard behavior
2055 * on 1.X libs and to switch back to the old mode for compatibility when
2056 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2057 * by using xmlIsBlankNode() commodity function to detect the "empty"
2058 * nodes generated.
2059 * This value also affect autogeneration of indentation when saving code
2060 * if blanks sections are kept, indentation is not generated.
2061 *
2062 * Returns the last value for 0 for no substitution, 1 for substitution.
2063 */
2064
2065int
2066xmlKeepBlanksDefault(int val) {
2067    int old = xmlKeepBlanksDefaultValue;
2068
2069    xmlKeepBlanksDefaultValue = val;
2070    xmlIndentTreeOutput = !val;
2071    return(old);
2072}
2073
2074#define bottom_parserInternals
2075#include "elfgcchack.h"
2076