1/* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10#define IN_LIBXML 11#include "libxml.h" 12 13#if defined(WIN32) && !defined (__CYGWIN__) 14#define XML_DIR_SEP '\\' 15#else 16#define XML_DIR_SEP '/' 17#endif 18 19#include <string.h> 20#ifdef HAVE_CTYPE_H 21#include <ctype.h> 22#endif 23#ifdef HAVE_STDLIB_H 24#include <stdlib.h> 25#endif 26#ifdef HAVE_SYS_STAT_H 27#include <sys/stat.h> 28#endif 29#ifdef HAVE_FCNTL_H 30#include <fcntl.h> 31#endif 32#ifdef HAVE_UNISTD_H 33#include <unistd.h> 34#endif 35#ifdef HAVE_ZLIB_H 36#include <zlib.h> 37#endif 38 39#include <libxml/xmlmemory.h> 40#include <libxml/tree.h> 41#include <libxml/parser.h> 42#include <libxml/parserInternals.h> 43#include <libxml/valid.h> 44#include <libxml/entities.h> 45#include <libxml/xmlerror.h> 46#include <libxml/encoding.h> 47#include <libxml/valid.h> 48#include <libxml/xmlIO.h> 49#include <libxml/uri.h> 50#include <libxml/dict.h> 51#include <libxml/SAX.h> 52#ifdef LIBXML_CATALOG_ENABLED 53#include <libxml/catalog.h> 54#endif 55#include <libxml/globals.h> 56#include <libxml/chvalid.h> 57 58/* 59 * Various global defaults for parsing 60 */ 61 62/** 63 * xmlCheckVersion: 64 * @version: the include version number 65 * 66 * check the compiled lib version against the include one. 67 * This can warn or immediately kill the application 68 */ 69void 70xmlCheckVersion(int version) { 71 int myversion = (int) LIBXML_VERSION; 72 73 xmlInitParser(); 74 75 if ((myversion / 10000) != (version / 10000)) { 76 xmlGenericError(xmlGenericErrorContext, 77 "Fatal: program compiled against libxml %d using libxml %d\n", 78 (version / 10000), (myversion / 10000)); 79 fprintf(stderr, 80 "Fatal: program compiled against libxml %d using libxml %d\n", 81 (version / 10000), (myversion / 10000)); 82 } 83 if ((myversion / 100) < (version / 100)) { 84 xmlGenericError(xmlGenericErrorContext, 85 "Warning: program compiled against libxml %d using older %d\n", 86 (version / 100), (myversion / 100)); 87 } 88} 89 90 91/************************************************************************ 92 * * 93 * Some factorized error routines * 94 * * 95 ************************************************************************/ 96 97 98/** 99 * xmlErrMemory: 100 * @ctxt: an XML parser context 101 * @extra: extra informations 102 * 103 * Handle a redefinition of attribute error 104 */ 105void 106xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 107{ 108 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 109 (ctxt->instate == XML_PARSER_EOF)) 110 return; 111 if (ctxt != NULL) { 112 ctxt->errNo = XML_ERR_NO_MEMORY; 113 ctxt->instate = XML_PARSER_EOF; 114 ctxt->disableSAX = 1; 115 } 116 if (extra) 117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 118 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 119 NULL, NULL, 0, 0, 120 "Memory allocation failed : %s\n", extra); 121 else 122 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 123 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 124 NULL, NULL, 0, 0, "Memory allocation failed\n"); 125} 126 127/** 128 * __xmlErrEncoding: 129 * @ctxt: an XML parser context 130 * @xmlerr: the error number 131 * @msg: the error message 132 * @str1: an string info 133 * @str2: an string info 134 * 135 * Handle an encoding error 136 */ 137void 138__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 139 const char *msg, const xmlChar * str1, const xmlChar * str2) 140{ 141 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 142 (ctxt->instate == XML_PARSER_EOF)) 143 return; 144 if (ctxt != NULL) 145 ctxt->errNo = xmlerr; 146 __xmlRaiseError(NULL, NULL, NULL, 147 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 148 NULL, 0, (const char *) str1, (const char *) str2, 149 NULL, 0, 0, msg, str1, str2); 150 if (ctxt != NULL) { 151 ctxt->wellFormed = 0; 152 if (ctxt->recovery == 0) 153 ctxt->disableSAX = 1; 154 } 155} 156 157/** 158 * xmlErrInternal: 159 * @ctxt: an XML parser context 160 * @msg: the error message 161 * @str: error informations 162 * 163 * Handle an internal error 164 */ 165static void 166xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 167{ 168 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 169 (ctxt->instate == XML_PARSER_EOF)) 170 return; 171 if (ctxt != NULL) 172 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 173 __xmlRaiseError(NULL, NULL, NULL, 174 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 175 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 176 0, 0, msg, str); 177 if (ctxt != NULL) { 178 ctxt->wellFormed = 0; 179 if (ctxt->recovery == 0) 180 ctxt->disableSAX = 1; 181 } 182} 183 184/** 185 * xmlErrEncodingInt: 186 * @ctxt: an XML parser context 187 * @error: the error number 188 * @msg: the error message 189 * @val: an integer value 190 * 191 * n encoding error 192 */ 193static void 194xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 195 const char *msg, int val) 196{ 197 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 198 (ctxt->instate == XML_PARSER_EOF)) 199 return; 200 if (ctxt != NULL) 201 ctxt->errNo = error; 202 __xmlRaiseError(NULL, NULL, NULL, 203 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 204 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 205 if (ctxt != NULL) { 206 ctxt->wellFormed = 0; 207 if (ctxt->recovery == 0) 208 ctxt->disableSAX = 1; 209 } 210} 211 212/** 213 * xmlIsLetter: 214 * @c: an unicode character (int) 215 * 216 * Check whether the character is allowed by the production 217 * [84] Letter ::= BaseChar | Ideographic 218 * 219 * Returns 0 if not, non-zero otherwise 220 */ 221int 222xmlIsLetter(int c) { 223 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 224} 225 226/************************************************************************ 227 * * 228 * Input handling functions for progressive parsing * 229 * * 230 ************************************************************************/ 231 232/* #define DEBUG_INPUT */ 233/* #define DEBUG_STACK */ 234/* #define DEBUG_PUSH */ 235 236 237/* we need to keep enough input to show errors in context */ 238#define LINE_LEN 80 239 240#ifdef DEBUG_INPUT 241#define CHECK_BUFFER(in) check_buffer(in) 242 243static 244void check_buffer(xmlParserInputPtr in) { 245 if (in->base != in->buf->buffer->content) { 246 xmlGenericError(xmlGenericErrorContext, 247 "xmlParserInput: base mismatch problem\n"); 248 } 249 if (in->cur < in->base) { 250 xmlGenericError(xmlGenericErrorContext, 251 "xmlParserInput: cur < base problem\n"); 252 } 253 if (in->cur > in->base + in->buf->buffer->use) { 254 xmlGenericError(xmlGenericErrorContext, 255 "xmlParserInput: cur > base + use problem\n"); 256 } 257 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 258 (int) in, (int) in->buf->buffer->content, in->cur - in->base, 259 in->buf->buffer->use, in->buf->buffer->size); 260} 261 262#else 263#define CHECK_BUFFER(in) 264#endif 265 266 267/** 268 * xmlParserInputRead: 269 * @in: an XML parser input 270 * @len: an indicative size for the lookahead 271 * 272 * This function refresh the input for the parser. It doesn't try to 273 * preserve pointers to the input buffer, and discard already read data 274 * 275 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 276 * end of this entity 277 */ 278int 279xmlParserInputRead(xmlParserInputPtr in, int len) { 280 int ret; 281 int used; 282 int indx; 283 284 if (in == NULL) return(-1); 285#ifdef DEBUG_INPUT 286 xmlGenericError(xmlGenericErrorContext, "Read\n"); 287#endif 288 if (in->buf == NULL) return(-1); 289 if (in->base == NULL) return(-1); 290 if (in->cur == NULL) return(-1); 291 if (in->buf->buffer == NULL) return(-1); 292 if (in->buf->readcallback == NULL) return(-1); 293 294 CHECK_BUFFER(in); 295 296 used = in->cur - in->buf->buffer->content; 297 ret = xmlBufferShrink(in->buf->buffer, used); 298 if (ret > 0) { 299 in->cur -= ret; 300 in->consumed += ret; 301 } 302 ret = xmlParserInputBufferRead(in->buf, len); 303 if (in->base != in->buf->buffer->content) { 304 /* 305 * the buffer has been reallocated 306 */ 307 indx = in->cur - in->base; 308 in->base = in->buf->buffer->content; 309 in->cur = &in->buf->buffer->content[indx]; 310 } 311 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 312 313 CHECK_BUFFER(in); 314 315 return(ret); 316} 317 318/** 319 * xmlParserInputGrow: 320 * @in: an XML parser input 321 * @len: an indicative size for the lookahead 322 * 323 * This function increase the input for the parser. It tries to 324 * preserve pointers to the input buffer, and keep already read data 325 * 326 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 327 * end of this entity 328 */ 329int 330xmlParserInputGrow(xmlParserInputPtr in, int len) { 331 int ret; 332 int indx; 333 334 if (in == NULL) return(-1); 335#ifdef DEBUG_INPUT 336 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 337#endif 338 if (in->buf == NULL) return(-1); 339 if (in->base == NULL) return(-1); 340 if (in->cur == NULL) return(-1); 341 if (in->buf->buffer == NULL) return(-1); 342 343 CHECK_BUFFER(in); 344 345 indx = in->cur - in->base; 346 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 347 348 CHECK_BUFFER(in); 349 350 return(0); 351 } 352 if (in->buf->readcallback != NULL) 353 ret = xmlParserInputBufferGrow(in->buf, len); 354 else 355 return(0); 356 357 /* 358 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 359 * block, but we use it really as an integer to do some 360 * pointer arithmetic. Insure will raise it as a bug but in 361 * that specific case, that's not ! 362 */ 363 if (in->base != in->buf->buffer->content) { 364 /* 365 * the buffer has been reallocated 366 */ 367 indx = in->cur - in->base; 368 in->base = in->buf->buffer->content; 369 in->cur = &in->buf->buffer->content[indx]; 370 } 371 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 372 373 CHECK_BUFFER(in); 374 375 return(ret); 376} 377 378/** 379 * xmlParserInputShrink: 380 * @in: an XML parser input 381 * 382 * This function removes used input for the parser. 383 */ 384void 385xmlParserInputShrink(xmlParserInputPtr in) { 386 int used; 387 int ret; 388 int indx; 389 390#ifdef DEBUG_INPUT 391 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 392#endif 393 if (in == NULL) return; 394 if (in->buf == NULL) return; 395 if (in->base == NULL) return; 396 if (in->cur == NULL) return; 397 if (in->buf->buffer == NULL) return; 398 399 CHECK_BUFFER(in); 400 401 used = in->cur - in->buf->buffer->content; 402 /* 403 * Do not shrink on large buffers whose only a tiny fraction 404 * was consumed 405 */ 406 if (used > INPUT_CHUNK) { 407 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 408 if (ret > 0) { 409 in->cur -= ret; 410 in->consumed += ret; 411 } 412 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 413 } 414 415 CHECK_BUFFER(in); 416 417 if (in->buf->buffer->use > INPUT_CHUNK) { 418 return; 419 } 420 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 421 if (in->base != in->buf->buffer->content) { 422 /* 423 * the buffer has been reallocated 424 */ 425 indx = in->cur - in->base; 426 in->base = in->buf->buffer->content; 427 in->cur = &in->buf->buffer->content[indx]; 428 } 429 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 430 431 CHECK_BUFFER(in); 432} 433 434/************************************************************************ 435 * * 436 * UTF8 character input and related functions * 437 * * 438 ************************************************************************/ 439 440/** 441 * xmlNextChar: 442 * @ctxt: the XML parser context 443 * 444 * Skip to the next char input char. 445 */ 446 447void 448xmlNextChar(xmlParserCtxtPtr ctxt) 449{ 450 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 451 (ctxt->input == NULL)) 452 return; 453 454 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 455 if ((*ctxt->input->cur == 0) && 456 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 457 (ctxt->instate != XML_PARSER_COMMENT)) { 458 /* 459 * If we are at the end of the current entity and 460 * the context allows it, we pop consumed entities 461 * automatically. 462 * the auto closing should be blocked in other cases 463 */ 464 xmlPopInput(ctxt); 465 } else { 466 const unsigned char *cur; 467 unsigned char c; 468 469 /* 470 * 2.11 End-of-Line Handling 471 * the literal two-character sequence "#xD#xA" or a standalone 472 * literal #xD, an XML processor must pass to the application 473 * the single character #xA. 474 */ 475 if (*(ctxt->input->cur) == '\n') { 476 ctxt->input->line++; ctxt->input->col = 1; 477 } else 478 ctxt->input->col++; 479 480 /* 481 * We are supposed to handle UTF8, check it's valid 482 * From rfc2044: encoding of the Unicode values on UTF-8: 483 * 484 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 485 * 0000 0000-0000 007F 0xxxxxxx 486 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 487 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 488 * 489 * Check for the 0x110000 limit too 490 */ 491 cur = ctxt->input->cur; 492 493 c = *cur; 494 if (c & 0x80) { 495 if (c == 0xC0) 496 goto encoding_error; 497 if (cur[1] == 0) 498 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 499 if ((cur[1] & 0xc0) != 0x80) 500 goto encoding_error; 501 if ((c & 0xe0) == 0xe0) { 502 unsigned int val; 503 504 if (cur[2] == 0) 505 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 506 if ((cur[2] & 0xc0) != 0x80) 507 goto encoding_error; 508 if ((c & 0xf0) == 0xf0) { 509 if (cur[3] == 0) 510 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 511 if (((c & 0xf8) != 0xf0) || 512 ((cur[3] & 0xc0) != 0x80)) 513 goto encoding_error; 514 /* 4-byte code */ 515 ctxt->input->cur += 4; 516 val = (cur[0] & 0x7) << 18; 517 val |= (cur[1] & 0x3f) << 12; 518 val |= (cur[2] & 0x3f) << 6; 519 val |= cur[3] & 0x3f; 520 } else { 521 /* 3-byte code */ 522 ctxt->input->cur += 3; 523 val = (cur[0] & 0xf) << 12; 524 val |= (cur[1] & 0x3f) << 6; 525 val |= cur[2] & 0x3f; 526 } 527 if (((val > 0xd7ff) && (val < 0xe000)) || 528 ((val > 0xfffd) && (val < 0x10000)) || 529 (val >= 0x110000)) { 530 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 531 "Char 0x%X out of allowed range\n", 532 val); 533 } 534 } else 535 /* 2-byte code */ 536 ctxt->input->cur += 2; 537 } else 538 /* 1-byte code */ 539 ctxt->input->cur++; 540 541 ctxt->nbChars++; 542 if (*ctxt->input->cur == 0) 543 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 544 } 545 } else { 546 /* 547 * Assume it's a fixed length encoding (1) with 548 * a compatible encoding for the ASCII set, since 549 * XML constructs only use < 128 chars 550 */ 551 552 if (*(ctxt->input->cur) == '\n') { 553 ctxt->input->line++; ctxt->input->col = 1; 554 } else 555 ctxt->input->col++; 556 ctxt->input->cur++; 557 ctxt->nbChars++; 558 if (*ctxt->input->cur == 0) 559 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 560 } 561 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 562 xmlParserHandlePEReference(ctxt); 563 if ((*ctxt->input->cur == 0) && 564 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 565 xmlPopInput(ctxt); 566 return; 567encoding_error: 568 /* 569 * If we detect an UTF8 error that probably mean that the 570 * input encoding didn't get properly advertised in the 571 * declaration header. Report the error and switch the encoding 572 * to ISO-Latin-1 (if you don't like this policy, just declare the 573 * encoding !) 574 */ 575 if ((ctxt == NULL) || (ctxt->input == NULL) || 576 (ctxt->input->end - ctxt->input->cur < 4)) { 577 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 578 "Input is not proper UTF-8, indicate encoding !\n", 579 NULL, NULL); 580 } else { 581 char buffer[150]; 582 583 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 584 ctxt->input->cur[0], ctxt->input->cur[1], 585 ctxt->input->cur[2], ctxt->input->cur[3]); 586 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 587 "Input is not proper UTF-8, indicate encoding !\n%s", 588 BAD_CAST buffer, NULL); 589 } 590 ctxt->charset = XML_CHAR_ENCODING_8859_1; 591 ctxt->input->cur++; 592 return; 593} 594 595/** 596 * xmlCurrentChar: 597 * @ctxt: the XML parser context 598 * @len: pointer to the length of the char read 599 * 600 * The current char value, if using UTF-8 this may actually span multiple 601 * bytes in the input buffer. Implement the end of line normalization: 602 * 2.11 End-of-Line Handling 603 * Wherever an external parsed entity or the literal entity value 604 * of an internal parsed entity contains either the literal two-character 605 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 606 * must pass to the application the single character #xA. 607 * This behavior can conveniently be produced by normalizing all 608 * line breaks to #xA on input, before parsing.) 609 * 610 * Returns the current char value and its length 611 */ 612 613int 614xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 615 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 616 if (ctxt->instate == XML_PARSER_EOF) 617 return(0); 618 619 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 620 *len = 1; 621 return((int) *ctxt->input->cur); 622 } 623 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 624 /* 625 * We are supposed to handle UTF8, check it's valid 626 * From rfc2044: encoding of the Unicode values on UTF-8: 627 * 628 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 629 * 0000 0000-0000 007F 0xxxxxxx 630 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 631 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 632 * 633 * Check for the 0x110000 limit too 634 */ 635 const unsigned char *cur = ctxt->input->cur; 636 unsigned char c; 637 unsigned int val; 638 639 c = *cur; 640 if (c & 0x80) { 641 if (((c & 0x40) == 0) || (c == 0xC0)) 642 goto encoding_error; 643 if (cur[1] == 0) 644 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 645 if ((cur[1] & 0xc0) != 0x80) 646 goto encoding_error; 647 if ((c & 0xe0) == 0xe0) { 648 if (cur[2] == 0) 649 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 650 if ((cur[2] & 0xc0) != 0x80) 651 goto encoding_error; 652 if ((c & 0xf0) == 0xf0) { 653 if (cur[3] == 0) 654 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 655 if (((c & 0xf8) != 0xf0) || 656 ((cur[3] & 0xc0) != 0x80)) 657 goto encoding_error; 658 /* 4-byte code */ 659 *len = 4; 660 val = (cur[0] & 0x7) << 18; 661 val |= (cur[1] & 0x3f) << 12; 662 val |= (cur[2] & 0x3f) << 6; 663 val |= cur[3] & 0x3f; 664 if (val < 0x10000) 665 goto encoding_error; 666 } else { 667 /* 3-byte code */ 668 *len = 3; 669 val = (cur[0] & 0xf) << 12; 670 val |= (cur[1] & 0x3f) << 6; 671 val |= cur[2] & 0x3f; 672 if (val < 0x800) 673 goto encoding_error; 674 } 675 } else { 676 /* 2-byte code */ 677 *len = 2; 678 val = (cur[0] & 0x1f) << 6; 679 val |= cur[1] & 0x3f; 680 if (val < 0x80) 681 goto encoding_error; 682 } 683 if (!IS_CHAR(val)) { 684 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 685 "Char 0x%X out of allowed range\n", val); 686 } 687 return(val); 688 } else { 689 /* 1-byte code */ 690 *len = 1; 691 if (*ctxt->input->cur == 0) 692 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 693 if ((*ctxt->input->cur == 0) && 694 (ctxt->input->end > ctxt->input->cur)) { 695 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 696 "Char 0x0 out of allowed range\n", 0); 697 } 698 if (*ctxt->input->cur == 0xD) { 699 if (ctxt->input->cur[1] == 0xA) { 700 ctxt->nbChars++; 701 ctxt->input->cur++; 702 } 703 return(0xA); 704 } 705 return((int) *ctxt->input->cur); 706 } 707 } 708 /* 709 * Assume it's a fixed length encoding (1) with 710 * a compatible encoding for the ASCII set, since 711 * XML constructs only use < 128 chars 712 */ 713 *len = 1; 714 if (*ctxt->input->cur == 0xD) { 715 if (ctxt->input->cur[1] == 0xA) { 716 ctxt->nbChars++; 717 ctxt->input->cur++; 718 } 719 return(0xA); 720 } 721 return((int) *ctxt->input->cur); 722encoding_error: 723 /* 724 * An encoding problem may arise from a truncated input buffer 725 * splitting a character in the middle. In that case do not raise 726 * an error but return 0 to endicate an end of stream problem 727 */ 728 if (ctxt->input->end - ctxt->input->cur < 4) { 729 *len = 0; 730 return(0); 731 } 732 733 /* 734 * If we detect an UTF8 error that probably mean that the 735 * input encoding didn't get properly advertised in the 736 * declaration header. Report the error and switch the encoding 737 * to ISO-Latin-1 (if you don't like this policy, just declare the 738 * encoding !) 739 */ 740 { 741 char buffer[150]; 742 743 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 744 ctxt->input->cur[0], ctxt->input->cur[1], 745 ctxt->input->cur[2], ctxt->input->cur[3]); 746 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 747 "Input is not proper UTF-8, indicate encoding !\n%s", 748 BAD_CAST buffer, NULL); 749 } 750 ctxt->charset = XML_CHAR_ENCODING_8859_1; 751 *len = 1; 752 return((int) *ctxt->input->cur); 753} 754 755/** 756 * xmlStringCurrentChar: 757 * @ctxt: the XML parser context 758 * @cur: pointer to the beginning of the char 759 * @len: pointer to the length of the char read 760 * 761 * The current char value, if using UTF-8 this may actually span multiple 762 * bytes in the input buffer. 763 * 764 * Returns the current char value and its length 765 */ 766 767int 768xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 769{ 770 if ((len == NULL) || (cur == NULL)) return(0); 771 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 772 /* 773 * We are supposed to handle UTF8, check it's valid 774 * From rfc2044: encoding of the Unicode values on UTF-8: 775 * 776 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 777 * 0000 0000-0000 007F 0xxxxxxx 778 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 779 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 780 * 781 * Check for the 0x110000 limit too 782 */ 783 unsigned char c; 784 unsigned int val; 785 786 c = *cur; 787 if (c & 0x80) { 788 if ((cur[1] & 0xc0) != 0x80) 789 goto encoding_error; 790 if ((c & 0xe0) == 0xe0) { 791 792 if ((cur[2] & 0xc0) != 0x80) 793 goto encoding_error; 794 if ((c & 0xf0) == 0xf0) { 795 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 796 goto encoding_error; 797 /* 4-byte code */ 798 *len = 4; 799 val = (cur[0] & 0x7) << 18; 800 val |= (cur[1] & 0x3f) << 12; 801 val |= (cur[2] & 0x3f) << 6; 802 val |= cur[3] & 0x3f; 803 } else { 804 /* 3-byte code */ 805 *len = 3; 806 val = (cur[0] & 0xf) << 12; 807 val |= (cur[1] & 0x3f) << 6; 808 val |= cur[2] & 0x3f; 809 } 810 } else { 811 /* 2-byte code */ 812 *len = 2; 813 val = (cur[0] & 0x1f) << 6; 814 val |= cur[1] & 0x3f; 815 } 816 if (!IS_CHAR(val)) { 817 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 818 "Char 0x%X out of allowed range\n", val); 819 } 820 return (val); 821 } else { 822 /* 1-byte code */ 823 *len = 1; 824 return ((int) *cur); 825 } 826 } 827 /* 828 * Assume it's a fixed length encoding (1) with 829 * a compatible encoding for the ASCII set, since 830 * XML constructs only use < 128 chars 831 */ 832 *len = 1; 833 return ((int) *cur); 834encoding_error: 835 836 /* 837 * An encoding problem may arise from a truncated input buffer 838 * splitting a character in the middle. In that case do not raise 839 * an error but return 0 to endicate an end of stream problem 840 */ 841 if ((ctxt == NULL) || (ctxt->input == NULL) || 842 (ctxt->input->end - ctxt->input->cur < 4)) { 843 *len = 0; 844 return(0); 845 } 846 /* 847 * If we detect an UTF8 error that probably mean that the 848 * input encoding didn't get properly advertised in the 849 * declaration header. Report the error and switch the encoding 850 * to ISO-Latin-1 (if you don't like this policy, just declare the 851 * encoding !) 852 */ 853 { 854 char buffer[150]; 855 856 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 857 ctxt->input->cur[0], ctxt->input->cur[1], 858 ctxt->input->cur[2], ctxt->input->cur[3]); 859 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 860 "Input is not proper UTF-8, indicate encoding !\n%s", 861 BAD_CAST buffer, NULL); 862 } 863 *len = 1; 864 return ((int) *cur); 865} 866 867/** 868 * xmlCopyCharMultiByte: 869 * @out: pointer to an array of xmlChar 870 * @val: the char value 871 * 872 * append the char value in the array 873 * 874 * Returns the number of xmlChar written 875 */ 876int 877xmlCopyCharMultiByte(xmlChar *out, int val) { 878 if (out == NULL) return(0); 879 /* 880 * We are supposed to handle UTF8, check it's valid 881 * From rfc2044: encoding of the Unicode values on UTF-8: 882 * 883 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 884 * 0000 0000-0000 007F 0xxxxxxx 885 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 886 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 887 */ 888 if (val >= 0x80) { 889 xmlChar *savedout = out; 890 int bits; 891 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 892 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 893 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 894 else { 895 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 896 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 897 val); 898 return(0); 899 } 900 for ( ; bits >= 0; bits-= 6) 901 *out++= ((val >> bits) & 0x3F) | 0x80 ; 902 return (out - savedout); 903 } 904 *out = (xmlChar) val; 905 return 1; 906} 907 908/** 909 * xmlCopyChar: 910 * @len: Ignored, compatibility 911 * @out: pointer to an array of xmlChar 912 * @val: the char value 913 * 914 * append the char value in the array 915 * 916 * Returns the number of xmlChar written 917 */ 918 919int 920xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 921 if (out == NULL) return(0); 922 /* the len parameter is ignored */ 923 if (val >= 0x80) { 924 return(xmlCopyCharMultiByte (out, val)); 925 } 926 *out = (xmlChar) val; 927 return 1; 928} 929 930/************************************************************************ 931 * * 932 * Commodity functions to switch encodings * 933 * * 934 ************************************************************************/ 935 936/** 937 * xmlSwitchEncoding: 938 * @ctxt: the parser context 939 * @enc: the encoding value (number) 940 * 941 * change the input functions when discovering the character encoding 942 * of a given entity. 943 * 944 * Returns 0 in case of success, -1 otherwise 945 */ 946int 947xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 948{ 949 xmlCharEncodingHandlerPtr handler; 950 951 if (ctxt == NULL) return(-1); 952 switch (enc) { 953 case XML_CHAR_ENCODING_ERROR: 954 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 955 "encoding unknown\n", NULL, NULL); 956 return(-1); 957 case XML_CHAR_ENCODING_NONE: 958 /* let's assume it's UTF-8 without the XML decl */ 959 ctxt->charset = XML_CHAR_ENCODING_UTF8; 960 return(0); 961 case XML_CHAR_ENCODING_UTF8: 962 /* default encoding, no conversion should be needed */ 963 ctxt->charset = XML_CHAR_ENCODING_UTF8; 964 965 /* 966 * Errata on XML-1.0 June 20 2001 967 * Specific handling of the Byte Order Mark for 968 * UTF-8 969 */ 970 if ((ctxt->input != NULL) && 971 (ctxt->input->cur[0] == 0xEF) && 972 (ctxt->input->cur[1] == 0xBB) && 973 (ctxt->input->cur[2] == 0xBF)) { 974 ctxt->input->cur += 3; 975 } 976 return(0); 977 case XML_CHAR_ENCODING_UTF16LE: 978 case XML_CHAR_ENCODING_UTF16BE: 979 /*The raw input characters are encoded 980 *in UTF-16. As we expect this function 981 *to be called after xmlCharEncInFunc, we expect 982 *ctxt->input->cur to contain UTF-8 encoded characters. 983 *So the raw UTF16 Byte Order Mark 984 *has also been converted into 985 *an UTF-8 BOM. Let's skip that BOM. 986 */ 987 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 988 (ctxt->input->cur[0] == 0xEF) && 989 (ctxt->input->cur[1] == 0xBB) && 990 (ctxt->input->cur[2] == 0xBF)) { 991 ctxt->input->cur += 3; 992 } 993 break ; 994 default: 995 break; 996 } 997 handler = xmlGetCharEncodingHandler(enc); 998 if (handler == NULL) { 999 /* 1000 * Default handlers. 1001 */ 1002 switch (enc) { 1003 case XML_CHAR_ENCODING_ASCII: 1004 /* default encoding, no conversion should be needed */ 1005 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1006 return(0); 1007 case XML_CHAR_ENCODING_UTF16LE: 1008 break; 1009 case XML_CHAR_ENCODING_UTF16BE: 1010 break; 1011 case XML_CHAR_ENCODING_UCS4LE: 1012 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1013 "encoding not supported %s\n", 1014 BAD_CAST "USC4 little endian", NULL); 1015 break; 1016 case XML_CHAR_ENCODING_UCS4BE: 1017 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1018 "encoding not supported %s\n", 1019 BAD_CAST "USC4 big endian", NULL); 1020 break; 1021 case XML_CHAR_ENCODING_EBCDIC: 1022 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1023 "encoding not supported %s\n", 1024 BAD_CAST "EBCDIC", NULL); 1025 break; 1026 case XML_CHAR_ENCODING_UCS4_2143: 1027 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1028 "encoding not supported %s\n", 1029 BAD_CAST "UCS4 2143", NULL); 1030 break; 1031 case XML_CHAR_ENCODING_UCS4_3412: 1032 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1033 "encoding not supported %s\n", 1034 BAD_CAST "UCS4 3412", NULL); 1035 break; 1036 case XML_CHAR_ENCODING_UCS2: 1037 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1038 "encoding not supported %s\n", 1039 BAD_CAST "UCS2", NULL); 1040 break; 1041 case XML_CHAR_ENCODING_8859_1: 1042 case XML_CHAR_ENCODING_8859_2: 1043 case XML_CHAR_ENCODING_8859_3: 1044 case XML_CHAR_ENCODING_8859_4: 1045 case XML_CHAR_ENCODING_8859_5: 1046 case XML_CHAR_ENCODING_8859_6: 1047 case XML_CHAR_ENCODING_8859_7: 1048 case XML_CHAR_ENCODING_8859_8: 1049 case XML_CHAR_ENCODING_8859_9: 1050 /* 1051 * We used to keep the internal content in the 1052 * document encoding however this turns being unmaintainable 1053 * So xmlGetCharEncodingHandler() will return non-null 1054 * values for this now. 1055 */ 1056 if ((ctxt->inputNr == 1) && 1057 (ctxt->encoding == NULL) && 1058 (ctxt->input != NULL) && 1059 (ctxt->input->encoding != NULL)) { 1060 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1061 } 1062 ctxt->charset = enc; 1063 return(0); 1064 case XML_CHAR_ENCODING_2022_JP: 1065 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1066 "encoding not supported %s\n", 1067 BAD_CAST "ISO-2022-JP", NULL); 1068 break; 1069 case XML_CHAR_ENCODING_SHIFT_JIS: 1070 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1071 "encoding not supported %s\n", 1072 BAD_CAST "Shift_JIS", NULL); 1073 break; 1074 case XML_CHAR_ENCODING_EUC_JP: 1075 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1076 "encoding not supported %s\n", 1077 BAD_CAST "EUC-JP", NULL); 1078 break; 1079 default: 1080 break; 1081 } 1082 } 1083 if (handler == NULL) 1084 return(-1); 1085 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1086 return(xmlSwitchToEncoding(ctxt, handler)); 1087} 1088 1089/** 1090 * xmlSwitchInputEncoding: 1091 * @ctxt: the parser context 1092 * @input: the input stream 1093 * @handler: the encoding handler 1094 * 1095 * change the input functions when discovering the character encoding 1096 * of a given entity. 1097 * 1098 * Returns 0 in case of success, -1 otherwise 1099 */ 1100int 1101xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1102 xmlCharEncodingHandlerPtr handler) 1103{ 1104 int nbchars; 1105 1106 if (handler == NULL) 1107 return (-1); 1108 if (input == NULL) 1109 return (-1); 1110 if (input->buf != NULL) { 1111 if (input->buf->encoder != NULL) { 1112 /* 1113 * Check in case the auto encoding detetection triggered 1114 * in already. 1115 */ 1116 if (input->buf->encoder == handler) 1117 return (0); 1118 1119 /* 1120 * "UTF-16" can be used for both LE and BE 1121 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1122 BAD_CAST "UTF-16", 6)) && 1123 (!xmlStrncmp(BAD_CAST handler->name, 1124 BAD_CAST "UTF-16", 6))) { 1125 return(0); 1126 } 1127 */ 1128 1129 /* 1130 * Note: this is a bit dangerous, but that's what it 1131 * takes to use nearly compatible signature for different 1132 * encodings. 1133 */ 1134 xmlCharEncCloseFunc(input->buf->encoder); 1135 input->buf->encoder = handler; 1136 return (0); 1137 } 1138 input->buf->encoder = handler; 1139 1140 /* 1141 * Is there already some content down the pipe to convert ? 1142 */ 1143 if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 1144 int processed; 1145 unsigned int use; 1146 1147 /* 1148 * Specific handling of the Byte Order Mark for 1149 * UTF-16 1150 */ 1151 if ((handler->name != NULL) && 1152 (!strcmp(handler->name, "UTF-16LE") || 1153 !strcmp(handler->name, "UTF-16")) && 1154 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1155 input->cur += 2; 1156 } 1157 if ((handler->name != NULL) && 1158 (!strcmp(handler->name, "UTF-16BE")) && 1159 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1160 input->cur += 2; 1161 } 1162 /* 1163 * Errata on XML-1.0 June 20 2001 1164 * Specific handling of the Byte Order Mark for 1165 * UTF-8 1166 */ 1167 if ((handler->name != NULL) && 1168 (!strcmp(handler->name, "UTF-8")) && 1169 (input->cur[0] == 0xEF) && 1170 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1171 input->cur += 3; 1172 } 1173 1174 /* 1175 * Shrink the current input buffer. 1176 * Move it as the raw buffer and create a new input buffer 1177 */ 1178 processed = input->cur - input->base; 1179 xmlBufferShrink(input->buf->buffer, processed); 1180 input->buf->raw = input->buf->buffer; 1181 input->buf->buffer = xmlBufferCreate(); 1182 input->buf->rawconsumed = processed; 1183 use = input->buf->raw->use; 1184 1185 if (ctxt->html) { 1186 /* 1187 * convert as much as possible of the buffer 1188 */ 1189 nbchars = xmlCharEncInFunc(input->buf->encoder, 1190 input->buf->buffer, 1191 input->buf->raw); 1192 } else { 1193 /* 1194 * convert just enough to get 1195 * '<?xml version="1.0" encoding="xxx"?>' 1196 * parsed with the autodetected encoding 1197 * into the parser reading buffer. 1198 */ 1199 nbchars = xmlCharEncFirstLine(input->buf->encoder, 1200 input->buf->buffer, 1201 input->buf->raw); 1202 } 1203 if (nbchars < 0) { 1204 xmlErrInternal(ctxt, 1205 "switching encoding: encoder error\n", 1206 NULL); 1207 return (-1); 1208 } 1209 input->buf->rawconsumed += use - input->buf->raw->use; 1210 input->base = input->cur = input->buf->buffer->content; 1211 input->end = &input->base[input->buf->buffer->use]; 1212 1213 } 1214 return (0); 1215 } else if (input->length == 0) { 1216 /* 1217 * When parsing a static memory array one must know the 1218 * size to be able to convert the buffer. 1219 */ 1220 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1221 return (-1); 1222 } 1223 return (0); 1224} 1225 1226/** 1227 * xmlSwitchToEncoding: 1228 * @ctxt: the parser context 1229 * @handler: the encoding handler 1230 * 1231 * change the input functions when discovering the character encoding 1232 * of a given entity. 1233 * 1234 * Returns 0 in case of success, -1 otherwise 1235 */ 1236int 1237xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1238{ 1239 int ret = 0; 1240 1241 if (handler != NULL) { 1242 if (ctxt->input != NULL) { 1243 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler); 1244 } else { 1245 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1246 NULL); 1247 return(-1); 1248 } 1249 /* 1250 * The parsing is now done in UTF8 natively 1251 */ 1252 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1253 } else 1254 return(-1); 1255 return(ret); 1256} 1257 1258/************************************************************************ 1259 * * 1260 * Commodity functions to handle entities processing * 1261 * * 1262 ************************************************************************/ 1263 1264/** 1265 * xmlFreeInputStream: 1266 * @input: an xmlParserInputPtr 1267 * 1268 * Free up an input stream. 1269 */ 1270void 1271xmlFreeInputStream(xmlParserInputPtr input) { 1272 if (input == NULL) return; 1273 1274 if (input->filename != NULL) xmlFree((char *) input->filename); 1275 if (input->directory != NULL) xmlFree((char *) input->directory); 1276 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1277 if (input->version != NULL) xmlFree((char *) input->version); 1278 if ((input->free != NULL) && (input->base != NULL)) 1279 input->free((xmlChar *) input->base); 1280 if (input->buf != NULL) 1281 xmlFreeParserInputBuffer(input->buf); 1282 xmlFree(input); 1283} 1284 1285/** 1286 * xmlNewInputStream: 1287 * @ctxt: an XML parser context 1288 * 1289 * Create a new input stream structure 1290 * Returns the new input stream or NULL 1291 */ 1292xmlParserInputPtr 1293xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1294 xmlParserInputPtr input; 1295 static int id = 0; 1296 1297 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1298 if (input == NULL) { 1299 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1300 return(NULL); 1301 } 1302 memset(input, 0, sizeof(xmlParserInput)); 1303 input->line = 1; 1304 input->col = 1; 1305 input->standalone = -1; 1306 /* 1307 * we don't care about thread reentrancy unicity for a single 1308 * parser context (and hence thread) is sufficient. 1309 */ 1310 input->id = id++; 1311 return(input); 1312} 1313 1314/** 1315 * xmlNewIOInputStream: 1316 * @ctxt: an XML parser context 1317 * @input: an I/O Input 1318 * @enc: the charset encoding if known 1319 * 1320 * Create a new input stream structure encapsulating the @input into 1321 * a stream suitable for the parser. 1322 * 1323 * Returns the new input stream or NULL 1324 */ 1325xmlParserInputPtr 1326xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1327 xmlCharEncoding enc) { 1328 xmlParserInputPtr inputStream; 1329 1330 if (input == NULL) return(NULL); 1331 if (xmlParserDebugEntities) 1332 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1333 inputStream = xmlNewInputStream(ctxt); 1334 if (inputStream == NULL) { 1335 return(NULL); 1336 } 1337 inputStream->filename = NULL; 1338 inputStream->buf = input; 1339 inputStream->base = inputStream->buf->buffer->content; 1340 inputStream->cur = inputStream->buf->buffer->content; 1341 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1342 if (enc != XML_CHAR_ENCODING_NONE) { 1343 xmlSwitchEncoding(ctxt, enc); 1344 } 1345 1346 return(inputStream); 1347} 1348 1349/** 1350 * xmlNewEntityInputStream: 1351 * @ctxt: an XML parser context 1352 * @entity: an Entity pointer 1353 * 1354 * Create a new input stream based on an xmlEntityPtr 1355 * 1356 * Returns the new input stream or NULL 1357 */ 1358xmlParserInputPtr 1359xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1360 xmlParserInputPtr input; 1361 1362 if (entity == NULL) { 1363 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1364 NULL); 1365 return(NULL); 1366 } 1367 if (xmlParserDebugEntities) 1368 xmlGenericError(xmlGenericErrorContext, 1369 "new input from entity: %s\n", entity->name); 1370 if (entity->content == NULL) { 1371 switch (entity->etype) { 1372 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1373 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1374 entity->name); 1375 break; 1376 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1377 case XML_EXTERNAL_PARAMETER_ENTITY: 1378 return(xmlLoadExternalEntity((char *) entity->URI, 1379 (char *) entity->ExternalID, ctxt)); 1380 case XML_INTERNAL_GENERAL_ENTITY: 1381 xmlErrInternal(ctxt, 1382 "Internal entity %s without content !\n", 1383 entity->name); 1384 break; 1385 case XML_INTERNAL_PARAMETER_ENTITY: 1386 xmlErrInternal(ctxt, 1387 "Internal parameter entity %s without content !\n", 1388 entity->name); 1389 break; 1390 case XML_INTERNAL_PREDEFINED_ENTITY: 1391 xmlErrInternal(ctxt, 1392 "Predefined entity %s without content !\n", 1393 entity->name); 1394 break; 1395 } 1396 return(NULL); 1397 } 1398 input = xmlNewInputStream(ctxt); 1399 if (input == NULL) { 1400 return(NULL); 1401 } 1402 if (entity->URI != NULL) 1403 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1404 input->base = entity->content; 1405 input->cur = entity->content; 1406 input->length = entity->length; 1407 input->end = &entity->content[input->length]; 1408 return(input); 1409} 1410 1411/** 1412 * xmlNewStringInputStream: 1413 * @ctxt: an XML parser context 1414 * @buffer: an memory buffer 1415 * 1416 * Create a new input stream based on a memory buffer. 1417 * Returns the new input stream 1418 */ 1419xmlParserInputPtr 1420xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1421 xmlParserInputPtr input; 1422 1423 if (buffer == NULL) { 1424 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1425 NULL); 1426 return(NULL); 1427 } 1428 if (xmlParserDebugEntities) 1429 xmlGenericError(xmlGenericErrorContext, 1430 "new fixed input: %.30s\n", buffer); 1431 input = xmlNewInputStream(ctxt); 1432 if (input == NULL) { 1433 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1434 return(NULL); 1435 } 1436 input->base = buffer; 1437 input->cur = buffer; 1438 input->length = xmlStrlen(buffer); 1439 input->end = &buffer[input->length]; 1440 return(input); 1441} 1442 1443/** 1444 * xmlNewInputFromFile: 1445 * @ctxt: an XML parser context 1446 * @filename: the filename to use as entity 1447 * 1448 * Create a new input stream based on a file or an URL. 1449 * 1450 * Returns the new input stream or NULL in case of error 1451 */ 1452xmlParserInputPtr 1453xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1454 xmlParserInputBufferPtr buf; 1455 xmlParserInputPtr inputStream; 1456 char *directory = NULL; 1457 xmlChar *URI = NULL; 1458 1459 if (xmlParserDebugEntities) 1460 xmlGenericError(xmlGenericErrorContext, 1461 "new input from file: %s\n", filename); 1462 if (ctxt == NULL) return(NULL); 1463 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1464 if (buf == NULL) { 1465 if (filename == NULL) 1466 __xmlLoaderErr(ctxt, 1467 "failed to load external entity: NULL filename \n", 1468 NULL); 1469 else 1470 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1471 (const char *) filename); 1472 return(NULL); 1473 } 1474 1475 inputStream = xmlNewInputStream(ctxt); 1476 if (inputStream == NULL) 1477 return(NULL); 1478 1479 inputStream->buf = buf; 1480 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1481 if (inputStream == NULL) 1482 return(NULL); 1483 1484 if (inputStream->filename == NULL) 1485 URI = xmlStrdup((xmlChar *) filename); 1486 else 1487 URI = xmlStrdup((xmlChar *) inputStream->filename); 1488 directory = xmlParserGetDirectory((const char *) URI); 1489 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1490 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1491 if (URI != NULL) xmlFree((char *) URI); 1492 inputStream->directory = directory; 1493 1494 inputStream->base = inputStream->buf->buffer->content; 1495 inputStream->cur = inputStream->buf->buffer->content; 1496 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1497 if ((ctxt->directory == NULL) && (directory != NULL)) 1498 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1499 return(inputStream); 1500} 1501 1502/************************************************************************ 1503 * * 1504 * Commodity functions to handle parser contexts * 1505 * * 1506 ************************************************************************/ 1507 1508/** 1509 * xmlInitParserCtxt: 1510 * @ctxt: an XML parser context 1511 * 1512 * Initialize a parser context 1513 * 1514 * Returns 0 in case of success and -1 in case of error 1515 */ 1516 1517int 1518xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1519{ 1520 xmlParserInputPtr input; 1521 1522 if(ctxt==NULL) { 1523 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1524 return(-1); 1525 } 1526 1527 xmlDefaultSAXHandlerInit(); 1528 1529 if (ctxt->dict == NULL) 1530 ctxt->dict = xmlDictCreate(); 1531 if (ctxt->dict == NULL) { 1532 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1533 return(-1); 1534 } 1535 if (ctxt->sax == NULL) 1536 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1537 if (ctxt->sax == NULL) { 1538 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1539 return(-1); 1540 } 1541 else 1542 xmlSAXVersion(ctxt->sax, 2); 1543 1544 ctxt->maxatts = 0; 1545 ctxt->atts = NULL; 1546 /* Allocate the Input stack */ 1547 if (ctxt->inputTab == NULL) { 1548 ctxt->inputTab = (xmlParserInputPtr *) 1549 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1550 ctxt->inputMax = 5; 1551 } 1552 if (ctxt->inputTab == NULL) { 1553 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1554 ctxt->inputNr = 0; 1555 ctxt->inputMax = 0; 1556 ctxt->input = NULL; 1557 return(-1); 1558 } 1559 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1560 xmlFreeInputStream(input); 1561 } 1562 ctxt->inputNr = 0; 1563 ctxt->input = NULL; 1564 1565 ctxt->version = NULL; 1566 ctxt->encoding = NULL; 1567 ctxt->standalone = -1; 1568 ctxt->hasExternalSubset = 0; 1569 ctxt->hasPErefs = 0; 1570 ctxt->html = 0; 1571 ctxt->external = 0; 1572 ctxt->instate = XML_PARSER_START; 1573 ctxt->token = 0; 1574 ctxt->directory = NULL; 1575 1576 /* Allocate the Node stack */ 1577 if (ctxt->nodeTab == NULL) { 1578 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1579 ctxt->nodeMax = 10; 1580 } 1581 if (ctxt->nodeTab == NULL) { 1582 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1583 ctxt->nodeNr = 0; 1584 ctxt->nodeMax = 0; 1585 ctxt->node = NULL; 1586 ctxt->inputNr = 0; 1587 ctxt->inputMax = 0; 1588 ctxt->input = NULL; 1589 return(-1); 1590 } 1591 ctxt->nodeNr = 0; 1592 ctxt->node = NULL; 1593 1594 /* Allocate the Name stack */ 1595 if (ctxt->nameTab == NULL) { 1596 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1597 ctxt->nameMax = 10; 1598 } 1599 if (ctxt->nameTab == NULL) { 1600 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1601 ctxt->nodeNr = 0; 1602 ctxt->nodeMax = 0; 1603 ctxt->node = NULL; 1604 ctxt->inputNr = 0; 1605 ctxt->inputMax = 0; 1606 ctxt->input = NULL; 1607 ctxt->nameNr = 0; 1608 ctxt->nameMax = 0; 1609 ctxt->name = NULL; 1610 return(-1); 1611 } 1612 ctxt->nameNr = 0; 1613 ctxt->name = NULL; 1614 1615 /* Allocate the space stack */ 1616 if (ctxt->spaceTab == NULL) { 1617 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1618 ctxt->spaceMax = 10; 1619 } 1620 if (ctxt->spaceTab == NULL) { 1621 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1622 ctxt->nodeNr = 0; 1623 ctxt->nodeMax = 0; 1624 ctxt->node = NULL; 1625 ctxt->inputNr = 0; 1626 ctxt->inputMax = 0; 1627 ctxt->input = NULL; 1628 ctxt->nameNr = 0; 1629 ctxt->nameMax = 0; 1630 ctxt->name = NULL; 1631 ctxt->spaceNr = 0; 1632 ctxt->spaceMax = 0; 1633 ctxt->space = NULL; 1634 return(-1); 1635 } 1636 ctxt->spaceNr = 1; 1637 ctxt->spaceMax = 10; 1638 ctxt->spaceTab[0] = -1; 1639 ctxt->space = &ctxt->spaceTab[0]; 1640 ctxt->userData = ctxt; 1641 ctxt->myDoc = NULL; 1642 ctxt->wellFormed = 1; 1643 ctxt->nsWellFormed = 1; 1644 ctxt->valid = 1; 1645 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1646 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1647 ctxt->pedantic = xmlPedanticParserDefaultValue; 1648 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1649 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1650 if (ctxt->keepBlanks == 0) 1651 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1652 1653 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1654 ctxt->vctxt.userData = ctxt; 1655 ctxt->vctxt.error = xmlParserValidityError; 1656 ctxt->vctxt.warning = xmlParserValidityWarning; 1657 if (ctxt->validate) { 1658 if (xmlGetWarningsDefaultValue == 0) 1659 ctxt->vctxt.warning = NULL; 1660 else 1661 ctxt->vctxt.warning = xmlParserValidityWarning; 1662 ctxt->vctxt.nodeMax = 0; 1663 } 1664 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1665 ctxt->record_info = 0; 1666 ctxt->nbChars = 0; 1667 ctxt->checkIndex = 0; 1668 ctxt->inSubset = 0; 1669 ctxt->errNo = XML_ERR_OK; 1670 ctxt->depth = 0; 1671 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1672 ctxt->catalogs = NULL; 1673 ctxt->nbentities = 0; 1674 xmlInitNodeInfoSeq(&ctxt->node_seq); 1675 return(0); 1676} 1677 1678/** 1679 * xmlFreeParserCtxt: 1680 * @ctxt: an XML parser context 1681 * 1682 * Free all the memory used by a parser context. However the parsed 1683 * document in ctxt->myDoc is not freed. 1684 */ 1685 1686void 1687xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1688{ 1689 xmlParserInputPtr input; 1690 1691 if (ctxt == NULL) return; 1692 1693 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1694 xmlFreeInputStream(input); 1695 } 1696 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1697 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1698 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1699 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1700 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1701 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1702 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1703 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1704#ifdef LIBXML_SAX1_ENABLED 1705 if ((ctxt->sax != NULL) && 1706 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1707#else 1708 if (ctxt->sax != NULL) 1709#endif /* LIBXML_SAX1_ENABLED */ 1710 xmlFree(ctxt->sax); 1711 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1712 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1713 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1714 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1715 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1716 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1717 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1718 if (ctxt->attsDefault != NULL) 1719 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1720 if (ctxt->attsSpecial != NULL) 1721 xmlHashFree(ctxt->attsSpecial, NULL); 1722 if (ctxt->freeElems != NULL) { 1723 xmlNodePtr cur, next; 1724 1725 cur = ctxt->freeElems; 1726 while (cur != NULL) { 1727 next = cur->next; 1728 xmlFree(cur); 1729 cur = next; 1730 } 1731 } 1732 if (ctxt->freeAttrs != NULL) { 1733 xmlAttrPtr cur, next; 1734 1735 cur = ctxt->freeAttrs; 1736 while (cur != NULL) { 1737 next = cur->next; 1738 xmlFree(cur); 1739 cur = next; 1740 } 1741 } 1742 /* 1743 * cleanup the error strings 1744 */ 1745 if (ctxt->lastError.message != NULL) 1746 xmlFree(ctxt->lastError.message); 1747 if (ctxt->lastError.file != NULL) 1748 xmlFree(ctxt->lastError.file); 1749 if (ctxt->lastError.str1 != NULL) 1750 xmlFree(ctxt->lastError.str1); 1751 if (ctxt->lastError.str2 != NULL) 1752 xmlFree(ctxt->lastError.str2); 1753 if (ctxt->lastError.str3 != NULL) 1754 xmlFree(ctxt->lastError.str3); 1755 1756#ifdef LIBXML_CATALOG_ENABLED 1757 if (ctxt->catalogs != NULL) 1758 xmlCatalogFreeLocal(ctxt->catalogs); 1759#endif 1760 xmlFree(ctxt); 1761} 1762 1763/** 1764 * xmlNewParserCtxt: 1765 * 1766 * Allocate and initialize a new parser context. 1767 * 1768 * Returns the xmlParserCtxtPtr or NULL 1769 */ 1770 1771xmlParserCtxtPtr 1772xmlNewParserCtxt(void) 1773{ 1774 xmlParserCtxtPtr ctxt; 1775 1776 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1777 if (ctxt == NULL) { 1778 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1779 return(NULL); 1780 } 1781 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1782 if (xmlInitParserCtxt(ctxt) < 0) { 1783 xmlFreeParserCtxt(ctxt); 1784 return(NULL); 1785 } 1786 return(ctxt); 1787} 1788 1789/************************************************************************ 1790 * * 1791 * Handling of node informations * 1792 * * 1793 ************************************************************************/ 1794 1795/** 1796 * xmlClearParserCtxt: 1797 * @ctxt: an XML parser context 1798 * 1799 * Clear (release owned resources) and reinitialize a parser context 1800 */ 1801 1802void 1803xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1804{ 1805 if (ctxt==NULL) 1806 return; 1807 xmlClearNodeInfoSeq(&ctxt->node_seq); 1808 xmlCtxtReset(ctxt); 1809} 1810 1811 1812/** 1813 * xmlParserFindNodeInfo: 1814 * @ctx: an XML parser context 1815 * @node: an XML node within the tree 1816 * 1817 * Find the parser node info struct for a given node 1818 * 1819 * Returns an xmlParserNodeInfo block pointer or NULL 1820 */ 1821const xmlParserNodeInfo * 1822xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1823{ 1824 unsigned long pos; 1825 1826 if ((ctx == NULL) || (node == NULL)) 1827 return (NULL); 1828 /* Find position where node should be at */ 1829 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1830 if (pos < ctx->node_seq.length 1831 && ctx->node_seq.buffer[pos].node == node) 1832 return &ctx->node_seq.buffer[pos]; 1833 else 1834 return NULL; 1835} 1836 1837 1838/** 1839 * xmlInitNodeInfoSeq: 1840 * @seq: a node info sequence pointer 1841 * 1842 * -- Initialize (set to initial state) node info sequence 1843 */ 1844void 1845xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1846{ 1847 if (seq == NULL) 1848 return; 1849 seq->length = 0; 1850 seq->maximum = 0; 1851 seq->buffer = NULL; 1852} 1853 1854/** 1855 * xmlClearNodeInfoSeq: 1856 * @seq: a node info sequence pointer 1857 * 1858 * -- Clear (release memory and reinitialize) node 1859 * info sequence 1860 */ 1861void 1862xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1863{ 1864 if (seq == NULL) 1865 return; 1866 if (seq->buffer != NULL) 1867 xmlFree(seq->buffer); 1868 xmlInitNodeInfoSeq(seq); 1869} 1870 1871/** 1872 * xmlParserFindNodeInfoIndex: 1873 * @seq: a node info sequence pointer 1874 * @node: an XML node pointer 1875 * 1876 * 1877 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1878 * the given node is or should be at in a sorted sequence 1879 * 1880 * Returns a long indicating the position of the record 1881 */ 1882unsigned long 1883xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1884 const xmlNodePtr node) 1885{ 1886 unsigned long upper, lower, middle; 1887 int found = 0; 1888 1889 if ((seq == NULL) || (node == NULL)) 1890 return ((unsigned long) -1); 1891 1892 /* Do a binary search for the key */ 1893 lower = 1; 1894 upper = seq->length; 1895 middle = 0; 1896 while (lower <= upper && !found) { 1897 middle = lower + (upper - lower) / 2; 1898 if (node == seq->buffer[middle - 1].node) 1899 found = 1; 1900 else if (node < seq->buffer[middle - 1].node) 1901 upper = middle - 1; 1902 else 1903 lower = middle + 1; 1904 } 1905 1906 /* Return position */ 1907 if (middle == 0 || seq->buffer[middle - 1].node < node) 1908 return middle; 1909 else 1910 return middle - 1; 1911} 1912 1913 1914/** 1915 * xmlParserAddNodeInfo: 1916 * @ctxt: an XML parser context 1917 * @info: a node info sequence pointer 1918 * 1919 * Insert node info record into the sorted sequence 1920 */ 1921void 1922xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1923 const xmlParserNodeInfoPtr info) 1924{ 1925 unsigned long pos; 1926 1927 if ((ctxt == NULL) || (info == NULL)) return; 1928 1929 /* Find pos and check to see if node is already in the sequence */ 1930 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1931 info->node); 1932 1933 if ((pos < ctxt->node_seq.length) && 1934 (ctxt->node_seq.buffer != NULL) && 1935 (ctxt->node_seq.buffer[pos].node == info->node)) { 1936 ctxt->node_seq.buffer[pos] = *info; 1937 } 1938 1939 /* Otherwise, we need to add new node to buffer */ 1940 else { 1941 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 1942 xmlParserNodeInfo *tmp_buffer; 1943 unsigned int byte_size; 1944 1945 if (ctxt->node_seq.maximum == 0) 1946 ctxt->node_seq.maximum = 2; 1947 byte_size = (sizeof(*ctxt->node_seq.buffer) * 1948 (2 * ctxt->node_seq.maximum)); 1949 1950 if (ctxt->node_seq.buffer == NULL) 1951 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 1952 else 1953 tmp_buffer = 1954 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 1955 byte_size); 1956 1957 if (tmp_buffer == NULL) { 1958 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 1959 return; 1960 } 1961 ctxt->node_seq.buffer = tmp_buffer; 1962 ctxt->node_seq.maximum *= 2; 1963 } 1964 1965 /* If position is not at end, move elements out of the way */ 1966 if (pos != ctxt->node_seq.length) { 1967 unsigned long i; 1968 1969 for (i = ctxt->node_seq.length; i > pos; i--) 1970 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 1971 } 1972 1973 /* Copy element and increase length */ 1974 ctxt->node_seq.buffer[pos] = *info; 1975 ctxt->node_seq.length++; 1976 } 1977} 1978 1979/************************************************************************ 1980 * * 1981 * Defaults settings * 1982 * * 1983 ************************************************************************/ 1984/** 1985 * xmlPedanticParserDefault: 1986 * @val: int 0 or 1 1987 * 1988 * Set and return the previous value for enabling pedantic warnings. 1989 * 1990 * Returns the last value for 0 for no substitution, 1 for substitution. 1991 */ 1992 1993int 1994xmlPedanticParserDefault(int val) { 1995 int old = xmlPedanticParserDefaultValue; 1996 1997 xmlPedanticParserDefaultValue = val; 1998 return(old); 1999} 2000 2001/** 2002 * xmlLineNumbersDefault: 2003 * @val: int 0 or 1 2004 * 2005 * Set and return the previous value for enabling line numbers in elements 2006 * contents. This may break on old application and is turned off by default. 2007 * 2008 * Returns the last value for 0 for no substitution, 1 for substitution. 2009 */ 2010 2011int 2012xmlLineNumbersDefault(int val) { 2013 int old = xmlLineNumbersDefaultValue; 2014 2015 xmlLineNumbersDefaultValue = val; 2016 return(old); 2017} 2018 2019/** 2020 * xmlSubstituteEntitiesDefault: 2021 * @val: int 0 or 1 2022 * 2023 * Set and return the previous value for default entity support. 2024 * Initially the parser always keep entity references instead of substituting 2025 * entity values in the output. This function has to be used to change the 2026 * default parser behavior 2027 * SAX::substituteEntities() has to be used for changing that on a file by 2028 * file basis. 2029 * 2030 * Returns the last value for 0 for no substitution, 1 for substitution. 2031 */ 2032 2033int 2034xmlSubstituteEntitiesDefault(int val) { 2035 int old = xmlSubstituteEntitiesDefaultValue; 2036 2037 xmlSubstituteEntitiesDefaultValue = val; 2038 return(old); 2039} 2040 2041/** 2042 * xmlKeepBlanksDefault: 2043 * @val: int 0 or 1 2044 * 2045 * Set and return the previous value for default blanks text nodes support. 2046 * The 1.x version of the parser used an heuristic to try to detect 2047 * ignorable white spaces. As a result the SAX callback was generating 2048 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2049 * using the DOM output text nodes containing those blanks were not generated. 2050 * The 2.x and later version will switch to the XML standard way and 2051 * ignorableWhitespace() are only generated when running the parser in 2052 * validating mode and when the current element doesn't allow CDATA or 2053 * mixed content. 2054 * This function is provided as a way to force the standard behavior 2055 * on 1.X libs and to switch back to the old mode for compatibility when 2056 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2057 * by using xmlIsBlankNode() commodity function to detect the "empty" 2058 * nodes generated. 2059 * This value also affect autogeneration of indentation when saving code 2060 * if blanks sections are kept, indentation is not generated. 2061 * 2062 * Returns the last value for 0 for no substitution, 1 for substitution. 2063 */ 2064 2065int 2066xmlKeepBlanksDefault(int val) { 2067 int old = xmlKeepBlanksDefaultValue; 2068 2069 xmlKeepBlanksDefaultValue = val; 2070 xmlIndentTreeOutput = !val; 2071 return(old); 2072} 2073 2074#define bottom_parserInternals 2075#include "elfgcchack.h" 2076