1/* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10#define IN_LIBXML 11#include "libxml.h" 12 13#if defined(WIN32) && !defined (__CYGWIN__) 14#define XML_DIR_SEP '\\' 15#else 16#define XML_DIR_SEP '/' 17#endif 18 19#include <string.h> 20#ifdef HAVE_CTYPE_H 21#include <ctype.h> 22#endif 23#ifdef HAVE_STDLIB_H 24#include <stdlib.h> 25#endif 26#ifdef HAVE_SYS_STAT_H 27#include <sys/stat.h> 28#endif 29#ifdef HAVE_FCNTL_H 30#include <fcntl.h> 31#endif 32#ifdef HAVE_UNISTD_H 33#include <unistd.h> 34#endif 35#ifdef HAVE_ZLIB_H 36#include <zlib.h> 37#endif 38 39#include <libxml/xmlmemory.h> 40#include <libxml/tree.h> 41#include <libxml/parser.h> 42#include <libxml/parserInternals.h> 43#include <libxml/valid.h> 44#include <libxml/entities.h> 45#include <libxml/xmlerror.h> 46#include <libxml/encoding.h> 47#include <libxml/valid.h> 48#include <libxml/xmlIO.h> 49#include <libxml/uri.h> 50#include <libxml/dict.h> 51#include <libxml/SAX.h> 52#ifdef LIBXML_CATALOG_ENABLED 53#include <libxml/catalog.h> 54#endif 55#include <libxml/globals.h> 56#include <libxml/chvalid.h> 57 58#include "buf.h" 59#include "enc.h" 60 61/* 62 * Various global defaults for parsing 63 */ 64 65/** 66 * xmlCheckVersion: 67 * @version: the include version number 68 * 69 * check the compiled lib version against the include one. 
70 * This can warn or immediately kill the application 71 */ 72void 73xmlCheckVersion(int version) { 74 int myversion = (int) LIBXML_VERSION; 75 76 xmlInitParser(); 77 78 if ((myversion / 10000) != (version / 10000)) { 79 xmlGenericError(xmlGenericErrorContext, 80 "Fatal: program compiled against libxml %d using libxml %d\n", 81 (version / 10000), (myversion / 10000)); 82 fprintf(stderr, 83 "Fatal: program compiled against libxml %d using libxml %d\n", 84 (version / 10000), (myversion / 10000)); 85 } 86 if ((myversion / 100) < (version / 100)) { 87 xmlGenericError(xmlGenericErrorContext, 88 "Warning: program compiled against libxml %d using older %d\n", 89 (version / 100), (myversion / 100)); 90 } 91} 92 93 94/************************************************************************ 95 * * 96 * Some factorized error routines * 97 * * 98 ************************************************************************/ 99 100 101/** 102 * xmlErrMemory: 103 * @ctxt: an XML parser context 104 * @extra: extra informations 105 * 106 * Handle a redefinition of attribute error 107 */ 108void 109xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 110{ 111 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 112 (ctxt->instate == XML_PARSER_EOF)) 113 return; 114 if (ctxt != NULL) { 115 ctxt->errNo = XML_ERR_NO_MEMORY; 116 ctxt->instate = XML_PARSER_EOF; 117 ctxt->disableSAX = 1; 118 } 119 if (extra) 120 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 121 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 122 NULL, NULL, 0, 0, 123 "Memory allocation failed : %s\n", extra); 124 else 125 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 126 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 127 NULL, NULL, 0, 0, "Memory allocation failed\n"); 128} 129 130/** 131 * __xmlErrEncoding: 132 * @ctxt: an XML parser context 133 * @xmlerr: the error number 134 * @msg: the error message 135 * @str1: an string info 136 * @str2: an string info 137 * 138 * Handle an encoding 
error 139 */ 140void 141__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 142 const char *msg, const xmlChar * str1, const xmlChar * str2) 143{ 144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 145 (ctxt->instate == XML_PARSER_EOF)) 146 return; 147 if (ctxt != NULL) 148 ctxt->errNo = xmlerr; 149 __xmlRaiseError(NULL, NULL, NULL, 150 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 151 NULL, 0, (const char *) str1, (const char *) str2, 152 NULL, 0, 0, msg, str1, str2); 153 if (ctxt != NULL) { 154 ctxt->wellFormed = 0; 155 if (ctxt->recovery == 0) 156 ctxt->disableSAX = 1; 157 } 158} 159 160/** 161 * xmlErrInternal: 162 * @ctxt: an XML parser context 163 * @msg: the error message 164 * @str: error informations 165 * 166 * Handle an internal error 167 */ 168static void 169xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 170{ 171 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 172 (ctxt->instate == XML_PARSER_EOF)) 173 return; 174 if (ctxt != NULL) 175 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 176 __xmlRaiseError(NULL, NULL, NULL, 177 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 178 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 179 0, 0, msg, str); 180 if (ctxt != NULL) { 181 ctxt->wellFormed = 0; 182 if (ctxt->recovery == 0) 183 ctxt->disableSAX = 1; 184 } 185} 186 187/** 188 * xmlErrEncodingInt: 189 * @ctxt: an XML parser context 190 * @error: the error number 191 * @msg: the error message 192 * @val: an integer value 193 * 194 * n encoding error 195 */ 196static void 197xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 198 const char *msg, int val) 199{ 200 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 201 (ctxt->instate == XML_PARSER_EOF)) 202 return; 203 if (ctxt != NULL) 204 ctxt->errNo = error; 205 __xmlRaiseError(NULL, NULL, NULL, 206 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 207 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 208 if (ctxt != NULL) { 209 ctxt->wellFormed = 0; 
210 if (ctxt->recovery == 0) 211 ctxt->disableSAX = 1; 212 } 213} 214 215/** 216 * xmlIsLetter: 217 * @c: an unicode character (int) 218 * 219 * Check whether the character is allowed by the production 220 * [84] Letter ::= BaseChar | Ideographic 221 * 222 * Returns 0 if not, non-zero otherwise 223 */ 224int 225xmlIsLetter(int c) { 226 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 227} 228 229/************************************************************************ 230 * * 231 * Input handling functions for progressive parsing * 232 * * 233 ************************************************************************/ 234 235/* #define DEBUG_INPUT */ 236/* #define DEBUG_STACK */ 237/* #define DEBUG_PUSH */ 238 239 240/* we need to keep enough input to show errors in context */ 241#define LINE_LEN 80 242 243#ifdef DEBUG_INPUT 244#define CHECK_BUFFER(in) check_buffer(in) 245 246static 247void check_buffer(xmlParserInputPtr in) { 248 if (in->base != xmlBufContent(in->buf->buffer)) { 249 xmlGenericError(xmlGenericErrorContext, 250 "xmlParserInput: base mismatch problem\n"); 251 } 252 if (in->cur < in->base) { 253 xmlGenericError(xmlGenericErrorContext, 254 "xmlParserInput: cur < base problem\n"); 255 } 256 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 257 xmlGenericError(xmlGenericErrorContext, 258 "xmlParserInput: cur > base + use problem\n"); 259 } 260 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 261 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 262 xmlBufUse(in->buf->buffer)); 263} 264 265#else 266#define CHECK_BUFFER(in) 267#endif 268 269 270/** 271 * xmlParserInputRead: 272 * @in: an XML parser input 273 * @len: an indicative size for the lookahead 274 * 275 * This function was internal and is deprecated. 276 * 277 * Returns -1 as this is an error to use it. 
278 */ 279int 280xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 281 return(-1); 282} 283 284/** 285 * xmlParserInputGrow: 286 * @in: an XML parser input 287 * @len: an indicative size for the lookahead 288 * 289 * This function increase the input for the parser. It tries to 290 * preserve pointers to the input buffer, and keep already read data 291 * 292 * Returns the amount of char read, or -1 in case of error, 0 indicate the 293 * end of this entity 294 */ 295int 296xmlParserInputGrow(xmlParserInputPtr in, int len) { 297 size_t ret; 298 size_t indx; 299 const xmlChar *content; 300 301 if ((in == NULL) || (len < 0)) return(-1); 302#ifdef DEBUG_INPUT 303 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 304#endif 305 if (in->buf == NULL) return(-1); 306 if (in->base == NULL) return(-1); 307 if (in->cur == NULL) return(-1); 308 if (in->buf->buffer == NULL) return(-1); 309 310 CHECK_BUFFER(in); 311 312 indx = in->cur - in->base; 313 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 314 315 CHECK_BUFFER(in); 316 317 return(0); 318 } 319 if (in->buf->readcallback != NULL) { 320 ret = xmlParserInputBufferGrow(in->buf, len); 321 } else 322 return(0); 323 324 /* 325 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 326 * block, but we use it really as an integer to do some 327 * pointer arithmetic. Insure will raise it as a bug but in 328 * that specific case, that's not ! 329 */ 330 331 content = xmlBufContent(in->buf->buffer); 332 if (in->base != content) { 333 /* 334 * the buffer has been reallocated 335 */ 336 indx = in->cur - in->base; 337 in->base = content; 338 in->cur = &content[indx]; 339 } 340 in->end = xmlBufEnd(in->buf->buffer); 341 342 CHECK_BUFFER(in); 343 344 return(ret); 345} 346 347/** 348 * xmlParserInputShrink: 349 * @in: an XML parser input 350 * 351 * This function removes used input for the parser. 
352 */ 353void 354xmlParserInputShrink(xmlParserInputPtr in) { 355 size_t used; 356 size_t ret; 357 size_t indx; 358 const xmlChar *content; 359 360#ifdef DEBUG_INPUT 361 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 362#endif 363 if (in == NULL) return; 364 if (in->buf == NULL) return; 365 if (in->base == NULL) return; 366 if (in->cur == NULL) return; 367 if (in->buf->buffer == NULL) return; 368 369 CHECK_BUFFER(in); 370 371 used = in->cur - xmlBufContent(in->buf->buffer); 372 /* 373 * Do not shrink on large buffers whose only a tiny fraction 374 * was consumed 375 */ 376 if (used > INPUT_CHUNK) { 377 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 378 if (ret > 0) { 379 in->cur -= ret; 380 in->consumed += ret; 381 } 382 in->end = xmlBufEnd(in->buf->buffer); 383 } 384 385 CHECK_BUFFER(in); 386 387 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 388 return; 389 } 390 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 391 content = xmlBufContent(in->buf->buffer); 392 if (in->base != content) { 393 /* 394 * the buffer has been reallocated 395 */ 396 indx = in->cur - in->base; 397 in->base = content; 398 in->cur = &content[indx]; 399 } 400 in->end = xmlBufEnd(in->buf->buffer); 401 402 CHECK_BUFFER(in); 403} 404 405/************************************************************************ 406 * * 407 * UTF8 character input and related functions * 408 * * 409 ************************************************************************/ 410 411/** 412 * xmlNextChar: 413 * @ctxt: the XML parser context 414 * 415 * Skip to the next char input char. 
*/

void
xmlNextChar(xmlParserCtxtPtr ctxt)
{
    /* Nothing to skip without a context, an input, or once parsing ended. */
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
        (ctxt->input == NULL))
        return;

    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
            (ctxt->instate != XML_PARSER_COMMENT)) {
            /*
             * If we are at the end of the current entity and
             * the context allows it, we pop consumed entities
             * automatically.
             * the auto closing should be blocked in other cases
             */
            xmlPopInput(ctxt);
        } else {
            const unsigned char *cur;
            unsigned char c;

            /*
             *   2.11 End-of-Line Handling
             *   the literal two-character sequence "#xD#xA" or a standalone
             *   literal #xD, an XML processor must pass to the application
             *   the single character #xA.
             */
            /* Line/column bookkeeping happens before the cursor moves. */
            if (*(ctxt->input->cur) == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else
                ctxt->input->col++;

            /*
             * We are supposed to handle UTF8, check it's valid
             * From rfc2044: encoding of the Unicode values on UTF-8:
             *
             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
             * 0000 0000-0000 007F   0xxxxxxx
             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
             *
             * Check for the 0x110000 limit too
             */
            cur = ctxt->input->cur;

            c = *cur;
            if (c & 0x80) {
                if (c == 0xC0)
                    goto encoding_error;
                /*
                 * A zero byte where a continuation byte is expected: try to
                 * grow the input, then re-read the cursor since the buffer
                 * may have been reallocated.
                 */
                if (cur[1] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[1] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xe0) == 0xe0) {
                    unsigned int val;

                    if (cur[2] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if ((cur[2] & 0xc0) != 0x80)
                        goto encoding_error;
                    if ((c & 0xf0) == 0xf0) {
                        if (cur[3] == 0) {
                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                            cur = ctxt->input->cur;
                        }
                        if (((c & 0xf8) != 0xf0) ||
                            ((cur[3] & 0xc0) != 0x80))
                            goto encoding_error;
                        /* 4-byte code */
                        ctxt->input->cur += 4;
                        val = (cur[0] & 0x7) << 18;
                        val |= (cur[1] & 0x3f) << 12;
                        val |= (cur[2] & 0x3f) << 6;
                        val |= cur[3] & 0x3f;
                    } else {
                        /* 3-byte code */
                        ctxt->input->cur += 3;
                        val = (cur[0] & 0xf) << 12;
                        val |= (cur[1] & 0x3f) << 6;
                        val |= cur[2] & 0x3f;
                    }
                    /*
                     * The decoded value is only range-checked and reported;
                     * the cursor has already been advanced past it.
                     */
                    if (((val > 0xd7ff) && (val < 0xe000)) ||
                        ((val > 0xfffd) && (val < 0x10000)) ||
                        (val >= 0x110000)) {
                        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                          "Char 0x%X out of allowed range\n",
                                          val);
                    }
                } else
                    /* 2-byte code */
                    ctxt->input->cur += 2;
            } else
                /* 1-byte code */
                ctxt->input->cur++;

            ctxt->nbChars++;
            if (*ctxt->input->cur == 0)
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
        }
    } else {
        /*
         * Assume it's a fixed length encoding (1) with
         * a compatible encoding for the ASCII set, since
         * XML constructs only use < 128 chars
         */

        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++; ctxt->input->col = 1;
        } else
            ctxt->input->col++;
        ctxt->input->cur++;
        ctxt->nbChars++;
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
    /* Outside HTML mode a '%' at the new position is handed to the PE
     * reference handler. */
    if ((*ctxt->input->cur == '%') && (!ctxt->html))
        xmlParserHandlePEReference(ctxt);
    /* Still at a zero byte and nothing more could be read: pop the input. */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
        xmlPopInput(ctxt);
    return;
encoding_error:
    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    /* The offending bytes are only dumped when at least 4 are available. */
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
        (ctxt->input->end - ctxt->input->cur < 4)) {
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n",
                     NULL, NULL);
    } else {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    /* Fall back to a single-byte charset and step over the bad byte. */
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    ctxt->input->cur++;
    return;
}

/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * Wherever an external parsed entity or the literal entity value
 * of an internal parsed entity contains either the literal two-character
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
 * must pass to the application the single character #xA.
584 * This behavior can conveniently be produced by normalizing all 585 * line breaks to #xA on input, before parsing.) 586 * 587 * Returns the current char value and its length 588 */ 589 590int 591xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 592 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 593 if (ctxt->instate == XML_PARSER_EOF) 594 return(0); 595 596 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 597 *len = 1; 598 return((int) *ctxt->input->cur); 599 } 600 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 601 /* 602 * We are supposed to handle UTF8, check it's valid 603 * From rfc2044: encoding of the Unicode values on UTF-8: 604 * 605 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 606 * 0000 0000-0000 007F 0xxxxxxx 607 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 608 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 609 * 610 * Check for the 0x110000 limit too 611 */ 612 const unsigned char *cur = ctxt->input->cur; 613 unsigned char c; 614 unsigned int val; 615 616 c = *cur; 617 if (c & 0x80) { 618 if (((c & 0x40) == 0) || (c == 0xC0)) 619 goto encoding_error; 620 if (cur[1] == 0) { 621 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 622 cur = ctxt->input->cur; 623 } 624 if ((cur[1] & 0xc0) != 0x80) 625 goto encoding_error; 626 if ((c & 0xe0) == 0xe0) { 627 if (cur[2] == 0) { 628 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 629 cur = ctxt->input->cur; 630 } 631 if ((cur[2] & 0xc0) != 0x80) 632 goto encoding_error; 633 if ((c & 0xf0) == 0xf0) { 634 if (cur[3] == 0) { 635 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 636 cur = ctxt->input->cur; 637 } 638 if (((c & 0xf8) != 0xf0) || 639 ((cur[3] & 0xc0) != 0x80)) 640 goto encoding_error; 641 /* 4-byte code */ 642 *len = 4; 643 val = (cur[0] & 0x7) << 18; 644 val |= (cur[1] & 0x3f) << 12; 645 val |= (cur[2] & 0x3f) << 6; 646 val |= cur[3] & 0x3f; 647 if (val < 0x10000) 648 goto encoding_error; 649 } else { 650 /* 3-byte code */ 651 *len = 3; 652 val = (cur[0] & 0xf) << 
12; 653 val |= (cur[1] & 0x3f) << 6; 654 val |= cur[2] & 0x3f; 655 if (val < 0x800) 656 goto encoding_error; 657 } 658 } else { 659 /* 2-byte code */ 660 *len = 2; 661 val = (cur[0] & 0x1f) << 6; 662 val |= cur[1] & 0x3f; 663 if (val < 0x80) 664 goto encoding_error; 665 } 666 if (!IS_CHAR(val)) { 667 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 668 "Char 0x%X out of allowed range\n", val); 669 } 670 return(val); 671 } else { 672 /* 1-byte code */ 673 *len = 1; 674 if (*ctxt->input->cur == 0) 675 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 676 if ((*ctxt->input->cur == 0) && 677 (ctxt->input->end > ctxt->input->cur)) { 678 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 679 "Char 0x0 out of allowed range\n", 0); 680 } 681 if (*ctxt->input->cur == 0xD) { 682 if (ctxt->input->cur[1] == 0xA) { 683 ctxt->nbChars++; 684 ctxt->input->cur++; 685 } 686 return(0xA); 687 } 688 return((int) *ctxt->input->cur); 689 } 690 } 691 /* 692 * Assume it's a fixed length encoding (1) with 693 * a compatible encoding for the ASCII set, since 694 * XML constructs only use < 128 chars 695 */ 696 *len = 1; 697 if (*ctxt->input->cur == 0xD) { 698 if (ctxt->input->cur[1] == 0xA) { 699 ctxt->nbChars++; 700 ctxt->input->cur++; 701 } 702 return(0xA); 703 } 704 return((int) *ctxt->input->cur); 705encoding_error: 706 /* 707 * An encoding problem may arise from a truncated input buffer 708 * splitting a character in the middle. In that case do not raise 709 * an error but return 0 to endicate an end of stream problem 710 */ 711 if (ctxt->input->end - ctxt->input->cur < 4) { 712 *len = 0; 713 return(0); 714 } 715 716 /* 717 * If we detect an UTF8 error that probably mean that the 718 * input encoding didn't get properly advertised in the 719 * declaration header. Report the error and switch the encoding 720 * to ISO-Latin-1 (if you don't like this policy, just declare the 721 * encoding !) 
722 */ 723 { 724 char buffer[150]; 725 726 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 727 ctxt->input->cur[0], ctxt->input->cur[1], 728 ctxt->input->cur[2], ctxt->input->cur[3]); 729 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 730 "Input is not proper UTF-8, indicate encoding !\n%s", 731 BAD_CAST buffer, NULL); 732 } 733 ctxt->charset = XML_CHAR_ENCODING_8859_1; 734 *len = 1; 735 return((int) *ctxt->input->cur); 736} 737 738/** 739 * xmlStringCurrentChar: 740 * @ctxt: the XML parser context 741 * @cur: pointer to the beginning of the char 742 * @len: pointer to the length of the char read 743 * 744 * The current char value, if using UTF-8 this may actually span multiple 745 * bytes in the input buffer. 746 * 747 * Returns the current char value and its length 748 */ 749 750int 751xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 752{ 753 if ((len == NULL) || (cur == NULL)) return(0); 754 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 755 /* 756 * We are supposed to handle UTF8, check it's valid 757 * From rfc2044: encoding of the Unicode values on UTF-8: 758 * 759 * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) 760 * 0000 0000-0000 007F 0xxxxxxx 761 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 762 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 763 * 764 * Check for the 0x110000 limit too 765 */ 766 unsigned char c; 767 unsigned int val; 768 769 c = *cur; 770 if (c & 0x80) { 771 if ((cur[1] & 0xc0) != 0x80) 772 goto encoding_error; 773 if ((c & 0xe0) == 0xe0) { 774 775 if ((cur[2] & 0xc0) != 0x80) 776 goto encoding_error; 777 if ((c & 0xf0) == 0xf0) { 778 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 779 goto encoding_error; 780 /* 4-byte code */ 781 *len = 4; 782 val = (cur[0] & 0x7) << 18; 783 val |= (cur[1] & 0x3f) << 12; 784 val |= (cur[2] & 0x3f) << 6; 785 val |= cur[3] & 0x3f; 786 } else { 787 /* 3-byte code */ 788 *len = 3; 789 val = (cur[0] & 0xf) << 12; 790 val |= (cur[1] & 0x3f) << 6; 791 val |= cur[2] & 0x3f; 792 } 793 } else { 794 /* 2-byte code */ 795 *len = 2; 796 val = (cur[0] & 0x1f) << 6; 797 val |= cur[1] & 0x3f; 798 } 799 if (!IS_CHAR(val)) { 800 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 801 "Char 0x%X out of allowed range\n", val); 802 } 803 return (val); 804 } else { 805 /* 1-byte code */ 806 *len = 1; 807 return ((int) *cur); 808 } 809 } 810 /* 811 * Assume it's a fixed length encoding (1) with 812 * a compatible encoding for the ASCII set, since 813 * XML constructs only use < 128 chars 814 */ 815 *len = 1; 816 return ((int) *cur); 817encoding_error: 818 819 /* 820 * An encoding problem may arise from a truncated input buffer 821 * splitting a character in the middle. In that case do not raise 822 * an error but return 0 to endicate an end of stream problem 823 */ 824 if ((ctxt == NULL) || (ctxt->input == NULL) || 825 (ctxt->input->end - ctxt->input->cur < 4)) { 826 *len = 0; 827 return(0); 828 } 829 /* 830 * If we detect an UTF8 error that probably mean that the 831 * input encoding didn't get properly advertised in the 832 * declaration header. 
Report the error and switch the encoding 833 * to ISO-Latin-1 (if you don't like this policy, just declare the 834 * encoding !) 835 */ 836 { 837 char buffer[150]; 838 839 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 840 ctxt->input->cur[0], ctxt->input->cur[1], 841 ctxt->input->cur[2], ctxt->input->cur[3]); 842 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 843 "Input is not proper UTF-8, indicate encoding !\n%s", 844 BAD_CAST buffer, NULL); 845 } 846 *len = 1; 847 return ((int) *cur); 848} 849 850/** 851 * xmlCopyCharMultiByte: 852 * @out: pointer to an array of xmlChar 853 * @val: the char value 854 * 855 * append the char value in the array 856 * 857 * Returns the number of xmlChar written 858 */ 859int 860xmlCopyCharMultiByte(xmlChar *out, int val) { 861 if (out == NULL) return(0); 862 /* 863 * We are supposed to handle UTF8, check it's valid 864 * From rfc2044: encoding of the Unicode values on UTF-8: 865 * 866 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 867 * 0000 0000-0000 007F 0xxxxxxx 868 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 869 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 870 */ 871 if (val >= 0x80) { 872 xmlChar *savedout = out; 873 int bits; 874 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 875 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 876 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 877 else { 878 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 879 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 880 val); 881 return(0); 882 } 883 for ( ; bits >= 0; bits-= 6) 884 *out++= ((val >> bits) & 0x3F) | 0x80 ; 885 return (out - savedout); 886 } 887 *out = (xmlChar) val; 888 return 1; 889} 890 891/** 892 * xmlCopyChar: 893 * @len: Ignored, compatibility 894 * @out: pointer to an array of xmlChar 895 * @val: the char value 896 * 897 * append the char value in the array 898 * 899 * Returns the number of xmlChar written 900 */ 901 902int 903xmlCopyChar(int len 
ATTRIBUTE_UNUSED, xmlChar *out, int val) { 904 if (out == NULL) return(0); 905 /* the len parameter is ignored */ 906 if (val >= 0x80) { 907 return(xmlCopyCharMultiByte (out, val)); 908 } 909 *out = (xmlChar) val; 910 return 1; 911} 912 913/************************************************************************ 914 * * 915 * Commodity functions to switch encodings * 916 * * 917 ************************************************************************/ 918 919static int 920xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 921 xmlCharEncodingHandlerPtr handler, int len); 922static int 923xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 924 xmlCharEncodingHandlerPtr handler, int len); 925/** 926 * xmlSwitchEncoding: 927 * @ctxt: the parser context 928 * @enc: the encoding value (number) 929 * 930 * change the input functions when discovering the character encoding 931 * of a given entity. 932 * 933 * Returns 0 in case of success, -1 otherwise 934 */ 935int 936xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 937{ 938 xmlCharEncodingHandlerPtr handler; 939 int len = -1; 940 941 if (ctxt == NULL) return(-1); 942 switch (enc) { 943 case XML_CHAR_ENCODING_ERROR: 944 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 945 "encoding unknown\n", NULL, NULL); 946 return(-1); 947 case XML_CHAR_ENCODING_NONE: 948 /* let's assume it's UTF-8 without the XML decl */ 949 ctxt->charset = XML_CHAR_ENCODING_UTF8; 950 return(0); 951 case XML_CHAR_ENCODING_UTF8: 952 /* default encoding, no conversion should be needed */ 953 ctxt->charset = XML_CHAR_ENCODING_UTF8; 954 955 /* 956 * Errata on XML-1.0 June 20 2001 957 * Specific handling of the Byte Order Mark for 958 * UTF-8 959 */ 960 if ((ctxt->input != NULL) && 961 (ctxt->input->cur[0] == 0xEF) && 962 (ctxt->input->cur[1] == 0xBB) && 963 (ctxt->input->cur[2] == 0xBF)) { 964 ctxt->input->cur += 3; 965 } 966 return(0); 967 case XML_CHAR_ENCODING_UTF16LE: 968 case XML_CHAR_ENCODING_UTF16BE: 969 /*The raw input 
characters are encoded 970 *in UTF-16. As we expect this function 971 *to be called after xmlCharEncInFunc, we expect 972 *ctxt->input->cur to contain UTF-8 encoded characters. 973 *So the raw UTF16 Byte Order Mark 974 *has also been converted into 975 *an UTF-8 BOM. Let's skip that BOM. 976 */ 977 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 978 (ctxt->input->cur[0] == 0xEF) && 979 (ctxt->input->cur[1] == 0xBB) && 980 (ctxt->input->cur[2] == 0xBF)) { 981 ctxt->input->cur += 3; 982 } 983 len = 90; 984 break; 985 case XML_CHAR_ENCODING_UCS2: 986 len = 90; 987 break; 988 case XML_CHAR_ENCODING_UCS4BE: 989 case XML_CHAR_ENCODING_UCS4LE: 990 case XML_CHAR_ENCODING_UCS4_2143: 991 case XML_CHAR_ENCODING_UCS4_3412: 992 len = 180; 993 break; 994 case XML_CHAR_ENCODING_EBCDIC: 995 case XML_CHAR_ENCODING_8859_1: 996 case XML_CHAR_ENCODING_8859_2: 997 case XML_CHAR_ENCODING_8859_3: 998 case XML_CHAR_ENCODING_8859_4: 999 case XML_CHAR_ENCODING_8859_5: 1000 case XML_CHAR_ENCODING_8859_6: 1001 case XML_CHAR_ENCODING_8859_7: 1002 case XML_CHAR_ENCODING_8859_8: 1003 case XML_CHAR_ENCODING_8859_9: 1004 case XML_CHAR_ENCODING_ASCII: 1005 case XML_CHAR_ENCODING_2022_JP: 1006 case XML_CHAR_ENCODING_SHIFT_JIS: 1007 case XML_CHAR_ENCODING_EUC_JP: 1008 len = 45; 1009 break; 1010 } 1011 handler = xmlGetCharEncodingHandler(enc); 1012 if (handler == NULL) { 1013 /* 1014 * Default handlers. 
1015 */ 1016 switch (enc) { 1017 case XML_CHAR_ENCODING_ASCII: 1018 /* default encoding, no conversion should be needed */ 1019 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1020 return(0); 1021 case XML_CHAR_ENCODING_UTF16LE: 1022 break; 1023 case XML_CHAR_ENCODING_UTF16BE: 1024 break; 1025 case XML_CHAR_ENCODING_UCS4LE: 1026 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1027 "encoding not supported %s\n", 1028 BAD_CAST "USC4 little endian", NULL); 1029 break; 1030 case XML_CHAR_ENCODING_UCS4BE: 1031 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1032 "encoding not supported %s\n", 1033 BAD_CAST "USC4 big endian", NULL); 1034 break; 1035 case XML_CHAR_ENCODING_EBCDIC: 1036 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1037 "encoding not supported %s\n", 1038 BAD_CAST "EBCDIC", NULL); 1039 break; 1040 case XML_CHAR_ENCODING_UCS4_2143: 1041 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1042 "encoding not supported %s\n", 1043 BAD_CAST "UCS4 2143", NULL); 1044 break; 1045 case XML_CHAR_ENCODING_UCS4_3412: 1046 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1047 "encoding not supported %s\n", 1048 BAD_CAST "UCS4 3412", NULL); 1049 break; 1050 case XML_CHAR_ENCODING_UCS2: 1051 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1052 "encoding not supported %s\n", 1053 BAD_CAST "UCS2", NULL); 1054 break; 1055 case XML_CHAR_ENCODING_8859_1: 1056 case XML_CHAR_ENCODING_8859_2: 1057 case XML_CHAR_ENCODING_8859_3: 1058 case XML_CHAR_ENCODING_8859_4: 1059 case XML_CHAR_ENCODING_8859_5: 1060 case XML_CHAR_ENCODING_8859_6: 1061 case XML_CHAR_ENCODING_8859_7: 1062 case XML_CHAR_ENCODING_8859_8: 1063 case XML_CHAR_ENCODING_8859_9: 1064 /* 1065 * We used to keep the internal content in the 1066 * document encoding however this turns being unmaintainable 1067 * So xmlGetCharEncodingHandler() will return non-null 1068 * values for this now. 
1069 */ 1070 if ((ctxt->inputNr == 1) && 1071 (ctxt->encoding == NULL) && 1072 (ctxt->input != NULL) && 1073 (ctxt->input->encoding != NULL)) { 1074 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1075 } 1076 ctxt->charset = enc; 1077 return(0); 1078 case XML_CHAR_ENCODING_2022_JP: 1079 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1080 "encoding not supported %s\n", 1081 BAD_CAST "ISO-2022-JP", NULL); 1082 break; 1083 case XML_CHAR_ENCODING_SHIFT_JIS: 1084 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1085 "encoding not supported %s\n", 1086 BAD_CAST "Shift_JIS", NULL); 1087 break; 1088 case XML_CHAR_ENCODING_EUC_JP: 1089 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1090 "encoding not supported %s\n", 1091 BAD_CAST "EUC-JP", NULL); 1092 break; 1093 default: 1094 break; 1095 } 1096 } 1097 if (handler == NULL) 1098 return(-1); 1099 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1100 return(xmlSwitchToEncodingInt(ctxt, handler, len)); 1101} 1102 1103/** 1104 * xmlSwitchInputEncoding: 1105 * @ctxt: the parser context 1106 * @input: the input stream 1107 * @handler: the encoding handler 1108 * @len: the number of bytes to convert for the first line or -1 1109 * 1110 * change the input functions when discovering the character encoding 1111 * of a given entity. 1112 * 1113 * Returns 0 in case of success, -1 otherwise 1114 */ 1115static int 1116xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1117 xmlCharEncodingHandlerPtr handler, int len) 1118{ 1119 int nbchars; 1120 1121 if (handler == NULL) 1122 return (-1); 1123 if (input == NULL) 1124 return (-1); 1125 if (input->buf != NULL) { 1126 if (input->buf->encoder != NULL) { 1127 /* 1128 * Check in case the auto encoding detetection triggered 1129 * in already. 
1130 */ 1131 if (input->buf->encoder == handler) 1132 return (0); 1133 1134 /* 1135 * "UTF-16" can be used for both LE and BE 1136 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1137 BAD_CAST "UTF-16", 6)) && 1138 (!xmlStrncmp(BAD_CAST handler->name, 1139 BAD_CAST "UTF-16", 6))) { 1140 return(0); 1141 } 1142 */ 1143 1144 /* 1145 * Note: this is a bit dangerous, but that's what it 1146 * takes to use nearly compatible signature for different 1147 * encodings. 1148 */ 1149 xmlCharEncCloseFunc(input->buf->encoder); 1150 input->buf->encoder = handler; 1151 return (0); 1152 } 1153 input->buf->encoder = handler; 1154 1155 /* 1156 * Is there already some content down the pipe to convert ? 1157 */ 1158 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1159 int processed; 1160 unsigned int use; 1161 1162 /* 1163 * Specific handling of the Byte Order Mark for 1164 * UTF-16 1165 */ 1166 if ((handler->name != NULL) && 1167 (!strcmp(handler->name, "UTF-16LE") || 1168 !strcmp(handler->name, "UTF-16")) && 1169 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1170 input->cur += 2; 1171 } 1172 if ((handler->name != NULL) && 1173 (!strcmp(handler->name, "UTF-16BE")) && 1174 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1175 input->cur += 2; 1176 } 1177 /* 1178 * Errata on XML-1.0 June 20 2001 1179 * Specific handling of the Byte Order Mark for 1180 * UTF-8 1181 */ 1182 if ((handler->name != NULL) && 1183 (!strcmp(handler->name, "UTF-8")) && 1184 (input->cur[0] == 0xEF) && 1185 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1186 input->cur += 3; 1187 } 1188 1189 /* 1190 * Shrink the current input buffer. 
1191 * Move it as the raw buffer and create a new input buffer 1192 */ 1193 processed = input->cur - input->base; 1194 xmlBufShrink(input->buf->buffer, processed); 1195 input->buf->raw = input->buf->buffer; 1196 input->buf->buffer = xmlBufCreate(); 1197 input->buf->rawconsumed = processed; 1198 use = xmlBufUse(input->buf->raw); 1199 1200 if (ctxt->html) { 1201 /* 1202 * convert as much as possible of the buffer 1203 */ 1204 nbchars = xmlCharEncInput(input->buf, 1); 1205 } else { 1206 /* 1207 * convert just enough to get 1208 * '<?xml version="1.0" encoding="xxx"?>' 1209 * parsed with the autodetected encoding 1210 * into the parser reading buffer. 1211 */ 1212 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1213 } 1214 if (nbchars < 0) { 1215 xmlErrInternal(ctxt, 1216 "switching encoding: encoder error\n", 1217 NULL); 1218 return (-1); 1219 } 1220 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1221 xmlBufResetInput(input->buf->buffer, input); 1222 } 1223 return (0); 1224 } else if (input->length == 0) { 1225 /* 1226 * When parsing a static memory array one must know the 1227 * size to be able to convert the buffer. 1228 */ 1229 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1230 return (-1); 1231 } 1232 return (0); 1233} 1234 1235/** 1236 * xmlSwitchInputEncoding: 1237 * @ctxt: the parser context 1238 * @input: the input stream 1239 * @handler: the encoding handler 1240 * 1241 * change the input functions when discovering the character encoding 1242 * of a given entity. 
1243 * 1244 * Returns 0 in case of success, -1 otherwise 1245 */ 1246int 1247xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1248 xmlCharEncodingHandlerPtr handler) { 1249 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1250} 1251 1252/** 1253 * xmlSwitchToEncodingInt: 1254 * @ctxt: the parser context 1255 * @handler: the encoding handler 1256 * @len: the length to convert or -1 1257 * 1258 * change the input functions when discovering the character encoding 1259 * of a given entity, and convert only @len bytes of the output, this 1260 * is needed on auto detect to allows any declared encoding later to 1261 * convert the actual content after the xmlDecl 1262 * 1263 * Returns 0 in case of success, -1 otherwise 1264 */ 1265static int 1266xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1267 xmlCharEncodingHandlerPtr handler, int len) { 1268 int ret = 0; 1269 1270 if (handler != NULL) { 1271 if (ctxt->input != NULL) { 1272 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1273 } else { 1274 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1275 NULL); 1276 return(-1); 1277 } 1278 /* 1279 * The parsing is now done in UTF8 natively 1280 */ 1281 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1282 } else 1283 return(-1); 1284 return(ret); 1285} 1286 1287/** 1288 * xmlSwitchToEncoding: 1289 * @ctxt: the parser context 1290 * @handler: the encoding handler 1291 * 1292 * change the input functions when discovering the character encoding 1293 * of a given entity. 
1294 * 1295 * Returns 0 in case of success, -1 otherwise 1296 */ 1297int 1298xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1299{ 1300 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1301} 1302 1303/************************************************************************ 1304 * * 1305 * Commodity functions to handle entities processing * 1306 * * 1307 ************************************************************************/ 1308 1309/** 1310 * xmlFreeInputStream: 1311 * @input: an xmlParserInputPtr 1312 * 1313 * Free up an input stream. 1314 */ 1315void 1316xmlFreeInputStream(xmlParserInputPtr input) { 1317 if (input == NULL) return; 1318 1319 if (input->filename != NULL) xmlFree((char *) input->filename); 1320 if (input->directory != NULL) xmlFree((char *) input->directory); 1321 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1322 if (input->version != NULL) xmlFree((char *) input->version); 1323 if ((input->free != NULL) && (input->base != NULL)) 1324 input->free((xmlChar *) input->base); 1325 if (input->buf != NULL) 1326 xmlFreeParserInputBuffer(input->buf); 1327 xmlFree(input); 1328} 1329 1330/** 1331 * xmlNewInputStream: 1332 * @ctxt: an XML parser context 1333 * 1334 * Create a new input stream structure. 1335 * 1336 * Returns the new input stream or NULL 1337 */ 1338xmlParserInputPtr 1339xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1340 xmlParserInputPtr input; 1341 1342 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1343 if (input == NULL) { 1344 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1345 return(NULL); 1346 } 1347 memset(input, 0, sizeof(xmlParserInput)); 1348 input->line = 1; 1349 input->col = 1; 1350 input->standalone = -1; 1351 1352 /* 1353 * If the context is NULL the id cannot be initialized, but that 1354 * should not happen while parsing which is the situation where 1355 * the id is actually needed. 
1356 */ 1357 if (ctxt != NULL) 1358 input->id = ctxt->input_id++; 1359 1360 return(input); 1361} 1362 1363/** 1364 * xmlNewIOInputStream: 1365 * @ctxt: an XML parser context 1366 * @input: an I/O Input 1367 * @enc: the charset encoding if known 1368 * 1369 * Create a new input stream structure encapsulating the @input into 1370 * a stream suitable for the parser. 1371 * 1372 * Returns the new input stream or NULL 1373 */ 1374xmlParserInputPtr 1375xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1376 xmlCharEncoding enc) { 1377 xmlParserInputPtr inputStream; 1378 1379 if (input == NULL) return(NULL); 1380 if (xmlParserDebugEntities) 1381 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1382 inputStream = xmlNewInputStream(ctxt); 1383 if (inputStream == NULL) { 1384 return(NULL); 1385 } 1386 inputStream->filename = NULL; 1387 inputStream->buf = input; 1388 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1389 1390 if (enc != XML_CHAR_ENCODING_NONE) { 1391 xmlSwitchEncoding(ctxt, enc); 1392 } 1393 1394 return(inputStream); 1395} 1396 1397/** 1398 * xmlNewEntityInputStream: 1399 * @ctxt: an XML parser context 1400 * @entity: an Entity pointer 1401 * 1402 * Create a new input stream based on an xmlEntityPtr 1403 * 1404 * Returns the new input stream or NULL 1405 */ 1406xmlParserInputPtr 1407xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1408 xmlParserInputPtr input; 1409 1410 if (entity == NULL) { 1411 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1412 NULL); 1413 return(NULL); 1414 } 1415 if (xmlParserDebugEntities) 1416 xmlGenericError(xmlGenericErrorContext, 1417 "new input from entity: %s\n", entity->name); 1418 if (entity->content == NULL) { 1419 switch (entity->etype) { 1420 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1421 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1422 entity->name); 1423 break; 1424 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1425 case 
XML_EXTERNAL_PARAMETER_ENTITY: 1426 return(xmlLoadExternalEntity((char *) entity->URI, 1427 (char *) entity->ExternalID, ctxt)); 1428 case XML_INTERNAL_GENERAL_ENTITY: 1429 xmlErrInternal(ctxt, 1430 "Internal entity %s without content !\n", 1431 entity->name); 1432 break; 1433 case XML_INTERNAL_PARAMETER_ENTITY: 1434 xmlErrInternal(ctxt, 1435 "Internal parameter entity %s without content !\n", 1436 entity->name); 1437 break; 1438 case XML_INTERNAL_PREDEFINED_ENTITY: 1439 xmlErrInternal(ctxt, 1440 "Predefined entity %s without content !\n", 1441 entity->name); 1442 break; 1443 } 1444 return(NULL); 1445 } 1446 input = xmlNewInputStream(ctxt); 1447 if (input == NULL) { 1448 return(NULL); 1449 } 1450 if (entity->URI != NULL) 1451 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1452 input->base = entity->content; 1453 input->cur = entity->content; 1454 input->length = entity->length; 1455 input->end = &entity->content[input->length]; 1456 return(input); 1457} 1458 1459/** 1460 * xmlNewStringInputStream: 1461 * @ctxt: an XML parser context 1462 * @buffer: an memory buffer 1463 * 1464 * Create a new input stream based on a memory buffer. 
1465 * Returns the new input stream 1466 */ 1467xmlParserInputPtr 1468xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1469 xmlParserInputPtr input; 1470 1471 if (buffer == NULL) { 1472 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1473 NULL); 1474 return(NULL); 1475 } 1476 if (xmlParserDebugEntities) 1477 xmlGenericError(xmlGenericErrorContext, 1478 "new fixed input: %.30s\n", buffer); 1479 input = xmlNewInputStream(ctxt); 1480 if (input == NULL) { 1481 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1482 return(NULL); 1483 } 1484 input->base = buffer; 1485 input->cur = buffer; 1486 input->length = xmlStrlen(buffer); 1487 input->end = &buffer[input->length]; 1488 return(input); 1489} 1490 1491/** 1492 * xmlNewInputFromFile: 1493 * @ctxt: an XML parser context 1494 * @filename: the filename to use as entity 1495 * 1496 * Create a new input stream based on a file or an URL. 1497 * 1498 * Returns the new input stream or NULL in case of error 1499 */ 1500xmlParserInputPtr 1501xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1502 xmlParserInputBufferPtr buf; 1503 xmlParserInputPtr inputStream; 1504 char *directory = NULL; 1505 xmlChar *URI = NULL; 1506 1507 if (xmlParserDebugEntities) 1508 xmlGenericError(xmlGenericErrorContext, 1509 "new input from file: %s\n", filename); 1510 if (ctxt == NULL) return(NULL); 1511 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1512 if (buf == NULL) { 1513 if (filename == NULL) 1514 __xmlLoaderErr(ctxt, 1515 "failed to load external entity: NULL filename \n", 1516 NULL); 1517 else 1518 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1519 (const char *) filename); 1520 return(NULL); 1521 } 1522 1523 inputStream = xmlNewInputStream(ctxt); 1524 if (inputStream == NULL) 1525 return(NULL); 1526 1527 inputStream->buf = buf; 1528 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1529 if (inputStream == NULL) 1530 
return(NULL); 1531 1532 if (inputStream->filename == NULL) 1533 URI = xmlStrdup((xmlChar *) filename); 1534 else 1535 URI = xmlStrdup((xmlChar *) inputStream->filename); 1536 directory = xmlParserGetDirectory((const char *) URI); 1537 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1538 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1539 if (URI != NULL) xmlFree((char *) URI); 1540 inputStream->directory = directory; 1541 1542 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1543 if ((ctxt->directory == NULL) && (directory != NULL)) 1544 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1545 return(inputStream); 1546} 1547 1548/************************************************************************ 1549 * * 1550 * Commodity functions to handle parser contexts * 1551 * * 1552 ************************************************************************/ 1553 1554/** 1555 * xmlInitParserCtxt: 1556 * @ctxt: an XML parser context 1557 * 1558 * Initialize a parser context 1559 * 1560 * Returns 0 in case of success and -1 in case of error 1561 */ 1562 1563int 1564xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1565{ 1566 xmlParserInputPtr input; 1567 1568 if(ctxt==NULL) { 1569 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1570 return(-1); 1571 } 1572 1573 xmlDefaultSAXHandlerInit(); 1574 1575 if (ctxt->dict == NULL) 1576 ctxt->dict = xmlDictCreate(); 1577 if (ctxt->dict == NULL) { 1578 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1579 return(-1); 1580 } 1581 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1582 1583 if (ctxt->sax == NULL) 1584 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1585 if (ctxt->sax == NULL) { 1586 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1587 return(-1); 1588 } 1589 else 1590 xmlSAXVersion(ctxt->sax, 2); 1591 1592 ctxt->maxatts = 0; 1593 ctxt->atts = NULL; 1594 /* Allocate the Input stack */ 1595 if (ctxt->inputTab 
== NULL) { 1596 ctxt->inputTab = (xmlParserInputPtr *) 1597 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1598 ctxt->inputMax = 5; 1599 } 1600 if (ctxt->inputTab == NULL) { 1601 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1602 ctxt->inputNr = 0; 1603 ctxt->inputMax = 0; 1604 ctxt->input = NULL; 1605 return(-1); 1606 } 1607 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1608 xmlFreeInputStream(input); 1609 } 1610 ctxt->inputNr = 0; 1611 ctxt->input = NULL; 1612 1613 ctxt->version = NULL; 1614 ctxt->encoding = NULL; 1615 ctxt->standalone = -1; 1616 ctxt->hasExternalSubset = 0; 1617 ctxt->hasPErefs = 0; 1618 ctxt->html = 0; 1619 ctxt->external = 0; 1620 ctxt->instate = XML_PARSER_START; 1621 ctxt->token = 0; 1622 ctxt->directory = NULL; 1623 1624 /* Allocate the Node stack */ 1625 if (ctxt->nodeTab == NULL) { 1626 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1627 ctxt->nodeMax = 10; 1628 } 1629 if (ctxt->nodeTab == NULL) { 1630 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1631 ctxt->nodeNr = 0; 1632 ctxt->nodeMax = 0; 1633 ctxt->node = NULL; 1634 ctxt->inputNr = 0; 1635 ctxt->inputMax = 0; 1636 ctxt->input = NULL; 1637 return(-1); 1638 } 1639 ctxt->nodeNr = 0; 1640 ctxt->node = NULL; 1641 1642 /* Allocate the Name stack */ 1643 if (ctxt->nameTab == NULL) { 1644 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1645 ctxt->nameMax = 10; 1646 } 1647 if (ctxt->nameTab == NULL) { 1648 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1649 ctxt->nodeNr = 0; 1650 ctxt->nodeMax = 0; 1651 ctxt->node = NULL; 1652 ctxt->inputNr = 0; 1653 ctxt->inputMax = 0; 1654 ctxt->input = NULL; 1655 ctxt->nameNr = 0; 1656 ctxt->nameMax = 0; 1657 ctxt->name = NULL; 1658 return(-1); 1659 } 1660 ctxt->nameNr = 0; 1661 ctxt->name = NULL; 1662 1663 /* Allocate the space stack */ 1664 if (ctxt->spaceTab == NULL) { 1665 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1666 ctxt->spaceMax = 10; 1667 } 1668 
if (ctxt->spaceTab == NULL) { 1669 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1670 ctxt->nodeNr = 0; 1671 ctxt->nodeMax = 0; 1672 ctxt->node = NULL; 1673 ctxt->inputNr = 0; 1674 ctxt->inputMax = 0; 1675 ctxt->input = NULL; 1676 ctxt->nameNr = 0; 1677 ctxt->nameMax = 0; 1678 ctxt->name = NULL; 1679 ctxt->spaceNr = 0; 1680 ctxt->spaceMax = 0; 1681 ctxt->space = NULL; 1682 return(-1); 1683 } 1684 ctxt->spaceNr = 1; 1685 ctxt->spaceMax = 10; 1686 ctxt->spaceTab[0] = -1; 1687 ctxt->space = &ctxt->spaceTab[0]; 1688 ctxt->userData = ctxt; 1689 ctxt->myDoc = NULL; 1690 ctxt->wellFormed = 1; 1691 ctxt->nsWellFormed = 1; 1692 ctxt->valid = 1; 1693 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1694 if (ctxt->loadsubset) { 1695 ctxt->options |= XML_PARSE_DTDLOAD; 1696 } 1697 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1698 ctxt->pedantic = xmlPedanticParserDefaultValue; 1699 if (ctxt->pedantic) { 1700 ctxt->options |= XML_PARSE_PEDANTIC; 1701 } 1702 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1703 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1704 if (ctxt->keepBlanks == 0) { 1705 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1706 ctxt->options |= XML_PARSE_NOBLANKS; 1707 } 1708 1709 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1710 ctxt->vctxt.userData = ctxt; 1711 ctxt->vctxt.error = xmlParserValidityError; 1712 ctxt->vctxt.warning = xmlParserValidityWarning; 1713 if (ctxt->validate) { 1714 if (xmlGetWarningsDefaultValue == 0) 1715 ctxt->vctxt.warning = NULL; 1716 else 1717 ctxt->vctxt.warning = xmlParserValidityWarning; 1718 ctxt->vctxt.nodeMax = 0; 1719 ctxt->options |= XML_PARSE_DTDVALID; 1720 } 1721 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1722 if (ctxt->replaceEntities) { 1723 ctxt->options |= XML_PARSE_NOENT; 1724 } 1725 ctxt->record_info = 0; 1726 ctxt->nbChars = 0; 1727 ctxt->checkIndex = 0; 1728 ctxt->inSubset = 0; 1729 ctxt->errNo = XML_ERR_OK; 1730 ctxt->depth = 0; 1731 ctxt->charset = 
XML_CHAR_ENCODING_UTF8; 1732 ctxt->catalogs = NULL; 1733 ctxt->nbentities = 0; 1734 ctxt->sizeentities = 0; 1735 ctxt->sizeentcopy = 0; 1736 ctxt->input_id = 1; 1737 xmlInitNodeInfoSeq(&ctxt->node_seq); 1738 return(0); 1739} 1740 1741/** 1742 * xmlFreeParserCtxt: 1743 * @ctxt: an XML parser context 1744 * 1745 * Free all the memory used by a parser context. However the parsed 1746 * document in ctxt->myDoc is not freed. 1747 */ 1748 1749void 1750xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1751{ 1752 xmlParserInputPtr input; 1753 1754 if (ctxt == NULL) return; 1755 1756 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1757 xmlFreeInputStream(input); 1758 } 1759 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1760 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1761 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1762 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1763 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1764 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1765 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1766 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1767 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1768#ifdef LIBXML_SAX1_ENABLED 1769 if ((ctxt->sax != NULL) && 1770 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1771#else 1772 if (ctxt->sax != NULL) 1773#endif /* LIBXML_SAX1_ENABLED */ 1774 xmlFree(ctxt->sax); 1775 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1776 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1777 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1778 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1779 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1780 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1781 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1782 if (ctxt->attsDefault != NULL) 1783 xmlHashFree(ctxt->attsDefault, 
(xmlHashDeallocator) xmlFree); 1784 if (ctxt->attsSpecial != NULL) 1785 xmlHashFree(ctxt->attsSpecial, NULL); 1786 if (ctxt->freeElems != NULL) { 1787 xmlNodePtr cur, next; 1788 1789 cur = ctxt->freeElems; 1790 while (cur != NULL) { 1791 next = cur->next; 1792 xmlFree(cur); 1793 cur = next; 1794 } 1795 } 1796 if (ctxt->freeAttrs != NULL) { 1797 xmlAttrPtr cur, next; 1798 1799 cur = ctxt->freeAttrs; 1800 while (cur != NULL) { 1801 next = cur->next; 1802 xmlFree(cur); 1803 cur = next; 1804 } 1805 } 1806 /* 1807 * cleanup the error strings 1808 */ 1809 if (ctxt->lastError.message != NULL) 1810 xmlFree(ctxt->lastError.message); 1811 if (ctxt->lastError.file != NULL) 1812 xmlFree(ctxt->lastError.file); 1813 if (ctxt->lastError.str1 != NULL) 1814 xmlFree(ctxt->lastError.str1); 1815 if (ctxt->lastError.str2 != NULL) 1816 xmlFree(ctxt->lastError.str2); 1817 if (ctxt->lastError.str3 != NULL) 1818 xmlFree(ctxt->lastError.str3); 1819 1820#ifdef LIBXML_CATALOG_ENABLED 1821 if (ctxt->catalogs != NULL) 1822 xmlCatalogFreeLocal(ctxt->catalogs); 1823#endif 1824 xmlFree(ctxt); 1825} 1826 1827/** 1828 * xmlNewParserCtxt: 1829 * 1830 * Allocate and initialize a new parser context. 
1831 * 1832 * Returns the xmlParserCtxtPtr or NULL 1833 */ 1834 1835xmlParserCtxtPtr 1836xmlNewParserCtxt(void) 1837{ 1838 xmlParserCtxtPtr ctxt; 1839 1840 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1841 if (ctxt == NULL) { 1842 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1843 return(NULL); 1844 } 1845 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1846 if (xmlInitParserCtxt(ctxt) < 0) { 1847 xmlFreeParserCtxt(ctxt); 1848 return(NULL); 1849 } 1850 return(ctxt); 1851} 1852 1853/************************************************************************ 1854 * * 1855 * Handling of node informations * 1856 * * 1857 ************************************************************************/ 1858 1859/** 1860 * xmlClearParserCtxt: 1861 * @ctxt: an XML parser context 1862 * 1863 * Clear (release owned resources) and reinitialize a parser context 1864 */ 1865 1866void 1867xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1868{ 1869 if (ctxt==NULL) 1870 return; 1871 xmlClearNodeInfoSeq(&ctxt->node_seq); 1872 xmlCtxtReset(ctxt); 1873} 1874 1875 1876/** 1877 * xmlParserFindNodeInfo: 1878 * @ctx: an XML parser context 1879 * @node: an XML node within the tree 1880 * 1881 * Find the parser node info struct for a given node 1882 * 1883 * Returns an xmlParserNodeInfo block pointer or NULL 1884 */ 1885const xmlParserNodeInfo * 1886xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1887{ 1888 unsigned long pos; 1889 1890 if ((ctx == NULL) || (node == NULL)) 1891 return (NULL); 1892 /* Find position where node should be at */ 1893 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1894 if (pos < ctx->node_seq.length 1895 && ctx->node_seq.buffer[pos].node == node) 1896 return &ctx->node_seq.buffer[pos]; 1897 else 1898 return NULL; 1899} 1900 1901 1902/** 1903 * xmlInitNodeInfoSeq: 1904 * @seq: a node info sequence pointer 1905 * 1906 * -- Initialize (set to initial state) node info sequence 1907 */ 1908void 
1909xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1910{ 1911 if (seq == NULL) 1912 return; 1913 seq->length = 0; 1914 seq->maximum = 0; 1915 seq->buffer = NULL; 1916} 1917 1918/** 1919 * xmlClearNodeInfoSeq: 1920 * @seq: a node info sequence pointer 1921 * 1922 * -- Clear (release memory and reinitialize) node 1923 * info sequence 1924 */ 1925void 1926xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1927{ 1928 if (seq == NULL) 1929 return; 1930 if (seq->buffer != NULL) 1931 xmlFree(seq->buffer); 1932 xmlInitNodeInfoSeq(seq); 1933} 1934 1935/** 1936 * xmlParserFindNodeInfoIndex: 1937 * @seq: a node info sequence pointer 1938 * @node: an XML node pointer 1939 * 1940 * 1941 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1942 * the given node is or should be at in a sorted sequence 1943 * 1944 * Returns a long indicating the position of the record 1945 */ 1946unsigned long 1947xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1948 const xmlNodePtr node) 1949{ 1950 unsigned long upper, lower, middle; 1951 int found = 0; 1952 1953 if ((seq == NULL) || (node == NULL)) 1954 return ((unsigned long) -1); 1955 1956 /* Do a binary search for the key */ 1957 lower = 1; 1958 upper = seq->length; 1959 middle = 0; 1960 while (lower <= upper && !found) { 1961 middle = lower + (upper - lower) / 2; 1962 if (node == seq->buffer[middle - 1].node) 1963 found = 1; 1964 else if (node < seq->buffer[middle - 1].node) 1965 upper = middle - 1; 1966 else 1967 lower = middle + 1; 1968 } 1969 1970 /* Return position */ 1971 if (middle == 0 || seq->buffer[middle - 1].node < node) 1972 return middle; 1973 else 1974 return middle - 1; 1975} 1976 1977 1978/** 1979 * xmlParserAddNodeInfo: 1980 * @ctxt: an XML parser context 1981 * @info: a node info sequence pointer 1982 * 1983 * Insert node info record into the sorted sequence 1984 */ 1985void 1986xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1987 const xmlParserNodeInfoPtr info) 1988{ 1989 unsigned long pos; 1990 
1991 if ((ctxt == NULL) || (info == NULL)) return; 1992 1993 /* Find pos and check to see if node is already in the sequence */ 1994 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1995 info->node); 1996 1997 if ((pos < ctxt->node_seq.length) && 1998 (ctxt->node_seq.buffer != NULL) && 1999 (ctxt->node_seq.buffer[pos].node == info->node)) { 2000 ctxt->node_seq.buffer[pos] = *info; 2001 } 2002 2003 /* Otherwise, we need to add new node to buffer */ 2004 else { 2005 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2006 (ctxt->node_seq.buffer == NULL)) { 2007 xmlParserNodeInfo *tmp_buffer; 2008 unsigned int byte_size; 2009 2010 if (ctxt->node_seq.maximum == 0) 2011 ctxt->node_seq.maximum = 2; 2012 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2013 (2 * ctxt->node_seq.maximum)); 2014 2015 if (ctxt->node_seq.buffer == NULL) 2016 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2017 else 2018 tmp_buffer = 2019 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2020 byte_size); 2021 2022 if (tmp_buffer == NULL) { 2023 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2024 return; 2025 } 2026 ctxt->node_seq.buffer = tmp_buffer; 2027 ctxt->node_seq.maximum *= 2; 2028 } 2029 2030 /* If position is not at end, move elements out of the way */ 2031 if (pos != ctxt->node_seq.length) { 2032 unsigned long i; 2033 2034 for (i = ctxt->node_seq.length; i > pos; i--) 2035 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2036 } 2037 2038 /* Copy element and increase length */ 2039 ctxt->node_seq.buffer[pos] = *info; 2040 ctxt->node_seq.length++; 2041 } 2042} 2043 2044/************************************************************************ 2045 * * 2046 * Defaults settings * 2047 * * 2048 ************************************************************************/ 2049/** 2050 * xmlPedanticParserDefault: 2051 * @val: int 0 or 1 2052 * 2053 * Set and return the previous value for enabling pedantic warnings. 
2054 * 2055 * Returns the last value for 0 for no substitution, 1 for substitution. 2056 */ 2057 2058int 2059xmlPedanticParserDefault(int val) { 2060 int old = xmlPedanticParserDefaultValue; 2061 2062 xmlPedanticParserDefaultValue = val; 2063 return(old); 2064} 2065 2066/** 2067 * xmlLineNumbersDefault: 2068 * @val: int 0 or 1 2069 * 2070 * Set and return the previous value for enabling line numbers in elements 2071 * contents. This may break on old application and is turned off by default. 2072 * 2073 * Returns the last value for 0 for no substitution, 1 for substitution. 2074 */ 2075 2076int 2077xmlLineNumbersDefault(int val) { 2078 int old = xmlLineNumbersDefaultValue; 2079 2080 xmlLineNumbersDefaultValue = val; 2081 return(old); 2082} 2083 2084/** 2085 * xmlSubstituteEntitiesDefault: 2086 * @val: int 0 or 1 2087 * 2088 * Set and return the previous value for default entity support. 2089 * Initially the parser always keep entity references instead of substituting 2090 * entity values in the output. This function has to be used to change the 2091 * default parser behavior 2092 * SAX::substituteEntities() has to be used for changing that on a file by 2093 * file basis. 2094 * 2095 * Returns the last value for 0 for no substitution, 1 for substitution. 2096 */ 2097 2098int 2099xmlSubstituteEntitiesDefault(int val) { 2100 int old = xmlSubstituteEntitiesDefaultValue; 2101 2102 xmlSubstituteEntitiesDefaultValue = val; 2103 return(old); 2104} 2105 2106/** 2107 * xmlKeepBlanksDefault: 2108 * @val: int 0 or 1 2109 * 2110 * Set and return the previous value for default blanks text nodes support. 2111 * The 1.x version of the parser used an heuristic to try to detect 2112 * ignorable white spaces. As a result the SAX callback was generating 2113 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2114 * using the DOM output text nodes containing those blanks were not generated. 
2115 * The 2.x and later version will switch to the XML standard way and 2116 * ignorableWhitespace() are only generated when running the parser in 2117 * validating mode and when the current element doesn't allow CDATA or 2118 * mixed content. 2119 * This function is provided as a way to force the standard behavior 2120 * on 1.X libs and to switch back to the old mode for compatibility when 2121 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2122 * by using xmlIsBlankNode() commodity function to detect the "empty" 2123 * nodes generated. 2124 * This value also affect autogeneration of indentation when saving code 2125 * if blanks sections are kept, indentation is not generated. 2126 * 2127 * Returns the last value for 0 for no substitution, 1 for substitution. 2128 */ 2129 2130int 2131xmlKeepBlanksDefault(int val) { 2132 int old = xmlKeepBlanksDefaultValue; 2133 2134 xmlKeepBlanksDefaultValue = val; 2135 if (!val) xmlIndentTreeOutput = 1; 2136 return(old); 2137} 2138 2139#define bottom_parserInternals 2140#include "elfgcchack.h" 2141