1/* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10#define IN_LIBXML 11#include "libxml.h" 12 13#if defined(WIN32) && !defined (__CYGWIN__) 14#define XML_DIR_SEP '\\' 15#else 16#define XML_DIR_SEP '/' 17#endif 18 19#include <string.h> 20#ifdef HAVE_CTYPE_H 21#include <ctype.h> 22#endif 23#ifdef HAVE_STDLIB_H 24#include <stdlib.h> 25#endif 26#ifdef HAVE_SYS_STAT_H 27#include <sys/stat.h> 28#endif 29#ifdef HAVE_FCNTL_H 30#include <fcntl.h> 31#endif 32#ifdef HAVE_UNISTD_H 33#include <unistd.h> 34#endif 35#ifdef HAVE_ZLIB_H 36#include <zlib.h> 37#endif 38 39#include <libxml/xmlmemory.h> 40#include <libxml/tree.h> 41#include <libxml/parser.h> 42#include <libxml/parserInternals.h> 43#include <libxml/valid.h> 44#include <libxml/entities.h> 45#include <libxml/xmlerror.h> 46#include <libxml/encoding.h> 47#include <libxml/valid.h> 48#include <libxml/xmlIO.h> 49#include <libxml/uri.h> 50#include <libxml/dict.h> 51#include <libxml/SAX.h> 52#ifdef LIBXML_CATALOG_ENABLED 53#include <libxml/catalog.h> 54#endif 55#include <libxml/globals.h> 56#include <libxml/chvalid.h> 57 58#define CUR(ctxt) ctxt->input->cur 59#define END(ctxt) ctxt->input->end 60#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) 61 62#include "buf.h" 63#include "enc.h" 64 65/* 66 * Various global defaults for parsing 67 */ 68 69/** 70 * xmlCheckVersion: 71 * @version: the include version number 72 * 73 * check the compiled lib version against the include one. 74 * This can warn or immediately kill the application 75 */ 76void 77xmlCheckVersion(int version) { 78 int myversion = (int) LIBXML_VERSION; 79 80 xmlInitParser(); 81 82 if ((myversion / 10000) != (version / 10000)) { 83 xmlGenericError(xmlGenericErrorContext, 84 "Fatal: program compiled against libxml %d using libxml %d\n", 85 (version / 10000), (myversion / 10000)); 86 fprintf(stderr, 87 "Fatal: program compiled against libxml %d using libxml %d\n", 88 (version / 10000), (myversion / 10000)); 89 } 90 if ((myversion / 100) < (version / 100)) { 91 xmlGenericError(xmlGenericErrorContext, 92 "Warning: program compiled against libxml %d using older %d\n", 93 (version / 100), (myversion / 100)); 94 } 95} 96 97 98/************************************************************************ 99 * * 100 * Some factorized error routines * 101 * * 102 ************************************************************************/ 103 104 105/** 106 * xmlErrMemory: 107 * @ctxt: an XML parser context 108 * @extra: extra informations 109 * 110 * Handle a redefinition of attribute error 111 */ 112void 113xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 114{ 115 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 116 (ctxt->instate == XML_PARSER_EOF)) 117 return; 118 if (ctxt != NULL) { 119 ctxt->errNo = XML_ERR_NO_MEMORY; 120 ctxt->instate = XML_PARSER_EOF; 121 ctxt->disableSAX = 1; 122 } 123 if (extra) 124 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 125 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 126 NULL, NULL, 0, 0, 127 "Memory allocation failed : %s\n", extra); 128 else 129 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 130 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 131 NULL, NULL, 0, 0, "Memory allocation failed\n"); 132} 133 134/** 135 * __xmlErrEncoding: 136 * @ctxt: an XML parser context 137 * @xmlerr: the error number 138 * @msg: the error message 139 * @str1: an string info 140 * @str2: an string info 141 * 142 * Handle an encoding error 143 */ 144void 145__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 146 const char *msg, const xmlChar * str1, const xmlChar * str2) 147{ 148 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 149 (ctxt->instate == XML_PARSER_EOF)) 150 return; 151 if (ctxt != NULL) 152 ctxt->errNo = xmlerr; 153 __xmlRaiseError(NULL, NULL, NULL, 154 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 155 NULL, 0, (const char *) str1, (const char *) str2, 156 NULL, 0, 0, msg, str1, str2); 157 if (ctxt != NULL) { 158 ctxt->wellFormed = 0; 159 if (ctxt->recovery == 0) 160 ctxt->disableSAX = 1; 161 } 162} 163 164/** 165 * xmlErrInternal: 166 * @ctxt: an XML parser context 167 * @msg: the error message 168 * @str: error informations 169 * 170 * Handle an internal error 171 */ 172static void LIBXML_ATTR_FORMAT(2,0) 173xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 174{ 175 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 176 (ctxt->instate == XML_PARSER_EOF)) 177 return; 178 if (ctxt != NULL) 179 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 180 __xmlRaiseError(NULL, NULL, NULL, 181 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 182 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 183 0, 0, msg, str); 184 if (ctxt != NULL) { 185 ctxt->wellFormed = 0; 186 if (ctxt->recovery == 0) 187 ctxt->disableSAX = 1; 188 } 189} 190 191/** 192 * xmlErrEncodingInt: 193 * @ctxt: an XML parser context 194 * @error: the error number 195 * @msg: the error message 196 * @val: an integer value 197 * 198 * n encoding error 199 */ 200static void LIBXML_ATTR_FORMAT(3,0) 201xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 202 const char *msg, int val) 203{ 204 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 205 (ctxt->instate == XML_PARSER_EOF)) 206 return; 207 if (ctxt != NULL) 208 ctxt->errNo = error; 209 __xmlRaiseError(NULL, NULL, NULL, 210 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 211 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 212 if (ctxt != NULL) { 213 ctxt->wellFormed = 0; 214 if (ctxt->recovery == 0) 215 ctxt->disableSAX = 1; 216 } 217} 218 219/** 220 * xmlIsLetter: 221 * @c: an unicode character (int) 222 * 223 * Check whether the character is allowed by the production 224 * [84] Letter ::= BaseChar | Ideographic 225 * 226 * Returns 0 if not, non-zero otherwise 227 */ 228int 229xmlIsLetter(int c) { 230 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 231} 232 233/************************************************************************ 234 * * 235 * Input handling functions for progressive parsing * 236 * * 237 ************************************************************************/ 238 239/* #define DEBUG_INPUT */ 240/* #define DEBUG_STACK */ 241/* #define DEBUG_PUSH */ 242 243 244/* we need to keep enough input to show errors in context */ 245#define LINE_LEN 80 246 247#ifdef DEBUG_INPUT 248#define CHECK_BUFFER(in) check_buffer(in) 249 250static 251void check_buffer(xmlParserInputPtr in) { 252 if (in->base != xmlBufContent(in->buf->buffer)) { 253 xmlGenericError(xmlGenericErrorContext, 254 "xmlParserInput: base mismatch problem\n"); 255 } 256 if (in->cur < in->base) { 257 xmlGenericError(xmlGenericErrorContext, 258 "xmlParserInput: cur < base problem\n"); 259 } 260 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 261 xmlGenericError(xmlGenericErrorContext, 262 "xmlParserInput: cur > base + use problem\n"); 263 } 264 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 265 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 266 xmlBufUse(in->buf->buffer)); 267} 268 269#else 270#define CHECK_BUFFER(in) 271#endif 272 273 274/** 275 * xmlParserInputRead: 276 * @in: an XML parser input 277 * @len: an indicative size for the lookahead 278 * 279 * This function was internal and is deprecated. 280 * 281 * Returns -1 as this is an error to use it. 282 */ 283int 284xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 285 return(-1); 286} 287 288/** 289 * xmlParserInputGrow: 290 * @in: an XML parser input 291 * @len: an indicative size for the lookahead 292 * 293 * This function increase the input for the parser. It tries to 294 * preserve pointers to the input buffer, and keep already read data 295 * 296 * Returns the amount of char read, or -1 in case of error, 0 indicate the 297 * end of this entity 298 */ 299int 300xmlParserInputGrow(xmlParserInputPtr in, int len) { 301 int ret; 302 size_t indx; 303 const xmlChar *content; 304 305 if ((in == NULL) || (len < 0)) return(-1); 306#ifdef DEBUG_INPUT 307 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 308#endif 309 if (in->buf == NULL) return(-1); 310 if (in->base == NULL) return(-1); 311 if (in->cur == NULL) return(-1); 312 if (in->buf->buffer == NULL) return(-1); 313 314 CHECK_BUFFER(in); 315 316 indx = in->cur - in->base; 317 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 318 319 CHECK_BUFFER(in); 320 321 return(0); 322 } 323 if (in->buf->readcallback != NULL) { 324 ret = xmlParserInputBufferGrow(in->buf, len); 325 } else 326 return(0); 327 328 /* 329 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 330 * block, but we use it really as an integer to do some 331 * pointer arithmetic. Insure will raise it as a bug but in 332 * that specific case, that's not ! 333 */ 334 335 content = xmlBufContent(in->buf->buffer); 336 if (in->base != content) { 337 /* 338 * the buffer has been reallocated 339 */ 340 indx = in->cur - in->base; 341 in->base = content; 342 in->cur = &content[indx]; 343 } 344 in->end = xmlBufEnd(in->buf->buffer); 345 346 CHECK_BUFFER(in); 347 348 return(ret); 349} 350 351/** 352 * xmlParserInputShrink: 353 * @in: an XML parser input 354 * 355 * This function removes used input for the parser. 356 */ 357void 358xmlParserInputShrink(xmlParserInputPtr in) { 359 size_t used; 360 size_t ret; 361 size_t indx; 362 const xmlChar *content; 363 364#ifdef DEBUG_INPUT 365 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 366#endif 367 if (in == NULL) return; 368 if (in->buf == NULL) return; 369 if (in->base == NULL) return; 370 if (in->cur == NULL) return; 371 if (in->buf->buffer == NULL) return; 372 373 CHECK_BUFFER(in); 374 375 used = in->cur - xmlBufContent(in->buf->buffer); 376 /* 377 * Do not shrink on large buffers whose only a tiny fraction 378 * was consumed 379 */ 380 if (used > INPUT_CHUNK) { 381 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 382 if (ret > 0) { 383 in->cur -= ret; 384 in->consumed += ret; 385 } 386 in->end = xmlBufEnd(in->buf->buffer); 387 } 388 389 CHECK_BUFFER(in); 390 391 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 392 return; 393 } 394 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 395 content = xmlBufContent(in->buf->buffer); 396 if (in->base != content) { 397 /* 398 * the buffer has been reallocated 399 */ 400 indx = in->cur - in->base; 401 in->base = content; 402 in->cur = &content[indx]; 403 } 404 in->end = xmlBufEnd(in->buf->buffer); 405 406 CHECK_BUFFER(in); 407} 408 409/************************************************************************ 410 * * 411 * UTF8 character input and related functions * 412 * * 413 ************************************************************************/ 414 415/** 416 * xmlNextChar: 417 * @ctxt: the XML parser context 418 * 419 * Skip to the next char input char. 420 */ 421 422void 423xmlNextChar(xmlParserCtxtPtr ctxt) 424{ 425 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 426 (ctxt->input == NULL)) 427 return; 428 429 if (!(VALID_CTXT(ctxt))) { 430 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); 431 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 432 xmlStopParser(ctxt); 433 return; 434 } 435 436 if ((*ctxt->input->cur == 0) && 437 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { 438 if ((ctxt->instate != XML_PARSER_COMMENT)) 439 xmlPopInput(ctxt); 440 return; 441 } 442 443 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 444 const unsigned char *cur; 445 unsigned char c; 446 447 /* 448 * 2.11 End-of-Line Handling 449 * the literal two-character sequence "#xD#xA" or a standalone 450 * literal #xD, an XML processor must pass to the application 451 * the single character #xA. 452 */ 453 if (*(ctxt->input->cur) == '\n') { 454 ctxt->input->line++; ctxt->input->col = 1; 455 } else 456 ctxt->input->col++; 457 458 /* 459 * We are supposed to handle UTF8, check it's valid 460 * From rfc2044: encoding of the Unicode values on UTF-8: 461 * 462 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 463 * 0000 0000-0000 007F 0xxxxxxx 464 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 465 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 466 * 467 * Check for the 0x110000 limit too 468 */ 469 cur = ctxt->input->cur; 470 471 c = *cur; 472 if (c & 0x80) { 473 if (c == 0xC0) 474 goto encoding_error; 475 if (cur[1] == 0) { 476 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 477 cur = ctxt->input->cur; 478 } 479 if ((cur[1] & 0xc0) != 0x80) 480 goto encoding_error; 481 if ((c & 0xe0) == 0xe0) { 482 unsigned int val; 483 484 if (cur[2] == 0) { 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 486 cur = ctxt->input->cur; 487 } 488 if ((cur[2] & 0xc0) != 0x80) 489 goto encoding_error; 490 if ((c & 0xf0) == 0xf0) { 491 if (cur[3] == 0) { 492 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 493 cur = ctxt->input->cur; 494 } 495 if (((c & 0xf8) != 0xf0) || 496 ((cur[3] & 0xc0) != 0x80)) 497 goto encoding_error; 498 /* 4-byte code */ 499 ctxt->input->cur += 4; 500 val = (cur[0] & 0x7) << 18; 501 val |= (cur[1] & 0x3f) << 12; 502 val |= (cur[2] & 0x3f) << 6; 503 val |= cur[3] & 0x3f; 504 } else { 505 /* 3-byte code */ 506 ctxt->input->cur += 3; 507 val = (cur[0] & 0xf) << 12; 508 val |= (cur[1] & 0x3f) << 6; 509 val |= cur[2] & 0x3f; 510 } 511 if (((val > 0xd7ff) && (val < 0xe000)) || 512 ((val > 0xfffd) && (val < 0x10000)) || 513 (val >= 0x110000)) { 514 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 515 "Char 0x%X out of allowed range\n", 516 val); 517 } 518 } else 519 /* 2-byte code */ 520 ctxt->input->cur += 2; 521 } else 522 /* 1-byte code */ 523 ctxt->input->cur++; 524 525 ctxt->nbChars++; 526 if (*ctxt->input->cur == 0) 527 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 528 } else { 529 /* 530 * Assume it's a fixed length encoding (1) with 531 * a compatible encoding for the ASCII set, since 532 * XML constructs only use < 128 chars 533 */ 534 535 if (*(ctxt->input->cur) == '\n') { 536 ctxt->input->line++; ctxt->input->col = 1; 537 } else 538 ctxt->input->col++; 539 ctxt->input->cur++; 540 ctxt->nbChars++; 541 if (*ctxt->input->cur == 0) 542 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 543 } 544 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 545 xmlParserHandlePEReference(ctxt); 546 if ((*ctxt->input->cur == 0) && 547 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 548 xmlPopInput(ctxt); 549 return; 550encoding_error: 551 /* 552 * If we detect an UTF8 error that probably mean that the 553 * input encoding didn't get properly advertised in the 554 * declaration header. Report the error and switch the encoding 555 * to ISO-Latin-1 (if you don't like this policy, just declare the 556 * encoding !) 557 */ 558 if ((ctxt == NULL) || (ctxt->input == NULL) || 559 (ctxt->input->end - ctxt->input->cur < 4)) { 560 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 561 "Input is not proper UTF-8, indicate encoding !\n", 562 NULL, NULL); 563 } else { 564 char buffer[150]; 565 566 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 567 ctxt->input->cur[0], ctxt->input->cur[1], 568 ctxt->input->cur[2], ctxt->input->cur[3]); 569 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 570 "Input is not proper UTF-8, indicate encoding !\n%s", 571 BAD_CAST buffer, NULL); 572 } 573 ctxt->charset = XML_CHAR_ENCODING_8859_1; 574 ctxt->input->cur++; 575 return; 576} 577 578/** 579 * xmlCurrentChar: 580 * @ctxt: the XML parser context 581 * @len: pointer to the length of the char read 582 * 583 * The current char value, if using UTF-8 this may actually span multiple 584 * bytes in the input buffer. Implement the end of line normalization: 585 * 2.11 End-of-Line Handling 586 * Wherever an external parsed entity or the literal entity value 587 * of an internal parsed entity contains either the literal two-character 588 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 589 * must pass to the application the single character #xA. 590 * This behavior can conveniently be produced by normalizing all 591 * line breaks to #xA on input, before parsing.) 592 * 593 * Returns the current char value and its length 594 */ 595 596int 597xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 598 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 599 if (ctxt->instate == XML_PARSER_EOF) 600 return(0); 601 602 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 603 *len = 1; 604 return((int) *ctxt->input->cur); 605 } 606 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 607 /* 608 * We are supposed to handle UTF8, check it's valid 609 * From rfc2044: encoding of the Unicode values on UTF-8: 610 * 611 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 612 * 0000 0000-0000 007F 0xxxxxxx 613 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 614 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 615 * 616 * Check for the 0x110000 limit too 617 */ 618 const unsigned char *cur = ctxt->input->cur; 619 unsigned char c; 620 unsigned int val; 621 622 c = *cur; 623 if (c & 0x80) { 624 if (((c & 0x40) == 0) || (c == 0xC0)) 625 goto encoding_error; 626 if (cur[1] == 0) { 627 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 628 cur = ctxt->input->cur; 629 } 630 if ((cur[1] & 0xc0) != 0x80) 631 goto encoding_error; 632 if ((c & 0xe0) == 0xe0) { 633 if (cur[2] == 0) { 634 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 635 cur = ctxt->input->cur; 636 } 637 if ((cur[2] & 0xc0) != 0x80) 638 goto encoding_error; 639 if ((c & 0xf0) == 0xf0) { 640 if (cur[3] == 0) { 641 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 642 cur = ctxt->input->cur; 643 } 644 if (((c & 0xf8) != 0xf0) || 645 ((cur[3] & 0xc0) != 0x80)) 646 goto encoding_error; 647 /* 4-byte code */ 648 *len = 4; 649 val = (cur[0] & 0x7) << 18; 650 val |= (cur[1] & 0x3f) << 12; 651 val |= (cur[2] & 0x3f) << 6; 652 val |= cur[3] & 0x3f; 653 if (val < 0x10000) 654 goto encoding_error; 655 } else { 656 /* 3-byte code */ 657 *len = 3; 658 val = (cur[0] & 0xf) << 12; 659 val |= (cur[1] & 0x3f) << 6; 660 val |= cur[2] & 0x3f; 661 if (val < 0x800) 662 goto encoding_error; 663 } 664 } else { 665 /* 2-byte code */ 666 *len = 2; 667 val = (cur[0] & 0x1f) << 6; 668 val |= cur[1] & 0x3f; 669 if (val < 0x80) 670 goto encoding_error; 671 } 672 if (!IS_CHAR(val)) { 673 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 674 "Char 0x%X out of allowed range\n", val); 675 } 676 return(val); 677 } else { 678 /* 1-byte code */ 679 *len = 1; 680 if (*ctxt->input->cur == 0) 681 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 682 if ((*ctxt->input->cur == 0) && 683 (ctxt->input->end > ctxt->input->cur)) { 684 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 685 "Char 0x0 out of allowed range\n", 0); 686 } 687 if (*ctxt->input->cur == 0xD) { 688 if (ctxt->input->cur[1] == 0xA) { 689 ctxt->nbChars++; 690 ctxt->input->cur++; 691 } 692 return(0xA); 693 } 694 return((int) *ctxt->input->cur); 695 } 696 } 697 /* 698 * Assume it's a fixed length encoding (1) with 699 * a compatible encoding for the ASCII set, since 700 * XML constructs only use < 128 chars 701 */ 702 *len = 1; 703 if (*ctxt->input->cur == 0xD) { 704 if (ctxt->input->cur[1] == 0xA) { 705 ctxt->nbChars++; 706 ctxt->input->cur++; 707 } 708 return(0xA); 709 } 710 return((int) *ctxt->input->cur); 711encoding_error: 712 /* 713 * An encoding problem may arise from a truncated input buffer 714 * splitting a character in the middle. In that case do not raise 715 * an error but return 0 to endicate an end of stream problem 716 */ 717 if (ctxt->input->end - ctxt->input->cur < 4) { 718 *len = 0; 719 return(0); 720 } 721 722 /* 723 * If we detect an UTF8 error that probably mean that the 724 * input encoding didn't get properly advertised in the 725 * declaration header. Report the error and switch the encoding 726 * to ISO-Latin-1 (if you don't like this policy, just declare the 727 * encoding !) 728 */ 729 { 730 char buffer[150]; 731 732 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 733 ctxt->input->cur[0], ctxt->input->cur[1], 734 ctxt->input->cur[2], ctxt->input->cur[3]); 735 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 736 "Input is not proper UTF-8, indicate encoding !\n%s", 737 BAD_CAST buffer, NULL); 738 } 739 ctxt->charset = XML_CHAR_ENCODING_8859_1; 740 *len = 1; 741 return((int) *ctxt->input->cur); 742} 743 744/** 745 * xmlStringCurrentChar: 746 * @ctxt: the XML parser context 747 * @cur: pointer to the beginning of the char 748 * @len: pointer to the length of the char read 749 * 750 * The current char value, if using UTF-8 this may actually span multiple 751 * bytes in the input buffer. 752 * 753 * Returns the current char value and its length 754 */ 755 756int 757xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 758{ 759 if ((len == NULL) || (cur == NULL)) return(0); 760 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 761 /* 762 * We are supposed to handle UTF8, check it's valid 763 * From rfc2044: encoding of the Unicode values on UTF-8: 764 * 765 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 766 * 0000 0000-0000 007F 0xxxxxxx 767 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 768 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 769 * 770 * Check for the 0x110000 limit too 771 */ 772 unsigned char c; 773 unsigned int val; 774 775 c = *cur; 776 if (c & 0x80) { 777 if ((cur[1] & 0xc0) != 0x80) 778 goto encoding_error; 779 if ((c & 0xe0) == 0xe0) { 780 781 if ((cur[2] & 0xc0) != 0x80) 782 goto encoding_error; 783 if ((c & 0xf0) == 0xf0) { 784 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 785 goto encoding_error; 786 /* 4-byte code */ 787 *len = 4; 788 val = (cur[0] & 0x7) << 18; 789 val |= (cur[1] & 0x3f) << 12; 790 val |= (cur[2] & 0x3f) << 6; 791 val |= cur[3] & 0x3f; 792 } else { 793 /* 3-byte code */ 794 *len = 3; 795 val = (cur[0] & 0xf) << 12; 796 val |= (cur[1] & 0x3f) << 6; 797 val |= cur[2] & 0x3f; 798 } 799 } else { 800 /* 2-byte code */ 801 *len = 2; 802 val = (cur[0] & 0x1f) << 6; 803 val |= cur[1] & 0x3f; 804 } 805 if (!IS_CHAR(val)) { 806 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 807 "Char 0x%X out of allowed range\n", val); 808 } 809 return (val); 810 } else { 811 /* 1-byte code */ 812 *len = 1; 813 return ((int) *cur); 814 } 815 } 816 /* 817 * Assume it's a fixed length encoding (1) with 818 * a compatible encoding for the ASCII set, since 819 * XML constructs only use < 128 chars 820 */ 821 *len = 1; 822 return ((int) *cur); 823encoding_error: 824 825 /* 826 * An encoding problem may arise from a truncated input buffer 827 * splitting a character in the middle. In that case do not raise 828 * an error but return 0 to endicate an end of stream problem 829 */ 830 if ((ctxt == NULL) || (ctxt->input == NULL) || 831 (ctxt->input->end - ctxt->input->cur < 4)) { 832 *len = 0; 833 return(0); 834 } 835 /* 836 * If we detect an UTF8 error that probably mean that the 837 * input encoding didn't get properly advertised in the 838 * declaration header. Report the error and switch the encoding 839 * to ISO-Latin-1 (if you don't like this policy, just declare the 840 * encoding !) 841 */ 842 { 843 char buffer[150]; 844 845 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 846 ctxt->input->cur[0], ctxt->input->cur[1], 847 ctxt->input->cur[2], ctxt->input->cur[3]); 848 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 849 "Input is not proper UTF-8, indicate encoding !\n%s", 850 BAD_CAST buffer, NULL); 851 } 852 *len = 1; 853 return ((int) *cur); 854} 855 856/** 857 * xmlCopyCharMultiByte: 858 * @out: pointer to an array of xmlChar 859 * @val: the char value 860 * 861 * append the char value in the array 862 * 863 * Returns the number of xmlChar written 864 */ 865int 866xmlCopyCharMultiByte(xmlChar *out, int val) { 867 if (out == NULL) return(0); 868 /* 869 * We are supposed to handle UTF8, check it's valid 870 * From rfc2044: encoding of the Unicode values on UTF-8: 871 * 872 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 873 * 0000 0000-0000 007F 0xxxxxxx 874 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 875 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 876 */ 877 if (val >= 0x80) { 878 xmlChar *savedout = out; 879 int bits; 880 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 881 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 882 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 883 else { 884 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 885 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 886 val); 887 return(0); 888 } 889 for ( ; bits >= 0; bits-= 6) 890 *out++= ((val >> bits) & 0x3F) | 0x80 ; 891 return (out - savedout); 892 } 893 *out = (xmlChar) val; 894 return 1; 895} 896 897/** 898 * xmlCopyChar: 899 * @len: Ignored, compatibility 900 * @out: pointer to an array of xmlChar 901 * @val: the char value 902 * 903 * append the char value in the array 904 * 905 * Returns the number of xmlChar written 906 */ 907 908int 909xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 910 if (out == NULL) return(0); 911 /* the len parameter is ignored */ 912 if (val >= 0x80) { 913 return(xmlCopyCharMultiByte (out, val)); 914 } 915 *out = (xmlChar) val; 916 return 1; 917} 918 919/************************************************************************ 920 * * 921 * Commodity functions to switch encodings * 922 * * 923 ************************************************************************/ 924 925static int 926xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 927 xmlCharEncodingHandlerPtr handler, int len); 928static int 929xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 930 xmlCharEncodingHandlerPtr handler, int len); 931/** 932 * xmlSwitchEncoding: 933 * @ctxt: the parser context 934 * @enc: the encoding value (number) 935 * 936 * change the input functions when discovering the character encoding 937 * of a given entity. 938 * 939 * Returns 0 in case of success, -1 otherwise 940 */ 941int 942xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 943{ 944 xmlCharEncodingHandlerPtr handler; 945 int len = -1; 946 int ret; 947 948 if (ctxt == NULL) return(-1); 949 switch (enc) { 950 case XML_CHAR_ENCODING_ERROR: 951 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 952 "encoding unknown\n", NULL, NULL); 953 return(-1); 954 case XML_CHAR_ENCODING_NONE: 955 /* let's assume it's UTF-8 without the XML decl */ 956 ctxt->charset = XML_CHAR_ENCODING_UTF8; 957 return(0); 958 case XML_CHAR_ENCODING_UTF8: 959 /* default encoding, no conversion should be needed */ 960 ctxt->charset = XML_CHAR_ENCODING_UTF8; 961 962 /* 963 * Errata on XML-1.0 June 20 2001 964 * Specific handling of the Byte Order Mark for 965 * UTF-8 966 */ 967 if ((ctxt->input != NULL) && 968 (ctxt->input->cur[0] == 0xEF) && 969 (ctxt->input->cur[1] == 0xBB) && 970 (ctxt->input->cur[2] == 0xBF)) { 971 ctxt->input->cur += 3; 972 } 973 return(0); 974 case XML_CHAR_ENCODING_UTF16LE: 975 case XML_CHAR_ENCODING_UTF16BE: 976 /*The raw input characters are encoded 977 *in UTF-16. As we expect this function 978 *to be called after xmlCharEncInFunc, we expect 979 *ctxt->input->cur to contain UTF-8 encoded characters. 980 *So the raw UTF16 Byte Order Mark 981 *has also been converted into 982 *an UTF-8 BOM. Let's skip that BOM. 983 */ 984 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 985 (ctxt->input->cur[0] == 0xEF) && 986 (ctxt->input->cur[1] == 0xBB) && 987 (ctxt->input->cur[2] == 0xBF)) { 988 ctxt->input->cur += 3; 989 } 990 len = 90; 991 break; 992 case XML_CHAR_ENCODING_UCS2: 993 len = 90; 994 break; 995 case XML_CHAR_ENCODING_UCS4BE: 996 case XML_CHAR_ENCODING_UCS4LE: 997 case XML_CHAR_ENCODING_UCS4_2143: 998 case XML_CHAR_ENCODING_UCS4_3412: 999 len = 180; 1000 break; 1001 case XML_CHAR_ENCODING_EBCDIC: 1002 case XML_CHAR_ENCODING_8859_1: 1003 case XML_CHAR_ENCODING_8859_2: 1004 case XML_CHAR_ENCODING_8859_3: 1005 case XML_CHAR_ENCODING_8859_4: 1006 case XML_CHAR_ENCODING_8859_5: 1007 case XML_CHAR_ENCODING_8859_6: 1008 case XML_CHAR_ENCODING_8859_7: 1009 case XML_CHAR_ENCODING_8859_8: 1010 case XML_CHAR_ENCODING_8859_9: 1011 case XML_CHAR_ENCODING_ASCII: 1012 case XML_CHAR_ENCODING_2022_JP: 1013 case XML_CHAR_ENCODING_SHIFT_JIS: 1014 case XML_CHAR_ENCODING_EUC_JP: 1015 len = 45; 1016 break; 1017 } 1018 handler = xmlGetCharEncodingHandler(enc); 1019 if (handler == NULL) { 1020 /* 1021 * Default handlers. 1022 */ 1023 switch (enc) { 1024 case XML_CHAR_ENCODING_ASCII: 1025 /* default encoding, no conversion should be needed */ 1026 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1027 return(0); 1028 case XML_CHAR_ENCODING_UTF16LE: 1029 break; 1030 case XML_CHAR_ENCODING_UTF16BE: 1031 break; 1032 case XML_CHAR_ENCODING_UCS4LE: 1033 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1034 "encoding not supported %s\n", 1035 BAD_CAST "USC4 little endian", NULL); 1036 break; 1037 case XML_CHAR_ENCODING_UCS4BE: 1038 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1039 "encoding not supported %s\n", 1040 BAD_CAST "USC4 big endian", NULL); 1041 break; 1042 case XML_CHAR_ENCODING_EBCDIC: 1043 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1044 "encoding not supported %s\n", 1045 BAD_CAST "EBCDIC", NULL); 1046 break; 1047 case XML_CHAR_ENCODING_UCS4_2143: 1048 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1049 "encoding not supported %s\n", 1050 BAD_CAST "UCS4 2143", NULL); 1051 break; 1052 case XML_CHAR_ENCODING_UCS4_3412: 1053 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1054 "encoding not supported %s\n", 1055 BAD_CAST "UCS4 3412", NULL); 1056 break; 1057 case XML_CHAR_ENCODING_UCS2: 1058 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1059 "encoding not supported %s\n", 1060 BAD_CAST "UCS2", NULL); 1061 break; 1062 case XML_CHAR_ENCODING_8859_1: 1063 case XML_CHAR_ENCODING_8859_2: 1064 case XML_CHAR_ENCODING_8859_3: 1065 case XML_CHAR_ENCODING_8859_4: 1066 case XML_CHAR_ENCODING_8859_5: 1067 case XML_CHAR_ENCODING_8859_6: 1068 case XML_CHAR_ENCODING_8859_7: 1069 case XML_CHAR_ENCODING_8859_8: 1070 case XML_CHAR_ENCODING_8859_9: 1071 /* 1072 * We used to keep the internal content in the 1073 * document encoding however this turns being unmaintainable 1074 * So xmlGetCharEncodingHandler() will return non-null 1075 * values for this now. 1076 */ 1077 if ((ctxt->inputNr == 1) && 1078 (ctxt->encoding == NULL) && 1079 (ctxt->input != NULL) && 1080 (ctxt->input->encoding != NULL)) { 1081 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1082 } 1083 ctxt->charset = enc; 1084 return(0); 1085 case XML_CHAR_ENCODING_2022_JP: 1086 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1087 "encoding not supported %s\n", 1088 BAD_CAST "ISO-2022-JP", NULL); 1089 break; 1090 case XML_CHAR_ENCODING_SHIFT_JIS: 1091 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1092 "encoding not supported %s\n", 1093 BAD_CAST "Shift_JIS", NULL); 1094 break; 1095 case XML_CHAR_ENCODING_EUC_JP: 1096 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1097 "encoding not supported %s\n", 1098 BAD_CAST "EUC-JP", NULL); 1099 break; 1100 default: 1101 break; 1102 } 1103 } 1104 if (handler == NULL) 1105 return(-1); 1106 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1107 ret = xmlSwitchToEncodingInt(ctxt, handler, len); 1108 if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { 1109 /* 1110 * on encoding conversion errors, stop the parser 1111 */ 1112 xmlStopParser(ctxt); 1113 ctxt->errNo = XML_I18N_CONV_FAILED; 1114 } 1115 return(ret); 1116} 1117 1118/** 1119 * xmlSwitchInputEncoding: 1120 * @ctxt: the parser context 1121 * @input: the input stream 1122 * @handler: the encoding handler 1123 * @len: the number of bytes to convert for the first line or -1 1124 * 1125 * change the input functions when discovering the character encoding 1126 * of a given entity. 1127 * 1128 * Returns 0 in case of success, -1 otherwise 1129 */ 1130static int 1131xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1132 xmlCharEncodingHandlerPtr handler, int len) 1133{ 1134 int nbchars; 1135 1136 if (handler == NULL) 1137 return (-1); 1138 if (input == NULL) 1139 return (-1); 1140 if (input->buf != NULL) { 1141 if (input->buf->encoder != NULL) { 1142 /* 1143 * Check in case the auto encoding detetection triggered 1144 * in already. 1145 */ 1146 if (input->buf->encoder == handler) 1147 return (0); 1148 1149 /* 1150 * "UTF-16" can be used for both LE and BE 1151 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1152 BAD_CAST "UTF-16", 6)) && 1153 (!xmlStrncmp(BAD_CAST handler->name, 1154 BAD_CAST "UTF-16", 6))) { 1155 return(0); 1156 } 1157 */ 1158 1159 /* 1160 * Note: this is a bit dangerous, but that's what it 1161 * takes to use nearly compatible signature for different 1162 * encodings. 1163 */ 1164 xmlCharEncCloseFunc(input->buf->encoder); 1165 input->buf->encoder = handler; 1166 return (0); 1167 } 1168 input->buf->encoder = handler; 1169 1170 /* 1171 * Is there already some content down the pipe to convert ? 1172 */ 1173 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1174 int processed; 1175 unsigned int use; 1176 1177 /* 1178 * Specific handling of the Byte Order Mark for 1179 * UTF-16 1180 */ 1181 if ((handler->name != NULL) && 1182 (!strcmp(handler->name, "UTF-16LE") || 1183 !strcmp(handler->name, "UTF-16")) && 1184 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1185 input->cur += 2; 1186 } 1187 if ((handler->name != NULL) && 1188 (!strcmp(handler->name, "UTF-16BE")) && 1189 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1190 input->cur += 2; 1191 } 1192 /* 1193 * Errata on XML-1.0 June 20 2001 1194 * Specific handling of the Byte Order Mark for 1195 * UTF-8 1196 */ 1197 if ((handler->name != NULL) && 1198 (!strcmp(handler->name, "UTF-8")) && 1199 (input->cur[0] == 0xEF) && 1200 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1201 input->cur += 3; 1202 } 1203 1204 /* 1205 * Shrink the current input buffer. 1206 * Move it as the raw buffer and create a new input buffer 1207 */ 1208 processed = input->cur - input->base; 1209 xmlBufShrink(input->buf->buffer, processed); 1210 input->buf->raw = input->buf->buffer; 1211 input->buf->buffer = xmlBufCreate(); 1212 input->buf->rawconsumed = processed; 1213 use = xmlBufUse(input->buf->raw); 1214 1215 if (ctxt->html) { 1216 /* 1217 * convert as much as possible of the buffer 1218 */ 1219 nbchars = xmlCharEncInput(input->buf, 1); 1220 } else { 1221 /* 1222 * convert just enough to get 1223 * '<?xml version="1.0" encoding="xxx"?>' 1224 * parsed with the autodetected encoding 1225 * into the parser reading buffer. 1226 */ 1227 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1228 } 1229 if (nbchars < 0) { 1230 xmlErrInternal(ctxt, 1231 "switching encoding: encoder error\n", 1232 NULL); 1233 return (-1); 1234 } 1235 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1236 xmlBufResetInput(input->buf->buffer, input); 1237 } 1238 return (0); 1239 } else if (input->length == 0) { 1240 /* 1241 * When parsing a static memory array one must know the 1242 * size to be able to convert the buffer. 1243 */ 1244 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1245 return (-1); 1246 } 1247 return (0); 1248} 1249 1250/** 1251 * xmlSwitchInputEncoding: 1252 * @ctxt: the parser context 1253 * @input: the input stream 1254 * @handler: the encoding handler 1255 * 1256 * change the input functions when discovering the character encoding 1257 * of a given entity. 1258 * 1259 * Returns 0 in case of success, -1 otherwise 1260 */ 1261int 1262xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1263 xmlCharEncodingHandlerPtr handler) { 1264 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1265} 1266 1267/** 1268 * xmlSwitchToEncodingInt: 1269 * @ctxt: the parser context 1270 * @handler: the encoding handler 1271 * @len: the length to convert or -1 1272 * 1273 * change the input functions when discovering the character encoding 1274 * of a given entity, and convert only @len bytes of the output, this 1275 * is needed on auto detect to allows any declared encoding later to 1276 * convert the actual content after the xmlDecl 1277 * 1278 * Returns 0 in case of success, -1 otherwise 1279 */ 1280static int 1281xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1282 xmlCharEncodingHandlerPtr handler, int len) { 1283 int ret = 0; 1284 1285 if (handler != NULL) { 1286 if (ctxt->input != NULL) { 1287 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1288 } else { 1289 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1290 NULL); 1291 return(-1); 1292 } 1293 /* 1294 * The parsing is now done in UTF8 natively 1295 */ 1296 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1297 } else 1298 return(-1); 1299 return(ret); 1300} 1301 1302/** 1303 * xmlSwitchToEncoding: 1304 * @ctxt: the parser context 1305 * @handler: the encoding handler 1306 * 1307 * change the input functions when discovering the character encoding 1308 * of a given entity. 1309 * 1310 * Returns 0 in case of success, -1 otherwise 1311 */ 1312int 1313xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1314{ 1315 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1316} 1317 1318/************************************************************************ 1319 * * 1320 * Commodity functions to handle entities processing * 1321 * * 1322 ************************************************************************/ 1323 1324/** 1325 * xmlFreeInputStream: 1326 * @input: an xmlParserInputPtr 1327 * 1328 * Free up an input stream. 1329 */ 1330void 1331xmlFreeInputStream(xmlParserInputPtr input) { 1332 if (input == NULL) return; 1333 1334 if (input->filename != NULL) xmlFree((char *) input->filename); 1335 if (input->directory != NULL) xmlFree((char *) input->directory); 1336 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1337 if (input->version != NULL) xmlFree((char *) input->version); 1338 if ((input->free != NULL) && (input->base != NULL)) 1339 input->free((xmlChar *) input->base); 1340 if (input->buf != NULL) 1341 xmlFreeParserInputBuffer(input->buf); 1342 xmlFree(input); 1343} 1344 1345/** 1346 * xmlNewInputStream: 1347 * @ctxt: an XML parser context 1348 * 1349 * Create a new input stream structure. 1350 * 1351 * Returns the new input stream or NULL 1352 */ 1353xmlParserInputPtr 1354xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1355 xmlParserInputPtr input; 1356 1357 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1358 if (input == NULL) { 1359 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1360 return(NULL); 1361 } 1362 memset(input, 0, sizeof(xmlParserInput)); 1363 input->line = 1; 1364 input->col = 1; 1365 input->standalone = -1; 1366 1367 /* 1368 * If the context is NULL the id cannot be initialized, but that 1369 * should not happen while parsing which is the situation where 1370 * the id is actually needed. 1371 */ 1372 if (ctxt != NULL) 1373 input->id = ctxt->input_id++; 1374 1375 return(input); 1376} 1377 1378/** 1379 * xmlNewIOInputStream: 1380 * @ctxt: an XML parser context 1381 * @input: an I/O Input 1382 * @enc: the charset encoding if known 1383 * 1384 * Create a new input stream structure encapsulating the @input into 1385 * a stream suitable for the parser. 1386 * 1387 * Returns the new input stream or NULL 1388 */ 1389xmlParserInputPtr 1390xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1391 xmlCharEncoding enc) { 1392 xmlParserInputPtr inputStream; 1393 1394 if (input == NULL) return(NULL); 1395 if (xmlParserDebugEntities) 1396 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1397 inputStream = xmlNewInputStream(ctxt); 1398 if (inputStream == NULL) { 1399 return(NULL); 1400 } 1401 inputStream->filename = NULL; 1402 inputStream->buf = input; 1403 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1404 1405 if (enc != XML_CHAR_ENCODING_NONE) { 1406 xmlSwitchEncoding(ctxt, enc); 1407 } 1408 1409 return(inputStream); 1410} 1411 1412/** 1413 * xmlNewEntityInputStream: 1414 * @ctxt: an XML parser context 1415 * @entity: an Entity pointer 1416 * 1417 * Create a new input stream based on an xmlEntityPtr 1418 * 1419 * Returns the new input stream or NULL 1420 */ 1421xmlParserInputPtr 1422xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1423 xmlParserInputPtr input; 1424 1425 if (entity == NULL) { 1426 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1427 NULL); 1428 return(NULL); 1429 } 1430 if (xmlParserDebugEntities) 1431 xmlGenericError(xmlGenericErrorContext, 1432 "new input from entity: %s\n", entity->name); 1433 if (entity->content == NULL) { 1434 switch (entity->etype) { 1435 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1436 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1437 entity->name); 1438 break; 1439 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1440 case XML_EXTERNAL_PARAMETER_ENTITY: 1441 return(xmlLoadExternalEntity((char *) entity->URI, 1442 (char *) entity->ExternalID, ctxt)); 1443 case XML_INTERNAL_GENERAL_ENTITY: 1444 xmlErrInternal(ctxt, 1445 "Internal entity %s without content !\n", 1446 entity->name); 1447 break; 1448 case XML_INTERNAL_PARAMETER_ENTITY: 1449 xmlErrInternal(ctxt, 1450 "Internal parameter entity %s without content !\n", 1451 entity->name); 1452 break; 1453 case XML_INTERNAL_PREDEFINED_ENTITY: 1454 xmlErrInternal(ctxt, 1455 "Predefined entity %s without content !\n", 1456 entity->name); 1457 break; 1458 } 1459 return(NULL); 1460 } 1461 input = xmlNewInputStream(ctxt); 1462 if (input == NULL) { 1463 return(NULL); 1464 } 1465 if (entity->URI != NULL) 1466 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1467 input->base = entity->content; 1468 if (entity->length == 0) 1469 entity->length = xmlStrlen(entity->content); 1470 input->cur = entity->content; 1471 input->length = entity->length; 1472 input->end = &entity->content[input->length]; 1473 return(input); 1474} 1475 1476/** 1477 * xmlNewStringInputStream: 1478 * @ctxt: an XML parser context 1479 * @buffer: an memory buffer 1480 * 1481 * Create a new input stream based on a memory buffer. 1482 * Returns the new input stream 1483 */ 1484xmlParserInputPtr 1485xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1486 xmlParserInputPtr input; 1487 1488 if (buffer == NULL) { 1489 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1490 NULL); 1491 return(NULL); 1492 } 1493 if (xmlParserDebugEntities) 1494 xmlGenericError(xmlGenericErrorContext, 1495 "new fixed input: %.30s\n", buffer); 1496 input = xmlNewInputStream(ctxt); 1497 if (input == NULL) { 1498 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1499 return(NULL); 1500 } 1501 input->base = buffer; 1502 input->cur = buffer; 1503 input->length = xmlStrlen(buffer); 1504 input->end = &buffer[input->length]; 1505 return(input); 1506} 1507 1508/** 1509 * xmlNewInputFromFile: 1510 * @ctxt: an XML parser context 1511 * @filename: the filename to use as entity 1512 * 1513 * Create a new input stream based on a file or an URL. 1514 * 1515 * Returns the new input stream or NULL in case of error 1516 */ 1517xmlParserInputPtr 1518xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1519 xmlParserInputBufferPtr buf; 1520 xmlParserInputPtr inputStream; 1521 char *directory = NULL; 1522 xmlChar *URI = NULL; 1523 1524 if (xmlParserDebugEntities) 1525 xmlGenericError(xmlGenericErrorContext, 1526 "new input from file: %s\n", filename); 1527 if (ctxt == NULL) return(NULL); 1528 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1529 if (buf == NULL) { 1530 if (filename == NULL) 1531 __xmlLoaderErr(ctxt, 1532 "failed to load external entity: NULL filename \n", 1533 NULL); 1534 else 1535 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1536 (const char *) filename); 1537 return(NULL); 1538 } 1539 1540 inputStream = xmlNewInputStream(ctxt); 1541 if (inputStream == NULL) 1542 return(NULL); 1543 1544 inputStream->buf = buf; 1545 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1546 if (inputStream == NULL) 1547 return(NULL); 1548 1549 if (inputStream->filename == NULL) 1550 URI = xmlStrdup((xmlChar *) filename); 1551 else 1552 URI = xmlStrdup((xmlChar *) inputStream->filename); 1553 directory = xmlParserGetDirectory((const char *) URI); 1554 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1555 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1556 if (URI != NULL) xmlFree((char *) URI); 1557 inputStream->directory = directory; 1558 1559 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1560 if ((ctxt->directory == NULL) && (directory != NULL)) 1561 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1562 return(inputStream); 1563} 1564 1565/************************************************************************ 1566 * * 1567 * Commodity functions to handle parser contexts * 1568 * * 1569 ************************************************************************/ 1570 1571/** 1572 * xmlInitParserCtxt: 1573 * @ctxt: an XML parser context 1574 * 1575 * Initialize a parser context 1576 * 1577 * Returns 0 in case of success and -1 in case of error 1578 */ 1579 1580int 1581xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1582{ 1583 xmlParserInputPtr input; 1584 1585 if(ctxt==NULL) { 1586 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1587 return(-1); 1588 } 1589 1590 xmlDefaultSAXHandlerInit(); 1591 1592 if (ctxt->dict == NULL) 1593 ctxt->dict = xmlDictCreate(); 1594 if (ctxt->dict == NULL) { 1595 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1596 return(-1); 1597 } 1598 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1599 1600 if (ctxt->sax == NULL) 1601 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1602 if (ctxt->sax == NULL) { 1603 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1604 return(-1); 1605 } 1606 else 1607 xmlSAXVersion(ctxt->sax, 2); 1608 1609 ctxt->maxatts = 0; 1610 ctxt->atts = NULL; 1611 /* Allocate the Input stack */ 1612 if (ctxt->inputTab == NULL) { 1613 ctxt->inputTab = (xmlParserInputPtr *) 1614 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1615 ctxt->inputMax = 5; 1616 } 1617 if (ctxt->inputTab == NULL) { 1618 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1619 ctxt->inputNr = 0; 1620 ctxt->inputMax = 0; 1621 ctxt->input = NULL; 1622 return(-1); 1623 } 1624 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1625 xmlFreeInputStream(input); 1626 } 1627 ctxt->inputNr = 0; 1628 ctxt->input = NULL; 1629 1630 ctxt->version = NULL; 1631 ctxt->encoding = NULL; 1632 ctxt->standalone = -1; 1633 ctxt->hasExternalSubset = 0; 1634 ctxt->hasPErefs = 0; 1635 ctxt->html = 0; 1636 ctxt->external = 0; 1637 ctxt->instate = XML_PARSER_START; 1638 ctxt->token = 0; 1639 ctxt->directory = NULL; 1640 1641 /* Allocate the Node stack */ 1642 if (ctxt->nodeTab == NULL) { 1643 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1644 ctxt->nodeMax = 10; 1645 } 1646 if (ctxt->nodeTab == NULL) { 1647 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1648 ctxt->nodeNr = 0; 1649 ctxt->nodeMax = 0; 1650 ctxt->node = NULL; 1651 ctxt->inputNr = 0; 1652 ctxt->inputMax = 0; 1653 ctxt->input = NULL; 1654 return(-1); 1655 } 1656 ctxt->nodeNr = 0; 1657 ctxt->node = NULL; 1658 1659 /* Allocate the Name stack */ 1660 if (ctxt->nameTab == NULL) { 1661 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1662 ctxt->nameMax = 10; 1663 } 1664 if (ctxt->nameTab == NULL) { 1665 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1666 ctxt->nodeNr = 0; 1667 ctxt->nodeMax = 0; 1668 ctxt->node = NULL; 1669 ctxt->inputNr = 0; 1670 ctxt->inputMax = 0; 1671 ctxt->input = NULL; 1672 ctxt->nameNr = 0; 1673 ctxt->nameMax = 0; 1674 ctxt->name = NULL; 1675 return(-1); 1676 } 1677 ctxt->nameNr = 0; 1678 ctxt->name = NULL; 1679 1680 /* Allocate the space stack */ 1681 if (ctxt->spaceTab == NULL) { 1682 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1683 ctxt->spaceMax = 10; 1684 } 1685 if (ctxt->spaceTab == NULL) { 1686 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1687 ctxt->nodeNr = 0; 1688 ctxt->nodeMax = 0; 1689 ctxt->node = NULL; 1690 ctxt->inputNr = 0; 1691 ctxt->inputMax = 0; 1692 ctxt->input = NULL; 1693 ctxt->nameNr = 0; 1694 ctxt->nameMax = 0; 1695 ctxt->name = NULL; 1696 ctxt->spaceNr = 0; 1697 ctxt->spaceMax = 0; 1698 ctxt->space = NULL; 1699 return(-1); 1700 } 1701 ctxt->spaceNr = 1; 1702 ctxt->spaceMax = 10; 1703 ctxt->spaceTab[0] = -1; 1704 ctxt->space = &ctxt->spaceTab[0]; 1705 ctxt->userData = ctxt; 1706 ctxt->myDoc = NULL; 1707 ctxt->wellFormed = 1; 1708 ctxt->nsWellFormed = 1; 1709 ctxt->valid = 1; 1710 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1711 if (ctxt->loadsubset) { 1712 ctxt->options |= XML_PARSE_DTDLOAD; 1713 } 1714 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1715 ctxt->pedantic = xmlPedanticParserDefaultValue; 1716 if (ctxt->pedantic) { 1717 ctxt->options |= XML_PARSE_PEDANTIC; 1718 } 1719 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1720 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1721 if (ctxt->keepBlanks == 0) { 1722 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1723 ctxt->options |= XML_PARSE_NOBLANKS; 1724 } 1725 1726 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1727 ctxt->vctxt.userData = ctxt; 1728 ctxt->vctxt.error = xmlParserValidityError; 1729 ctxt->vctxt.warning = xmlParserValidityWarning; 1730 if (ctxt->validate) { 1731 if (xmlGetWarningsDefaultValue == 0) 1732 ctxt->vctxt.warning = NULL; 1733 else 1734 ctxt->vctxt.warning = xmlParserValidityWarning; 1735 ctxt->vctxt.nodeMax = 0; 1736 ctxt->options |= XML_PARSE_DTDVALID; 1737 } 1738 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1739 if (ctxt->replaceEntities) { 1740 ctxt->options |= XML_PARSE_NOENT; 1741 } 1742 ctxt->record_info = 0; 1743 ctxt->nbChars = 0; 1744 ctxt->checkIndex = 0; 1745 ctxt->inSubset = 0; 1746 ctxt->errNo = XML_ERR_OK; 1747 ctxt->depth = 0; 1748 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1749 ctxt->catalogs = NULL; 1750 ctxt->nbentities = 0; 1751 ctxt->sizeentities = 0; 1752 ctxt->sizeentcopy = 0; 1753 ctxt->input_id = 1; 1754 xmlInitNodeInfoSeq(&ctxt->node_seq); 1755 return(0); 1756} 1757 1758/** 1759 * xmlFreeParserCtxt: 1760 * @ctxt: an XML parser context 1761 * 1762 * Free all the memory used by a parser context. However the parsed 1763 * document in ctxt->myDoc is not freed. 1764 */ 1765 1766void 1767xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1768{ 1769 xmlParserInputPtr input; 1770 1771 if (ctxt == NULL) return; 1772 1773 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1774 xmlFreeInputStream(input); 1775 } 1776 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1777 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1778 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1779 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1780 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1781 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1782 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1783 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1784 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1785#ifdef LIBXML_SAX1_ENABLED 1786 if ((ctxt->sax != NULL) && 1787 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1788#else 1789 if (ctxt->sax != NULL) 1790#endif /* LIBXML_SAX1_ENABLED */ 1791 xmlFree(ctxt->sax); 1792 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1793 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1794 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1795 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1796 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1797 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1798 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1799 if (ctxt->attsDefault != NULL) 1800 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1801 if (ctxt->attsSpecial != NULL) 1802 xmlHashFree(ctxt->attsSpecial, NULL); 1803 if (ctxt->freeElems != NULL) { 1804 xmlNodePtr cur, next; 1805 1806 cur = ctxt->freeElems; 1807 while (cur != NULL) { 1808 next = cur->next; 1809 xmlFree(cur); 1810 cur = next; 1811 } 1812 } 1813 if (ctxt->freeAttrs != NULL) { 1814 xmlAttrPtr cur, next; 1815 1816 cur = ctxt->freeAttrs; 1817 while (cur != NULL) { 1818 next = cur->next; 1819 xmlFree(cur); 1820 cur = next; 1821 } 1822 } 1823 /* 1824 * cleanup the error strings 1825 */ 1826 if (ctxt->lastError.message != NULL) 1827 xmlFree(ctxt->lastError.message); 1828 if (ctxt->lastError.file != NULL) 1829 xmlFree(ctxt->lastError.file); 1830 if (ctxt->lastError.str1 != NULL) 1831 xmlFree(ctxt->lastError.str1); 1832 if (ctxt->lastError.str2 != NULL) 1833 xmlFree(ctxt->lastError.str2); 1834 if (ctxt->lastError.str3 != NULL) 1835 xmlFree(ctxt->lastError.str3); 1836 1837#ifdef LIBXML_CATALOG_ENABLED 1838 if (ctxt->catalogs != NULL) 1839 xmlCatalogFreeLocal(ctxt->catalogs); 1840#endif 1841 xmlFree(ctxt); 1842} 1843 1844/** 1845 * xmlNewParserCtxt: 1846 * 1847 * Allocate and initialize a new parser context. 1848 * 1849 * Returns the xmlParserCtxtPtr or NULL 1850 */ 1851 1852xmlParserCtxtPtr 1853xmlNewParserCtxt(void) 1854{ 1855 xmlParserCtxtPtr ctxt; 1856 1857 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1858 if (ctxt == NULL) { 1859 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1860 return(NULL); 1861 } 1862 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1863 if (xmlInitParserCtxt(ctxt) < 0) { 1864 xmlFreeParserCtxt(ctxt); 1865 return(NULL); 1866 } 1867 return(ctxt); 1868} 1869 1870/************************************************************************ 1871 * * 1872 * Handling of node informations * 1873 * * 1874 ************************************************************************/ 1875 1876/** 1877 * xmlClearParserCtxt: 1878 * @ctxt: an XML parser context 1879 * 1880 * Clear (release owned resources) and reinitialize a parser context 1881 */ 1882 1883void 1884xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1885{ 1886 if (ctxt==NULL) 1887 return; 1888 xmlClearNodeInfoSeq(&ctxt->node_seq); 1889 xmlCtxtReset(ctxt); 1890} 1891 1892 1893/** 1894 * xmlParserFindNodeInfo: 1895 * @ctx: an XML parser context 1896 * @node: an XML node within the tree 1897 * 1898 * Find the parser node info struct for a given node 1899 * 1900 * Returns an xmlParserNodeInfo block pointer or NULL 1901 */ 1902const xmlParserNodeInfo * 1903xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1904{ 1905 unsigned long pos; 1906 1907 if ((ctx == NULL) || (node == NULL)) 1908 return (NULL); 1909 /* Find position where node should be at */ 1910 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1911 if (pos < ctx->node_seq.length 1912 && ctx->node_seq.buffer[pos].node == node) 1913 return &ctx->node_seq.buffer[pos]; 1914 else 1915 return NULL; 1916} 1917 1918 1919/** 1920 * xmlInitNodeInfoSeq: 1921 * @seq: a node info sequence pointer 1922 * 1923 * -- Initialize (set to initial state) node info sequence 1924 */ 1925void 1926xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1927{ 1928 if (seq == NULL) 1929 return; 1930 seq->length = 0; 1931 seq->maximum = 0; 1932 seq->buffer = NULL; 1933} 1934 1935/** 1936 * xmlClearNodeInfoSeq: 1937 * @seq: a node info sequence pointer 1938 * 1939 * -- Clear (release memory and reinitialize) node 1940 * info sequence 1941 */ 1942void 1943xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1944{ 1945 if (seq == NULL) 1946 return; 1947 if (seq->buffer != NULL) 1948 xmlFree(seq->buffer); 1949 xmlInitNodeInfoSeq(seq); 1950} 1951 1952/** 1953 * xmlParserFindNodeInfoIndex: 1954 * @seq: a node info sequence pointer 1955 * @node: an XML node pointer 1956 * 1957 * 1958 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1959 * the given node is or should be at in a sorted sequence 1960 * 1961 * Returns a long indicating the position of the record 1962 */ 1963unsigned long 1964xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1965 const xmlNodePtr node) 1966{ 1967 unsigned long upper, lower, middle; 1968 int found = 0; 1969 1970 if ((seq == NULL) || (node == NULL)) 1971 return ((unsigned long) -1); 1972 1973 /* Do a binary search for the key */ 1974 lower = 1; 1975 upper = seq->length; 1976 middle = 0; 1977 while (lower <= upper && !found) { 1978 middle = lower + (upper - lower) / 2; 1979 if (node == seq->buffer[middle - 1].node) 1980 found = 1; 1981 else if (node < seq->buffer[middle - 1].node) 1982 upper = middle - 1; 1983 else 1984 lower = middle + 1; 1985 } 1986 1987 /* Return position */ 1988 if (middle == 0 || seq->buffer[middle - 1].node < node) 1989 return middle; 1990 else 1991 return middle - 1; 1992} 1993 1994 1995/** 1996 * xmlParserAddNodeInfo: 1997 * @ctxt: an XML parser context 1998 * @info: a node info sequence pointer 1999 * 2000 * Insert node info record into the sorted sequence 2001 */ 2002void 2003xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2004 const xmlParserNodeInfoPtr info) 2005{ 2006 unsigned long pos; 2007 2008 if ((ctxt == NULL) || (info == NULL)) return; 2009 2010 /* Find pos and check to see if node is already in the sequence */ 2011 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2012 info->node); 2013 2014 if ((pos < ctxt->node_seq.length) && 2015 (ctxt->node_seq.buffer != NULL) && 2016 (ctxt->node_seq.buffer[pos].node == info->node)) { 2017 ctxt->node_seq.buffer[pos] = *info; 2018 } 2019 2020 /* Otherwise, we need to add new node to buffer */ 2021 else { 2022 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2023 (ctxt->node_seq.buffer == NULL)) { 2024 xmlParserNodeInfo *tmp_buffer; 2025 unsigned int byte_size; 2026 2027 if (ctxt->node_seq.maximum == 0) 2028 ctxt->node_seq.maximum = 2; 2029 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2030 (2 * ctxt->node_seq.maximum)); 2031 2032 if (ctxt->node_seq.buffer == NULL) 2033 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2034 else 2035 tmp_buffer = 2036 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2037 byte_size); 2038 2039 if (tmp_buffer == NULL) { 2040 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2041 return; 2042 } 2043 ctxt->node_seq.buffer = tmp_buffer; 2044 ctxt->node_seq.maximum *= 2; 2045 } 2046 2047 /* If position is not at end, move elements out of the way */ 2048 if (pos != ctxt->node_seq.length) { 2049 unsigned long i; 2050 2051 for (i = ctxt->node_seq.length; i > pos; i--) 2052 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2053 } 2054 2055 /* Copy element and increase length */ 2056 ctxt->node_seq.buffer[pos] = *info; 2057 ctxt->node_seq.length++; 2058 } 2059} 2060 2061/************************************************************************ 2062 * * 2063 * Defaults settings * 2064 * * 2065 ************************************************************************/ 2066/** 2067 * xmlPedanticParserDefault: 2068 * @val: int 0 or 1 2069 * 2070 * Set and return the previous value for enabling pedantic warnings. 2071 * 2072 * Returns the last value for 0 for no substitution, 1 for substitution. 2073 */ 2074 2075int 2076xmlPedanticParserDefault(int val) { 2077 int old = xmlPedanticParserDefaultValue; 2078 2079 xmlPedanticParserDefaultValue = val; 2080 return(old); 2081} 2082 2083/** 2084 * xmlLineNumbersDefault: 2085 * @val: int 0 or 1 2086 * 2087 * Set and return the previous value for enabling line numbers in elements 2088 * contents. This may break on old application and is turned off by default. 2089 * 2090 * Returns the last value for 0 for no substitution, 1 for substitution. 2091 */ 2092 2093int 2094xmlLineNumbersDefault(int val) { 2095 int old = xmlLineNumbersDefaultValue; 2096 2097 xmlLineNumbersDefaultValue = val; 2098 return(old); 2099} 2100 2101/** 2102 * xmlSubstituteEntitiesDefault: 2103 * @val: int 0 or 1 2104 * 2105 * Set and return the previous value for default entity support. 2106 * Initially the parser always keep entity references instead of substituting 2107 * entity values in the output. This function has to be used to change the 2108 * default parser behavior 2109 * SAX::substituteEntities() has to be used for changing that on a file by 2110 * file basis. 2111 * 2112 * Returns the last value for 0 for no substitution, 1 for substitution. 2113 */ 2114 2115int 2116xmlSubstituteEntitiesDefault(int val) { 2117 int old = xmlSubstituteEntitiesDefaultValue; 2118 2119 xmlSubstituteEntitiesDefaultValue = val; 2120 return(old); 2121} 2122 2123/** 2124 * xmlKeepBlanksDefault: 2125 * @val: int 0 or 1 2126 * 2127 * Set and return the previous value for default blanks text nodes support. 2128 * The 1.x version of the parser used an heuristic to try to detect 2129 * ignorable white spaces. As a result the SAX callback was generating 2130 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2131 * using the DOM output text nodes containing those blanks were not generated. 2132 * The 2.x and later version will switch to the XML standard way and 2133 * ignorableWhitespace() are only generated when running the parser in 2134 * validating mode and when the current element doesn't allow CDATA or 2135 * mixed content. 2136 * This function is provided as a way to force the standard behavior 2137 * on 1.X libs and to switch back to the old mode for compatibility when 2138 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2139 * by using xmlIsBlankNode() commodity function to detect the "empty" 2140 * nodes generated. 2141 * This value also affect autogeneration of indentation when saving code 2142 * if blanks sections are kept, indentation is not generated. 2143 * 2144 * Returns the last value for 0 for no substitution, 1 for substitution. 2145 */ 2146 2147int 2148xmlKeepBlanksDefault(int val) { 2149 int old = xmlKeepBlanksDefaultValue; 2150 2151 xmlKeepBlanksDefaultValue = val; 2152 if (!val) xmlIndentTreeOutput = 1; 2153 return(old); 2154} 2155 2156#define bottom_parserInternals 2157#include "elfgcchack.h" 2158