1/* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10#define IN_LIBXML 11#include "libxml.h" 12 13#if defined(WIN32) && !defined (__CYGWIN__) 14#define XML_DIR_SEP '\\' 15#else 16#define XML_DIR_SEP '/' 17#endif 18 19#include <string.h> 20#ifdef HAVE_CTYPE_H 21#include <ctype.h> 22#endif 23#ifdef HAVE_STDLIB_H 24#include <stdlib.h> 25#endif 26#ifdef HAVE_SYS_STAT_H 27#include <sys/stat.h> 28#endif 29#ifdef HAVE_FCNTL_H 30#include <fcntl.h> 31#endif 32#ifdef HAVE_UNISTD_H 33#include <unistd.h> 34#endif 35#ifdef HAVE_ZLIB_H 36#include <zlib.h> 37#endif 38 39#include <libxml/xmlmemory.h> 40#include <libxml/tree.h> 41#include <libxml/parser.h> 42#include <libxml/parserInternals.h> 43#include <libxml/valid.h> 44#include <libxml/entities.h> 45#include <libxml/xmlerror.h> 46#include <libxml/encoding.h> 47#include <libxml/valid.h> 48#include <libxml/xmlIO.h> 49#include <libxml/uri.h> 50#include <libxml/dict.h> 51#include <libxml/SAX.h> 52#ifdef LIBXML_CATALOG_ENABLED 53#include <libxml/catalog.h> 54#endif 55#include <libxml/globals.h> 56#include <libxml/chvalid.h> 57 58/* 59 * Various global defaults for parsing 60 */ 61 62/** 63 * xmlCheckVersion: 64 * @version: the include version number 65 * 66 * check the compiled lib version against the include one. 
67 * This can warn or immediately kill the application 68 */ 69void 70xmlCheckVersion(int version) { 71 int myversion = (int) LIBXML_VERSION; 72 73 xmlInitParser(); 74 75 if ((myversion / 10000) != (version / 10000)) { 76 xmlGenericError(xmlGenericErrorContext, 77 "Fatal: program compiled against libxml %d using libxml %d\n", 78 (version / 10000), (myversion / 10000)); 79 fprintf(stderr, 80 "Fatal: program compiled against libxml %d using libxml %d\n", 81 (version / 10000), (myversion / 10000)); 82 } 83 if ((myversion / 100) < (version / 100)) { 84 xmlGenericError(xmlGenericErrorContext, 85 "Warning: program compiled against libxml %d using older %d\n", 86 (version / 100), (myversion / 100)); 87 } 88} 89 90 91/************************************************************************ 92 * * 93 * Some factorized error routines * 94 * * 95 ************************************************************************/ 96 97 98/** 99 * xmlErrMemory: 100 * @ctxt: an XML parser context 101 * @extra: extra informations 102 * 103 * Handle a redefinition of attribute error 104 */ 105void 106xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 107{ 108 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 109 (ctxt->instate == XML_PARSER_EOF)) 110 return; 111 if (ctxt != NULL) { 112 ctxt->errNo = XML_ERR_NO_MEMORY; 113 ctxt->instate = XML_PARSER_EOF; 114 ctxt->disableSAX = 1; 115 } 116 if (extra) 117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 118 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 119 NULL, NULL, 0, 0, 120 "Memory allocation failed : %s\n", extra); 121 else 122 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 123 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 124 NULL, NULL, 0, 0, "Memory allocation failed\n"); 125} 126 127/** 128 * __xmlErrEncoding: 129 * @ctxt: an XML parser context 130 * @xmlerr: the error number 131 * @msg: the error message 132 * @str1: an string info 133 * @str2: an string info 134 * 135 * Handle an encoding 
error 136 */ 137void 138__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 139 const char *msg, const xmlChar * str1, const xmlChar * str2) 140{ 141 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 142 (ctxt->instate == XML_PARSER_EOF)) 143 return; 144 if (ctxt != NULL) 145 ctxt->errNo = xmlerr; 146 __xmlRaiseError(NULL, NULL, NULL, 147 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 148 NULL, 0, (const char *) str1, (const char *) str2, 149 NULL, 0, 0, msg, str1, str2); 150 if (ctxt != NULL) { 151 ctxt->wellFormed = 0; 152 if (ctxt->recovery == 0) 153 ctxt->disableSAX = 1; 154 } 155} 156 157/** 158 * xmlErrInternal: 159 * @ctxt: an XML parser context 160 * @msg: the error message 161 * @str: error informations 162 * 163 * Handle an internal error 164 */ 165static void 166xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 167{ 168 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 169 (ctxt->instate == XML_PARSER_EOF)) 170 return; 171 if (ctxt != NULL) 172 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 173 __xmlRaiseError(NULL, NULL, NULL, 174 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 175 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 176 0, 0, msg, str); 177 if (ctxt != NULL) { 178 ctxt->wellFormed = 0; 179 if (ctxt->recovery == 0) 180 ctxt->disableSAX = 1; 181 } 182} 183 184/** 185 * xmlErrEncodingInt: 186 * @ctxt: an XML parser context 187 * @error: the error number 188 * @msg: the error message 189 * @val: an integer value 190 * 191 * n encoding error 192 */ 193static void 194xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 195 const char *msg, int val) 196{ 197 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 198 (ctxt->instate == XML_PARSER_EOF)) 199 return; 200 if (ctxt != NULL) 201 ctxt->errNo = error; 202 __xmlRaiseError(NULL, NULL, NULL, 203 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 204 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 205 if (ctxt != NULL) { 206 ctxt->wellFormed = 0; 
207 if (ctxt->recovery == 0) 208 ctxt->disableSAX = 1; 209 } 210} 211 212/** 213 * xmlIsLetter: 214 * @c: an unicode character (int) 215 * 216 * Check whether the character is allowed by the production 217 * [84] Letter ::= BaseChar | Ideographic 218 * 219 * Returns 0 if not, non-zero otherwise 220 */ 221int 222xmlIsLetter(int c) { 223 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 224} 225 226/************************************************************************ 227 * * 228 * Input handling functions for progressive parsing * 229 * * 230 ************************************************************************/ 231 232/* #define DEBUG_INPUT */ 233/* #define DEBUG_STACK */ 234/* #define DEBUG_PUSH */ 235 236 237/* we need to keep enough input to show errors in context */ 238#define LINE_LEN 80 239 240#ifdef DEBUG_INPUT 241#define CHECK_BUFFER(in) check_buffer(in) 242 243static 244void check_buffer(xmlParserInputPtr in) { 245 if (in->base != in->buf->buffer->content) { 246 xmlGenericError(xmlGenericErrorContext, 247 "xmlParserInput: base mismatch problem\n"); 248 } 249 if (in->cur < in->base) { 250 xmlGenericError(xmlGenericErrorContext, 251 "xmlParserInput: cur < base problem\n"); 252 } 253 if (in->cur > in->base + in->buf->buffer->use) { 254 xmlGenericError(xmlGenericErrorContext, 255 "xmlParserInput: cur > base + use problem\n"); 256 } 257 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 258 (int) in, (int) in->buf->buffer->content, in->cur - in->base, 259 in->buf->buffer->use, in->buf->buffer->size); 260} 261 262#else 263#define CHECK_BUFFER(in) 264#endif 265 266 267/** 268 * xmlParserInputRead: 269 * @in: an XML parser input 270 * @len: an indicative size for the lookahead 271 * 272 * This function refresh the input for the parser. 
It doesn't try to 273 * preserve pointers to the input buffer, and discard already read data 274 * 275 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 276 * end of this entity 277 */ 278int 279xmlParserInputRead(xmlParserInputPtr in, int len) { 280 int ret; 281 int used; 282 int indx; 283 284 if (in == NULL) return(-1); 285#ifdef DEBUG_INPUT 286 xmlGenericError(xmlGenericErrorContext, "Read\n"); 287#endif 288 if (in->buf == NULL) return(-1); 289 if (in->base == NULL) return(-1); 290 if (in->cur == NULL) return(-1); 291 if (in->buf->buffer == NULL) return(-1); 292 if (in->buf->readcallback == NULL) return(-1); 293 294 CHECK_BUFFER(in); 295 296 used = in->cur - in->buf->buffer->content; 297 ret = xmlBufferShrink(in->buf->buffer, used); 298 if (ret > 0) { 299 in->cur -= ret; 300 in->consumed += ret; 301 } 302 ret = xmlParserInputBufferRead(in->buf, len); 303 if (in->base != in->buf->buffer->content) { 304 /* 305 * the buffer has been reallocated 306 */ 307 indx = in->cur - in->base; 308 in->base = in->buf->buffer->content; 309 in->cur = &in->buf->buffer->content[indx]; 310 } 311 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 312 313 CHECK_BUFFER(in); 314 315 return(ret); 316} 317 318/** 319 * xmlParserInputGrow: 320 * @in: an XML parser input 321 * @len: an indicative size for the lookahead 322 * 323 * This function increase the input for the parser. 
It tries to 324 * preserve pointers to the input buffer, and keep already read data 325 * 326 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 327 * end of this entity 328 */ 329int 330xmlParserInputGrow(xmlParserInputPtr in, int len) { 331 int ret; 332 int indx; 333 334 if (in == NULL) return(-1); 335#ifdef DEBUG_INPUT 336 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 337#endif 338 if (in->buf == NULL) return(-1); 339 if (in->base == NULL) return(-1); 340 if (in->cur == NULL) return(-1); 341 if (in->buf->buffer == NULL) return(-1); 342 343 CHECK_BUFFER(in); 344 345 indx = in->cur - in->base; 346 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 347 348 CHECK_BUFFER(in); 349 350 return(0); 351 } 352 if (in->buf->readcallback != NULL) 353 ret = xmlParserInputBufferGrow(in->buf, len); 354 else 355 return(0); 356 357 /* 358 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 359 * block, but we use it really as an integer to do some 360 * pointer arithmetic. Insure will raise it as a bug but in 361 * that specific case, that's not ! 362 */ 363 if (in->base != in->buf->buffer->content) { 364 /* 365 * the buffer has been reallocated 366 */ 367 indx = in->cur - in->base; 368 in->base = in->buf->buffer->content; 369 in->cur = &in->buf->buffer->content[indx]; 370 } 371 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 372 373 CHECK_BUFFER(in); 374 375 return(ret); 376} 377 378/** 379 * xmlParserInputShrink: 380 * @in: an XML parser input 381 * 382 * This function removes used input for the parser. 
383 */ 384void 385xmlParserInputShrink(xmlParserInputPtr in) { 386 int used; 387 int ret; 388 int indx; 389 390#ifdef DEBUG_INPUT 391 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 392#endif 393 if (in == NULL) return; 394 if (in->buf == NULL) return; 395 if (in->base == NULL) return; 396 if (in->cur == NULL) return; 397 if (in->buf->buffer == NULL) return; 398 399 CHECK_BUFFER(in); 400 401 used = in->cur - in->buf->buffer->content; 402 /* 403 * Do not shrink on large buffers whose only a tiny fraction 404 * was consumed 405 */ 406 if (used > INPUT_CHUNK) { 407 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 408 if (ret > 0) { 409 in->cur -= ret; 410 in->consumed += ret; 411 } 412 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 413 } 414 415 CHECK_BUFFER(in); 416 417 if (in->buf->buffer->use > INPUT_CHUNK) { 418 return; 419 } 420 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 421 if (in->base != in->buf->buffer->content) { 422 /* 423 * the buffer has been reallocated 424 */ 425 indx = in->cur - in->base; 426 in->base = in->buf->buffer->content; 427 in->cur = &in->buf->buffer->content[indx]; 428 } 429 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 430 431 CHECK_BUFFER(in); 432} 433 434/************************************************************************ 435 * * 436 * UTF8 character input and related functions * 437 * * 438 ************************************************************************/ 439 440/** 441 * xmlNextChar: 442 * @ctxt: the XML parser context 443 * 444 * Skip to the next char input char. 
* Advances ctxt->input->cur by one character (1 to 4 bytes when the
 * context charset is UTF-8, exactly 1 byte otherwise), keeps the
 * line/col counters and nbChars up to date, grows the input when its end
 * is reached and pops the input when it cannot grow anymore.
 * Does nothing if @ctxt is NULL, has no input, or is in the
 * XML_PARSER_EOF state.
 */

void
xmlNextChar(xmlParserCtxtPtr ctxt)
{
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
        (ctxt->input == NULL))
        return;

    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
            (ctxt->instate != XML_PARSER_COMMENT)) {
            /*
             * If we are at the end of the current entity and
             * the context allows it, we pop consumed entities
             * automatically.
             * the auto closing should be blocked in other cases
             */
            xmlPopInput(ctxt);
        } else {
            const unsigned char *cur;
            unsigned char c;

            /*
             *   2.11 End-of-Line Handling
             *   the literal two-character sequence "#xD#xA" or a standalone
             *   literal #xD, an XML processor must pass to the application
             *   the single character #xA.
             *
             * Only '\n' starts a new line for the line/col accounting.
             */
            if (*(ctxt->input->cur) == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else
                ctxt->input->col++;

            /*
             * We are supposed to handle UTF8, check it's valid
             * From rfc2044: encoding of the Unicode values on UTF-8:
             *
             * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
             * 0000 0000-0000 007F   0xxxxxxx
             * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
             * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
             *
             * Check for the 0x110000 limit too
             *
             * NOTE(review): unlike xmlCurrentChar(), a lead byte in the
             * 0x80-0xBF range and overlong sequences are not rejected
             * here; only 0xC0 is refused explicitly.
             */
            cur = ctxt->input->cur;

            c = *cur;
            if (c & 0x80) {
                if (c == 0xC0)
                    goto encoding_error;
                /*
                 * A 0 byte may just mean the buffered data ends here:
                 * grow the input and reload cur, since growing can move
                 * the buffer.
                 */
                if (cur[1] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[1] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xe0) == 0xe0) {
                    unsigned int val;

                    if (cur[2] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if ((cur[2] & 0xc0) != 0x80)
                        goto encoding_error;
                    if ((c & 0xf0) == 0xf0) {
                        if (cur[3] == 0) {
                            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                            cur = ctxt->input->cur;
                        }
                        if (((c & 0xf8) != 0xf0) ||
                            ((cur[3] & 0xc0) != 0x80))
                            goto encoding_error;
                        /* 4-byte code */
                        ctxt->input->cur += 4;
                        val = (cur[0] & 0x7) << 18;
                        val |= (cur[1] & 0x3f) << 12;
                        val |= (cur[2] & 0x3f) << 6;
                        val |= cur[3] & 0x3f;
                    } else {
                        /* 3-byte code */
                        ctxt->input->cur += 3;
                        val = (cur[0] & 0xf) << 12;
                        val |= (cur[1] & 0x3f) << 6;
                        val |= cur[2] & 0x3f;
                    }
                    /*
                     * The character was already skipped above; surrogates,
                     * 0xFFFE/0xFFFF and values >= 0x110000 are only
                     * reported.
                     */
                    if (((val > 0xd7ff) && (val < 0xe000)) ||
                        ((val > 0xfffd) && (val < 0x10000)) ||
                        (val >= 0x110000)) {
                        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                          "Char 0x%X out of allowed range\n",
                                          val);
                    }
                } else
                    /* 2-byte code */
                    ctxt->input->cur += 2;
            } else
                /* 1-byte code */
                ctxt->input->cur++;

            ctxt->nbChars++;
            if (*ctxt->input->cur == 0)
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
        }
    } else {
        /*
         * Assume it's a fixed length encoding (1) with
         * a compatible encoding for the ASCII set, since
         * XML constructs only use < 128 chars
         */

        if (*(ctxt->input->cur) == '\n') {
            ctxt->input->line++; ctxt->input->col = 1;
        } else
            ctxt->input->col++;
        ctxt->input->cur++;
        ctxt->nbChars++;
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
    /* Outside of HTML parsing, a '%' may start a parameter entity reference. */
    if ((*ctxt->input->cur == '%') && (!ctxt->html))
        xmlParserHandlePEReference(ctxt);
    /* End of this input and nothing more to read: pop it. */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
        xmlPopInput(ctxt);
    return;
encoding_error:
    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     *
     * The 4 offending bytes are only dumped when at least 4 bytes remain
     * in the input. The bad byte is then skipped as a single character.
     */
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
        (ctxt->input->end - ctxt->input->cur < 4)) {
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n",
                     NULL, NULL);
    } else {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    ctxt->input->cur++;
    return;
}

/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * Wherever an external parsed entity or the literal entity value
 * of an internal parsed entity contains either the literal two-character
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
 * must pass to the application the single character #xA.
* This behavior can conveniently be produced by normalizing all
 * line breaks to #xA on input, before parsing.)
 *
 * Side effects: a "#xD#xA" pair makes the input position advance past
 * the #xD (and nbChars is incremented); a malformed UTF-8 sequence is
 * reported and switches the context charset to ISO-Latin-1.
 *
 * Returns the current char value and its length in @len; 0 if @ctxt,
 * @len or the input is NULL or the parser is in the XML_PARSER_EOF state;
 * 0 with *@len set to 0 if a malformed sequence is found with less than
 * 4 bytes left in the input.
 */

int
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
    if (ctxt->instate == XML_PARSER_EOF)
        return(0);

    /* Fast path: printable ASCII needs neither decoding nor normalization. */
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
            *len = 1;
            return((int) *ctxt->input->cur);
    }
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         */
        const unsigned char *cur = ctxt->input->cur;
        unsigned char c;
        unsigned int val;

        c = *cur;
        if (c & 0x80) {
            /* A continuation byte (10xxxxxx) or 0xC0 cannot start a char. */
            if (((c & 0x40) == 0) || (c == 0xC0))
                goto encoding_error;
            /*
             * A 0 byte may just mean the buffered data ends here: grow
             * the input and reload cur, since growing can move the buffer.
             */
            if (cur[1] == 0) {
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
            if ((cur[1] & 0xc0) != 0x80)
                goto encoding_error;
            if ((c & 0xe0) == 0xe0) {
                if (cur[2] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[2] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xf0) == 0xf0) {
                    if (cur[3] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if (((c & 0xf8) != 0xf0) ||
                        ((cur[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    *len = 4;
                    val = (cur[0] & 0x7) << 18;
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
                    /* reject overlong encodings */
                    if (val < 0x10000)
                        goto encoding_error;
                } else {
                  /* 3-byte code */
                    *len = 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
                    /* reject overlong encodings */
                    if (val < 0x800)
                        goto encoding_error;
                }
            } else {
              /* 2-byte code */
                *len = 2;
                val = (cur[0] & 0x1f) << 6;
                val |= cur[1] & 0x3f;
                /* reject overlong encodings */
                if (val < 0x80)
                    goto encoding_error;
            }
            /* Well-formed UTF-8, but the value may not be an XML Char. */
            if (!IS_CHAR(val)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x%X out of allowed range\n", val);
            }
            return(val);
        } else {
            /* 1-byte code */
            *len = 1;
            if (*ctxt->input->cur == 0)
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
            /* A 0 byte located before the end of the data is a real NUL. */
            if ((*ctxt->input->cur == 0) &&
                (ctxt->input->end > ctxt->input->cur)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x0 out of allowed range\n", 0);
            }
            /* End-of-line normalization: #xD#xA and lone #xD both give #xA. */
            if (*ctxt->input->cur == 0xD) {
                if (ctxt->input->cur[1] == 0xA) {
                    ctxt->nbChars++;
                    ctxt->input->cur++;
                }
                return(0xA);
            }
            return((int) *ctxt->input->cur);
        }
    }
    /*
     * Assume it's a fixed length encoding (1) with
     * a compatible encoding for the ASCII set, since
     * XML constructs only use < 128 chars
     */
    *len = 1;
    if (*ctxt->input->cur == 0xD) {
        if (ctxt->input->cur[1] == 0xA) {
            ctxt->nbChars++;
            ctxt->input->cur++;
        }
        return(0xA);
    }
    return((int) *ctxt->input->cur);
encoding_error:
    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     */
    if (ctxt->input->end - ctxt->input->cur < 4) {
        *len = 0;
        return(0);
    }

    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    {
        char buffer[150];

        snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    /* The offending byte is returned as a single 1-byte character. */
    *len = 1;
    return((int) *ctxt->input->cur);
}

/**
 * xmlStringCurrentChar:
 * @ctxt:  the XML parser context (may be NULL, UTF-8 is then assumed)
 * @cur:  pointer to the beginning of the char
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer.
 *
 * Returns the current char value and its length in @len; 0 if @cur or
 * @len is NULL; 0 with *@len set to 0 if a malformed sequence is found
 * while @ctxt or its input is NULL or less than 4 bytes are left in the
 * context input.
 */

int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
{
    if ((len == NULL) || (cur == NULL)) return(0);
    if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         *
         * NOTE(review): unlike xmlCurrentChar(), neither a continuation
         * byte used as lead byte nor overlong sequences are rejected here.
         */
        unsigned char c;
        unsigned int val;

        c = *cur;
        if (c & 0x80) {
            if ((cur[1] & 0xc0) != 0x80)
                goto encoding_error;
            if ((c & 0xe0) == 0xe0) {

                if ((cur[2] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xf0) == 0xf0) {
                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    *len = 4;
                    val = (cur[0] & 0x7) << 18;
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
                } else {
                    /* 3-byte code */
                    *len = 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
                }
            } else {
                /* 2-byte code */
                *len = 2;
                val = (cur[0] & 0x1f) << 6;
                val |= cur[1] & 0x3f;
            }
            /* Well-formed UTF-8, but the value may not be an XML Char. */
            if (!IS_CHAR(val)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x%X out of allowed range\n", val);
            }
            return (val);
        } else {
            /* 1-byte code */
            *len = 1;
            return ((int) *cur);
        }
    }
    /*
     * Assume it's a fixed length encoding (1) with
     * a compatible encoding for the ASCII set, since
     * XML constructs only use < 128 chars
     */
    *len = 1;
    return ((int) *cur);
encoding_error:

    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     *
     * NOTE(review): the test below looks at the context input, not at
     * the string pointed to by @cur.
     */
    if ((ctxt == NULL) || (ctxt->input == NULL) ||
        (ctxt->input->end - ctxt->input->cur < 4)) {
        *len = 0;
        return(0);
    }
    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error; unlike xmlCurrentChar()
     * the context charset is left unchanged here.
     *
     * NOTE(review): the bytes dumped in the message are read from
     * ctxt->input->cur, not from @cur.
     */
    {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        ctxt->input->cur[0], ctxt->input->cur[1],
                        ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
    }
    /* The offending byte is returned as a single 1-byte character. */
    *len = 1;
    return ((int) *cur);
}

/**
 * xmlCopyCharMultiByte:
 * @out:  pointer to an array of xmlChar
 * @val:  the char value
 *
 * append the char value in the array, encoded in UTF-8 when @val is
 * 0x80 or above. No terminating 0 is written.
 *
 * Returns the number of xmlChar written; 0 if @out is NULL or if @val
 * is 0x110000 or above (an error is raised in the latter case).
 */
int
xmlCopyCharMultiByte(xmlChar *out, int val) {
    if (out == NULL) return(0);
    /*
     * We are supposed to handle UTF8, check it's valid
     * From rfc2044: encoding of the Unicode values on UTF-8:
     *
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
     * 0000 0000-0000 007F   0xxxxxxx
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
     */
    if  (val >= 0x80) {
        xmlChar *savedout = out;
        int bits;
        /*
         * Write the lead byte; bits is the shift of the first
         * continuation byte, i.e. 6 * (number of continuation bytes - 1).
         */
        if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
        else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
        else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
        else {
            xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
                    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
                    val);
            return(0);
        }
        /* Continuation bytes: 6 payload bits each, most significant first. */
        for ( ; bits >= 0; bits-= 6)
            *out++= ((val >> bits) & 0x3F) | 0x80 ;
        return (out - savedout);
    }
    /* Values below 0x80 are stored as a single byte. */
    *out = (xmlChar) val;
    return 1;
}

/**
 * xmlCopyChar:
 * @len:  Ignored, compatibility
 * @out:  pointer to an array of xmlChar
 * @val:  the char value
 *
 * append the char value in the array
 *
 * Returns the number of xmlChar written
 */

int
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
    if (out == NULL) return(0);
    /* the len parameter is ignored */
    if  (val >= 0x80) {
        return(xmlCopyCharMultiByte (out, val));
    }
    *out = (xmlChar) val;
    return 1;
}

/************************************************************************
 *                                                                      *
 *              Commodity functions to switch encodings                 *
 *                                                                      *
 ************************************************************************/

/* defined in encoding.c, not public */
int
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                       xmlBufferPtr in, int len);

/* forward declarations of the static helpers used by xmlSwitchEncoding() */
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                       xmlCharEncodingHandlerPtr handler, int len);
static int
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                          xmlCharEncodingHandlerPtr handler, int len);
/**
 * xmlSwitchEncoding:
 * @ctxt:  the parser context
 * @enc:  the encoding value (number)
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
966 * 967 * Returns 0 in case of success, -1 otherwise 968 */ 969int 970xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 971{ 972 xmlCharEncodingHandlerPtr handler; 973 int len = -1; 974 975 if (ctxt == NULL) return(-1); 976 switch (enc) { 977 case XML_CHAR_ENCODING_ERROR: 978 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 979 "encoding unknown\n", NULL, NULL); 980 return(-1); 981 case XML_CHAR_ENCODING_NONE: 982 /* let's assume it's UTF-8 without the XML decl */ 983 ctxt->charset = XML_CHAR_ENCODING_UTF8; 984 return(0); 985 case XML_CHAR_ENCODING_UTF8: 986 /* default encoding, no conversion should be needed */ 987 ctxt->charset = XML_CHAR_ENCODING_UTF8; 988 989 /* 990 * Errata on XML-1.0 June 20 2001 991 * Specific handling of the Byte Order Mark for 992 * UTF-8 993 */ 994 if ((ctxt->input != NULL) && 995 (ctxt->input->cur[0] == 0xEF) && 996 (ctxt->input->cur[1] == 0xBB) && 997 (ctxt->input->cur[2] == 0xBF)) { 998 ctxt->input->cur += 3; 999 } 1000 return(0); 1001 case XML_CHAR_ENCODING_UTF16LE: 1002 case XML_CHAR_ENCODING_UTF16BE: 1003 /*The raw input characters are encoded 1004 *in UTF-16. As we expect this function 1005 *to be called after xmlCharEncInFunc, we expect 1006 *ctxt->input->cur to contain UTF-8 encoded characters. 1007 *So the raw UTF16 Byte Order Mark 1008 *has also been converted into 1009 *an UTF-8 BOM. Let's skip that BOM. 
1010 */ 1011 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 1012 (ctxt->input->cur[0] == 0xEF) && 1013 (ctxt->input->cur[1] == 0xBB) && 1014 (ctxt->input->cur[2] == 0xBF)) { 1015 ctxt->input->cur += 3; 1016 } 1017 len = 90; 1018 break; 1019 case XML_CHAR_ENCODING_UCS2: 1020 len = 90; 1021 break; 1022 case XML_CHAR_ENCODING_UCS4BE: 1023 case XML_CHAR_ENCODING_UCS4LE: 1024 case XML_CHAR_ENCODING_UCS4_2143: 1025 case XML_CHAR_ENCODING_UCS4_3412: 1026 len = 180; 1027 break; 1028 case XML_CHAR_ENCODING_EBCDIC: 1029 case XML_CHAR_ENCODING_8859_1: 1030 case XML_CHAR_ENCODING_8859_2: 1031 case XML_CHAR_ENCODING_8859_3: 1032 case XML_CHAR_ENCODING_8859_4: 1033 case XML_CHAR_ENCODING_8859_5: 1034 case XML_CHAR_ENCODING_8859_6: 1035 case XML_CHAR_ENCODING_8859_7: 1036 case XML_CHAR_ENCODING_8859_8: 1037 case XML_CHAR_ENCODING_8859_9: 1038 case XML_CHAR_ENCODING_ASCII: 1039 case XML_CHAR_ENCODING_2022_JP: 1040 case XML_CHAR_ENCODING_SHIFT_JIS: 1041 case XML_CHAR_ENCODING_EUC_JP: 1042 len = 45; 1043 break; 1044 } 1045 handler = xmlGetCharEncodingHandler(enc); 1046 if (handler == NULL) { 1047 /* 1048 * Default handlers. 
1049 */ 1050 switch (enc) { 1051 case XML_CHAR_ENCODING_ASCII: 1052 /* default encoding, no conversion should be needed */ 1053 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1054 return(0); 1055 case XML_CHAR_ENCODING_UTF16LE: 1056 break; 1057 case XML_CHAR_ENCODING_UTF16BE: 1058 break; 1059 case XML_CHAR_ENCODING_UCS4LE: 1060 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1061 "encoding not supported %s\n", 1062 BAD_CAST "USC4 little endian", NULL); 1063 break; 1064 case XML_CHAR_ENCODING_UCS4BE: 1065 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1066 "encoding not supported %s\n", 1067 BAD_CAST "USC4 big endian", NULL); 1068 break; 1069 case XML_CHAR_ENCODING_EBCDIC: 1070 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1071 "encoding not supported %s\n", 1072 BAD_CAST "EBCDIC", NULL); 1073 break; 1074 case XML_CHAR_ENCODING_UCS4_2143: 1075 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1076 "encoding not supported %s\n", 1077 BAD_CAST "UCS4 2143", NULL); 1078 break; 1079 case XML_CHAR_ENCODING_UCS4_3412: 1080 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1081 "encoding not supported %s\n", 1082 BAD_CAST "UCS4 3412", NULL); 1083 break; 1084 case XML_CHAR_ENCODING_UCS2: 1085 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1086 "encoding not supported %s\n", 1087 BAD_CAST "UCS2", NULL); 1088 break; 1089 case XML_CHAR_ENCODING_8859_1: 1090 case XML_CHAR_ENCODING_8859_2: 1091 case XML_CHAR_ENCODING_8859_3: 1092 case XML_CHAR_ENCODING_8859_4: 1093 case XML_CHAR_ENCODING_8859_5: 1094 case XML_CHAR_ENCODING_8859_6: 1095 case XML_CHAR_ENCODING_8859_7: 1096 case XML_CHAR_ENCODING_8859_8: 1097 case XML_CHAR_ENCODING_8859_9: 1098 /* 1099 * We used to keep the internal content in the 1100 * document encoding however this turns being unmaintainable 1101 * So xmlGetCharEncodingHandler() will return non-null 1102 * values for this now. 
1103 */ 1104 if ((ctxt->inputNr == 1) && 1105 (ctxt->encoding == NULL) && 1106 (ctxt->input != NULL) && 1107 (ctxt->input->encoding != NULL)) { 1108 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1109 } 1110 ctxt->charset = enc; 1111 return(0); 1112 case XML_CHAR_ENCODING_2022_JP: 1113 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1114 "encoding not supported %s\n", 1115 BAD_CAST "ISO-2022-JP", NULL); 1116 break; 1117 case XML_CHAR_ENCODING_SHIFT_JIS: 1118 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1119 "encoding not supported %s\n", 1120 BAD_CAST "Shift_JIS", NULL); 1121 break; 1122 case XML_CHAR_ENCODING_EUC_JP: 1123 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1124 "encoding not supported %s\n", 1125 BAD_CAST "EUC-JP", NULL); 1126 break; 1127 default: 1128 break; 1129 } 1130 } 1131 if (handler == NULL) 1132 return(-1); 1133 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1134 return(xmlSwitchToEncodingInt(ctxt, handler, len)); 1135} 1136 1137/** 1138 * xmlSwitchInputEncoding: 1139 * @ctxt: the parser context 1140 * @input: the input stream 1141 * @handler: the encoding handler 1142 * @len: the number of bytes to convert for the first line or -1 1143 * 1144 * change the input functions when discovering the character encoding 1145 * of a given entity. 1146 * 1147 * Returns 0 in case of success, -1 otherwise 1148 */ 1149static int 1150xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1151 xmlCharEncodingHandlerPtr handler, int len) 1152{ 1153 int nbchars; 1154 1155 if (handler == NULL) 1156 return (-1); 1157 if (input == NULL) 1158 return (-1); 1159 if (input->buf != NULL) { 1160 if (input->buf->encoder != NULL) { 1161 /* 1162 * Check in case the auto encoding detetection triggered 1163 * in already. 
1164 */ 1165 if (input->buf->encoder == handler) 1166 return (0); 1167 1168 /* 1169 * "UTF-16" can be used for both LE and BE 1170 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1171 BAD_CAST "UTF-16", 6)) && 1172 (!xmlStrncmp(BAD_CAST handler->name, 1173 BAD_CAST "UTF-16", 6))) { 1174 return(0); 1175 } 1176 */ 1177 1178 /* 1179 * Note: this is a bit dangerous, but that's what it 1180 * takes to use nearly compatible signature for different 1181 * encodings. 1182 */ 1183 xmlCharEncCloseFunc(input->buf->encoder); 1184 input->buf->encoder = handler; 1185 return (0); 1186 } 1187 input->buf->encoder = handler; 1188 1189 /* 1190 * Is there already some content down the pipe to convert ? 1191 */ 1192 if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 1193 int processed; 1194 unsigned int use; 1195 1196 /* 1197 * Specific handling of the Byte Order Mark for 1198 * UTF-16 1199 */ 1200 if ((handler->name != NULL) && 1201 (!strcmp(handler->name, "UTF-16LE") || 1202 !strcmp(handler->name, "UTF-16")) && 1203 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1204 input->cur += 2; 1205 } 1206 if ((handler->name != NULL) && 1207 (!strcmp(handler->name, "UTF-16BE")) && 1208 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1209 input->cur += 2; 1210 } 1211 /* 1212 * Errata on XML-1.0 June 20 2001 1213 * Specific handling of the Byte Order Mark for 1214 * UTF-8 1215 */ 1216 if ((handler->name != NULL) && 1217 (!strcmp(handler->name, "UTF-8")) && 1218 (input->cur[0] == 0xEF) && 1219 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1220 input->cur += 3; 1221 } 1222 1223 /* 1224 * Shrink the current input buffer. 
1225 * Move it as the raw buffer and create a new input buffer 1226 */ 1227 processed = input->cur - input->base; 1228 xmlBufferShrink(input->buf->buffer, processed); 1229 input->buf->raw = input->buf->buffer; 1230 input->buf->buffer = xmlBufferCreate(); 1231 input->buf->rawconsumed = processed; 1232 use = input->buf->raw->use; 1233 1234 if (ctxt->html) { 1235 /* 1236 * convert as much as possible of the buffer 1237 */ 1238 nbchars = xmlCharEncInFunc(input->buf->encoder, 1239 input->buf->buffer, 1240 input->buf->raw); 1241 } else { 1242 /* 1243 * convert just enough to get 1244 * '<?xml version="1.0" encoding="xxx"?>' 1245 * parsed with the autodetected encoding 1246 * into the parser reading buffer. 1247 */ 1248 nbchars = xmlCharEncFirstLineInt(input->buf->encoder, 1249 input->buf->buffer, 1250 input->buf->raw, 1251 len); 1252 } 1253 if (nbchars < 0) { 1254 xmlErrInternal(ctxt, 1255 "switching encoding: encoder error\n", 1256 NULL); 1257 return (-1); 1258 } 1259 input->buf->rawconsumed += use - input->buf->raw->use; 1260 input->base = input->cur = input->buf->buffer->content; 1261 input->end = &input->base[input->buf->buffer->use]; 1262 1263 } 1264 return (0); 1265 } else if (input->length == 0) { 1266 /* 1267 * When parsing a static memory array one must know the 1268 * size to be able to convert the buffer. 1269 */ 1270 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1271 return (-1); 1272 } 1273 return (0); 1274} 1275 1276/** 1277 * xmlSwitchInputEncoding: 1278 * @ctxt: the parser context 1279 * @input: the input stream 1280 * @handler: the encoding handler 1281 * 1282 * change the input functions when discovering the character encoding 1283 * of a given entity. 
1284 * 1285 * Returns 0 in case of success, -1 otherwise 1286 */ 1287int 1288xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1289 xmlCharEncodingHandlerPtr handler) { 1290 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1291} 1292 1293/** 1294 * xmlSwitchToEncodingInt: 1295 * @ctxt: the parser context 1296 * @handler: the encoding handler 1297 * @len: the lenght to convert or -1 1298 * 1299 * change the input functions when discovering the character encoding 1300 * of a given entity, and convert only @len bytes of the output, this 1301 * is needed on auto detect to allows any declared encoding later to 1302 * convert the actual content after the xmlDecl 1303 * 1304 * Returns 0 in case of success, -1 otherwise 1305 */ 1306static int 1307xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1308 xmlCharEncodingHandlerPtr handler, int len) { 1309 int ret = 0; 1310 1311 if (handler != NULL) { 1312 if (ctxt->input != NULL) { 1313 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1314 } else { 1315 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1316 NULL); 1317 return(-1); 1318 } 1319 /* 1320 * The parsing is now done in UTF8 natively 1321 */ 1322 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1323 } else 1324 return(-1); 1325 return(ret); 1326} 1327 1328/** 1329 * xmlSwitchToEncoding: 1330 * @ctxt: the parser context 1331 * @handler: the encoding handler 1332 * 1333 * change the input functions when discovering the character encoding 1334 * of a given entity. 
1335 * 1336 * Returns 0 in case of success, -1 otherwise 1337 */ 1338int 1339xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1340{ 1341 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1342} 1343 1344/************************************************************************ 1345 * * 1346 * Commodity functions to handle entities processing * 1347 * * 1348 ************************************************************************/ 1349 1350/** 1351 * xmlFreeInputStream: 1352 * @input: an xmlParserInputPtr 1353 * 1354 * Free up an input stream. 1355 */ 1356void 1357xmlFreeInputStream(xmlParserInputPtr input) { 1358 if (input == NULL) return; 1359 1360 if (input->filename != NULL) xmlFree((char *) input->filename); 1361 if (input->directory != NULL) xmlFree((char *) input->directory); 1362 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1363 if (input->version != NULL) xmlFree((char *) input->version); 1364 if ((input->free != NULL) && (input->base != NULL)) 1365 input->free((xmlChar *) input->base); 1366 if (input->buf != NULL) 1367 xmlFreeParserInputBuffer(input->buf); 1368 xmlFree(input); 1369} 1370 1371/** 1372 * xmlNewInputStream: 1373 * @ctxt: an XML parser context 1374 * 1375 * Create a new input stream structure 1376 * Returns the new input stream or NULL 1377 */ 1378xmlParserInputPtr 1379xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1380 xmlParserInputPtr input; 1381 static int id = 0; 1382 1383 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1384 if (input == NULL) { 1385 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1386 return(NULL); 1387 } 1388 memset(input, 0, sizeof(xmlParserInput)); 1389 input->line = 1; 1390 input->col = 1; 1391 input->standalone = -1; 1392 /* 1393 * we don't care about thread reentrancy unicity for a single 1394 * parser context (and hence thread) is sufficient. 
1395 */ 1396 input->id = id++; 1397 return(input); 1398} 1399 1400/** 1401 * xmlNewIOInputStream: 1402 * @ctxt: an XML parser context 1403 * @input: an I/O Input 1404 * @enc: the charset encoding if known 1405 * 1406 * Create a new input stream structure encapsulating the @input into 1407 * a stream suitable for the parser. 1408 * 1409 * Returns the new input stream or NULL 1410 */ 1411xmlParserInputPtr 1412xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1413 xmlCharEncoding enc) { 1414 xmlParserInputPtr inputStream; 1415 1416 if (input == NULL) return(NULL); 1417 if (xmlParserDebugEntities) 1418 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1419 inputStream = xmlNewInputStream(ctxt); 1420 if (inputStream == NULL) { 1421 return(NULL); 1422 } 1423 inputStream->filename = NULL; 1424 inputStream->buf = input; 1425 inputStream->base = inputStream->buf->buffer->content; 1426 inputStream->cur = inputStream->buf->buffer->content; 1427 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1428 if (enc != XML_CHAR_ENCODING_NONE) { 1429 xmlSwitchEncoding(ctxt, enc); 1430 } 1431 1432 return(inputStream); 1433} 1434 1435/** 1436 * xmlNewEntityInputStream: 1437 * @ctxt: an XML parser context 1438 * @entity: an Entity pointer 1439 * 1440 * Create a new input stream based on an xmlEntityPtr 1441 * 1442 * Returns the new input stream or NULL 1443 */ 1444xmlParserInputPtr 1445xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1446 xmlParserInputPtr input; 1447 1448 if (entity == NULL) { 1449 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1450 NULL); 1451 return(NULL); 1452 } 1453 if (xmlParserDebugEntities) 1454 xmlGenericError(xmlGenericErrorContext, 1455 "new input from entity: %s\n", entity->name); 1456 if (entity->content == NULL) { 1457 switch (entity->etype) { 1458 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1459 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1460 entity->name); 
1461 break; 1462 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1463 case XML_EXTERNAL_PARAMETER_ENTITY: 1464 return(xmlLoadExternalEntity((char *) entity->URI, 1465 (char *) entity->ExternalID, ctxt)); 1466 case XML_INTERNAL_GENERAL_ENTITY: 1467 xmlErrInternal(ctxt, 1468 "Internal entity %s without content !\n", 1469 entity->name); 1470 break; 1471 case XML_INTERNAL_PARAMETER_ENTITY: 1472 xmlErrInternal(ctxt, 1473 "Internal parameter entity %s without content !\n", 1474 entity->name); 1475 break; 1476 case XML_INTERNAL_PREDEFINED_ENTITY: 1477 xmlErrInternal(ctxt, 1478 "Predefined entity %s without content !\n", 1479 entity->name); 1480 break; 1481 } 1482 return(NULL); 1483 } 1484 input = xmlNewInputStream(ctxt); 1485 if (input == NULL) { 1486 return(NULL); 1487 } 1488 if (entity->URI != NULL) 1489 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1490 input->base = entity->content; 1491 input->cur = entity->content; 1492 input->length = entity->length; 1493 input->end = &entity->content[input->length]; 1494 return(input); 1495} 1496 1497/** 1498 * xmlNewStringInputStream: 1499 * @ctxt: an XML parser context 1500 * @buffer: an memory buffer 1501 * 1502 * Create a new input stream based on a memory buffer. 
1503 * Returns the new input stream 1504 */ 1505xmlParserInputPtr 1506xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1507 xmlParserInputPtr input; 1508 1509 if (buffer == NULL) { 1510 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1511 NULL); 1512 return(NULL); 1513 } 1514 if (xmlParserDebugEntities) 1515 xmlGenericError(xmlGenericErrorContext, 1516 "new fixed input: %.30s\n", buffer); 1517 input = xmlNewInputStream(ctxt); 1518 if (input == NULL) { 1519 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1520 return(NULL); 1521 } 1522 input->base = buffer; 1523 input->cur = buffer; 1524 input->length = xmlStrlen(buffer); 1525 input->end = &buffer[input->length]; 1526 return(input); 1527} 1528 1529/** 1530 * xmlNewInputFromFile: 1531 * @ctxt: an XML parser context 1532 * @filename: the filename to use as entity 1533 * 1534 * Create a new input stream based on a file or an URL. 1535 * 1536 * Returns the new input stream or NULL in case of error 1537 */ 1538xmlParserInputPtr 1539xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1540 xmlParserInputBufferPtr buf; 1541 xmlParserInputPtr inputStream; 1542 char *directory = NULL; 1543 xmlChar *URI = NULL; 1544 1545 if (xmlParserDebugEntities) 1546 xmlGenericError(xmlGenericErrorContext, 1547 "new input from file: %s\n", filename); 1548 if (ctxt == NULL) return(NULL); 1549 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1550 if (buf == NULL) { 1551 if (filename == NULL) 1552 __xmlLoaderErr(ctxt, 1553 "failed to load external entity: NULL filename \n", 1554 NULL); 1555 else 1556 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1557 (const char *) filename); 1558 return(NULL); 1559 } 1560 1561 inputStream = xmlNewInputStream(ctxt); 1562 if (inputStream == NULL) 1563 return(NULL); 1564 1565 inputStream->buf = buf; 1566 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1567 if (inputStream == NULL) 1568 
return(NULL); 1569 1570 if (inputStream->filename == NULL) 1571 URI = xmlStrdup((xmlChar *) filename); 1572 else 1573 URI = xmlStrdup((xmlChar *) inputStream->filename); 1574 directory = xmlParserGetDirectory((const char *) URI); 1575 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1576 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1577 if (URI != NULL) xmlFree((char *) URI); 1578 inputStream->directory = directory; 1579 1580 inputStream->base = inputStream->buf->buffer->content; 1581 inputStream->cur = inputStream->buf->buffer->content; 1582 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1583 if ((ctxt->directory == NULL) && (directory != NULL)) 1584 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1585 return(inputStream); 1586} 1587 1588/************************************************************************ 1589 * * 1590 * Commodity functions to handle parser contexts * 1591 * * 1592 ************************************************************************/ 1593 1594/** 1595 * xmlInitParserCtxt: 1596 * @ctxt: an XML parser context 1597 * 1598 * Initialize a parser context 1599 * 1600 * Returns 0 in case of success and -1 in case of error 1601 */ 1602 1603int 1604xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1605{ 1606 xmlParserInputPtr input; 1607 1608 if(ctxt==NULL) { 1609 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1610 return(-1); 1611 } 1612 1613 xmlDefaultSAXHandlerInit(); 1614 1615 if (ctxt->dict == NULL) 1616 ctxt->dict = xmlDictCreate(); 1617 if (ctxt->dict == NULL) { 1618 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1619 return(-1); 1620 } 1621 if (ctxt->sax == NULL) 1622 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1623 if (ctxt->sax == NULL) { 1624 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1625 return(-1); 1626 } 1627 else 1628 xmlSAXVersion(ctxt->sax, 2); 1629 1630 ctxt->maxatts = 0; 1631 ctxt->atts = 
NULL; 1632 /* Allocate the Input stack */ 1633 if (ctxt->inputTab == NULL) { 1634 ctxt->inputTab = (xmlParserInputPtr *) 1635 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1636 ctxt->inputMax = 5; 1637 } 1638 if (ctxt->inputTab == NULL) { 1639 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1640 ctxt->inputNr = 0; 1641 ctxt->inputMax = 0; 1642 ctxt->input = NULL; 1643 return(-1); 1644 } 1645 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1646 xmlFreeInputStream(input); 1647 } 1648 ctxt->inputNr = 0; 1649 ctxt->input = NULL; 1650 1651 ctxt->version = NULL; 1652 ctxt->encoding = NULL; 1653 ctxt->standalone = -1; 1654 ctxt->hasExternalSubset = 0; 1655 ctxt->hasPErefs = 0; 1656 ctxt->html = 0; 1657 ctxt->external = 0; 1658 ctxt->instate = XML_PARSER_START; 1659 ctxt->token = 0; 1660 ctxt->directory = NULL; 1661 1662 /* Allocate the Node stack */ 1663 if (ctxt->nodeTab == NULL) { 1664 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1665 ctxt->nodeMax = 10; 1666 } 1667 if (ctxt->nodeTab == NULL) { 1668 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1669 ctxt->nodeNr = 0; 1670 ctxt->nodeMax = 0; 1671 ctxt->node = NULL; 1672 ctxt->inputNr = 0; 1673 ctxt->inputMax = 0; 1674 ctxt->input = NULL; 1675 return(-1); 1676 } 1677 ctxt->nodeNr = 0; 1678 ctxt->node = NULL; 1679 1680 /* Allocate the Name stack */ 1681 if (ctxt->nameTab == NULL) { 1682 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1683 ctxt->nameMax = 10; 1684 } 1685 if (ctxt->nameTab == NULL) { 1686 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1687 ctxt->nodeNr = 0; 1688 ctxt->nodeMax = 0; 1689 ctxt->node = NULL; 1690 ctxt->inputNr = 0; 1691 ctxt->inputMax = 0; 1692 ctxt->input = NULL; 1693 ctxt->nameNr = 0; 1694 ctxt->nameMax = 0; 1695 ctxt->name = NULL; 1696 return(-1); 1697 } 1698 ctxt->nameNr = 0; 1699 ctxt->name = NULL; 1700 1701 /* Allocate the space stack */ 1702 if (ctxt->spaceTab == NULL) { 1703 ctxt->spaceTab = (int *) 
xmlMalloc(10 * sizeof(int)); 1704 ctxt->spaceMax = 10; 1705 } 1706 if (ctxt->spaceTab == NULL) { 1707 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1708 ctxt->nodeNr = 0; 1709 ctxt->nodeMax = 0; 1710 ctxt->node = NULL; 1711 ctxt->inputNr = 0; 1712 ctxt->inputMax = 0; 1713 ctxt->input = NULL; 1714 ctxt->nameNr = 0; 1715 ctxt->nameMax = 0; 1716 ctxt->name = NULL; 1717 ctxt->spaceNr = 0; 1718 ctxt->spaceMax = 0; 1719 ctxt->space = NULL; 1720 return(-1); 1721 } 1722 ctxt->spaceNr = 1; 1723 ctxt->spaceMax = 10; 1724 ctxt->spaceTab[0] = -1; 1725 ctxt->space = &ctxt->spaceTab[0]; 1726 ctxt->userData = ctxt; 1727 ctxt->myDoc = NULL; 1728 ctxt->wellFormed = 1; 1729 ctxt->nsWellFormed = 1; 1730 ctxt->valid = 1; 1731 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1732 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1733 ctxt->pedantic = xmlPedanticParserDefaultValue; 1734 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1735 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1736 if (ctxt->keepBlanks == 0) 1737 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1738 1739 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1740 ctxt->vctxt.userData = ctxt; 1741 ctxt->vctxt.error = xmlParserValidityError; 1742 ctxt->vctxt.warning = xmlParserValidityWarning; 1743 if (ctxt->validate) { 1744 if (xmlGetWarningsDefaultValue == 0) 1745 ctxt->vctxt.warning = NULL; 1746 else 1747 ctxt->vctxt.warning = xmlParserValidityWarning; 1748 ctxt->vctxt.nodeMax = 0; 1749 } 1750 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1751 ctxt->record_info = 0; 1752 ctxt->nbChars = 0; 1753 ctxt->checkIndex = 0; 1754 ctxt->inSubset = 0; 1755 ctxt->errNo = XML_ERR_OK; 1756 ctxt->depth = 0; 1757 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1758 ctxt->catalogs = NULL; 1759 ctxt->nbentities = 0; 1760 xmlInitNodeInfoSeq(&ctxt->node_seq); 1761 return(0); 1762} 1763 1764/** 1765 * xmlFreeParserCtxt: 1766 * @ctxt: an XML parser context 1767 * 1768 * Free all the memory used by a parser 
context. However the parsed 1769 * document in ctxt->myDoc is not freed. 1770 */ 1771 1772void 1773xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1774{ 1775 xmlParserInputPtr input; 1776 1777 if (ctxt == NULL) return; 1778 1779 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1780 xmlFreeInputStream(input); 1781 } 1782 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1783 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1784 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1785 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1786 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1787 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1788 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1789 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1790 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1791#ifdef LIBXML_SAX1_ENABLED 1792 if ((ctxt->sax != NULL) && 1793 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1794#else 1795 if (ctxt->sax != NULL) 1796#endif /* LIBXML_SAX1_ENABLED */ 1797 xmlFree(ctxt->sax); 1798 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1799 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1800 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1801 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1802 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1803 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1804 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1805 if (ctxt->attsDefault != NULL) 1806 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1807 if (ctxt->attsSpecial != NULL) 1808 xmlHashFree(ctxt->attsSpecial, NULL); 1809 if (ctxt->freeElems != NULL) { 1810 xmlNodePtr cur, next; 1811 1812 cur = ctxt->freeElems; 1813 while (cur != NULL) { 1814 next = cur->next; 1815 xmlFree(cur); 1816 cur = next; 1817 } 1818 } 1819 if (ctxt->freeAttrs != NULL) { 1820 xmlAttrPtr cur, 
next; 1821 1822 cur = ctxt->freeAttrs; 1823 while (cur != NULL) { 1824 next = cur->next; 1825 xmlFree(cur); 1826 cur = next; 1827 } 1828 } 1829 /* 1830 * cleanup the error strings 1831 */ 1832 if (ctxt->lastError.message != NULL) 1833 xmlFree(ctxt->lastError.message); 1834 if (ctxt->lastError.file != NULL) 1835 xmlFree(ctxt->lastError.file); 1836 if (ctxt->lastError.str1 != NULL) 1837 xmlFree(ctxt->lastError.str1); 1838 if (ctxt->lastError.str2 != NULL) 1839 xmlFree(ctxt->lastError.str2); 1840 if (ctxt->lastError.str3 != NULL) 1841 xmlFree(ctxt->lastError.str3); 1842 1843#ifdef LIBXML_CATALOG_ENABLED 1844 if (ctxt->catalogs != NULL) 1845 xmlCatalogFreeLocal(ctxt->catalogs); 1846#endif 1847 xmlFree(ctxt); 1848} 1849 1850/** 1851 * xmlNewParserCtxt: 1852 * 1853 * Allocate and initialize a new parser context. 1854 * 1855 * Returns the xmlParserCtxtPtr or NULL 1856 */ 1857 1858xmlParserCtxtPtr 1859xmlNewParserCtxt(void) 1860{ 1861 xmlParserCtxtPtr ctxt; 1862 1863 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1864 if (ctxt == NULL) { 1865 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1866 return(NULL); 1867 } 1868 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1869 if (xmlInitParserCtxt(ctxt) < 0) { 1870 xmlFreeParserCtxt(ctxt); 1871 return(NULL); 1872 } 1873 return(ctxt); 1874} 1875 1876/************************************************************************ 1877 * * 1878 * Handling of node informations * 1879 * * 1880 ************************************************************************/ 1881 1882/** 1883 * xmlClearParserCtxt: 1884 * @ctxt: an XML parser context 1885 * 1886 * Clear (release owned resources) and reinitialize a parser context 1887 */ 1888 1889void 1890xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1891{ 1892 if (ctxt==NULL) 1893 return; 1894 xmlClearNodeInfoSeq(&ctxt->node_seq); 1895 xmlCtxtReset(ctxt); 1896} 1897 1898 1899/** 1900 * xmlParserFindNodeInfo: 1901 * @ctx: an XML parser context 1902 * @node: an XML node within the tree 1903 
* 1904 * Find the parser node info struct for a given node 1905 * 1906 * Returns an xmlParserNodeInfo block pointer or NULL 1907 */ 1908const xmlParserNodeInfo * 1909xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1910{ 1911 unsigned long pos; 1912 1913 if ((ctx == NULL) || (node == NULL)) 1914 return (NULL); 1915 /* Find position where node should be at */ 1916 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1917 if (pos < ctx->node_seq.length 1918 && ctx->node_seq.buffer[pos].node == node) 1919 return &ctx->node_seq.buffer[pos]; 1920 else 1921 return NULL; 1922} 1923 1924 1925/** 1926 * xmlInitNodeInfoSeq: 1927 * @seq: a node info sequence pointer 1928 * 1929 * -- Initialize (set to initial state) node info sequence 1930 */ 1931void 1932xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1933{ 1934 if (seq == NULL) 1935 return; 1936 seq->length = 0; 1937 seq->maximum = 0; 1938 seq->buffer = NULL; 1939} 1940 1941/** 1942 * xmlClearNodeInfoSeq: 1943 * @seq: a node info sequence pointer 1944 * 1945 * -- Clear (release memory and reinitialize) node 1946 * info sequence 1947 */ 1948void 1949xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1950{ 1951 if (seq == NULL) 1952 return; 1953 if (seq->buffer != NULL) 1954 xmlFree(seq->buffer); 1955 xmlInitNodeInfoSeq(seq); 1956} 1957 1958/** 1959 * xmlParserFindNodeInfoIndex: 1960 * @seq: a node info sequence pointer 1961 * @node: an XML node pointer 1962 * 1963 * 1964 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1965 * the given node is or should be at in a sorted sequence 1966 * 1967 * Returns a long indicating the position of the record 1968 */ 1969unsigned long 1970xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1971 const xmlNodePtr node) 1972{ 1973 unsigned long upper, lower, middle; 1974 int found = 0; 1975 1976 if ((seq == NULL) || (node == NULL)) 1977 return ((unsigned long) -1); 1978 1979 /* Do a binary search for the key */ 1980 lower = 1; 1981 
upper = seq->length; 1982 middle = 0; 1983 while (lower <= upper && !found) { 1984 middle = lower + (upper - lower) / 2; 1985 if (node == seq->buffer[middle - 1].node) 1986 found = 1; 1987 else if (node < seq->buffer[middle - 1].node) 1988 upper = middle - 1; 1989 else 1990 lower = middle + 1; 1991 } 1992 1993 /* Return position */ 1994 if (middle == 0 || seq->buffer[middle - 1].node < node) 1995 return middle; 1996 else 1997 return middle - 1; 1998} 1999 2000 2001/** 2002 * xmlParserAddNodeInfo: 2003 * @ctxt: an XML parser context 2004 * @info: a node info sequence pointer 2005 * 2006 * Insert node info record into the sorted sequence 2007 */ 2008void 2009xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2010 const xmlParserNodeInfoPtr info) 2011{ 2012 unsigned long pos; 2013 2014 if ((ctxt == NULL) || (info == NULL)) return; 2015 2016 /* Find pos and check to see if node is already in the sequence */ 2017 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2018 info->node); 2019 2020 if ((pos < ctxt->node_seq.length) && 2021 (ctxt->node_seq.buffer != NULL) && 2022 (ctxt->node_seq.buffer[pos].node == info->node)) { 2023 ctxt->node_seq.buffer[pos] = *info; 2024 } 2025 2026 /* Otherwise, we need to add new node to buffer */ 2027 else { 2028 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 2029 xmlParserNodeInfo *tmp_buffer; 2030 unsigned int byte_size; 2031 2032 if (ctxt->node_seq.maximum == 0) 2033 ctxt->node_seq.maximum = 2; 2034 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2035 (2 * ctxt->node_seq.maximum)); 2036 2037 if (ctxt->node_seq.buffer == NULL) 2038 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2039 else 2040 tmp_buffer = 2041 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2042 byte_size); 2043 2044 if (tmp_buffer == NULL) { 2045 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2046 return; 2047 } 2048 ctxt->node_seq.buffer = tmp_buffer; 2049 ctxt->node_seq.maximum *= 2; 2050 } 2051 2052 /* If position is not at end, 
move elements out of the way */ 2053 if (pos != ctxt->node_seq.length) { 2054 unsigned long i; 2055 2056 for (i = ctxt->node_seq.length; i > pos; i--) 2057 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2058 } 2059 2060 /* Copy element and increase length */ 2061 ctxt->node_seq.buffer[pos] = *info; 2062 ctxt->node_seq.length++; 2063 } 2064} 2065 2066/************************************************************************ 2067 * * 2068 * Defaults settings * 2069 * * 2070 ************************************************************************/ 2071/** 2072 * xmlPedanticParserDefault: 2073 * @val: int 0 or 1 2074 * 2075 * Set and return the previous value for enabling pedantic warnings. 2076 * 2077 * Returns the last value for 0 for no substitution, 1 for substitution. 2078 */ 2079 2080int 2081xmlPedanticParserDefault(int val) { 2082 int old = xmlPedanticParserDefaultValue; 2083 2084 xmlPedanticParserDefaultValue = val; 2085 return(old); 2086} 2087 2088/** 2089 * xmlLineNumbersDefault: 2090 * @val: int 0 or 1 2091 * 2092 * Set and return the previous value for enabling line numbers in elements 2093 * contents. This may break on old application and is turned off by default. 2094 * 2095 * Returns the last value for 0 for no substitution, 1 for substitution. 2096 */ 2097 2098int 2099xmlLineNumbersDefault(int val) { 2100 int old = xmlLineNumbersDefaultValue; 2101 2102 xmlLineNumbersDefaultValue = val; 2103 return(old); 2104} 2105 2106/** 2107 * xmlSubstituteEntitiesDefault: 2108 * @val: int 0 or 1 2109 * 2110 * Set and return the previous value for default entity support. 2111 * Initially the parser always keep entity references instead of substituting 2112 * entity values in the output. This function has to be used to change the 2113 * default parser behavior 2114 * SAX::substituteEntities() has to be used for changing that on a file by 2115 * file basis. 2116 * 2117 * Returns the last value for 0 for no substitution, 1 for substitution. 
2118 */ 2119 2120int 2121xmlSubstituteEntitiesDefault(int val) { 2122 int old = xmlSubstituteEntitiesDefaultValue; 2123 2124 xmlSubstituteEntitiesDefaultValue = val; 2125 return(old); 2126} 2127 2128/** 2129 * xmlKeepBlanksDefault: 2130 * @val: int 0 or 1 2131 * 2132 * Set and return the previous value for default blanks text nodes support. 2133 * The 1.x version of the parser used an heuristic to try to detect 2134 * ignorable white spaces. As a result the SAX callback was generating 2135 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2136 * using the DOM output text nodes containing those blanks were not generated. 2137 * The 2.x and later version will switch to the XML standard way and 2138 * ignorableWhitespace() are only generated when running the parser in 2139 * validating mode and when the current element doesn't allow CDATA or 2140 * mixed content. 2141 * This function is provided as a way to force the standard behavior 2142 * on 1.X libs and to switch back to the old mode for compatibility when 2143 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2144 * by using xmlIsBlankNode() commodity function to detect the "empty" 2145 * nodes generated. 2146 * This value also affect autogeneration of indentation when saving code 2147 * if blanks sections are kept, indentation is not generated. 2148 * 2149 * Returns the last value for 0 for no substitution, 1 for substitution. 2150 */ 2151 2152int 2153xmlKeepBlanksDefault(int val) { 2154 int old = xmlKeepBlanksDefaultValue; 2155 2156 xmlKeepBlanksDefaultValue = val; 2157 if (!val) xmlIndentTreeOutput = 1; 2158 return(old); 2159} 2160 2161#define bottom_parserInternals 2162#include "elfgcchack.h" 2163