parser.c revision 90fb02caaf63d480fd05364b5c19c6c84c6d8323
1/* 2 * parser.c : an XML 1.0 non-verifying parser 3 * 4 * See Copyright for the status of this software. 5 * 6 * Daniel.Veillard@w3.org 7 */ 8 9#ifdef WIN32 10#include "win32config.h" 11#else 12#include "config.h" 13#endif 14 15#include <stdio.h> 16#include <string.h> /* for memset() only */ 17#ifdef HAVE_CTYPE_H 18#include <ctype.h> 19#endif 20#ifdef HAVE_STDLIB_H 21#include <stdlib.h> 22#endif 23#ifdef HAVE_SYS_STAT_H 24#include <sys/stat.h> 25#endif 26#ifdef HAVE_FCNTL_H 27#include <fcntl.h> 28#endif 29#ifdef HAVE_UNISTD_H 30#include <unistd.h> 31#endif 32#ifdef HAVE_ZLIB_H 33#include <zlib.h> 34#endif 35 36#include "xmlmemory.h" 37#include "tree.h" 38#include "parser.h" 39#include "entities.h" 40#include "encoding.h" 41#include "valid.h" 42#include "parserInternals.h" 43#include "xmlIO.h" 44#include "xml-error.h" 45 46#define XML_PARSER_BIG_BUFFER_SIZE 1000 47#define XML_PARSER_BUFFER_SIZE 100 48 49const char *xmlParserVersion = LIBXML_VERSION; 50 51/* 52 * List of XML prefixed PI allowed by W3C specs 53 */ 54 55const char *xmlW3CPIs[] = { 56 "xml-stylesheet", 57 NULL 58}; 59 60/************************************************************************ 61 * * 62 * Input handling functions for progressive parsing * 63 * * 64 ************************************************************************/ 65 66/* #define DEBUG_INPUT */ 67/* #define DEBUG_STACK */ 68/* #define DEBUG_PUSH */ 69 70 71#define INPUT_CHUNK 250 72/* we need to keep enough input to show errors in context */ 73#define LINE_LEN 80 74 75#ifdef DEBUG_INPUT 76#define CHECK_BUFFER(in) check_buffer(in) 77 78void check_buffer(xmlParserInputPtr in) { 79 if (in->base != in->buf->buffer->content) { 80 fprintf(stderr, "xmlParserInput: base mismatch problem\n"); 81 } 82 if (in->cur < in->base) { 83 fprintf(stderr, "xmlParserInput: cur < base problem\n"); 84 } 85 if (in->cur > in->base + in->buf->buffer->use) { 86 fprintf(stderr, "xmlParserInput: cur > base + use problem\n"); 87 } 88 fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n", 89 (int) in, (int) in->buf->buffer->content, in->cur - in->base, 90 in->buf->buffer->use, in->buf->buffer->size); 91} 92 93#else 94#define CHECK_BUFFER(in) 95#endif 96 97 98/** 99 * xmlParserInputRead: 100 * @in: an XML parser input 101 * @len: an indicative size for the lookahead 102 * 103 * This function refresh the input for the parser. It doesn't try to 104 * preserve pointers to the input buffer, and discard already read data 105 * 106 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 107 * end of this entity 108 */ 109int 110xmlParserInputRead(xmlParserInputPtr in, int len) { 111 int ret; 112 int used; 113 int index; 114 115#ifdef DEBUG_INPUT 116 fprintf(stderr, "Read\n"); 117#endif 118 if (in->buf == NULL) return(-1); 119 if (in->base == NULL) return(-1); 120 if (in->cur == NULL) return(-1); 121 if (in->buf->buffer == NULL) return(-1); 122 123 CHECK_BUFFER(in); 124 125 used = in->cur - in->buf->buffer->content; 126 ret = xmlBufferShrink(in->buf->buffer, used); 127 if (ret > 0) { 128 in->cur -= ret; 129 in->consumed += ret; 130 } 131 ret = xmlParserInputBufferRead(in->buf, len); 132 if (in->base != in->buf->buffer->content) { 133 /* 134 * the buffer has been realloced 135 */ 136 index = in->cur - in->base; 137 in->base = in->buf->buffer->content; 138 in->cur = &in->buf->buffer->content[index]; 139 } 140 141 CHECK_BUFFER(in); 142 143 return(ret); 144} 145 146/** 147 * xmlParserInputGrow: 148 * @in: an XML parser input 149 * @len: an indicative size for the lookahead 150 * 151 * This function increase the input for the parser. It tries to 152 * preserve pointers to the input buffer, and keep already read data 153 * 154 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 155 * end of this entity 156 */ 157int 158xmlParserInputGrow(xmlParserInputPtr in, int len) { 159 int ret; 160 int index; 161 162#ifdef DEBUG_INPUT 163 fprintf(stderr, "Grow\n"); 164#endif 165 if (in->buf == NULL) return(-1); 166 if (in->base == NULL) return(-1); 167 if (in->cur == NULL) return(-1); 168 if (in->buf->buffer == NULL) return(-1); 169 170 CHECK_BUFFER(in); 171 172 index = in->cur - in->base; 173 if (in->buf->buffer->use > index + INPUT_CHUNK) { 174 175 CHECK_BUFFER(in); 176 177 return(0); 178 } 179 if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) || 180 (in->buf->file != NULL) || 181#ifdef HAVE_ZLIB_H 182 (in->buf->gzfile != NULL) || 183#endif 184 (in->buf->fd >= 0)) 185 ret = xmlParserInputBufferGrow(in->buf, len); 186 else 187 return(0); 188 189 /* 190 * NOTE : in->base may be a "dandling" i.e. freed pointer in this 191 * block, but we use it really as an integer to do some 192 * pointer arithmetic. Insure will raise it as a bug but in 193 * that specific case, that's not ! 194 */ 195 if (in->base != in->buf->buffer->content) { 196 /* 197 * the buffer has been realloced 198 */ 199 index = in->cur - in->base; 200 in->base = in->buf->buffer->content; 201 in->cur = &in->buf->buffer->content[index]; 202 } 203 204 CHECK_BUFFER(in); 205 206 return(ret); 207} 208 209/** 210 * xmlParserInputShrink: 211 * @in: an XML parser input 212 * 213 * This function removes used input for the parser. 214 */ 215void 216xmlParserInputShrink(xmlParserInputPtr in) { 217 int used; 218 int ret; 219 int index; 220 221#ifdef DEBUG_INPUT 222 fprintf(stderr, "Shrink\n"); 223#endif 224 if (in->buf == NULL) return; 225 if (in->base == NULL) return; 226 if (in->cur == NULL) return; 227 if (in->buf->buffer == NULL) return; 228 229 CHECK_BUFFER(in); 230 231 used = in->cur - in->buf->buffer->content; 232 if (used > INPUT_CHUNK) { 233 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 234 if (ret > 0) { 235 in->cur -= ret; 236 in->consumed += ret; 237 } 238 } 239 240 CHECK_BUFFER(in); 241 242 if (in->buf->buffer->use > INPUT_CHUNK) { 243 return; 244 } 245 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 246 if (in->base != in->buf->buffer->content) { 247 /* 248 * the buffer has been realloced 249 */ 250 index = in->cur - in->base; 251 in->base = in->buf->buffer->content; 252 in->cur = &in->buf->buffer->content[index]; 253 } 254 255 CHECK_BUFFER(in); 256} 257 258/************************************************************************ 259 * * 260 * Parser stacks related functions and macros * 261 * * 262 ************************************************************************/ 263 264int xmlSubstituteEntitiesDefaultValue = 0; 265int xmlDoValidityCheckingDefaultValue = 0; 266int xmlKeepBlanksDefaultValue = 0; 267xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 268 const xmlChar ** str); 269 270/* 271 * Generic function for accessing stacks in the Parser Context 272 */ 273 274#define PUSH_AND_POP(scope, type, name) \ 275scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ 276 if (ctxt->name##Nr >= ctxt->name##Max) { \ 277 ctxt->name##Max *= 2; \ 278 ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \ 279 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ 280 if (ctxt->name##Tab == NULL) { \ 281 fprintf(stderr, "realloc failed !\n"); \ 282 return(0); \ 283 } \ 284 } \ 285 ctxt->name##Tab[ctxt->name##Nr] = value; \ 286 ctxt->name = value; \ 287 return(ctxt->name##Nr++); \ 288} \ 289scope type name##Pop(xmlParserCtxtPtr ctxt) { \ 290 type ret; \ 291 if (ctxt->name##Nr <= 0) return(0); \ 292 ctxt->name##Nr--; \ 293 if (ctxt->name##Nr > 0) \ 294 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ 295 else \ 296 ctxt->name = NULL; \ 297 ret = ctxt->name##Tab[ctxt->name##Nr]; \ 298 ctxt->name##Tab[ctxt->name##Nr] = 0; \ 299 return(ret); \ 300} \ 301 302PUSH_AND_POP(extern, xmlParserInputPtr, input) 303PUSH_AND_POP(extern, xmlNodePtr, node) 304PUSH_AND_POP(extern, xmlChar*, name) 305 306/* 307 * Macros for accessing the content. Those should be used only by the parser, 308 * and not exported. 309 * 310 * Dirty macros, i.e. one need to make assumption on the context to use them 311 * 312 * CUR_PTR return the current pointer to the xmlChar to be parsed. 313 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 314 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled 315 * in UNICODE mode. This should be used internally by the parser 316 * only to compare to ASCII values otherwise it would break when 317 * running with UTF-8 encoding. 318 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 319 * to compare on ASCII based substring. 320 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 321 * strings within the parser. 322 * 323 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 324 * 325 * CURRENT Returns the current char value, with the full decoding of 326 * UTF-8 if we are using this mode. It returns an int. 327 * NEXT Skip to the next character, this does the proper decoding 328 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 329 * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly 330 */ 331 332#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur)) 333#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val) 334#define NXT(val) ctxt->input->cur[(val)] 335#define CUR_PTR ctxt->input->cur 336#define SHRINK xmlParserInputShrink(ctxt->input); \ 337 if ((*ctxt->input->cur == 0) && \ 338 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 339 xmlPopInput(ctxt) 340 341#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 342 if ((*ctxt->input->cur == 0) && \ 343 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 344 xmlPopInput(ctxt) 345 346#define SKIP_BLANKS \ 347 do { \ 348 while (IS_BLANK(CUR)) NEXT; \ 349 while ((CUR == 0) && (ctxt->inputNr > 1)) \ 350 xmlPopInput(ctxt); \ 351 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 352 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \ 353 } while (IS_BLANK(CUR)); 354 355#define CURRENT (*ctxt->input->cur) 356#define NEXT { \ 357 if (ctxt->token != 0) ctxt->token = 0; \ 358 else { \ 359 if ((*ctxt->input->cur == 0) && \ 360 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { \ 361 xmlPopInput(ctxt); \ 362 } else { \ 363 if (*(ctxt->input->cur) == '\n') { \ 364 ctxt->input->line++; ctxt->input->col = 1; \ 365 } else ctxt->input->col++; \ 366 ctxt->input->cur++; \ 367 ctxt->nbChars++; \ 368 if (*ctxt->input->cur == 0) \ 369 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 370 } \ 371 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 372 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \ 373}} 374 375 376/************************************************************************ 377 * * 378 * Commodity functions to handle entities processing * 379 * * 380 ************************************************************************/ 381 382/** 383 * xmlPopInput: 384 * @ctxt: an XML parser context 385 * 386 * xmlPopInput: the current input pointed by ctxt->input came to an end 387 * pop it and return the next char. 388 * 389 * Returns the current xmlChar in the parser context 390 */ 391xmlChar 392xmlPopInput(xmlParserCtxtPtr ctxt) { 393 if (ctxt->inputNr == 1) return(0); /* End of main Input */ 394 xmlFreeInputStream(inputPop(ctxt)); 395 if ((*ctxt->input->cur == 0) && 396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 397 return(xmlPopInput(ctxt)); 398 return(CUR); 399} 400 401/** 402 * xmlPushInput: 403 * @ctxt: an XML parser context 404 * @input: an XML parser input fragment (entity, XML fragment ...). 405 * 406 * xmlPushInput: switch to a new input stream which is stacked on top 407 * of the previous one(s). 408 */ 409void 410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 411 if (input == NULL) return; 412 inputPush(ctxt, input); 413} 414 415/** 416 * xmlFreeInputStream: 417 * @input: an xmlParserInputPtr 418 * 419 * Free up an input stream. 420 */ 421void 422xmlFreeInputStream(xmlParserInputPtr input) { 423 if (input == NULL) return; 424 425 if (input->filename != NULL) xmlFree((char *) input->filename); 426 if (input->directory != NULL) xmlFree((char *) input->directory); 427 if ((input->free != NULL) && (input->base != NULL)) 428 input->free((xmlChar *) input->base); 429 if (input->buf != NULL) 430 xmlFreeParserInputBuffer(input->buf); 431 memset(input, -1, sizeof(xmlParserInput)); 432 xmlFree(input); 433} 434 435/** 436 * xmlNewInputStream: 437 * @ctxt: an XML parser context 438 * 439 * Create a new input stream structure 440 * Returns the new input stream or NULL 441 */ 442xmlParserInputPtr 443xmlNewInputStream(xmlParserCtxtPtr ctxt) { 444 xmlParserInputPtr input; 445 446 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 447 if (input == NULL) { 448 ctxt->errNo = XML_ERR_NO_MEMORY; 449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 450 ctxt->sax->error(ctxt->userData, 451 "malloc: couldn't allocate a new input stream\n"); 452 ctxt->errNo = XML_ERR_NO_MEMORY; 453 return(NULL); 454 } 455 input->filename = NULL; 456 input->directory = NULL; 457 input->base = NULL; 458 input->cur = NULL; 459 input->buf = NULL; 460 input->line = 1; 461 input->col = 1; 462 input->buf = NULL; 463 input->free = NULL; 464 input->consumed = 0; 465 input->length = 0; 466 return(input); 467} 468 469/** 470 * xmlNewEntityInputStream: 471 * @ctxt: an XML parser context 472 * @entity: an Entity pointer 473 * 474 * Create a new input stream based on an xmlEntityPtr 475 * 476 * Returns the new input stream or NULL 477 */ 478xmlParserInputPtr 479xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 480 xmlParserInputPtr input; 481 482 if (entity == NULL) { 483 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 485 ctxt->sax->error(ctxt->userData, 486 "internal: xmlNewEntityInputStream entity = NULL\n"); 487 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 488 return(NULL); 489 } 490 if (entity->content == NULL) { 491 switch (entity->type) { 492 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 493 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 495 ctxt->sax->error(ctxt->userData, 496 "xmlNewEntityInputStream unparsed entity !\n"); 497 break; 498 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 499 case XML_EXTERNAL_PARAMETER_ENTITY: 500 return(xmlLoadExternalEntity((char *) entity->SystemID, 501 (char *) entity->ExternalID, ctxt)); 502 case XML_INTERNAL_GENERAL_ENTITY: 503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 504 ctxt->sax->error(ctxt->userData, 505 "Internal entity %s without content !\n", entity->name); 506 break; 507 case XML_INTERNAL_PARAMETER_ENTITY: 508 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 510 ctxt->sax->error(ctxt->userData, 511 "Internal parameter entity %s without content !\n", entity->name); 512 break; 513 case XML_INTERNAL_PREDEFINED_ENTITY: 514 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 516 ctxt->sax->error(ctxt->userData, 517 "Predefined entity %s without content !\n", entity->name); 518 break; 519 } 520 return(NULL); 521 } 522 input = xmlNewInputStream(ctxt); 523 if (input == NULL) { 524 return(NULL); 525 } 526 input->filename = (char *) entity->SystemID; /* TODO !!! char <- xmlChar */ 527 input->base = entity->content; 528 input->cur = entity->content; 529 input->length = entity->length; 530 return(input); 531} 532 533/** 534 * xmlNewStringInputStream: 535 * @ctxt: an XML parser context 536 * @buffer: an memory buffer 537 * 538 * Create a new input stream based on a memory buffer. 539 * Returns the new input stream 540 */ 541xmlParserInputPtr 542xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 543 xmlParserInputPtr input; 544 545 if (buffer == NULL) { 546 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 548 ctxt->sax->error(ctxt->userData, 549 "internal: xmlNewStringInputStream string = NULL\n"); 550 return(NULL); 551 } 552 input = xmlNewInputStream(ctxt); 553 if (input == NULL) { 554 return(NULL); 555 } 556 input->base = buffer; 557 input->cur = buffer; 558 input->length = xmlStrlen(buffer); 559 return(input); 560} 561 562/** 563 * xmlNewInputFromFile: 564 * @ctxt: an XML parser context 565 * @filename: the filename to use as entity 566 * 567 * Create a new input stream based on a file. 568 * 569 * Returns the new input stream or NULL in case of error 570 */ 571xmlParserInputPtr 572xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 573 xmlParserInputBufferPtr buf; 574 xmlParserInputPtr inputStream; 575 char *directory = NULL; 576 577 if (ctxt == NULL) return(NULL); 578 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 579 if (buf == NULL) { 580 char name[XML_PARSER_BIG_BUFFER_SIZE]; 581 582 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) { 583#ifdef WIN32 584 sprintf(name, "%s\\%s", ctxt->input->directory, filename); 585#else 586 sprintf(name, "%s/%s", ctxt->input->directory, filename); 587#endif 588 buf = xmlParserInputBufferCreateFilename(name, 589 XML_CHAR_ENCODING_NONE); 590 if (buf != NULL) 591 directory = xmlParserGetDirectory(name); 592 } 593 if ((buf == NULL) && (ctxt->directory != NULL)) { 594#ifdef WIN32 595 sprintf(name, "%s\\%s", ctxt->directory, filename); 596#else 597 sprintf(name, "%s/%s", ctxt->directory, filename); 598#endif 599 buf = xmlParserInputBufferCreateFilename(name, 600 XML_CHAR_ENCODING_NONE); 601 if (buf != NULL) 602 directory = xmlParserGetDirectory(name); 603 } 604 if (buf == NULL) 605 return(NULL); 606 } 607 if (directory == NULL) 608 directory = xmlParserGetDirectory(filename); 609 610 inputStream = xmlNewInputStream(ctxt); 611 if (inputStream == NULL) { 612 if (directory != NULL) xmlFree((char *) directory); 613 return(NULL); 614 } 615 616 inputStream->filename = xmlMemStrdup(filename); 617 inputStream->directory = directory; 618 inputStream->buf = buf; 619 620 inputStream->base = inputStream->buf->buffer->content; 621 inputStream->cur = inputStream->buf->buffer->content; 622 if ((ctxt->directory == NULL) && (directory != NULL)) 623 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 624 return(inputStream); 625} 626 627/************************************************************************ 628 * * 629 * Commodity functions to handle parser contexts * 630 * * 631 ************************************************************************/ 632 633/** 634 * xmlInitParserCtxt: 635 * @ctxt: an XML parser context 636 * 637 * Initialize a parser context 638 */ 639 640void 641xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 642{ 643 xmlSAXHandler *sax; 644 645 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 646 if (sax == NULL) { 647 fprintf(stderr, "xmlInitParserCtxt: out of memory\n"); 648 } 649 650 /* Allocate the Input stack */ 651 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr)); 652 ctxt->inputNr = 0; 653 ctxt->inputMax = 5; 654 ctxt->input = NULL; 655 ctxt->version = NULL; 656 ctxt->encoding = NULL; 657 ctxt->standalone = -1; 658 ctxt->hasExternalSubset = 0; 659 ctxt->hasPErefs = 0; 660 ctxt->html = 0; 661 ctxt->external = 0; 662 ctxt->instate = XML_PARSER_START; 663 ctxt->token = 0; 664 ctxt->directory = NULL; 665 666 /* Allocate the Node stack */ 667 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 668 ctxt->nodeNr = 0; 669 ctxt->nodeMax = 10; 670 ctxt->node = NULL; 671 672 /* Allocate the Name stack */ 673 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 674 ctxt->nameNr = 0; 675 ctxt->nameMax = 10; 676 ctxt->name = NULL; 677 678 if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler; 679 else { 680 ctxt->sax = sax; 681 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler)); 682 } 683 ctxt->userData = ctxt; 684 ctxt->myDoc = NULL; 685 ctxt->wellFormed = 1; 686 ctxt->valid = 1; 687 ctxt->validate = xmlDoValidityCheckingDefaultValue; 688 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 689 ctxt->vctxt.userData = ctxt; 690 if (ctxt->validate) { 691 ctxt->vctxt.error = xmlParserValidityError; 692 ctxt->vctxt.warning = xmlParserValidityWarning; 693 } else { 694 ctxt->vctxt.error = NULL; 695 ctxt->vctxt.warning = NULL; 696 } 697 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 698 ctxt->record_info = 0; 699 ctxt->nbChars = 0; 700 ctxt->checkIndex = 0; 701 ctxt->errNo = XML_ERR_OK; 702 xmlInitNodeInfoSeq(&ctxt->node_seq); 703} 704 705/** 706 * xmlFreeParserCtxt: 707 * @ctxt: an XML parser context 708 * 709 * Free all the memory used by a parser context. However the parsed 710 * document in ctxt->myDoc is not freed. 711 */ 712 713void 714xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 715{ 716 xmlParserInputPtr input; 717 xmlChar *oldname; 718 719 if (ctxt == NULL) return; 720 721 while ((input = inputPop(ctxt)) != NULL) { 722 xmlFreeInputStream(input); 723 } 724 while ((oldname = namePop(ctxt)) != NULL) { 725 xmlFree(oldname); 726 } 727 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab); 728 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 729 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 730 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 731 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 732 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler)) 733 xmlFree(ctxt->sax); 734 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 735 xmlFree(ctxt); 736} 737 738/** 739 * xmlNewParserCtxt: 740 * 741 * Allocate and initialize a new parser context. 742 * 743 * Returns the xmlParserCtxtPtr or NULL 744 */ 745 746xmlParserCtxtPtr 747xmlNewParserCtxt() 748{ 749 xmlParserCtxtPtr ctxt; 750 751 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 752 if (ctxt == NULL) { 753 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n"); 754 perror("malloc"); 755 return(NULL); 756 } 757 xmlInitParserCtxt(ctxt); 758 return(ctxt); 759} 760 761/** 762 * xmlClearParserCtxt: 763 * @ctxt: an XML parser context 764 * 765 * Clear (release owned resources) and reinitialize a parser context 766 */ 767 768void 769xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 770{ 771 xmlClearNodeInfoSeq(&ctxt->node_seq); 772 xmlInitParserCtxt(ctxt); 773} 774 775/************************************************************************ 776 * * 777 * Commodity functions to handle entities * 778 * * 779 ************************************************************************/ 780 781void xmlParserHandleReference(xmlParserCtxtPtr ctxt); 782void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 783xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 784 const xmlChar **str); 785 786/** 787 * xmlParseCharRef: 788 * @ctxt: an XML parser context 789 * 790 * parse Reference declarations 791 * 792 * [66] CharRef ::= '&#' [0-9]+ ';' | 793 * '&#x' [0-9a-fA-F]+ ';' 794 * 795 * [ WFC: Legal Character ] 796 * Characters referred to using character references must match the 797 * production for Char. 798 * 799 * Returns the value parsed (as an int), 0 in case of error 800 */ 801int 802xmlParseCharRef(xmlParserCtxtPtr ctxt) { 803 int val = 0; 804 805 if (ctxt->token != 0) { 806 val = ctxt->token; 807 ctxt->token = 0; 808 return(val); 809 } 810 if ((CUR == '&') && (NXT(1) == '#') && 811 (NXT(2) == 'x')) { 812 SKIP(3); 813 while (CUR != ';') { 814 if ((CUR >= '0') && (CUR <= '9')) 815 val = val * 16 + (CUR - '0'); 816 else if ((CUR >= 'a') && (CUR <= 'f')) 817 val = val * 16 + (CUR - 'a') + 10; 818 else if ((CUR >= 'A') && (CUR <= 'F')) 819 val = val * 16 + (CUR - 'A') + 10; 820 else { 821 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 823 ctxt->sax->error(ctxt->userData, 824 "xmlParseCharRef: invalid hexadecimal value\n"); 825 ctxt->wellFormed = 0; 826 val = 0; 827 break; 828 } 829 NEXT; 830 } 831 if (CUR == ';') 832 SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */ 833 } else if ((CUR == '&') && (NXT(1) == '#')) { 834 SKIP(2); 835 while (CUR != ';') { 836 if ((CUR >= '0') && (CUR <= '9')) 837 val = val * 10 + (CUR - '0'); 838 else { 839 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 841 ctxt->sax->error(ctxt->userData, 842 "xmlParseCharRef: invalid decimal value\n"); 843 ctxt->wellFormed = 0; 844 val = 0; 845 break; 846 } 847 NEXT; 848 } 849 if (CUR == ';') 850 SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */ 851 } else { 852 ctxt->errNo = XML_ERR_INVALID_CHARREF; 853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 854 ctxt->sax->error(ctxt->userData, 855 "xmlParseCharRef: invalid value\n"); 856 ctxt->wellFormed = 0; 857 } 858 859 /* 860 * [ WFC: Legal Character ] 861 * Characters referred to using character references must match the 862 * production for Char. 863 */ 864 if (IS_CHAR(val)) { 865 return(val); 866 } else { 867 ctxt->errNo = XML_ERR_INVALID_CHAR; 868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 869 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", 870 val); 871 ctxt->wellFormed = 0; 872 } 873 return(0); 874} 875 876/** 877 * xmlParseStringCharRef: 878 * @ctxt: an XML parser context 879 * @str: a pointer to an index in the string 880 * 881 * parse Reference declarations, variant parsing from a string rather 882 * than an an input flow. 883 * 884 * [66] CharRef ::= '&#' [0-9]+ ';' | 885 * '&#x' [0-9a-fA-F]+ ';' 886 * 887 * [ WFC: Legal Character ] 888 * Characters referred to using character references must match the 889 * production for Char. 890 * 891 * Returns the value parsed (as an int), 0 in case of error, str will be 892 * updated to the current value of the index 893 */ 894int 895xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 896 const xmlChar *ptr; 897 xmlChar cur; 898 int val = 0; 899 900 if ((str == NULL) || (*str == NULL)) return(0); 901 ptr = *str; 902 cur = *ptr; 903 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 904 ptr += 3; 905 cur = *ptr; 906 while (cur != ';') { 907 if ((cur >= '0') && (cur <= '9')) 908 val = val * 16 + (cur - '0'); 909 else if ((cur >= 'a') && (cur <= 'f')) 910 val = val * 16 + (cur - 'a') + 10; 911 else if ((cur >= 'A') && (cur <= 'F')) 912 val = val * 16 + (cur - 'A') + 10; 913 else { 914 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; 915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 916 ctxt->sax->error(ctxt->userData, 917 "xmlParseCharRef: invalid hexadecimal value\n"); 918 ctxt->wellFormed = 0; 919 val = 0; 920 break; 921 } 922 ptr++; 923 cur = *ptr; 924 } 925 if (cur == ';') 926 ptr++; 927 } else if ((cur == '&') && (ptr[1] == '#')){ 928 ptr += 2; 929 cur = *ptr; 930 while (cur != ';') { 931 if ((cur >= '0') && (cur <= '9')) 932 val = val * 10 + (cur - '0'); 933 else { 934 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; 935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 936 ctxt->sax->error(ctxt->userData, 937 "xmlParseCharRef: invalid decimal value\n"); 938 ctxt->wellFormed = 0; 939 val = 0; 940 break; 941 } 942 ptr++; 943 cur = *ptr; 944 } 945 if (cur == ';') 946 ptr++; 947 } else { 948 ctxt->errNo = XML_ERR_INVALID_CHARREF; 949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 950 ctxt->sax->error(ctxt->userData, 951 "xmlParseCharRef: invalid value\n"); 952 ctxt->wellFormed = 0; 953 return(0); 954 } 955 *str = ptr; 956 957 /* 958 * [ WFC: Legal Character ] 959 * Characters referred to using character references must match the 960 * production for Char. 961 */ 962 if (IS_CHAR(val)) { 963 return(val); 964 } else { 965 ctxt->errNo = XML_ERR_INVALID_CHAR; 966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 967 ctxt->sax->error(ctxt->userData, 968 "CharRef: invalid xmlChar value %d\n", val); 969 ctxt->wellFormed = 0; 970 } 971 return(0); 972} 973 974/** 975 * xmlParserHandleReference: 976 * @ctxt: the parser context 977 * 978 * [67] Reference ::= EntityRef | CharRef 979 * 980 * [68] EntityRef ::= '&' Name ';' 981 * 982 * [ WFC: Entity Declared ] 983 * the Name given in the entity reference must match that in an entity 984 * declaration, except that well-formed documents need not declare any 985 * of the following entities: amp, lt, gt, apos, quot. 986 * 987 * [ WFC: Parsed Entity ] 988 * An entity reference must not contain the name of an unparsed entity 989 * 990 * [66] CharRef ::= '&#' [0-9]+ ';' | 991 * '&#x' [0-9a-fA-F]+ ';' 992 * 993 * A PEReference may have been detectect in the current input stream 994 * the handling is done accordingly to 995 * http://www.w3.org/TR/REC-xml#entproc 996 */ 997void 998xmlParserHandleReference(xmlParserCtxtPtr ctxt) { 999 xmlParserInputPtr input; 1000 xmlChar *name; 1001 xmlEntityPtr ent = NULL; 1002 1003 if (ctxt->token != 0) { 1004 return; 1005 } 1006 if (CUR != '&') return; 1007 GROW; 1008 if ((CUR == '&') && (NXT(1) == '#')) { 1009 switch(ctxt->instate) { 1010 case XML_PARSER_ENTITY_DECL: 1011 case XML_PARSER_PI: 1012 case XML_PARSER_CDATA_SECTION: 1013 case XML_PARSER_COMMENT: 1014 /* we just ignore it there */ 1015 return; 1016 case XML_PARSER_START_TAG: 1017 return; 1018 case XML_PARSER_END_TAG: 1019 return; 1020 case XML_PARSER_EOF: 1021 ctxt->errNo = XML_ERR_CHARREF_AT_EOF; 1022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1023 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n"); 1024 ctxt->wellFormed = 0; 1025 return; 1026 case XML_PARSER_PROLOG: 1027 case XML_PARSER_START: 1028 case XML_PARSER_MISC: 1029 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG; 1030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1031 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n"); 1032 ctxt->wellFormed = 0; 1033 return; 1034 case XML_PARSER_EPILOG: 1035 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG; 1036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1037 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n"); 1038 ctxt->wellFormed = 0; 1039 return; 1040 case XML_PARSER_DTD: 1041 ctxt->errNo = XML_ERR_CHARREF_IN_DTD; 1042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1043 ctxt->sax->error(ctxt->userData, 1044 "CharRef are forbiden in DTDs!\n"); 1045 ctxt->wellFormed = 0; 1046 return; 1047 case XML_PARSER_ENTITY_VALUE: 1048 /* 1049 * NOTE: in the case of entity values, we don't do the 1050 * substitution here since we need the literal 1051 * entity value to be able to save the internal 1052 * subset of the document. 1053 * This will be handled by xmlDecodeEntities 1054 */ 1055 return; 1056 case XML_PARSER_CONTENT: 1057 case XML_PARSER_ATTRIBUTE_VALUE: 1058 /* !!! this may not be Ok for UTF-8, multibyte sequence */ 1059 ctxt->token = xmlParseCharRef(ctxt); 1060 return; 1061 } 1062 return; 1063 } 1064 1065 switch(ctxt->instate) { 1066 case XML_PARSER_CDATA_SECTION: 1067 return; 1068 case XML_PARSER_PI: 1069 case XML_PARSER_COMMENT: 1070 return; 1071 case XML_PARSER_START_TAG: 1072 return; 1073 case XML_PARSER_END_TAG: 1074 return; 1075 case XML_PARSER_EOF: 1076 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF; 1077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1078 ctxt->sax->error(ctxt->userData, "Reference at EOF\n"); 1079 ctxt->wellFormed = 0; 1080 return; 1081 case XML_PARSER_PROLOG: 1082 case XML_PARSER_START: 1083 case XML_PARSER_MISC: 1084 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG; 1085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1086 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n"); 1087 ctxt->wellFormed = 0; 1088 return; 1089 case XML_PARSER_EPILOG: 1090 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG; 1091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1092 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n"); 1093 ctxt->wellFormed = 0; 1094 return; 1095 case XML_PARSER_ENTITY_VALUE: 1096 /* 1097 * NOTE: in the case of entity values, we don't do the 1098 * substitution here since we need the literal 1099 * entity value to be able to save the internal 1100 * subset of the document. 1101 * This will be handled by xmlDecodeEntities 1102 */ 1103 return; 1104 case XML_PARSER_ATTRIBUTE_VALUE: 1105 /* 1106 * NOTE: in the case of attributes values, we don't do the 1107 * substitution here unless we are in a mode where 1108 * the parser is explicitely asked to substitute 1109 * entities. The SAX callback is called with values 1110 * without entity substitution. 1111 * This will then be handled by xmlDecodeEntities 1112 */ 1113 return; 1114 case XML_PARSER_ENTITY_DECL: 1115 /* 1116 * we just ignore it there 1117 * the substitution will be done once the entity is referenced 1118 */ 1119 return; 1120 case XML_PARSER_DTD: 1121 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD; 1122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1123 ctxt->sax->error(ctxt->userData, 1124 "Entity references are forbiden in DTDs!\n"); 1125 ctxt->wellFormed = 0; 1126 return; 1127 case XML_PARSER_CONTENT: 1128 return; 1129 } 1130 1131 NEXT; 1132 name = xmlScanName(ctxt); 1133 if (name == NULL) { 1134 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME; 1135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1136 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n"); 1137 ctxt->wellFormed = 0; 1138 ctxt->token = '&'; 1139 return; 1140 } 1141 if (NXT(xmlStrlen(name)) != ';') { 1142 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 1143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1144 ctxt->sax->error(ctxt->userData, 1145 "Entity reference: ';' expected\n"); 1146 ctxt->wellFormed = 0; 1147 ctxt->token = '&'; 1148 xmlFree(name); 1149 return; 1150 } 1151 SKIP(xmlStrlen(name) + 1); 1152 if (ctxt->sax != NULL) { 1153 if (ctxt->sax->getEntity != NULL) 1154 ent = ctxt->sax->getEntity(ctxt->userData, name); 1155 } 1156 1157 /* 1158 * [ WFC: Entity Declared ] 1159 * the Name given in the entity reference must match that in an entity 1160 * declaration, except that well-formed documents need not declare any 1161 * of the following entities: amp, lt, gt, apos, quot. 1162 */ 1163 if (ent == NULL) 1164 ent = xmlGetPredefinedEntity(name); 1165 if (ent == NULL) { 1166 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 1167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1168 ctxt->sax->error(ctxt->userData, 1169 "Entity reference: entity %s not declared\n", 1170 name); 1171 ctxt->wellFormed = 0; 1172 xmlFree(name); 1173 return; 1174 } 1175 1176 /* 1177 * [ WFC: Parsed Entity ] 1178 * An entity reference must not contain the name of an unparsed entity 1179 */ 1180 if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 1181 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 1182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1183 ctxt->sax->error(ctxt->userData, 1184 "Entity reference to unparsed entity %s\n", name); 1185 ctxt->wellFormed = 0; 1186 } 1187 1188 if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) { 1189 ctxt->token = ent->content[0]; 1190 xmlFree(name); 1191 return; 1192 } 1193 input = xmlNewEntityInputStream(ctxt, ent); 1194 xmlPushInput(ctxt, input); 1195 xmlFree(name); 1196 return; 1197} 1198 1199/** 1200 * xmlParserHandlePEReference: 1201 * @ctxt: the parser context 1202 * 1203 * [69] PEReference ::= '%' Name ';' 1204 * 1205 * [ WFC: No Recursion ] 1206 * TODO A parsed entity must not contain a recursive 1207 * reference to itself, either directly or indirectly. 1208 * 1209 * [ WFC: Entity Declared ] 1210 * In a document without any DTD, a document with only an internal DTD 1211 * subset which contains no parameter entity references, or a document 1212 * with "standalone='yes'", ... ... The declaration of a parameter 1213 * entity must precede any reference to it... 1214 * 1215 * [ VC: Entity Declared ] 1216 * In a document with an external subset or external parameter entities 1217 * with "standalone='no'", ... ... The declaration of a parameter entity 1218 * must precede any reference to it... 1219 * 1220 * [ WFC: In DTD ] 1221 * Parameter-entity references may only appear in the DTD. 1222 * NOTE: misleading but this is handled. 1223 * 1224 * A PEReference may have been detected in the current input stream 1225 * the handling is done accordingly to 1226 * http://www.w3.org/TR/REC-xml#entproc 1227 * i.e. 1228 * - Included in literal in entity values 1229 * - Included as Paraemeter Entity reference within DTDs 1230 */ 1231void 1232xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 1233 xmlChar *name; 1234 xmlEntityPtr entity = NULL; 1235 xmlParserInputPtr input; 1236 1237 if (ctxt->token != 0) { 1238 return; 1239 } 1240 if (CUR != '%') return; 1241 switch(ctxt->instate) { 1242 case XML_PARSER_CDATA_SECTION: 1243 return; 1244 case XML_PARSER_COMMENT: 1245 return; 1246 case XML_PARSER_START_TAG: 1247 return; 1248 case XML_PARSER_END_TAG: 1249 return; 1250 case XML_PARSER_EOF: 1251 ctxt->errNo = XML_ERR_PEREF_AT_EOF; 1252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1253 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); 1254 ctxt->wellFormed = 0; 1255 return; 1256 case XML_PARSER_PROLOG: 1257 case XML_PARSER_START: 1258 case XML_PARSER_MISC: 1259 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; 1260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1261 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); 1262 ctxt->wellFormed = 0; 1263 return; 1264 case XML_PARSER_ENTITY_DECL: 1265 case XML_PARSER_CONTENT: 1266 case XML_PARSER_ATTRIBUTE_VALUE: 1267 case XML_PARSER_PI: 1268 /* we just ignore it there */ 1269 return; 1270 case XML_PARSER_EPILOG: 1271 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; 1272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1273 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); 1274 ctxt->wellFormed = 0; 1275 return; 1276 case XML_PARSER_ENTITY_VALUE: 1277 /* 1278 * NOTE: in the case of entity values, we don't do the 1279 * substitution here since we need the literal 1280 * entity value to be able to save the internal 1281 * subset of the document. 1282 * This will be handled by xmlDecodeEntities 1283 */ 1284 return; 1285 case XML_PARSER_DTD: 1286 /* 1287 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 1288 * In the internal DTD subset, parameter-entity references 1289 * can occur only where markup declarations can occur, not 1290 * within markup declarations. 1291 * In that case this is handled in xmlParseMarkupDecl 1292 */ 1293 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 1294 return; 1295 } 1296 1297 NEXT; 1298 name = xmlParseName(ctxt); 1299 if (name == NULL) { 1300 ctxt->errNo = XML_ERR_PEREF_NO_NAME; 1301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1302 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); 1303 ctxt->wellFormed = 0; 1304 } else { 1305 if (CUR == ';') { 1306 NEXT; 1307 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 1308 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 1309 if (entity == NULL) { 1310 1311 /* 1312 * [ WFC: Entity Declared ] 1313 * In a document without any DTD, a document with only an 1314 * internal DTD subset which contains no parameter entity 1315 * references, or a document with "standalone='yes'", ... 1316 * ... The declaration of a parameter entity must precede 1317 * any reference to it... 1318 */ 1319 if ((ctxt->standalone == 1) || 1320 ((ctxt->hasExternalSubset == 0) && 1321 (ctxt->hasPErefs == 0))) { 1322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1323 ctxt->sax->error(ctxt->userData, 1324 "PEReference: %%%s; not found\n", name); 1325 ctxt->wellFormed = 0; 1326 } else { 1327 /* 1328 * [ VC: Entity Declared ] 1329 * In a document with an external subset or external 1330 * parameter entities with "standalone='no'", ... 1331 * ... The declaration of a parameter entity must precede 1332 * any reference to it... 1333 */ 1334 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 1335 ctxt->sax->warning(ctxt->userData, 1336 "PEReference: %%%s; not found\n", name); 1337 ctxt->valid = 0; 1338 } 1339 } else { 1340 if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) || 1341 (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) { 1342 /* 1343 * TODO !!!! handle the extra spaces added before and after 1344 * c.f. http://www.w3.org/TR/REC-xml#as-PE 1345 * TODO !!!! Avoid quote processing in parameters value 1346 * c.f. http://www.w3.org/TR/REC-xml#inliteral 1347 */ 1348 input = xmlNewEntityInputStream(ctxt, entity); 1349 xmlPushInput(ctxt, input); 1350 } else { 1351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1352 ctxt->sax->error(ctxt->userData, 1353 "xmlHandlePEReference: %s is not a parameter entity\n", 1354 name); 1355 ctxt->wellFormed = 0; 1356 } 1357 } 1358 } else { 1359 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; 1360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1361 ctxt->sax->error(ctxt->userData, 1362 "xmlHandlePEReference: expecting ';'\n"); 1363 ctxt->wellFormed = 0; 1364 } 1365 xmlFree(name); 1366 } 1367} 1368 1369/* 1370 * Macro used to grow the current buffer. 1371 */ 1372#define growBuffer(buffer) { \ 1373 buffer##_size *= 2; \ 1374 buffer = (xmlChar *) \ 1375 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 1376 if (buffer == NULL) { \ 1377 perror("realloc failed"); \ 1378 return(NULL); \ 1379 } \ 1380} 1381 1382/** 1383 * xmlDecodeEntities: 1384 * @ctxt: the parser context 1385 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1386 * @len: the len to decode (in bytes !), -1 for no size limit 1387 * @end: an end marker xmlChar, 0 if none 1388 * @end2: an end marker xmlChar, 0 if none 1389 * @end3: an end marker xmlChar, 0 if none 1390 * 1391 * [67] Reference ::= EntityRef | CharRef 1392 * 1393 * [69] PEReference ::= '%' Name ';' 1394 * 1395 * Returns A newly allocated string with the substitution done. The caller 1396 * must deallocate it ! 1397 */ 1398xmlChar * 1399xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, 1400 xmlChar end, xmlChar end2, xmlChar end3) { 1401 xmlChar *buffer = NULL; 1402 int buffer_size = 0; 1403 xmlChar *out = NULL; 1404 1405 xmlChar *current = NULL; 1406 xmlEntityPtr ent; 1407 int nbchars = 0; 1408 unsigned int max = (unsigned int) len; 1409 xmlChar cur; 1410 1411 /* 1412 * allocate a translation buffer. 1413 */ 1414 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1415 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 1416 if (buffer == NULL) { 1417 perror("xmlDecodeEntities: malloc failed"); 1418 return(NULL); 1419 } 1420 out = buffer; 1421 1422 /* 1423 * Ok loop until we reach one of the ending char or a size limit. 1424 */ 1425 cur = CUR; 1426 while ((nbchars < max) && (cur != end) && 1427 (cur != end2) && (cur != end3)) { 1428 1429 if (cur == 0) break; 1430 if ((cur == '&') && (NXT(1) == '#')) { 1431 int val = xmlParseCharRef(ctxt); 1432 *out++ = val; 1433 nbchars += 3; 1434 } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) { 1435 ent = xmlParseEntityRef(ctxt); 1436 if ((ent != NULL) && 1437 (ctxt->replaceEntities != 0)) { 1438 current = ent->content; 1439 while (*current != 0) { 1440 *out++ = *current++; 1441 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { 1442 int index = out - buffer; 1443 1444 growBuffer(buffer); 1445 out = &buffer[index]; 1446 } 1447 } 1448 nbchars += 3 + xmlStrlen(ent->name); 1449 } else if (ent != NULL) { 1450 int i = xmlStrlen(ent->name); 1451 const xmlChar *cur = ent->name; 1452 1453 nbchars += i + 2; 1454 *out++ = '&'; 1455 if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1456 int index = out - buffer; 1457 1458 growBuffer(buffer); 1459 out = &buffer[index]; 1460 } 1461 for (;i > 0;i--) 1462 *out++ = *cur++; 1463 *out++ = ';'; 1464 } 1465 } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1466 /* 1467 * a PEReference induce to switch the entity flow, 1468 * we break here to flush the current set of chars 1469 * parsed if any. We will be called back later. 1470 */ 1471 if (nbchars != 0) break; 1472 1473 xmlParsePEReference(ctxt); 1474 1475 /* 1476 * Pop-up of finished entities. 1477 */ 1478 while ((CUR == 0) && (ctxt->inputNr > 1)) 1479 xmlPopInput(ctxt); 1480 1481 break; 1482 } else { 1483 /* invalid for UTF-8 , use COPY(out); !!!!!! */ 1484 *out++ = cur; 1485 nbchars++; 1486 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { 1487 int index = out - buffer; 1488 1489 growBuffer(buffer); 1490 out = &buffer[index]; 1491 } 1492 NEXT; 1493 } 1494 cur = CUR; 1495 } 1496 *out++ = 0; 1497 return(buffer); 1498} 1499 1500/** 1501 * xmlStringDecodeEntities: 1502 * @ctxt: the parser context 1503 * @str: the input string 1504 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 1505 * @end: an end marker xmlChar, 0 if none 1506 * @end2: an end marker xmlChar, 0 if none 1507 * @end3: an end marker xmlChar, 0 if none 1508 * 1509 * [67] Reference ::= EntityRef | CharRef 1510 * 1511 * [69] PEReference ::= '%' Name ';' 1512 * 1513 * Returns A newly allocated string with the substitution done. The caller 1514 * must deallocate it ! 1515 */ 1516xmlChar * 1517xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 1518 xmlChar end, xmlChar end2, xmlChar end3) { 1519 xmlChar *buffer = NULL; 1520 int buffer_size = 0; 1521 xmlChar *out = NULL; 1522 1523 xmlChar *current = NULL; 1524 xmlEntityPtr ent; 1525 xmlChar cur; 1526 1527 /* 1528 * allocate a translation buffer. 1529 */ 1530 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 1531 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 1532 if (buffer == NULL) { 1533 perror("xmlDecodeEntities: malloc failed"); 1534 return(NULL); 1535 } 1536 out = buffer; 1537 1538 /* 1539 * Ok loop until we reach one of the ending char or a size limit. 1540 */ 1541 cur = *str; 1542 while ((cur != 0) && (cur != end) && 1543 (cur != end2) && (cur != end3)) { 1544 1545 if (cur == 0) break; 1546 if ((cur == '&') && (str[1] == '#')) { 1547 int val = xmlParseStringCharRef(ctxt, &str); 1548 if (val != 0) 1549 *out++ = val; 1550 } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) { 1551 ent = xmlParseStringEntityRef(ctxt, &str); 1552 if ((ent != NULL) && 1553 (ctxt->replaceEntities != 0)) { 1554 current = ent->content; 1555 while (*current != 0) { 1556 *out++ = *current++; 1557 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { 1558 int index = out - buffer; 1559 1560 growBuffer(buffer); 1561 out = &buffer[index]; 1562 } 1563 } 1564 } else if (ent != NULL) { 1565 int i = xmlStrlen(ent->name); 1566 const xmlChar *cur = ent->name; 1567 1568 *out++ = '&'; 1569 if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 1570 int index = out - buffer; 1571 1572 growBuffer(buffer); 1573 out = &buffer[index]; 1574 } 1575 for (;i > 0;i--) 1576 *out++ = *cur++; 1577 *out++ = ';'; 1578 } 1579 } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) { 1580 ent = xmlParseStringPEReference(ctxt, &str); 1581 if (ent != NULL) { 1582 current = ent->content; 1583 while (*current != 0) { 1584 *out++ = *current++; 1585 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { 1586 int index = out - buffer; 1587 1588 growBuffer(buffer); 1589 out = &buffer[index]; 1590 } 1591 } 1592 } 1593 } else { 1594 /* invalid for UTF-8 , use COPY(out); !!!!!! */ 1595 *out++ = cur; 1596 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { 1597 int index = out - buffer; 1598 1599 growBuffer(buffer); 1600 out = &buffer[index]; 1601 } 1602 str++; 1603 } 1604 cur = *str; 1605 } 1606 *out = 0; 1607 return(buffer); 1608} 1609 1610 1611/************************************************************************ 1612 * * 1613 * Commodity functions to handle encodings * 1614 * * 1615 ************************************************************************/ 1616 1617/** 1618 * xmlSwitchEncoding: 1619 * @ctxt: the parser context 1620 * @enc: the encoding value (number) 1621 * 1622 * change the input functions when discovering the character encoding 1623 * of a given entity. 1624 */ 1625void 1626xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 1627{ 1628 switch (enc) { 1629 case XML_CHAR_ENCODING_ERROR: 1630 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; 1631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1632 ctxt->sax->error(ctxt->userData, "encoding unknown\n"); 1633 ctxt->wellFormed = 0; 1634 break; 1635 case XML_CHAR_ENCODING_NONE: 1636 /* let's assume it's UTF-8 without the XML decl */ 1637 return; 1638 case XML_CHAR_ENCODING_UTF8: 1639 /* default encoding, no conversion should be needed */ 1640 return; 1641 case XML_CHAR_ENCODING_UTF16LE: 1642 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1644 ctxt->sax->error(ctxt->userData, 1645 "char encoding UTF16 little endian not supported\n"); 1646 break; 1647 case XML_CHAR_ENCODING_UTF16BE: 1648 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1650 ctxt->sax->error(ctxt->userData, 1651 "char encoding UTF16 big endian not supported\n"); 1652 break; 1653 case XML_CHAR_ENCODING_UCS4LE: 1654 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1656 ctxt->sax->error(ctxt->userData, 1657 "char encoding USC4 little endian not supported\n"); 1658 break; 1659 case XML_CHAR_ENCODING_UCS4BE: 1660 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1662 ctxt->sax->error(ctxt->userData, 1663 "char encoding USC4 big endian not supported\n"); 1664 break; 1665 case XML_CHAR_ENCODING_EBCDIC: 1666 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1668 ctxt->sax->error(ctxt->userData, 1669 "char encoding EBCDIC not supported\n"); 1670 break; 1671 case XML_CHAR_ENCODING_UCS4_2143: 1672 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1674 ctxt->sax->error(ctxt->userData, 1675 "char encoding UCS4 2143 not supported\n"); 1676 break; 1677 case XML_CHAR_ENCODING_UCS4_3412: 1678 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1680 ctxt->sax->error(ctxt->userData, 1681 "char encoding UCS4 3412 not supported\n"); 1682 break; 1683 case XML_CHAR_ENCODING_UCS2: 1684 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1686 ctxt->sax->error(ctxt->userData, 1687 "char encoding UCS2 not supported\n"); 1688 break; 1689 case XML_CHAR_ENCODING_8859_1: 1690 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1692 ctxt->sax->error(ctxt->userData, 1693 "char encoding ISO_8859_1 ISO Latin 1 not supported\n"); 1694 break; 1695 case XML_CHAR_ENCODING_8859_2: 1696 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1698 ctxt->sax->error(ctxt->userData, 1699 "char encoding ISO_8859_2 ISO Latin 2 not supported\n"); 1700 break; 1701 case XML_CHAR_ENCODING_8859_3: 1702 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1704 ctxt->sax->error(ctxt->userData, 1705 "char encoding ISO_8859_3 not supported\n"); 1706 break; 1707 case XML_CHAR_ENCODING_8859_4: 1708 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1710 ctxt->sax->error(ctxt->userData, 1711 "char encoding ISO_8859_4 not supported\n"); 1712 break; 1713 case XML_CHAR_ENCODING_8859_5: 1714 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1716 ctxt->sax->error(ctxt->userData, 1717 "char encoding ISO_8859_5 not supported\n"); 1718 break; 1719 case XML_CHAR_ENCODING_8859_6: 1720 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1722 ctxt->sax->error(ctxt->userData, 1723 "char encoding ISO_8859_6 not supported\n"); 1724 break; 1725 case XML_CHAR_ENCODING_8859_7: 1726 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1728 ctxt->sax->error(ctxt->userData, 1729 "char encoding ISO_8859_7 not supported\n"); 1730 break; 1731 case XML_CHAR_ENCODING_8859_8: 1732 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1734 ctxt->sax->error(ctxt->userData, 1735 "char encoding ISO_8859_8 not supported\n"); 1736 break; 1737 case XML_CHAR_ENCODING_8859_9: 1738 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1740 ctxt->sax->error(ctxt->userData, 1741 "char encoding ISO_8859_9 not supported\n"); 1742 break; 1743 case XML_CHAR_ENCODING_2022_JP: 1744 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1746 ctxt->sax->error(ctxt->userData, 1747 "char encoding ISO-2022-JPnot supported\n"); 1748 break; 1749 case XML_CHAR_ENCODING_SHIFT_JIS: 1750 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1752 ctxt->sax->error(ctxt->userData, 1753 "char encoding Shift_JISnot supported\n"); 1754 break; 1755 case XML_CHAR_ENCODING_EUC_JP: 1756 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 1757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 1758 ctxt->sax->error(ctxt->userData, 1759 "char encoding EUC-JPnot supported\n"); 1760 break; 1761 } 1762} 1763 1764/************************************************************************ 1765 * * 1766 * Commodity functions to handle xmlChars * 1767 * * 1768 ************************************************************************/ 1769 1770/** 1771 * xmlStrndup: 1772 * @cur: the input xmlChar * 1773 * @len: the len of @cur 1774 * 1775 * a strndup for array of xmlChar's 1776 * 1777 * Returns a new xmlChar * or NULL 1778 */ 1779xmlChar * 1780xmlStrndup(const xmlChar *cur, int len) { 1781 xmlChar *ret; 1782 1783 if ((cur == NULL) || (len < 0)) return(NULL); 1784 ret = xmlMalloc((len + 1) * sizeof(xmlChar)); 1785 if (ret == NULL) { 1786 fprintf(stderr, "malloc of %ld byte failed\n", 1787 (len + 1) * (long)sizeof(xmlChar)); 1788 return(NULL); 1789 } 1790 memcpy(ret, cur, len * sizeof(xmlChar)); 1791 ret[len] = 0; 1792 return(ret); 1793} 1794 1795/** 1796 * xmlStrdup: 1797 * @cur: the input xmlChar * 1798 * 1799 * a strdup for array of xmlChar's 1800 * 1801 * Returns a new xmlChar * or NULL 1802 */ 1803xmlChar * 1804xmlStrdup(const xmlChar *cur) { 1805 const xmlChar *p = cur; 1806 1807 if (cur == NULL) return(NULL); 1808 while (IS_CHAR(*p)) p++; 1809 return(xmlStrndup(cur, p - cur)); 1810} 1811 1812/** 1813 * xmlCharStrndup: 1814 * @cur: the input char * 1815 * @len: the len of @cur 1816 * 1817 * a strndup for char's to xmlChar's 1818 * 1819 * Returns a new xmlChar * or NULL 1820 */ 1821 1822xmlChar * 1823xmlCharStrndup(const char *cur, int len) { 1824 int i; 1825 xmlChar *ret; 1826 1827 if ((cur == NULL) || (len < 0)) return(NULL); 1828 ret = xmlMalloc((len + 1) * sizeof(xmlChar)); 1829 if (ret == NULL) { 1830 fprintf(stderr, "malloc of %ld byte failed\n", 1831 (len + 1) * (long)sizeof(xmlChar)); 1832 return(NULL); 1833 } 1834 for (i = 0;i < len;i++) 1835 ret[i] = (xmlChar) cur[i]; 1836 ret[len] = 0; 1837 return(ret); 1838} 1839 1840/** 1841 * xmlCharStrdup: 1842 * @cur: the input char * 1843 * @len: the len of @cur 1844 * 1845 * a strdup for char's to xmlChar's 1846 * 1847 * Returns a new xmlChar * or NULL 1848 */ 1849 1850xmlChar * 1851xmlCharStrdup(const char *cur) { 1852 const char *p = cur; 1853 1854 if (cur == NULL) return(NULL); 1855 while (*p != '\0') p++; 1856 return(xmlCharStrndup(cur, p - cur)); 1857} 1858 1859/** 1860 * xmlStrcmp: 1861 * @str1: the first xmlChar * 1862 * @str2: the second xmlChar * 1863 * 1864 * a strcmp for xmlChar's 1865 * 1866 * Returns the integer result of the comparison 1867 */ 1868 1869int 1870xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { 1871 register int tmp; 1872 1873 if ((str1 == NULL) && (str2 == NULL)) return(0); 1874 if (str1 == NULL) return(-1); 1875 if (str2 == NULL) return(1); 1876 do { 1877 tmp = *str1++ - *str2++; 1878 if (tmp != 0) return(tmp); 1879 } while ((*str1 != 0) && (*str2 != 0)); 1880 return (*str1 - *str2); 1881} 1882 1883/** 1884 * xmlStrncmp: 1885 * @str1: the first xmlChar * 1886 * @str2: the second xmlChar * 1887 * @len: the max comparison length 1888 * 1889 * a strncmp for xmlChar's 1890 * 1891 * Returns the integer result of the comparison 1892 */ 1893 1894int 1895xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { 1896 register int tmp; 1897 1898 if (len <= 0) return(0); 1899 if ((str1 == NULL) && (str2 == NULL)) return(0); 1900 if (str1 == NULL) return(-1); 1901 if (str2 == NULL) return(1); 1902 do { 1903 tmp = *str1++ - *str2++; 1904 if (tmp != 0) return(tmp); 1905 len--; 1906 if (len <= 0) return(0); 1907 } while ((*str1 != 0) && (*str2 != 0)); 1908 return (*str1 - *str2); 1909} 1910 1911/** 1912 * xmlStrchr: 1913 * @str: the xmlChar * array 1914 * @val: the xmlChar to search 1915 * 1916 * a strchr for xmlChar's 1917 * 1918 * Returns the xmlChar * for the first occurence or NULL. 1919 */ 1920 1921const xmlChar * 1922xmlStrchr(const xmlChar *str, xmlChar val) { 1923 if (str == NULL) return(NULL); 1924 while (*str != 0) { 1925 if (*str == val) return((xmlChar *) str); 1926 str++; 1927 } 1928 return(NULL); 1929} 1930 1931/** 1932 * xmlStrstr: 1933 * @str: the xmlChar * array (haystack) 1934 * @val: the xmlChar to search (needle) 1935 * 1936 * a strstr for xmlChar's 1937 * 1938 * Returns the xmlChar * for the first occurence or NULL. 1939 */ 1940 1941const xmlChar * 1942xmlStrstr(const xmlChar *str, xmlChar *val) { 1943 int n; 1944 1945 if (str == NULL) return(NULL); 1946 if (val == NULL) return(NULL); 1947 n = xmlStrlen(val); 1948 1949 if (n == 0) return(str); 1950 while (*str != 0) { 1951 if (*str == *val) { 1952 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); 1953 } 1954 str++; 1955 } 1956 return(NULL); 1957} 1958 1959/** 1960 * xmlStrsub: 1961 * @str: the xmlChar * array (haystack) 1962 * @start: the index of the first char (zero based) 1963 * @len: the length of the substring 1964 * 1965 * Extract a substring of a given string 1966 * 1967 * Returns the xmlChar * for the first occurence or NULL. 1968 */ 1969 1970xmlChar * 1971xmlStrsub(const xmlChar *str, int start, int len) { 1972 int i; 1973 1974 if (str == NULL) return(NULL); 1975 if (start < 0) return(NULL); 1976 if (len < 0) return(NULL); 1977 1978 for (i = 0;i < start;i++) { 1979 if (*str == 0) return(NULL); 1980 str++; 1981 } 1982 if (*str == 0) return(NULL); 1983 return(xmlStrndup(str, len)); 1984} 1985 1986/** 1987 * xmlStrlen: 1988 * @str: the xmlChar * array 1989 * 1990 * length of a xmlChar's string 1991 * 1992 * Returns the number of xmlChar contained in the ARRAY. 1993 */ 1994 1995int 1996xmlStrlen(const xmlChar *str) { 1997 int len = 0; 1998 1999 if (str == NULL) return(0); 2000 while (*str != 0) { 2001 str++; 2002 len++; 2003 } 2004 return(len); 2005} 2006 2007/** 2008 * xmlStrncat: 2009 * @cur: the original xmlChar * array 2010 * @add: the xmlChar * array added 2011 * @len: the length of @add 2012 * 2013 * a strncat for array of xmlChar's 2014 * 2015 * Returns a new xmlChar * containing the concatenated string. 2016 */ 2017 2018xmlChar * 2019xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { 2020 int size; 2021 xmlChar *ret; 2022 2023 if ((add == NULL) || (len == 0)) 2024 return(cur); 2025 if (cur == NULL) 2026 return(xmlStrndup(add, len)); 2027 2028 size = xmlStrlen(cur); 2029 ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); 2030 if (ret == NULL) { 2031 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n", 2032 (size + len + 1) * (long)sizeof(xmlChar)); 2033 return(cur); 2034 } 2035 memcpy(&ret[size], add, len * sizeof(xmlChar)); 2036 ret[size + len] = 0; 2037 return(ret); 2038} 2039 2040/** 2041 * xmlStrcat: 2042 * @cur: the original xmlChar * array 2043 * @add: the xmlChar * array added 2044 * 2045 * a strcat for array of xmlChar's 2046 * 2047 * Returns a new xmlChar * containing the concatenated string. 2048 */ 2049xmlChar * 2050xmlStrcat(xmlChar *cur, const xmlChar *add) { 2051 const xmlChar *p = add; 2052 2053 if (add == NULL) return(cur); 2054 if (cur == NULL) 2055 return(xmlStrdup(add)); 2056 2057 while (IS_CHAR(*p)) p++; 2058 return(xmlStrncat(cur, add, p - add)); 2059} 2060 2061/************************************************************************ 2062 * * 2063 * Commodity functions, cleanup needed ? * 2064 * * 2065 ************************************************************************/ 2066 2067/** 2068 * areBlanks: 2069 * @ctxt: an XML parser context 2070 * @str: a xmlChar * 2071 * @len: the size of @str 2072 * 2073 * Is this a sequence of blank chars that one can ignore ? 2074 * 2075 * Returns 1 if ignorable 0 otherwise. 2076 */ 2077 2078static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 2079 int i, ret; 2080 xmlNodePtr lastChild; 2081 2082 /* 2083 * Check that the string is made of blanks 2084 */ 2085 for (i = 0;i < len;i++) 2086 if (!(IS_BLANK(str[i]))) return(0); 2087 2088 /* 2089 * Look if the element is mixed content in the Dtd if available 2090 */ 2091 if (ctxt->myDoc != NULL) { 2092 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2093 if (ret == 0) return(1); 2094 if (ret == 1) return(0); 2095 } 2096 2097 /* 2098 * Do we allow an heuristic on white space 2099 */ 2100 if (ctxt->keepBlanks) 2101 return(0); 2102 if (CUR != '<') return(0); 2103 if (ctxt->node == NULL) return(0); 2104 if ((ctxt->node->childs == NULL) && 2105 (CUR == '<') && (NXT(1) == '/')) return(0); 2106 2107 lastChild = xmlGetLastChild(ctxt->node); 2108 if (lastChild == NULL) { 2109 if (ctxt->node->content != NULL) return(0); 2110 } else if (xmlNodeIsText(lastChild)) 2111 return(0); 2112 else if ((ctxt->node->childs != NULL) && 2113 (xmlNodeIsText(ctxt->node->childs))) 2114 return(0); 2115 return(1); 2116} 2117 2118/** 2119 * xmlHandleEntity: 2120 * @ctxt: an XML parser context 2121 * @entity: an XML entity pointer. 2122 * 2123 * Default handling of defined entities, when should we define a new input 2124 * stream ? When do we just handle that as a set of chars ? 2125 * 2126 * OBSOLETE: to be removed at some point. 2127 */ 2128 2129void 2130xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2131 int len; 2132 xmlParserInputPtr input; 2133 2134 if (entity->content == NULL) { 2135 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 2136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2137 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n", 2138 entity->name); 2139 ctxt->wellFormed = 0; 2140 return; 2141 } 2142 len = xmlStrlen(entity->content); 2143 if (len <= 2) goto handle_as_char; 2144 2145 /* 2146 * Redefine its content as an input stream. 2147 */ 2148 input = xmlNewEntityInputStream(ctxt, entity); 2149 xmlPushInput(ctxt, input); 2150 return; 2151 2152handle_as_char: 2153 /* 2154 * Just handle the content as a set of chars. 2155 */ 2156 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 2157 ctxt->sax->characters(ctxt->userData, entity->content, len); 2158 2159} 2160 2161/* 2162 * Forward definition for recusive behaviour. 2163 */ 2164void xmlParsePEReference(xmlParserCtxtPtr ctxt); 2165void xmlParseReference(xmlParserCtxtPtr ctxt); 2166 2167/************************************************************************ 2168 * * 2169 * Extra stuff for namespace support * 2170 * Relates to http://www.w3.org/TR/WD-xml-names * 2171 * * 2172 ************************************************************************/ 2173 2174/** 2175 * xmlNamespaceParseNCName: 2176 * @ctxt: an XML parser context 2177 * 2178 * parse an XML namespace name. 2179 * 2180 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* 2181 * 2182 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 2183 * CombiningChar | Extender 2184 * 2185 * Returns the namespace name or NULL 2186 */ 2187 2188xmlChar * 2189xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { 2190 xmlChar buf[XML_MAX_NAMELEN]; 2191 int len = 0; 2192 2193 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL); 2194 2195 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || 2196 (CUR == '.') || (CUR == '-') || 2197 (CUR == '_') || 2198 (IS_COMBINING(CUR)) || 2199 (IS_EXTENDER(CUR))) { 2200 buf[len++] = CUR; 2201 NEXT; 2202 if (len >= XML_MAX_NAMELEN) { 2203 fprintf(stderr, 2204 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n"); 2205 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || 2206 (CUR == '.') || (CUR == '-') || 2207 (CUR == '_') || 2208 (IS_COMBINING(CUR)) || 2209 (IS_EXTENDER(CUR))) 2210 NEXT; 2211 break; 2212 } 2213 } 2214 return(xmlStrndup(buf, len)); 2215} 2216 2217/** 2218 * xmlNamespaceParseQName: 2219 * @ctxt: an XML parser context 2220 * @prefix: a xmlChar ** 2221 * 2222 * parse an XML qualified name 2223 * 2224 * [NS 5] QName ::= (Prefix ':')? LocalPart 2225 * 2226 * [NS 6] Prefix ::= NCName 2227 * 2228 * [NS 7] LocalPart ::= NCName 2229 * 2230 * Returns the local part, and prefix is updated 2231 * to get the Prefix if any. 2232 */ 2233 2234xmlChar * 2235xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) { 2236 xmlChar *ret = NULL; 2237 2238 *prefix = NULL; 2239 ret = xmlNamespaceParseNCName(ctxt); 2240 if (CUR == ':') { 2241 *prefix = ret; 2242 NEXT; 2243 ret = xmlNamespaceParseNCName(ctxt); 2244 } 2245 2246 return(ret); 2247} 2248 2249/** 2250 * xmlSplitQName: 2251 * @name: an XML parser context 2252 * @prefix: a xmlChar ** 2253 * 2254 * parse an XML qualified name string 2255 * 2256 * [NS 5] QName ::= (Prefix ':')? LocalPart 2257 * 2258 * [NS 6] Prefix ::= NCName 2259 * 2260 * [NS 7] LocalPart ::= NCName 2261 * 2262 * Returns the local part, and prefix is updated 2263 * to get the Prefix if any. 2264 */ 2265 2266xmlChar * 2267xmlSplitQName(const xmlChar *name, xmlChar **prefix) { 2268 xmlChar *ret = NULL; 2269 const xmlChar *q; 2270 const xmlChar *cur = name; 2271 2272 *prefix = NULL; 2273 2274 /* xml: prefix is not really a namespace */ 2275 if ((cur[0] == 'x') && (cur[1] == 'm') && 2276 (cur[2] == 'l') && (cur[3] == ':')) 2277 return(xmlStrdup(name)); 2278 2279 if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL); 2280 q = cur++; 2281 2282 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || 2283 (*cur == '.') || (*cur == '-') || 2284 (*cur == '_') || 2285 (IS_COMBINING(*cur)) || 2286 (IS_EXTENDER(*cur))) 2287 cur++; 2288 2289 ret = xmlStrndup(q, cur - q); 2290 2291 if (*cur == ':') { 2292 cur++; 2293 if (!IS_LETTER(*cur) && (*cur != '_')) return(ret); 2294 *prefix = ret; 2295 2296 q = cur++; 2297 2298 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || 2299 (*cur == '.') || (*cur == '-') || 2300 (*cur == '_') || 2301 (IS_COMBINING(*cur)) || 2302 (IS_EXTENDER(*cur))) 2303 cur++; 2304 2305 ret = xmlStrndup(q, cur - q); 2306 } 2307 2308 return(ret); 2309} 2310/** 2311 * xmlNamespaceParseNSDef: 2312 * @ctxt: an XML parser context 2313 * 2314 * parse a namespace prefix declaration 2315 * 2316 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral 2317 * 2318 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? 2319 * 2320 * Returns the namespace name 2321 */ 2322 2323xmlChar * 2324xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) { 2325 xmlChar *name = NULL; 2326 2327 if ((CUR == 'x') && (NXT(1) == 'm') && 2328 (NXT(2) == 'l') && (NXT(3) == 'n') && 2329 (NXT(4) == 's')) { 2330 SKIP(5); 2331 if (CUR == ':') { 2332 NEXT; 2333 name = xmlNamespaceParseNCName(ctxt); 2334 } 2335 } 2336 return(name); 2337} 2338 2339/** 2340 * xmlParseQuotedString: 2341 * @ctxt: an XML parser context 2342 * 2343 * [OLD] Parse and return a string between quotes or doublequotes 2344 * To be removed at next drop of binary compatibility 2345 * 2346 * Returns the string parser or NULL. 2347 */ 2348xmlChar * 2349xmlParseQuotedString(xmlParserCtxtPtr ctxt) { 2350 xmlChar *buf = NULL; 2351 int len = 0; 2352 int size = XML_PARSER_BUFFER_SIZE; 2353 xmlChar c; 2354 2355 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2356 if (buf == NULL) { 2357 fprintf(stderr, "malloc of %d byte failed\n", size); 2358 return(NULL); 2359 } 2360 if (CUR == '"') { 2361 NEXT; 2362 c = CUR; 2363 while (IS_CHAR(c) && (c != '"')) { 2364 if (len + 1 >= size) { 2365 size *= 2; 2366 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 2367 if (buf == NULL) { 2368 fprintf(stderr, "realloc of %d byte failed\n", size); 2369 return(NULL); 2370 } 2371 } 2372 buf[len++] = c; 2373 NEXT; 2374 c = CUR; 2375 } 2376 if (c != '"') { 2377 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 2378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2379 ctxt->sax->error(ctxt->userData, 2380 "String not closed \"%.50s\"\n", buf); 2381 ctxt->wellFormed = 0; 2382 } else { 2383 NEXT; 2384 } 2385 } else if (CUR == '\''){ 2386 NEXT; 2387 c = CUR; 2388 while (IS_CHAR(c) && (c != '\'')) { 2389 if (len + 1 >= size) { 2390 size *= 2; 2391 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 2392 if (buf == NULL) { 2393 fprintf(stderr, "realloc of %d byte failed\n", size); 2394 return(NULL); 2395 } 2396 } 2397 buf[len++] = c; 2398 NEXT; 2399 c = CUR; 2400 } 2401 if (CUR != '\'') { 2402 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 2403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2404 ctxt->sax->error(ctxt->userData, 2405 "String not closed \"%.50s\"\n", buf); 2406 ctxt->wellFormed = 0; 2407 } else { 2408 NEXT; 2409 } 2410 } 2411 return(buf); 2412} 2413 2414/** 2415 * xmlParseNamespace: 2416 * @ctxt: an XML parser context 2417 * 2418 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs. 2419 * 2420 * This is what the older xml-name Working Draft specified, a bunch of 2421 * other stuff may still rely on it, so support is still here as 2422 * if it was declared on the root of the Tree:-( 2423 * 2424 * To be removed at next drop of binary compatibility 2425 */ 2426 2427void 2428xmlParseNamespace(xmlParserCtxtPtr ctxt) { 2429 xmlChar *href = NULL; 2430 xmlChar *prefix = NULL; 2431 int garbage = 0; 2432 2433 /* 2434 * We just skipped "namespace" or "xml:namespace" 2435 */ 2436 SKIP_BLANKS; 2437 2438 while (IS_CHAR(CUR) && (CUR != '>')) { 2439 /* 2440 * We can have "ns" or "prefix" attributes 2441 * Old encoding as 'href' or 'AS' attributes is still supported 2442 */ 2443 if ((CUR == 'n') && (NXT(1) == 's')) { 2444 garbage = 0; 2445 SKIP(2); 2446 SKIP_BLANKS; 2447 2448 if (CUR != '=') continue; 2449 NEXT; 2450 SKIP_BLANKS; 2451 2452 href = xmlParseQuotedString(ctxt); 2453 SKIP_BLANKS; 2454 } else if ((CUR == 'h') && (NXT(1) == 'r') && 2455 (NXT(2) == 'e') && (NXT(3) == 'f')) { 2456 garbage = 0; 2457 SKIP(4); 2458 SKIP_BLANKS; 2459 2460 if (CUR != '=') continue; 2461 NEXT; 2462 SKIP_BLANKS; 2463 2464 href = xmlParseQuotedString(ctxt); 2465 SKIP_BLANKS; 2466 } else if ((CUR == 'p') && (NXT(1) == 'r') && 2467 (NXT(2) == 'e') && (NXT(3) == 'f') && 2468 (NXT(4) == 'i') && (NXT(5) == 'x')) { 2469 garbage = 0; 2470 SKIP(6); 2471 SKIP_BLANKS; 2472 2473 if (CUR != '=') continue; 2474 NEXT; 2475 SKIP_BLANKS; 2476 2477 prefix = xmlParseQuotedString(ctxt); 2478 SKIP_BLANKS; 2479 } else if ((CUR == 'A') && (NXT(1) == 'S')) { 2480 garbage = 0; 2481 SKIP(2); 2482 SKIP_BLANKS; 2483 2484 if (CUR != '=') continue; 2485 NEXT; 2486 SKIP_BLANKS; 2487 2488 prefix = xmlParseQuotedString(ctxt); 2489 SKIP_BLANKS; 2490 } else if ((CUR == '?') && (NXT(1) == '>')) { 2491 garbage = 0; 2492 NEXT; 2493 } else { 2494 /* 2495 * Found garbage when parsing the namespace 2496 */ 2497 if (!garbage) { 2498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2499 ctxt->sax->error(ctxt->userData, 2500 "xmlParseNamespace found garbage\n"); 2501 } 2502 ctxt->errNo = XML_ERR_NS_DECL_ERROR; 2503 ctxt->wellFormed = 0; 2504 NEXT; 2505 } 2506 } 2507 2508 MOVETO_ENDTAG(CUR_PTR); 2509 NEXT; 2510 2511 /* 2512 * Register the DTD. 2513 if (href != NULL) 2514 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL)) 2515 ctxt->sax->globalNamespace(ctxt->userData, href, prefix); 2516 */ 2517 2518 if (prefix != NULL) xmlFree(prefix); 2519 if (href != NULL) xmlFree(href); 2520} 2521 2522/************************************************************************ 2523 * * 2524 * The parser itself * 2525 * Relates to http://www.w3.org/TR/REC-xml * 2526 * * 2527 ************************************************************************/ 2528 2529/** 2530 * xmlScanName: 2531 * @ctxt: an XML parser context 2532 * 2533 * Trickery: parse an XML name but without consuming the input flow 2534 * Needed for rollback cases. 2535 * 2536 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2537 * CombiningChar | Extender 2538 * 2539 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2540 * 2541 * [6] Names ::= Name (S Name)* 2542 * 2543 * Returns the Name parsed or NULL 2544 */ 2545 2546xmlChar * 2547xmlScanName(xmlParserCtxtPtr ctxt) { 2548 xmlChar buf[XML_MAX_NAMELEN]; 2549 int len = 0; 2550 2551 GROW; 2552 if (!IS_LETTER(CUR) && (CUR != '_') && 2553 (CUR != ':')) { 2554 return(NULL); 2555 } 2556 2557 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || 2558 (NXT(len) == '.') || (NXT(len) == '-') || 2559 (NXT(len) == '_') || (NXT(len) == ':') || 2560 (IS_COMBINING(NXT(len))) || 2561 (IS_EXTENDER(NXT(len)))) { 2562 buf[len] = NXT(len); 2563 len++; 2564 if (len >= XML_MAX_NAMELEN) { 2565 fprintf(stderr, 2566 "xmlScanName: reached XML_MAX_NAMELEN limit\n"); 2567 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || 2568 (NXT(len) == '.') || (NXT(len) == '-') || 2569 (NXT(len) == '_') || (NXT(len) == ':') || 2570 (IS_COMBINING(NXT(len))) || 2571 (IS_EXTENDER(NXT(len)))) 2572 len++; 2573 break; 2574 } 2575 } 2576 return(xmlStrndup(buf, len)); 2577} 2578 2579/** 2580 * xmlParseName: 2581 * @ctxt: an XML parser context 2582 * 2583 * parse an XML name. 2584 * 2585 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2586 * CombiningChar | Extender 2587 * 2588 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2589 * 2590 * [6] Names ::= Name (S Name)* 2591 * 2592 * Returns the Name parsed or NULL 2593 */ 2594 2595xmlChar * 2596xmlParseName(xmlParserCtxtPtr ctxt) { 2597 xmlChar buf[XML_MAX_NAMELEN]; 2598 int len = 0; 2599 xmlChar cur; 2600 2601 GROW; 2602 cur = CUR; 2603 if (!IS_LETTER(cur) && (cur != '_') && 2604 (cur != ':')) { 2605 return(NULL); 2606 } 2607 2608 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || 2609 (cur == '.') || (cur == '-') || 2610 (cur == '_') || (cur == ':') || 2611 (IS_COMBINING(cur)) || 2612 (IS_EXTENDER(cur))) { 2613 buf[len++] = cur; 2614 NEXT; 2615 cur = CUR; 2616 if (len >= XML_MAX_NAMELEN) { 2617 fprintf(stderr, 2618 "xmlParseName: reached XML_MAX_NAMELEN limit\n"); 2619 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || 2620 (cur == '.') || (cur == '-') || 2621 (cur == '_') || (cur == ':') || 2622 (IS_COMBINING(cur)) || 2623 (IS_EXTENDER(cur))) { 2624 NEXT; 2625 cur = CUR; 2626 } 2627 break; 2628 } 2629 } 2630 return(xmlStrndup(buf, len)); 2631} 2632 2633/** 2634 * xmlParseStringName: 2635 * @ctxt: an XML parser context 2636 * @str: a pointer to an index in the string 2637 * 2638 * parse an XML name. 2639 * 2640 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2641 * CombiningChar | Extender 2642 * 2643 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2644 * 2645 * [6] Names ::= Name (S Name)* 2646 * 2647 * Returns the Name parsed or NULL. The str pointer 2648 * is updated to the current location in the string. 2649 */ 2650 2651xmlChar * 2652xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2653 const xmlChar *ptr; 2654 const xmlChar *start; 2655 xmlChar cur; 2656 2657 if ((str == NULL) || (*str == NULL)) return(NULL); 2658 2659 start = ptr = *str; 2660 cur = *ptr; 2661 if (!IS_LETTER(cur) && (cur != '_') && 2662 (cur != ':')) { 2663 return(NULL); 2664 } 2665 2666 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || 2667 (cur == '.') || (cur == '-') || 2668 (cur == '_') || (cur == ':') || 2669 (IS_COMBINING(cur)) || 2670 (IS_EXTENDER(cur))) { 2671 ptr++; 2672 cur = *ptr; 2673 } 2674 *str = ptr; 2675 return(xmlStrndup(start, ptr - start )); 2676} 2677 2678/** 2679 * xmlParseNmtoken: 2680 * @ctxt: an XML parser context 2681 * 2682 * parse an XML Nmtoken. 2683 * 2684 * [7] Nmtoken ::= (NameChar)+ 2685 * 2686 * [8] Nmtokens ::= Nmtoken (S Nmtoken)* 2687 * 2688 * Returns the Nmtoken parsed or NULL 2689 */ 2690 2691xmlChar * 2692xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2693 xmlChar buf[XML_MAX_NAMELEN]; 2694 int len = 0; 2695 2696 GROW; 2697 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || 2698 (CUR == '.') || (CUR == '-') || 2699 (CUR == '_') || (CUR == ':') || 2700 (IS_COMBINING(CUR)) || 2701 (IS_EXTENDER(CUR))) { 2702 buf[len++] = CUR; 2703 NEXT; 2704 if (len >= XML_MAX_NAMELEN) { 2705 fprintf(stderr, 2706 "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n"); 2707 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || 2708 (CUR == '.') || (CUR == '-') || 2709 (CUR == '_') || (CUR == ':') || 2710 (IS_COMBINING(CUR)) || 2711 (IS_EXTENDER(CUR))) 2712 NEXT; 2713 break; 2714 } 2715 } 2716 return(xmlStrndup(buf, len)); 2717} 2718 2719/** 2720 * xmlParseEntityValue: 2721 * @ctxt: an XML parser context 2722 * @orig: if non-NULL store a copy of the original entity value 2723 * 2724 * parse a value for ENTITY decl. 2725 * 2726 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2727 * "'" ([^%&'] | PEReference | Reference)* "'" 2728 * 2729 * Returns the EntityValue parsed with reference substitued or NULL 2730 */ 2731 2732xmlChar * 2733xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2734 xmlChar *buf = NULL; 2735 int len = 0; 2736 int size = XML_PARSER_BUFFER_SIZE; 2737 xmlChar c; 2738 xmlChar stop; 2739 xmlChar *ret = NULL; 2740 xmlParserInputPtr input; 2741 2742 if (CUR == '"') stop = '"'; 2743 else if (CUR == '\'') stop = '\''; 2744 else { 2745 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; 2746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2747 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); 2748 ctxt->wellFormed = 0; 2749 return(NULL); 2750 } 2751 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 2752 if (buf == NULL) { 2753 fprintf(stderr, "malloc of %d byte failed\n", size); 2754 return(NULL); 2755 } 2756 2757 /* 2758 * The content of the entity definition is copied in a buffer. 2759 */ 2760 2761 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2762 input = ctxt->input; 2763 GROW; 2764 NEXT; 2765 c = CUR; 2766 /* 2767 * NOTE: 4.4.5 Included in Literal 2768 * When a parameter entity reference appears in a literal entity 2769 * value, ... a single or double quote character in the replacement 2770 * text is always treated as a normal data character and will not 2771 * terminate the literal. 2772 * In practice it means we stop the loop only when back at parsing 2773 * the initial entity and the quote is found 2774 */ 2775 while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) { 2776 if (len + 1 >= size) { 2777 size *= 2; 2778 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 2779 if (buf == NULL) { 2780 fprintf(stderr, "realloc of %d byte failed\n", size); 2781 return(NULL); 2782 } 2783 } 2784 buf[len++] = c; 2785 NEXT; 2786 /* 2787 * Pop-up of finished entities. 2788 */ 2789 while ((CUR == 0) && (ctxt->inputNr > 1)) 2790 xmlPopInput(ctxt); 2791 c = CUR; 2792 if (c == 0) { 2793 GROW; 2794 c = CUR; 2795 } 2796 } 2797 buf[len] = 0; 2798 2799 /* 2800 * Then PEReference entities are substituted. 2801 */ 2802 if (c != stop) { 2803 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 2804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2805 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); 2806 ctxt->wellFormed = 0; 2807 } else { 2808 NEXT; 2809 /* 2810 * NOTE: 4.4.7 Bypassed 2811 * When a general entity reference appears in the EntityValue in 2812 * an entity declaration, it is bypassed and left as is. 2813 * so XML_SUBSTITUTE_REF is not set. 2814 */ 2815 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 2816 0, 0, 0); 2817 if (orig != NULL) 2818 *orig = buf; 2819 else 2820 xmlFree(buf); 2821 } 2822 2823 return(ret); 2824} 2825 2826/** 2827 * xmlParseAttValue: 2828 * @ctxt: an XML parser context 2829 * 2830 * parse a value for an attribute 2831 * Note: the parser won't do substitution of entities here, this 2832 * will be handled later in xmlStringGetNodeList 2833 * 2834 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 2835 * "'" ([^<&'] | Reference)* "'" 2836 * 2837 * 3.3.3 Attribute-Value Normalization: 2838 * Before the value of an attribute is passed to the application or 2839 * checked for validity, the XML processor must normalize it as follows: 2840 * - a character reference is processed by appending the referenced 2841 * character to the attribute value 2842 * - an entity reference is processed by recursively processing the 2843 * replacement text of the entity 2844 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 2845 * appending #x20 to the normalized value, except that only a single 2846 * #x20 is appended for a "#xD#xA" sequence that is part of an external 2847 * parsed entity or the literal entity value of an internal parsed entity 2848 * - other characters are processed by appending them to the normalized value 2849 * If the declared value is not CDATA, then the XML processor must further 2850 * process the normalized attribute value by discarding any leading and 2851 * trailing space (#x20) characters, and by replacing sequences of space 2852 * (#x20) characters by a single space (#x20) character. 2853 * All attributes for which no declaration has been read should be treated 2854 * by a non-validating parser as if declared CDATA. 2855 * 2856 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 2857 */ 2858 2859xmlChar * 2860xmlParseAttValue(xmlParserCtxtPtr ctxt) { 2861 xmlChar limit = 0; 2862 xmlChar *buffer = NULL; 2863 int buffer_size = 0; 2864 xmlChar *out = NULL; 2865 2866 xmlChar *current = NULL; 2867 xmlEntityPtr ent; 2868 xmlChar cur; 2869 2870 2871 SHRINK; 2872 if (CUR == '"') { 2873 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2874 limit = '"'; 2875 NEXT; 2876 } else if (CUR == '\'') { 2877 limit = '\''; 2878 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 2879 NEXT; 2880 } else { 2881 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; 2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2883 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); 2884 ctxt->wellFormed = 0; 2885 return(NULL); 2886 } 2887 2888 /* 2889 * allocate a translation buffer. 2890 */ 2891 buffer_size = XML_PARSER_BUFFER_SIZE; 2892 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 2893 if (buffer == NULL) { 2894 perror("xmlParseAttValue: malloc failed"); 2895 return(NULL); 2896 } 2897 out = buffer; 2898 2899 /* 2900 * Ok loop until we reach one of the ending char or a size limit. 2901 */ 2902 cur = CUR; 2903 while ((cur != limit) && (cur != '<')) { 2904 if (cur == 0) break; 2905 if ((cur == '&') && (NXT(1) == '#')) { 2906 int val = xmlParseCharRef(ctxt); 2907 *out++ = val; 2908 } else if (cur == '&') { 2909 ent = xmlParseEntityRef(ctxt); 2910 if ((ent != NULL) && 2911 (ctxt->replaceEntities != 0)) { 2912 current = ent->content; 2913 while (*current != 0) { 2914 *out++ = *current++; 2915 if (out - buffer > buffer_size - 10) { 2916 int index = out - buffer; 2917 2918 growBuffer(buffer); 2919 out = &buffer[index]; 2920 } 2921 } 2922 } else if (ent != NULL) { 2923 int i = xmlStrlen(ent->name); 2924 const xmlChar *cur = ent->name; 2925 2926 *out++ = '&'; 2927 if (out - buffer > buffer_size - i - 10) { 2928 int index = out - buffer; 2929 2930 growBuffer(buffer); 2931 out = &buffer[index]; 2932 } 2933 for (;i > 0;i--) 2934 *out++ = *cur++; 2935 *out++ = ';'; 2936 } 2937 } else { 2938 /* invalid for UTF-8 , use COPY(out); !!!!!! */ 2939 if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) { 2940 *out++ = 0x20; 2941 if (out - buffer > buffer_size - 10) { 2942 int index = out - buffer; 2943 2944 growBuffer(buffer); 2945 out = &buffer[index]; 2946 } 2947 } else { 2948 *out++ = cur; 2949 if (out - buffer > buffer_size - 10) { 2950 int index = out - buffer; 2951 2952 growBuffer(buffer); 2953 out = &buffer[index]; 2954 } 2955 } 2956 NEXT; 2957 } 2958 cur = CUR; 2959 } 2960 *out++ = 0; 2961 if (CUR == '<') { 2962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2963 ctxt->sax->error(ctxt->userData, 2964 "Unescaped '<' not allowed in attributes values\n"); 2965 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 2966 ctxt->wellFormed = 0; 2967 } else if (CUR != limit) { 2968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 2969 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); 2970 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; 2971 ctxt->wellFormed = 0; 2972 } else 2973 NEXT; 2974 return(buffer); 2975} 2976 2977/** 2978 * xmlParseSystemLiteral: 2979 * @ctxt: an XML parser context 2980 * 2981 * parse an XML Literal 2982 * 2983 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2984 * 2985 * Returns the SystemLiteral parsed or NULL 2986 */ 2987 2988xmlChar * 2989xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 2990 xmlChar *buf = NULL; 2991 int len = 0; 2992 int size = XML_PARSER_BUFFER_SIZE; 2993 xmlChar cur; 2994 xmlChar stop; 2995 2996 SHRINK; 2997 if (CUR == '"') { 2998 NEXT; 2999 stop = '"'; 3000 } else if (CUR == '\'') { 3001 NEXT; 3002 stop = '\''; 3003 } else { 3004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3005 ctxt->sax->error(ctxt->userData, 3006 "SystemLiteral \" or ' expected\n"); 3007 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 3008 ctxt->wellFormed = 0; 3009 return(NULL); 3010 } 3011 3012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3013 if (buf == NULL) { 3014 fprintf(stderr, "malloc of %d byte failed\n", size); 3015 return(NULL); 3016 } 3017 cur = CUR; 3018 while ((IS_CHAR(cur)) && (cur != stop)) { 3019 if (len + 1 >= size) { 3020 size *= 2; 3021 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 3022 if (buf == NULL) { 3023 fprintf(stderr, "realloc of %d byte failed\n", size); 3024 return(NULL); 3025 } 3026 } 3027 buf[len++] = cur; 3028 NEXT; 3029 cur = CUR; 3030 if (cur == 0) { 3031 GROW; 3032 SHRINK; 3033 cur = CUR; 3034 } 3035 } 3036 buf[len] = 0; 3037 if (!IS_CHAR(cur)) { 3038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3039 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); 3040 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 3041 ctxt->wellFormed = 0; 3042 } else { 3043 NEXT; 3044 } 3045 return(buf); 3046} 3047 3048/** 3049 * xmlParsePubidLiteral: 3050 * @ctxt: an XML parser context 3051 * 3052 * parse an XML public literal 3053 * 3054 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3055 * 3056 * Returns the PubidLiteral parsed or NULL. 3057 */ 3058 3059xmlChar * 3060xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3061 xmlChar *buf = NULL; 3062 int len = 0; 3063 int size = XML_PARSER_BUFFER_SIZE; 3064 xmlChar cur; 3065 xmlChar stop; 3066 3067 SHRINK; 3068 if (CUR == '"') { 3069 NEXT; 3070 stop = '"'; 3071 } else if (CUR == '\'') { 3072 NEXT; 3073 stop = '\''; 3074 } else { 3075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3076 ctxt->sax->error(ctxt->userData, 3077 "SystemLiteral \" or ' expected\n"); 3078 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; 3079 ctxt->wellFormed = 0; 3080 return(NULL); 3081 } 3082 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3083 if (buf == NULL) { 3084 fprintf(stderr, "malloc of %d byte failed\n", size); 3085 return(NULL); 3086 } 3087 cur = CUR; 3088 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { 3089 if (len + 1 >= size) { 3090 size *= 2; 3091 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 3092 if (buf == NULL) { 3093 fprintf(stderr, "realloc of %d byte failed\n", size); 3094 return(NULL); 3095 } 3096 } 3097 buf[len++] = cur; 3098 NEXT; 3099 cur = CUR; 3100 if (cur == 0) { 3101 GROW; 3102 SHRINK; 3103 cur = CUR; 3104 } 3105 } 3106 buf[len] = 0; 3107 if (cur != stop) { 3108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3109 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); 3110 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; 3111 ctxt->wellFormed = 0; 3112 } else { 3113 NEXT; 3114 } 3115 return(buf); 3116} 3117 3118/** 3119 * xmlParseCharData: 3120 * @ctxt: an XML parser context 3121 * @cdata: int indicating whether we are within a CDATA section 3122 * 3123 * parse a CharData section. 3124 * if we are within a CDATA section ']]>' marks an end of section. 3125 * 3126 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 3127 */ 3128 3129void 3130xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 3131 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE]; 3132 int nbchar = 0; 3133 xmlChar cur; 3134 3135 SHRINK; 3136 cur = CUR; 3137 while ((IS_CHAR(cur)) && (cur != '<') && 3138 (cur != '&')) { 3139 if ((cur == ']') && (NXT(1) == ']') && 3140 (NXT(2) == '>')) { 3141 if (cdata) break; 3142 else { 3143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3144 ctxt->sax->warning(ctxt->userData, 3145 "Sequence ']]>' not allowed in content\n"); 3146 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; 3147 } 3148 } 3149 buf[nbchar++] = CUR; 3150 if (nbchar == XML_PARSER_BIG_BUFFER_SIZE) { 3151 /* 3152 * Ok the segment is to be consumed as chars. 3153 */ 3154 if (ctxt->sax != NULL) { 3155 if (areBlanks(ctxt, buf, nbchar)) { 3156 if (ctxt->sax->ignorableWhitespace != NULL) 3157 ctxt->sax->ignorableWhitespace(ctxt->userData, 3158 buf, nbchar); 3159 } else { 3160 if (ctxt->sax->characters != NULL) 3161 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3162 } 3163 } 3164 nbchar = 0; 3165 } 3166 NEXT; 3167 cur = CUR; 3168 } 3169 if (nbchar != 0) { 3170 /* 3171 * Ok the segment is to be consumed as chars. 3172 */ 3173 if (ctxt->sax != NULL) { 3174 if (areBlanks(ctxt, buf, nbchar)) { 3175 if (ctxt->sax->ignorableWhitespace != NULL) 3176 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3177 } else { 3178 if (ctxt->sax->characters != NULL) 3179 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3180 } 3181 } 3182 } 3183} 3184 3185/** 3186 * xmlParseExternalID: 3187 * @ctxt: an XML parser context 3188 * @publicID: a xmlChar** receiving PubidLiteral 3189 * @strict: indicate whether we should restrict parsing to only 3190 * production [75], see NOTE below 3191 * 3192 * Parse an External ID or a Public ID 3193 * 3194 * NOTE: Productions [75] and [83] interract badly since [75] can generate 3195 * 'PUBLIC' S PubidLiteral S SystemLiteral 3196 * 3197 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3198 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3199 * 3200 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3201 * 3202 * Returns the function returns SystemLiteral and in the second 3203 * case publicID receives PubidLiteral, is strict is off 3204 * it is possible to return NULL and have publicID set. 3205 */ 3206 3207xmlChar * 3208xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3209 xmlChar *URI = NULL; 3210 3211 SHRINK; 3212 if ((CUR == 'S') && (NXT(1) == 'Y') && 3213 (NXT(2) == 'S') && (NXT(3) == 'T') && 3214 (NXT(4) == 'E') && (NXT(5) == 'M')) { 3215 SKIP(6); 3216 if (!IS_BLANK(CUR)) { 3217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3218 ctxt->sax->error(ctxt->userData, 3219 "Space required after 'SYSTEM'\n"); 3220 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3221 ctxt->wellFormed = 0; 3222 } 3223 SKIP_BLANKS; 3224 URI = xmlParseSystemLiteral(ctxt); 3225 if (URI == NULL) { 3226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3227 ctxt->sax->error(ctxt->userData, 3228 "xmlParseExternalID: SYSTEM, no URI\n"); 3229 ctxt->errNo = XML_ERR_URI_REQUIRED; 3230 ctxt->wellFormed = 0; 3231 } 3232 } else if ((CUR == 'P') && (NXT(1) == 'U') && 3233 (NXT(2) == 'B') && (NXT(3) == 'L') && 3234 (NXT(4) == 'I') && (NXT(5) == 'C')) { 3235 SKIP(6); 3236 if (!IS_BLANK(CUR)) { 3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3238 ctxt->sax->error(ctxt->userData, 3239 "Space required after 'PUBLIC'\n"); 3240 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3241 ctxt->wellFormed = 0; 3242 } 3243 SKIP_BLANKS; 3244 *publicID = xmlParsePubidLiteral(ctxt); 3245 if (*publicID == NULL) { 3246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3247 ctxt->sax->error(ctxt->userData, 3248 "xmlParseExternalID: PUBLIC, no Public Identifier\n"); 3249 ctxt->errNo = XML_ERR_PUBID_REQUIRED; 3250 ctxt->wellFormed = 0; 3251 } 3252 if (strict) { 3253 /* 3254 * We don't handle [83] so "S SystemLiteral" is required. 3255 */ 3256 if (!IS_BLANK(CUR)) { 3257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3258 ctxt->sax->error(ctxt->userData, 3259 "Space required after the Public Identifier\n"); 3260 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3261 ctxt->wellFormed = 0; 3262 } 3263 } else { 3264 /* 3265 * We handle [83] so we return immediately, if 3266 * "S SystemLiteral" is not detected. From a purely parsing 3267 * point of view that's a nice mess. 3268 */ 3269 const xmlChar *ptr; 3270 GROW; 3271 3272 ptr = CUR_PTR; 3273 if (!IS_BLANK(*ptr)) return(NULL); 3274 3275 while (IS_BLANK(*ptr)) ptr++; 3276 if ((*ptr != '\'') || (*ptr != '"')) return(NULL); 3277 } 3278 SKIP_BLANKS; 3279 URI = xmlParseSystemLiteral(ctxt); 3280 if (URI == NULL) { 3281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3282 ctxt->sax->error(ctxt->userData, 3283 "xmlParseExternalID: PUBLIC, no URI\n"); 3284 ctxt->errNo = XML_ERR_URI_REQUIRED; 3285 ctxt->wellFormed = 0; 3286 } 3287 } 3288 return(URI); 3289} 3290 3291/** 3292 * xmlParseComment: 3293 * @ctxt: an XML parser context 3294 * 3295 * Skip an XML (SGML) comment <!-- .... --> 3296 * The spec says that "For compatibility, the string "--" (double-hyphen) 3297 * must not occur within comments. " 3298 * 3299 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3300 */ 3301void 3302xmlParseComment(xmlParserCtxtPtr ctxt) { 3303 xmlChar *buf = NULL; 3304 int len = 0; 3305 int size = XML_PARSER_BUFFER_SIZE; 3306 xmlChar q; 3307 xmlChar r; 3308 xmlChar cur; 3309 xmlParserInputState state; 3310 3311 /* 3312 * Check that there is a comment right here. 3313 */ 3314 if ((CUR != '<') || (NXT(1) != '!') || 3315 (NXT(2) != '-') || (NXT(3) != '-')) return; 3316 3317 state = ctxt->instate; 3318 ctxt->instate = XML_PARSER_COMMENT; 3319 SHRINK; 3320 SKIP(4); 3321 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3322 if (buf == NULL) { 3323 fprintf(stderr, "malloc of %d byte failed\n", size); 3324 ctxt->instate = state; 3325 return; 3326 } 3327 q = CUR; 3328 NEXT; 3329 r = CUR; 3330 NEXT; 3331 cur = CUR; 3332 while (IS_CHAR(cur) && 3333 ((cur != '>') || 3334 (r != '-') || (q != '-'))) { 3335 if ((r == '-') && (q == '-')) { 3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3337 ctxt->sax->error(ctxt->userData, 3338 "Comment must not contain '--' (double-hyphen)`\n"); 3339 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; 3340 ctxt->wellFormed = 0; 3341 } 3342 if (len + 1 >= size) { 3343 size *= 2; 3344 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 3345 if (buf == NULL) { 3346 fprintf(stderr, "realloc of %d byte failed\n", size); 3347 ctxt->instate = state; 3348 return; 3349 } 3350 } 3351 buf[len++] = q; 3352 q = r; 3353 r = cur; 3354 NEXT; 3355 cur = CUR; 3356 if (cur == 0) { 3357 SHRINK; 3358 GROW; 3359 cur = CUR; 3360 } 3361 } 3362 buf[len] = 0; 3363 if (!IS_CHAR(cur)) { 3364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3365 ctxt->sax->error(ctxt->userData, 3366 "Comment not terminated \n<!--%.50s\n", buf); 3367 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; 3368 ctxt->wellFormed = 0; 3369 } else { 3370 NEXT; 3371 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL)) 3372 ctxt->sax->comment(ctxt->userData, buf); 3373 xmlFree(buf); 3374 } 3375 ctxt->instate = state; 3376} 3377 3378/** 3379 * xmlParsePITarget: 3380 * @ctxt: an XML parser context 3381 * 3382 * parse the name of a PI 3383 * 3384 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 3385 * 3386 * Returns the PITarget name or NULL 3387 */ 3388 3389xmlChar * 3390xmlParsePITarget(xmlParserCtxtPtr ctxt) { 3391 xmlChar *name; 3392 3393 name = xmlParseName(ctxt); 3394 if ((name != NULL) && 3395 ((name[0] == 'x') || (name[0] == 'X')) && 3396 ((name[1] == 'm') || (name[1] == 'M')) && 3397 ((name[2] == 'l') || (name[2] == 'L'))) { 3398 int i; 3399 for (i = 0;;i++) { 3400 if (xmlW3CPIs[i] == NULL) break; 3401 if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i])) 3402 return(name); 3403 } 3404 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { 3405 ctxt->sax->warning(ctxt->userData, 3406 "xmlParsePItarget: invalid name prefix 'xml'\n"); 3407 ctxt->errNo = XML_ERR_RESERVED_XML_NAME; 3408 } 3409 } 3410 return(name); 3411} 3412 3413/** 3414 * xmlParsePI: 3415 * @ctxt: an XML parser context 3416 * 3417 * parse an XML Processing Instruction. 3418 * 3419 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 3420 * 3421 * The processing is transfered to SAX once parsed. 3422 */ 3423 3424void 3425xmlParsePI(xmlParserCtxtPtr ctxt) { 3426 xmlChar *buf = NULL; 3427 int len = 0; 3428 int size = XML_PARSER_BUFFER_SIZE; 3429 xmlChar cur; 3430 xmlChar *target; 3431 xmlParserInputState state; 3432 3433 if ((CUR == '<') && (NXT(1) == '?')) { 3434 state = ctxt->instate; 3435 ctxt->instate = XML_PARSER_PI; 3436 /* 3437 * this is a Processing Instruction. 3438 */ 3439 SKIP(2); 3440 SHRINK; 3441 3442 /* 3443 * Parse the target name and check for special support like 3444 * namespace. 3445 */ 3446 target = xmlParsePITarget(ctxt); 3447 if (target != NULL) { 3448 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 3449 if (buf == NULL) { 3450 fprintf(stderr, "malloc of %d byte failed\n", size); 3451 ctxt->instate = state; 3452 return; 3453 } 3454 cur = CUR; 3455 if (!IS_BLANK(cur)) { 3456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3457 ctxt->sax->error(ctxt->userData, 3458 "xmlParsePI: PI %s space expected\n", target); 3459 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3460 ctxt->wellFormed = 0; 3461 } 3462 SKIP_BLANKS; 3463 cur = CUR; 3464 while (IS_CHAR(cur) && 3465 ((cur != '?') || (NXT(1) != '>'))) { 3466 if (len + 1 >= size) { 3467 size *= 2; 3468 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 3469 if (buf == NULL) { 3470 fprintf(stderr, "realloc of %d byte failed\n", size); 3471 ctxt->instate = state; 3472 return; 3473 } 3474 } 3475 buf[len++] = cur; 3476 NEXT; 3477 cur = CUR; 3478 if (cur == 0) { 3479 SHRINK; 3480 GROW; 3481 cur = CUR; 3482 } 3483 } 3484 buf[len] = 0; 3485 if (!IS_CHAR(cur)) { 3486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3487 ctxt->sax->error(ctxt->userData, 3488 "xmlParsePI: PI %s never end ...\n", target); 3489 ctxt->errNo = XML_ERR_PI_NOT_FINISHED; 3490 ctxt->wellFormed = 0; 3491 } else { 3492 SKIP(2); 3493 3494 /* 3495 * SAX: PI detected. 3496 */ 3497 if ((ctxt->sax) && 3498 (ctxt->sax->processingInstruction != NULL)) 3499 ctxt->sax->processingInstruction(ctxt->userData, 3500 target, buf); 3501 } 3502 xmlFree(buf); 3503 xmlFree(target); 3504 } else { 3505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3506 ctxt->sax->error(ctxt->userData, 3507 "xmlParsePI : no target name\n"); 3508 ctxt->errNo = XML_ERR_PI_NOT_STARTED; 3509 ctxt->wellFormed = 0; 3510 } 3511 ctxt->instate = state; 3512 } 3513} 3514 3515/** 3516 * xmlParseNotationDecl: 3517 * @ctxt: an XML parser context 3518 * 3519 * parse a notation declaration 3520 * 3521 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 3522 * 3523 * Hence there is actually 3 choices: 3524 * 'PUBLIC' S PubidLiteral 3525 * 'PUBLIC' S PubidLiteral S SystemLiteral 3526 * and 'SYSTEM' S SystemLiteral 3527 * 3528 * See the NOTE on xmlParseExternalID(). 3529 */ 3530 3531void 3532xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 3533 xmlChar *name; 3534 xmlChar *Pubid; 3535 xmlChar *Systemid; 3536 3537 if ((CUR == '<') && (NXT(1) == '!') && 3538 (NXT(2) == 'N') && (NXT(3) == 'O') && 3539 (NXT(4) == 'T') && (NXT(5) == 'A') && 3540 (NXT(6) == 'T') && (NXT(7) == 'I') && 3541 (NXT(8) == 'O') && (NXT(9) == 'N')) { 3542 SHRINK; 3543 SKIP(10); 3544 if (!IS_BLANK(CUR)) { 3545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3546 ctxt->sax->error(ctxt->userData, 3547 "Space required after '<!NOTATION'\n"); 3548 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3549 ctxt->wellFormed = 0; 3550 return; 3551 } 3552 SKIP_BLANKS; 3553 3554 name = xmlParseName(ctxt); 3555 if (name == NULL) { 3556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3557 ctxt->sax->error(ctxt->userData, 3558 "NOTATION: Name expected here\n"); 3559 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3560 ctxt->wellFormed = 0; 3561 return; 3562 } 3563 if (!IS_BLANK(CUR)) { 3564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3565 ctxt->sax->error(ctxt->userData, 3566 "Space required after the NOTATION name'\n"); 3567 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3568 ctxt->wellFormed = 0; 3569 return; 3570 } 3571 SKIP_BLANKS; 3572 3573 /* 3574 * Parse the IDs. 3575 */ 3576 Systemid = xmlParseExternalID(ctxt, &Pubid, 1); 3577 SKIP_BLANKS; 3578 3579 if (CUR == '>') { 3580 NEXT; 3581 if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL)) 3582 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 3583 } else { 3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3585 ctxt->sax->error(ctxt->userData, 3586 "'>' required to close NOTATION declaration\n"); 3587 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3588 ctxt->wellFormed = 0; 3589 } 3590 xmlFree(name); 3591 if (Systemid != NULL) xmlFree(Systemid); 3592 if (Pubid != NULL) xmlFree(Pubid); 3593 } 3594} 3595 3596/** 3597 * xmlParseEntityDecl: 3598 * @ctxt: an XML parser context 3599 * 3600 * parse <!ENTITY declarations 3601 * 3602 * [70] EntityDecl ::= GEDecl | PEDecl 3603 * 3604 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 3605 * 3606 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 3607 * 3608 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 3609 * 3610 * [74] PEDef ::= EntityValue | ExternalID 3611 * 3612 * [76] NDataDecl ::= S 'NDATA' S Name 3613 * 3614 * [ VC: Notation Declared ] 3615 * The Name must match the declared name of a notation. 3616 */ 3617 3618void 3619xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 3620 xmlChar *name = NULL; 3621 xmlChar *value = NULL; 3622 xmlChar *URI = NULL, *literal = NULL; 3623 xmlChar *ndata = NULL; 3624 int isParameter = 0; 3625 xmlChar *orig = NULL; 3626 3627 GROW; 3628 if ((CUR == '<') && (NXT(1) == '!') && 3629 (NXT(2) == 'E') && (NXT(3) == 'N') && 3630 (NXT(4) == 'T') && (NXT(5) == 'I') && 3631 (NXT(6) == 'T') && (NXT(7) == 'Y')) { 3632 ctxt->instate = XML_PARSER_ENTITY_DECL; 3633 SHRINK; 3634 SKIP(8); 3635 if (!IS_BLANK(CUR)) { 3636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3637 ctxt->sax->error(ctxt->userData, 3638 "Space required after '<!ENTITY'\n"); 3639 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3640 ctxt->wellFormed = 0; 3641 } 3642 SKIP_BLANKS; 3643 3644 if (CUR == '%') { 3645 NEXT; 3646 if (!IS_BLANK(CUR)) { 3647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3648 ctxt->sax->error(ctxt->userData, 3649 "Space required after '%'\n"); 3650 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3651 ctxt->wellFormed = 0; 3652 } 3653 SKIP_BLANKS; 3654 isParameter = 1; 3655 } 3656 3657 name = xmlParseName(ctxt); 3658 if (name == NULL) { 3659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3660 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n"); 3661 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3662 ctxt->wellFormed = 0; 3663 return; 3664 } 3665 if (!IS_BLANK(CUR)) { 3666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3667 ctxt->sax->error(ctxt->userData, 3668 "Space required after the entity name\n"); 3669 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3670 ctxt->wellFormed = 0; 3671 } 3672 SKIP_BLANKS; 3673 3674 /* 3675 * handle the various case of definitions... 3676 */ 3677 if (isParameter) { 3678 if ((CUR == '"') || (CUR == '\'')) 3679 value = xmlParseEntityValue(ctxt, &orig); 3680 if (value) { 3681 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL)) 3682 ctxt->sax->entityDecl(ctxt->userData, name, 3683 XML_INTERNAL_PARAMETER_ENTITY, 3684 NULL, NULL, value); 3685 } 3686 else { 3687 URI = xmlParseExternalID(ctxt, &literal, 1); 3688 if (URI) { 3689 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL)) 3690 ctxt->sax->entityDecl(ctxt->userData, name, 3691 XML_EXTERNAL_PARAMETER_ENTITY, 3692 literal, URI, NULL); 3693 } 3694 } 3695 } else { 3696 if ((CUR == '"') || (CUR == '\'')) { 3697 value = xmlParseEntityValue(ctxt, &orig); 3698 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL)) 3699 ctxt->sax->entityDecl(ctxt->userData, name, 3700 XML_INTERNAL_GENERAL_ENTITY, 3701 NULL, NULL, value); 3702 } else { 3703 URI = xmlParseExternalID(ctxt, &literal, 1); 3704 if ((CUR != '>') && (!IS_BLANK(CUR))) { 3705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3706 ctxt->sax->error(ctxt->userData, 3707 "Space required before 'NDATA'\n"); 3708 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3709 ctxt->wellFormed = 0; 3710 } 3711 SKIP_BLANKS; 3712 if ((CUR == 'N') && (NXT(1) == 'D') && 3713 (NXT(2) == 'A') && (NXT(3) == 'T') && 3714 (NXT(4) == 'A')) { 3715 SKIP(5); 3716 if (!IS_BLANK(CUR)) { 3717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3718 ctxt->sax->error(ctxt->userData, 3719 "Space required after 'NDATA'\n"); 3720 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3721 ctxt->wellFormed = 0; 3722 } 3723 SKIP_BLANKS; 3724 ndata = xmlParseName(ctxt); 3725 if ((ctxt->sax != NULL) && 3726 (ctxt->sax->unparsedEntityDecl != NULL)) 3727 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 3728 literal, URI, ndata); 3729 } else { 3730 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL)) 3731 ctxt->sax->entityDecl(ctxt->userData, name, 3732 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 3733 literal, URI, NULL); 3734 } 3735 } 3736 } 3737 SKIP_BLANKS; 3738 if (CUR != '>') { 3739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3740 ctxt->sax->error(ctxt->userData, 3741 "xmlParseEntityDecl: entity %s not terminated\n", name); 3742 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; 3743 ctxt->wellFormed = 0; 3744 } else 3745 NEXT; 3746 if (orig != NULL) { 3747 /* 3748 * Ugly mechanism to save the raw entity value. 3749 */ 3750 xmlEntityPtr cur = NULL; 3751 3752 if (isParameter) { 3753 if ((ctxt->sax != NULL) && 3754 (ctxt->sax->getParameterEntity != NULL)) 3755 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 3756 } else { 3757 if ((ctxt->sax != NULL) && 3758 (ctxt->sax->getEntity != NULL)) 3759 cur = ctxt->sax->getEntity(ctxt->userData, name); 3760 } 3761 if (cur != NULL) { 3762 if (cur->orig != NULL) 3763 xmlFree(orig); 3764 else 3765 cur->orig = orig; 3766 } else 3767 xmlFree(orig); 3768 } 3769 if (name != NULL) xmlFree(name); 3770 if (value != NULL) xmlFree(value); 3771 if (URI != NULL) xmlFree(URI); 3772 if (literal != NULL) xmlFree(literal); 3773 if (ndata != NULL) xmlFree(ndata); 3774 } 3775} 3776 3777/** 3778 * xmlParseDefaultDecl: 3779 * @ctxt: an XML parser context 3780 * @value: Receive a possible fixed default value for the attribute 3781 * 3782 * Parse an attribute default declaration 3783 * 3784 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 3785 * 3786 * [ VC: Required Attribute ] 3787 * if the default declaration is the keyword #REQUIRED, then the 3788 * attribute must be specified for all elements of the type in the 3789 * attribute-list declaration. 3790 * 3791 * [ VC: Attribute Default Legal ] 3792 * The declared default value must meet the lexical constraints of 3793 * the declared attribute type c.f. xmlValidateAttributeDecl() 3794 * 3795 * [ VC: Fixed Attribute Default ] 3796 * if an attribute has a default value declared with the #FIXED 3797 * keyword, instances of that attribute must match the default value. 3798 * 3799 * [ WFC: No < in Attribute Values ] 3800 * handled in xmlParseAttValue() 3801 * 3802 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 3803 * or XML_ATTRIBUTE_FIXED. 3804 */ 3805 3806int 3807xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 3808 int val; 3809 xmlChar *ret; 3810 3811 *value = NULL; 3812 if ((CUR == '#') && (NXT(1) == 'R') && 3813 (NXT(2) == 'E') && (NXT(3) == 'Q') && 3814 (NXT(4) == 'U') && (NXT(5) == 'I') && 3815 (NXT(6) == 'R') && (NXT(7) == 'E') && 3816 (NXT(8) == 'D')) { 3817 SKIP(9); 3818 return(XML_ATTRIBUTE_REQUIRED); 3819 } 3820 if ((CUR == '#') && (NXT(1) == 'I') && 3821 (NXT(2) == 'M') && (NXT(3) == 'P') && 3822 (NXT(4) == 'L') && (NXT(5) == 'I') && 3823 (NXT(6) == 'E') && (NXT(7) == 'D')) { 3824 SKIP(8); 3825 return(XML_ATTRIBUTE_IMPLIED); 3826 } 3827 val = XML_ATTRIBUTE_NONE; 3828 if ((CUR == '#') && (NXT(1) == 'F') && 3829 (NXT(2) == 'I') && (NXT(3) == 'X') && 3830 (NXT(4) == 'E') && (NXT(5) == 'D')) { 3831 SKIP(6); 3832 val = XML_ATTRIBUTE_FIXED; 3833 if (!IS_BLANK(CUR)) { 3834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3835 ctxt->sax->error(ctxt->userData, 3836 "Space required after '#FIXED'\n"); 3837 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 3838 ctxt->wellFormed = 0; 3839 } 3840 SKIP_BLANKS; 3841 } 3842 ret = xmlParseAttValue(ctxt); 3843 ctxt->instate = XML_PARSER_DTD; 3844 if (ret == NULL) { 3845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3846 ctxt->sax->error(ctxt->userData, 3847 "Attribute default value declaration error\n"); 3848 ctxt->wellFormed = 0; 3849 } else 3850 *value = ret; 3851 return(val); 3852} 3853 3854/** 3855 * xmlParseNotationType: 3856 * @ctxt: an XML parser context 3857 * 3858 * parse an Notation attribute type. 3859 * 3860 * Note: the leading 'NOTATION' S part has already being parsed... 3861 * 3862 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3863 * 3864 * [ VC: Notation Attributes ] 3865 * Values of this type must match one of the notation names included 3866 * in the declaration; all notation names in the declaration must be declared. 3867 * 3868 * Returns: the notation attribute tree built while parsing 3869 */ 3870 3871xmlEnumerationPtr 3872xmlParseNotationType(xmlParserCtxtPtr ctxt) { 3873 xmlChar *name; 3874 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3875 3876 if (CUR != '(') { 3877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3878 ctxt->sax->error(ctxt->userData, 3879 "'(' required to start 'NOTATION'\n"); 3880 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED; 3881 ctxt->wellFormed = 0; 3882 return(NULL); 3883 } 3884 SHRINK; 3885 do { 3886 NEXT; 3887 SKIP_BLANKS; 3888 name = xmlParseName(ctxt); 3889 if (name == NULL) { 3890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3891 ctxt->sax->error(ctxt->userData, 3892 "Name expected in NOTATION declaration\n"); 3893 ctxt->errNo = XML_ERR_NAME_REQUIRED; 3894 ctxt->wellFormed = 0; 3895 return(ret); 3896 } 3897 cur = xmlCreateEnumeration(name); 3898 xmlFree(name); 3899 if (cur == NULL) return(ret); 3900 if (last == NULL) ret = last = cur; 3901 else { 3902 last->next = cur; 3903 last = cur; 3904 } 3905 SKIP_BLANKS; 3906 } while (CUR == '|'); 3907 if (CUR != ')') { 3908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3909 ctxt->sax->error(ctxt->userData, 3910 "')' required to finish NOTATION declaration\n"); 3911 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED; 3912 ctxt->wellFormed = 0; 3913 return(ret); 3914 } 3915 NEXT; 3916 return(ret); 3917} 3918 3919/** 3920 * xmlParseEnumerationType: 3921 * @ctxt: an XML parser context 3922 * 3923 * parse an Enumeration attribute type. 3924 * 3925 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 3926 * 3927 * [ VC: Enumeration ] 3928 * Values of this type must match one of the Nmtoken tokens in 3929 * the declaration 3930 * 3931 * Returns: the enumeration attribute tree built while parsing 3932 */ 3933 3934xmlEnumerationPtr 3935xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 3936 xmlChar *name; 3937 xmlEnumerationPtr ret = NULL, last = NULL, cur; 3938 3939 if (CUR != '(') { 3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3941 ctxt->sax->error(ctxt->userData, 3942 "'(' required to start ATTLIST enumeration\n"); 3943 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED; 3944 ctxt->wellFormed = 0; 3945 return(NULL); 3946 } 3947 SHRINK; 3948 do { 3949 NEXT; 3950 SKIP_BLANKS; 3951 name = xmlParseNmtoken(ctxt); 3952 if (name == NULL) { 3953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3954 ctxt->sax->error(ctxt->userData, 3955 "NmToken expected in ATTLIST enumeration\n"); 3956 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED; 3957 ctxt->wellFormed = 0; 3958 return(ret); 3959 } 3960 cur = xmlCreateEnumeration(name); 3961 xmlFree(name); 3962 if (cur == NULL) return(ret); 3963 if (last == NULL) ret = last = cur; 3964 else { 3965 last->next = cur; 3966 last = cur; 3967 } 3968 SKIP_BLANKS; 3969 } while (CUR == '|'); 3970 if (CUR != ')') { 3971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 3972 ctxt->sax->error(ctxt->userData, 3973 "')' required to finish ATTLIST enumeration\n"); 3974 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED; 3975 ctxt->wellFormed = 0; 3976 return(ret); 3977 } 3978 NEXT; 3979 return(ret); 3980} 3981 3982/** 3983 * xmlParseEnumeratedType: 3984 * @ctxt: an XML parser context 3985 * @tree: the enumeration tree built while parsing 3986 * 3987 * parse an Enumerated attribute type. 3988 * 3989 * [57] EnumeratedType ::= NotationType | Enumeration 3990 * 3991 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 3992 * 3993 * 3994 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 3995 */ 3996 3997int 3998xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 3999 if ((CUR == 'N') && (NXT(1) == 'O') && 4000 (NXT(2) == 'T') && (NXT(3) == 'A') && 4001 (NXT(4) == 'T') && (NXT(5) == 'I') && 4002 (NXT(6) == 'O') && (NXT(7) == 'N')) { 4003 SKIP(8); 4004 if (!IS_BLANK(CUR)) { 4005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4006 ctxt->sax->error(ctxt->userData, 4007 "Space required after 'NOTATION'\n"); 4008 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4009 ctxt->wellFormed = 0; 4010 return(0); 4011 } 4012 SKIP_BLANKS; 4013 *tree = xmlParseNotationType(ctxt); 4014 if (*tree == NULL) return(0); 4015 return(XML_ATTRIBUTE_NOTATION); 4016 } 4017 *tree = xmlParseEnumerationType(ctxt); 4018 if (*tree == NULL) return(0); 4019 return(XML_ATTRIBUTE_ENUMERATION); 4020} 4021 4022/** 4023 * xmlParseAttributeType: 4024 * @ctxt: an XML parser context 4025 * @tree: the enumeration tree built while parsing 4026 * 4027 * parse the Attribute list def for an element 4028 * 4029 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4030 * 4031 * [55] StringType ::= 'CDATA' 4032 * 4033 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4034 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4035 * 4036 * Validity constraints for attribute values syntax are checked in 4037 * xmlValidateAttributeValue() 4038 * 4039 * [ VC: ID ] 4040 * Values of type ID must match the Name production. A name must not 4041 * appear more than once in an XML document as a value of this type; 4042 * i.e., ID values must uniquely identify the elements which bear them. 4043 * 4044 * [ VC: One ID per Element Type ] 4045 * No element type may have more than one ID attribute specified. 4046 * 4047 * [ VC: ID Attribute Default ] 4048 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 4049 * 4050 * [ VC: IDREF ] 4051 * Values of type IDREF must match the Name production, and values 4052 * of type IDREFS must match Names; each IDREF Name must match the value 4053 * of an ID attribute on some element in the XML document; i.e. IDREF 4054 * values must match the value of some ID attribute. 4055 * 4056 * [ VC: Entity Name ] 4057 * Values of type ENTITY must match the Name production, values 4058 * of type ENTITIES must match Names; each Entity Name must match the 4059 * name of an unparsed entity declared in the DTD. 4060 * 4061 * [ VC: Name Token ] 4062 * Values of type NMTOKEN must match the Nmtoken production; values 4063 * of type NMTOKENS must match Nmtokens. 4064 * 4065 * Returns the attribute type 4066 */ 4067int 4068xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4069 SHRINK; 4070 if ((CUR == 'C') && (NXT(1) == 'D') && 4071 (NXT(2) == 'A') && (NXT(3) == 'T') && 4072 (NXT(4) == 'A')) { 4073 SKIP(5); 4074 return(XML_ATTRIBUTE_CDATA); 4075 } else if ((CUR == 'I') && (NXT(1) == 'D') && 4076 (NXT(2) == 'R') && (NXT(3) == 'E') && 4077 (NXT(4) == 'F') && (NXT(5) == 'S')) { 4078 SKIP(6); 4079 return(XML_ATTRIBUTE_IDREFS); 4080 } else if ((CUR == 'I') && (NXT(1) == 'D') && 4081 (NXT(2) == 'R') && (NXT(3) == 'E') && 4082 (NXT(4) == 'F')) { 4083 SKIP(5); 4084 return(XML_ATTRIBUTE_IDREF); 4085 } else if ((CUR == 'I') && (NXT(1) == 'D')) { 4086 SKIP(2); 4087 return(XML_ATTRIBUTE_ID); 4088 } else if ((CUR == 'E') && (NXT(1) == 'N') && 4089 (NXT(2) == 'T') && (NXT(3) == 'I') && 4090 (NXT(4) == 'T') && (NXT(5) == 'Y')) { 4091 SKIP(6); 4092 return(XML_ATTRIBUTE_ENTITY); 4093 } else if ((CUR == 'E') && (NXT(1) == 'N') && 4094 (NXT(2) == 'T') && (NXT(3) == 'I') && 4095 (NXT(4) == 'T') && (NXT(5) == 'I') && 4096 (NXT(6) == 'E') && (NXT(7) == 'S')) { 4097 SKIP(8); 4098 return(XML_ATTRIBUTE_ENTITIES); 4099 } else if ((CUR == 'N') && (NXT(1) == 'M') && 4100 (NXT(2) == 'T') && (NXT(3) == 'O') && 4101 (NXT(4) == 'K') && (NXT(5) == 'E') && 4102 (NXT(6) == 'N') && (NXT(7) == 'S')) { 4103 SKIP(8); 4104 return(XML_ATTRIBUTE_NMTOKENS); 4105 } else if ((CUR == 'N') && (NXT(1) == 'M') && 4106 (NXT(2) == 'T') && (NXT(3) == 'O') && 4107 (NXT(4) == 'K') && (NXT(5) == 'E') && 4108 (NXT(6) == 'N')) { 4109 SKIP(7); 4110 return(XML_ATTRIBUTE_NMTOKEN); 4111 } 4112 return(xmlParseEnumeratedType(ctxt, tree)); 4113} 4114 4115/** 4116 * xmlParseAttributeListDecl: 4117 * @ctxt: an XML parser context 4118 * 4119 * : parse the Attribute list def for an element 4120 * 4121 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 4122 * 4123 * [53] AttDef ::= S Name S AttType S DefaultDecl 4124 * 4125 */ 4126void 4127xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4128 xmlChar *elemName; 4129 xmlChar *attrName; 4130 xmlEnumerationPtr tree; 4131 4132 if ((CUR == '<') && (NXT(1) == '!') && 4133 (NXT(2) == 'A') && (NXT(3) == 'T') && 4134 (NXT(4) == 'T') && (NXT(5) == 'L') && 4135 (NXT(6) == 'I') && (NXT(7) == 'S') && 4136 (NXT(8) == 'T')) { 4137 SKIP(9); 4138 if (!IS_BLANK(CUR)) { 4139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4140 ctxt->sax->error(ctxt->userData, 4141 "Space required after '<!ATTLIST'\n"); 4142 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4143 ctxt->wellFormed = 0; 4144 } 4145 SKIP_BLANKS; 4146 elemName = xmlParseName(ctxt); 4147 if (elemName == NULL) { 4148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4149 ctxt->sax->error(ctxt->userData, 4150 "ATTLIST: no name for Element\n"); 4151 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4152 ctxt->wellFormed = 0; 4153 return; 4154 } 4155 SKIP_BLANKS; 4156 while (CUR != '>') { 4157 const xmlChar *check = CUR_PTR; 4158 int type; 4159 int def; 4160 xmlChar *defaultValue = NULL; 4161 4162 tree = NULL; 4163 attrName = xmlParseName(ctxt); 4164 if (attrName == NULL) { 4165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4166 ctxt->sax->error(ctxt->userData, 4167 "ATTLIST: no name for Attribute\n"); 4168 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4169 ctxt->wellFormed = 0; 4170 break; 4171 } 4172 GROW; 4173 if (!IS_BLANK(CUR)) { 4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4175 ctxt->sax->error(ctxt->userData, 4176 "Space required after the attribute name\n"); 4177 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4178 ctxt->wellFormed = 0; 4179 break; 4180 } 4181 SKIP_BLANKS; 4182 4183 type = xmlParseAttributeType(ctxt, &tree); 4184 if (type <= 0) break; 4185 4186 GROW; 4187 if (!IS_BLANK(CUR)) { 4188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4189 ctxt->sax->error(ctxt->userData, 4190 "Space required after the attribute type\n"); 4191 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4192 ctxt->wellFormed = 0; 4193 break; 4194 } 4195 SKIP_BLANKS; 4196 4197 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4198 if (def <= 0) break; 4199 4200 GROW; 4201 if (CUR != '>') { 4202 if (!IS_BLANK(CUR)) { 4203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4204 ctxt->sax->error(ctxt->userData, 4205 "Space required after the attribute default value\n"); 4206 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4207 ctxt->wellFormed = 0; 4208 break; 4209 } 4210 SKIP_BLANKS; 4211 } 4212 if (check == CUR_PTR) { 4213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4214 ctxt->sax->error(ctxt->userData, 4215 "xmlParseAttributeListDecl: detected internal error\n"); 4216 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 4217 break; 4218 } 4219 if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL)) 4220 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4221 type, def, defaultValue, tree); 4222 if (attrName != NULL) 4223 xmlFree(attrName); 4224 if (defaultValue != NULL) 4225 xmlFree(defaultValue); 4226 GROW; 4227 } 4228 if (CUR == '>') 4229 NEXT; 4230 4231 xmlFree(elemName); 4232 } 4233} 4234 4235/** 4236 * xmlParseElementMixedContentDecl: 4237 * @ctxt: an XML parser context 4238 * 4239 * parse the declaration for a Mixed Element content 4240 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4241 * 4242 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 4243 * '(' S? '#PCDATA' S? ')' 4244 * 4245 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 4246 * 4247 * [ VC: No Duplicate Types ] 4248 * The same name must not appear more than once in a single 4249 * mixed-content declaration. 4250 * 4251 * returns: the list of the xmlElementContentPtr describing the element choices 4252 */ 4253xmlElementContentPtr 4254xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { 4255 xmlElementContentPtr ret = NULL, cur = NULL, n; 4256 xmlChar *elem = NULL; 4257 4258 GROW; 4259 if ((CUR == '#') && (NXT(1) == 'P') && 4260 (NXT(2) == 'C') && (NXT(3) == 'D') && 4261 (NXT(4) == 'A') && (NXT(5) == 'T') && 4262 (NXT(6) == 'A')) { 4263 SKIP(7); 4264 SKIP_BLANKS; 4265 SHRINK; 4266 if (CUR == ')') { 4267 NEXT; 4268 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4269 if (CUR == '*') { 4270 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4271 NEXT; 4272 } 4273 return(ret); 4274 } 4275 if ((CUR == '(') || (CUR == '|')) { 4276 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); 4277 if (ret == NULL) return(NULL); 4278 } 4279 while (CUR == '|') { 4280 NEXT; 4281 if (elem == NULL) { 4282 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4283 if (ret == NULL) return(NULL); 4284 ret->c1 = cur; 4285 cur = ret; 4286 } else { 4287 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4288 if (n == NULL) return(NULL); 4289 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4290 cur->c2 = n; 4291 cur = n; 4292 xmlFree(elem); 4293 } 4294 SKIP_BLANKS; 4295 elem = xmlParseName(ctxt); 4296 if (elem == NULL) { 4297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4298 ctxt->sax->error(ctxt->userData, 4299 "xmlParseElementMixedContentDecl : Name expected\n"); 4300 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4301 ctxt->wellFormed = 0; 4302 xmlFreeElementContent(cur); 4303 return(NULL); 4304 } 4305 SKIP_BLANKS; 4306 GROW; 4307 } 4308 if ((CUR == ')') && (NXT(1) == '*')) { 4309 if (elem != NULL) { 4310 cur->c2 = xmlNewElementContent(elem, 4311 XML_ELEMENT_CONTENT_ELEMENT); 4312 xmlFree(elem); 4313 } 4314 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4315 SKIP(2); 4316 } else { 4317 if (elem != NULL) xmlFree(elem); 4318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4319 ctxt->sax->error(ctxt->userData, 4320 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n"); 4321 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED; 4322 ctxt->wellFormed = 0; 4323 xmlFreeElementContent(ret); 4324 return(NULL); 4325 } 4326 4327 } else { 4328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4329 ctxt->sax->error(ctxt->userData, 4330 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n"); 4331 ctxt->errNo = XML_ERR_PCDATA_REQUIRED; 4332 ctxt->wellFormed = 0; 4333 } 4334 return(ret); 4335} 4336 4337/** 4338 * xmlParseElementChildrenContentDecl: 4339 * @ctxt: an XML parser context 4340 * 4341 * parse the declaration for a Mixed Element content 4342 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 4343 * 4344 * 4345 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 4346 * 4347 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 4348 * 4349 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 4350 * 4351 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 4352 * 4353 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 4354 * TODO Parameter-entity replacement text must be properly nested 4355 * with parenthetized groups. That is to say, if either of the 4356 * opening or closing parentheses in a choice, seq, or Mixed 4357 * construct is contained in the replacement text for a parameter 4358 * entity, both must be contained in the same replacement text. For 4359 * interoperability, if a parameter-entity reference appears in a 4360 * choice, seq, or Mixed construct, its replacement text should not 4361 * be empty, and neither the first nor last non-blank character of 4362 * the replacement text should be a connector (| or ,). 4363 * 4364 * returns: the tree of xmlElementContentPtr describing the element 4365 * hierarchy. 4366 */ 4367xmlElementContentPtr 4368xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) { 4369 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 4370 xmlChar *elem; 4371 xmlChar type = 0; 4372 4373 SKIP_BLANKS; 4374 GROW; 4375 if (CUR == '(') { 4376 /* Recurse on first child */ 4377 NEXT; 4378 SKIP_BLANKS; 4379 cur = ret = xmlParseElementChildrenContentDecl(ctxt); 4380 SKIP_BLANKS; 4381 GROW; 4382 } else { 4383 elem = xmlParseName(ctxt); 4384 if (elem == NULL) { 4385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4386 ctxt->sax->error(ctxt->userData, 4387 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4388 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4389 ctxt->wellFormed = 0; 4390 return(NULL); 4391 } 4392 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4393 GROW; 4394 if (CUR == '?') { 4395 cur->ocur = XML_ELEMENT_CONTENT_OPT; 4396 NEXT; 4397 } else if (CUR == '*') { 4398 cur->ocur = XML_ELEMENT_CONTENT_MULT; 4399 NEXT; 4400 } else if (CUR == '+') { 4401 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 4402 NEXT; 4403 } else { 4404 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 4405 } 4406 xmlFree(elem); 4407 GROW; 4408 } 4409 SKIP_BLANKS; 4410 SHRINK; 4411 while (CUR != ')') { 4412 /* 4413 * Each loop we parse one separator and one element. 4414 */ 4415 if (CUR == ',') { 4416 if (type == 0) type = CUR; 4417 4418 /* 4419 * Detect "Name | Name , Name" error 4420 */ 4421 else if (type != CUR) { 4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4423 ctxt->sax->error(ctxt->userData, 4424 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4425 type); 4426 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4427 ctxt->wellFormed = 0; 4428 xmlFreeElementContent(ret); 4429 return(NULL); 4430 } 4431 NEXT; 4432 4433 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); 4434 if (op == NULL) { 4435 xmlFreeElementContent(ret); 4436 return(NULL); 4437 } 4438 if (last == NULL) { 4439 op->c1 = ret; 4440 ret = cur = op; 4441 } else { 4442 cur->c2 = op; 4443 op->c1 = last; 4444 cur =op; 4445 last = NULL; 4446 } 4447 } else if (CUR == '|') { 4448 if (type == 0) type = CUR; 4449 4450 /* 4451 * Detect "Name , Name | Name" error 4452 */ 4453 else if (type != CUR) { 4454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4455 ctxt->sax->error(ctxt->userData, 4456 "xmlParseElementChildrenContentDecl : '%c' expected\n", 4457 type); 4458 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED; 4459 ctxt->wellFormed = 0; 4460 xmlFreeElementContent(ret); 4461 return(NULL); 4462 } 4463 NEXT; 4464 4465 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); 4466 if (op == NULL) { 4467 xmlFreeElementContent(ret); 4468 return(NULL); 4469 } 4470 if (last == NULL) { 4471 op->c1 = ret; 4472 ret = cur = op; 4473 } else { 4474 cur->c2 = op; 4475 op->c1 = last; 4476 cur =op; 4477 last = NULL; 4478 } 4479 } else { 4480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4481 ctxt->sax->error(ctxt->userData, 4482 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n"); 4483 ctxt->wellFormed = 0; 4484 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED; 4485 xmlFreeElementContent(ret); 4486 return(NULL); 4487 } 4488 GROW; 4489 SKIP_BLANKS; 4490 GROW; 4491 if (CUR == '(') { 4492 /* Recurse on second child */ 4493 NEXT; 4494 SKIP_BLANKS; 4495 last = xmlParseElementChildrenContentDecl(ctxt); 4496 SKIP_BLANKS; 4497 } else { 4498 elem = xmlParseName(ctxt); 4499 if (elem == NULL) { 4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4501 ctxt->sax->error(ctxt->userData, 4502 "xmlParseElementChildrenContentDecl : Name or '(' expected\n"); 4503 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4504 ctxt->wellFormed = 0; 4505 return(NULL); 4506 } 4507 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); 4508 xmlFree(elem); 4509 if (CUR == '?') { 4510 last->ocur = XML_ELEMENT_CONTENT_OPT; 4511 NEXT; 4512 } else if (CUR == '*') { 4513 last->ocur = XML_ELEMENT_CONTENT_MULT; 4514 NEXT; 4515 } else if (CUR == '+') { 4516 last->ocur = XML_ELEMENT_CONTENT_PLUS; 4517 NEXT; 4518 } else { 4519 last->ocur = XML_ELEMENT_CONTENT_ONCE; 4520 } 4521 } 4522 SKIP_BLANKS; 4523 GROW; 4524 } 4525 if ((cur != NULL) && (last != NULL)) { 4526 cur->c2 = last; 4527 } 4528 NEXT; 4529 if (CUR == '?') { 4530 ret->ocur = XML_ELEMENT_CONTENT_OPT; 4531 NEXT; 4532 } else if (CUR == '*') { 4533 ret->ocur = XML_ELEMENT_CONTENT_MULT; 4534 NEXT; 4535 } else if (CUR == '+') { 4536 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 4537 NEXT; 4538 } 4539 return(ret); 4540} 4541 4542/** 4543 * xmlParseElementContentDecl: 4544 * @ctxt: an XML parser context 4545 * @name: the name of the element being defined. 4546 * @result: the Element Content pointer will be stored here if any 4547 * 4548 * parse the declaration for an Element content either Mixed or Children, 4549 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 4550 * 4551 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 4552 * 4553 * returns: the type of element content XML_ELEMENT_TYPE_xxx 4554 */ 4555 4556int 4557xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, 4558 xmlElementContentPtr *result) { 4559 4560 xmlElementContentPtr tree = NULL; 4561 int res; 4562 4563 *result = NULL; 4564 4565 if (CUR != '(') { 4566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4567 ctxt->sax->error(ctxt->userData, 4568 "xmlParseElementContentDecl : '(' expected\n"); 4569 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4570 ctxt->wellFormed = 0; 4571 return(-1); 4572 } 4573 NEXT; 4574 GROW; 4575 SKIP_BLANKS; 4576 if ((CUR == '#') && (NXT(1) == 'P') && 4577 (NXT(2) == 'C') && (NXT(3) == 'D') && 4578 (NXT(4) == 'A') && (NXT(5) == 'T') && 4579 (NXT(6) == 'A')) { 4580 tree = xmlParseElementMixedContentDecl(ctxt); 4581 res = XML_ELEMENT_TYPE_MIXED; 4582 } else { 4583 tree = xmlParseElementChildrenContentDecl(ctxt); 4584 res = XML_ELEMENT_TYPE_ELEMENT; 4585 } 4586 SKIP_BLANKS; 4587 /**************************** 4588 if (CUR != ')') { 4589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4590 ctxt->sax->error(ctxt->userData, 4591 "xmlParseElementContentDecl : ')' expected\n"); 4592 ctxt->wellFormed = 0; 4593 return(-1); 4594 } 4595 ****************************/ 4596 *result = tree; 4597 return(res); 4598} 4599 4600/** 4601 * xmlParseElementDecl: 4602 * @ctxt: an XML parser context 4603 * 4604 * parse an Element declaration. 4605 * 4606 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' 4607 * 4608 * [ VC: Unique Element Type Declaration ] 4609 * No element type may be declared more than once 4610 * 4611 * Returns the type of the element, or -1 in case of error 4612 */ 4613int 4614xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 4615 xmlChar *name; 4616 int ret = -1; 4617 xmlElementContentPtr content = NULL; 4618 4619 GROW; 4620 if ((CUR == '<') && (NXT(1) == '!') && 4621 (NXT(2) == 'E') && (NXT(3) == 'L') && 4622 (NXT(4) == 'E') && (NXT(5) == 'M') && 4623 (NXT(6) == 'E') && (NXT(7) == 'N') && 4624 (NXT(8) == 'T')) { 4625 SKIP(9); 4626 if (!IS_BLANK(CUR)) { 4627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4628 ctxt->sax->error(ctxt->userData, 4629 "Space required after 'ELEMENT'\n"); 4630 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4631 ctxt->wellFormed = 0; 4632 } 4633 SKIP_BLANKS; 4634 name = xmlParseName(ctxt); 4635 if (name == NULL) { 4636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4637 ctxt->sax->error(ctxt->userData, 4638 "xmlParseElementDecl: no name for Element\n"); 4639 ctxt->errNo = XML_ERR_NAME_REQUIRED; 4640 ctxt->wellFormed = 0; 4641 return(-1); 4642 } 4643 if (!IS_BLANK(CUR)) { 4644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4645 ctxt->sax->error(ctxt->userData, 4646 "Space required after the element name\n"); 4647 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4648 ctxt->wellFormed = 0; 4649 } 4650 SKIP_BLANKS; 4651 if ((CUR == 'E') && (NXT(1) == 'M') && 4652 (NXT(2) == 'P') && (NXT(3) == 'T') && 4653 (NXT(4) == 'Y')) { 4654 SKIP(5); 4655 /* 4656 * Element must always be empty. 4657 */ 4658 ret = XML_ELEMENT_TYPE_EMPTY; 4659 } else if ((CUR == 'A') && (NXT(1) == 'N') && 4660 (NXT(2) == 'Y')) { 4661 SKIP(3); 4662 /* 4663 * Element is a generic container. 4664 */ 4665 ret = XML_ELEMENT_TYPE_ANY; 4666 } else if (CUR == '(') { 4667 ret = xmlParseElementContentDecl(ctxt, name, &content); 4668 } else { 4669 /* 4670 * [ WFC: PEs in Internal Subset ] error handling. 4671 */ 4672 if ((CUR == '%') && (ctxt->external == 0) && 4673 (ctxt->inputNr == 1)) { 4674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4675 ctxt->sax->error(ctxt->userData, 4676 "PEReference: forbidden within markup decl in internal subset\n"); 4677 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET; 4678 } else { 4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4680 ctxt->sax->error(ctxt->userData, 4681 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 4682 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED; 4683 } 4684 ctxt->wellFormed = 0; 4685 if (name != NULL) xmlFree(name); 4686 return(-1); 4687 } 4688 4689 SKIP_BLANKS; 4690 /* 4691 * Pop-up of finished entities. 4692 */ 4693 while ((CUR == 0) && (ctxt->inputNr > 1)) 4694 xmlPopInput(ctxt); 4695 SKIP_BLANKS; 4696 4697 if (CUR != '>') { 4698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4699 ctxt->sax->error(ctxt->userData, 4700 "xmlParseElementDecl: expected '>' at the end\n"); 4701 ctxt->errNo = XML_ERR_GT_REQUIRED; 4702 ctxt->wellFormed = 0; 4703 } else { 4704 NEXT; 4705 if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL)) 4706 ctxt->sax->elementDecl(ctxt->userData, name, ret, 4707 content); 4708 } 4709 if (content != NULL) { 4710 xmlFreeElementContent(content); 4711 } 4712 if (name != NULL) { 4713 xmlFree(name); 4714 } 4715 } 4716 return(ret); 4717} 4718 4719/** 4720 * xmlParseMarkupDecl: 4721 * @ctxt: an XML parser context 4722 * 4723 * parse Markup declarations 4724 * 4725 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 4726 * NotationDecl | PI | Comment 4727 * 4728 * [ VC: Proper Declaration/PE Nesting ] 4729 * TODO Parameter-entity replacement text must be properly nested with 4730 * markup declarations. That is to say, if either the first character 4731 * or the last character of a markup declaration (markupdecl above) is 4732 * contained in the replacement text for a parameter-entity reference, 4733 * both must be contained in the same replacement text. 4734 * 4735 * [ WFC: PEs in Internal Subset ] 4736 * In the internal DTD subset, parameter-entity references can occur 4737 * only where markup declarations can occur, not within markup declarations. 4738 * (This does not apply to references that occur in external parameter 4739 * entities or to the external subset.) 4740 */ 4741void 4742xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 4743 GROW; 4744 xmlParseElementDecl(ctxt); 4745 xmlParseAttributeListDecl(ctxt); 4746 xmlParseEntityDecl(ctxt); 4747 xmlParseNotationDecl(ctxt); 4748 xmlParsePI(ctxt); 4749 xmlParseComment(ctxt); 4750 /* 4751 * This is only for internal subset. On external entities, 4752 * the replacement is done before parsing stage 4753 */ 4754 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 4755 xmlParsePEReference(ctxt); 4756 ctxt->instate = XML_PARSER_DTD; 4757} 4758 4759/** 4760 * xmlParseTextDecl: 4761 * @ctxt: an XML parser context 4762 * 4763 * parse an XML declaration header for external entities 4764 * 4765 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 4766 * 4767 * Returns the only valuable info for an external parsed entity, the encoding 4768 */ 4769 4770xmlChar * 4771xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 4772 xmlChar *version; 4773 xmlChar *encoding = NULL; 4774 4775 /* 4776 * We know that '<?xml' is here. 4777 */ 4778 SKIP(5); 4779 4780 if (!IS_BLANK(CUR)) { 4781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4782 ctxt->sax->error(ctxt->userData, 4783 "Space needed after '<?xml'\n"); 4784 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4785 ctxt->wellFormed = 0; 4786 } 4787 SKIP_BLANKS; 4788 4789 /* 4790 * We may have the VersionInfo here. 4791 */ 4792 version = xmlParseVersionInfo(ctxt); 4793 if (version == NULL) 4794 version = xmlCharStrdup(XML_DEFAULT_VERSION); 4795 ctxt->version = xmlStrdup(version); 4796 xmlFree(version); 4797 4798 /* 4799 * We must have the encoding declaration 4800 */ 4801 if (!IS_BLANK(CUR)) { 4802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4803 ctxt->sax->error(ctxt->userData, "Space needed here\n"); 4804 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 4805 ctxt->wellFormed = 0; 4806 } 4807 encoding = xmlParseEncodingDecl(ctxt); 4808 4809 SKIP_BLANKS; 4810 if ((CUR == '?') && (NXT(1) == '>')) { 4811 SKIP(2); 4812 } else if (CUR == '>') { 4813 /* Deprecated old WD ... */ 4814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4815 ctxt->sax->error(ctxt->userData, 4816 "XML declaration must end-up with '?>'\n"); 4817 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4818 ctxt->wellFormed = 0; 4819 NEXT; 4820 } else { 4821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4822 ctxt->sax->error(ctxt->userData, 4823 "parsing XML declaration: '?>' expected\n"); 4824 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 4825 ctxt->wellFormed = 0; 4826 MOVETO_ENDTAG(CUR_PTR); 4827 NEXT; 4828 } 4829 return(encoding); 4830} 4831 4832/* 4833 * xmlParseConditionalSections 4834 * @ctxt: an XML parser context 4835 * 4836 * TODO : Conditionnal section are not yet supported ! 4837 * 4838 * [61] conditionalSect ::= includeSect | ignoreSect 4839 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 4840 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' 4841 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 4842 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 4843 */ 4844 4845void 4846xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 4847 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 4848 ctxt->sax->warning(ctxt->userData, 4849 "XML conditional section not supported\n"); 4850 /* 4851 * Skip up to the end of the conditionnal section. 4852 */ 4853 while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>'))) { 4854 NEXT; 4855 /* 4856 * Pop-up of finished entities. 4857 */ 4858 while ((CUR == 0) && (ctxt->inputNr > 1)) 4859 xmlPopInput(ctxt); 4860 4861 if (CUR == 0) 4862 GROW; 4863 } 4864 4865 if (CUR == 0) 4866 SHRINK; 4867 4868 if (CUR == 0) { 4869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4870 ctxt->sax->error(ctxt->userData, 4871 "XML conditional section not closed\n"); 4872 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED; 4873 ctxt->wellFormed = 0; 4874 } else { 4875 SKIP(3); 4876 } 4877} 4878 4879/** 4880 * xmlParseExternalSubset: 4881 * @ctxt: an XML parser context 4882 * @ExternalID: the external identifier 4883 * @SystemID: the system identifier (or URL) 4884 * 4885 * parse Markup declarations from an external subset 4886 * 4887 * [30] extSubset ::= textDecl? extSubsetDecl 4888 * 4889 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 4890 */ 4891void 4892xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 4893 const xmlChar *SystemID) { 4894 GROW; 4895 if ((CUR == '<') && (NXT(1) == '?') && 4896 (NXT(2) == 'x') && (NXT(3) == 'm') && 4897 (NXT(4) == 'l')) { 4898 xmlChar *decl; 4899 4900 decl = xmlParseTextDecl(ctxt); 4901 if (decl != NULL) 4902 xmlFree(decl); 4903 } 4904 if (ctxt->myDoc == NULL) { 4905 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 4906 } 4907 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 4908 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 4909 4910 ctxt->instate = XML_PARSER_DTD; 4911 ctxt->external = 1; 4912 while (((CUR == '<') && (NXT(1) == '?')) || 4913 ((CUR == '<') && (NXT(1) == '!')) || 4914 IS_BLANK(CUR)) { 4915 const xmlChar *check = CUR_PTR; 4916 int cons = ctxt->input->consumed; 4917 4918 if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 4919 xmlParseConditionalSections(ctxt); 4920 } else if (IS_BLANK(CUR)) { 4921 NEXT; 4922 } else if (CUR == '%') { 4923 xmlParsePEReference(ctxt); 4924 } else 4925 xmlParseMarkupDecl(ctxt); 4926 4927 /* 4928 * Pop-up of finished entities. 4929 */ 4930 while ((CUR == 0) && (ctxt->inputNr > 1)) 4931 xmlPopInput(ctxt); 4932 4933 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 4934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4935 ctxt->sax->error(ctxt->userData, 4936 "Content error in the external subset\n"); 4937 ctxt->wellFormed = 0; 4938 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4939 break; 4940 } 4941 } 4942 4943 if (CUR != 0) { 4944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 4945 ctxt->sax->error(ctxt->userData, 4946 "Extra content at the end of the document\n"); 4947 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; 4948 ctxt->wellFormed = 0; 4949 } 4950 4951} 4952 4953/** 4954 * xmlParseReference: 4955 * @ctxt: an XML parser context 4956 * 4957 * parse and handle entity references in content, depending on the SAX 4958 * interface, this may end-up in a call to character() if this is a 4959 * CharRef, a predefined entity, if there is no reference() callback. 4960 * or if the parser was asked to switch to that mode. 4961 * 4962 * [67] Reference ::= EntityRef | CharRef 4963 */ 4964void 4965xmlParseReference(xmlParserCtxtPtr ctxt) { 4966 xmlEntityPtr ent; 4967 xmlChar *val; 4968 if (CUR != '&') return; 4969 4970 if (ctxt->inputNr > 1) { 4971 xmlChar cur[2] = { '&' , 0 } ; 4972 4973 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 4974 ctxt->sax->characters(ctxt->userData, cur, 1); 4975 if (ctxt->token == '&') 4976 ctxt->token = 0; 4977 else { 4978 SKIP(1); 4979 } 4980 return; 4981 } 4982 if (NXT(1) == '#') { 4983 xmlChar out[2]; 4984 int val = xmlParseCharRef(ctxt); 4985 /* invalid for UTF-8 variable encoding !!!!! */ 4986 out[0] = val; 4987 out[1] = 0; 4988 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 4989 ctxt->sax->characters(ctxt->userData, out, 1); 4990 } else { 4991 ent = xmlParseEntityRef(ctxt); 4992 if (ent == NULL) return; 4993 if ((ent->name != NULL) && 4994 (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) { 4995 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 4996 (ctxt->replaceEntities == 0)) { 4997 /* 4998 * Create a node. 4999 */ 5000 ctxt->sax->reference(ctxt->userData, ent->name); 5001 return; 5002 } else if (ctxt->replaceEntities) { 5003 xmlParserInputPtr input; 5004 5005 input = xmlNewEntityInputStream(ctxt, ent); 5006 xmlPushInput(ctxt, input); 5007 return; 5008 } 5009 } 5010 val = ent->content; 5011 if (val == NULL) return; 5012 /* 5013 * inline the entity. 5014 */ 5015 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 5016 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 5017 } 5018} 5019 5020/** 5021 * xmlParseEntityRef: 5022 * @ctxt: an XML parser context 5023 * 5024 * parse ENTITY references declarations 5025 * 5026 * [68] EntityRef ::= '&' Name ';' 5027 * 5028 * [ WFC: Entity Declared ] 5029 * In a document without any DTD, a document with only an internal DTD 5030 * subset which contains no parameter entity references, or a document 5031 * with "standalone='yes'", the Name given in the entity reference 5032 * must match that in an entity declaration, except that well-formed 5033 * documents need not declare any of the following entities: amp, lt, 5034 * gt, apos, quot. The declaration of a parameter entity must precede 5035 * any reference to it. Similarly, the declaration of a general entity 5036 * must precede any reference to it which appears in a default value in an 5037 * attribute-list declaration. Note that if entities are declared in the 5038 * external subset or in external parameter entities, a non-validating 5039 * processor is not obligated to read and process their declarations; 5040 * for such documents, the rule that an entity must be declared is a 5041 * well-formedness constraint only if standalone='yes'. 5042 * 5043 * [ WFC: Parsed Entity ] 5044 * An entity reference must not contain the name of an unparsed entity 5045 * 5046 * Returns the xmlEntityPtr if found, or NULL otherwise. 5047 */ 5048xmlEntityPtr 5049xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 5050 xmlChar *name; 5051 xmlEntityPtr ent = NULL; 5052 5053 GROW; 5054 5055 if (CUR == '&') { 5056 NEXT; 5057 name = xmlParseName(ctxt); 5058 if (name == NULL) { 5059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5060 ctxt->sax->error(ctxt->userData, 5061 "xmlParseEntityRef: no name\n"); 5062 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5063 ctxt->wellFormed = 0; 5064 } else { 5065 if (CUR == ';') { 5066 NEXT; 5067 /* 5068 * Ask first SAX for entity resolution, otherwise try the 5069 * predefined set. 5070 */ 5071 if (ctxt->sax != NULL) { 5072 if (ctxt->sax->getEntity != NULL) 5073 ent = ctxt->sax->getEntity(ctxt->userData, name); 5074 if (ent == NULL) 5075 ent = xmlGetPredefinedEntity(name); 5076 } 5077 /* 5078 * [ WFC: Entity Declared ] 5079 * In a document without any DTD, a document with only an 5080 * internal DTD subset which contains no parameter entity 5081 * references, or a document with "standalone='yes'", the 5082 * Name given in the entity reference must match that in an 5083 * entity declaration, except that well-formed documents 5084 * need not declare any of the following entities: amp, lt, 5085 * gt, apos, quot. 5086 * The declaration of a parameter entity must precede any 5087 * reference to it. 5088 * Similarly, the declaration of a general entity must 5089 * precede any reference to it which appears in a default 5090 * value in an attribute-list declaration. Note that if 5091 * entities are declared in the external subset or in 5092 * external parameter entities, a non-validating processor 5093 * is not obligated to read and process their declarations; 5094 * for such documents, the rule that an entity must be 5095 * declared is a well-formedness constraint only if 5096 * standalone='yes'. 5097 */ 5098 if (ent == NULL) { 5099 if ((ctxt->standalone == 1) || 5100 ((ctxt->hasExternalSubset == 0) && 5101 (ctxt->hasPErefs == 0))) { 5102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5103 ctxt->sax->error(ctxt->userData, 5104 "Entity '%s' not defined\n", name); 5105 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5106 ctxt->wellFormed = 0; 5107 } else { 5108 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5109 ctxt->sax->warning(ctxt->userData, 5110 "Entity '%s' not defined\n", name); 5111 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5112 } 5113 } 5114 5115 /* 5116 * [ WFC: Parsed Entity ] 5117 * An entity reference must not contain the name of an 5118 * unparsed entity 5119 */ 5120 else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5122 ctxt->sax->error(ctxt->userData, 5123 "Entity reference to unparsed entity %s\n", name); 5124 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5125 ctxt->wellFormed = 0; 5126 } 5127 5128 /* 5129 * [ WFC: No External Entity References ] 5130 * Attribute values cannot contain direct or indirect 5131 * entity references to external entities. 5132 */ 5133 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5134 (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5136 ctxt->sax->error(ctxt->userData, 5137 "Attribute references external entity '%s'\n", name); 5138 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5139 ctxt->wellFormed = 0; 5140 } 5141 /* 5142 * [ WFC: No < in Attribute Values ] 5143 * The replacement text of any entity referred to directly or 5144 * indirectly in an attribute value (other than "<") must 5145 * not contain a <. 5146 */ 5147 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5148 (ent != NULL) && 5149 (xmlStrcmp(ent->name, BAD_CAST "lt")) && 5150 (ent->content != NULL) && 5151 (xmlStrchr(ent->content, '<'))) { 5152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5153 ctxt->sax->error(ctxt->userData, 5154 "'<' in entity '%s' is not allowed in attributes values\n", name); 5155 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5156 ctxt->wellFormed = 0; 5157 } 5158 5159 /* 5160 * Internal check, no parameter entities here ... 5161 */ 5162 else { 5163 switch (ent->type) { 5164 case XML_INTERNAL_PARAMETER_ENTITY: 5165 case XML_EXTERNAL_PARAMETER_ENTITY: 5166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5167 ctxt->sax->error(ctxt->userData, 5168 "Attempt to reference the parameter entity '%s'\n", name); 5169 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5170 ctxt->wellFormed = 0; 5171 break; 5172 } 5173 } 5174 5175 /* 5176 * [ WFC: No Recursion ] 5177 * TODO A parsed entity must not contain a recursive reference 5178 * to itself, either directly or indirectly. 5179 */ 5180 5181 } else { 5182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5183 ctxt->sax->error(ctxt->userData, 5184 "xmlParseEntityRef: expecting ';'\n"); 5185 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5186 ctxt->wellFormed = 0; 5187 } 5188 xmlFree(name); 5189 } 5190 } 5191 return(ent); 5192} 5193/** 5194 * xmlParseStringEntityRef: 5195 * @ctxt: an XML parser context 5196 * @str: a pointer to an index in the string 5197 * 5198 * parse ENTITY references declarations, but this version parses it from 5199 * a string value. 5200 * 5201 * [68] EntityRef ::= '&' Name ';' 5202 * 5203 * [ WFC: Entity Declared ] 5204 * In a document without any DTD, a document with only an internal DTD 5205 * subset which contains no parameter entity references, or a document 5206 * with "standalone='yes'", the Name given in the entity reference 5207 * must match that in an entity declaration, except that well-formed 5208 * documents need not declare any of the following entities: amp, lt, 5209 * gt, apos, quot. The declaration of a parameter entity must precede 5210 * any reference to it. Similarly, the declaration of a general entity 5211 * must precede any reference to it which appears in a default value in an 5212 * attribute-list declaration. Note that if entities are declared in the 5213 * external subset or in external parameter entities, a non-validating 5214 * processor is not obligated to read and process their declarations; 5215 * for such documents, the rule that an entity must be declared is a 5216 * well-formedness constraint only if standalone='yes'. 5217 * 5218 * [ WFC: Parsed Entity ] 5219 * An entity reference must not contain the name of an unparsed entity 5220 * 5221 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 5222 * is updated to the current location in the string. 5223 */ 5224xmlEntityPtr 5225xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 5226 xmlChar *name; 5227 const xmlChar *ptr; 5228 xmlChar cur; 5229 xmlEntityPtr ent = NULL; 5230 5231 GROW; 5232 5233 if ((str == NULL) || (*str == NULL)) return(NULL); /* !!! */ 5234 ptr = *str; 5235 cur = *ptr; 5236 if (cur == '&') { 5237 ptr++; 5238 cur = *ptr; 5239 name = xmlParseStringName(ctxt, &ptr); 5240 if (name == NULL) { 5241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5242 ctxt->sax->error(ctxt->userData, 5243 "xmlParseEntityRef: no name\n"); 5244 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5245 ctxt->wellFormed = 0; 5246 } else { 5247 if (CUR == ';') { 5248 NEXT; 5249 /* 5250 * Ask first SAX for entity resolution, otherwise try the 5251 * predefined set. 5252 */ 5253 if (ctxt->sax != NULL) { 5254 if (ctxt->sax->getEntity != NULL) 5255 ent = ctxt->sax->getEntity(ctxt->userData, name); 5256 if (ent == NULL) 5257 ent = xmlGetPredefinedEntity(name); 5258 } 5259 /* 5260 * [ WFC: Entity Declared ] 5261 * In a document without any DTD, a document with only an 5262 * internal DTD subset which contains no parameter entity 5263 * references, or a document with "standalone='yes'", the 5264 * Name given in the entity reference must match that in an 5265 * entity declaration, except that well-formed documents 5266 * need not declare any of the following entities: amp, lt, 5267 * gt, apos, quot. 5268 * The declaration of a parameter entity must precede any 5269 * reference to it. 5270 * Similarly, the declaration of a general entity must 5271 * precede any reference to it which appears in a default 5272 * value in an attribute-list declaration. Note that if 5273 * entities are declared in the external subset or in 5274 * external parameter entities, a non-validating processor 5275 * is not obligated to read and process their declarations; 5276 * for such documents, the rule that an entity must be 5277 * declared is a well-formedness constraint only if 5278 * standalone='yes'. 5279 */ 5280 if (ent == NULL) { 5281 if ((ctxt->standalone == 1) || 5282 ((ctxt->hasExternalSubset == 0) && 5283 (ctxt->hasPErefs == 0))) { 5284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5285 ctxt->sax->error(ctxt->userData, 5286 "Entity '%s' not defined\n", name); 5287 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5288 ctxt->wellFormed = 0; 5289 } else { 5290 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5291 ctxt->sax->warning(ctxt->userData, 5292 "Entity '%s' not defined\n", name); 5293 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY; 5294 } 5295 } 5296 5297 /* 5298 * [ WFC: Parsed Entity ] 5299 * An entity reference must not contain the name of an 5300 * unparsed entity 5301 */ 5302 else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 5303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5304 ctxt->sax->error(ctxt->userData, 5305 "Entity reference to unparsed entity %s\n", name); 5306 ctxt->errNo = XML_ERR_UNPARSED_ENTITY; 5307 ctxt->wellFormed = 0; 5308 } 5309 5310 /* 5311 * [ WFC: No External Entity References ] 5312 * Attribute values cannot contain direct or indirect 5313 * entity references to external entities. 5314 */ 5315 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5316 (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 5317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5318 ctxt->sax->error(ctxt->userData, 5319 "Attribute references external entity '%s'\n", name); 5320 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL; 5321 ctxt->wellFormed = 0; 5322 } 5323 /* 5324 * [ WFC: No < in Attribute Values ] 5325 * The replacement text of any entity referred to directly or 5326 * indirectly in an attribute value (other than "<") must 5327 * not contain a <. 5328 */ 5329 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 5330 (ent != NULL) && 5331 (xmlStrcmp(ent->name, BAD_CAST "lt")) && 5332 (ent->content != NULL) && 5333 (xmlStrchr(ent->content, '<'))) { 5334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5335 ctxt->sax->error(ctxt->userData, 5336 "'<' in entity '%s' is not allowed in attributes values\n", name); 5337 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; 5338 ctxt->wellFormed = 0; 5339 } 5340 5341 /* 5342 * Internal check, no parameter entities here ... 5343 */ 5344 else { 5345 switch (ent->type) { 5346 case XML_INTERNAL_PARAMETER_ENTITY: 5347 case XML_EXTERNAL_PARAMETER_ENTITY: 5348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5349 ctxt->sax->error(ctxt->userData, 5350 "Attempt to reference the parameter entity '%s'\n", name); 5351 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER; 5352 ctxt->wellFormed = 0; 5353 break; 5354 } 5355 } 5356 5357 /* 5358 * [ WFC: No Recursion ] 5359 * TODO A parsed entity must not contain a recursive reference 5360 * to itself, either directly or indirectly. 5361 */ 5362 5363 } else { 5364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5365 ctxt->sax->error(ctxt->userData, 5366 "xmlParseEntityRef: expecting ';'\n"); 5367 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5368 ctxt->wellFormed = 0; 5369 } 5370 xmlFree(name); 5371 } 5372 } 5373 return(ent); 5374} 5375 5376/** 5377 * xmlParsePEReference: 5378 * @ctxt: an XML parser context 5379 * 5380 * parse PEReference declarations 5381 * The entity content is handled directly by pushing it's content as 5382 * a new input stream. 5383 * 5384 * [69] PEReference ::= '%' Name ';' 5385 * 5386 * [ WFC: No Recursion ] 5387 * TODO A parsed entity must not contain a recursive 5388 * reference to itself, either directly or indirectly. 5389 * 5390 * [ WFC: Entity Declared ] 5391 * In a document without any DTD, a document with only an internal DTD 5392 * subset which contains no parameter entity references, or a document 5393 * with "standalone='yes'", ... ... The declaration of a parameter 5394 * entity must precede any reference to it... 5395 * 5396 * [ VC: Entity Declared ] 5397 * In a document with an external subset or external parameter entities 5398 * with "standalone='no'", ... ... The declaration of a parameter entity 5399 * must precede any reference to it... 5400 * 5401 * [ WFC: In DTD ] 5402 * Parameter-entity references may only appear in the DTD. 5403 * NOTE: misleading but this is handled. 5404 */ 5405void 5406xmlParsePEReference(xmlParserCtxtPtr ctxt) { 5407 xmlChar *name; 5408 xmlEntityPtr entity = NULL; 5409 xmlParserInputPtr input; 5410 5411 if (CUR == '%') { 5412 NEXT; 5413 name = xmlParseName(ctxt); 5414 if (name == NULL) { 5415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5416 ctxt->sax->error(ctxt->userData, 5417 "xmlParsePEReference: no name\n"); 5418 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5419 ctxt->wellFormed = 0; 5420 } else { 5421 if (CUR == ';') { 5422 NEXT; 5423 if ((ctxt->sax != NULL) && 5424 (ctxt->sax->getParameterEntity != NULL)) 5425 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5426 name); 5427 if (entity == NULL) { 5428 /* 5429 * [ WFC: Entity Declared ] 5430 * In a document without any DTD, a document with only an 5431 * internal DTD subset which contains no parameter entity 5432 * references, or a document with "standalone='yes'", ... 5433 * ... The declaration of a parameter entity must precede 5434 * any reference to it... 5435 */ 5436 if ((ctxt->standalone == 1) || 5437 ((ctxt->hasExternalSubset == 0) && 5438 (ctxt->hasPErefs == 0))) { 5439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5440 ctxt->sax->error(ctxt->userData, 5441 "PEReference: %%%s; not found\n", name); 5442 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5443 ctxt->wellFormed = 0; 5444 } else { 5445 /* 5446 * [ VC: Entity Declared ] 5447 * In a document with an external subset or external 5448 * parameter entities with "standalone='no'", ... 5449 * ... The declaration of a parameter entity must precede 5450 * any reference to it... 5451 */ 5452 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5453 ctxt->sax->warning(ctxt->userData, 5454 "PEReference: %%%s; not found\n", name); 5455 ctxt->valid = 0; 5456 } 5457 } else { 5458 /* 5459 * Internal checking in case the entity quest barfed 5460 */ 5461 if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) && 5462 (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) { 5463 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5464 ctxt->sax->warning(ctxt->userData, 5465 "Internal: %%%s; is not a parameter entity\n", name); 5466 } else { 5467 input = xmlNewEntityInputStream(ctxt, entity); 5468 xmlPushInput(ctxt, input); 5469 } 5470 } 5471 ctxt->hasPErefs = 1; 5472 } else { 5473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5474 ctxt->sax->error(ctxt->userData, 5475 "xmlParsePEReference: expecting ';'\n"); 5476 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5477 ctxt->wellFormed = 0; 5478 } 5479 xmlFree(name); 5480 } 5481 } 5482} 5483 5484/** 5485 * xmlParseStringPEReference: 5486 * @ctxt: an XML parser context 5487 * @str: a pointer to an index in the string 5488 * 5489 * parse PEReference declarations 5490 * 5491 * [69] PEReference ::= '%' Name ';' 5492 * 5493 * [ WFC: No Recursion ] 5494 * TODO A parsed entity must not contain a recursive 5495 * reference to itself, either directly or indirectly. 5496 * 5497 * [ WFC: Entity Declared ] 5498 * In a document without any DTD, a document with only an internal DTD 5499 * subset which contains no parameter entity references, or a document 5500 * with "standalone='yes'", ... ... The declaration of a parameter 5501 * entity must precede any reference to it... 5502 * 5503 * [ VC: Entity Declared ] 5504 * In a document with an external subset or external parameter entities 5505 * with "standalone='no'", ... ... The declaration of a parameter entity 5506 * must precede any reference to it... 5507 * 5508 * [ WFC: In DTD ] 5509 * Parameter-entity references may only appear in the DTD. 5510 * NOTE: misleading but this is handled. 5511 * 5512 * Returns the string of the entity content. 5513 * str is updated to the current value of the index 5514 */ 5515xmlEntityPtr 5516xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 5517 const xmlChar *ptr; 5518 xmlChar cur; 5519 xmlChar *name; 5520 xmlEntityPtr entity = NULL; 5521 5522 if ((str == NULL) || (*str == NULL)) return(NULL); 5523 ptr = *str; 5524 cur = *ptr; 5525 if (cur == '%') { 5526 ptr++; 5527 cur = *ptr; 5528 name = xmlParseStringName(ctxt, &ptr); 5529 if (name == NULL) { 5530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5531 ctxt->sax->error(ctxt->userData, 5532 "xmlParseStringPEReference: no name\n"); 5533 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5534 ctxt->wellFormed = 0; 5535 } else { 5536 cur = *ptr; 5537 if (cur == ';') { 5538 ptr++; 5539 cur = *ptr; 5540 if ((ctxt->sax != NULL) && 5541 (ctxt->sax->getParameterEntity != NULL)) 5542 entity = ctxt->sax->getParameterEntity(ctxt->userData, 5543 name); 5544 if (entity == NULL) { 5545 /* 5546 * [ WFC: Entity Declared ] 5547 * In a document without any DTD, a document with only an 5548 * internal DTD subset which contains no parameter entity 5549 * references, or a document with "standalone='yes'", ... 5550 * ... The declaration of a parameter entity must precede 5551 * any reference to it... 5552 */ 5553 if ((ctxt->standalone == 1) || 5554 ((ctxt->hasExternalSubset == 0) && 5555 (ctxt->hasPErefs == 0))) { 5556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5557 ctxt->sax->error(ctxt->userData, 5558 "PEReference: %%%s; not found\n", name); 5559 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; 5560 ctxt->wellFormed = 0; 5561 } else { 5562 /* 5563 * [ VC: Entity Declared ] 5564 * In a document with an external subset or external 5565 * parameter entities with "standalone='no'", ... 5566 * ... The declaration of a parameter entity must 5567 * precede any reference to it... 5568 */ 5569 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5570 ctxt->sax->warning(ctxt->userData, 5571 "PEReference: %%%s; not found\n", name); 5572 ctxt->valid = 0; 5573 } 5574 } else { 5575 /* 5576 * Internal checking in case the entity quest barfed 5577 */ 5578 if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) && 5579 (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) { 5580 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 5581 ctxt->sax->warning(ctxt->userData, 5582 "Internal: %%%s; is not a parameter entity\n", name); 5583 } 5584 } 5585 ctxt->hasPErefs = 1; 5586 } else { 5587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5588 ctxt->sax->error(ctxt->userData, 5589 "xmlParseStringPEReference: expecting ';'\n"); 5590 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; 5591 ctxt->wellFormed = 0; 5592 } 5593 xmlFree(name); 5594 } 5595 } 5596 *str = ptr; 5597 return(entity); 5598} 5599 5600/** 5601 * xmlParseDocTypeDecl : 5602 * @ctxt: an XML parser context 5603 * 5604 * parse a DOCTYPE declaration 5605 * 5606 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 5607 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 5608 * 5609 * [ VC: Root Element Type ] 5610 * The Name in the document type declaration must match the element 5611 * type of the root element. 5612 */ 5613 5614void 5615xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 5616 xmlChar *name; 5617 xmlChar *ExternalID = NULL; 5618 xmlChar *URI = NULL; 5619 5620 /* 5621 * We know that '<!DOCTYPE' has been detected. 5622 */ 5623 SKIP(9); 5624 5625 SKIP_BLANKS; 5626 5627 /* 5628 * Parse the DOCTYPE name. 5629 */ 5630 name = xmlParseName(ctxt); 5631 if (name == NULL) { 5632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5633 ctxt->sax->error(ctxt->userData, 5634 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 5635 ctxt->wellFormed = 0; 5636 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5637 } 5638 5639 SKIP_BLANKS; 5640 5641 /* 5642 * Check for SystemID and ExternalID 5643 */ 5644 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 5645 5646 if ((URI != NULL) || (ExternalID != NULL)) { 5647 ctxt->hasExternalSubset = 1; 5648 } 5649 5650 SKIP_BLANKS; 5651 5652 /* 5653 * NOTE: the SAX callback may try to fetch the external subset 5654 * entity and fill it up ! 5655 */ 5656 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL)) 5657 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 5658 5659 /* 5660 * Cleanup 5661 */ 5662 if (URI != NULL) xmlFree(URI); 5663 if (ExternalID != NULL) xmlFree(ExternalID); 5664 if (name != NULL) xmlFree(name); 5665 5666 /* 5667 * Is there any internal subset declarations ? 5668 * they are handled separately in xmlParseInternalSubset() 5669 */ 5670 if (CUR == '[') 5671 return; 5672 5673 /* 5674 * We should be at the end of the DOCTYPE declaration. 5675 */ 5676 if (CUR != '>') { 5677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5678 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5679 ctxt->wellFormed = 0; 5680 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5681 } 5682 NEXT; 5683} 5684 5685/** 5686 * xmlParseInternalsubset : 5687 * @ctxt: an XML parser context 5688 * 5689 * parse the internal subset declaration 5690 * 5691 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 5692 */ 5693 5694void 5695xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 5696 /* 5697 * Is there any DTD definition ? 5698 */ 5699 if (CUR == '[') { 5700 ctxt->instate = XML_PARSER_DTD; 5701 NEXT; 5702 /* 5703 * Parse the succession of Markup declarations and 5704 * PEReferences. 5705 * Subsequence (markupdecl | PEReference | S)* 5706 */ 5707 while (CUR != ']') { 5708 const xmlChar *check = CUR_PTR; 5709 int cons = ctxt->input->consumed; 5710 5711 SKIP_BLANKS; 5712 xmlParseMarkupDecl(ctxt); 5713 xmlParsePEReference(ctxt); 5714 5715 /* 5716 * Pop-up of finished entities. 5717 */ 5718 while ((CUR == 0) && (ctxt->inputNr > 1)) 5719 xmlPopInput(ctxt); 5720 5721 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5723 ctxt->sax->error(ctxt->userData, 5724 "xmlParseInternalSubset: error detected in Markup declaration\n"); 5725 ctxt->wellFormed = 0; 5726 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 5727 break; 5728 } 5729 } 5730 if (CUR == ']') NEXT; 5731 } 5732 5733 /* 5734 * We should be at the end of the DOCTYPE declaration. 5735 */ 5736 if (CUR != '>') { 5737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5738 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); 5739 ctxt->wellFormed = 0; 5740 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; 5741 } 5742 NEXT; 5743} 5744 5745/** 5746 * xmlParseAttribute: 5747 * @ctxt: an XML parser context 5748 * @value: a xmlChar ** used to store the value of the attribute 5749 * 5750 * parse an attribute 5751 * 5752 * [41] Attribute ::= Name Eq AttValue 5753 * 5754 * [ WFC: No External Entity References ] 5755 * Attribute values cannot contain direct or indirect entity references 5756 * to external entities. 5757 * 5758 * [ WFC: No < in Attribute Values ] 5759 * The replacement text of any entity referred to directly or indirectly in 5760 * an attribute value (other than "<") must not contain a <. 5761 * 5762 * [ VC: Attribute Value Type ] 5763 * The attribute must have been declared; the value must be of the type 5764 * declared for it. 5765 * 5766 * [25] Eq ::= S? '=' S? 5767 * 5768 * With namespace: 5769 * 5770 * [NS 11] Attribute ::= QName Eq AttValue 5771 * 5772 * Also the case QName == xmlns:??? is handled independently as a namespace 5773 * definition. 5774 * 5775 * Returns the attribute name, and the value in *value. 5776 */ 5777 5778xmlChar * 5779xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 5780 xmlChar *name, *val; 5781 5782 *value = NULL; 5783 name = xmlParseName(ctxt); 5784 if (name == NULL) { 5785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5786 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); 5787 ctxt->wellFormed = 0; 5788 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5789 return(NULL); 5790 } 5791 5792 /* 5793 * read the value 5794 */ 5795 SKIP_BLANKS; 5796 if (CUR == '=') { 5797 NEXT; 5798 SKIP_BLANKS; 5799 val = xmlParseAttValue(ctxt); 5800 ctxt->instate = XML_PARSER_CONTENT; 5801 } else { 5802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5803 ctxt->sax->error(ctxt->userData, 5804 "Specification mandate value for attribute %s\n", name); 5805 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; 5806 ctxt->wellFormed = 0; 5807 return(NULL); 5808 } 5809 5810 *value = val; 5811 return(name); 5812} 5813 5814/** 5815 * xmlParseStartTag: 5816 * @ctxt: an XML parser context 5817 * 5818 * parse a start of tag either for rule element or 5819 * EmptyElement. In both case we don't parse the tag closing chars. 5820 * 5821 * [40] STag ::= '<' Name (S Attribute)* S? '>' 5822 * 5823 * [ WFC: Unique Att Spec ] 5824 * No attribute name may appear more than once in the same start-tag or 5825 * empty-element tag. 5826 * 5827 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 5828 * 5829 * [ WFC: Unique Att Spec ] 5830 * No attribute name may appear more than once in the same start-tag or 5831 * empty-element tag. 5832 * 5833 * With namespace: 5834 * 5835 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 5836 * 5837 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 5838 * 5839 * Returne the element name parsed 5840 */ 5841 5842xmlChar * 5843xmlParseStartTag(xmlParserCtxtPtr ctxt) { 5844 xmlChar *name; 5845 xmlChar *attname; 5846 xmlChar *attvalue; 5847 const xmlChar **atts = NULL; 5848 int nbatts = 0; 5849 int maxatts = 0; 5850 int i; 5851 5852 if (CUR != '<') return(NULL); 5853 NEXT; 5854 5855 name = xmlParseName(ctxt); 5856 if (name == NULL) { 5857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5858 ctxt->sax->error(ctxt->userData, 5859 "xmlParseStartTag: invalid element name\n"); 5860 ctxt->errNo = XML_ERR_NAME_REQUIRED; 5861 ctxt->wellFormed = 0; 5862 return(NULL); 5863 } 5864 5865 /* 5866 * Now parse the attributes, it ends up with the ending 5867 * 5868 * (S Attribute)* S? 5869 */ 5870 SKIP_BLANKS; 5871 GROW; 5872 while ((IS_CHAR(CUR)) && 5873 (CUR != '>') && 5874 ((CUR != '/') || (NXT(1) != '>'))) { 5875 const xmlChar *q = CUR_PTR; 5876 int cons = ctxt->input->consumed; 5877 5878 attname = xmlParseAttribute(ctxt, &attvalue); 5879 if ((attname != NULL) && (attvalue != NULL)) { 5880 /* 5881 * [ WFC: Unique Att Spec ] 5882 * No attribute name may appear more than once in the same 5883 * start-tag or empty-element tag. 5884 */ 5885 for (i = 0; i < nbatts;i += 2) { 5886 if (!xmlStrcmp(atts[i], attname)) { 5887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5888 ctxt->sax->error(ctxt->userData, 5889 "Attribute %s redefined\n", 5890 attname); 5891 ctxt->wellFormed = 0; 5892 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 5893 xmlFree(attname); 5894 xmlFree(attvalue); 5895 goto failed; 5896 } 5897 } 5898 5899 /* 5900 * Add the pair to atts 5901 */ 5902 if (atts == NULL) { 5903 maxatts = 10; 5904 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); 5905 if (atts == NULL) { 5906 fprintf(stderr, "malloc of %ld byte failed\n", 5907 maxatts * (long)sizeof(xmlChar *)); 5908 return(NULL); 5909 } 5910 } else if (nbatts + 4 > maxatts) { 5911 maxatts *= 2; 5912 atts = (const xmlChar **) xmlRealloc(atts, 5913 maxatts * sizeof(xmlChar *)); 5914 if (atts == NULL) { 5915 fprintf(stderr, "realloc of %ld byte failed\n", 5916 maxatts * (long)sizeof(xmlChar *)); 5917 return(NULL); 5918 } 5919 } 5920 atts[nbatts++] = attname; 5921 atts[nbatts++] = attvalue; 5922 atts[nbatts] = NULL; 5923 atts[nbatts + 1] = NULL; 5924 } 5925 5926failed: 5927 SKIP_BLANKS; 5928 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { 5929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5930 ctxt->sax->error(ctxt->userData, 5931 "xmlParseStartTag: problem parsing attributes\n"); 5932 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 5933 ctxt->wellFormed = 0; 5934 break; 5935 } 5936 GROW; 5937 } 5938 5939 /* 5940 * SAX: Start of Element ! 5941 */ 5942 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) 5943 ctxt->sax->startElement(ctxt->userData, name, atts); 5944 5945 if (atts != NULL) { 5946 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); 5947 xmlFree(atts); 5948 } 5949 return(name); 5950} 5951 5952/** 5953 * xmlParseEndTag: 5954 * @ctxt: an XML parser context 5955 * 5956 * parse an end of tag 5957 * 5958 * [42] ETag ::= '</' Name S? '>' 5959 * 5960 * With namespace 5961 * 5962 * [NS 9] ETag ::= '</' QName S? '>' 5963 */ 5964 5965void 5966xmlParseEndTag(xmlParserCtxtPtr ctxt) { 5967 xmlChar *name; 5968 xmlChar *oldname; 5969 5970 GROW; 5971 if ((CUR != '<') || (NXT(1) != '/')) { 5972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5973 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n"); 5974 ctxt->wellFormed = 0; 5975 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED; 5976 return; 5977 } 5978 SKIP(2); 5979 5980 name = xmlParseName(ctxt); 5981 5982 /* 5983 * We should definitely be at the ending "S? '>'" part 5984 */ 5985 GROW; 5986 SKIP_BLANKS; 5987 if ((!IS_CHAR(CUR)) || (CUR != '>')) { 5988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 5989 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); 5990 ctxt->errNo = XML_ERR_GT_REQUIRED; 5991 ctxt->wellFormed = 0; 5992 } else 5993 NEXT; 5994 5995 /* 5996 * [ WFC: Element Type Match ] 5997 * The Name in an element's end-tag must match the element type in the 5998 * start-tag. 5999 * 6000 */ 6001 if ((name == NULL) || (ctxt->name == NULL) || 6002 (xmlStrcmp(name, ctxt->name))) { 6003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { 6004 if ((name != NULL) && (ctxt->name != NULL)) { 6005 ctxt->sax->error(ctxt->userData, 6006 "Opening and ending tag mismatch: %s and %s\n", 6007 ctxt->name, name); 6008 } else if (ctxt->name != NULL) { 6009 ctxt->sax->error(ctxt->userData, 6010 "Ending tag eror for: %s\n", ctxt->name); 6011 } else { 6012 ctxt->sax->error(ctxt->userData, 6013 "Ending tag error: internal error ???\n"); 6014 } 6015 6016 } 6017 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; 6018 ctxt->wellFormed = 0; 6019 } 6020 6021 /* 6022 * SAX: End of Tag 6023 */ 6024 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 6025 ctxt->sax->endElement(ctxt->userData, name); 6026 6027 if (name != NULL) 6028 xmlFree(name); 6029 oldname = namePop(ctxt); 6030 if (oldname != NULL) { 6031#ifdef DEBUG_STACK 6032 fprintf(stderr,"Close: popped %s\n", oldname); 6033#endif 6034 xmlFree(oldname); 6035 } 6036 return; 6037} 6038 6039/** 6040 * xmlParseCDSect: 6041 * @ctxt: an XML parser context 6042 * 6043 * Parse escaped pure raw content. 6044 * 6045 * [18] CDSect ::= CDStart CData CDEnd 6046 * 6047 * [19] CDStart ::= '<![CDATA[' 6048 * 6049 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 6050 * 6051 * [21] CDEnd ::= ']]>' 6052 */ 6053void 6054xmlParseCDSect(xmlParserCtxtPtr ctxt) { 6055 xmlChar *buf = NULL; 6056 int len = 0; 6057 int size = XML_PARSER_BUFFER_SIZE; 6058 xmlChar r, s; 6059 xmlChar cur; 6060 6061 if ((NXT(0) == '<') && (NXT(1) == '!') && 6062 (NXT(2) == '[') && (NXT(3) == 'C') && 6063 (NXT(4) == 'D') && (NXT(5) == 'A') && 6064 (NXT(6) == 'T') && (NXT(7) == 'A') && 6065 (NXT(8) == '[')) { 6066 SKIP(9); 6067 } else 6068 return; 6069 6070 ctxt->instate = XML_PARSER_CDATA_SECTION; 6071 if (!IS_CHAR(CUR)) { 6072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6073 ctxt->sax->error(ctxt->userData, 6074 "CData section not finished\n"); 6075 ctxt->wellFormed = 0; 6076 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6077 ctxt->instate = XML_PARSER_CONTENT; 6078 return; 6079 } 6080 r = CUR; 6081 NEXT; 6082 if (!IS_CHAR(CUR)) { 6083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6084 ctxt->sax->error(ctxt->userData, 6085 "CData section not finished\n"); 6086 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6087 ctxt->wellFormed = 0; 6088 ctxt->instate = XML_PARSER_CONTENT; 6089 return; 6090 } 6091 s = CUR; 6092 NEXT; 6093 cur = CUR; 6094 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6095 if (buf == NULL) { 6096 fprintf(stderr, "malloc of %d byte failed\n", size); 6097 return; 6098 } 6099 while (IS_CHAR(cur) && 6100 ((r != ']') || (s != ']') || (cur != '>'))) { 6101 if (len + 1 >= size) { 6102 size *= 2; 6103 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 6104 if (buf == NULL) { 6105 fprintf(stderr, "realloc of %d byte failed\n", size); 6106 return; 6107 } 6108 } 6109 buf[len++] = r; 6110 r = s; 6111 s = cur; 6112 NEXT; 6113 cur = CUR; 6114 } 6115 buf[len] = 0; 6116 ctxt->instate = XML_PARSER_CONTENT; 6117 if (!IS_CHAR(CUR)) { 6118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6119 ctxt->sax->error(ctxt->userData, 6120 "CData section not finished\n%.50s\n", buf); 6121 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED; 6122 ctxt->wellFormed = 0; 6123 xmlFree(buf); 6124 return; 6125 } 6126 NEXT; 6127 6128 /* 6129 * Ok the buffer is to be consumed as cdata. 6130 */ 6131 if (ctxt->sax != NULL) { 6132 if (ctxt->sax->cdataBlock != NULL) 6133 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 6134 } 6135 xmlFree(buf); 6136} 6137 6138/** 6139 * xmlParseContent: 6140 * @ctxt: an XML parser context 6141 * 6142 * Parse a content: 6143 * 6144 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 6145 */ 6146 6147void 6148xmlParseContent(xmlParserCtxtPtr ctxt) { 6149 GROW; 6150 while ((CUR != '<') || (NXT(1) != '/')) { 6151 const xmlChar *test = CUR_PTR; 6152 int cons = ctxt->input->consumed; 6153 xmlChar tok = ctxt->token; 6154 6155 /* 6156 * First case : a Processing Instruction. 6157 */ 6158 if ((CUR == '<') && (NXT(1) == '?')) { 6159 xmlParsePI(ctxt); 6160 } 6161 6162 /* 6163 * Second case : a CDSection 6164 */ 6165 else if ((CUR == '<') && (NXT(1) == '!') && 6166 (NXT(2) == '[') && (NXT(3) == 'C') && 6167 (NXT(4) == 'D') && (NXT(5) == 'A') && 6168 (NXT(6) == 'T') && (NXT(7) == 'A') && 6169 (NXT(8) == '[')) { 6170 xmlParseCDSect(ctxt); 6171 } 6172 6173 /* 6174 * Third case : a comment 6175 */ 6176 else if ((CUR == '<') && (NXT(1) == '!') && 6177 (NXT(2) == '-') && (NXT(3) == '-')) { 6178 xmlParseComment(ctxt); 6179 ctxt->instate = XML_PARSER_CONTENT; 6180 } 6181 6182 /* 6183 * Fourth case : a sub-element. 6184 */ 6185 else if (CUR == '<') { 6186 xmlParseElement(ctxt); 6187 } 6188 6189 /* 6190 * Fifth case : a reference. If if has not been resolved, 6191 * parsing returns it's Name, create the node 6192 */ 6193 6194 else if (CUR == '&') { 6195 xmlParseReference(ctxt); 6196 } 6197 6198 /* 6199 * Last case, text. Note that References are handled directly. 6200 */ 6201 else { 6202 xmlParseCharData(ctxt, 0); 6203 } 6204 6205 GROW; 6206 /* 6207 * Pop-up of finished entities. 6208 */ 6209 while ((CUR == 0) && (ctxt->inputNr > 1)) 6210 xmlPopInput(ctxt); 6211 SHRINK; 6212 6213 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && 6214 (tok == ctxt->token)) { 6215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6216 ctxt->sax->error(ctxt->userData, 6217 "detected an error in element content\n"); 6218 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 6219 ctxt->wellFormed = 0; 6220 break; 6221 } 6222 } 6223} 6224 6225/** 6226 * xmlParseElement: 6227 * @ctxt: an XML parser context 6228 * 6229 * parse an XML element, this is highly recursive 6230 * 6231 * [39] element ::= EmptyElemTag | STag content ETag 6232 * 6233 * [ WFC: Element Type Match ] 6234 * The Name in an element's end-tag must match the element type in the 6235 * start-tag. 6236 * 6237 * [ VC: Element Valid ] 6238 * An element is valid if there is a declaration matching elementdecl 6239 * where the Name matches the element type and one of the following holds: 6240 * - The declaration matches EMPTY and the element has no content. 6241 * - The declaration matches children and the sequence of child elements 6242 * belongs to the language generated by the regular expression in the 6243 * content model, with optional white space (characters matching the 6244 * nonterminal S) between each pair of child elements. 6245 * - The declaration matches Mixed and the content consists of character 6246 * data and child elements whose types match names in the content model. 6247 * - The declaration matches ANY, and the types of any child elements have 6248 * been declared. 6249 */ 6250 6251void 6252xmlParseElement(xmlParserCtxtPtr ctxt) { 6253 const xmlChar *openTag = CUR_PTR; 6254 xmlChar *name; 6255 xmlChar *oldname; 6256 xmlParserNodeInfo node_info; 6257 xmlNodePtr ret; 6258 6259 /* Capture start position */ 6260 if (ctxt->record_info) { 6261 node_info.begin_pos = ctxt->input->consumed + 6262 (CUR_PTR - ctxt->input->base); 6263 node_info.begin_line = ctxt->input->line; 6264 } 6265 6266 name = xmlParseStartTag(ctxt); 6267 if (name == NULL) { 6268 return; 6269 } 6270 namePush(ctxt, name); 6271 ret = ctxt->node; 6272 6273 /* 6274 * [ VC: Root Element Type ] 6275 * The Name in the document type declaration must match the element 6276 * type of the root element. 6277 */ 6278 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 6279 ctxt->node && (ctxt->node == ctxt->myDoc->root)) 6280 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 6281 6282 /* 6283 * Check for an Empty Element. 6284 */ 6285 if ((CUR == '/') && (NXT(1) == '>')) { 6286 SKIP(2); 6287 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 6288 ctxt->sax->endElement(ctxt->userData, name); 6289 oldname = namePop(ctxt); 6290 if (oldname != NULL) { 6291#ifdef DEBUG_STACK 6292 fprintf(stderr,"Close: popped %s\n", oldname); 6293#endif 6294 xmlFree(oldname); 6295 } 6296 return; 6297 } 6298 if (CUR == '>') { 6299 NEXT; 6300 } else { 6301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6302 ctxt->sax->error(ctxt->userData, 6303 "Couldn't find end of Start Tag\n%.30s\n", 6304 openTag); 6305 ctxt->wellFormed = 0; 6306 ctxt->errNo = XML_ERR_GT_REQUIRED; 6307 6308 /* 6309 * end of parsing of this node. 6310 */ 6311 nodePop(ctxt); 6312 oldname = namePop(ctxt); 6313 if (oldname != NULL) { 6314#ifdef DEBUG_STACK 6315 fprintf(stderr,"Close: popped %s\n", oldname); 6316#endif 6317 xmlFree(oldname); 6318 } 6319 6320 /* 6321 * Capture end position and add node 6322 */ 6323 if ( ret != NULL && ctxt->record_info ) { 6324 node_info.end_pos = ctxt->input->consumed + 6325 (CUR_PTR - ctxt->input->base); 6326 node_info.end_line = ctxt->input->line; 6327 node_info.node = ret; 6328 xmlParserAddNodeInfo(ctxt, &node_info); 6329 } 6330 return; 6331 } 6332 6333 /* 6334 * Parse the content of the element: 6335 */ 6336 xmlParseContent(ctxt); 6337 if (!IS_CHAR(CUR)) { 6338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6339 ctxt->sax->error(ctxt->userData, 6340 "Premature end of data in tag %.30s\n", openTag); 6341 ctxt->wellFormed = 0; 6342 ctxt->errNo = XML_ERR_TAG_NOT_FINISED; 6343 6344 /* 6345 * end of parsing of this node. 6346 */ 6347 nodePop(ctxt); 6348 oldname = namePop(ctxt); 6349 if (oldname != NULL) { 6350#ifdef DEBUG_STACK 6351 fprintf(stderr,"Close: popped %s\n", oldname); 6352#endif 6353 xmlFree(oldname); 6354 } 6355 return; 6356 } 6357 6358 /* 6359 * parse the end of tag: '</' should be here. 6360 */ 6361 xmlParseEndTag(ctxt); 6362 6363 /* 6364 * Capture end position and add node 6365 */ 6366 if ( ret != NULL && ctxt->record_info ) { 6367 node_info.end_pos = ctxt->input->consumed + 6368 (CUR_PTR - ctxt->input->base); 6369 node_info.end_line = ctxt->input->line; 6370 node_info.node = ret; 6371 xmlParserAddNodeInfo(ctxt, &node_info); 6372 } 6373} 6374 6375/** 6376 * xmlParseVersionNum: 6377 * @ctxt: an XML parser context 6378 * 6379 * parse the XML version value. 6380 * 6381 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 6382 * 6383 * Returns the string giving the XML version number, or NULL 6384 */ 6385xmlChar * 6386xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 6387 xmlChar *buf = NULL; 6388 int len = 0; 6389 int size = 10; 6390 xmlChar cur; 6391 6392 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6393 if (buf == NULL) { 6394 fprintf(stderr, "malloc of %d byte failed\n", size); 6395 return(NULL); 6396 } 6397 cur = CUR; 6398 while (IS_CHAR(cur) && 6399 (((cur >= 'a') && (cur <= 'z')) || 6400 ((cur >= 'A') && (cur <= 'Z')) || 6401 ((cur >= '0') && (cur <= '9')) || 6402 (cur == '_') || (cur == '.') || 6403 (cur == ':') || (cur == '-'))) { 6404 if (len + 1 >= size) { 6405 size *= 2; 6406 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 6407 if (buf == NULL) { 6408 fprintf(stderr, "realloc of %d byte failed\n", size); 6409 return(NULL); 6410 } 6411 } 6412 buf[len++] = cur; 6413 NEXT; 6414 cur=CUR; 6415 } 6416 buf[len] = 0; 6417 return(buf); 6418} 6419 6420/** 6421 * xmlParseVersionInfo: 6422 * @ctxt: an XML parser context 6423 * 6424 * parse the XML version. 6425 * 6426 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 6427 * 6428 * [25] Eq ::= S? '=' S? 6429 * 6430 * Returns the version string, e.g. "1.0" 6431 */ 6432 6433xmlChar * 6434xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 6435 xmlChar *version = NULL; 6436 const xmlChar *q; 6437 6438 if ((CUR == 'v') && (NXT(1) == 'e') && 6439 (NXT(2) == 'r') && (NXT(3) == 's') && 6440 (NXT(4) == 'i') && (NXT(5) == 'o') && 6441 (NXT(6) == 'n')) { 6442 SKIP(7); 6443 SKIP_BLANKS; 6444 if (CUR != '=') { 6445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6446 ctxt->sax->error(ctxt->userData, 6447 "xmlParseVersionInfo : expected '='\n"); 6448 ctxt->wellFormed = 0; 6449 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6450 return(NULL); 6451 } 6452 NEXT; 6453 SKIP_BLANKS; 6454 if (CUR == '"') { 6455 NEXT; 6456 q = CUR_PTR; 6457 version = xmlParseVersionNum(ctxt); 6458 if (CUR != '"') { 6459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6460 ctxt->sax->error(ctxt->userData, 6461 "String not closed\n%.50s\n", q); 6462 ctxt->wellFormed = 0; 6463 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6464 } else 6465 NEXT; 6466 } else if (CUR == '\''){ 6467 NEXT; 6468 q = CUR_PTR; 6469 version = xmlParseVersionNum(ctxt); 6470 if (CUR != '\'') { 6471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6472 ctxt->sax->error(ctxt->userData, 6473 "String not closed\n%.50s\n", q); 6474 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6475 ctxt->wellFormed = 0; 6476 } else 6477 NEXT; 6478 } else { 6479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6480 ctxt->sax->error(ctxt->userData, 6481 "xmlParseVersionInfo : expected ' or \"\n"); 6482 ctxt->wellFormed = 0; 6483 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6484 } 6485 } 6486 return(version); 6487} 6488 6489/** 6490 * xmlParseEncName: 6491 * @ctxt: an XML parser context 6492 * 6493 * parse the XML encoding name 6494 * 6495 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 6496 * 6497 * Returns the encoding name value or NULL 6498 */ 6499xmlChar * 6500xmlParseEncName(xmlParserCtxtPtr ctxt) { 6501 xmlChar *buf = NULL; 6502 int len = 0; 6503 int size = 10; 6504 xmlChar cur; 6505 6506 cur = CUR; 6507 if (((cur >= 'a') && (cur <= 'z')) || 6508 ((cur >= 'A') && (cur <= 'Z'))) { 6509 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); 6510 if (buf == NULL) { 6511 fprintf(stderr, "malloc of %d byte failed\n", size); 6512 return(NULL); 6513 } 6514 6515 buf[len++] = cur; 6516 NEXT; 6517 cur = CUR; 6518 while (IS_CHAR(cur) && 6519 (((cur >= 'a') && (cur <= 'z')) || 6520 ((cur >= 'A') && (cur <= 'Z')) || 6521 ((cur >= '0') && (cur <= '9')) || 6522 (cur == '.') || (cur == '_') || 6523 (cur == '-'))) { 6524 if (len + 1 >= size) { 6525 size *= 2; 6526 buf = xmlRealloc(buf, size * sizeof(xmlChar)); 6527 if (buf == NULL) { 6528 fprintf(stderr, "realloc of %d byte failed\n", size); 6529 return(NULL); 6530 } 6531 } 6532 buf[len++] = cur; 6533 NEXT; 6534 cur = CUR; 6535 if (cur == 0) { 6536 SHRINK; 6537 GROW; 6538 cur = CUR; 6539 } 6540 } 6541 buf[len] = 0; 6542 } else { 6543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6544 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n"); 6545 ctxt->wellFormed = 0; 6546 ctxt->errNo = XML_ERR_ENCODING_NAME; 6547 } 6548 return(buf); 6549} 6550 6551/** 6552 * xmlParseEncodingDecl: 6553 * @ctxt: an XML parser context 6554 * 6555 * parse the XML encoding declaration 6556 * 6557 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 6558 * 6559 * TODO: this should setup the conversion filters. 6560 * 6561 * Returns the encoding value or NULL 6562 */ 6563 6564xmlChar * 6565xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 6566 xmlChar *encoding = NULL; 6567 const xmlChar *q; 6568 6569 SKIP_BLANKS; 6570 if ((CUR == 'e') && (NXT(1) == 'n') && 6571 (NXT(2) == 'c') && (NXT(3) == 'o') && 6572 (NXT(4) == 'd') && (NXT(5) == 'i') && 6573 (NXT(6) == 'n') && (NXT(7) == 'g')) { 6574 SKIP(8); 6575 SKIP_BLANKS; 6576 if (CUR != '=') { 6577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6578 ctxt->sax->error(ctxt->userData, 6579 "xmlParseEncodingDecl : expected '='\n"); 6580 ctxt->wellFormed = 0; 6581 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6582 return(NULL); 6583 } 6584 NEXT; 6585 SKIP_BLANKS; 6586 if (CUR == '"') { 6587 NEXT; 6588 q = CUR_PTR; 6589 encoding = xmlParseEncName(ctxt); 6590 if (CUR != '"') { 6591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6592 ctxt->sax->error(ctxt->userData, 6593 "String not closed\n%.50s\n", q); 6594 ctxt->wellFormed = 0; 6595 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6596 } else 6597 NEXT; 6598 } else if (CUR == '\''){ 6599 NEXT; 6600 q = CUR_PTR; 6601 encoding = xmlParseEncName(ctxt); 6602 if (CUR != '\'') { 6603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6604 ctxt->sax->error(ctxt->userData, 6605 "String not closed\n%.50s\n", q); 6606 ctxt->wellFormed = 0; 6607 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6608 } else 6609 NEXT; 6610 } else if (CUR == '"'){ 6611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6612 ctxt->sax->error(ctxt->userData, 6613 "xmlParseEncodingDecl : expected ' or \"\n"); 6614 ctxt->wellFormed = 0; 6615 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6616 } 6617 } 6618 return(encoding); 6619} 6620 6621/** 6622 * xmlParseSDDecl: 6623 * @ctxt: an XML parser context 6624 * 6625 * parse the XML standalone declaration 6626 * 6627 * [32] SDDecl ::= S 'standalone' Eq 6628 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 6629 * 6630 * [ VC: Standalone Document Declaration ] 6631 * TODO The standalone document declaration must have the value "no" 6632 * if any external markup declarations contain declarations of: 6633 * - attributes with default values, if elements to which these 6634 * attributes apply appear in the document without specifications 6635 * of values for these attributes, or 6636 * - entities (other than amp, lt, gt, apos, quot), if references 6637 * to those entities appear in the document, or 6638 * - attributes with values subject to normalization, where the 6639 * attribute appears in the document with a value which will change 6640 * as a result of normalization, or 6641 * - element types with element content, if white space occurs directly 6642 * within any instance of those types. 6643 * 6644 * Returns 1 if standalone, 0 otherwise 6645 */ 6646 6647int 6648xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 6649 int standalone = -1; 6650 6651 SKIP_BLANKS; 6652 if ((CUR == 's') && (NXT(1) == 't') && 6653 (NXT(2) == 'a') && (NXT(3) == 'n') && 6654 (NXT(4) == 'd') && (NXT(5) == 'a') && 6655 (NXT(6) == 'l') && (NXT(7) == 'o') && 6656 (NXT(8) == 'n') && (NXT(9) == 'e')) { 6657 SKIP(10); 6658 SKIP_BLANKS; 6659 if (CUR != '=') { 6660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6661 ctxt->sax->error(ctxt->userData, 6662 "XML standalone declaration : expected '='\n"); 6663 ctxt->errNo = XML_ERR_EQUAL_REQUIRED; 6664 ctxt->wellFormed = 0; 6665 return(standalone); 6666 } 6667 NEXT; 6668 SKIP_BLANKS; 6669 if (CUR == '\''){ 6670 NEXT; 6671 if ((CUR == 'n') && (NXT(1) == 'o')) { 6672 standalone = 0; 6673 SKIP(2); 6674 } else if ((CUR == 'y') && (NXT(1) == 'e') && 6675 (NXT(2) == 's')) { 6676 standalone = 1; 6677 SKIP(3); 6678 } else { 6679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6680 ctxt->sax->error(ctxt->userData, 6681 "standalone accepts only 'yes' or 'no'\n"); 6682 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6683 ctxt->wellFormed = 0; 6684 } 6685 if (CUR != '\'') { 6686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6687 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6688 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6689 ctxt->wellFormed = 0; 6690 } else 6691 NEXT; 6692 } else if (CUR == '"'){ 6693 NEXT; 6694 if ((CUR == 'n') && (NXT(1) == 'o')) { 6695 standalone = 0; 6696 SKIP(2); 6697 } else if ((CUR == 'y') && (NXT(1) == 'e') && 6698 (NXT(2) == 's')) { 6699 standalone = 1; 6700 SKIP(3); 6701 } else { 6702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6703 ctxt->sax->error(ctxt->userData, 6704 "standalone accepts only 'yes' or 'no'\n"); 6705 ctxt->errNo = XML_ERR_STANDALONE_VALUE; 6706 ctxt->wellFormed = 0; 6707 } 6708 if (CUR != '"') { 6709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6710 ctxt->sax->error(ctxt->userData, "String not closed\n"); 6711 ctxt->wellFormed = 0; 6712 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; 6713 } else 6714 NEXT; 6715 } else { 6716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6717 ctxt->sax->error(ctxt->userData, 6718 "Standalone value not found\n"); 6719 ctxt->wellFormed = 0; 6720 ctxt->errNo = XML_ERR_STRING_NOT_STARTED; 6721 } 6722 } 6723 return(standalone); 6724} 6725 6726/** 6727 * xmlParseXMLDecl: 6728 * @ctxt: an XML parser context 6729 * 6730 * parse an XML declaration header 6731 * 6732 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 6733 */ 6734 6735void 6736xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 6737 xmlChar *version; 6738 6739 /* 6740 * We know that '<?xml' is here. 6741 */ 6742 SKIP(5); 6743 6744 if (!IS_BLANK(CUR)) { 6745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6746 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n"); 6747 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6748 ctxt->wellFormed = 0; 6749 } 6750 SKIP_BLANKS; 6751 6752 /* 6753 * We should have the VersionInfo here. 6754 */ 6755 version = xmlParseVersionInfo(ctxt); 6756 if (version == NULL) 6757 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6758 ctxt->version = xmlStrdup(version); 6759 xmlFree(version); 6760 6761 /* 6762 * We may have the encoding declaration 6763 */ 6764 if (!IS_BLANK(CUR)) { 6765 if ((CUR == '?') && (NXT(1) == '>')) { 6766 SKIP(2); 6767 return; 6768 } 6769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6770 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 6771 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6772 ctxt->wellFormed = 0; 6773 } 6774 ctxt->encoding = xmlParseEncodingDecl(ctxt); 6775 6776 /* 6777 * We may have the standalone status. 6778 */ 6779 if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) { 6780 if ((CUR == '?') && (NXT(1) == '>')) { 6781 SKIP(2); 6782 return; 6783 } 6784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6785 ctxt->sax->error(ctxt->userData, "Blank needed here\n"); 6786 ctxt->wellFormed = 0; 6787 ctxt->errNo = XML_ERR_SPACE_REQUIRED; 6788 } 6789 SKIP_BLANKS; 6790 ctxt->standalone = xmlParseSDDecl(ctxt); 6791 6792 SKIP_BLANKS; 6793 if ((CUR == '?') && (NXT(1) == '>')) { 6794 SKIP(2); 6795 } else if (CUR == '>') { 6796 /* Deprecated old WD ... */ 6797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6798 ctxt->sax->error(ctxt->userData, 6799 "XML declaration must end-up with '?>'\n"); 6800 ctxt->wellFormed = 0; 6801 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 6802 NEXT; 6803 } else { 6804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6805 ctxt->sax->error(ctxt->userData, 6806 "parsing XML declaration: '?>' expected\n"); 6807 ctxt->wellFormed = 0; 6808 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED; 6809 MOVETO_ENDTAG(CUR_PTR); 6810 NEXT; 6811 } 6812} 6813 6814/** 6815 * xmlParseMisc: 6816 * @ctxt: an XML parser context 6817 * 6818 * parse an XML Misc* optionnal field. 6819 * 6820 * [27] Misc ::= Comment | PI | S 6821 */ 6822 6823void 6824xmlParseMisc(xmlParserCtxtPtr ctxt) { 6825 while (((CUR == '<') && (NXT(1) == '?')) || 6826 ((CUR == '<') && (NXT(1) == '!') && 6827 (NXT(2) == '-') && (NXT(3) == '-')) || 6828 IS_BLANK(CUR)) { 6829 if ((CUR == '<') && (NXT(1) == '?')) { 6830 xmlParsePI(ctxt); 6831 } else if (IS_BLANK(CUR)) { 6832 NEXT; 6833 } else 6834 xmlParseComment(ctxt); 6835 } 6836} 6837 6838/** 6839 * xmlParseDocument : 6840 * @ctxt: an XML parser context 6841 * 6842 * parse an XML document (and build a tree if using the standard SAX 6843 * interface). 6844 * 6845 * [1] document ::= prolog element Misc* 6846 * 6847 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 6848 * 6849 * Returns 0, -1 in case of error. the parser context is augmented 6850 * as a result of the parsing. 6851 */ 6852 6853int 6854xmlParseDocument(xmlParserCtxtPtr ctxt) { 6855 xmlDefaultSAXHandlerInit(); 6856 6857 GROW; 6858 6859 /* 6860 * SAX: beginning of the document processing. 6861 */ 6862 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 6863 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 6864 6865 /* 6866 * TODO We should check for encoding here and plug-in some 6867 * conversion code !!!! 6868 */ 6869 6870 /* 6871 * Wipe out everything which is before the first '<' 6872 */ 6873 if (IS_BLANK(CUR)) { 6874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6875 ctxt->sax->error(ctxt->userData, 6876 "Extra spaces at the beginning of the document are not allowed\n"); 6877 ctxt->errNo = XML_ERR_DOCUMENT_START; 6878 ctxt->wellFormed = 0; 6879 SKIP_BLANKS; 6880 } 6881 6882 if (CUR == 0) { 6883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6884 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 6885 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 6886 ctxt->wellFormed = 0; 6887 } 6888 6889 /* 6890 * Check for the XMLDecl in the Prolog. 6891 */ 6892 GROW; 6893 if ((CUR == '<') && (NXT(1) == '?') && 6894 (NXT(2) == 'x') && (NXT(3) == 'm') && 6895 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { 6896 xmlParseXMLDecl(ctxt); 6897 SKIP_BLANKS; 6898 } else if ((CUR == '<') && (NXT(1) == '?') && 6899 (NXT(2) == 'X') && (NXT(3) == 'M') && 6900 (NXT(4) == 'L') && (IS_BLANK(NXT(5)))) { 6901 /* 6902 * The first drafts were using <?XML and the final W3C REC 6903 * now use <?xml ... 6904 */ 6905 xmlParseXMLDecl(ctxt); 6906 SKIP_BLANKS; 6907 } else { 6908 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 6909 } 6910 if ((ctxt->sax) && (ctxt->sax->startDocument)) 6911 ctxt->sax->startDocument(ctxt->userData); 6912 6913 /* 6914 * The Misc part of the Prolog 6915 */ 6916 GROW; 6917 xmlParseMisc(ctxt); 6918 6919 /* 6920 * Then possibly doc type declaration(s) and more Misc 6921 * (doctypedecl Misc*)? 6922 */ 6923 GROW; 6924 if ((CUR == '<') && (NXT(1) == '!') && 6925 (NXT(2) == 'D') && (NXT(3) == 'O') && 6926 (NXT(4) == 'C') && (NXT(5) == 'T') && 6927 (NXT(6) == 'Y') && (NXT(7) == 'P') && 6928 (NXT(8) == 'E')) { 6929 xmlParseDocTypeDecl(ctxt); 6930 if (CUR == '[') { 6931 ctxt->instate = XML_PARSER_DTD; 6932 xmlParseInternalSubset(ctxt); 6933 } 6934 ctxt->instate = XML_PARSER_PROLOG; 6935 xmlParseMisc(ctxt); 6936 } 6937 6938 /* 6939 * Time to start parsing the tree itself 6940 */ 6941 GROW; 6942 if (CUR != '<') { 6943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6944 ctxt->sax->error(ctxt->userData, 6945 "Start tag expect, '<' not found\n"); 6946 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 6947 ctxt->wellFormed = 0; 6948 ctxt->instate = XML_PARSER_EOF; 6949 } else { 6950 ctxt->instate = XML_PARSER_CONTENT; 6951 xmlParseElement(ctxt); 6952 ctxt->instate = XML_PARSER_EPILOG; 6953 6954 6955 /* 6956 * The Misc part at the end 6957 */ 6958 xmlParseMisc(ctxt); 6959 6960 if (CUR != 0) { 6961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 6962 ctxt->sax->error(ctxt->userData, 6963 "Extra content at the end of the document\n"); 6964 ctxt->wellFormed = 0; 6965 ctxt->errNo = XML_ERR_DOCUMENT_END; 6966 } 6967 ctxt->instate = XML_PARSER_EOF; 6968 } 6969 6970 /* 6971 * SAX: end of the document processing. 6972 */ 6973 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 6974 ctxt->sax->endDocument(ctxt->userData); 6975 if (! ctxt->wellFormed) return(-1); 6976 return(0); 6977} 6978 6979/************************************************************************ 6980 * * 6981 * Progressive parsing interfaces * 6982 * * 6983 ************************************************************************/ 6984 6985/** 6986 * xmlParseLookupSequence: 6987 * @ctxt: an XML parser context 6988 * @first: the first char to lookup 6989 * @next: the next char to lookup or zero 6990 * @third: the next char to lookup or zero 6991 * 6992 * Try to find if a sequence (first, next, third) or just (first next) or 6993 * (first) is available in the input stream. 6994 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 6995 * to avoid rescanning sequences of bytes, it DOES change the state of the 6996 * parser, do not use liberally. 6997 * 6998 * Returns the index to the current parsing point if the full sequence 6999 * is available, -1 otherwise. 7000 */ 7001int 7002xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 7003 xmlChar next, xmlChar third) { 7004 int base, len; 7005 xmlParserInputPtr in; 7006 const xmlChar *buf; 7007 7008 in = ctxt->input; 7009 if (in == NULL) return(-1); 7010 base = in->cur - in->base; 7011 if (base < 0) return(-1); 7012 if (ctxt->checkIndex > base) 7013 base = ctxt->checkIndex; 7014 if (in->buf == NULL) { 7015 buf = in->base; 7016 len = in->length; 7017 } else { 7018 buf = in->buf->buffer->content; 7019 len = in->buf->buffer->use; 7020 } 7021 /* take into account the sequence length */ 7022 if (third) len -= 2; 7023 else if (next) len --; 7024 for (;base < len;base++) { 7025 if (buf[base] == first) { 7026 if (third != 0) { 7027 if ((buf[base + 1] != next) || 7028 (buf[base + 2] != third)) continue; 7029 } else if (next != 0) { 7030 if (buf[base + 1] != next) continue; 7031 } 7032 ctxt->checkIndex = 0; 7033#ifdef DEBUG_PUSH 7034 if (next == 0) 7035 fprintf(stderr, "PP: lookup '%c' found at %d\n", 7036 first, base); 7037 else if (third == 0) 7038 fprintf(stderr, "PP: lookup '%c%c' found at %d\n", 7039 first, next, base); 7040 else 7041 fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n", 7042 first, next, third, base); 7043#endif 7044 return(base - (in->cur - in->base)); 7045 } 7046 } 7047 ctxt->checkIndex = base; 7048#ifdef DEBUG_PUSH 7049 if (next == 0) 7050 fprintf(stderr, "PP: lookup '%c' failed\n", first); 7051 else if (third == 0) 7052 fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next); 7053 else 7054 fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third); 7055#endif 7056 return(-1); 7057} 7058 7059/** 7060 * xmlParseTryOrFinish: 7061 * @ctxt: an XML parser context 7062 * @terminate: last chunk indicator 7063 * 7064 * Try to progress on parsing 7065 * 7066 * Returns zero if no parsing was possible 7067 */ 7068int 7069xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 7070 int ret = 0; 7071 xmlParserInputPtr in; 7072 int avail; 7073 xmlChar cur, next; 7074 7075#ifdef DEBUG_PUSH 7076 switch (ctxt->instate) { 7077 case XML_PARSER_EOF: 7078 fprintf(stderr, "PP: try EOF\n"); break; 7079 case XML_PARSER_START: 7080 fprintf(stderr, "PP: try START\n"); break; 7081 case XML_PARSER_MISC: 7082 fprintf(stderr, "PP: try MISC\n");break; 7083 case XML_PARSER_COMMENT: 7084 fprintf(stderr, "PP: try COMMENT\n");break; 7085 case XML_PARSER_PROLOG: 7086 fprintf(stderr, "PP: try PROLOG\n");break; 7087 case XML_PARSER_START_TAG: 7088 fprintf(stderr, "PP: try START_TAG\n");break; 7089 case XML_PARSER_CONTENT: 7090 fprintf(stderr, "PP: try CONTENT\n");break; 7091 case XML_PARSER_CDATA_SECTION: 7092 fprintf(stderr, "PP: try CDATA_SECTION\n");break; 7093 case XML_PARSER_END_TAG: 7094 fprintf(stderr, "PP: try END_TAG\n");break; 7095 case XML_PARSER_ENTITY_DECL: 7096 fprintf(stderr, "PP: try ENTITY_DECL\n");break; 7097 case XML_PARSER_ENTITY_VALUE: 7098 fprintf(stderr, "PP: try ENTITY_VALUE\n");break; 7099 case XML_PARSER_ATTRIBUTE_VALUE: 7100 fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break; 7101 case XML_PARSER_DTD: 7102 fprintf(stderr, "PP: try DTD\n");break; 7103 case XML_PARSER_EPILOG: 7104 fprintf(stderr, "PP: try EPILOG\n");break; 7105 case XML_PARSER_PI: 7106 fprintf(stderr, "PP: try PI\n");break; 7107 } 7108#endif 7109 7110 while (1) { 7111 /* 7112 * Pop-up of finished entities. 7113 */ 7114 while ((CUR == 0) && (ctxt->inputNr > 1)) 7115 xmlPopInput(ctxt); 7116 7117 in = ctxt->input; 7118 if (in == NULL) break; 7119 if (in->buf == NULL) 7120 avail = in->length - (in->cur - in->base); 7121 else 7122 avail = in->buf->buffer->use - (in->cur - in->base); 7123 if (avail < 1) 7124 goto done; 7125 switch (ctxt->instate) { 7126 case XML_PARSER_EOF: 7127 /* 7128 * Document parsing is done ! 7129 */ 7130 goto done; 7131 case XML_PARSER_START: 7132 /* 7133 * Very first chars read from the document flow. 7134 */ 7135 cur = in->cur[0]; 7136 if (IS_BLANK(cur)) { 7137 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7138 ctxt->sax->setDocumentLocator(ctxt->userData, 7139 &xmlDefaultSAXLocator); 7140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7141 ctxt->sax->error(ctxt->userData, 7142 "Extra spaces at the beginning of the document are not allowed\n"); 7143 ctxt->errNo = XML_ERR_DOCUMENT_START; 7144 ctxt->wellFormed = 0; 7145 SKIP_BLANKS; 7146 ret++; 7147 if (in->buf == NULL) 7148 avail = in->length - (in->cur - in->base); 7149 else 7150 avail = in->buf->buffer->use - (in->cur - in->base); 7151 } 7152 if (avail < 2) 7153 goto done; 7154 7155 cur = in->cur[0]; 7156 next = in->cur[1]; 7157 if (cur == 0) { 7158 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7159 ctxt->sax->setDocumentLocator(ctxt->userData, 7160 &xmlDefaultSAXLocator); 7161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7162 ctxt->sax->error(ctxt->userData, "Document is empty\n"); 7163 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7164 ctxt->wellFormed = 0; 7165 ctxt->instate = XML_PARSER_EOF; 7166#ifdef DEBUG_PUSH 7167 fprintf(stderr, "PP: entering EOF\n"); 7168#endif 7169 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7170 ctxt->sax->endDocument(ctxt->userData); 7171 goto done; 7172 } 7173 if ((cur == '<') && (next == '?')) { 7174 /* PI or XML decl */ 7175 if (avail < 5) return(ret); 7176 if ((!terminate) && 7177 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7178 return(ret); 7179 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7180 ctxt->sax->setDocumentLocator(ctxt->userData, 7181 &xmlDefaultSAXLocator); 7182 if ((in->cur[2] == 'x') && 7183 (in->cur[3] == 'm') && 7184 (in->cur[4] == 'l') && 7185 (IS_BLANK(in->cur[5]))) { 7186 ret += 5; 7187#ifdef DEBUG_PUSH 7188 fprintf(stderr, "PP: Parsing XML Decl\n"); 7189#endif 7190 xmlParseXMLDecl(ctxt); 7191 if ((ctxt->sax) && (ctxt->sax->startDocument)) 7192 ctxt->sax->startDocument(ctxt->userData); 7193 ctxt->instate = XML_PARSER_MISC; 7194#ifdef DEBUG_PUSH 7195 fprintf(stderr, "PP: entering MISC\n"); 7196#endif 7197 } else { 7198 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7199 if ((ctxt->sax) && (ctxt->sax->startDocument)) 7200 ctxt->sax->startDocument(ctxt->userData); 7201 ctxt->instate = XML_PARSER_MISC; 7202#ifdef DEBUG_PUSH 7203 fprintf(stderr, "PP: entering MISC\n"); 7204#endif 7205 } 7206 } else { 7207 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 7208 ctxt->sax->setDocumentLocator(ctxt->userData, 7209 &xmlDefaultSAXLocator); 7210 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 7211 if ((ctxt->sax) && (ctxt->sax->startDocument)) 7212 ctxt->sax->startDocument(ctxt->userData); 7213 ctxt->instate = XML_PARSER_MISC; 7214#ifdef DEBUG_PUSH 7215 fprintf(stderr, "PP: entering MISC\n"); 7216#endif 7217 } 7218 break; 7219 case XML_PARSER_MISC: 7220 SKIP_BLANKS; 7221 if (in->buf == NULL) 7222 avail = in->length - (in->cur - in->base); 7223 else 7224 avail = in->buf->buffer->use - (in->cur - in->base); 7225 if (avail < 2) 7226 goto done; 7227 cur = in->cur[0]; 7228 next = in->cur[1]; 7229 if ((cur == '<') && (next == '?')) { 7230 if ((!terminate) && 7231 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7232 goto done; 7233#ifdef DEBUG_PUSH 7234 fprintf(stderr, "PP: Parsing PI\n"); 7235#endif 7236 xmlParsePI(ctxt); 7237 } else if ((cur == '<') && (next == '!') && 7238 (in->cur[2] == '-') && (in->cur[3] == '-')) { 7239 if ((!terminate) && 7240 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7241 goto done; 7242#ifdef DEBUG_PUSH 7243 fprintf(stderr, "PP: Parsing Comment\n"); 7244#endif 7245 xmlParseComment(ctxt); 7246 ctxt->instate = XML_PARSER_MISC; 7247 } else if ((cur == '<') && (next == '!') && 7248 (in->cur[2] == 'D') && (in->cur[3] == 'O') && 7249 (in->cur[4] == 'C') && (in->cur[5] == 'T') && 7250 (in->cur[6] == 'Y') && (in->cur[7] == 'P') && 7251 (in->cur[8] == 'E')) { 7252 if ((!terminate) && 7253 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7254 goto done; 7255#ifdef DEBUG_PUSH 7256 fprintf(stderr, "PP: Parsing internal subset\n"); 7257#endif 7258 xmlParseDocTypeDecl(ctxt); 7259 if (CUR == '[') { 7260 ctxt->instate = XML_PARSER_DTD; 7261#ifdef DEBUG_PUSH 7262 fprintf(stderr, "PP: entering DTD\n"); 7263#endif 7264 } else { 7265 ctxt->instate = XML_PARSER_PROLOG; 7266#ifdef DEBUG_PUSH 7267 fprintf(stderr, "PP: entering PROLOG\n"); 7268#endif 7269 } 7270 } else if ((cur == '<') && (next == '!') && 7271 (avail < 9)) { 7272 goto done; 7273 } else { 7274 ctxt->instate = XML_PARSER_START_TAG; 7275#ifdef DEBUG_PUSH 7276 fprintf(stderr, "PP: entering START_TAG\n"); 7277#endif 7278 } 7279 break; 7280 case XML_PARSER_PROLOG: 7281 SKIP_BLANKS; 7282 if (in->buf == NULL) 7283 avail = in->length - (in->cur - in->base); 7284 else 7285 avail = in->buf->buffer->use - (in->cur - in->base); 7286 if (avail < 2) 7287 goto done; 7288 cur = in->cur[0]; 7289 next = in->cur[1]; 7290 if ((cur == '<') && (next == '?')) { 7291 if ((!terminate) && 7292 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7293 goto done; 7294#ifdef DEBUG_PUSH 7295 fprintf(stderr, "PP: Parsing PI\n"); 7296#endif 7297 xmlParsePI(ctxt); 7298 } else if ((cur == '<') && (next == '!') && 7299 (in->cur[2] == '-') && (in->cur[3] == '-')) { 7300 if ((!terminate) && 7301 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7302 goto done; 7303#ifdef DEBUG_PUSH 7304 fprintf(stderr, "PP: Parsing Comment\n"); 7305#endif 7306 xmlParseComment(ctxt); 7307 ctxt->instate = XML_PARSER_PROLOG; 7308 } else if ((cur == '<') && (next == '!') && 7309 (avail < 4)) { 7310 goto done; 7311 } else { 7312 ctxt->instate = XML_PARSER_START_TAG; 7313#ifdef DEBUG_PUSH 7314 fprintf(stderr, "PP: entering START_TAG\n"); 7315#endif 7316 } 7317 break; 7318 case XML_PARSER_EPILOG: 7319 SKIP_BLANKS; 7320 if (in->buf == NULL) 7321 avail = in->length - (in->cur - in->base); 7322 else 7323 avail = in->buf->buffer->use - (in->cur - in->base); 7324 if (avail < 2) 7325 goto done; 7326 cur = in->cur[0]; 7327 next = in->cur[1]; 7328 if ((cur == '<') && (next == '?')) { 7329 if ((!terminate) && 7330 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7331 goto done; 7332#ifdef DEBUG_PUSH 7333 fprintf(stderr, "PP: Parsing PI\n"); 7334#endif 7335 xmlParsePI(ctxt); 7336 ctxt->instate = XML_PARSER_EPILOG; 7337 } else if ((cur == '<') && (next == '!') && 7338 (in->cur[2] == '-') && (in->cur[3] == '-')) { 7339 if ((!terminate) && 7340 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7341 goto done; 7342#ifdef DEBUG_PUSH 7343 fprintf(stderr, "PP: Parsing Comment\n"); 7344#endif 7345 xmlParseComment(ctxt); 7346 ctxt->instate = XML_PARSER_EPILOG; 7347 } else if ((cur == '<') && (next == '!') && 7348 (avail < 4)) { 7349 goto done; 7350 } else { 7351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7352 ctxt->sax->error(ctxt->userData, 7353 "Extra content at the end of the document\n"); 7354 ctxt->wellFormed = 0; 7355 ctxt->errNo = XML_ERR_DOCUMENT_END; 7356 ctxt->instate = XML_PARSER_EOF; 7357#ifdef DEBUG_PUSH 7358 fprintf(stderr, "PP: entering EOF\n"); 7359#endif 7360 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7361 ctxt->sax->endDocument(ctxt->userData); 7362 goto done; 7363 } 7364 break; 7365 case XML_PARSER_START_TAG: { 7366 xmlChar *name, *oldname; 7367 7368 if (avail < 2) 7369 goto done; 7370 cur = in->cur[0]; 7371 if (cur != '<') { 7372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7373 ctxt->sax->error(ctxt->userData, 7374 "Start tag expect, '<' not found\n"); 7375 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; 7376 ctxt->wellFormed = 0; 7377 ctxt->instate = XML_PARSER_EOF; 7378#ifdef DEBUG_PUSH 7379 fprintf(stderr, "PP: entering EOF\n"); 7380#endif 7381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7382 ctxt->sax->endDocument(ctxt->userData); 7383 goto done; 7384 } 7385 if ((!terminate) && 7386 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7387 goto done; 7388 name = xmlParseStartTag(ctxt); 7389 if (name == NULL) { 7390 ctxt->instate = XML_PARSER_EOF; 7391#ifdef DEBUG_PUSH 7392 fprintf(stderr, "PP: entering EOF\n"); 7393#endif 7394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7395 ctxt->sax->endDocument(ctxt->userData); 7396 goto done; 7397 } 7398 namePush(ctxt, xmlStrdup(name)); 7399 7400 /* 7401 * [ VC: Root Element Type ] 7402 * The Name in the document type declaration must match 7403 * the element type of the root element. 7404 */ 7405 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 7406 ctxt->node && (ctxt->node == ctxt->myDoc->root)) 7407 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 7408 7409 /* 7410 * Check for an Empty Element. 7411 */ 7412 if ((CUR == '/') && (NXT(1) == '>')) { 7413 SKIP(2); 7414 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 7415 ctxt->sax->endElement(ctxt->userData, name); 7416 xmlFree(name); 7417 oldname = namePop(ctxt); 7418 if (oldname != NULL) { 7419#ifdef DEBUG_STACK 7420 fprintf(stderr,"Close: popped %s\n", oldname); 7421#endif 7422 xmlFree(oldname); 7423 } 7424 if (ctxt->name == NULL) { 7425 ctxt->instate = XML_PARSER_EPILOG; 7426#ifdef DEBUG_PUSH 7427 fprintf(stderr, "PP: entering EPILOG\n"); 7428#endif 7429 } else { 7430 ctxt->instate = XML_PARSER_CONTENT; 7431#ifdef DEBUG_PUSH 7432 fprintf(stderr, "PP: entering CONTENT\n"); 7433#endif 7434 } 7435 break; 7436 } 7437 if (CUR == '>') { 7438 NEXT; 7439 } else { 7440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7441 ctxt->sax->error(ctxt->userData, 7442 "Couldn't find end of Start Tag %s\n", 7443 name); 7444 ctxt->wellFormed = 0; 7445 ctxt->errNo = XML_ERR_GT_REQUIRED; 7446 7447 /* 7448 * end of parsing of this node. 7449 */ 7450 nodePop(ctxt); 7451 oldname = namePop(ctxt); 7452 if (oldname != NULL) { 7453#ifdef DEBUG_STACK 7454 fprintf(stderr,"Close: popped %s\n", oldname); 7455#endif 7456 xmlFree(oldname); 7457 } 7458 } 7459 xmlFree(name); 7460 ctxt->instate = XML_PARSER_CONTENT; 7461#ifdef DEBUG_PUSH 7462 fprintf(stderr, "PP: entering CONTENT\n"); 7463#endif 7464 break; 7465 } 7466 case XML_PARSER_CONTENT: 7467 /* 7468 * Handle preparsed entities and charRef 7469 */ 7470 if (ctxt->token != 0) { 7471 xmlChar cur[2] = { 0 , 0 } ; 7472 7473 cur[0] = (xmlChar) ctxt->token; 7474 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 7475 ctxt->sax->characters(ctxt->userData, cur, 1); 7476 ctxt->token = 0; 7477 } 7478 if (avail < 2) 7479 goto done; 7480 cur = in->cur[0]; 7481 next = in->cur[1]; 7482 if ((cur == '<') && (next == '?')) { 7483 if ((!terminate) && 7484 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 7485 goto done; 7486#ifdef DEBUG_PUSH 7487 fprintf(stderr, "PP: Parsing PI\n"); 7488#endif 7489 xmlParsePI(ctxt); 7490 } else if ((cur == '<') && (next == '!') && 7491 (in->cur[2] == '-') && (in->cur[3] == '-')) { 7492 if ((!terminate) && 7493 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 7494 goto done; 7495#ifdef DEBUG_PUSH 7496 fprintf(stderr, "PP: Parsing Comment\n"); 7497#endif 7498 xmlParseComment(ctxt); 7499 ctxt->instate = XML_PARSER_CONTENT; 7500 } else if ((cur == '<') && (in->cur[1] == '!') && 7501 (in->cur[2] == '[') && (NXT(3) == 'C') && 7502 (in->cur[4] == 'D') && (NXT(5) == 'A') && 7503 (in->cur[6] == 'T') && (NXT(7) == 'A') && 7504 (in->cur[8] == '[')) { 7505 SKIP(9); 7506 ctxt->instate = XML_PARSER_CDATA_SECTION; 7507#ifdef DEBUG_PUSH 7508 fprintf(stderr, "PP: entering CDATA_SECTION\n"); 7509#endif 7510 break; 7511 } else if ((cur == '<') && (next == '!') && 7512 (avail < 9)) { 7513 goto done; 7514 } else if ((cur == '<') && (next == '/')) { 7515 ctxt->instate = XML_PARSER_END_TAG; 7516#ifdef DEBUG_PUSH 7517 fprintf(stderr, "PP: entering END_TAG\n"); 7518#endif 7519 break; 7520 } else if (cur == '<') { 7521 ctxt->instate = XML_PARSER_START_TAG; 7522#ifdef DEBUG_PUSH 7523 fprintf(stderr, "PP: entering START_TAG\n"); 7524#endif 7525 break; 7526 } else if (cur == '&') { 7527 if ((!terminate) && 7528 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 7529 goto done; 7530#ifdef DEBUG_PUSH 7531 fprintf(stderr, "PP: Parsing Reference\n"); 7532#endif 7533 /* TODO: check generation of subtrees if noent !!! */ 7534 xmlParseReference(ctxt); 7535 } else { 7536 /* TODO Avoid the extra copy, handle directly !!!!!! */ 7537 /* 7538 * Goal of the following test is : 7539 * - minimize calls to the SAX 'character' callback 7540 * when they are mergeable 7541 * - handle an problem for isBlank when we only parse 7542 * a sequence of blank chars and the next one is 7543 * not available to check against '<' presence. 7544 * - tries to homogenize the differences in SAX 7545 * callbacks beween the push and pull versions 7546 * of the parser. 7547 */ 7548 if ((ctxt->inputNr == 1) && 7549 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 7550 if ((!terminate) && 7551 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) 7552 goto done; 7553 } 7554 ctxt->checkIndex = 0; 7555#ifdef DEBUG_PUSH 7556 fprintf(stderr, "PP: Parsing char data\n"); 7557#endif 7558 xmlParseCharData(ctxt, 0); 7559 } 7560 /* 7561 * Pop-up of finished entities. 7562 */ 7563 while ((CUR == 0) && (ctxt->inputNr > 1)) 7564 xmlPopInput(ctxt); 7565 break; 7566 case XML_PARSER_CDATA_SECTION: { 7567 /* 7568 * The Push mode need to have the SAX callback for 7569 * cdataBlock merge back contiguous callbacks. 7570 */ 7571 int base; 7572 7573 in = ctxt->input; 7574 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 7575 if (base < 0) { 7576 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 7577 if (ctxt->sax != NULL) { 7578 if (ctxt->sax->cdataBlock != NULL) 7579 ctxt->sax->cdataBlock(ctxt->userData, in->cur, 7580 XML_PARSER_BIG_BUFFER_SIZE); 7581 } 7582 SKIP(XML_PARSER_BIG_BUFFER_SIZE); 7583 ctxt->checkIndex = 0; 7584 } 7585 goto done; 7586 } else { 7587 if ((ctxt->sax != NULL) && (base > 0)) { 7588 if (ctxt->sax->cdataBlock != NULL) 7589 ctxt->sax->cdataBlock(ctxt->userData, 7590 in->cur, base); 7591 } 7592 SKIP(base + 3); 7593 ctxt->checkIndex = 0; 7594 ctxt->instate = XML_PARSER_CONTENT; 7595#ifdef DEBUG_PUSH 7596 fprintf(stderr, "PP: entering CONTENT\n"); 7597#endif 7598 } 7599 break; 7600 } 7601 case XML_PARSER_END_TAG: 7602 if (avail < 2) 7603 goto done; 7604 if ((!terminate) && 7605 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 7606 goto done; 7607 xmlParseEndTag(ctxt); 7608 if (ctxt->name == NULL) { 7609 ctxt->instate = XML_PARSER_EPILOG; 7610#ifdef DEBUG_PUSH 7611 fprintf(stderr, "PP: entering EPILOG\n"); 7612#endif 7613 } else { 7614 ctxt->instate = XML_PARSER_CONTENT; 7615#ifdef DEBUG_PUSH 7616 fprintf(stderr, "PP: entering CONTENT\n"); 7617#endif 7618 } 7619 break; 7620 case XML_PARSER_DTD: { 7621 /* 7622 * Sorry but progressive parsing of the internal subset 7623 * is not expected to be supported. We first check that 7624 * the full content of the internal subset is available and 7625 * the parsing is launched only at that point. 7626 * Internal subset ends up with "']' S? '>'" in an unescaped 7627 * section and not in a ']]>' sequence which are conditional 7628 * sections (whoever argued to keep that crap in XML deserve 7629 * a place in hell !). 7630 */ 7631 int base, i; 7632 xmlChar *buf; 7633 xmlChar quote = 0; 7634 7635 base = in->cur - in->base; 7636 if (base < 0) return(0); 7637 if (ctxt->checkIndex > base) 7638 base = ctxt->checkIndex; 7639 buf = in->buf->buffer->content; 7640 for (;base < in->buf->buffer->use;base++) { 7641 if (quote != 0) { 7642 if (buf[base] == quote) 7643 quote = 0; 7644 continue; 7645 } 7646 if (buf[base] == '"') { 7647 quote = '"'; 7648 continue; 7649 } 7650 if (buf[base] == '\'') { 7651 quote = '\''; 7652 continue; 7653 } 7654 if (buf[base] == ']') { 7655 if (base +1 >= in->buf->buffer->use) 7656 break; 7657 if (buf[base + 1] == ']') { 7658 /* conditional crap, skip both ']' ! */ 7659 base++; 7660 continue; 7661 } 7662 for (i = 0;base + i < in->buf->buffer->use;i++) { 7663 if (buf[base + i] == '>') 7664 goto found_end_int_subset; 7665 } 7666 break; 7667 } 7668 } 7669 /* 7670 * We didn't found the end of the Internal subset 7671 */ 7672 if (quote == 0) 7673 ctxt->checkIndex = base; 7674#ifdef DEBUG_PUSH 7675 if (next == 0) 7676 fprintf(stderr, "PP: lookup of int subset end filed\n"); 7677#endif 7678 goto done; 7679 7680found_end_int_subset: 7681 xmlParseInternalSubset(ctxt); 7682 ctxt->instate = XML_PARSER_PROLOG; 7683 ctxt->checkIndex = 0; 7684#ifdef DEBUG_PUSH 7685 fprintf(stderr, "PP: entering PROLOG\n"); 7686#endif 7687 break; 7688 } 7689 case XML_PARSER_COMMENT: 7690 fprintf(stderr, "PP: internal error, state == COMMENT\n"); 7691 ctxt->instate = XML_PARSER_CONTENT; 7692#ifdef DEBUG_PUSH 7693 fprintf(stderr, "PP: entering CONTENT\n"); 7694#endif 7695 break; 7696 case XML_PARSER_PI: 7697 fprintf(stderr, "PP: internal error, state == PI\n"); 7698 ctxt->instate = XML_PARSER_CONTENT; 7699#ifdef DEBUG_PUSH 7700 fprintf(stderr, "PP: entering CONTENT\n"); 7701#endif 7702 break; 7703 case XML_PARSER_ENTITY_DECL: 7704 fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n"); 7705 ctxt->instate = XML_PARSER_DTD; 7706#ifdef DEBUG_PUSH 7707 fprintf(stderr, "PP: entering DTD\n"); 7708#endif 7709 break; 7710 case XML_PARSER_ENTITY_VALUE: 7711 fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n"); 7712 ctxt->instate = XML_PARSER_CONTENT; 7713#ifdef DEBUG_PUSH 7714 fprintf(stderr, "PP: entering DTD\n"); 7715#endif 7716 break; 7717 case XML_PARSER_ATTRIBUTE_VALUE: 7718 fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n"); 7719 ctxt->instate = XML_PARSER_START_TAG; 7720#ifdef DEBUG_PUSH 7721 fprintf(stderr, "PP: entering START_TAG\n"); 7722#endif 7723 break; 7724 } 7725 } 7726done: 7727#ifdef DEBUG_PUSH 7728 fprintf(stderr, "PP: done %d\n", ret); 7729#endif 7730 return(ret); 7731} 7732 7733/** 7734 * xmlParseTry: 7735 * @ctxt: an XML parser context 7736 * 7737 * Try to progress on parsing 7738 * 7739 * Returns zero if no parsing was possible 7740 */ 7741int 7742xmlParseTry(xmlParserCtxtPtr ctxt) { 7743 return(xmlParseTryOrFinish(ctxt, 0)); 7744} 7745 7746/** 7747 * xmlParseChunk: 7748 * @ctxt: an XML parser context 7749 * @chunk: an char array 7750 * @size: the size in byte of the chunk 7751 * @terminate: last chunk indicator 7752 * 7753 * Parse a Chunk of memory 7754 * 7755 * Returns zero if no error, the xmlParserErrors otherwise. 7756 */ 7757int 7758xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 7759 int terminate) { 7760 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 7761 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 7762 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 7763 int cur = ctxt->input->cur - ctxt->input->base; 7764 7765 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 7766 ctxt->input->base = ctxt->input->buf->buffer->content + base; 7767 ctxt->input->cur = ctxt->input->base + cur; 7768#ifdef DEBUG_PUSH 7769 fprintf(stderr, "PP: pushed %d\n", size); 7770#endif 7771 7772 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 7773 xmlParseTryOrFinish(ctxt, terminate); 7774 } else if (ctxt->instate != XML_PARSER_EOF) 7775 xmlParseTryOrFinish(ctxt, terminate); 7776 if (terminate) { 7777 if ((ctxt->instate != XML_PARSER_EOF) && 7778 (ctxt->instate != XML_PARSER_EPILOG)) { 7779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 7780 ctxt->sax->error(ctxt->userData, 7781 "Extra content at the end of the document\n"); 7782 ctxt->wellFormed = 0; 7783 ctxt->errNo = XML_ERR_DOCUMENT_END; 7784 } 7785 if (ctxt->instate != XML_PARSER_EOF) { 7786 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 7787 ctxt->sax->endDocument(ctxt->userData); 7788 } 7789 ctxt->instate = XML_PARSER_EOF; 7790 } 7791 return((xmlParserErrors) ctxt->errNo); 7792} 7793 7794/************************************************************************ 7795 * * 7796 * I/O front end functions to the parser * 7797 * * 7798 ************************************************************************/ 7799 7800/** 7801 * xmlCreatePushParserCtxt : 7802 * @sax: a SAX handler 7803 * @user_data: The user data returned on SAX callbacks 7804 * @chunk: a pointer to an array of chars 7805 * @size: number of chars in the array 7806 * @filename: an optional file name or URI 7807 * 7808 * Create a parser context for using the XML parser in push mode 7809 * To allow content encoding detection, @size should be >= 4 7810 * The value of @filename is used for fetching external entities 7811 * and error/warning reports. 7812 * 7813 * Returns the new parser context or NULL 7814 */ 7815xmlParserCtxtPtr 7816xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 7817 const char *chunk, int size, const char *filename) { 7818 xmlParserCtxtPtr ctxt; 7819 xmlParserInputPtr inputStream; 7820 xmlParserInputBufferPtr buf; 7821 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 7822 7823 /* 7824 * plug some encoding conversion routines here. !!! 7825 */ 7826 if ((chunk != NULL) && (size >= 4)) 7827 enc = xmlDetectCharEncoding((const xmlChar *) chunk); 7828 7829 buf = xmlAllocParserInputBuffer(enc); 7830 if (buf == NULL) return(NULL); 7831 7832 ctxt = xmlNewParserCtxt(); 7833 if (ctxt == NULL) { 7834 xmlFree(buf); 7835 return(NULL); 7836 } 7837 if (sax != NULL) { 7838 if (ctxt->sax != &xmlDefaultSAXHandler) 7839 xmlFree(ctxt->sax); 7840 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 7841 if (ctxt->sax == NULL) { 7842 xmlFree(buf); 7843 xmlFree(ctxt); 7844 return(NULL); 7845 } 7846 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 7847 if (user_data != NULL) 7848 ctxt->userData = user_data; 7849 } 7850 if (filename == NULL) { 7851 ctxt->directory = NULL; 7852 } else { 7853 ctxt->directory = xmlParserGetDirectory(filename); 7854 } 7855 7856 inputStream = xmlNewInputStream(ctxt); 7857 if (inputStream == NULL) { 7858 xmlFreeParserCtxt(ctxt); 7859 return(NULL); 7860 } 7861 7862 if (filename == NULL) 7863 inputStream->filename = NULL; 7864 else 7865 inputStream->filename = xmlMemStrdup(filename); 7866 inputStream->buf = buf; 7867 inputStream->base = inputStream->buf->buffer->content; 7868 inputStream->cur = inputStream->buf->buffer->content; 7869 7870 inputPush(ctxt, inputStream); 7871 7872 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 7873 (ctxt->input->buf != NULL)) { 7874 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 7875#ifdef DEBUG_PUSH 7876 fprintf(stderr, "PP: pushed %d\n", size); 7877#endif 7878 } 7879 7880 return(ctxt); 7881} 7882 7883/** 7884 * xmlCreateDocParserCtxt : 7885 * @cur: a pointer to an array of xmlChar 7886 * 7887 * Create a parser context for an XML in-memory document. 7888 * 7889 * Returns the new parser context or NULL 7890 */ 7891xmlParserCtxtPtr 7892xmlCreateDocParserCtxt(xmlChar *cur) { 7893 xmlParserCtxtPtr ctxt; 7894 xmlParserInputPtr input; 7895 xmlCharEncoding enc; 7896 7897 ctxt = xmlNewParserCtxt(); 7898 if (ctxt == NULL) { 7899 return(NULL); 7900 } 7901 input = xmlNewInputStream(ctxt); 7902 if (input == NULL) { 7903 xmlFreeParserCtxt(ctxt); 7904 return(NULL); 7905 } 7906 7907 /* 7908 * plug some encoding conversion routines here. !!! 7909 */ 7910 enc = xmlDetectCharEncoding(cur); 7911 xmlSwitchEncoding(ctxt, enc); 7912 7913 input->base = cur; 7914 input->cur = cur; 7915 7916 inputPush(ctxt, input); 7917 return(ctxt); 7918} 7919 7920/** 7921 * xmlSAXParseDoc : 7922 * @sax: the SAX handler block 7923 * @cur: a pointer to an array of xmlChar 7924 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 7925 * documents 7926 * 7927 * parse an XML in-memory document and build a tree. 7928 * It use the given SAX function block to handle the parsing callback. 7929 * If sax is NULL, fallback to the default DOM tree building routines. 7930 * 7931 * Returns the resulting document tree 7932 */ 7933 7934xmlDocPtr 7935xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) { 7936 xmlDocPtr ret; 7937 xmlParserCtxtPtr ctxt; 7938 7939 if (cur == NULL) return(NULL); 7940 7941 7942 ctxt = xmlCreateDocParserCtxt(cur); 7943 if (ctxt == NULL) return(NULL); 7944 if (sax != NULL) { 7945 ctxt->sax = sax; 7946 ctxt->userData = NULL; 7947 } 7948 7949 xmlParseDocument(ctxt); 7950 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 7951 else { 7952 ret = NULL; 7953 xmlFreeDoc(ctxt->myDoc); 7954 ctxt->myDoc = NULL; 7955 } 7956 if (sax != NULL) 7957 ctxt->sax = NULL; 7958 xmlFreeParserCtxt(ctxt); 7959 7960 return(ret); 7961} 7962 7963/** 7964 * xmlParseDoc : 7965 * @cur: a pointer to an array of xmlChar 7966 * 7967 * parse an XML in-memory document and build a tree. 7968 * 7969 * Returns the resulting document tree 7970 */ 7971 7972xmlDocPtr 7973xmlParseDoc(xmlChar *cur) { 7974 return(xmlSAXParseDoc(NULL, cur, 0)); 7975} 7976 7977/** 7978 * xmlSAXParseDTD : 7979 * @sax: the SAX handler block 7980 * @ExternalID: a NAME* containing the External ID of the DTD 7981 * @SystemID: a NAME* containing the URL to the DTD 7982 * 7983 * Load and parse an external subset. 7984 * 7985 * Returns the resulting xmlDtdPtr or NULL in case of error. 7986 */ 7987 7988xmlDtdPtr 7989xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 7990 const xmlChar *SystemID) { 7991 xmlDtdPtr ret = NULL; 7992 xmlParserCtxtPtr ctxt; 7993 xmlParserInputPtr input = NULL; 7994 xmlCharEncoding enc; 7995 7996 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 7997 7998 ctxt = xmlNewParserCtxt(); 7999 if (ctxt == NULL) { 8000 return(NULL); 8001 } 8002 8003 /* 8004 * Set-up the SAX context 8005 */ 8006 if (ctxt == NULL) return(NULL); 8007 if (sax != NULL) { 8008 if (ctxt->sax != NULL) 8009 xmlFree(ctxt->sax); 8010 ctxt->sax = sax; 8011 ctxt->userData = NULL; 8012 } 8013 8014 /* 8015 * Ask the Entity resolver to load the damn thing 8016 */ 8017 8018 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 8019 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID); 8020 if (input == NULL) { 8021 if (sax != NULL) ctxt->sax = NULL; 8022 xmlFreeParserCtxt(ctxt); 8023 return(NULL); 8024 } 8025 8026 /* 8027 * plug some encoding conversion routines here. !!! 8028 */ 8029 xmlPushInput(ctxt, input); 8030 enc = xmlDetectCharEncoding(ctxt->input->cur); 8031 xmlSwitchEncoding(ctxt, enc); 8032 8033 if (input->filename == NULL) 8034 input->filename = (char *) xmlStrdup(SystemID); /* !!!!!!! */ 8035 input->line = 1; 8036 input->col = 1; 8037 input->base = ctxt->input->cur; 8038 input->cur = ctxt->input->cur; 8039 input->free = NULL; 8040 8041 /* 8042 * let's parse that entity knowing it's an external subset. 8043 */ 8044 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 8045 8046 if (ctxt->myDoc != NULL) { 8047 if (ctxt->wellFormed) { 8048 ret = ctxt->myDoc->intSubset; 8049 ctxt->myDoc->intSubset = NULL; 8050 } else { 8051 ret = NULL; 8052 } 8053 xmlFreeDoc(ctxt->myDoc); 8054 ctxt->myDoc = NULL; 8055 } 8056 if (sax != NULL) ctxt->sax = NULL; 8057 xmlFreeParserCtxt(ctxt); 8058 8059 return(ret); 8060} 8061 8062/** 8063 * xmlParseDTD : 8064 * @ExternalID: a NAME* containing the External ID of the DTD 8065 * @SystemID: a NAME* containing the URL to the DTD 8066 * 8067 * Load and parse an external subset. 8068 * 8069 * Returns the resulting xmlDtdPtr or NULL in case of error. 8070 */ 8071 8072xmlDtdPtr 8073xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 8074 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 8075} 8076 8077/** 8078 * xmlSAXParseBalancedChunk : 8079 * @ctx: an XML parser context (possibly NULL) 8080 * @sax: the SAX handler bloc (possibly NULL) 8081 * @user_data: The user data returned on SAX callbacks (possibly NULL) 8082 * @input: a parser input stream 8083 * @enc: the encoding 8084 * 8085 * Parse a well-balanced chunk of an XML document 8086 * The user has to provide SAX callback block whose routines will be 8087 * called by the parser 8088 * The allowed sequence for the Well Balanced Chunk is the one defined by 8089 * the content production in the XML grammar: 8090 * 8091 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8092 * 8093 * Returns 0 id the chunk is well balanced, -1 in case of args problem and 8094 * the error code otherwise 8095 */ 8096 8097int 8098xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax, 8099 void *user_data, xmlParserInputPtr input, 8100 xmlCharEncoding enc) { 8101 xmlParserCtxtPtr ctxt; 8102 int ret; 8103 8104 if (input == NULL) return(-1); 8105 8106 if (ctx != NULL) 8107 ctxt = ctx; 8108 else { 8109 ctxt = xmlNewParserCtxt(); 8110 if (ctxt == NULL) 8111 return(-1); 8112 if (sax == NULL) 8113 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 8114 } 8115 8116 /* 8117 * Set-up the SAX context 8118 */ 8119 if (sax != NULL) { 8120 if (ctxt->sax != NULL) 8121 xmlFree(ctxt->sax); 8122 ctxt->sax = sax; 8123 ctxt->userData = user_data; 8124 } 8125 8126 /* 8127 * plug some encoding conversion routines here. 8128 */ 8129 xmlPushInput(ctxt, input); 8130 if (enc != XML_CHAR_ENCODING_NONE) 8131 xmlSwitchEncoding(ctxt, enc); 8132 8133 /* 8134 * let's parse that entity knowing it's an external subset. 8135 */ 8136 xmlParseContent(ctxt); 8137 ret = ctxt->errNo; 8138 8139 if (ctx == NULL) { 8140 if (sax != NULL) 8141 ctxt->sax = NULL; 8142 else 8143 xmlFreeDoc(ctxt->myDoc); 8144 xmlFreeParserCtxt(ctxt); 8145 } 8146 return(ret); 8147} 8148 8149/** 8150 * xmlParseBalancedChunk : 8151 * @doc: the document the chunk pertains to 8152 * @node: the node defining the context in which informations will be added 8153 * 8154 * Parse a well-balanced chunk of an XML document present in memory 8155 * 8156 * Returns the resulting list of nodes resulting from the parsing, 8157 * they are not added to @node 8158 */ 8159 8160xmlNodePtr 8161xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) { 8162} 8163 8164/** 8165 * xmlParseBalancedChunkFile : 8166 * @doc: the document the chunk pertains to 8167 * 8168 * Parse a well-balanced chunk of an XML document contained in a file 8169 * 8170 * Returns the resulting list of nodes resulting from the parsing, 8171 * they are not added to @node 8172 */ 8173 8174xmlNodePtr 8175xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) { 8176} 8177 8178/** 8179 * xmlRecoverDoc : 8180 * @cur: a pointer to an array of xmlChar 8181 * 8182 * parse an XML in-memory document and build a tree. 8183 * In the case the document is not Well Formed, a tree is built anyway 8184 * 8185 * Returns the resulting document tree 8186 */ 8187 8188xmlDocPtr 8189xmlRecoverDoc(xmlChar *cur) { 8190 return(xmlSAXParseDoc(NULL, cur, 1)); 8191} 8192 8193/** 8194 * xmlCreateFileParserCtxt : 8195 * @filename: the filename 8196 * 8197 * Create a parser context for a file content. 8198 * Automatic support for ZLIB/Compress compressed document is provided 8199 * by default if found at compile-time. 8200 * 8201 * Returns the new parser context or NULL 8202 */ 8203xmlParserCtxtPtr 8204xmlCreateFileParserCtxt(const char *filename) 8205{ 8206 xmlParserCtxtPtr ctxt; 8207 xmlParserInputPtr inputStream; 8208 xmlParserInputBufferPtr buf; 8209 char *directory = NULL; 8210 8211 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 8212 if (buf == NULL) return(NULL); 8213 8214 ctxt = xmlNewParserCtxt(); 8215 if (ctxt == NULL) { 8216 return(NULL); 8217 } 8218 8219 inputStream = xmlNewInputStream(ctxt); 8220 if (inputStream == NULL) { 8221 xmlFreeParserCtxt(ctxt); 8222 return(NULL); 8223 } 8224 8225 inputStream->filename = xmlMemStrdup(filename); 8226 inputStream->buf = buf; 8227 inputStream->base = inputStream->buf->buffer->content; 8228 inputStream->cur = inputStream->buf->buffer->content; 8229 8230 inputPush(ctxt, inputStream); 8231 if ((ctxt->directory == NULL) && (directory == NULL)) 8232 directory = xmlParserGetDirectory(filename); 8233 if ((ctxt->directory == NULL) && (directory != NULL)) 8234 ctxt->directory = directory; 8235 8236 return(ctxt); 8237} 8238 8239/** 8240 * xmlSAXParseFile : 8241 * @sax: the SAX handler block 8242 * @filename: the filename 8243 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 8244 * documents 8245 * 8246 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 8247 * compressed document is provided by default if found at compile-time. 8248 * It use the given SAX function block to handle the parsing callback. 8249 * If sax is NULL, fallback to the default DOM tree building routines. 8250 * 8251 * Returns the resulting document tree 8252 */ 8253 8254xmlDocPtr 8255xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 8256 int recovery) { 8257 xmlDocPtr ret; 8258 xmlParserCtxtPtr ctxt; 8259 char *directory = NULL; 8260 8261 ctxt = xmlCreateFileParserCtxt(filename); 8262 if (ctxt == NULL) return(NULL); 8263 if (sax != NULL) { 8264 if (ctxt->sax != NULL) 8265 xmlFree(ctxt->sax); 8266 ctxt->sax = sax; 8267 ctxt->userData = NULL; 8268 } 8269 8270 if ((ctxt->directory == NULL) && (directory == NULL)) 8271 directory = xmlParserGetDirectory(filename); 8272 if ((ctxt->directory == NULL) && (directory != NULL)) 8273 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); /* !!!!!!! */ 8274 8275 xmlParseDocument(ctxt); 8276 8277 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 8278 else { 8279 ret = NULL; 8280 xmlFreeDoc(ctxt->myDoc); 8281 ctxt->myDoc = NULL; 8282 } 8283 if (sax != NULL) 8284 ctxt->sax = NULL; 8285 xmlFreeParserCtxt(ctxt); 8286 8287 return(ret); 8288} 8289 8290/** 8291 * xmlParseFile : 8292 * @filename: the filename 8293 * 8294 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 8295 * compressed document is provided by default if found at compile-time. 8296 * 8297 * Returns the resulting document tree 8298 */ 8299 8300xmlDocPtr 8301xmlParseFile(const char *filename) { 8302 return(xmlSAXParseFile(NULL, filename, 0)); 8303} 8304 8305/** 8306 * xmlRecoverFile : 8307 * @filename: the filename 8308 * 8309 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 8310 * compressed document is provided by default if found at compile-time. 8311 * In the case the document is not Well Formed, a tree is built anyway 8312 * 8313 * Returns the resulting document tree 8314 */ 8315 8316xmlDocPtr 8317xmlRecoverFile(const char *filename) { 8318 return(xmlSAXParseFile(NULL, filename, 1)); 8319} 8320 8321/** 8322 * xmlCreateMemoryParserCtxt : 8323 * @buffer: an pointer to a char array 8324 * @size: the size of the array 8325 * 8326 * Create a parser context for an XML in-memory document. 8327 * 8328 * Returns the new parser context or NULL 8329 */ 8330xmlParserCtxtPtr 8331xmlCreateMemoryParserCtxt(char *buffer, int size) { 8332 xmlParserCtxtPtr ctxt; 8333 xmlParserInputPtr input; 8334 xmlCharEncoding enc; 8335 8336 if (buffer[size] != '\0') 8337 buffer[size] = '\0'; 8338 8339 ctxt = xmlNewParserCtxt(); 8340 if (ctxt == NULL) { 8341 return(NULL); 8342 } 8343 8344 input = xmlNewInputStream(ctxt); 8345 if (input == NULL) { 8346 xmlFreeParserCtxt(ctxt); 8347 return(NULL); 8348 } 8349 8350 input->filename = NULL; 8351 input->line = 1; 8352 input->col = 1; 8353 input->buf = NULL; 8354 input->consumed = 0; 8355 8356 /* 8357 * plug some encoding conversion routines here. !!! 8358 */ 8359 enc = xmlDetectCharEncoding(BAD_CAST buffer); 8360 xmlSwitchEncoding(ctxt, enc); 8361 8362 input->base = BAD_CAST buffer; 8363 input->cur = BAD_CAST buffer; 8364 input->free = NULL; 8365 8366 inputPush(ctxt, input); 8367 return(ctxt); 8368} 8369 8370/** 8371 * xmlSAXParseMemory : 8372 * @sax: the SAX handler block 8373 * @buffer: an pointer to a char array 8374 * @size: the size of the array 8375 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 8376 * documents 8377 * 8378 * parse an XML in-memory block and use the given SAX function block 8379 * to handle the parsing callback. If sax is NULL, fallback to the default 8380 * DOM tree building routines. 8381 * 8382 * Returns the resulting document tree 8383 */ 8384xmlDocPtr 8385xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) { 8386 xmlDocPtr ret; 8387 xmlParserCtxtPtr ctxt; 8388 8389 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 8390 if (ctxt == NULL) return(NULL); 8391 if (sax != NULL) { 8392 ctxt->sax = sax; 8393 ctxt->userData = NULL; 8394 } 8395 8396 xmlParseDocument(ctxt); 8397 8398 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 8399 else { 8400 ret = NULL; 8401 xmlFreeDoc(ctxt->myDoc); 8402 ctxt->myDoc = NULL; 8403 } 8404 if (sax != NULL) 8405 ctxt->sax = NULL; 8406 xmlFreeParserCtxt(ctxt); 8407 8408 return(ret); 8409} 8410 8411/** 8412 * xmlParseMemory : 8413 * @buffer: an pointer to a char array 8414 * @size: the size of the array 8415 * 8416 * parse an XML in-memory block and build a tree. 8417 * 8418 * Returns the resulting document tree 8419 */ 8420 8421xmlDocPtr xmlParseMemory(char *buffer, int size) { 8422 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 8423} 8424 8425/** 8426 * xmlRecoverMemory : 8427 * @buffer: an pointer to a char array 8428 * @size: the size of the array 8429 * 8430 * parse an XML in-memory block and build a tree. 8431 * In the case the document is not Well Formed, a tree is built anyway 8432 * 8433 * Returns the resulting document tree 8434 */ 8435 8436xmlDocPtr xmlRecoverMemory(char *buffer, int size) { 8437 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 8438} 8439 8440 8441/** 8442 * xmlSetupParserForBuffer: 8443 * @ctxt: an XML parser context 8444 * @buffer: a xmlChar * buffer 8445 * @filename: a file name 8446 * 8447 * Setup the parser context to parse a new buffer; Clears any prior 8448 * contents from the parser context. The buffer parameter must not be 8449 * NULL, but the filename parameter can be 8450 */ 8451void 8452xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 8453 const char* filename) 8454{ 8455 xmlParserInputPtr input; 8456 8457 input = xmlNewInputStream(ctxt); 8458 if (input == NULL) { 8459 perror("malloc"); 8460 xmlFree(ctxt); 8461 return; 8462 } 8463 8464 xmlClearParserCtxt(ctxt); 8465 if (filename != NULL) 8466 input->filename = xmlMemStrdup(filename); 8467 input->base = buffer; 8468 input->cur = buffer; 8469 inputPush(ctxt, input); 8470} 8471 8472/** 8473 * xmlSAXUserParseFile: 8474 * @sax: a SAX handler 8475 * @user_data: The user data returned on SAX callbacks 8476 * @filename: a file name 8477 * 8478 * parse an XML file and call the given SAX handler routines. 8479 * Automatic support for ZLIB/Compress compressed document is provided 8480 * 8481 * Returns 0 in case of success or a error number otherwise 8482 */ 8483int 8484xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 8485 const char *filename) { 8486 int ret = 0; 8487 xmlParserCtxtPtr ctxt; 8488 8489 ctxt = xmlCreateFileParserCtxt(filename); 8490 if (ctxt == NULL) return -1; 8491 if (ctxt->sax != &xmlDefaultSAXHandler) 8492 xmlFree(ctxt->sax); 8493 ctxt->sax = sax; 8494 if (user_data != NULL) 8495 ctxt->userData = user_data; 8496 8497 xmlParseDocument(ctxt); 8498 8499 if (ctxt->wellFormed) 8500 ret = 0; 8501 else { 8502 if (ctxt->errNo != 0) 8503 ret = ctxt->errNo; 8504 else 8505 ret = -1; 8506 } 8507 if (sax != NULL) 8508 ctxt->sax = NULL; 8509 xmlFreeParserCtxt(ctxt); 8510 8511 return ret; 8512} 8513 8514/** 8515 * xmlSAXUserParseMemory: 8516 * @sax: a SAX handler 8517 * @user_data: The user data returned on SAX callbacks 8518 * @buffer: an in-memory XML document input 8519 * @size: the length of the XML document in bytes 8520 * 8521 * A better SAX parsing routine. 8522 * parse an XML in-memory buffer and call the given SAX handler routines. 8523 * 8524 * Returns 0 in case of success or a error number otherwise 8525 */ 8526int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 8527 char *buffer, int size) { 8528 int ret = 0; 8529 xmlParserCtxtPtr ctxt; 8530 8531 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 8532 if (ctxt == NULL) return -1; 8533 ctxt->sax = sax; 8534 ctxt->userData = user_data; 8535 8536 xmlParseDocument(ctxt); 8537 8538 if (ctxt->wellFormed) 8539 ret = 0; 8540 else { 8541 if (ctxt->errNo != 0) 8542 ret = ctxt->errNo; 8543 else 8544 ret = -1; 8545 } 8546 if (sax != NULL) 8547 ctxt->sax = NULL; 8548 xmlFreeParserCtxt(ctxt); 8549 8550 return ret; 8551} 8552 8553 8554/************************************************************************ 8555 * * 8556 * Miscellaneous * 8557 * * 8558 ************************************************************************/ 8559 8560/** 8561 * xmlCleanupParser: 8562 * 8563 * Cleanup function for the XML parser. It tries to reclaim all 8564 * parsing related global memory allocated for the parser processing. 8565 * It doesn't deallocate any document related memory. Calling this 8566 * function should not prevent reusing the parser. 8567 */ 8568 8569void 8570xmlCleanupParser(void) { 8571 xmlCleanupCharEncodingHandlers(); 8572 xmlCleanupPredefinedEntities(); 8573} 8574 8575/** 8576 * xmlParserFindNodeInfo: 8577 * @ctxt: an XML parser context 8578 * @node: an XML node within the tree 8579 * 8580 * Find the parser node info struct for a given node 8581 * 8582 * Returns an xmlParserNodeInfo block pointer or NULL 8583 */ 8584const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx, 8585 const xmlNode* node) 8586{ 8587 unsigned long pos; 8588 8589 /* Find position where node should be at */ 8590 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 8591 if ( ctx->node_seq.buffer[pos].node == node ) 8592 return &ctx->node_seq.buffer[pos]; 8593 else 8594 return NULL; 8595} 8596 8597 8598/** 8599 * xmlInitNodeInfoSeq : 8600 * @seq: a node info sequence pointer 8601 * 8602 * -- Initialize (set to initial state) node info sequence 8603 */ 8604void 8605xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 8606{ 8607 seq->length = 0; 8608 seq->maximum = 0; 8609 seq->buffer = NULL; 8610} 8611 8612/** 8613 * xmlClearNodeInfoSeq : 8614 * @seq: a node info sequence pointer 8615 * 8616 * -- Clear (release memory and reinitialize) node 8617 * info sequence 8618 */ 8619void 8620xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 8621{ 8622 if ( seq->buffer != NULL ) 8623 xmlFree(seq->buffer); 8624 xmlInitNodeInfoSeq(seq); 8625} 8626 8627 8628/** 8629 * xmlParserFindNodeInfoIndex: 8630 * @seq: a node info sequence pointer 8631 * @node: an XML node pointer 8632 * 8633 * 8634 * xmlParserFindNodeInfoIndex : Find the index that the info record for 8635 * the given node is or should be at in a sorted sequence 8636 * 8637 * Returns a long indicating the position of the record 8638 */ 8639unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq, 8640 const xmlNode* node) 8641{ 8642 unsigned long upper, lower, middle; 8643 int found = 0; 8644 8645 /* Do a binary search for the key */ 8646 lower = 1; 8647 upper = seq->length; 8648 middle = 0; 8649 while ( lower <= upper && !found) { 8650 middle = lower + (upper - lower) / 2; 8651 if ( node == seq->buffer[middle - 1].node ) 8652 found = 1; 8653 else if ( node < seq->buffer[middle - 1].node ) 8654 upper = middle - 1; 8655 else 8656 lower = middle + 1; 8657 } 8658 8659 /* Return position */ 8660 if ( middle == 0 || seq->buffer[middle - 1].node < node ) 8661 return middle; 8662 else 8663 return middle - 1; 8664} 8665 8666 8667/** 8668 * xmlParserAddNodeInfo: 8669 * @ctxt: an XML parser context 8670 * @info: a node info sequence pointer 8671 * 8672 * Insert node info record into the sorted sequence 8673 */ 8674void 8675xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 8676 const xmlParserNodeInfo* info) 8677{ 8678 unsigned long pos; 8679 static unsigned int block_size = 5; 8680 8681 /* Find pos and check to see if node is already in the sequence */ 8682 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node); 8683 if ( pos < ctxt->node_seq.length 8684 && ctxt->node_seq.buffer[pos].node == info->node ) { 8685 ctxt->node_seq.buffer[pos] = *info; 8686 } 8687 8688 /* Otherwise, we need to add new node to buffer */ 8689 else { 8690 /* Expand buffer by 5 if needed */ 8691 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) { 8692 xmlParserNodeInfo* tmp_buffer; 8693 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer) 8694 *(ctxt->node_seq.maximum + block_size)); 8695 8696 if ( ctxt->node_seq.buffer == NULL ) 8697 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size); 8698 else 8699 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size); 8700 8701 if ( tmp_buffer == NULL ) { 8702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) 8703 ctxt->sax->error(ctxt->userData, "Out of memory\n"); 8704 ctxt->errNo = XML_ERR_NO_MEMORY; 8705 return; 8706 } 8707 ctxt->node_seq.buffer = tmp_buffer; 8708 ctxt->node_seq.maximum += block_size; 8709 } 8710 8711 /* If position is not at end, move elements out of the way */ 8712 if ( pos != ctxt->node_seq.length ) { 8713 unsigned long i; 8714 8715 for ( i = ctxt->node_seq.length; i > pos; i-- ) 8716 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 8717 } 8718 8719 /* Copy element and increase length */ 8720 ctxt->node_seq.buffer[pos] = *info; 8721 ctxt->node_seq.length++; 8722 } 8723} 8724 8725 8726/** 8727 * xmlSubstituteEntitiesDefault : 8728 * @val: int 0 or 1 8729 * 8730 * Set and return the previous value for default entity support. 8731 * Initially the parser always keep entity references instead of substituting 8732 * entity values in the output. This function has to be used to change the 8733 * default parser behaviour 8734 * SAX::subtituteEntities() has to be used for changing that on a file by 8735 * file basis. 8736 * 8737 * Returns the last value for 0 for no substitution, 1 for substitution. 8738 */ 8739 8740int 8741xmlSubstituteEntitiesDefault(int val) { 8742 int old = xmlSubstituteEntitiesDefaultValue; 8743 8744 xmlSubstituteEntitiesDefaultValue = val; 8745 return(old); 8746} 8747 8748