1/* 2 * Summary: internals routines exported by the parser. 3 * Description: this module exports a number of internal parsing routines 4 * they are not really all intended for applications but 5 * can prove useful doing low level processing. 6 * 7 * Copy: See Copyright for the status of this software. 8 * 9 * Author: Daniel Veillard 10 */ 11 12#ifndef __XML_PARSER_INTERNALS_H__ 13#define __XML_PARSER_INTERNALS_H__ 14 15#include <libxml/xmlversion.h> 16#include <libxml/parser.h> 17#include <libxml/HTMLparser.h> 18#include <libxml/chvalid.h> 19 20#ifdef __cplusplus 21extern "C" { 22#endif 23 24/** 25 * xmlParserMaxDepth: 26 * 27 * arbitrary depth limit for the XML documents that we allow to 28 * process. This is not a limitation of the parser but a safety 29 * boundary feature, use XML_PARSE_HUGE option to override it. 30 */ 31XMLPUBVAR unsigned int xmlParserMaxDepth; 32 33/** 34 * XML_MAX_TEXT_LENGTH: 35 * 36 * Maximum size allowed for a single text node when building a tree. 37 * This is not a limitation of the parser but a safety boundary feature, 38 * use XML_PARSE_HUGE option to override it. 39 */ 40#define XML_MAX_TEXT_LENGTH 10000000 41 42/** 43 * XML_MAX_NAMELEN: 44 * 45 * Identifiers can be longer, but this will be more costly 46 * at runtime. 47 */ 48#define XML_MAX_NAMELEN 100 49 50/** 51 * INPUT_CHUNK: 52 * 53 * The parser tries to always have that amount of input ready. 54 * One of the point is providing context when reporting errors. 55 */ 56#define INPUT_CHUNK 250 57 58/************************************************************************ 59 * * 60 * UNICODE version of the macros. * 61 * * 62 ************************************************************************/ 63/** 64 * IS_BYTE_CHAR: 65 * @c: an byte value (int) 66 * 67 * Macro to check the following production in the XML spec: 68 * 69 * [2] Char ::= #x9 | #xA | #xD | [#x20...] 70 * any byte character in the accepted range 71 */ 72#define IS_BYTE_CHAR(c) xmlIsChar_ch(c) 73 74/** 75 * IS_CHAR: 76 * @c: an UNICODE value (int) 77 * 78 * Macro to check the following production in the XML spec: 79 * 80 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] 81 * | [#x10000-#x10FFFF] 82 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. 83 */ 84#define IS_CHAR(c) xmlIsCharQ(c) 85 86/** 87 * IS_CHAR_CH: 88 * @c: an xmlChar (usually an unsigned char) 89 * 90 * Behaves like IS_CHAR on single-byte value 91 */ 92#define IS_CHAR_CH(c) xmlIsChar_ch(c) 93 94/** 95 * IS_BLANK: 96 * @c: an UNICODE value (int) 97 * 98 * Macro to check the following production in the XML spec: 99 * 100 * [3] S ::= (#x20 | #x9 | #xD | #xA)+ 101 */ 102#define IS_BLANK(c) xmlIsBlankQ(c) 103 104/** 105 * IS_BLANK_CH: 106 * @c: an xmlChar value (normally unsigned char) 107 * 108 * Behaviour same as IS_BLANK 109 */ 110#define IS_BLANK_CH(c) xmlIsBlank_ch(c) 111 112/** 113 * IS_BASECHAR: 114 * @c: an UNICODE value (int) 115 * 116 * Macro to check the following production in the XML spec: 117 * 118 * [85] BaseChar ::= ... long list see REC ... 119 */ 120#define IS_BASECHAR(c) xmlIsBaseCharQ(c) 121 122/** 123 * IS_DIGIT: 124 * @c: an UNICODE value (int) 125 * 126 * Macro to check the following production in the XML spec: 127 * 128 * [88] Digit ::= ... long list see REC ... 129 */ 130#define IS_DIGIT(c) xmlIsDigitQ(c) 131 132/** 133 * IS_DIGIT_CH: 134 * @c: an xmlChar value (usually an unsigned char) 135 * 136 * Behaves like IS_DIGIT but with a single byte argument 137 */ 138#define IS_DIGIT_CH(c) xmlIsDigit_ch(c) 139 140/** 141 * IS_COMBINING: 142 * @c: an UNICODE value (int) 143 * 144 * Macro to check the following production in the XML spec: 145 * 146 * [87] CombiningChar ::= ... long list see REC ... 147 */ 148#define IS_COMBINING(c) xmlIsCombiningQ(c) 149 150/** 151 * IS_COMBINING_CH: 152 * @c: an xmlChar (usually an unsigned char) 153 * 154 * Always false (all combining chars > 0xff) 155 */ 156#define IS_COMBINING_CH(c) 0 157 158/** 159 * IS_EXTENDER: 160 * @c: an UNICODE value (int) 161 * 162 * Macro to check the following production in the XML spec: 163 * 164 * 165 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | 166 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | 167 * [#x309D-#x309E] | [#x30FC-#x30FE] 168 */ 169#define IS_EXTENDER(c) xmlIsExtenderQ(c) 170 171/** 172 * IS_EXTENDER_CH: 173 * @c: an xmlChar value (usually an unsigned char) 174 * 175 * Behaves like IS_EXTENDER but with a single-byte argument 176 */ 177#define IS_EXTENDER_CH(c) xmlIsExtender_ch(c) 178 179/** 180 * IS_IDEOGRAPHIC: 181 * @c: an UNICODE value (int) 182 * 183 * Macro to check the following production in the XML spec: 184 * 185 * 186 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] 187 */ 188#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c) 189 190/** 191 * IS_LETTER: 192 * @c: an UNICODE value (int) 193 * 194 * Macro to check the following production in the XML spec: 195 * 196 * 197 * [84] Letter ::= BaseChar | Ideographic 198 */ 199#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) 200 201/** 202 * IS_LETTER_CH: 203 * @c: an xmlChar value (normally unsigned char) 204 * 205 * Macro behaves like IS_LETTER, but only check base chars 206 * 207 */ 208#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c) 209 210/** 211 * IS_ASCII_LETTER: 212 * @c: an xmlChar value 213 * 214 * Macro to check [a-zA-Z] 215 * 216 */ 217#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \ 218 ((0x61 <= (c)) && ((c) <= 0x7a))) 219 220/** 221 * IS_ASCII_DIGIT: 222 * @c: an xmlChar value 223 * 224 * Macro to check [0-9] 225 * 226 */ 227#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39)) 228 229/** 230 * IS_PUBIDCHAR: 231 * @c: an UNICODE value (int) 232 * 233 * Macro to check the following production in the XML spec: 234 * 235 * 236 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 237 */ 238#define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c) 239 240/** 241 * IS_PUBIDCHAR_CH: 242 * @c: an xmlChar value (normally unsigned char) 243 * 244 * Same as IS_PUBIDCHAR but for single-byte value 245 */ 246#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c) 247 248/** 249 * SKIP_EOL: 250 * @p: and UTF8 string pointer 251 * 252 * Skips the end of line chars. 253 */ 254#define SKIP_EOL(p) \ 255 if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \ 256 if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } 257 258/** 259 * MOVETO_ENDTAG: 260 * @p: and UTF8 string pointer 261 * 262 * Skips to the next '>' char. 263 */ 264#define MOVETO_ENDTAG(p) \ 265 while ((*p) && (*(p) != '>')) (p)++ 266 267/** 268 * MOVETO_STARTTAG: 269 * @p: and UTF8 string pointer 270 * 271 * Skips to the next '<' char. 272 */ 273#define MOVETO_STARTTAG(p) \ 274 while ((*p) && (*(p) != '<')) (p)++ 275 276/** 277 * Global variables used for predefined strings. 278 */ 279XMLPUBVAR const xmlChar xmlStringText[]; 280XMLPUBVAR const xmlChar xmlStringTextNoenc[]; 281XMLPUBVAR const xmlChar xmlStringComment[]; 282 283/* 284 * Function to finish the work of the macros where needed. 285 */ 286XMLPUBFUN int XMLCALL xmlIsLetter (int c); 287 288/** 289 * Parser context. 290 */ 291XMLPUBFUN xmlParserCtxtPtr XMLCALL 292 xmlCreateFileParserCtxt (const char *filename); 293XMLPUBFUN xmlParserCtxtPtr XMLCALL 294 xmlCreateURLParserCtxt (const char *filename, 295 int options); 296XMLPUBFUN xmlParserCtxtPtr XMLCALL 297 xmlCreateMemoryParserCtxt(const char *buffer, 298 int size); 299XMLPUBFUN xmlParserCtxtPtr XMLCALL 300 xmlCreateEntityParserCtxt(const xmlChar *URL, 301 const xmlChar *ID, 302 const xmlChar *base); 303XMLPUBFUN int XMLCALL 304 xmlSwitchEncoding (xmlParserCtxtPtr ctxt, 305 xmlCharEncoding enc); 306XMLPUBFUN int XMLCALL 307 xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, 308 xmlCharEncodingHandlerPtr handler); 309XMLPUBFUN int XMLCALL 310 xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt, 311 xmlParserInputPtr input, 312 xmlCharEncodingHandlerPtr handler); 313 314#ifdef IN_LIBXML 315/* internal error reporting */ 316XMLPUBFUN void XMLCALL 317 __xmlErrEncoding (xmlParserCtxtPtr ctxt, 318 xmlParserErrors xmlerr, 319 const char *msg, 320 const xmlChar * str1, 321 const xmlChar * str2); 322#endif 323 324/** 325 * Input Streams. 326 */ 327XMLPUBFUN xmlParserInputPtr XMLCALL 328 xmlNewStringInputStream (xmlParserCtxtPtr ctxt, 329 const xmlChar *buffer); 330XMLPUBFUN xmlParserInputPtr XMLCALL 331 xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, 332 xmlEntityPtr entity); 333XMLPUBFUN int XMLCALL 334 xmlPushInput (xmlParserCtxtPtr ctxt, 335 xmlParserInputPtr input); 336XMLPUBFUN xmlChar XMLCALL 337 xmlPopInput (xmlParserCtxtPtr ctxt); 338XMLPUBFUN void XMLCALL 339 xmlFreeInputStream (xmlParserInputPtr input); 340XMLPUBFUN xmlParserInputPtr XMLCALL 341 xmlNewInputFromFile (xmlParserCtxtPtr ctxt, 342 const char *filename); 343XMLPUBFUN xmlParserInputPtr XMLCALL 344 xmlNewInputStream (xmlParserCtxtPtr ctxt); 345 346/** 347 * Namespaces. 348 */ 349XMLPUBFUN xmlChar * XMLCALL 350 xmlSplitQName (xmlParserCtxtPtr ctxt, 351 const xmlChar *name, 352 xmlChar **prefix); 353 354/** 355 * Generic production rules. 356 */ 357XMLPUBFUN const xmlChar * XMLCALL 358 xmlParseName (xmlParserCtxtPtr ctxt); 359XMLPUBFUN xmlChar * XMLCALL 360 xmlParseNmtoken (xmlParserCtxtPtr ctxt); 361XMLPUBFUN xmlChar * XMLCALL 362 xmlParseEntityValue (xmlParserCtxtPtr ctxt, 363 xmlChar **orig); 364XMLPUBFUN xmlChar * XMLCALL 365 xmlParseAttValue (xmlParserCtxtPtr ctxt); 366XMLPUBFUN xmlChar * XMLCALL 367 xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); 368XMLPUBFUN xmlChar * XMLCALL 369 xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); 370XMLPUBFUN void XMLCALL 371 xmlParseCharData (xmlParserCtxtPtr ctxt, 372 int cdata); 373XMLPUBFUN xmlChar * XMLCALL 374 xmlParseExternalID (xmlParserCtxtPtr ctxt, 375 xmlChar **publicID, 376 int strict); 377XMLPUBFUN void XMLCALL 378 xmlParseComment (xmlParserCtxtPtr ctxt); 379XMLPUBFUN const xmlChar * XMLCALL 380 xmlParsePITarget (xmlParserCtxtPtr ctxt); 381XMLPUBFUN void XMLCALL 382 xmlParsePI (xmlParserCtxtPtr ctxt); 383XMLPUBFUN void XMLCALL 384 xmlParseNotationDecl (xmlParserCtxtPtr ctxt); 385XMLPUBFUN void XMLCALL 386 xmlParseEntityDecl (xmlParserCtxtPtr ctxt); 387XMLPUBFUN int XMLCALL 388 xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, 389 xmlChar **value); 390XMLPUBFUN xmlEnumerationPtr XMLCALL 391 xmlParseNotationType (xmlParserCtxtPtr ctxt); 392XMLPUBFUN xmlEnumerationPtr XMLCALL 393 xmlParseEnumerationType (xmlParserCtxtPtr ctxt); 394XMLPUBFUN int XMLCALL 395 xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, 396 xmlEnumerationPtr *tree); 397XMLPUBFUN int XMLCALL 398 xmlParseAttributeType (xmlParserCtxtPtr ctxt, 399 xmlEnumerationPtr *tree); 400XMLPUBFUN void XMLCALL 401 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); 402XMLPUBFUN xmlElementContentPtr XMLCALL 403 xmlParseElementMixedContentDecl 404 (xmlParserCtxtPtr ctxt, 405 int inputchk); 406XMLPUBFUN xmlElementContentPtr XMLCALL 407 xmlParseElementChildrenContentDecl 408 (xmlParserCtxtPtr ctxt, 409 int inputchk); 410XMLPUBFUN int XMLCALL 411 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, 412 const xmlChar *name, 413 xmlElementContentPtr *result); 414XMLPUBFUN int XMLCALL 415 xmlParseElementDecl (xmlParserCtxtPtr ctxt); 416XMLPUBFUN void XMLCALL 417 xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); 418XMLPUBFUN int XMLCALL 419 xmlParseCharRef (xmlParserCtxtPtr ctxt); 420XMLPUBFUN xmlEntityPtr XMLCALL 421 xmlParseEntityRef (xmlParserCtxtPtr ctxt); 422XMLPUBFUN void XMLCALL 423 xmlParseReference (xmlParserCtxtPtr ctxt); 424XMLPUBFUN void XMLCALL 425 xmlParsePEReference (xmlParserCtxtPtr ctxt); 426XMLPUBFUN void XMLCALL 427 xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); 428#ifdef LIBXML_SAX1_ENABLED 429XMLPUBFUN const xmlChar * XMLCALL 430 xmlParseAttribute (xmlParserCtxtPtr ctxt, 431 xmlChar **value); 432XMLPUBFUN const xmlChar * XMLCALL 433 xmlParseStartTag (xmlParserCtxtPtr ctxt); 434XMLPUBFUN void XMLCALL 435 xmlParseEndTag (xmlParserCtxtPtr ctxt); 436#endif /* LIBXML_SAX1_ENABLED */ 437XMLPUBFUN void XMLCALL 438 xmlParseCDSect (xmlParserCtxtPtr ctxt); 439XMLPUBFUN void XMLCALL 440 xmlParseContent (xmlParserCtxtPtr ctxt); 441XMLPUBFUN void XMLCALL 442 xmlParseElement (xmlParserCtxtPtr ctxt); 443XMLPUBFUN xmlChar * XMLCALL 444 xmlParseVersionNum (xmlParserCtxtPtr ctxt); 445XMLPUBFUN xmlChar * XMLCALL 446 xmlParseVersionInfo (xmlParserCtxtPtr ctxt); 447XMLPUBFUN xmlChar * XMLCALL 448 xmlParseEncName (xmlParserCtxtPtr ctxt); 449XMLPUBFUN const xmlChar * XMLCALL 450 xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); 451XMLPUBFUN int XMLCALL 452 xmlParseSDDecl (xmlParserCtxtPtr ctxt); 453XMLPUBFUN void XMLCALL 454 xmlParseXMLDecl (xmlParserCtxtPtr ctxt); 455XMLPUBFUN void XMLCALL 456 xmlParseTextDecl (xmlParserCtxtPtr ctxt); 457XMLPUBFUN void XMLCALL 458 xmlParseMisc (xmlParserCtxtPtr ctxt); 459XMLPUBFUN void XMLCALL 460 xmlParseExternalSubset (xmlParserCtxtPtr ctxt, 461 const xmlChar *ExternalID, 462 const xmlChar *SystemID); 463/** 464 * XML_SUBSTITUTE_NONE: 465 * 466 * If no entities need to be substituted. 467 */ 468#define XML_SUBSTITUTE_NONE 0 469/** 470 * XML_SUBSTITUTE_REF: 471 * 472 * Whether general entities need to be substituted. 473 */ 474#define XML_SUBSTITUTE_REF 1 475/** 476 * XML_SUBSTITUTE_PEREF: 477 * 478 * Whether parameter entities need to be substituted. 479 */ 480#define XML_SUBSTITUTE_PEREF 2 481/** 482 * XML_SUBSTITUTE_BOTH: 483 * 484 * Both general and parameter entities need to be substituted. 485 */ 486#define XML_SUBSTITUTE_BOTH 3 487 488XMLPUBFUN xmlChar * XMLCALL 489 xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, 490 const xmlChar *str, 491 int what, 492 xmlChar end, 493 xmlChar end2, 494 xmlChar end3); 495XMLPUBFUN xmlChar * XMLCALL 496 xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt, 497 const xmlChar *str, 498 int len, 499 int what, 500 xmlChar end, 501 xmlChar end2, 502 xmlChar end3); 503 504/* 505 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP. 506 */ 507XMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt, 508 xmlNodePtr value); 509XMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt); 510XMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt, 511 xmlParserInputPtr value); 512XMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt); 513XMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt); 514XMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt, 515 const xmlChar *value); 516 517/* 518 * other commodities shared between parser.c and parserInternals. 519 */ 520XMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt); 521XMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt, 522 const xmlChar *cur, 523 int *len); 524XMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 525XMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang); 526 527/* 528 * Really core function shared with HTML parser. 529 */ 530XMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt, 531 int *len); 532XMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out, 533 int val); 534XMLPUBFUN int XMLCALL xmlCopyChar (int len, 535 xmlChar *out, 536 int val); 537XMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt); 538XMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in); 539 540#ifdef LIBXML_HTML_ENABLED 541/* 542 * Actually comes from the HTML parser but launched from the init stuff. 543 */ 544XMLPUBFUN void XMLCALL htmlInitAutoClose (void); 545XMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename, 546 const char *encoding); 547#endif 548 549/* 550 * Specific function to keep track of entities references 551 * and used by the XSLT debugger. 552 */ 553#ifdef LIBXML_LEGACY_ENABLED 554/** 555 * xmlEntityReferenceFunc: 556 * @ent: the entity 557 * @firstNode: the fist node in the chunk 558 * @lastNode: the last nod in the chunk 559 * 560 * Callback function used when one needs to be able to track back the 561 * provenance of a chunk of nodes inherited from an entity replacement. 562 */ 563typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent, 564 xmlNodePtr firstNode, 565 xmlNodePtr lastNode); 566 567XMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func); 568 569XMLPUBFUN xmlChar * XMLCALL 570 xmlParseQuotedString (xmlParserCtxtPtr ctxt); 571XMLPUBFUN void XMLCALL 572 xmlParseNamespace (xmlParserCtxtPtr ctxt); 573XMLPUBFUN xmlChar * XMLCALL 574 xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt); 575XMLPUBFUN xmlChar * XMLCALL 576 xmlScanName (xmlParserCtxtPtr ctxt); 577XMLPUBFUN xmlChar * XMLCALL 578 xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); 579XMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt); 580XMLPUBFUN xmlChar * XMLCALL 581 xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, 582 xmlChar **prefix); 583/** 584 * Entities 585 */ 586XMLPUBFUN xmlChar * XMLCALL 587 xmlDecodeEntities (xmlParserCtxtPtr ctxt, 588 int len, 589 int what, 590 xmlChar end, 591 xmlChar end2, 592 xmlChar end3); 593XMLPUBFUN void XMLCALL 594 xmlHandleEntity (xmlParserCtxtPtr ctxt, 595 xmlEntityPtr entity); 596 597#endif /* LIBXML_LEGACY_ENABLED */ 598 599#ifdef IN_LIBXML 600/* 601 * internal only 602 */ 603XMLPUBFUN void XMLCALL 604 xmlErrMemory (xmlParserCtxtPtr ctxt, 605 const char *extra); 606#endif 607 608#ifdef __cplusplus 609} 610#endif 611#endif /* __XML_PARSER_INTERNALS_H__ */ 612