Lines Matching refs:ctxt

56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
58 static void htmlParseComment(htmlParserCtxtPtr ctxt);
68 * @ctxt: an HTML parser context
74 htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
76 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
77 (ctxt->instate == XML_PARSER_EOF))
79 if (ctxt != NULL) {
80 ctxt->errNo = XML_ERR_NO_MEMORY;
81 ctxt->instate = XML_PARSER_EOF;
82 ctxt->disableSAX = 1;
85 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
90 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
97 * @ctxt: an HTML parser context
106 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
109 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
110 (ctxt->instate == XML_PARSER_EOF))
112 if (ctxt != NULL)
113 ctxt->errNo = error;
114 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
119 if (ctxt != NULL)
120 ctxt->wellFormed = 0;
125 * @ctxt: an HTML parser context
133 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
136 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
137 (ctxt->instate == XML_PARSER_EOF))
139 if (ctxt != NULL)
140 ctxt->errNo = error;
141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
144 if (ctxt != NULL)
145 ctxt->wellFormed = 0;
156 * @ctxt: an HTML parser context
164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
166 if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
167 ctxt->html = 3;
168 if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
169 ctxt->html = 10;
170 if (ctxt->nameNr >= ctxt->nameMax) {
171 ctxt->nameMax *= 2;
172 ctxt->nameTab = (const xmlChar * *)
173 xmlRealloc((xmlChar * *)ctxt->nameTab,
174 ctxt->nameMax *
175 sizeof(ctxt->nameTab[0]));
176 if (ctxt->nameTab == NULL) {
177 htmlErrMemory(ctxt, NULL);
181 ctxt->nameTab[ctxt->nameNr] = value;
182 ctxt->name = value;
183 return (ctxt->nameNr++);
187 * @ctxt: an HTML parser context
194 htmlnamePop(htmlParserCtxtPtr ctxt)
198 if (ctxt->nameNr <= 0)
200 ctxt->nameNr--;
201 if (ctxt->nameNr < 0)
203 if (ctxt->nameNr > 0)
204 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
206 ctxt->name = NULL;
207 ret = ctxt->nameTab[ctxt->nameNr];
208 ctxt->nameTab[ctxt->nameNr] = NULL;
214 * @ctxt: an HTML parser context
222 htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
224 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
225 if (ctxt->nodeInfoMax == 0)
226 ctxt->nodeInfoMax = 5;
227 ctxt->nodeInfoMax *= 2;
228 ctxt->nodeInfoTab = (htmlParserNodeInfo *)
229 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
230 ctxt->nodeInfoMax *
231 sizeof(ctxt->nodeInfoTab[0]));
232 if (ctxt->nodeInfoTab == NULL) {
233 htmlErrMemory(ctxt, NULL);
237 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
238 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
239 return (ctxt->nodeInfoNr++);
244 * @ctxt: an HTML parser context
251 htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
253 if (ctxt->nodeInfoNr <= 0)
255 ctxt->nodeInfoNr--;
256 if (ctxt->nodeInfoNr < 0)
258 if (ctxt->nodeInfoNr > 0)
259 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
261 ctxt->nodeInfo = NULL;
262 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
294 #define UPPER (toupper(*ctxt->input->cur))
296 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
298 #define NXT(val) ctxt->input->cur[(val)]
300 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
302 #define CUR_PTR ctxt->input->cur
304 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
305 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
306 xmlParserInputShrink(ctxt->input)
308 #define GROW if ((ctxt->progressive == 0) && \
309 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
310 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
312 #define CURRENT ((int) (*ctxt->input->cur))
314 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)
318 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
319 #define CUR ((int) (*ctxt->input->cur))
320 #define NEXT xmlNextChar(ctxt)
322 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
326 if (*(ctxt->input->cur) == '\n') { \
327 ctxt->input->line++; ctxt->input->col = 1; \
328 } else ctxt->input->col++; \
329 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
334 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
335 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
338 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
339 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
360 htmlFindEncoding(xmlParserCtxtPtr ctxt) {
363 if ((ctxt == NULL) || (ctxt->input == NULL) ||
364 (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) ||
365 (ctxt->input->buf->encoder != NULL))
367 if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL))
370 start = ctxt->input->cur;
371 end = ctxt->input->end;
399 * @ctxt: the HTML parser context
412 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
413 if (ctxt->instate == XML_PARSER_EOF)
416 if (ctxt->token != 0) {
418 return(ctxt->token);
420 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
432 const unsigned char *cur = ctxt->input->cur;
439 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
440 cur = ctxt->input->cur;
447 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
448 cur = ctxt->input->cur;
454 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
455 cur = ctxt->input->cur;
480 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
485 if ((*ctxt->input->cur == 0) &&
486 (ctxt->input->cur < ctxt->input->end)) {
487 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
494 return((int) *ctxt->input->cur);
503 if ((int) *ctxt->input->cur < 0x80)
504 return((int) *ctxt->input->cur);
513 guess = htmlFindEncoding(ctxt);
515 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
517 if (ctxt->input->encoding != NULL)
518 xmlFree((xmlChar *) ctxt->input->encoding);
519 ctxt->input->encoding = guess;
522 xmlSwitchToEncoding(ctxt, handler);
524 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
528 ctxt->charset = XML_CHAR_ENCODING_UTF8;
531 return(xmlCurrentChar(ctxt, len));
544 if (ctxt->input->end - ctxt->input->cur >= 4) {
546 ctxt->input->cur[0], ctxt->input->cur[1],
547 ctxt->input->cur[2], ctxt->input->cur[3]);
549 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
551 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
556 ctxt->charset = XML_CHAR_ENCODING_8859_1;
558 return((int) *ctxt->input->cur);
563 * @ctxt: the HTML parser context
571 htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
574 while (IS_BLANK_CH(*(ctxt->input->cur))) {
575 if ((*ctxt->input->cur == 0) &&
576 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
577 xmlPopInput(ctxt);
579 if (*(ctxt->input->cur) == '\n') {
580 ctxt->input->line++; ctxt->input->col = 1;
581 } else ctxt->input->col++;
582 ctxt->input->cur++;
583 ctxt->nbChars++;
584 if (*ctxt->input->cur == 0)
585 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 * @ctxt: an HTML parser context
1292 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1299 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1301 if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1309 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1315 while (!xmlStrEqual(newtag, ctxt->name)) {
1316 info = htmlTagLookup(ctxt->name);
1318 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1320 newtag, ctxt->name);
1322 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1323 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1324 htmlnamePop(ctxt);
1330 * @ctxt: an HTML parser context
1335 htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1339 if (ctxt->nameNr == 0)
1341 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1342 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1343 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1344 htmlnamePop(ctxt);
1350 * @ctxt: an HTML parser context
1361 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1363 while ((newtag != NULL) && (ctxt->name != NULL) &&
1364 (htmlCheckAutoClose(newtag, ctxt->name))) {
1365 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1366 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1367 htmlnamePop(ctxt);
1370 htmlAutoCloseOnEnd(ctxt);
1373 while ((newtag == NULL) && (ctxt->name != NULL) &&
1374 ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
1375 (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
1376 (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1377 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1378 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1379 htmlnamePop(ctxt);
1437 * @ctxt: an HTML parser context
1445 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1448 if (ctxt->options & HTML_PARSE_NOIMPLIED)
1454 if (ctxt->nameNr <= 0) {
1455 htmlnamePush(ctxt, BAD_CAST"html");
1456 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1457 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1461 if ((ctxt->nameNr <= 1) &&
1468 if (ctxt->html >= 3) {
1476 htmlnamePush(ctxt, BAD_CAST"head");
1477 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1478 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1482 if (ctxt->html >= 10) {
1486 for (i = 0;i < ctxt->nameNr;i++) {
1487 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1490 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1495 htmlnamePush(ctxt, BAD_CAST"body");
1496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1497 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1503 * @ctxt: an HTML parser context
1513 htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1517 if (ctxt == NULL)
1519 tag = ctxt->name;
1521 htmlAutoClose(ctxt, BAD_CAST"p");
1522 htmlCheckImplied(ctxt, BAD_CAST"p");
1523 htmlnamePush(ctxt, BAD_CAST"p");
1524 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1525 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1532 htmlAutoClose(ctxt, BAD_CAST"p");
1533 htmlCheckImplied(ctxt, BAD_CAST"p");
1534 htmlnamePush(ctxt, BAD_CAST"p");
1535 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1536 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1875 htmlErrMemory(ctxt, "growing buffer\n"); \
2140 * @ctxt: an HTML parser context
2146 htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2151 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2193 * @ctxt: an HTML parser context
2202 static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2213 if (ctxt->name == NULL)
2215 if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2217 if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2221 if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2222 dtd = xmlGetIntSubset(ctxt->myDoc);
2230 if (ctxt->node == NULL) return(0);
2231 lastChild = xmlGetLastChild(ctxt->node);
2235 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2236 (ctxt->node->content != NULL)) return(0);
2240 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2337 static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2341 * @ctxt: an HTML parser context
2350 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2368 return(xmlDictLookup(ctxt->dict, loc, i));
2374 * @ctxt: an HTML parser context
2384 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2399 return(xmlDictLookup(ctxt->dict, loc, i));
2405 * @ctxt: an HTML parser context
2413 htmlParseName(htmlParserCtxtPtr ctxt) {
2423 in = ctxt->input->cur;
2435 count = in - ctxt->input->cur;
2436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2437 ctxt->input->cur = in;
2438 ctxt->nbChars += count;
2439 ctxt->input->col += count;
2443 return(htmlParseNameComplex(ctxt));
2447 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2477 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2483 * @ctxt: an HTML parser context
2493 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2507 htmlErrMemory(ctxt, "buffer allocation failed\n");
2523 c = htmlParseCharRef(ctxt);
2544 ent = htmlParseEntityRef(ctxt, &name);
2622 * @ctxt: an HTML parser context
2633 htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2638 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
2642 name = htmlParseName(ctxt);
2644 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2659 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2672 * @ctxt: an HTML parser context
2677 * asked for ctxt->replaceEntities != 0
2683 htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2688 ret = htmlParseHTMLAttribute(ctxt, '"');
2690 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2696 ret = htmlParseHTMLAttribute(ctxt, '\'');
2698 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2706 ret = htmlParseHTMLAttribute(ctxt, 0);
2708 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2717 * @ctxt: an HTML parser context
2727 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2737 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2749 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2756 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2765 * @ctxt: an HTML parser context
2775 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2786 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2798 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2805 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2814 * @ctxt: an HTML parser context
2834 htmlParseScript(htmlParserCtxtPtr ctxt) {
2854 if (ctxt->recovery) {
2855 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
2856 xmlStrlen(ctxt->name)) == 0)
2860 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2862 ctxt->name, NULL);
2874 if (ctxt->sax->cdataBlock!= NULL) {
2878 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2879 } else if (ctxt->sax->characters != NULL) {
2880 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2889 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
2890 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2895 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2896 if (ctxt->sax->cdataBlock!= NULL) {
2900 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2901 } else if (ctxt->sax->characters != NULL) {
2902 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2910 * @ctxt: an HTML parser context
2919 htmlParseCharData(htmlParserCtxtPtr ctxt) {
2927 while (((cur != '<') || (ctxt->token == '<')) &&
2928 ((cur != '&') || (ctxt->token == '&')) &&
2931 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2941 if (areBlanks(ctxt, buf, nbchar)) {
2942 if (ctxt->sax->ignorableWhitespace != NULL)
2943 ctxt->sax->ignorableWhitespace(ctxt->userData,
2946 htmlCheckParagraph(ctxt);
2947 if (ctxt->sax->characters != NULL)
2948 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2974 if (areBlanks(ctxt, buf, nbchar)) {
2975 if (ctxt->sax->ignorableWhitespace != NULL)
2976 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2978 htmlCheckParagraph(ctxt);
2979 if (ctxt->sax->characters != NULL)
2980 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2988 ctxt->instate = XML_PARSER_EOF;
2994 * @ctxt: an HTML parser context
3010 htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
3018 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3022 URI = htmlParseSystemLiteral(ctxt);
3024 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
3032 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3036 *publicID = htmlParsePubidLiteral(ctxt);
3038 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
3044 URI = htmlParseSystemLiteral(ctxt);
3052 * @ctxt: an XML parser context
3059 htmlParsePI(htmlParserCtxtPtr ctxt) {
3069 state = ctxt->instate;
3070 ctxt->instate = XML_PARSER_PI;
3081 target = htmlParseName(ctxt);
3089 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3090 (ctxt->sax->processingInstruction != NULL))
3091 ctxt->sax->processingInstruction(ctxt->userData,
3093 ctxt->instate = state;
3098 htmlErrMemory(ctxt, NULL);
3099 ctxt->instate = state;
3104 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3116 htmlErrMemory(ctxt, NULL);
3118 ctxt->instate = state;
3139 htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
3147 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3148 (ctxt->sax->processingInstruction != NULL))
3149 ctxt->sax->processingInstruction(ctxt->userData,
3154 htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
3157 ctxt->instate = state;
3163 * @ctxt: an HTML parser context
3170 htmlParseComment(htmlParserCtxtPtr ctxt) {
3185 state = ctxt->instate;
3186 ctxt->instate = XML_PARSER_COMMENT;
3191 htmlErrMemory(ctxt, "buffer allocation failed\n");
3192 ctxt->instate = state;
3211 htmlErrMemory(ctxt, "growing buffer failed\n");
3212 ctxt->instate = state;
3232 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3237 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3238 (!ctxt->disableSAX))
3239 ctxt->sax->comment(ctxt->userData, buf);
3242 ctxt->instate = state;
3247 * @ctxt: an HTML parser context
3257 htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3260 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3261 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3302 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3311 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3321 * @ctxt: an HTML parser context
3330 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3345 name = htmlParseName(ctxt);
3347 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3360 URI = htmlParseExternalID(ctxt, &ExternalID);
3367 htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3376 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3377 (!ctxt->disableSAX))
3378 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3389 * @ctxt: an HTML parser context
3409 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3414 name = htmlParseHTMLName(ctxt);
3416 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3428 val = htmlParseAttValue(ctxt);
3437 * @ctxt: an HTML parser context
3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3449 if ((ctxt == NULL) || (attvalue == NULL))
3453 if (ctxt->input->encoding != NULL)
3470 if (ctxt->input->encoding != NULL)
3471 xmlFree((xmlChar *) ctxt->input->encoding);
3472 ctxt->input->encoding = xmlStrdup(encoding);
3483 (ctxt->input->buf != NULL) &&
3484 (ctxt->input->buf->encoder == NULL)) {
3485 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3489 xmlSwitchEncoding(ctxt, enc);
3491 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3498 xmlSwitchToEncoding(ctxt, handler);
3499 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3501 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
3505 if ((ctxt->input->buf != NULL) &&
3506 (ctxt->input->buf->encoder != NULL) &&
3507 (ctxt->input->buf->raw != NULL) &&
3508 (ctxt->input->buf->buffer != NULL)) {
3515 processed = ctxt->input->cur - ctxt->input->base;
3516 xmlBufferShrink(ctxt->input->buf->buffer, processed);
3517 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
3518 ctxt->input->buf->buffer,
3519 ctxt->input->buf->raw);
3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3525 ctxt->input->base =
3526 ctxt->input->cur = ctxt->input->buf->buffer->content;
3527 ctxt->input->end =
3528 &ctxt->input->base[ctxt->input->buf->buffer->use];
3535 * @ctxt: an HTML parser context
3541 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3547 if ((ctxt == NULL) || (atts == NULL))
3562 htmlCheckEncoding(ctxt, content);
3568 * @ctxt: an HTML parser context
3587 htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3598 if (ctxt->instate == XML_PARSER_EOF)
3600 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3608 atts = ctxt->atts;
3609 maxatts = ctxt->maxatts;
3612 name = htmlParseHTMLName(ctxt);
3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3619 (ctxt->instate != XML_PARSER_EOF))
3629 htmlAutoClose(ctxt, name);
3634 htmlCheckImplied(ctxt, name);
3640 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3645 ctxt->depth++;
3647 if ((ctxt->nameNr != 1) &&
3649 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3653 ctxt->depth++;
3657 for (indx = 0;indx < ctxt->nameNr;indx++) {
3658 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3659 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3663 ctxt->depth++;
3677 long cons = ctxt->nbChars;
3680 attname = htmlParseAttribute(ctxt, &attvalue);
3688 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
3704 htmlErrMemory(ctxt, NULL);
3709 ctxt->atts = atts;
3710 ctxt->maxatts = maxatts;
3718 htmlErrMemory(ctxt, NULL);
3724 ctxt->atts = atts;
3725 ctxt->maxatts = maxatts;
3745 if (cons == ctxt->nbChars) {
3746 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3757 htmlCheckMeta(ctxt, atts);
3763 htmlnamePush(ctxt, name);
3764 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3766 ctxt->sax->startElement(ctxt->userData, name, atts);
3768 ctxt->sax->startElement(ctxt->userData, name, NULL);
3784 * @ctxt: an HTML parser context
3798 htmlParseEndTag(htmlParserCtxtPtr ctxt)
3805 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
3811 name = htmlParseHTMLName(ctxt);
3819 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3821 if (ctxt->recovery) {
3837 if ((ctxt->depth > 0) &&
3841 ctxt->depth--;
3849 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
3850 if (xmlStrEqual(name, ctxt->nameTab[i]))
3854 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3864 htmlAutoCloseOnClose(ctxt, name);
3871 if (!xmlStrEqual(name, ctxt->name)) {
3872 if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
3873 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3875 name, ctxt->name);
3882 oldname = ctxt->name;
3884 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3885 ctxt->sax->endElement(ctxt->userData, name);
3886 htmlnamePop(ctxt);
3898 * @ctxt: an HTML parser context
3905 htmlParseReference(htmlParserCtxtPtr ctxt) {
3915 c = htmlParseCharRef(ctxt);
3929 htmlCheckParagraph(ctxt);
3930 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3931 ctxt->sax->characters(ctxt->userData, out, i);
3933 ent = htmlParseEntityRef(ctxt, &name);
3935 htmlCheckParagraph(ctxt);
3936 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3937 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3941 htmlCheckParagraph(ctxt);
3942 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
3943 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3944 ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
3945 /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
3966 htmlCheckParagraph(ctxt);
3967 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3968 ctxt->sax->characters(ctxt->userData, out, i);
3975 * @ctxt: an HTML parser context
3982 htmlParseContent(htmlParserCtxtPtr ctxt) {
3987 currentNode = xmlStrdup(ctxt->name);
3988 depth = ctxt->nameNr;
3990 long cons = ctxt->nbChars;
3994 if (ctxt->instate == XML_PARSER_EOF)
4001 if (htmlParseEndTag(ctxt) &&
4002 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
4013 name = htmlParseHTMLName_nonInvasive(ctxt);
4015 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4027 if (ctxt->name != NULL) {
4028 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
4029 htmlAutoClose(ctxt, name);
4039 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
4040 (!xmlStrEqual(currentNode, ctxt->name)))
4051 htmlParseScript(ctxt);
4061 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4064 htmlParseDocTypeDecl(ctxt);
4072 htmlParseComment(ctxt);
4079 htmlParsePI(ctxt);
4086 htmlParseElement(ctxt);
4094 htmlParseReference(ctxt);
4101 htmlAutoCloseOnEnd(ctxt);
4109 htmlParseCharData(ctxt);
4112 if (cons == ctxt->nbChars) {
4113 if (ctxt->node != NULL) {
4114 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4128 * @ctxt: an HTML parser context
4139 htmlParseElement(htmlParserCtxtPtr ctxt) {
4148 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4149 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4154 if (ctxt->instate == XML_PARSER_EOF)
4158 if (ctxt->record_info) {
4159 node_info.begin_pos = ctxt->input->consumed +
4160 (CUR_PTR - ctxt->input->base);
4161 node_info.begin_line = ctxt->input->line;
4164 failed = htmlParseStartTag(ctxt);
4165 name = ctxt->name;
4177 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4186 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4187 ctxt->sax->endElement(ctxt->userData, name);
4188 htmlnamePop(ctxt);
4195 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4201 if (xmlStrEqual(name, ctxt->name)) {
4202 nodePop(ctxt);
4203 htmlnamePop(ctxt);
4209 if (ctxt->record_info) {
4210 node_info.end_pos = ctxt->input->consumed +
4211 (CUR_PTR - ctxt->input->base);
4212 node_info.end_line = ctxt->input->line;
4213 node_info.node = ctxt->node;
4214 xmlParserAddNodeInfo(ctxt, &node_info);
4223 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4224 ctxt->sax->endElement(ctxt->userData, name);
4225 htmlnamePop(ctxt);
4232 currentNode = xmlStrdup(ctxt->name);
4233 depth = ctxt->nameNr;
4235 oldptr = ctxt->input->cur;
4236 htmlParseContent(ctxt);
4237 if (oldptr==ctxt->input->cur) break;
4238 if (ctxt->nameNr < depth) break;
4244 if ( currentNode != NULL && ctxt->record_info ) {
4245 node_info.end_pos = ctxt->input->consumed +
4246 (CUR_PTR - ctxt->input->base);
4247 node_info.end_line = ctxt->input->line;
4248 node_info.node = ctxt->node;
4249 xmlParserAddNodeInfo(ctxt, &node_info);
4252 htmlAutoCloseOnEnd(ctxt);
4260 htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
4264 if ( ctxt->node != NULL && ctxt->record_info ) {
4265 ctxt->nodeInfo->end_pos = ctxt->input->consumed +
4266 (CUR_PTR - ctxt->input->base);
4267 ctxt->nodeInfo->end_line = ctxt->input->line;
4268 ctxt->nodeInfo->node = ctxt->node;
4269 xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
4270 htmlNodeInfoPop(ctxt);
4273 htmlAutoCloseOnEnd(ctxt);
4279 * @ctxt: an HTML parser context
4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4295 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4301 if (ctxt->instate == XML_PARSER_EOF)
4305 if (ctxt->record_info) {
4306 node_info.begin_pos = ctxt->input->consumed +
4307 (CUR_PTR - ctxt->input->base);
4308 node_info.begin_line = ctxt->input->line;
4311 failed = htmlParseStartTag(ctxt);
4312 name = ctxt->name;
4324 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4333 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4334 ctxt->sax->endElement(ctxt->userData, name);
4335 htmlnamePop(ctxt);
4342 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4348 if (xmlStrEqual(name, ctxt->name)) {
4349 nodePop(ctxt);
4350 htmlnamePop(ctxt);
4353 if (ctxt->record_info)
4354 htmlNodeInfoPush(ctxt, &node_info);
4355 htmlParserFinishElementParsing(ctxt);
4363 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4364 ctxt->sax->endElement(ctxt->userData, name);
4365 htmlnamePop(ctxt);
4369 if (ctxt->record_info)
4370 htmlNodeInfoPush(ctxt, &node_info);
4375 * @ctxt: an HTML parser context
4382 htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
4387 currentNode = xmlStrdup(ctxt->name);
4388 depth = ctxt->nameNr;
4390 long cons = ctxt->nbChars;
4394 if (ctxt->instate == XML_PARSER_EOF)
4401 if (htmlParseEndTag(ctxt) &&
4402 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
4406 currentNode = xmlStrdup(ctxt->name);
4407 depth = ctxt->nameNr;
4415 name = htmlParseHTMLName_nonInvasive(ctxt);
4417 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4424 htmlParserFinishElementParsing(ctxt);
4428 currentNode = xmlStrdup(ctxt->name);
4429 depth = ctxt->nameNr;
4433 if (ctxt->name != NULL) {
4434 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
4435 htmlAutoClose(ctxt, name);
4445 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
4446 (!xmlStrEqual(currentNode, ctxt->name)))
4448 htmlParserFinishElementParsing(ctxt);
4451 currentNode = xmlStrdup(ctxt->name);
4452 depth = ctxt->nameNr;
4461 htmlParseScript(ctxt);
4471 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4474 htmlParseDocTypeDecl(ctxt);
4482 htmlParseComment(ctxt);
4489 htmlParsePI(ctxt);
4496 htmlParseElementInternal(ctxt);
4499 currentNode = xmlStrdup(ctxt->name);
4500 depth = ctxt->nameNr;
4508 htmlParseReference(ctxt);
4515 htmlAutoCloseOnEnd(ctxt);
4523 htmlParseCharData(ctxt);
4526 if (cons == ctxt->nbChars) {
4527 if (ctxt->node != NULL) {
4528 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4542 * @ctxt: an HTML parser context
4549 __htmlParseContent(void *ctxt) {
4550 if (ctxt != NULL)
4551 htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
4556 * @ctxt: an HTML parser context
4566 htmlParseDocument(htmlParserCtxtPtr ctxt) {
4575 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4576 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4580 ctxt->html = 1;
4581 ctxt->linenumbers = 1;
4586 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4587 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4589 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
4590 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
4602 xmlSwitchEncoding(ctxt, enc);
4611 htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4615 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4616 ctxt->sax->startDocument(ctxt->userData);
4625 htmlParseComment(ctxt);
4626 htmlParsePI(ctxt);
4640 htmlParseDocTypeDecl(ctxt);
4650 htmlParseComment(ctxt);
4651 htmlParsePI(ctxt);
4658 htmlParseContentInternal(ctxt);
4664 htmlAutoCloseOnEnd(ctxt);
4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4671 ctxt->sax->endDocument(ctxt->userData);
4673 if (ctxt->myDoc != NULL) {
4674 dtd = xmlGetIntSubset(ctxt->myDoc);
4676 ctxt->myDoc->intSubset =
4677 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4681 if (! ctxt->wellFormed) return(-1);
4694 * @ctxt: an HTML parser context
4702 htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
4706 if (ctxt == NULL) return(-1);
4707 memset(ctxt, 0, sizeof(htmlParserCtxt));
4709 ctxt->dict = xmlDictCreate();
4710 if (ctxt->dict == NULL) {
4723 ctxt->inputTab = (htmlParserInputPtr *)
4725 if (ctxt->inputTab == NULL) {
4727 ctxt->inputNr = 0;
4728 ctxt->inputMax = 0;
4729 ctxt->input = NULL;
4732 ctxt->inputNr = 0;
4733 ctxt->inputMax = 5;
4734 ctxt->input = NULL;
4735 ctxt->version = NULL;
4736 ctxt->encoding = NULL;
4737 ctxt->standalone = -1;
4738 ctxt->instate = XML_PARSER_START;
4741 ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
4742 if (ctxt->nodeTab == NULL) {
4744 ctxt->nodeNr = 0;
4745 ctxt->nodeMax = 0;
4746 ctxt->node = NULL;
4747 ctxt->inputNr = 0;
4748 ctxt->inputMax = 0;
4749 ctxt->input = NULL;
4752 ctxt->nodeNr = 0;
4753 ctxt->nodeMax = 10;
4754 ctxt->node = NULL;
4757 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4758 if (ctxt->nameTab == NULL) {
4760 ctxt->nameNr = 0;
4761 ctxt->nameMax = 0;
4762 ctxt->name = NULL;
4763 ctxt->nodeNr = 0;
4764 ctxt->nodeMax = 0;
4765 ctxt->node = NULL;
4766 ctxt->inputNr = 0;
4767 ctxt->inputMax = 0;
4768 ctxt->input = NULL;
4771 ctxt->nameNr = 0;
4772 ctxt->nameMax = 10;
4773 ctxt->name = NULL;
4775 ctxt->nodeInfoTab = NULL;
4776 ctxt->nodeInfoNr = 0;
4777 ctxt->nodeInfoMax = 0;
4779 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4781 ctxt->sax = sax;
4784 ctxt->userData = ctxt;
4785 ctxt->myDoc = NULL;
4786 ctxt->wellFormed = 1;
4787 ctxt->replaceEntities = 0;
4788 ctxt->linenumbers = xmlLineNumbersDefaultValue;
4789 ctxt->html = 1;
4790 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
4791 ctxt->vctxt.userData = ctxt;
4792 ctxt->vctxt.error = xmlParserValidityError;
4793 ctxt->vctxt.warning = xmlParserValidityWarning;
4794 ctxt->record_info = 0;
4795 ctxt->validate = 0;
4796 ctxt->nbChars = 0;
4797 ctxt->checkIndex = 0;
4798 ctxt->catalogs = NULL;
4799 xmlInitNodeInfoSeq(&ctxt->node_seq);
4805 * @ctxt: an HTML parser context
4808 * document in ctxt->myDoc is not freed.
4812 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4814 xmlFreeParserCtxt(ctxt);
4828 xmlParserCtxtPtr ctxt;
4830 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4831 if (ctxt == NULL) {
4835 memset(ctxt, 0, sizeof(xmlParserCtxt));
4836 if (htmlInitParserCtxt(ctxt) < 0) {
4837 htmlFreeParserCtxt(ctxt);
4840 return(ctxt);
4854 xmlParserCtxtPtr ctxt;
4863 ctxt = htmlNewParserCtxt();
4864 if (ctxt == NULL)
4870 input = xmlNewInputStream(ctxt);
4872 xmlFreeParserCtxt(ctxt);
4882 inputPush(ctxt, input);
4883 return(ctxt);
4900 htmlParserCtxtPtr ctxt;
4905 ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
4906 if (ctxt == NULL)
4913 if (ctxt->input->encoding != NULL)
4914 xmlFree((xmlChar *) ctxt->input->encoding);
4915 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
4922 xmlSwitchEncoding(ctxt, enc);
4923 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4924 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4934 xmlSwitchToEncoding(ctxt, handler);
4936 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4942 return(ctxt);
4954 * @ctxt: an HTML parser context
4962 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
4971 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
4982 in = ctxt->input;
4990 if (ctxt->checkIndex > base)
4991 base = ctxt->checkIndex;
5049 ctxt->checkIndex = 0;
5068 ctxt->checkIndex = base;
5086 * @ctxt: an HTML parser context
5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
5109 in = ctxt->input;
5117 if (ctxt->checkIndex > base)
5118 base = ctxt->checkIndex;
5149 ctxt->checkIndex = 0;
5154 ctxt->checkIndex = base;
5160 * @ctxt: an HTML parser context
5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
5175 switch (ctxt->instate) {
5229 in = ctxt->input;
5236 htmlAutoCloseOnEnd(ctxt);
5237 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5241 ctxt->instate = XML_PARSER_EOF;
5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5243 ctxt->sax->endDocument(ctxt->userData);
5254 switch (ctxt->instate) {
5272 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5273 ctxt->sax->setDocumentLocator(ctxt->userData,
5275 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5276 (!ctxt->disableSAX))
5277 ctxt->sax->startDocument(ctxt->userData);
5287 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5293 htmlParseDocTypeDecl(ctxt);
5294 ctxt->instate = XML_PARSER_PROLOG;
5300 ctxt->instate = XML_PARSER_MISC;
5320 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5326 htmlParseComment(ctxt);
5327 ctxt->instate = XML_PARSER_MISC;
5330 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5336 htmlParsePI(ctxt);
5337 ctxt->instate = XML_PARSER_MISC;
5344 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5350 htmlParseDocTypeDecl(ctxt);
5351 ctxt->instate = XML_PARSER_PROLOG;
5360 ctxt->instate = XML_PARSER_START_TAG;
5380 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5386 htmlParseComment(ctxt);
5387 ctxt->instate = XML_PARSER_PROLOG;
5390 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5396 htmlParsePI(ctxt);
5397 ctxt->instate = XML_PARSER_PROLOG;
5402 ctxt->instate = XML_PARSER_START_TAG;
5418 htmlParseCharData(ctxt);
5427 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5433 htmlParseComment(ctxt);
5434 ctxt->instate = XML_PARSER_EPILOG;
5437 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5443 htmlParsePI(ctxt);
5444 ctxt->instate = XML_PARSER_EPILOG;
5449 ctxt->errNo = XML_ERR_DOCUMENT_END;
5450 ctxt->wellFormed = 0;
5451 ctxt->instate = XML_PARSER_EOF;
5456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5457 ctxt->sax->endDocument(ctxt->userData);
5470 ctxt->instate = XML_PARSER_CONTENT;
5478 ctxt->instate = XML_PARSER_END_TAG;
5479 ctxt->checkIndex = 0;
5487 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5490 failed = htmlParseStartTag(ctxt);
5491 name = ctxt->name;
5504 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
5513 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5514 ctxt->sax->endElement(ctxt->userData, name);
5515 htmlnamePop(ctxt);
5516 ctxt->instate = XML_PARSER_CONTENT;
5527 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
5534 if (xmlStrEqual(name, ctxt->name)) {
5535 nodePop(ctxt);
5536 htmlnamePop(ctxt);
5539 ctxt->instate = XML_PARSER_CONTENT;
5551 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5552 ctxt->sax->endElement(ctxt->userData, name);
5553 htmlnamePop(ctxt);
5555 ctxt->instate = XML_PARSER_CONTENT;
5567 if (ctxt->token != 0) {
5570 chr[0] = (xmlChar) ctxt->token;
5571 htmlCheckParagraph(ctxt);
5572 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5573 ctxt->sax->characters(ctxt->userData, chr, 1);
5574 ctxt->token = 0;
5575 ctxt->checkIndex = 0;
5580 if (ctxt->sax != NULL) {
5582 if (ctxt->sax->ignorableWhitespace != NULL)
5583 ctxt->sax->ignorableWhitespace(
5584 ctxt->userData, &cur, 1);
5586 htmlCheckParagraph(ctxt);
5587 if (ctxt->sax->characters != NULL)
5588 ctxt->sax->characters(
5589 ctxt->userData, &cur, 1);
5592 ctxt->token = 0;
5593 ctxt->checkIndex = 0;
5602 cons = ctxt->nbChars;
5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5612 idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1);
5619 htmlParseScript(ctxt);
5621 ctxt->instate = XML_PARSER_END_TAG;
5622 ctxt->checkIndex = 0;
5639 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5641 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
5644 htmlParseDocTypeDecl(ctxt);
5649 ctxt, '-', '-', '>', 1, 1) < 0))
5655 htmlParseComment(ctxt);
5656 ctxt->instate = XML_PARSER_CONTENT;
5659 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5665 htmlParsePI(ctxt);
5666 ctxt->instate = XML_PARSER_CONTENT;
5670 ctxt->instate = XML_PARSER_END_TAG;
5671 ctxt->checkIndex = 0;
5678 ctxt->instate = XML_PARSER_START_TAG;
5679 ctxt->checkIndex = 0;
5687 (htmlParseLookupChars(ctxt,
5695 htmlParseReference(ctxt);
5704 (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
5706 ctxt->checkIndex = 0;
5711 htmlParseCharData(ctxt);
5714 if (cons == ctxt->nbChars) {
5715 if (ctxt->node != NULL) {
5716 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5730 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5732 htmlParseEndTag(ctxt);
5733 if (ctxt->nameNr == 0) {
5734 ctxt->instate = XML_PARSER_EPILOG;
5736 ctxt->instate = XML_PARSER_CONTENT;
5738 ctxt->checkIndex = 0;
5745 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5748 ctxt->instate = XML_PARSER_CONTENT;
5749 ctxt->checkIndex = 0;
5756 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5759 ctxt->instate = XML_PARSER_CONTENT;
5760 ctxt->checkIndex = 0;
5767 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5770 ctxt->instate = XML_PARSER_CONTENT;
5771 ctxt->checkIndex = 0;
5778 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5781 ctxt->instate = XML_PARSER_CONTENT;
5782 ctxt->checkIndex = 0;
5789 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5792 ctxt->instate = XML_PARSER_CONTENT;
5793 ctxt->checkIndex = 0;
5800 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5803 ctxt->instate = XML_PARSER_CONTENT;
5804 ctxt->checkIndex = 0;
5811 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5814 ctxt->instate = XML_PARSER_START_TAG;
5815 ctxt->checkIndex = 0;
5822 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5825 ctxt->instate = XML_PARSER_CONTENT;
5826 ctxt->checkIndex = 0;
5833 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5836 ctxt->instate = XML_PARSER_CONTENT;
5837 ctxt->checkIndex = 0;
5844 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5847 ctxt->instate = XML_PARSER_CONTENT;
5848 ctxt->checkIndex = 0;
5859 htmlAutoCloseOnEnd(ctxt);
5860 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5864 ctxt->instate = XML_PARSER_EOF;
5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5866 ctxt->sax->endDocument(ctxt->userData);
5869 if ((ctxt->myDoc != NULL) &&
5870 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5871 (ctxt->instate == XML_PARSER_EPILOG))) {
5873 dtd = xmlGetIntSubset(ctxt->myDoc);
5875 ctxt->myDoc->intSubset =
5876 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5888 * @ctxt: an HTML parser context
5898 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5900 if ((ctxt == NULL) || (ctxt->input == NULL)) {
5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5907 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5908 int cur = ctxt->input->cur - ctxt->input->base;
5911 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5913 ctxt->errNo = XML_PARSER_EOF;
5914 ctxt->disableSAX = 1;
5917 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5918 ctxt->input->cur = ctxt->input->base + cur;
5919 ctxt->input->end =
5920 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5926 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5927 htmlParseTryOrFinish(ctxt, terminate);
5929 } else if (ctxt->instate != XML_PARSER_EOF) {
5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
5931 xmlParserInputBufferPtr in = ctxt->input->buf;
5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5945 htmlParseTryOrFinish(ctxt, terminate);
5947 if ((ctxt->instate != XML_PARSER_EOF) &&
5948 (ctxt->instate != XML_PARSER_EPILOG) &&
5949 (ctxt->instate != XML_PARSER_MISC)) {
5950 ctxt->errNo = XML_ERR_DOCUMENT_END;
5951 ctxt->wellFormed = 0;
5953 if (ctxt->instate != XML_PARSER_EOF) {
5954 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5955 ctxt->sax->endDocument(ctxt->userData);
5957 ctxt->instate = XML_PARSER_EOF;
5959 return((xmlParserErrors) ctxt->errNo);
5987 htmlParserCtxtPtr ctxt;
5996 ctxt = htmlNewParserCtxt();
5997 if (ctxt == NULL) {
6002 ctxt->charset=XML_CHAR_ENCODING_UTF8;
6004 if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
6005 xmlFree(ctxt->sax);
6006 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
6007 if (ctxt->sax == NULL) {
6009 xmlFree(ctxt);
6012 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
6014 ctxt->userData = user_data;
6017 ctxt->directory = NULL;
6019 ctxt->directory = xmlParserGetDirectory(filename);
6022 inputStream = htmlNewInputStream(ctxt);
6024 xmlFreeParserCtxt(ctxt);
6040 inputPush(ctxt, inputStream);
6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
6043 (ctxt->input->buf != NULL)) {
6044 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
6045 int cur = ctxt->input->cur - ctxt->input->base;
6047 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6049 ctxt->input->base = ctxt->input->buf->buffer->content + base;
6050 ctxt->input->cur = ctxt->input->base + cur;
6051 ctxt->input->end =
6052 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
6057 ctxt->progressive = 1;
6059 return(ctxt);
6081 htmlParserCtxtPtr ctxt;
6088 ctxt = htmlCreateDocParserCtxt(cur, encoding);
6089 if (ctxt == NULL) return(NULL);
6091 if (ctxt->sax != NULL) xmlFree (ctxt->sax);
6092 ctxt->sax = sax;
6093 ctxt->userData = userData;
6096 htmlParseDocument(ctxt);
6097 ret = ctxt->myDoc;
6099 ctxt->sax = NULL;
6100 ctxt->userData = NULL;
6102 htmlFreeParserCtxt(ctxt);
6137 htmlParserCtxtPtr ctxt;
6146 ctxt = htmlNewParserCtxt();
6147 if (ctxt == NULL) {
6157 xmlFreeParserCtxt(ctxt);
6161 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
6164 xmlFreeParserCtxt(ctxt);
6168 inputPush(ctxt, inputStream);
6176 htmlCheckEncoding (ctxt, content);
6181 return(ctxt);
6204 htmlParserCtxtPtr ctxt;
6209 ctxt = htmlCreateFileParserCtxt(filename, encoding);
6210 if (ctxt == NULL) return(NULL);
6212 oldsax = ctxt->sax;
6213 ctxt->sax = sax;
6214 ctxt->userData = userData;
6217 htmlParseDocument(ctxt);
6219 ret = ctxt->myDoc;
6221 ctxt->sax = oldsax;
6222 ctxt->userData = NULL;
6224 htmlFreeParserCtxt(ctxt);
6393 * @ctxt: an HTML parser context
6398 htmlCtxtReset(htmlParserCtxtPtr ctxt)
6403 if (ctxt == NULL)
6407 dict = ctxt->dict;
6409 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
6412 ctxt->inputNr = 0;
6413 ctxt->input = NULL;
6415 ctxt->spaceNr = 0;
6416 if (ctxt->spaceTab != NULL) {
6417 ctxt->spaceTab[0] = -1;
6418 ctxt->space = &ctxt->spaceTab[0];
6420 ctxt->space = NULL;
6424 ctxt->nodeNr = 0;
6425 ctxt->node = NULL;
6427 ctxt->nameNr = 0;
6428 ctxt->name = NULL;
6430 DICT_FREE(ctxt->version);
6431 ctxt->version = NULL;
6432 DICT_FREE(ctxt->encoding);
6433 ctxt->encoding = NULL;
6434 DICT_FREE(ctxt->directory);
6435 ctxt->directory = NULL;
6436 DICT_FREE(ctxt->extSubURI);
6437 ctxt->extSubURI = NULL;
6438 DICT_FREE(ctxt->extSubSystem);
6439 ctxt->extSubSystem = NULL;
6440 if (ctxt->myDoc != NULL)
6441 xmlFreeDoc(ctxt->myDoc);
6442 ctxt->myDoc = NULL;
6444 ctxt->standalone = -1;
6445 ctxt->hasExternalSubset = 0;
6446 ctxt->hasPErefs = 0;
6447 ctxt->html = 1;
6448 ctxt->external = 0;
6449 ctxt->instate = XML_PARSER_START;
6450 ctxt->token = 0;
6452 ctxt->wellFormed = 1;
6453 ctxt->nsWellFormed = 1;
6454 ctxt->valid = 1;
6455 ctxt->vctxt.userData = ctxt;
6456 ctxt->vctxt.error = xmlParserValidityError;
6457 ctxt->vctxt.warning = xmlParserValidityWarning;
6458 ctxt->record_info = 0;
6459 ctxt->nbChars = 0;
6460 ctxt->checkIndex = 0;
6461 ctxt->inSubset = 0;
6462 ctxt->errNo = XML_ERR_OK;
6463 ctxt->depth = 0;
6464 ctxt->charset = XML_CHAR_ENCODING_NONE;
6465 ctxt->catalogs = NULL;
6466 xmlInitNodeInfoSeq(&ctxt->node_seq);
6468 if (ctxt->attsDefault != NULL) {
6469 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
6470 ctxt->attsDefault = NULL;
6472 if (ctxt->attsSpecial != NULL) {
6473 xmlHashFree(ctxt->attsSpecial, NULL);
6474 ctxt->attsSpecial = NULL;
6480 * @ctxt: an HTML parser context
6489 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
6491 if (ctxt == NULL)
6495 ctxt->sax->warning = NULL;
6496 ctxt->vctxt.warning = NULL;
6498 ctxt->options |= XML_PARSE_NOWARNING;
6501 ctxt->sax->error = NULL;
6502 ctxt->vctxt.error = NULL;
6503 ctxt->sax->fatalError = NULL;
6505 ctxt->options |= XML_PARSE_NOERROR;
6508 ctxt->pedantic = 1;
6510 ctxt->options |= XML_PARSE_PEDANTIC;
6512 ctxt->pedantic = 0;
6514 ctxt->keepBlanks = 0;
6515 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
6517 ctxt->options |= XML_PARSE_NOBLANKS;
6519 ctxt->keepBlanks = 1;
6521 ctxt->recovery = 1;
6524 ctxt->recovery = 0;
6526 ctxt->options |= HTML_PARSE_COMPACT;
6530 ctxt->options |= XML_PARSE_HUGE;
6533 ctxt->dictNames = 0;
6539 * @ctxt: an HTML parser context
6550 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
6555 htmlCtxtUseOptions(ctxt, options);
6556 ctxt->html = 1;
6562 xmlSwitchToEncoding(ctxt, hdlr);
6563 if (ctxt->input->encoding != NULL)
6564 xmlFree((xmlChar *) ctxt->input->encoding);
6565 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
6568 if ((URL != NULL) && (ctxt->input != NULL) &&
6569 (ctxt->input->filename == NULL))
6570 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
6571 htmlParseDocument(ctxt);
6572 ret = ctxt->myDoc;
6573 ctxt->myDoc = NULL;
6575 if ((ctxt->dictNames) &&
6577 (ret->dict == ctxt->dict))
6578 ctxt->dict = NULL;
6579 xmlFreeParserCtxt(ctxt);
6598 htmlParserCtxtPtr ctxt;
6604 ctxt = htmlCreateDocParserCtxt(cur, NULL);
6605 if (ctxt == NULL)
6607 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6623 htmlParserCtxtPtr ctxt;
6626 ctxt = htmlCreateFileParserCtxt(filename, encoding);
6627 if (ctxt == NULL)
6629 return (htmlDoRead(ctxt, NULL, NULL, options, 0));
6647 htmlParserCtxtPtr ctxt;
6650 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6651 if (ctxt == NULL)
6654 if (ctxt->sax != NULL)
6655 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
6656 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6673 htmlParserCtxtPtr ctxt;
6684 ctxt = xmlNewParserCtxt();
6685 if (ctxt == NULL) {
6689 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6692 xmlFreeParserCtxt(ctxt);
6695 inputPush(ctxt, stream);
6696 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6716 htmlParserCtxtPtr ctxt;
6728 ctxt = htmlNewParserCtxt();
6729 if (ctxt == NULL) {
6733 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6736 xmlFreeParserCtxt(ctxt);
6739 inputPush(ctxt, stream);
6740 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6745 * @ctxt: an HTML parser context
6752 * This reuses the existing @ctxt parser context
6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6764 if (ctxt == NULL)
6767 htmlCtxtReset(ctxt);
6769 stream = xmlNewStringInputStream(ctxt, cur);
6773 inputPush(ctxt, stream);
6774 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6779 * @ctxt: an HTML parser context
6785 * This reuses the existing @ctxt parser context
6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6797 if (ctxt == NULL)
6800 htmlCtxtReset(ctxt);
6802 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
6806 inputPush(ctxt, stream);
6807 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
6812 * @ctxt: an HTML parser context
6820 * This reuses the existing @ctxt parser context
6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6831 if (ctxt == NULL)
6836 htmlCtxtReset(ctxt);
6843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6849 inputPush(ctxt, stream);
6850 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6855 * @ctxt: an HTML parser context
6862 * This reuses the existing @ctxt parser context
6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6875 if (ctxt == NULL)
6878 htmlCtxtReset(ctxt);
6884 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6889 inputPush(ctxt, stream);
6890 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6895 * @ctxt: an HTML parser context
6904 * This reuses the existing @ctxt parser context
6909 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
6919 if (ctxt == NULL)
6922 htmlCtxtReset(ctxt);
6928 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6933 inputPush(ctxt, stream);
6934 return (htmlDoRead(ctxt, URL, encoding, options, 1));