15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * HTMLparser.c : an HTML 4.0 non-verifying parser 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * See Copyright for the status of this software. 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * daniel@veillard.com 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define IN_LIBXML 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "libxml.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_HTML_ENABLED 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h> 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_CTYPE_H 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <ctype.h> 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_STDLIB_H 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdlib.h> 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_SYS_STAT_H 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <sys/stat.h> 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_FCNTL_H 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <fcntl.h> 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_UNISTD_H 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <unistd.h> 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_ZLIB_H 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <zlib.h> 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlmemory.h> 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/tree.h> 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/parser.h> 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/parserInternals.h> 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlerror.h> 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/HTMLparser.h> 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/HTMLtree.h> 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/entities.h> 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/encoding.h> 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/valid.h> 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlIO.h> 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/globals.h> 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/uri.h> 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define HTML_MAX_NAMELEN 1000 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define HTML_PARSER_BIG_BUFFER_SIZE 1000 
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define HTML_PARSER_BUFFER_SIZE 100 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* #define DEBUG */ 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* #define DEBUG_PUSH */ 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int htmlOmittedDefaultValue = 1; 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar end, xmlChar end2, xmlChar end3); 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void htmlParseComment(htmlParserCtxtPtr ctxt); 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Some factorized error routines * 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlErrMemory: 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @extra: extra informations 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle a redefinition of attribute error 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate == XML_PARSER_EOF)) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt != NULL) { 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = XML_ERR_NO_MEMORY; 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EOF; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->disableSAX = 1; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (extra) 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL, 0, 0, 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Memory allocation failed : %s\n", extra); 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL, 0, 0, "Memory allocation failed\n"); 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseErr: 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @error: the error number 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @msg: the error message 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str1: string infor 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str2: string infor 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *msg, const xmlChar *str1, const xmlChar *str2) 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate == XML_PARSER_EOF)) 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt != NULL) 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = error; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) XML_ERR_ERROR, NULL, 0, 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (const char *) str1, (const char *) str2, 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, 0, 0, 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) msg, str1, str2); 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt != NULL) 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->wellFormed = 0; 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseErrInt: 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @error: the error number 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @msg: the error message 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @val: integer info 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle a fatal parser error, i.e. violating Well-Formedness constraints 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *msg, int val) 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate == XML_PARSER_EOF)) 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt != NULL) 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = error; 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) XML_ERR_ERROR, NULL, 0, NULL, NULL, 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, val, 0, msg, val); 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt != NULL) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->wellFormed = 0; 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parser stacks related functions and macros * 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlnamePush: 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value: the element name 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pushes a new element name on top of the name stack 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of error, the index in the stack otherwise 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value) 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head"))) 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->html = 3; 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body"))) 
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->html = 10; 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr >= ctxt->nameMax) { 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameMax *= 2; 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameTab = (const xmlChar * *) 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlRealloc((xmlChar * *)ctxt->nameTab, 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameMax * 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sizeof(ctxt->nameTab[0])); 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameTab == NULL) { 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, NULL); 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameTab[ctxt->nameNr] = value; 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name = value; 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (ctxt->nameNr++); 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlnamePop: 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pops the top element name from the name stack 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the name just removed 
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlnamePop(htmlParserCtxtPtr ctxt) 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *ret; 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr <= 0) 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameNr--; 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr < 0) 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr > 0) 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name = NULL; 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = ctxt->nameTab[ctxt->nameNr]; 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameTab[ctxt->nameNr] = NULL; 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (ret); 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNodeInfoPush: 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value: the node info 
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pushes a new element name on top of the node info stack 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of error, the index in the stack otherwise 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value) 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) { 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeInfoMax == 0) 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoMax = 5; 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoMax *= 2; 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoTab = (htmlParserNodeInfo *) 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab, 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoMax * 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sizeof(ctxt->nodeInfoTab[0])); 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeInfoTab == NULL) { 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, NULL); 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value; 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr]; 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (ctxt->nodeInfoNr++); 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNodeInfoPop: 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pops the top element name from the node info stack 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of error, the pointer to NodeInfo otherwise 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlParserNodeInfo * 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNodeInfoPop(htmlParserCtxtPtr ctxt) 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeInfoNr <= 0) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoNr--; 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeInfoNr < 0) 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeInfoNr > 0) 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1]; 
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfo = NULL; 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return &ctxt->nodeInfoTab[ctxt->nodeInfoNr]; 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Macros for accessing the content. Those should be used only by the parser, 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and not exported. 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Dirty macros, i.e. one need to make assumption on the context to use them 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CUR_PTR return the current pointer to the xmlChar to be parsed. 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * in ISO-Latin or UTF-8, and the current 16 bit value if compiled 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * in UNICODE mode. This should be used internally by the parser 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * only to compare to ASCII values otherwise it would break when 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * running with UTF-8 encoding. 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to compare on ASCII based substring. 
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * it should be used only to compare on ASCII based substring. 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * strings without newlines within the parser. 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CURRENT Returns the current char value, with the full decoding of 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * UTF-8 if we are using this mode. It returns an int. 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NEXT Skip to the next character, this does the proper decoding 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NEXTL(l) Skip the current unicode character of l xmlChars long. 
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define UPPER (toupper(*ctxt->input->cur)) 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val) 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define NXT(val) ctxt->input->cur[(val)] 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define UPP(val) (toupper(ctxt->input->cur[(val)])) 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CUR_PTR ctxt->input->cur 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputShrink(ctxt->input) 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define GROW if ((ctxt->progressive == 0) && \ 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputGrow(ctxt->input, INPUT_CHUNK) 3115821806d5e7f356e8fa4b058a389a808ea183019Torne 
/* Current byte of the input, as an int; no multi-byte decoding is done. */
#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt)

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))


/*
 * Advance the input by (l) xmlChars, counted as one character: line/col
 * bookkeeping is updated, ctxt->token is reset and nbChars is incremented.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++;          \
  } while (0)

/************
                                                                        \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

/* (l) must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character (v), whose encoded length is (l), to buffer (b) at
 * index (i) and advance (i).  (i) must be an lvalue.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v))

(Richard Coles) 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlFindEncoding: 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @the HTML parser context 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Ty to find and encoding in the current data available in the input 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * buffer this is needed to try to switch to the proper encoding when 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * one face a character error. 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * That's an heuristic, since it's operating outside of parsing it could 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * try to use a meta which had been commented out, that's the reason it 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * should only be used in case of error, not as a default. 
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns an encoding string or NULL if not found, the string need to 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * be freed 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar * 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlFindEncoding(xmlParserCtxtPtr ctxt) { 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *start, *cur, *end; 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL) || 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) || 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf->encoder != NULL)) 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL)) 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start = ctxt->input->cur; 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) end = ctxt->input->end; 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* we also expect the input buffer to be zero terminated */ 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (*end != 0) 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = 
xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV"); 3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) 3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = xmlStrcasestr(cur, BAD_CAST "CONTENT"); 3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) 3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = xmlStrcasestr(cur, BAD_CAST "CHARSET="); 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur += 8; 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start = cur; 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (((*cur >= 'A') && (*cur <= 'Z')) || 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((*cur >= 'a') && (*cur <= 'z')) || 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((*cur >= '0') && (*cur <= '9')) || 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/')) 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur++; 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == start) 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(xmlStrndup(start, cur - start)); 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCurrentChar: 
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: the HTML parser context 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @len: pointer to the length of the char read 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The current char value, if using UTF-8 this may actually span multiple 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * bytes in the input buffer. Implement the end of line normalization: 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2.11 End-of-Line Handling 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If the encoding is unspecified, in the case we find an ISO-Latin-1 4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * char, then the encoding converter is plugged in automatically. 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the current char value and its length 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate == XML_PARSER_EOF) 4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->token != 0) { 4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 0; 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ctxt->token); 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * We are supposed to handle UTF8, check it's valid 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * From rfc2044: encoding of the Unicode values on UTF-8: 4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * UCS-4 range (hex.) UTF-8 octet sequence (binary) 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 0000 0000-0000 007F 0xxxxxxx 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for the 0x110000 limit too 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char *cur = ctxt->input->cur; 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned char c; 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int val; 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = *cur; 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c & 0x80) { 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur[1] == 0) { 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = ctxt->input->cur; 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur[1] & 0xc0) != 0x80) 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto encoding_error; 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((c & 0xe0) == 0xe0) { 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur[2] == 0) { 4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = ctxt->input->cur; 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur[2] & 0xc0) != 0x80) 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto encoding_error; 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((c & 0xf0) == 0xf0) { 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur[3] == 0) { 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = ctxt->input->cur; 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (((c & 0xf8) != 0xf0) || 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((cur[3] & 0xc0) != 0x80)) 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto encoding_error; 4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 4-byte code */ 4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 4; 4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = (cur[0] & 0x7) << 18; 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val |= (cur[1] & 0x3f) << 12; 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val |= 
(cur[2] & 0x3f) << 6; 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val |= cur[3] & 0x3f; 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 3-byte code */ 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 3; 4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = (cur[0] & 0xf) << 12; 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val |= (cur[1] & 0x3f) << 6; 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val |= cur[2] & 0x3f; 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 2-byte code */ 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 2; 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = (cur[0] & 0x1f) << 6; 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val |= cur[1] & 0x3f; 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_CHAR(val)) { 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Char 0x%X out of allowed range\n", val); 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(val); 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((*ctxt->input->cur == 0) && 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->cur < ctxt->input->end)) { 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Char 0x%X out of allowed range\n", 0); 4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 1; 4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(' '); 4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 1-byte code */ 4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 1; 4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((int) *ctxt->input->cur); 4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Assume it's a fixed length encoding (1) with 4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * a compatible encoding for the ASCII set, since 5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * XML constructs only use < 128 chars 5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 1; 5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((int) *ctxt->input->cur < 0x80) 5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((int) *ctxt->input->cur); 5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Humm this is bad, do an automatic flow conversion 5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar * guess; 5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
xmlCharEncodingHandlerPtr handler; 5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) guess = htmlFindEncoding(ctxt); 5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (guess == NULL) { 5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->input->encoding != NULL) 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree((xmlChar *) ctxt->input->encoding); 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->encoding = guess; 5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) handler = xmlFindCharEncodingHandler((const char *) guess); 5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (handler != NULL) { 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchToEncoding(ctxt, handler); 5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unsupported encoding %s", guess, NULL); 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->charset = XML_CHAR_ENCODING_UTF8; 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(xmlCurrentChar(ctxt, len)); 5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)encoding_error: 
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If we detect an UTF8 error that probably mean that the 5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * input encoding didn't get properly advertized in the 5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * declaration header. Report the error and switch the encoding 5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to ISO-Latin-1 (if you don't like this policy, just declare the 5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * encoding !) 5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char buffer[150]; 5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->input->end - ctxt->input->cur >= 4) { 5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur[0], ctxt->input->cur[1], 5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur[2], ctxt->input->cur[3]); 5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]); 5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Input is not proper UTF-8, indicate encoding !\n", 5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST buffer, NULL); 
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->charset = XML_CHAR_ENCODING_8859_1; 5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *len = 1; 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((int) *ctxt->input->cur); 5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlSkipBlankChars: 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: the HTML parser context 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * skip all blanks character found at that point in the input streams. 5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the number of space chars skipped 5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int res = 0; 5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (IS_BLANK_CH(*(ctxt->input->cur))) { 5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((*ctxt->input->cur == 0) && 5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { 
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlPopInput(ctxt); 5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (*(ctxt->input->cur) == '\n') { 5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->line++; ctxt->input->col = 1; 5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else ctxt->input->col++; 5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur++; 5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nbChars++; 5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (*ctxt->input->cur == 0) 5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) res++; 5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(res); 5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list of HTML elements and their properties * 5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
/*
 *  Start Tag: 1 means the start tag can be omitted
 *  End Tag:   1 means the end tag can be omitted
 *             2 means it's forbidden (empty elements)
 *             3 means the tag is stylistic and should be closed easily
 *  Depr:      this element is deprecated
 *  DTD:       1 means that this element is valid only in the Loose DTD
 *             2 means that this element is valid only in the Frameset DTD
 *
 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
 *	, subElements , impliedsubelt , Attributes, userdata
 */

/* Definitions and a couple of vars for HTML Elements */

/*
 * Each XXX macro expands to a comma separated list of element names and
 * NB_XXX is the number of names in that list. The NB_ sums are
 * parenthesized so that they stay correct inside larger expressions.
 */
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define NB_FONTSTYLE 8
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define NB_PHRASE 10
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
#define NB_SPECIAL 16
#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
#define FORMCTRL "input", "select", "textarea", "label", "button"
#define NB_FORMCTRL 5
#define PCDATA
#define NB_PCDATA 0
#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
#define NB_HEADING 6
#define LIST "ul", "ol", "dir", "menu"
#define NB_LIST 4
#define MODIFIER
#define NB_MODIFIER 0
#define FLOW BLOCK,INLINE
#define NB_FLOW (NB_BLOCK + NB_INLINE)
#define EMPTY NULL


static const char* const html_flow[] = { FLOW, NULL } ;
static const char* const html_inline[] = { INLINE, NULL } ;

/* placeholders: elts with content but no subelements */
static const char* const html_pcdata[] = { NULL } ;
#define html_cdata html_pcdata


/* ... and for HTML Attributes */

#define COREATTRS "id", "class", "style", "title"
#define NB_COREATTRS 4
#define I18N "lang", "dir"
#define NB_I18N 2
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
#define NB_EVENTS 9
#define ATTRS COREATTRS,I18N,EVENTS
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
#define CELLHALIGN "align", "char", "charoff"
#define NB_CELLHALIGN 3
#define CELLVALIGN "valign"
#define NB_CELLVALIGN 1

static const char* const html_attrs[] = { ATTRS, NULL } ;
static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
static const char* const core_attrs[] = { COREATTRS, NULL } ;
static const char* const i18n_attrs[] = { I18N, NULL } ;


/* Other declarations that should go inline ... */
static const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
	"href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
	"tabindex", "onfocus", "onblur", NULL } ;
static const char* const target_attr[] = { "target", NULL } ;
static const char* const rows_cols_attr[] = { "rows", "cols", NULL } ;
static const char* const alt_attr[] = { "alt", NULL } ;
static const char* const src_alt_attrs[] = { "src", "alt", NULL } ;
static const char* const href_attrs[] = { "href", NULL } ;
static const char* const clear_attrs[] = { "clear", NULL } ;
static const char* const inline_p[] = { INLINE, "p", NULL } ;

static const char* const flow_param[] = { FLOW, "param", NULL } ;
static const char* const applet_attrs[] = { COREATTRS , "codebase",
		"archive", "alt", "name", "height", "width", "align",
		"hspace", "vspace", NULL } ;
static const char* const area_attrs[] = { "shape", "coords", "href", "nohref",
	"tabindex", "accesskey", "onfocus", "onblur", NULL } ;
static const char* const basefont_attrs[] =
	{ "id", "size", "color", "face", NULL } ;
static const char* const quote_attrs[] = { ATTRS, "cite", NULL } ;
static const char* const body_contents[] = { FLOW, "ins", "del", NULL } ;
static const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;
static const char* const body_depr[] = { "background", "bgcolor", "text",
	"link", "vlink", "alink", NULL } ;
static const char* const button_attrs[] = { ATTRS, "name", "value", "type",
	"disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;


static const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;
static const char* const col_elt[] = { "col", NULL } ;
static const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;
static const char* const compact_attrs[] = { ATTRS, "compact", NULL } ;
static const char* const dl_contents[] = { "dt", "dd", NULL } ;
static const char* const compact_attr[] = { "compact", NULL } ;
static const char* const label_attr[] = { "label", NULL } ;
/* NULL terminated like every other list declared here */
static const char* const fieldset_contents[] = { FLOW, "legend", NULL } ;
static const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;
static const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;
static const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
static const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
static const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
static const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;
static const char* const head_attrs[] = { I18N, "profile", NULL } ;
(Richard Coles)static const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ; 7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL } ; 7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const version_attr[] = { "version", NULL } ; 7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const html_content[] = { "head", "body", "frameset", NULL } ; 7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ; 7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ; 7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ; 7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ; 7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; 7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ; 7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const 
char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ; 7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const align_attr[] = { "align", NULL } ; 7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ; 7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const map_contents[] = { BLOCK, "area", NULL } ; 7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const name_attr[] = { "name", NULL } ; 7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const action_attr[] = { "action", NULL } ; 7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; 7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; 7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const content_attr[] = { "content", NULL } ; 7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const type_attr[] = { "type", NULL } ; 7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; 7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const object_contents[] = { FLOW, "param", NULL } ; 7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ; 7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ; 
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; 7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const option_elt[] = { "option", NULL } ; 7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; 7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ; 7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ; 7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const width_attr[] = { "width", NULL } ; 7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ; 7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ; 7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const language_attr[] = { "language", NULL } ; 7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const select_content[] = { "optgroup", "option", NULL } ; 7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ; 7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const style_attrs[] = { I18N, "media", "title", NULL } ; 7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const table_attrs[] = { ATTRS, "summary", "width", "border", "frame", 
"rules", "cellspacing", "cellpadding", "datapagesize", NULL } ; 7505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const table_depr[] = { "align", "bgcolor", NULL } ; 7515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ; 7525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const tr_elt[] = { "tr", NULL } ; 7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ; 7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ; 7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ; 7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ; 7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const tr_contents[] = { "th", "td", NULL } ; 7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const bgcolor_attr[] = { "bgcolor", NULL } ; 7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const li_elt[] = { "li", NULL } ; 7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const ul_depr[] = { "type", "compact", NULL} ; 7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const dir_attr[] = { "dir", NULL} ; 7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DECL 
(const char**) 7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const htmlElemDesc 7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)html40ElementTable[] = { 7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "a", 0, 0, 0, 0, 0, 0, 1, "anchor ", 7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL 7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form", 7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "acronym", 0, 0, 0, 0, 0, 0, 1, "", 7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "address", 0, 0, 0, 0, 0, 0, 0, "information on author ", 7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL inline_p , NULL , DECL html_attrs, NULL, NULL 7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "applet", 0, 0, 0, 0, 1, 1, 2, "java applet ", 7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL flow_param , NULL , NULL , DECL applet_attrs, NULL 7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ", 7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY , NULL , DECL area_attrs , DECL target_attr, 
DECL alt_attr 7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "b", 0, 3, 0, 0, 0, 0, 1, "bold text style", 7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ", 7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs 7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " , 7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY , NULL , NULL, DECL basefont_attrs, NULL 7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ", 7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr 7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "big", 0, 3, 0, 0, 0, 0, 1, "large text style", 7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ", 8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow , NULL , DECL quote_attrs , NULL, NULL 8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "body", 1, 1, 0, 0, 0, 0, 0, 
"document body ", 8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL 8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "br", 0, 2, 2, 1, 0, 0, 1, "forced line break ", 8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL 8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "button", 0, 0, 0, 0, 0, 0, 2, "push button ", 8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL 8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "caption", 0, 0, 0, 0, 0, 0, 0, "table caption ", 8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ", 8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow , NULL , NULL, DECL html_attrs, NULL 8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "cite", 0, 0, 0, 0, 0, 0, 1, "citation", 8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment", 8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "col", 0, 2, 2, 1, 0, 0, 0, "table column ", 8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY , NULL , DECL col_attrs , NULL, NULL 8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ", 8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL col_elt , "col" , DECL col_attrs , NULL, NULL 8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ", 8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow , NULL , DECL html_attrs, NULL, NULL 8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "del", 0, 0, 0, 0, 0, 0, 2, "deleted text ", 8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow , NULL , DECL edit_attrs , NULL, NULL 8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition", 8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline , NULL , DECL html_attrs, NULL, NULL 8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dir", 0, 0, 0, 0, 1, 1, 0, "directory list", 8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL 8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container", 
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL 8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dl", 0, 0, 0, 0, 0, 0, 0, "definition list ", 8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL 8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dt", 0, 1, 0, 0, 0, 0, 0, "definition term ", 8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "em", 0, 3, 0, 0, 0, 0, 1, "emphasis", 8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ", 8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL embed_attrs, NULL, NULL 8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ", 8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL 8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ", 8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, NULL, DECL font_attrs, NULL 8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles)}, 8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "form", 0, 0, 0, 0, 0, 0, 0, "interactive form ", 8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr 8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " , 8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, NULL, DECL frame_attrs, NULL 8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" , 8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL 8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h1", 0, 0, 0, 0, 0, 0, 0, "heading ", 8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h2", 0, 0, 0, 0, 0, 0, 0, "heading ", 8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h3", 0, 0, 0, 0, 0, 0, 0, "heading ", 8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h4", 0, 0, 0, 0, 0, 0, 0, "heading ", 
8825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 8835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h5", 0, 0, 0, 0, 0, 0, 0, "heading ", 8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h6", 0, 0, 0, 0, 0, 0, 0, "heading ", 8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "head", 1, 1, 0, 0, 0, 0, 0, "document head ", 8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL head_contents, NULL, DECL head_attrs, NULL, NULL 8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " , 8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL 8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "html", 1, 1, 0, 0, 0, 0, 0, "document root element ", 8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL 8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "i", 0, 3, 0, 0, 0, 0, 1, "italic text style", 9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles)}, 9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ", 9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL 9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "img", 0, 2, 2, 1, 0, 0, 1, "embedded image ", 9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs 9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "input", 0, 2, 2, 1, 0, 0, 1, "form control ", 9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL 9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text", 9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, NULL, DECL edit_attrs, NULL, NULL 9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ", 9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, NULL, DECL prompt_attrs, NULL 9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user", 9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ", 9215821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL 9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ", 9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL 9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "li", 0, 1, 1, 0, 0, 0, 0, "list item ", 9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, NULL, DECL html_attrs, NULL, NULL 9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ", 9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL 9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ", 9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr 9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "menu", 0, 0, 0, 0, 1, 1, 0, "menu list ", 9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL 9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ", 9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr 9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ", 9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL noframes_content, "body" , DECL html_attrs, NULL, NULL 9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ", 9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, "div", DECL html_attrs, NULL, NULL 9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ", 9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL 9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list ", 9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL 9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group ", 9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr 9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " , 9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL 9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "p", 0, 1, 0, 0, 0, 0, 0, "paragraph ", 9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL 9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "param", 0, 2, 2, 1, 0, 0, 0, "named property value ", 9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr 9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text ", 9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL 9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ", 9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL quote_attrs, NULL, NULL 9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style", 9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, NULL, DECL html_attrs, NULL 9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.", 9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "script", 0, 0, 0, 0, 0, 0, 2, "script statements ", 
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr 9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "select", 0, 0, 0, 0, 0, 0, 1, "option selector ", 9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL select_content, NULL, DECL select_attrs, NULL, NULL 9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "small", 0, 3, 0, 0, 0, 0, 1, "small text style", 9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ", 9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text", 9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, NULL, DECL html_attrs, NULL 9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis", 9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "style", 0, 0, 0, 0, 0, 0, 0, "style info ", 9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr 
9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "sub", 0, 3, 0, 0, 0, 0, 1, "subscript", 9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "sup", 0, 3, 0, 0, 0, 0, 1, "superscript ", 10025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 10035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "table", 0, 0, 0, 0, 0, 0, 0, "", 10055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL 10065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tbody", 1, 0, 0, 0, 0, 0, 0, "table body ", 10085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL 10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "td", 0, 0, 0, 0, 0, 0, 0, "table data cell", 10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL 10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ", 10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr 10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tfoot", 0, 1, 0, 0, 0, 0, 0, 
"table footer ", 10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL 10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "th", 0, 1, 0, 0, 0, 0, 0, "table header cell", 10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL 10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "thead", 0, 1, 0, 0, 0, 0, 0, "table header ", 10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL 10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "title", 0, 0, 0, 0, 0, 0, 0, "document title ", 10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL 10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tr", 0, 0, 0, 0, 0, 0, 0, "table row ", 10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL 10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style", 10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style", 10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, NULL, DECL html_attrs, NULL 
10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list ", 10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL 10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}, 10405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument", 10415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DECL html_inline, NULL, DECL html_attrs, NULL, NULL 10425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 10445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 10465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * start tags that imply the end of current element 10475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 10485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char * const htmlStartClose[] = { 10495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6", 10505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "dl", "ul", "ol", "menu", "dir", "address", "pre", 10515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "listing", "xmp", "head", NULL, 10525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"head", "p", NULL, 10535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"title", "p", NULL, 10545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"body", "head", "style", "link", "title", "p", NULL, 10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"frameset", "head", "style", "link", "title", "p", NULL, 10565821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles)"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address", 10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "pre", "listing", "xmp", "head", "li", NULL, 10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"hr", "p", "head", NULL, 10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h1", "p", "head", NULL, 10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h2", "p", "head", NULL, 10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h3", "p", "head", NULL, 10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h4", "p", "head", NULL, 10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h5", "p", "head", NULL, 10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h6", "p", "head", NULL, 10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dir", "p", "head", NULL, 10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"address", "p", "head", "ul", NULL, 10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"pre", "p", "head", "ul", NULL, 10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"listing", "p", "head", NULL, 10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"xmp", "p", "head", NULL, 10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"blockquote", "p", "head", NULL, 10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dl", "p", "dt", "menu", "dir", "address", "pre", "listing", 10725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "xmp", "head", NULL, 10735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", 10745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "head", "dd", NULL, 10755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", 10765821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) "head", "dt", NULL, 10775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"ul", "p", "head", "ol", "menu", "dir", "address", "pre", 10785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "listing", "xmp", NULL, 10795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"ol", "p", "head", "ul", NULL, 10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"menu", "p", "head", "ul", NULL, 10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, 10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"div", "p", "head", NULL, 10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"noscript", "p", "head", NULL, 10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"center", "font", "b", "i", "p", "head", NULL, 10855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"a", "a", NULL, 10865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"caption", "p", NULL, 10875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"colgroup", "caption", "colgroup", "col", "p", NULL, 10885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"col", "caption", "col", "p", NULL, 10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", 10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "listing", "xmp", "a", NULL, 10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, 10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, 10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, 10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"thead", "caption", "col", "colgroup", NULL, 
10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", 10965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "tbody", "p", NULL, 10975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", 10985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "tfoot", "tbody", "p", NULL, 10995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"optgroup", "option", NULL, 11005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"option", "option", NULL, 11015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", 11025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "pre", "listing", "xmp", "a", NULL, 11035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NULL 11045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 11055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 11075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list of HTML elements which are supposed not to have 11085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CDATA content and where a p element will be implied 11095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 11105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: extend that list by reading the HTML SGML DTD on 11115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * implied paragraph 11125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 11135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char *const htmlNoContentElements[] = { 11145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "html", 11155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "head", 
11165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL 11175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 11185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 11205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list of HTML attributes which are of content %Script; 11215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NOTE: when adding ones, check htmlIsScriptAttribute() since 11225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * it assumes the name starts with 'on' 11235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 11245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char *const htmlScriptAttributes[] = { 11255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onclick", 11265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "ondblclick", 11275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onmousedown", 11285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onmouseup", 11295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onmouseover", 11305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onmousemove", 11315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onmouseout", 11325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onkeypress", 11335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onkeydown", 11345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onkeyup", 11355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onload", 11365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onunload", 11375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onfocus", 11385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onblur", 11395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onsubmit", 
11405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onrest", 11415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onchange", 11425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "onselect" 11435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 11445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 11465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This table is used by the htmlparser to know what to do with 11475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * broken html pages. By assigning different priorities to different 11485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * elements the parser can decide how to handle extra endtags. 11495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Endtags are only allowed to close elements with lower or equal 11505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * priority. 
11515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 11525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef struct { 11545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *name; 11555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int priority; 11565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} elementPriority; 11575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const elementPriority htmlEndPriority[] = { 11595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"div", 150}, 11605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"td", 160}, 11615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"th", 160}, 11625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"tr", 170}, 11635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"thead", 180}, 11645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"tbody", 180}, 11655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"tfoot", 180}, 11665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"table", 190}, 11675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"head", 200}, 11685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"body", 200}, 11695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {"html", 220}, 11705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) {NULL, 100} /* Default priority */ 11715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 11725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char** htmlStartCloseIndex[100]; 11745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int htmlStartCloseIndexinitialized = 0; 
11755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 11775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 11785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * functions to handle HTML specific data * 11795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 11805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 11815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 11835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlInitAutoClose: 11845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 11855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. 11865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is not reentrant. Call xmlInitParser() once before processing in 11875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * case of use in multithreaded programs. 
11885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 11895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void 11905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlInitAutoClose(void) { 11915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx, i = 0; 11925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlStartCloseIndexinitialized) return; 11945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL; 11965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) indx = 0; 11975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) { 11985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i]; 11995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (htmlStartClose[i] != NULL) i++; 12005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++; 12015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlStartCloseIndexinitialized = 1; 12035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 12045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 12065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlTagLookup: 12075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @tag: The tag name in lowercase 12085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 12095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the HTML tag in the ElementTable 12105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
12115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the related htmlElemDescPtr or NULL if not found. 12125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 12135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlElemDesc * 12145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlTagLookup(const xmlChar *tag) { 12155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int i; 12165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0; i < (sizeof(html40ElementTable) / 12185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sizeof(html40ElementTable[0]));i++) { 12195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) 12205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((htmlElemDescPtr) &html40ElementTable[i]); 12215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 12235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 12245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 12265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlGetEndPriority: 12275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name: The name of the element to look up the priority for. 12285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 12295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Return value: The "endtag" priority. 
12305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) **/ 12315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 12325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlGetEndPriority (const xmlChar *name) { 12335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i = 0; 12345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((htmlEndPriority[i].name != NULL) && 12365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) 12375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++; 12385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(htmlEndPriority[i].priority); 12405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 12415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 12445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckAutoClose: 12455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag: The new tag name 12465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @oldtag: The old tag name 12475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 12485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether the new tag is one of the registered valid tags for 12495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * closing old. 12505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. 
12515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 12525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 if no, 1 if yes. 12535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 12545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 12555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag) 12565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 12575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i, indx; 12585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char **closed = NULL; 12595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlStartCloseIndexinitialized == 0) 12615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlInitAutoClose(); 12625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* inefficient, but not a big deal */ 12645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (indx = 0; indx < 100; indx++) { 12655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) closed = htmlStartCloseIndex[indx]; 12665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (closed == NULL) 12675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 12685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(BAD_CAST * closed, newtag)) 12695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 12705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i = closed - htmlStartClose; 12735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++; 
12745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (htmlStartClose[i] != NULL) { 12755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) { 12765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (1); 12775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++; 12795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 12815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 12825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 12845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoCloseOnClose: 12855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 12865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag: The new tag name 12875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @force: force the tag closure 12885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 12895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows an ending tag to implicitly close other tags. 
12905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 12915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 12925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) 12935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 12945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlElemDesc *info; 12955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i, priority; 12965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) priority = htmlGetEndPriority(newtag); 12985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 12995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = (ctxt->nameNr - 1); i >= 0; i--) { 13005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 13015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(newtag, ctxt->nameTab[i])) 13025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 13035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 13045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * A missplaced endtag can only close elements with lower 13055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * or equal priority, so if we find an element with higher 13065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * priority before we find an element with 13075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * matching name, we just ignore this endtag 13085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 13095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlGetEndPriority(ctxt->nameTab[i]) > priority) 13105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 13115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
13125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i < 0) 13135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 13145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 13155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (!xmlStrEqual(newtag, ctxt->name)) { 13165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) info = htmlTagLookup(ctxt->name); 13175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((info != NULL) && (info->endTag == 3)) { 13185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 13195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Opening and ending tag mismatch: %s and %s\n", 13205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) newtag, ctxt->name); 13215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 13225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 13235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, ctxt->name); 13245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 13255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 13265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 13275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 13285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 13295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoCloseOnEnd: 13305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 13315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 13325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Close all remaining tags at the end of the stream 13335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 
13345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 13355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt) 13365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 13375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i; 13385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 13395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr == 0) 13405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 13415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = (ctxt->nameNr - 1); i >= 0; i--) { 13425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 13435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, ctxt->name); 13445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 13455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 13465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 13475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 13485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 13495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoClose: 13505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 13515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag: The new tag name or NULL 13525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 13535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to implicitly close other tags. 13545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list is kept in htmlStartClose array. 
This function is 13555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * called when a new tag has been detected and generates the 13565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * appropriates closes if possible/needed. 13575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If newtag is NULL this mean we are at the end of the resource 13585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and we should check 13595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 13605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 13615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) 13625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 13635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((newtag != NULL) && (ctxt->name != NULL) && 13645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlCheckAutoClose(newtag, ctxt->name))) { 13655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 13665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, ctxt->name); 13675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 13685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 13695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (newtag == NULL) { 13705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 13715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 13725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 13735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((newtag == NULL) && (ctxt->name != NULL) && 13745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((xmlStrEqual(ctxt->name, BAD_CAST "head")) || 
13755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(ctxt->name, BAD_CAST "body")) || 13765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(ctxt->name, BAD_CAST "html")))) { 13775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 13785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, ctxt->name); 13795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 13805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 13815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 13825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 13835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 13845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoCloseTag: 13855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @doc: the HTML document 13865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name: The tag name 13875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elem: the HTML element 13885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 13895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to implicitly close other tags. 13905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list is kept in htmlStartClose array. This function checks 13915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if the element or one of it's children would autoclose the 13925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * given tag. 
13935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 13945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if autoclose, 0 otherwise 13955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 13965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 13975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) { 13985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodePtr child; 13995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 14005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (elem == NULL) return(1); 14015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, elem->name)) return(0); 14025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlCheckAutoClose(elem->name, name)) return(1); 14035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) child = elem->children; 14045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (child != NULL) { 14055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlAutoCloseTag(doc, name, child)) return(1); 14065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) child = child->next; 14075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 14095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 14105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 14115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 14125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlIsAutoClosed: 14135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @doc: the HTML document 14145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elem: the HTML element 14155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
14165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to implicitly close other tags. 14175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list is kept in htmlStartClose array. This function checks 14185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if a tag is autoclosed by one of it's child 14195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 14205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if autoclosed, 0 otherwise 14215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 14225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 14235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) { 14245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodePtr child; 14255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 14265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (elem == NULL) return(1); 14275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) child = elem->children; 14285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (child != NULL) { 14295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlAutoCloseTag(doc, elem->name, child)) return(1); 14305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) child = child->next; 14315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 14335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 14345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 14355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 14365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckImplied: 14375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 
14385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag: The new tag name 14395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 14405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to exists only implicitly 14415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * called when a new tag has been detected and generates the 14425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * appropriates implicit tags if missing 14435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 14445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 14455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { 14465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i; 14475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 14485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->options & HTML_PARSE_NOIMPLIED) 14495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!htmlOmittedDefaultValue) 14515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(newtag, BAD_CAST"html")) 14535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr <= 0) { 14555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePush(ctxt, BAD_CAST"html"); 14565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) 14575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); 14585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
14595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head"))) 14605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr <= 1) && 14625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((xmlStrEqual(newtag, BAD_CAST"script")) || 14635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(newtag, BAD_CAST"style")) || 14645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(newtag, BAD_CAST"meta")) || 14655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(newtag, BAD_CAST"link")) || 14665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(newtag, BAD_CAST"title")) || 14675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(newtag, BAD_CAST"base")))) { 14685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->html >= 3) { 14695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* we already saw or generated an <head> before */ 14705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 14735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * dropped OBJECT ... i you put it first BODY will be 14745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * assumed ! 
14755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 14765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePush(ctxt, BAD_CAST"head"); 14775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) 14785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); 14795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && 14805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!xmlStrEqual(newtag, BAD_CAST"frame")) && 14815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { 14825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->html >= 10) { 14835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* we already saw or generated a <body> before */ 14845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0;i < ctxt->nameNr;i++) { 14875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { 14885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { 14915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 14925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 14955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePush(ctxt, BAD_CAST"body"); 
14965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) 14975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); 14985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 14995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 15005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 15025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckParagraph 15035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 15045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 15055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check whether a p element need to be implied before inserting 15065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * characters in the current element. 15075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 15085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if a paragraph has been inserted, 0 if not and -1 15095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * in case of error. 
15105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 15115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 15135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckParagraph(htmlParserCtxtPtr ctxt) { 15145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *tag; 15155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i; 15165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 15185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 15195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) tag = ctxt->name; 15205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (tag == NULL) { 15215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoClose(ctxt, BAD_CAST"p"); 15225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckImplied(ctxt, BAD_CAST"p"); 15235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePush(ctxt, BAD_CAST"p"); 15245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) 15255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL); 15265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 15275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 15285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!htmlOmittedDefaultValue) 15295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 15305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0; htmlNoContentElements[i] != NULL; i++) { 15315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) { 
15325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoClose(ctxt, BAD_CAST"p"); 15335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckImplied(ctxt, BAD_CAST"p"); 15345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePush(ctxt, BAD_CAST"p"); 15355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) 15365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL); 15375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 15385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 15395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 15405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 15415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 15425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 15445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlIsScriptAttribute: 15455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name: an attribute name 15465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 15475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check if an attribute is of content type Script 15485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 15495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 is the attribute is a script 0 otherwise 15505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 15515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 15525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlIsScriptAttribute(const xmlChar *name) { 15535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int i; 15545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
15555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) 15565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 15575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 15585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * all script attributes start with 'on' 15595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 15605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((name[0] != 'o') || (name[1] != 'n')) 15615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 15625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0; 15635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]); 15645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++) { 15655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i])) 15665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 15675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 15685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 15695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 15705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 15725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 15735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list of HTML predefined entities * 15745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 15755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 15765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
15775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const htmlEntityDesc html40EntitiesTable[] = { 15795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 15805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the 4 absolute ones, plus apostrophe. 15815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 15825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, 15835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 38, "amp", "ampersand, U+0026 ISOnum" }, 15845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 39, "apos", "single quote" }, 15855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 60, "lt", "less-than sign, U+003C ISOnum" }, 15865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 62, "gt", "greater-than sign, U+003E ISOnum" }, 15875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 15885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 15895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * A bunch still in the 128-255 range 15905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Replacing them depend really on the charset used. 
15915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 15925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" }, 15935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" }, 15945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 162, "cent", "cent sign, U+00A2 ISOnum" }, 15955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 163, "pound","pound sign, U+00A3 ISOnum" }, 15965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 164, "curren","currency sign, U+00A4 ISOnum" }, 15975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" }, 15985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" }, 15995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 167, "sect", "section sign, U+00A7 ISOnum" }, 16005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" }, 16015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 169, "copy", "copyright sign, U+00A9 ISOnum" }, 16025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" }, 16035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" }, 16045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 172, "not", "not sign, U+00AC ISOnum" }, 16055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" }, 16065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" }, 
16075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" }, 16085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 176, "deg", "degree sign, U+00B0 ISOnum" }, 16095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" }, 16105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" }, 16115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" }, 16125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" }, 16135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 181, "micro","micro sign, U+00B5 ISOnum" }, 16145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" }, 16155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" }, 16165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" }, 16175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" }, 16185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" }, 16195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" }, 16205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" }, 16215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 
189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" }, 16225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" }, 16235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" }, 16245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" }, 16255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" }, 16265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" }, 16275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" }, 16285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" }, 16295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" }, 16305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" }, 16315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" }, 16325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" }, 16335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" }, 16345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" }, 
16355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" }, 16365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" }, 16375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" }, 16385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" }, 16395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" }, 16405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" }, 16415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" }, 16425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" }, 16435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" }, 16445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" }, 16455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" }, 16465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" }, 16475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 215, "times","multiplication sign, U+00D7 ISOnum" }, 16485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" }, 16495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 217, 
"Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" }, 16505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" }, 16515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" }, 16525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" }, 16535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" }, 16545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" }, 16555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" }, 16565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" }, 16575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" }, 16585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" }, 16595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" }, 16605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" }, 16615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" }, 16625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" }, 16635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 231, "ccedil","latin small 
letter c with cedilla, U+00E7 ISOlat1" }, 16645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" }, 16655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" }, 16665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" }, 16675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" }, 16685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" }, 16695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" }, 16705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" }, 16715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" }, 16725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" }, 16735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" }, 16745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" }, 16755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" }, 16765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" }, 16775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" }, 16785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 246, 
"ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" }, 16795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 247, "divide","division sign, U+00F7 ISOnum" }, 16805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" }, 16815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" }, 16825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" }, 16835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" }, 16845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" }, 16855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" }, 16865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" }, 16875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" }, 16885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 16895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" }, 16905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" }, 16915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" }, 16925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" }, 16935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 376, "Yuml", "latin capital letter Y with diaeresis, 
U+0178 ISOlat2" }, 16945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 16955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 16965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Anything below should really be kept as entities references 16975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 16985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" }, 16995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" }, 17015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 732, "tilde","small tilde, U+02DC ISOdia" }, 17025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 913, "Alpha","greek capital letter alpha, U+0391" }, 17045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 914, "Beta", "greek capital letter beta, U+0392" }, 17055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" }, 17065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" }, 17075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 917, "Epsilon","greek capital letter epsilon, U+0395" }, 17085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 918, "Zeta", "greek capital letter zeta, U+0396" }, 17095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 919, "Eta", "greek capital letter eta, U+0397" }, 17105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" }, 17115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 921, "Iota", "greek capital letter iota, U+0399" }, 
17125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 922, "Kappa","greek capital letter kappa, U+039A" }, 17135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" }, 17145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 924, "Mu", "greek capital letter mu, U+039C" }, 17155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 925, "Nu", "greek capital letter nu, U+039D" }, 17165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" }, 17175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 927, "Omicron","greek capital letter omicron, U+039F" }, 17185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" }, 17195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 929, "Rho", "greek capital letter rho, U+03A1" }, 17205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" }, 17215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 932, "Tau", "greek capital letter tau, U+03A4" }, 17225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" }, 17235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" }, 17245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 935, "Chi", "greek capital letter chi, U+03A7" }, 17255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" }, 17265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" }, 17275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 945, "alpha","greek small 
letter alpha, U+03B1 ISOgrk3" }, 17295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" }, 17305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" }, 17315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" }, 17325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" }, 17335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" }, 17345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" }, 17355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" }, 17365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" }, 17375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" }, 17385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" }, 17395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" }, 17405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" }, 17415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" }, 17425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 959, "omicron","greek small letter omicron, U+03BF NEW" }, 17435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" }, 17445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 961, "rho", "greek small 
letter rho, U+03C1 ISOgrk3" }, 17455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" }, 17465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" }, 17475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" }, 17485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" }, 17495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" }, 17505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" }, 17515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" }, 17525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" }, 17535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" }, 17545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" }, 17555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" }, 17565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8194, "ensp", "en space, U+2002 ISOpub" }, 17585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8195, "emsp", "em space, U+2003 ISOpub" }, 17595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8201, "thinsp","thin space, U+2009 ISOpub" }, 17605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" }, 17615821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles){ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" }, 17625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" }, 17635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" }, 17645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8211, "ndash","en dash, U+2013 ISOpub" }, 17655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8212, "mdash","em dash, U+2014 ISOpub" }, 17665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" }, 17675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" }, 17685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" }, 17695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8220, "ldquo","left double quotation mark, U+201C ISOnum" }, 17705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8221, "rdquo","right double quotation mark, U+201D ISOnum" }, 17715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" }, 17725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8224, "dagger","dagger, U+2020 ISOpub" }, 17735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8225, "Dagger","double dagger, U+2021 ISOpub" }, 17745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" }, 17765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" }, 17775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles){ 8240, "permil","per mille sign, U+2030 ISOtech" }, 17795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" }, 17815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" }, 17825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" }, 17845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" }, 17855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8254, "oline","overline = spacing overscore, U+203E NEW" }, 17875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8260, "frasl","fraction slash, U+2044 NEW" }, 17885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8364, "euro", "euro sign, U+20AC NEW" }, 17905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 17915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" }, 17925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" }, 17935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" }, 17945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8482, "trade","trade mark sign, U+2122 ISOnum" }, 17955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" }, 
17965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8592, "larr", "leftwards arrow, U+2190 ISOnum" }, 17975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8593, "uarr", "upwards arrow, U+2191 ISOnum" }, 17985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" }, 17995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8595, "darr", "downwards arrow, U+2193 ISOnum" }, 18005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8596, "harr", "left right arrow, U+2194 ISOamsa" }, 18015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" }, 18025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" }, 18035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" }, 18045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" }, 18055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" }, 18065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" }, 18075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8704, "forall","for all, U+2200 ISOtech" }, 18095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8706, "part", "partial differential, U+2202 ISOtech" }, 18105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8707, "exist","there exists, U+2203 ISOtech" }, 18115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" }, 18125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8711, 
"nabla","nabla = backward difference, U+2207 ISOtech" }, 18135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8712, "isin", "element of, U+2208 ISOtech" }, 18145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8713, "notin","not an element of, U+2209 ISOtech" }, 18155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8715, "ni", "contains as member, U+220B ISOtech" }, 18165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" }, 18175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8721, "sum", "n-ary summation, U+2211 ISOamsb" }, 18185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8722, "minus","minus sign, U+2212 ISOtech" }, 18195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8727, "lowast","asterisk operator, U+2217 ISOtech" }, 18205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8730, "radic","square root = radical sign, U+221A ISOtech" }, 18215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8733, "prop", "proportional to, U+221D ISOtech" }, 18225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8734, "infin","infinity, U+221E ISOtech" }, 18235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8736, "ang", "angle, U+2220 ISOamso" }, 18245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8743, "and", "logical and = wedge, U+2227 ISOtech" }, 18255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8744, "or", "logical or = vee, U+2228 ISOtech" }, 18265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8745, "cap", "intersection = cap, U+2229 ISOtech" }, 18275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8746, "cup", "union = cup, U+222A ISOtech" }, 18285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8747, "int", "integral, U+222B ISOtech" }, 18295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles){ 8756, "there4","therefore, U+2234 ISOtech" }, 18305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" }, 18315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8773, "cong", "approximately equal to, U+2245 ISOtech" }, 18325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" }, 18335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8800, "ne", "not equal to, U+2260 ISOtech" }, 18345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8801, "equiv","identical to, U+2261 ISOtech" }, 18355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8804, "le", "less-than or equal to, U+2264 ISOtech" }, 18365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" }, 18375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8834, "sub", "subset of, U+2282 ISOtech" }, 18385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8835, "sup", "superset of, U+2283 ISOtech" }, 18395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8836, "nsub", "not a subset of, U+2284 ISOamsn" }, 18405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8838, "sube", "subset of or equal to, U+2286 ISOtech" }, 18415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8839, "supe", "superset of or equal to, U+2287 ISOtech" }, 18425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" }, 18435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" }, 18445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" }, 18455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8901, 
"sdot", "dot operator, U+22C5 ISOamsb" }, 18465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" }, 18475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8969, "rceil","right ceiling, U+2309 ISOamsc" }, 18485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" }, 18495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8971, "rfloor","right floor, U+230B ISOamsc" }, 18505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" }, 18515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" }, 18525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9674, "loz", "lozenge, U+25CA ISOpub" }, 18535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9824, "spades","black spade suit, U+2660 ISOpub" }, 18555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" }, 18565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" }, 18575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9830, "diams","black diamond suit, U+2666 ISOpub" }, 18585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 18605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 18625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 18635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Commodity functions to handle entities * 
18645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 18655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 18665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 18685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Macro used to grow the current buffer. 18695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 18705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define growBuffer(buffer) { \ 18715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *tmp; \ 18725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buffer##_size *= 2; \ 18735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 18745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (tmp == NULL) { \ 18755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, "growing buffer\n"); \ 18765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buffer); \ 18775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); \ 18785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } \ 18795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buffer = tmp; \ 18805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 18815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 18835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlEntityLookup: 18845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name: the entity name 18855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 18865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the given entity in EntitiesTable 
18875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 18885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: the linear scan is really ugly, an hash table is really needed. 18895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 18905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the associated htmlEntityDescPtr if found, NULL otherwise. 18915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 18925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlEntityDesc * 18935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlEntityLookup(const xmlChar *name) { 18945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int i; 18955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 18965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0;i < (sizeof(html40EntitiesTable)/ 18975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sizeof(html40EntitiesTable[0]));i++) { 18985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) { 18995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((htmlEntityDescPtr) &html40EntitiesTable[i]); 19005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 19035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 19045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 19055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 19065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlEntityValueLookup: 19075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value: the entity's unicode value 19085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
19095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the given entity in EntitiesTable 19105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 19115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: the linear scan is really ugly, an hash table is really needed. 19125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 19135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the associated htmlEntityDescPtr if found, NULL otherwise. 19145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 19155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlEntityDesc * 19165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlEntityValueLookup(unsigned int value) { 19175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int i; 19185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 19195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0;i < (sizeof(html40EntitiesTable)/ 19205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sizeof(html40EntitiesTable[0]));i++) { 19215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (html40EntitiesTable[i].value >= value) { 19225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (html40EntitiesTable[i].value > value) 19235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 19245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((htmlEntityDescPtr) &html40EntitiesTable[i]); 19255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 19285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 19295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 19305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 
19315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * UTF8ToHtml: 19325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @out: a pointer to an array of bytes to store the result 19335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @outlen: the length of @out 19345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @in: a pointer to an array of UTF-8 chars 19355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @inlen: the length of @in 19365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 19375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Take a block of UTF-8 chars in and try to convert it to an ASCII 19385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * plus HTML entities block of chars out. 19395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 19405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 19415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @inlen after return is the number of octets consumed 19425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * as the return value is positive, else unpredictable. 19435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @outlen after return is the number of octets consumed. 
19445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 19455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 19465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)UTF8ToHtml(unsigned char* out, int *outlen, 19475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* in, int *inlen) { 19485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* processed = in; 19495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* outend; 19505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* outstart = out; 19515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* instart = in; 19525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* inend; 19535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c, d; 19545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int trailing; 19555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 19565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 19575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in == NULL) { 19585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 19595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * initialization nothing to do 19605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 19615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = 0; 19625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = 0; 19635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 19645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inend = in + (*inlen); 19665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) outend = 
out + (*outlen); 19675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (in < inend) { 19685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) d = *in++; 19695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (d < 0x80) { c= d; trailing= 0; } 19705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (d < 0xC0) { 19715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* trailing byte in leading position */ 19725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 19735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 19745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-2); 19755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 19765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 19775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 19785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 19795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* no chance for this in Ascii */ 19805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 19815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 19825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-2); 19835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 19855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (inend - in < trailing) { 19865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 19875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
19895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( ; trailing; trailing--) { 19905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 19915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 19925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c <<= 6; 19935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c |= d & 0x3F; 19945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 19955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 19965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* assertion: c is a single UTF-4 value */ 19975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c < 0x80) { 19985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out + 1 >= outend) 19995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 20005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = c; 20015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 20025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int len; 20035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlEntityDesc * ent; 20045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *cp; 20055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char nbuf[16]; 20065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 20085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to lookup a predefined HTML entity for it 20095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 20105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ent = htmlEntityValueLookup(c); 20125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ent == NULL) { 
20135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) snprintf(nbuf, sizeof(nbuf), "#%u", c); 20145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cp = nbuf; 20155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 20165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 20175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cp = ent->name; 20185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = strlen(cp); 20195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out + 2 + len >= outend) 20205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 20215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = '&'; 20225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(out, cp, len); 20235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out += len; 20245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = ';'; 20255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 20265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) processed = in; 20275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 20285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 20295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 20305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 20315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 20325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 20345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlEncodeEntities: 20355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @out: a pointer to an array of bytes to store the result 20365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @outlen: the length of @out 
20375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @in: a pointer to an array of UTF-8 chars 20385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @inlen: the length of @in 20395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @quoteChar: the quote character to escape (' or ") or zero. 20405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 20415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Take a block of UTF-8 chars in and try to convert it to an ASCII 20425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * plus HTML entities block of chars out. 20435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 20445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 20455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @inlen after return is the number of octets consumed 20465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * as the return value is positive, else unpredictable. 20475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @outlen after return is the number of octets consumed. 
20485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 20495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 20505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlEncodeEntities(unsigned char* out, int *outlen, 20515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* in, int *inlen, int quoteChar) { 20525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* processed = in; 20535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* outend; 20545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* outstart = out; 20555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* instart = in; 20565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char* inend; 20575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c, d; 20585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int trailing; 20595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) 20615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 20625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) outend = out + (*outlen); 20635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inend = in + (*inlen); 20645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (in < inend) { 20655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) d = *in++; 20665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (d < 0x80) { c= d; trailing= 0; } 20675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (d < 0xC0) { 20685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* trailing byte in leading position */ 
20695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 20705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 20715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-2); 20725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 20735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 20745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 20755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 20765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* no chance for this in Ascii */ 20775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 20785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 20795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-2); 20805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 20815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (inend - in < trailing) 20835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 20845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (trailing--) { 20865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (((d= *in++) & 0xC0) != 0x80) { 20875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 20885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 20895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-2); 20905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 20915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) c <<= 6; 20925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c |= d & 0x3F; 20935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 20945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* assertion: c is a single UTF-4 value */ 20965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((c < 0x80) && (c != (unsigned int) quoteChar) && 20975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (c != '&') && (c != '<') && (c != '>')) { 20985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out >= outend) 20995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 21005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = c; 21015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 21025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlEntityDesc * ent; 21035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *cp; 21045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char nbuf[16]; 21055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int len; 21065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 21085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to lookup a predefined HTML entity for it 21095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 21105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ent = htmlEntityValueLookup(c); 21115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ent == NULL) { 21125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) snprintf(nbuf, sizeof(nbuf), "#%u", c); 21135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cp = nbuf; 21145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
21155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 21165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cp = ent->name; 21175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = strlen(cp); 21185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out + 2 + len > outend) 21195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 21205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = '&'; 21215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(out, cp, len); 21225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out += len; 21235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = ';'; 21245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 21255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) processed = in; 21265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 21275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *outlen = out - outstart; 21285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *inlen = processed - instart; 21295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 21305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 21315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 21335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 21345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Commodity functions to handle streams * 21355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 21365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 21375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles)/** 21395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewInputStream: 21405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 21415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 21425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a new input stream structure 21435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new input stream or NULL 21445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 21455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlParserInputPtr 21465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewInputStream(htmlParserCtxtPtr ctxt) { 21475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserInputPtr input; 21485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput)); 21505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (input == NULL) { 21515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 21525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 21535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 21545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(input, 0, sizeof(htmlParserInput)); 21555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->filename = NULL; 21565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->directory = NULL; 21575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->base = NULL; 21585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->cur = NULL; 21595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->buf = NULL; 21605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) input->line = 1; 21615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->col = 1; 21625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->buf = NULL; 21635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->free = NULL; 21645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->version = NULL; 21655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->consumed = 0; 21665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->length = 0; 21675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(input); 21685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 21695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 21725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 21735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Commodity functions, cleanup needed ? 
* 21745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 21755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 21765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 21775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * all tags allowing pc data from the html 4.01 loose dtd 21785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NOTE: it might be more apropriate to integrate this information 21795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * into the html40ElementTable array but I don't want to risk any 21805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * binary incomptibility 21815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 21825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char *allowPCData[] = { 21835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big", 21845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "blockquote", "body", "button", "caption", "center", "cite", "code", 21855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2", 21865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend", 21875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp", 21885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "small", "span", "strike", "strong", "td", "th", "tt", "u", "var" 21895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 21905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 21915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 21925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
* areBlanks: 21935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 21945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str: a xmlChar * 21955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @len: the size of @str 21965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 21975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Is this a sequence of blank chars that one can ignore ? 21985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 21995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if ignorable 0 otherwise. 22005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 22015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) { 22035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int i; 22045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int j; 22055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlNodePtr lastChild; 22065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlDtdPtr dtd; 22075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (j = 0;j < len;j++) 22095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!(IS_BLANK_CH(str[j]))) return(0); 22105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == 0) return(1); 22125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '<') return(0); 22135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->name == NULL) 22145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 22155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if 
(xmlStrEqual(ctxt->name, BAD_CAST"html")) 22165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 22175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(ctxt->name, BAD_CAST"head")) 22185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 22195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Only strip CDATA children of the body tag for strict HTML DTDs */ 22215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) { 22225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dtd = xmlGetIntSubset(ctxt->myDoc); 22235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dtd != NULL && dtd->ExternalID != NULL) { 22245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") || 22255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN")) 22265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 22275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->node == NULL) return(0); 22315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lastChild = xmlGetLastChild(ctxt->node); 22325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((lastChild) && (lastChild->type == XML_COMMENT_NODE)) 22335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lastChild = lastChild->prev; 22345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (lastChild == NULL) { 22355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) if ((ctxt->node->type != XML_ELEMENT_NODE) && 22365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->node->content != NULL)) return(0); 22375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* keep ws in constructs like ...<b> </b>... 22385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for all tags "b" allowing PCDATA */ 22395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { 22405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) { 22415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 22425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (xmlNodeIsText(lastChild)) { 22455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 22465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 22475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* keep ws in constructs like <p><b>xy</b> <i>z</i><p> 22485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for all tags "p" allowing PCDATA */ 22495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) { 22505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) { 22515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 22525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(1); 22565821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles)} 22575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 22595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewDocNoDtD: 22605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URI: URI for the dtd, or NULL 22615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ExternalID: the external ID of the DTD, or NULL 22625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 22635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Creates a new HTML document without a DTD node if @URI and @ExternalID 22645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * are NULL 22655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 22665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns a new document, do not initialize the DTD if not provided 22675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 22685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 22695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { 22705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlDocPtr cur; 22715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 22735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Allocate a new document and fill the fields. 
22745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 22755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc)); 22765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) { 22775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "HTML document creation failed\n"); 22785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 22795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 22805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(cur, 0, sizeof(xmlDoc)); 22815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->type = XML_HTML_DOCUMENT_NODE; 22835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->version = NULL; 22845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->intSubset = NULL; 22855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->doc = cur; 22865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->name = NULL; 22875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->children = NULL; 22885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->extSubset = NULL; 22895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->oldNs = NULL; 22905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->encoding = NULL; 22915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->standalone = 1; 22925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->compression = 0; 22935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->ids = NULL; 22945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->refs = NULL; 22955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->_private = NULL; 22965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
cur->charset = XML_CHAR_ENCODING_UTF8; 22975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT; 22985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ExternalID != NULL) || 22995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (URI != NULL)) 23005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); 23015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(cur); 23025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 23035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 23055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewDoc: 23065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URI: URI for the dtd, or NULL 23075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ExternalID: the external ID of the DTD, or NULL 23085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 23095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Creates a new HTML document 23105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 23115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns a new document 23125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 23135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 23145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) { 23155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((URI == NULL) && (ExternalID == NULL)) 23165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(htmlNewDocNoDtD( 23175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd", 23185821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN")); 23195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(htmlNewDocNoDtD(URI, ExternalID)); 23215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 23225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 23255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 23265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The parser itself * 23275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Relates to http://www.w3.org/TR/html40 * 23285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 23295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 23305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 23325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 23335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The parser itself * 23345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 23355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 23365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt); 23385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 
23405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseHTMLName: 23415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 23425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 23435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML tag or attribute name, note that we convert it to lowercase 23445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * since HTML names are not case-sensitive. 23455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 23465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the Tag Name parsed or NULL 23475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 23485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * 23505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseHTMLName(htmlParserCtxtPtr ctxt) { 23515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i = 0; 23525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar loc[HTML_PARSER_BUFFER_SIZE]; 23535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && 23555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR != ':') && (CUR != '.')) return(NULL); 23565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((i < HTML_PARSER_BUFFER_SIZE) && 23585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || 23595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR == ':') || (CUR == '-') || (CUR == '_') || 23605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR == '.'))) { 
23615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; 23625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else loc[i] = CUR; 23635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++; 23645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 23665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 23675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(xmlDictLookup(ctxt->dict, loc, i)); 23695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 23705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 23735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseHTMLName_nonInvasive: 23745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 23755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 23765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML tag or attribute name, note that we convert it to lowercase 23775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * since HTML names are not case-sensitive, this doesn't consume the data 23785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * from the stream, it's a look-ahead 23795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 23805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the Tag Name parsed or NULL 23815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 23825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * 
23845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { 23855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i = 0; 23865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar loc[HTML_PARSER_BUFFER_SIZE]; 23875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && 23895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(1) != ':')) return(NULL); 23905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((i < HTML_PARSER_BUFFER_SIZE) && 23925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || 23935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { 23945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; 23955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else loc[i] = NXT(1+i); 23965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i++; 23975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 23985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 23995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(xmlDictLookup(ctxt->dict, loc, i)); 24005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 24015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 24045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseName: 24055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser 
context 24065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 24075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML name, this routine is case sensitive. 24085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 24095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the Name parsed or NULL 24105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 24115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * 24135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseName(htmlParserCtxtPtr ctxt) { 24145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *in; 24155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *ret; 24165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int count = 0; 24175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 24195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 24215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Accelerator for simple ASCII names 24225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 24235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in = ctxt->input->cur; 24245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (((*in >= 0x61) && (*in <= 0x7A)) || 24255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((*in >= 0x41) && (*in <= 0x5A)) || 24265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*in == '_') || (*in == ':')) { 24275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in++; 24285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (((*in >= 0x61) && (*in <= 0x7A)) || 
24295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((*in >= 0x41) && (*in <= 0x5A)) || 24305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((*in >= 0x30) && (*in <= 0x39)) || 24315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*in == '_') || (*in == '-') || 24325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*in == ':') || (*in == '.')) 24335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in++; 24345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((*in > 0) && (*in < 0x80)) { 24355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) count = in - ctxt->input->cur; 24365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 24375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur = in; 24385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nbChars += count; 24395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->col += count; 24405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 24415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 24425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 24435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(htmlParseNameComplex(ctxt)); 24445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 24455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * 24475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseNameComplex(xmlParserCtxtPtr ctxt) { 24485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int len = 0, l; 24495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c; 24505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int count = 0; 24515821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) 24525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 24535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handler for more complex cases 24545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 24555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 24565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = CUR_CHAR(l); 24575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 24585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!IS_LETTER(c) && (c != '_') && 24595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (c != ':'))) { 24605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 24615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 24625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 24645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((IS_LETTER(c)) || (IS_DIGIT(c)) || 24655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (c == '.') || (c == '-') || 24665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (c == '_') || (c == ':') || 24675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (IS_COMBINING(c)) || 24685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (IS_EXTENDER(c)))) { 24695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (count++ > 100) { 24705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) count = 0; 24715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 24725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 24735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len += l; 24745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(l); 
24755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = CUR_CHAR(l); 24765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 24775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 24785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 24795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 24825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseHTMLAttribute: 24835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 24845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @stop: a char stop value 24855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 24865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML attribute value till the stop (quote), if 24875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * stop is 0 then it stops at the first space 24885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 24895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the attribute parsed or NULL 24905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 24915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 24925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar * 24935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { 24945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *buffer = NULL; 24955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int buffer_size = 0; 24965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *out = NULL; 24975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const 
xmlChar *name = NULL; 24985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *cur = NULL; 24995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlEntityDesc * ent; 25005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 25025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * allocate a translation buffer. 25035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 25045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buffer_size = HTML_PARSER_BUFFER_SIZE; 25055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 25065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buffer == NULL) { 25075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, "buffer allocation failed\n"); 25085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 25095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out = buffer; 25115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 25135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Ok loop until we reach one of the ending chars 25145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 25155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((CUR != 0) && (CUR != stop)) { 25165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((stop == 0) && (CUR == '>')) break; 25175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((stop == 0) && (IS_BLANK_CH(CUR))) break; 25185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '&') { 25195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (NXT(1) == '#') { 
25205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c; 25215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int bits; 25225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = htmlParseCharRef(ctxt); 25245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c < 0x80) 25255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ = c; bits= -6; } 25265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x800) 25275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } 25285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x10000) 25295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } 25305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 25315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } 25325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( ; bits >= 0; bits-= 6) { 25345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = ((c >> bits) & 0x3F) | 0x80; 25355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out - buffer > buffer_size - 100) { 25385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx = out - buffer; 25395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) growBuffer(buffer); 25415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out = &buffer[indx]; 25425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
25435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 25445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ent = htmlParseEntityRef(ctxt, &name); 25455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 25465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = '&'; 25475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out - buffer > buffer_size - 100) { 25485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx = out - buffer; 25495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) growBuffer(buffer); 25515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out = &buffer[indx]; 25525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (ent == NULL) { 25545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = '&'; 25555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = name; 25565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (*cur != 0) { 25575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out - buffer > buffer_size - 100) { 25585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx = out - buffer; 25595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) growBuffer(buffer); 25615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out = &buffer[indx]; 25625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = *cur++; 25645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 25665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c; 
25675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int bits; 25685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out - buffer > buffer_size - 100) { 25705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx = out - buffer; 25715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) growBuffer(buffer); 25735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out = &buffer[indx]; 25745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = ent->value; 25765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c < 0x80) 25775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ = c; bits= -6; } 25785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x800) 25795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } 25805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x10000) 25815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } 25825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 25835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } 25845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( ; bits >= 0; bits-= 6) { 25865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = ((c >> bits) & 0x3F) | 0x80; 25875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 25895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
25905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 25915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c; 25925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int bits, l; 25935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (out - buffer > buffer_size - 100) { 25955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx = out - buffer; 25965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) growBuffer(buffer); 25985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out = &buffer[indx]; 25995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 26005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = CUR_CHAR(l); 26015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c < 0x80) 26025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ = c; bits= -6; } 26035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x800) 26045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } 26055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x10000) 26065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } 26075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 26085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } 26095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( ; bits >= 0; bits-= 6) { 26115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out++ = ((c >> bits) & 0x3F) | 0x80; 26125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
26135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 26145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 26155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 26165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *out = 0; 26175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(buffer); 26185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 26195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 26215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseEntityRef: 26225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 26235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str: location to store the entity name 26245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 26255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML ENTITY references 26265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 26275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [68] EntityRef ::= '&' Name ';' 26285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 26295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the associated htmlEntityDescPtr if found, or NULL otherwise, 26305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if non-NULL *str will have to be freed by the caller. 
26315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 26325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlEntityDesc * 26335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) { 26345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 26355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlEntityDesc * ent = NULL; 26365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (str != NULL) *str = NULL; 26385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 26395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '&') { 26415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 26425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = htmlParseName(ctxt); 26435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 26445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 26455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseEntityRef: no name\n", NULL, NULL); 26465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 26475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 26485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == ';') { 26495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (str != NULL) 26505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *str = name; 26515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 26535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the entity 
in the table. 26545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 26555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ent = htmlEntityLookup(name); 26565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ent != NULL) /* OK that's ugly !!! */ 26575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 26585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 26595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, 26605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseEntityRef: expecting ';'\n", 26615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 26625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (str != NULL) 26635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *str = name; 26645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 26655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 26665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 26675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ent); 26685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 26695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 26715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseAttValue: 26725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 26735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 26745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a value for an attribute 26755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Note: the parser won't do substitution of entities here, this 26765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * will be handled later in xmlStringGetNodeList, unless it was 
26775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * asked for ctxt->replaceEntities != 0 26785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 26795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the AttValue parsed or NULL. 26805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 26815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar * 26835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseAttValue(htmlParserCtxtPtr ctxt) { 26845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *ret = NULL; 26855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '"') { 26875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 26885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = htmlParseHTMLAttribute(ctxt, '"'); 26895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '"') { 26905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 26915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "AttValue: \" expected\n", NULL, NULL); 26925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 26935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 26945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (CUR == '\'') { 26955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 26965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = htmlParseHTMLAttribute(ctxt, '\''); 26975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '\'') { 26985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 26995821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) "AttValue: ' expected\n", NULL, NULL); 27005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 27015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 27035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 27045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * That's an HTMLism, the attribute value may not be quoted 27055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 27065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = htmlParseHTMLAttribute(ctxt, 0); 27075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ret == NULL) { 27085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 27095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "AttValue: no value found\n", NULL, NULL); 27105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 27115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 27125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 27135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 27145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 27155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 27165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseSystemLiteral: 27175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 27185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 27195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML Literal 27205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 27215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 27225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
27235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the SystemLiteral parsed or NULL 27245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 27255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 27265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar * 27275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { 27285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *q; 27295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *ret = NULL; 27305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 27315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '"') { 27325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q = CUR_PTR; 27345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && (CUR != '"')) 27355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_CHAR_CH(CUR)) { 27375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 27385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unfinished SystemLiteral\n", NULL, NULL); 27395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 27405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = xmlStrndup(q, CUR_PTR - q); 27415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 27435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (CUR == '\'') { 27445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q = CUR_PTR; 
27465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) 27475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_CHAR_CH(CUR)) { 27495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 27505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unfinished SystemLiteral\n", NULL, NULL); 27515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 27525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = xmlStrndup(q, CUR_PTR - q); 27535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 27555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 27565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, 27575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) " or ' expected\n", NULL, NULL); 27585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 27595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 27605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 27615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 27625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 27635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 27645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParsePubidLiteral: 27655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 27665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 27675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML public literal 27685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 27695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 27705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 27715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the PubidLiteral parsed or NULL. 27725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 27735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 27745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar * 27755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { 27765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *q; 27775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *ret = NULL; 27785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 27795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Name ::= (Letter | '_') (NameChar)* 27805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 27815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '"') { 27825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q = CUR_PTR; 27845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (IS_PUBIDCHAR_CH(CUR)) NEXT; 27855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '"') { 27865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 27875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unfinished PubidLiteral\n", NULL, NULL); 27885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 27895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = xmlStrndup(q, CUR_PTR - q); 27905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
27925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (CUR == '\'') { 27935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q = CUR_PTR; 27955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')) 27965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 27975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '\'') { 27985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 27995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unfinished PubidLiteral\n", NULL, NULL); 28005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 28015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = xmlStrndup(q, CUR_PTR - q); 28025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 28035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 28055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, 28065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "PubidLiteral \" or ' expected\n", NULL, NULL); 28075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 28095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 28105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 28115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 28125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 28135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseScript: 28145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 
28155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 28165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse the content of an HTML SCRIPT or STYLE element 28175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/sgml/dtd.html#Script 28185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet 28195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/types.html#type-script 28205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/types.html#h-6.15 28215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1 28225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 28235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Script data ( %Script; in the DTD) can be the content of the SCRIPT 28245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * element and the value of intrinsic event attributes. User agents must 28255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * not evaluate script data as HTML markup but instead must pass it on as 28265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * data to a script engine. 
28275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NOTES: 28285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * - The content is passed like CDATA 28295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * - the attributes for style and scripting "onXXX" are also described 28305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * as CDATA but SGML allows entities references in attributes so their 28315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * processing is identical as other attributes 28325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 28335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 28345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseScript(htmlParserCtxtPtr ctxt) { 28355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; 28365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nbchar = 0; 28375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cur,l; 28385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 28395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 28405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 28415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (IS_CHAR_CH(cur)) { 28425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (NXT(1) == '/')) { 28435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 28445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * One should break here, the specification is clear: 28455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Authors should therefore escape "</" within the content. 
28465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Escape mechanisms are specific to each scripting or 28475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * style sheet language. 28485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 28495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * In recovery mode, only break if end tag match the 28505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * current tag, effectively ignoring all tags inside the 28515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * script/style block and treating the entire block as 28525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CDATA. 28535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 28545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->recovery) { 28555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2, 28565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlStrlen(ctxt->name)) == 0) 28575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 28585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; /* while */ 28595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 28605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 28615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Element %s embeds close tag\n", 28625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name, NULL); 28635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 28655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) || 28665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) 
28675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 28685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; /* while */ 28695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) COPY_BUF(l,buf,nbchar,cur); 28735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { 28745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->cdataBlock!= NULL) { 28755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 28765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Insert as CDATA, which is the same as HTML_PRESERVE_NODE 28775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 28785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); 28795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (ctxt->sax->characters != NULL) { 28805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, buf, nbchar); 28815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nbchar = 0; 28835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 28855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(l); 28865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 28875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 28895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { 
28905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 28915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Invalid char in CDATA 0x%X\n", cur); 28925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 28935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 28945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 28955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { 28965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->cdataBlock!= NULL) { 28975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 28985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Insert as CDATA, which is the same as HTML_PRESERVE_NODE 28995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 29005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); 29015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (ctxt->sax->characters != NULL) { 29025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, buf, nbchar); 29035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 29065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 29095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseCharData: 29105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 29115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 29125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a 
CharData section. 29135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if we are within a CDATA section ']]>' marks an end of section. 29145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 29155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 29165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 29175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 29195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseCharData(htmlParserCtxtPtr ctxt) { 29205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; 29215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nbchar = 0; 29225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cur, l; 29235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int chunk = 0; 29245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 29265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 29275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (((cur != '<') || (ctxt->token == '<')) && 29285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((cur != '&') || (ctxt->token == '&')) && 29295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (cur != 0)) { 29305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!(IS_CHAR(cur))) { 29315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 29325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Invalid char in CDATA 0x%X\n", cur); 29335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 29345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
COPY_BUF(l,buf,nbchar,cur); 29355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { 29375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 29385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Ok the segment is to be consumed as chars. 29395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 29405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 29415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (areBlanks(ctxt, buf, nbchar)) { 29425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->ignorableWhitespace != NULL) 29435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->ignorableWhitespace(ctxt->userData, 29445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf, nbchar); 29455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 29465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 29475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->characters != NULL) 29485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, buf, nbchar); 29495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nbchar = 0; 29525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(l); 29545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) chunk++; 29555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (chunk > HTML_PARSER_BUFFER_SIZE) { 29565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) chunk = 0; 
29575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 29585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 29595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 29615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == 0) { 29625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 29635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 29645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 29655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nbchar != 0) { 29685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf[nbchar] = 0; 29695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 29715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Ok the segment is to be consumed as chars. 
29725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 29735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 29745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (areBlanks(ctxt, buf, nbchar)) { 29755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->ignorableWhitespace != NULL) 29765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 29775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 29785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 29795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->characters != NULL) 29805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, buf, nbchar); 29815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 29845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 29855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Loop detection 29865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 29875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == 0) 29885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EOF; 29895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 29905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 29915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 29935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseExternalID: 29945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 
29955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @publicID: a xmlChar** receiving PubidLiteral 29965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 29975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse an External ID or a Public ID 29985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 29995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 30005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * | 'PUBLIC' S PubidLiteral S SystemLiteral 30015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 30025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [83] PublicID ::= 'PUBLIC' S PubidLiteral 30035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 30045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the function returns SystemLiteral and in the second 30055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * case publicID receives PubidLiteral, is strict is off 30065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * it is possible to return NULL and have publicID set. 
30075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 30085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 30095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar * 30105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) { 30115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *URI = NULL; 30125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 30135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((UPPER == 'S') && (UPP(1) == 'Y') && 30145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'S') && (UPP(3) == 'T') && 30155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'E') && (UPP(5) == 'M')) { 30165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(6); 30175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_BLANK_CH(CUR)) { 30185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED, 30195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Space required after 'SYSTEM'\n", NULL, NULL); 30205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 30225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URI = htmlParseSystemLiteral(ctxt); 30235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (URI == NULL) { 30245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_URI_REQUIRED, 30255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL); 30265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((UPPER == 'P') && (UPP(1) == 'U') && 30285821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) (UPP(2) == 'B') && (UPP(3) == 'L') && 30295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'I') && (UPP(5) == 'C')) { 30305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(6); 30315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_BLANK_CH(CUR)) { 30325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED, 30335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Space required after 'PUBLIC'\n", NULL, NULL); 30345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 30365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *publicID = htmlParsePubidLiteral(ctxt); 30375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (*publicID == NULL) { 30385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED, 30395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseExternalID: PUBLIC, no Public Identifier\n", 30405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 30415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 30435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '"') || (CUR == '\'')) { 30445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URI = htmlParseSystemLiteral(ctxt); 30455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(URI); 30485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 30495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 30505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 
30515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * xmlParsePI: 30525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an XML parser context 30535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 30545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML Processing Instruction. 30555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 30565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 30575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 30585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 30595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParsePI(htmlParserCtxtPtr ctxt) { 30605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *buf = NULL; 30615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int len = 0; 30625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size = HTML_PARSER_BUFFER_SIZE; 30635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cur, l; 30645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *target; 30655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputState state; 30665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int count = 0; 30675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 30685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((RAW == '<') && (NXT(1) == '?')) { 30695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = ctxt->instate; 30705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_PI; 30715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 30725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * this is a Processing Instruction. 
30735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 30745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(2); 30755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 30765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 30775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 30785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse the target name and check for special support like 30795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * namespace. 30805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 30815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) target = htmlParseName(ctxt); 30825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (target != NULL) { 30835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RAW == '>') { 30845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(1); 30855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 30865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 30875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: PI detected. 
30885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 30895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (!ctxt->disableSAX) && 30905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->sax->processingInstruction != NULL)) 30915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->processingInstruction(ctxt->userData, 30925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) target, NULL); 30935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 30945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 30955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 30965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 30975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf == NULL) { 30985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, NULL); 30995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 31005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 31015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR; 31035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_BLANK(cur)) { 31045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED, 31055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "ParsePI: PI %s space expected\n", target, NULL); 31065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 31085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 31095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (IS_CHAR(cur) && (cur != '>')) { 
31105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (len + 5 >= size) { 31115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *tmp; 31125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 31135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size *= 2; 31145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 31155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (tmp == NULL) { 31165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, NULL); 31175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 31185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 31195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 31205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = tmp; 31225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) count++; 31245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (count > 50) { 31255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 31265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) count = 0; 31275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) COPY_BUF(l,buf,len,cur); 31295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(l); 31305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 31315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == 0) { 31325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 31335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 31345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = 
CUR_CHAR(l); 31355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf[len] = 0; 31385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur != '>') { 31395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED, 31405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "ParsePI: PI %s never end ...\n", target, NULL); 31415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 31425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(1); 31435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 31445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 31455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: PI detected. 31465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 31475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (!ctxt->disableSAX) && 31485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->sax->processingInstruction != NULL)) 31495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->processingInstruction(ctxt->userData, 31505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) target, buf); 31515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 31535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 31545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, 31555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "PI is not started correctly", NULL, NULL); 31565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 
31585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 31605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 31615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 31625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseComment: 31635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 31645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 31655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse an XML (SGML) comment <!-- .... --> 31665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 31675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 31685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 31695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 31705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseComment(htmlParserCtxtPtr ctxt) { 31715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *buf = NULL; 31725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int len; 31735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int size = HTML_PARSER_BUFFER_SIZE; 31745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int q, ql; 31755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int r, rl; 31765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cur, l; 31775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputState state; 31785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 31795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 31805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check that there is a comment right here. 
31815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 31825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((RAW != '<') || (NXT(1) != '!') || 31835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(2) != '-') || (NXT(3) != '-')) return; 31845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 31855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = ctxt->instate; 31865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_COMMENT; 31875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 31885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(4); 31895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 31905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf == NULL) { 31915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, "buffer allocation failed\n"); 31925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 31935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 31945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 31955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q = CUR_CHAR(ql); 31965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(ql); 31975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) r = CUR_CHAR(rl); 31985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(rl); 31995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 32005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = 0; 32015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (IS_CHAR(cur) && 32025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((cur != '>') || 32035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (r != '-') || (q != '-'))) { 
32045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (len + 5 >= size) { 32055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *tmp; 32065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 32075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size *= 2; 32085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 32095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (tmp == NULL) { 32105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 32115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, "growing buffer failed\n"); 32125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 32135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 32145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = tmp; 32165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) COPY_BUF(ql,buf,len,q); 32185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q = r; 32195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ql = rl; 32205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) r = cur; 32215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rl = l; 32225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXTL(l); 32235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 32245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == 0) { 32255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SHRINK; 32265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 32275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = CUR_CHAR(l); 
32285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf[len] = 0; 32315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_CHAR(cur)) { 32325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 32335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Comment not terminated \n<!--%.50s\n", buf, NULL); 32345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 32355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 32365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 32375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 32385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!ctxt->disableSAX)) 32395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->comment(ctxt->userData, buf); 32405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 32415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = state; 32435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 32445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 32455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 32465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseCharRef: 32475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 32485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 32495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse Reference declarations 32505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 32515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) * [66] CharRef ::= '&#' [0-9]+ ';' | 32525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * '&#x' [0-9a-fA-F]+ ';' 32535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 32545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the value parsed (as an int) 32555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 32565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 32575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseCharRef(htmlParserCtxtPtr ctxt) { 32585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int val = 0; 32595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 32605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) { 32615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 32625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseCharRef: context error\n", 32635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 32645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 32655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '&') && (NXT(1) == '#') && 32675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((NXT(2) == 'x') || NXT(2) == 'X')) { 32685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(3); 32695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (CUR != ';') { 32705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR >= '0') && (CUR <= '9')) 32715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = val * 16 + (CUR - '0'); 32725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((CUR >= 'a') && (CUR <= 'f')) 32735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = val * 
16 + (CUR - 'a') + 10; 32745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((CUR >= 'A') && (CUR <= 'F')) 32755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = val * 16 + (CUR - 'A') + 10; 32765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 32775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, 32785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseCharRef: missing semicolumn\n", 32795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 32805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 32815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 32835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == ';') 32855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 32865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((CUR == '&') && (NXT(1) == '#')) { 32875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(2); 32885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (CUR != ';') { 32895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR >= '0') && (CUR <= '9')) 32905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = val * 10 + (CUR - '0'); 32915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 32925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, 32935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseCharRef: missing semicolumn\n", 32945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 32955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 
32965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 32985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 32995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == ';') 33005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 33015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 33025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF, 33035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseCharRef: invalid value\n", NULL, NULL); 33045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 33055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check the value IS_CHAR ... 33075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IS_CHAR(val)) { 33095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(val); 33105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 33115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 33125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseCharRef: invalid xmlChar value %d\n", 33135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val); 33145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 33155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 33165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 33175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 33205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
htmlParseDocTypeDecl: 33215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 33225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 33235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a DOCTYPE declaration 33245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 33255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 33265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 33275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 33305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { 33315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 33325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *ExternalID = NULL; 33335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *URI = NULL; 33345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * We know that '<!DOCTYPE' has been detected. 33375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(9); 33395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 33415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse the DOCTYPE name. 
33445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = htmlParseName(ctxt); 33465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 33475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 33485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseDocTypeDecl : no DOCTYPE name !\n", 33495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 33505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 33515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check that upper(name) == "HTML" !!!!!!!!!!!!! 33535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 33565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for SystemID and ExternalID 33595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URI = htmlParseExternalID(ctxt, &ExternalID); 33615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 33625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * We should be at the end of the DOCTYPE declaration. 
33655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '>') { 33675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, 33685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "DOCTYPE improperly terminated\n", NULL, NULL); 33695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* We shouldn't try to resynchronize ... */ 33705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 33715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 33725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create or update the document accordingly to the DOCTYPE 33755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 33775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!ctxt->disableSAX)) 33785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 33795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 33805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 33815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Cleanup, since we don't use all those identifiers 33825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 33835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (URI != NULL) xmlFree(URI); 33845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ExternalID != NULL) xmlFree(ExternalID); 33855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 33865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
33875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 33885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseAttribute: 33895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 33905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value: a xmlChar ** used to store the value of the attribute 33915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 33925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an attribute 33935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 33945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [41] Attribute ::= Name Eq AttValue 33955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 33965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [25] Eq ::= S? '=' S? 33975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 33985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With namespace: 33995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 34005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 11] Attribute ::= QName Eq AttValue 34015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 34025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Also the case QName == xmlns:??? is handled independently as a namespace 34035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * definition. 34045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 34055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the attribute name, and the value in *value. 
34065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 34075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * 34095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { 34105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 34115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *val = NULL; 34125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *value = NULL; 34145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = htmlParseHTMLName(ctxt); 34155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 34165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 34175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "error parsing attribute name\n", NULL, NULL); 34185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 34195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 34205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 34225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * read the value 34235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 34245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 34255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '=') { 34265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 34275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 34285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = htmlParseAttValue(ctxt); 34295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
34305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *value = val; 34325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(name); 34335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 34345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 34365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckEncoding: 34375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 34385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @attvalue: the attribute value 34395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 34405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks an http-equiv attribute from a Meta tag to detect 34415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the encoding 34425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If a new encoding is detected the parser is switched to decode 34435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * it and pass UTF8 34445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 34455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 34465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { 34475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *encoding; 34485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (attvalue == NULL)) 34505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 34515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* do not change encoding */ 
34535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->input->encoding != NULL) 34545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 34555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); 34575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (encoding != NULL) { 34585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding += 8; 34595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 34605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); 34615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (encoding != NULL) 34625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) encoding += 9; 34635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 34645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (encoding != NULL) { 34655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncoding enc; 34665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncodingHandlerPtr handler; 34675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((*encoding == ' ') || (*encoding == '\t')) encoding++; 34695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->input->encoding != NULL) 34715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree((xmlChar *) ctxt->input->encoding); 34725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->encoding = xmlStrdup(encoding); 34735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 34745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enc = xmlParseCharEncoding((const char 
*) encoding); 34755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 34765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * registered set of known encodings 34775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 34785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (enc != XML_CHAR_ENCODING_ERROR) { 34795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (((enc == XML_CHAR_ENCODING_UTF16LE) || 34805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (enc == XML_CHAR_ENCODING_UTF16BE) || 34815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (enc == XML_CHAR_ENCODING_UCS4LE) || 34825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (enc == XML_CHAR_ENCODING_UCS4BE)) && 34835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf != NULL) && 34845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf->encoder == NULL)) { 34855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 34865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlCheckEncoding: wrong encoding meta\n", 34875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 34885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 34895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchEncoding(ctxt, enc); 34905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 34915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->charset = XML_CHAR_ENCODING_UTF8; 34925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 34935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 34945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * fallback for unknown encodings 34955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 34965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) handler = xmlFindCharEncodingHandler((const char *) encoding); 34975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (handler != NULL) { 34985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchToEncoding(ctxt, handler); 34995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->charset = XML_CHAR_ENCODING_UTF8; 35005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 35015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 35025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 35035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 35045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->input->buf != NULL) && 35065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf->encoder != NULL) && 35075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf->raw != NULL) && 35085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf->buffer != NULL)) { 35095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nbchars; 35105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int processed; 35115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 35135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * convert as much as possible to the parser reading buffer. 
35145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 35155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) processed = ctxt->input->cur - ctxt->input->base; 35165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlBufferShrink(ctxt->input->buf->buffer, processed); 35175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, 35185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->buf->buffer, 35195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->buf->raw); 35205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nbchars < 0) { 35215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 35225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlCheckEncoding: encoder error\n", 35235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 35245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 35255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->base = 35265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur = ctxt->input->buf->buffer->content; 35275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->end = 35285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &ctxt->input->base[ctxt->input->buf->buffer->use]; 35295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 35305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 35315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 35325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 35345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckMeta: 35355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 
35365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @atts: the attributes values 35375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks an attributes from a Meta tag 35395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 35405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 35415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { 35425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i; 35435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *att, *value; 35445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int http = 0; 35455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *content = NULL; 35465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (atts == NULL)) 35485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 35495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i = 0; 35515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) att = atts[i++]; 35525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (att != NULL) { 35535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) value = atts[i++]; 35545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) 35555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 35565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) http = 1; 35575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) 
35585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) content = value; 35595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) att = atts[i++]; 35605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 35615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((http) && (content != NULL)) 35625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckEncoding(ctxt, content); 35635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 35655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 35675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseStartTag: 35685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 35695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a start of tag either for rule element or 35715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * EmptyElement. In both case we don't parse the tag closing chars. 35725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [40] STag ::= '<' Name (S Attribute)* S? '>' 35745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 35765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With namespace: 35785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 8] STag ::= '<' QName (S Attribute)* S? 
'>' 35805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 35825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 35835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of success, -1 in case of error and 1 if discarded 35845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 35855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 35875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseStartTag(htmlParserCtxtPtr ctxt) { 35885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 35895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *attname; 35905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *attvalue; 35915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar **atts; 35925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nbatts = 0; 35935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int maxatts; 35945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int meta = 0; 35955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i; 35965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int discardtag = 0; 35975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate == XML_PARSER_EOF) 35995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 36005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) { 36015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 36025821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) "htmlParseStartTag: context error\n", NULL, NULL); 36035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 36045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '<') return -1; 36065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 36075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts = ctxt->atts; 36095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxatts = ctxt->maxatts; 36105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 36125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = htmlParseHTMLName(ctxt); 36135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 36145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 36155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: invalid element name\n", 36165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 36175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Dump the bogus tag like browsers do */ 36185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && (CUR != '>') && 36195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate != XML_PARSER_EOF)) 36205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 36215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 36225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, BAD_CAST"meta")) 36245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) meta = 1; 
36255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 36275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for auto-closure of HTML elements. 36285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 36295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoClose(ctxt, name); 36305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 36325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for implied HTML elements. 36335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 36345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckImplied(ctxt, name); 36355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 36375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Avoid html at any level > 0, head at any level != 1 36385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * or any attempt to recurse body 36395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 36405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) { 36415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, 36425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: misplaced <html> tag\n", 36435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name, NULL); 36445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) discardtag = 1; 36455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->depth++; 36465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr != 1) && 
36485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(name, BAD_CAST"head"))) { 36495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, 36505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: misplaced <head> tag\n", 36515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name, NULL); 36525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) discardtag = 1; 36535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->depth++; 36545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, BAD_CAST"body")) { 36565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int indx; 36575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (indx = 0;indx < ctxt->nameNr;indx++) { 36585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) { 36595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, 36605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: misplaced <body> tag\n", 36615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name, NULL); 36625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) discardtag = 1; 36635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->depth++; 36645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 36695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Now parse the attributes, it ends up with the ending 
36705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 36715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * (S Attribute)* S? 36725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 36735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 36745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && 36755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR != '>') && 36765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((CUR != '/') || (NXT(1) != '>'))) { 36775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long cons = ctxt->nbChars; 36785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 36805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) attname = htmlParseAttribute(ctxt, &attvalue); 36815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (attname != NULL) { 36825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 36845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Well formedness requires at most one declaration of an attribute 36855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 36865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0; i < nbatts;i += 2) { 36875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(atts[i], attname)) { 36885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED, 36895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Attribute %s redefined\n", attname, NULL); 36905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (attvalue != NULL) 36915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(attvalue); 36925821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) goto failed; 36935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 36965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 36975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Add the pair to atts 36985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 36995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (atts == NULL) { 37005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxatts = 22; /* allow for 10 attrs by default */ 37015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts = (const xmlChar **) 37025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlMalloc(maxatts * sizeof(xmlChar *)); 37035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (atts == NULL) { 37045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, NULL); 37055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (attvalue != NULL) 37065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(attvalue); 37075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto failed; 37085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->atts = atts; 37105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->maxatts = maxatts; 37115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (nbatts + 4 > maxatts) { 37125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar **n; 37135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxatts *= 2; 37155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) n = (const xmlChar **) xmlRealloc((void *) atts, 
37165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxatts * sizeof(const xmlChar *)); 37175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (n == NULL) { 37185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(ctxt, NULL); 37195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (attvalue != NULL) 37205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(attvalue); 37215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto failed; 37225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts = n; 37245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->atts = atts; 37255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->maxatts = maxatts; 37265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts[nbatts++] = attname; 37285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts[nbatts++] = attvalue; 37295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts[nbatts] = NULL; 37305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atts[nbatts + 1] = NULL; 37315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 37335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (attvalue != NULL) 37345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(attvalue); 37355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Dump the bogus attribute string up to the next blank or 37365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the end of the tag. 
*/ 37375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && 37385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !(IS_BLANK_CH(CUR)) && (CUR != '>') && 37395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((CUR != '/') || (NXT(1) != '>'))) 37405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 37415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)failed: 37445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 37455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cons == ctxt->nbChars) { 37465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 37475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: problem parsing attributes\n", 37485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 37495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 37505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 37545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle specific association to the META tag 37555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 37565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (meta && (nbatts != 0)) 37575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckMeta(ctxt, atts); 37585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 37605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: Start of Element ! 
37615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 37625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!discardtag) { 37635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePush(ctxt, name); 37645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) { 37655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nbatts != 0) 37665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, name, atts); 37675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 37685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startElement(ctxt->userData, name, NULL); 37695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (atts != NULL) { 37735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 1;i < nbatts;i += 2) { 37745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (atts[i] != NULL) 37755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree((xmlChar *) atts[i]); 37765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 37785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(discardtag); 37805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 37815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 37835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseEndTag: 37845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML 
parser context 37855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 37865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an end of tag 37875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 37885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [42] ETag ::= '</' Name S? '>' 37895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 37905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With namespace 37915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 37925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 9] ETag ::= '</' QName S? '>' 37935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 37945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if the current level should be closed. 37955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 37965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 37975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 37985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseEndTag(htmlParserCtxtPtr ctxt) 37995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 38005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 38015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *oldname; 38025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i, ret; 38035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR != '<') || (NXT(1) != '/')) { 38055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED, 38065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseEndTag: '</' not found\n", NULL, NULL); 38075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 
38085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(2); 38105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = htmlParseHTMLName(ctxt); 38125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) 38135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 38145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * We should definitely be at the ending "S? '>'" part 38165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 38185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) { 38195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, 38205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "End tag : expected '>'\n", NULL, NULL); 38215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->recovery) { 38225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * We're not at the ending > !! 
38245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Error, unless in recover mode where we search forwards 38255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * until we find a > 38265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (CUR != '\0' && CUR != '>') NEXT; 38285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 38295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 38315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 38325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if we ignored misplaced tags in htmlParseStartTag don't pop them 38355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * out now. 
38365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->depth > 0) && 38385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(name, BAD_CAST "html") || 38395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlStrEqual(name, BAD_CAST "body") || 38405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlStrEqual(name, BAD_CAST "head"))) { 38415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->depth--; 38425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 38435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If the name read is not one of the element in the parsing stack 38475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * then return, it's just an error. 
38485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = (ctxt->nameNr - 1); i >= 0; i--) { 38505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, ctxt->nameTab[i])) 38515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 38525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i < 0) { 38545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 38555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unexpected end tag : %s\n", name, NULL); 38565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (0); 38575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for auto-closure of HTML elements. 38625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnClose(ctxt, name); 38655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Well formedness constraints, opening and closing must match. 38685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With the exception that the autoclose may have popped stuff out 38695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * of the stack. 
38705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!xmlStrEqual(name, ctxt->name)) { 38725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) { 38735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 38745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Opening and ending tag mismatch: %s and %s\n", 38755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name, ctxt->name); 38765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 38805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: End of Tag 38815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 38825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) oldname = ctxt->name; 38835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { 38845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 38855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 38865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 38875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = 1; 38885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 38895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = 0; 38905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 38915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
return (ret); 38935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 38945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 38965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 38975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseReference: 38985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 38995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 39005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse and handle entity references in content, 39015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * this will end-up in a call to character() since this is either a 39025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CharRef, or a predefined entity. 39035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 39045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 39055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseReference(htmlParserCtxtPtr ctxt) { 39065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlEntityDesc * ent; 39075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar out[6]; 39085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 39095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR != '&') return; 39105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (NXT(1) == '#') { 39125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c; 39135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int bits, i = 0; 39145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = htmlParseCharRef(ctxt); 
39165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c == 0) 39175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 39185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c < 0x80) { out[i++]= c; bits= -6; } 39205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } 39215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } 39225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } 39235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( ; bits >= 0; bits-= 6) { 39255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out[i++]= ((c >> bits) & 0x3F) | 0x80; 39265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 39275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out[i] = 0; 39285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 39305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 39315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, out, i); 39325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 39335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ent = htmlParseEntityRef(ctxt, &name); 39345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 39355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 39365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && 
(ctxt->sax->characters != NULL)) 39375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); 39385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 39395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 39405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ent == NULL) || !(ent->value > 0)) { 39415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 39425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) { 39435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); 39445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name)); 39455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */ 39465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 39475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 39485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int c; 39495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int bits, i = 0; 39505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = ent->value; 39525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c < 0x80) 39535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { out[i++]= c; bits= -6; } 39545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x800) 39555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } 39565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (c < 0x10000) 39575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 
out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } 39585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 39595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } 39605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( ; bits >= 0; bits-= 6) { 39625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out[i++]= ((c >> bits) & 0x3F) | 0x80; 39635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 39645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) out[i] = 0; 39655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 39675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 39685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, out, i); 39695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 39705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 39715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 39725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 39745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseContent: 39755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 39765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 39775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a content: comment, sub-element, reference or text. 
39785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Kept for compatibility with old code 39795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 39805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 39825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseContent(htmlParserCtxtPtr ctxt) { 39835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *currentNode; 39845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int depth; 39855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 39865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 39885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 39895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (1) { 39905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long cons = ctxt->nbChars; 39915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 39935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate == XML_PARSER_EOF) 39955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 39965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 39975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 39985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Our tag or one of it's parent or children is ending. 
39995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '/')) { 40015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlParseEndTag(ctxt) && 40025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((currentNode != NULL) || (ctxt->nameNr == 0))) { 40035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) 40045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(currentNode); 40055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 40065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; /* while */ 40085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((CUR == '<') && 40115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((IS_ASCII_LETTER(NXT(1))) || 40125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(1) == '_') || (NXT(1) == ':'))) { 40135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = htmlParseHTMLName_nonInvasive(ctxt); 40145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 40155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 40165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: invalid element name\n", 40175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 40185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Dump the bogus tag like browsers do */ 40195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && (CUR != '>')) 40205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
NEXT; 40215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) 40235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(currentNode); 40245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 40255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->name != NULL) { 40285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlCheckAutoClose(name, ctxt->name) == 1) { 40295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoClose(ctxt, name); 40305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 40315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Has this node been popped out during parsing of 40375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the next element 40385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && 40405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!xmlStrEqual(currentNode, ctxt->name))) 40415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 40425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) xmlFree(currentNode); 40435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 40445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
40455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) || 40475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(currentNode, BAD_CAST"style")))) { 40485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle SCRIPT/STYLE separately 40505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseScript(ctxt); 40525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 40535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Sometimes DOCTYPE arrives in the middle of the document 40555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '!') && 40575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'D') && (UPP(3) == 'O') && 40585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'C') && (UPP(5) == 'T') && 40595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(6) == 'Y') && (UPP(7) == 'P') && 40605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(8) == 'E')) { 40615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, 40625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Misplaced DOCTYPE declaration\n", 40635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "DOCTYPE" , NULL); 40645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocTypeDecl(ctxt); 40655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40665821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) 40675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * First case : a comment 40695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '!') && 40715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(2) == '-') && (NXT(3) == '-')) { 40725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 40735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Second case : a Processing Instruction. 40775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((CUR == '<') && (NXT(1) == '?')) { 40795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 40805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Third case : a sub-element. 40845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (CUR == '<') { 40865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseElement(ctxt); 40875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Fourth case : a reference. 
If if has not been resolved, 40915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parsing returns it's Name, create the node 40925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 40935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (CUR == '&') { 40945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseReference(ctxt); 40955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 40965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 40975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 40985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Fifth case : end of the resource 40995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 41005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (CUR == 0) { 41015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 41025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 41035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 41065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Last case, text. Note that References are handled directly. 
41075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 41085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 41095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseCharData(ctxt); 41105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cons == ctxt->nbChars) { 41135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->node != NULL) { 41145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 41155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "detected an error in element content\n", 41165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 41175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 41195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 41225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) xmlFree(currentNode); 41245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 41255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 41275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseElement: 41285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 41295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 41305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML element, this is highly recursive 41315821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) * this is kept for compatibility with previous code versions 41325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 41335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [39] element ::= EmptyElemTag | STag content ETag 41345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 41355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [41] Attribute ::= Name Eq AttValue 41365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 41375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void 41395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseElement(htmlParserCtxtPtr ctxt) { 41405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 41415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *currentNode = NULL; 41425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlElemDesc * info; 41435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserNodeInfo node_info; 41445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int failed; 41455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int depth; 41465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *oldptr; 41475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) { 41495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 41505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseElement: context error\n", NULL, NULL); 41515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 41525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
41545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate == XML_PARSER_EOF) 41555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 41565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Capture start position */ 41585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->record_info) { 41595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.begin_pos = ctxt->input->consumed + 41605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR_PTR - ctxt->input->base); 41615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.begin_line = ctxt->input->line; 41625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) failed = htmlParseStartTag(ctxt); 41655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = ctxt->name; 41665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((failed == -1) || (name == NULL)) { 41675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '>') 41685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 41695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 41705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 41735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the info for that element. 
41745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 41755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) info = htmlTagLookup(name); 41765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (info == NULL) { 41775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG, 41785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Tag %s invalid\n", name, NULL); 41795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 41825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for an Empty Element labeled the XML/SGML way 41835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 41845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '/') && (NXT(1) == '>')) { 41855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(2); 41865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 41875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 41885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 41895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 41905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 41915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '>') { 41935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 41945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 41955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, 41965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Couldn't find end of Start Tag %s\n", 
name, NULL); 41975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 41995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * end of parsing of this node. 42005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 42015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, ctxt->name)) { 42025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nodePop(ctxt); 42035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 42045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 42075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Capture end position and add node 42085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 42095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->record_info) { 42105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.end_pos = ctxt->input->consumed + 42115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR_PTR - ctxt->input->base); 42125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.end_line = ctxt->input->line; 42135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.node = ctxt->node; 42145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserAddNodeInfo(ctxt, &node_info); 42155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 42175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 42205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for an Empty 
Element from DTD definition 42215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 42225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((info != NULL) && (info->empty)) { 42235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 42245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 42255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 42265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 42275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 42305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse the content of the element: 42315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 42325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 42335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 42345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (IS_CHAR_CH(CUR)) { 42355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) oldptr = ctxt->input->cur; 42365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseContent(ctxt); 42375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (oldptr==ctxt->input->cur) break; 42385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr < depth) break; 42395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 42425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Capture end position and add node 42435821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) */ 42445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( currentNode != NULL && ctxt->record_info ) { 42455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.end_pos = ctxt->input->consumed + 42465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR_PTR - ctxt->input->base); 42475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.end_line = ctxt->input->line; 42485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.node = ctxt->node; 42495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserAddNodeInfo(ctxt, &node_info); 42505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_CHAR_CH(CUR)) { 42525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 42535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) 42565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(currentNode); 42575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 42585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 42605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) { 42615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 42625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Capture end position and add node 42635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 42645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( ctxt->node != NULL && ctxt->record_info ) { 42655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfo->end_pos = 
ctxt->input->consumed + 42665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR_PTR - ctxt->input->base); 42675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfo->end_line = ctxt->input->line; 42685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfo->node = ctxt->node; 42695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo); 42705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodeInfoPop(ctxt); 42715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!IS_CHAR_CH(CUR)) { 42735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 42745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 42755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 42765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 42785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseElementInternal: 42795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 42805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 42815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML element, new version, non recursive 42825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 42835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [39] element ::= EmptyElemTag | STag content ETag 42845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 42855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [41] Attribute ::= Name Eq AttValue 42865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 42875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static 
void 42895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseElementInternal(htmlParserCtxtPtr ctxt) { 42905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 42915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlElemDesc * info; 42925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserNodeInfo node_info; 42935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int failed; 42945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 42955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) { 42965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 42975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseElementInternal: context error\n", NULL, NULL); 42985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 42995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate == XML_PARSER_EOF) 43025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 43035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Capture start position */ 43055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->record_info) { 43065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.begin_pos = ctxt->input->consumed + 43075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (CUR_PTR - ctxt->input->base); 43085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) node_info.begin_line = ctxt->input->line; 43095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
43115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) failed = htmlParseStartTag(ctxt); 43125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = ctxt->name; 43135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((failed == -1) || (name == NULL)) { 43145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '>') 43155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 43165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 43175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 43205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the info for that element. 43215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 43225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) info = htmlTagLookup(name); 43235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (info == NULL) { 43245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG, 43255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Tag %s invalid\n", name, NULL); 43265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 43295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for an Empty Element labeled the XML/SGML way 43305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 43315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '/') && (NXT(1) == '>')) { 43325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(2); 43335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 
43345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 43355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 43365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 43375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '>') { 43405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 43415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 43425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, 43435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Couldn't find end of Start Tag %s\n", name, NULL); 43445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 43465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * end of parsing of this node. 
43475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 43485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, ctxt->name)) { 43495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nodePop(ctxt); 43505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 43515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->record_info) 43545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodeInfoPush(ctxt, &node_info); 43555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserFinishElementParsing(ctxt); 43565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 43575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 43605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for an Empty Element from DTD definition 43615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 43625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((info != NULL) && (info->empty)) { 43635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 43645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 43655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 43665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 43675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 43685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->record_info) 
43705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlNodeInfoPush(ctxt, &node_info); 43715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 43725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 43745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseContentInternal: 43755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 43765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 43775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a content: comment, sub-element, reference or text. 43785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * New version for non recursive htmlParseElementInternal 43795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 43805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void 43825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseContentInternal(htmlParserCtxtPtr ctxt) { 43835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *currentNode; 43845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int depth; 43855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 43865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 43885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 43895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (1) { 43905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long cons = ctxt->nbChars; 43915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 
43935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate == XML_PARSER_EOF) 43955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 43965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 43975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 43985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Our tag or one of it's parent or children is ending. 43995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 44005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '/')) { 44015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlParseEndTag(ctxt) && 44025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((currentNode != NULL) || (ctxt->nameNr == 0))) { 44035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) 44045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(currentNode); 44055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 44075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 44085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; /* while */ 44105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((CUR == '<') && 44135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((IS_ASCII_LETTER(NXT(1))) || 44145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(1) == '_') || (NXT(1) == ':'))) { 44155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = 
htmlParseHTMLName_nonInvasive(ctxt); 44165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (name == NULL) { 44175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 44185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseStartTag: invalid element name\n", 44195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 44205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Dump the bogus tag like browsers do */ 44215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((IS_CHAR_CH(CUR)) && (CUR != '>')) 44225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 44235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserFinishElementParsing(ctxt); 44255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) 44265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(currentNode); 44275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 44295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 44305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 44315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->name != NULL) { 44345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlCheckAutoClose(name, ctxt->name) == 1) { 44355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoClose(ctxt, name); 44365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 44375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
44385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 44425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Has this node been popped out during parsing of 44435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the next element 44445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 44455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) && 44465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!xmlStrEqual(currentNode, ctxt->name))) 44475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 44485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserFinishElementParsing(ctxt); 44495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) xmlFree(currentNode); 44505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 44525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 44535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 44545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) || 44575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(currentNode, BAD_CAST"style")))) { 44585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 44595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle SCRIPT/STYLE separately 44605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
*/ 44615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseScript(ctxt); 44625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 44635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 44645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Sometimes DOCTYPE arrives in the middle of the document 44655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 44665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '!') && 44675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'D') && (UPP(3) == 'O') && 44685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'C') && (UPP(5) == 'T') && 44695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(6) == 'Y') && (UPP(7) == 'P') && 44705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(8) == 'E')) { 44715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, 44725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Misplaced DOCTYPE declaration\n", 44735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "DOCTYPE" , NULL); 44745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocTypeDecl(ctxt); 44755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 44785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * First case : a comment 44795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 44805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '!') && 44815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(2) == '-') && (NXT(3) == '-')) { 44825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 
44835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 44865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Second case : a Processing Instruction. 44875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 44885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if ((CUR == '<') && (NXT(1) == '?')) { 44895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 44905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 44915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 44935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Third case : a sub-element. 44945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 44955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (CUR == '<') { 44965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseElementInternal(ctxt); 44975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) xmlFree(currentNode); 44985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) currentNode = xmlStrdup(ctxt->name); 45005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) depth = ctxt->nameNr; 45015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 45045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Fourth case : a reference. 
If if has not been resolved, 45055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parsing returns it's Name, create the node 45065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (CUR == '&') { 45085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseReference(ctxt); 45095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 45125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Fifth case : end of the resource 45135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (CUR == 0) { 45155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 45165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 45175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 45205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Last case, text. Note that References are handled directly. 
45215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 45235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseCharData(ctxt); 45245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cons == ctxt->nbChars) { 45275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->node != NULL) { 45285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 45295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "detected an error in element content\n", 45305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 45315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 45335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 45365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (currentNode != NULL) xmlFree(currentNode); 45385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 45395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 45415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseContent: 45425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 45435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 45445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a content: comment, sub-element, reference or text. 
45455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is the entry point when called from parser.c 45465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void 45495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)__htmlParseContent(void *ctxt) { 45505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt != NULL) 45515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseContentInternal((htmlParserCtxtPtr) ctxt); 45525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 45535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 45555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseDocument: 45565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 45575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 45585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML document (and build a tree if using the standard SAX 45595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * interface). 45605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 45615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0, -1 in case of error. the parser context is augmented 45625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * as a result of the parsing. 
45635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 45665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseDocument(htmlParserCtxtPtr ctxt) { 45675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar start[4]; 45685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncoding enc; 45695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlDtdPtr dtd; 45705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 45725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlDefaultSAXHandlerInit(); 45745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) { 45765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 45775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseDocument: context error\n", NULL, NULL); 45785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(XML_ERR_INTERNAL_ERROR); 45795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 45805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->html = 1; 45815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->linenumbers = 1; 45825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GROW; 45835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 45845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: beginning of the document processing. 
45855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 45875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 45885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 45905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((ctxt->input->end - ctxt->input->cur) >= 4)) { 45915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 45925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Get the 4 first bytes and decode the charset 45935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if enc != XML_CHAR_ENCODING_NONE 45945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * plug some encoding conversion routines. 
45955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 45965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start[0] = RAW; 45975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start[1] = NXT(1); 45985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start[2] = NXT(2); 45995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start[3] = NXT(3); 46005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enc = xmlDetectCharEncoding(&start[0], 4); 46015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (enc != XML_CHAR_ENCODING_NONE) { 46025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchEncoding(ctxt, enc); 46035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Wipe out everything which is before the first '<' 46085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 46105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == 0) { 46115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY, 46125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Document is empty\n", NULL, NULL); 46135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 46165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startDocument(ctxt->userData); 46175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
46185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse possible comments and PIs before any content 46215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (((CUR == '<') && (NXT(1) == '!') && 46235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(2) == '-') && (NXT(3) == '-')) || 46245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((CUR == '<') && (NXT(1) == '?'))) { 46255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 46265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 46275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 46285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Then possibly doc type declaration(s) and more Misc 46335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * (doctypedecl Misc*)? 
46345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '<') && (NXT(1) == '!') && 46365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'D') && (UPP(3) == 'O') && 46375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'C') && (UPP(5) == 'T') && 46385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(6) == 'Y') && (UPP(7) == 'P') && 46395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(8) == 'E')) { 46405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocTypeDecl(ctxt); 46415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 46435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse possible comments and PIs before any content 46465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (((CUR == '<') && (NXT(1) == '!') && 46485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (NXT(2) == '-') && (NXT(3) == '-')) || 46495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((CUR == '<') && (NXT(1) == '?'))) { 46505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 46515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 46525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 46535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Time to start parsing 
the tree itself 46575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseContentInternal(ctxt); 46595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * autoclose 46625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == 0) 46645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 46655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 46685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: end of the document processing. 46695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 46705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 46715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endDocument(ctxt->userData); 46725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->myDoc != NULL) { 46745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dtd = xmlGetIntSubset(ctxt->myDoc); 46755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dtd == NULL) 46765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->myDoc->intSubset = 46775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", 46785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", 46795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST 
"http://www.w3.org/TR/REC-html40/loose.dtd"); 46805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 46815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (! ctxt->wellFormed) return(-1); 46825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 46835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 46845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 46875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 46885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parser contexts handling * 46895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 46905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 46915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 46925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 46935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlInitParserCtxt: 46945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 46955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 46965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Initialize a parser context 46975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 46985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of success and -1 in case of error 46995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 47005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 47025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlInitParserCtxt(htmlParserCtxtPtr ctxt) 
47035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 47045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlSAXHandler *sax; 47055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) return(-1); 47075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(ctxt, 0, sizeof(htmlParserCtxt)); 47085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->dict = xmlDictCreate(); 47105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->dict == NULL) { 47115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); 47125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 47135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 47145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler)); 47155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax == NULL) { 47165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); 47175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 47185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 47195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 47205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(sax, 0, sizeof(htmlSAXHandler)); 47215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Allocate the Input stack */ 47235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputTab = (htmlParserInputPtr *) 47245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlMalloc(5 * sizeof(htmlParserInputPtr)); 
47255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->inputTab == NULL) { 47265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); 47275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputNr = 0; 47285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputMax = 0; 47295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input = NULL; 47305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 47315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 47325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputNr = 0; 47335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputMax = 5; 47345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input = NULL; 47355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->version = NULL; 47365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->encoding = NULL; 47375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->standalone = -1; 47385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_START; 47395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Allocate the Node stack */ 47415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr)); 47425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nodeTab == NULL) { 47435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); 47445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeNr = 0; 47455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeMax = 0; 47465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) ctxt->node = NULL; 47475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputNr = 0; 47485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputMax = 0; 47495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input = NULL; 47505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 47515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 47525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeNr = 0; 47535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeMax = 10; 47545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->node = NULL; 47555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Allocate the Name stack */ 47575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 47585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameTab == NULL) { 47595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n"); 47605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameNr = 0; 47615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameMax = 0; 47625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name = NULL; 47635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeNr = 0; 47645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeMax = 0; 47655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->node = NULL; 47665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputNr = 0; 47675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputMax = 0; 47685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input = NULL; 
47695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 47705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 47715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameNr = 0; 47725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameMax = 10; 47735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name = NULL; 47745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoTab = NULL; 47765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoNr = 0; 47775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeInfoMax = 0; 47785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 47795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler; 47805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else { 47815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax = sax; 47825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); 47835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 47845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData = ctxt; 47855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->myDoc = NULL; 47865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->wellFormed = 1; 47875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->replaceEntities = 0; 47885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->linenumbers = xmlLineNumbersDefaultValue; 47895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->html = 1; 47905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 
47915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.userData = ctxt; 47925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.error = xmlParserValidityError; 47935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.warning = xmlParserValidityWarning; 47945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->record_info = 0; 47955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->validate = 0; 47965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nbChars = 0; 47975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 47985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->catalogs = NULL; 47995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitNodeInfoSeq(&ctxt->node_seq); 48005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(0); 48015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 48025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 48045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlFreeParserCtxt: 48055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 48065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Free all the memory used by a parser context. However the parsed 48085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * document in ctxt->myDoc is not freed. 
48095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 48105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void 48125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlFreeParserCtxt(htmlParserCtxtPtr ctxt) 48135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 48145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 48155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 48165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 48185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewParserCtxt: 48195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Allocate and initialize a new parser context. 48215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the htmlParserCtxtPtr or NULL in case of allocation error 48235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 48245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr 48265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewParserCtxt(void) 48275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 48285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserCtxtPtr ctxt; 48295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 48315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) { 48325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlErrMemory(NULL, "NewParserCtxt: out of 
memory\n"); 48335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 48345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 48355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(ctxt, 0, sizeof(xmlParserCtxt)); 48365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (htmlInitParserCtxt(ctxt) < 0) { 48375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlFreeParserCtxt(ctxt); 48385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 48395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 48405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ctxt); 48415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 48425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 48445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreateMemoryParserCtxt: 48455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @buffer: a pointer to a char array 48465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size: the size of the array 48475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for an HTML in-memory document. 
48495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL 48515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 48525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr 48535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreateMemoryParserCtxt(const char *buffer, int size) { 48545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserCtxtPtr ctxt; 48555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr input; 48565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr buf; 48575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buffer == NULL) 48595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 48605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (size <= 0) 48615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 48625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlNewParserCtxt(); 48645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 48655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 48665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 48685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf == NULL) return(NULL); 48695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input = xmlNewInputStream(ctxt); 48715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (input == NULL) { 
48725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 48735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 48745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 48755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->filename = NULL; 48775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->buf = buf; 48785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->base = input->buf->buffer->content; 48795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->cur = input->buf->buffer->content; 48805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input->end = &input->buf->buffer->content[input->buf->buffer->use]; 48815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, input); 48835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ctxt); 48845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 48855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 48865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 48875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreateDocParserCtxt: 48885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur: a pointer to an array of xmlChar 48895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: a free form C string describing the HTML document encoding, or NULL 48905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for an HTML document. 
48925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: check the need to add encoding handling there 48945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 48955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL 48965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 48975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlParserCtxtPtr 48985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) { 48995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int len; 49005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 49015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) 49035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 49045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = xmlStrlen(cur); 49055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlCreateMemoryParserCtxt((char *)cur, len); 49065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 49075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 49085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (encoding != NULL) { 49105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncoding enc; 49115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncodingHandlerPtr handler; 49125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->input->encoding != NULL) 49145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
xmlFree((xmlChar *) ctxt->input->encoding); 49155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding); 49165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enc = xmlParseCharEncoding(encoding); 49185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 49195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * registered set of known encodings 49205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 49215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (enc != XML_CHAR_ENCODING_ERROR) { 49225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchEncoding(ctxt, enc); 49235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 49245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 49255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unsupported encoding %s\n", 49265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (const xmlChar *) encoding, NULL); 49275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 49285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 49295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 49305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * fallback for unknown encodings 49315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 49325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) handler = xmlFindCharEncodingHandler((const char *) encoding); 49335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (handler != NULL) { 49345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchToEncoding(ctxt, handler); 49355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 
49365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 49375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Unsupported encoding %s\n", 49385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (const xmlChar *) encoding, NULL); 49395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 49405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 49415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 49425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ctxt); 49435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 49445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_PUSH_ENABLED 49465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 49475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 49485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Progressive parsing interfaces * 49495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 49505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 49515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 49535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseLookupSequence: 49545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 49555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @first: the first char to lookup 49565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @next: the next char to lookup or zero 49575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @third: the next char to lookup or zero 
 * @iscomment:  flag to force checking inside comments
 * @ignoreattrval:  if non-zero, skip over quoted attribute values while
 *                  scanning
 *
 * Try to find if a sequence (first, next, third) or just (first, next) or
 * (first) is available in the input stream.
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
 * to avoid rescanning sequences of bytes; it DOES change the state of the
 * parser, so do not use it liberally.
 * This is basically similar to xmlParseLookupSequence()
 *
 * Returns the index to the current parsing point if the full sequence
 * is available, -1 otherwise.
49695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 49705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 49715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, 49725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar next, xmlChar third, int iscomment, 49735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ignoreattrval) 49745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 49755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int base, len; 49765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserInputPtr in; 49775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *buf; 49785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int incomment = 0; 49795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int invalue = 0; 49805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char valdellim = 0x0; 49815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in = ctxt->input; 49835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in == NULL) 49845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 49855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base = in->cur - in->base; 49875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (base < 0) 49885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 49895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 49905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->checkIndex > base) 49915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base = ctxt->checkIndex; 49925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
49935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) { 49945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = in->base; 49955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = in->length; 49965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 49975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = in->buf->buffer->content; 49985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = in->buf->buffer->use; 49995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 50015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* take into account the sequence length */ 50025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (third) 50035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len -= 2; 50045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (next) 50055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len--; 50065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (; base < len; base++) { 50075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!incomment) && (base + 4 < len) && (!iscomment)) { 50085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((buf[base] == '<') && (buf[base + 1] == '!') && 50095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (buf[base + 2] == '-') && (buf[base + 3] == '-')) { 50105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) incomment = 1; 50115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* do not increment past <! 
- some people use <!--> */ 50125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base += 2; 50135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ignoreattrval) { 50165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf[base] == '"' || buf[base] == '\'') { 50175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (invalue) { 50185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf[base] == valdellim) { 50195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) invalue = 0; 50205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 50215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 50235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) valdellim = buf[base]; 50245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) invalue = 1; 50255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 50265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (invalue) { 50285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 50295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (incomment) { 50325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (base + 3 > len) 50335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 50345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((buf[base] == '-') && (buf[base + 1] == '-') && 50355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (buf[base + 2] == '>')) { 
50365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) incomment = 0; 50375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base += 2; 50385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 50405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf[base] == first) { 50425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (third != 0) { 50435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((buf[base + 1] != next) || (buf[base + 2] != third)) 50445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 50455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (next != 0) { 50465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf[base + 1] != next) 50475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 50485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 50505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 50515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (next == 0) 50525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 50535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: lookup '%c' found at %d\n", 50545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) first, base); 50555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (third == 0) 50565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 50575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: lookup '%c%c' found at %d\n", 50585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) first, next, base); 
50595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 50605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 50615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: lookup '%c%c%c' found at %d\n", 50625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) first, next, third, base); 50635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 50645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (base - (in->cur - in->base)); 50655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 50675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!incomment) && (!invalue)) 50685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = base; 50695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 50705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (next == 0) 50715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 50725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: lookup '%c' failed\n", first); 50735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (third == 0) 50745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 50755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: lookup '%c%c' failed\n", first, next); 50765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 50775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 50785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: lookup '%c%c%c' failed\n", first, next, 50795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) third); 50805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 
50815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 50825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 50835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 50845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 50855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseLookupChars: 50865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 50875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @stop: Array of chars, which stop the lookup. 50885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @stopLen: Length of stop-Array 50895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 50905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to find if any char of the stop-Array is available in the input 50915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * stream. 50925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This function has a side effect of (possibly) incrementing ctxt->checkIndex 50935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to avoid rescanning sequences of bytes, it DOES change the state of the 50945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parser, do not use liberally. 50955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 50965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the index to the current parsing point if a stopChar 50975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * is available, -1 otherwise. 
50985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 50995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 51005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, 51015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int stopLen) 51025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 51035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int base, len; 51045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserInputPtr in; 51055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *buf; 51065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int incomment = 0; 51075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i; 51085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 51095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in = ctxt->input; 51105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in == NULL) 51115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 51125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 51135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base = in->cur - in->base; 51145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (base < 0) 51155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 51165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 51175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->checkIndex > base) 51185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base = ctxt->checkIndex; 51195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 51205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) { 51215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = in->base; 
51225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = in->length; 51235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 51245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = in->buf->buffer->content; 51255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) len = in->buf->buffer->use; 51265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 51285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (; base < len; base++) { 51295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!incomment && (base + 4 < len)) { 51305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((buf[base] == '<') && (buf[base + 1] == '!') && 51315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (buf[base + 2] == '-') && (buf[base + 3] == '-')) { 51325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) incomment = 1; 51335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* do not increment past <! 
- some people use <!--> */ 51345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base += 2; 51355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (incomment) { 51385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (base + 3 > len) 51395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 51405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((buf[base] == '-') && (buf[base + 1] == '-') && 51415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (buf[base + 2] == '>')) { 51425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) incomment = 0; 51435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base += 2; 51445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 51465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0; i < stopLen; ++i) { 51485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf[base] == stop[i]) { 51495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 51505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (base - (in->cur - in->base)); 51515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 51545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = base; 51555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (-1); 51565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 51575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
51585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 51595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseTryOrFinish: 51605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 51615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @terminate: last chunk indicator 51625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 51635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to progress on parsing 51645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 51655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns zero if no parsing was possible 51665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 51675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int 51685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { 51695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ret = 0; 51705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserInputPtr in; 51715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int avail = 0; 51725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar cur, next; 51735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 51745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 51755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ctxt->instate) { 51765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_EOF: 51775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try EOF\n"); break; 51795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_START: 51805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
xmlGenericError(xmlGenericErrorContext, 51815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try START\n"); break; 51825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_MISC: 51835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try MISC\n");break; 51855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_COMMENT: 51865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try COMMENT\n");break; 51885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_PROLOG: 51895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try PROLOG\n");break; 51915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_START_TAG: 51925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try START_TAG\n");break; 51945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_CONTENT: 51955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try CONTENT\n");break; 51975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_CDATA_SECTION: 51985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 51995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try CDATA_SECTION\n");break; 52005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_END_TAG: 
52015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try END_TAG\n");break; 52035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_ENTITY_DECL: 52045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try ENTITY_DECL\n");break; 52065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_ENTITY_VALUE: 52075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try ENTITY_VALUE\n");break; 52095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_ATTRIBUTE_VALUE: 52105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try ATTRIBUTE_VALUE\n");break; 52125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_DTD: 52135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try DTD\n");break; 52155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_EPILOG: 52165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try EPILOG\n");break; 52185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_PI: 52195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try PI\n");break; 
52215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_SYSTEM_LITERAL: 52225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: try SYSTEM_LITERAL\n");break; 52245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 52255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 52265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (1) { 52285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in = ctxt->input; 52305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in == NULL) break; 52315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) 52325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->length - (in->cur - in->base); 52335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 52345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->buf->buffer->use - (in->cur - in->base); 52355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((avail == 0) && (terminate)) { 52365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 52375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 52385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 52395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: end of the document processing. 
52405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 52415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EOF; 52425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 52435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endDocument(ctxt->userData); 52445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 52455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 52465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 1) 52475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 52485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 52495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == 0) { 52505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(1); 52515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 52525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 52535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ctxt->instate) { 52555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_EOF: 52565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 52575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Document parsing is done ! 52585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 52595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 52605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_START: 52615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 52625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Very first chars read from the document flow. 
52635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 52645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 52655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IS_BLANK_CH(cur)) { 52665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 52675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) 52685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->length - (in->cur - in->base); 52695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 52705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->buf->buffer->use - (in->cur - in->base); 52715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 52725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 52735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->setDocumentLocator(ctxt->userData, 52745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &xmlDefaultSAXLocator); 52755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->startDocument) && 52765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!ctxt->disableSAX)) 52775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->startDocument(ctxt->userData); 52785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 52805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next = in->cur[1]; 52815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (next == '!') && 52825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'D') && (UPP(3) == 'O') && 52835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'C') && (UPP(5) == 'T') && 
52845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(6) == 'Y') && (UPP(7) == 'P') && 52855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(8) == 'E')) { 52865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 52875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 52885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 52895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 52905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing internal subset\n"); 52925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 52935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocTypeDecl(ctxt); 52945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_PROLOG; 52955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 52965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 52975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering PROLOG\n"); 52985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 52995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 53005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_MISC; 53015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering MISC\n"); 53045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
53065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 53075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_MISC: 53085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 53095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) 53105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->length - (in->cur - in->base); 53115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 53125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->buf->buffer->use - (in->cur - in->base); 53135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 2) 53145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 53165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next = in->cur[1]; 53175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (next == '!') && 53185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (in->cur[2] == '-') && (in->cur[3] == '-')) { 53195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 53205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) 53215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing Comment\n"); 53255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 53275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_MISC; 
53285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '?')) { 53295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 53305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 53315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing PI\n"); 53355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 53375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_MISC; 53385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '!') && 53395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'D') && (UPP(3) == 'O') && 53405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'C') && (UPP(5) == 'T') && 53415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(6) == 'Y') && (UPP(7) == 'P') && 53425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(8) == 'E')) { 53435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 53445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 53455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing internal 
subset\n"); 53495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocTypeDecl(ctxt); 53515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_PROLOG; 53525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering PROLOG\n"); 53555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '!') && 53575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (avail < 9)) { 53585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 53605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_START_TAG; 53615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering START_TAG\n"); 53645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 53665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 53675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_PROLOG: 53685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP_BLANKS; 53695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) 53705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->length - (in->cur - in->base); 53715821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) else 53725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->buf->buffer->use - (in->cur - in->base); 53735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 2) 53745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 53765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next = in->cur[1]; 53775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (next == '!') && 53785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (in->cur[2] == '-') && (in->cur[3] == '-')) { 53795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 53805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) 53815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 53835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing Comment\n"); 53855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 53875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_PROLOG; 53885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '?')) { 53895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 53905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 53915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 53925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 
53935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 53945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing PI\n"); 53955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 53965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 53975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_PROLOG; 53985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '!') && 53995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (avail < 4)) { 54005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 54025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_START_TAG; 54035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 54045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 54055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering START_TAG\n"); 54065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 54075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 54085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 54095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_EPILOG: 54105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->buf == NULL) 54115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->length - (in->cur - in->base); 54125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 54135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) avail = in->buf->buffer->use - (in->cur - in->base); 54145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 1) 
54155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 54175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IS_BLANK_CH(cur)) { 54185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseCharData(ctxt); 54195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 54215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 2) 54225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next = in->cur[1]; 54245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (next == '!') && 54255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (in->cur[2] == '-') && (in->cur[3] == '-')) { 54265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 54275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) 54285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 54305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 54315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing Comment\n"); 54325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 54335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 54345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EPILOG; 54355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '?')) { 54365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 
54375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 54385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 54405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 54415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing PI\n"); 54425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 54435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 54445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EPILOG; 54455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '!') && 54465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (avail < 4)) { 54475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 54495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = XML_ERR_DOCUMENT_END; 54505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->wellFormed = 0; 54515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EOF; 54525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 54535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 54545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering EOF\n"); 54555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 54565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 54575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endDocument(ctxt->userData); 54585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) goto done; 54595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 54605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 54615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_START_TAG: { 54625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const xmlChar *name; 54635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int failed; 54645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const htmlElemDesc * info; 54655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 54665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 2) 54675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 54695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur != '<') { 54705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 54715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 54725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 54735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 54745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 54755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 54765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 54775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (in->cur[1] == '/') { 54785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_END_TAG; 54795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 54805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 54815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 
54825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering END_TAG\n"); 54835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 54845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 54855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 54865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 54875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 54885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 54895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 54905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) failed = htmlParseStartTag(ctxt); 54915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name = ctxt->name; 54925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((failed == -1) || 54935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (name == NULL)) { 54945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '>') 54955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 54965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 54975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 54985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 54995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 55005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the info for that element. 
55015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 55025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) info = htmlTagLookup(name); 55035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (info == NULL) { 55045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG, 55055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Tag %s invalid\n", name, NULL); 55065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 55095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for an Empty Element labeled the XML/SGML way 55105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 55115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((CUR == '/') && (NXT(1) == '>')) { 55125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SKIP(2); 55135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 55145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 55155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 55165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 55175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 55185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 55195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 55205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 55215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 55225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55235821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) 55245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CUR == '>') { 55255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 55265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 55275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, 55285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Couldn't find end of Start Tag %s\n", 55295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) name, NULL); 55305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 55325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * end of parsing of this node. 55335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 55345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlStrEqual(name, ctxt->name)) { 55355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nodePop(ctxt); 55365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 55375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 55405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 55415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 55425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 55435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 55445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 55455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 
55485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check for an Empty Element from DTD definition 55495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 55505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((info != NULL) && (info->empty)) { 55515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 55525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endElement(ctxt->userData, name); 55535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlnamePop(ctxt); 55545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 55565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 55575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 55585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 55595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 55605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 55615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_CONTENT: { 55635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) long cons; 55645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 55655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle preparsed entities and charRef 55665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 55675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->token != 0) { 55685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar chr[2] = { 0 , 0 } ; 55695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55705821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) chr[0] = (xmlChar) ctxt->token; 55715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 55725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 55735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters(ctxt->userData, chr, 1); 55745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->token = 0; 55755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 55765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((avail == 1) && (terminate)) { 55785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 55795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur != '<') && (cur != '&')) { 55805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax != NULL) { 55815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IS_BLANK_CH(cur)) { 55825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->ignorableWhitespace != NULL) 55835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->ignorableWhitespace( 55845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData, &cur, 1); 55855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 55865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckParagraph(ctxt); 55875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax->characters != NULL) 55885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->characters( 55895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData, &cur, 1); 55905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
55925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->token = 0; 55935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 55945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) in->cur++; 55955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 55965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 55985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 2) 55995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cur = in->cur[0]; 56015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next = in->cur[1]; 56025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cons = ctxt->nbChars; 56035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || 56045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { 56055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 56065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle SCRIPT/STYLE separately 56075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 56085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!terminate) { 56095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int idx; 56105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar val; 56115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 56125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); 56135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (idx < 0) 56145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) val = 
in->cur[idx + 2]; 56165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (val == 0) /* bad cut of input */ 56175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 56195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseScript(ctxt); 56205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (next == '/')) { 56215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_END_TAG; 56225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 56235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 56245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 56255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering END_TAG\n"); 56265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 56275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 56285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 56295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 56305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 56315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Sometimes DOCTYPE arrives in the middle of the document 56325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 56335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((cur == '<') && (next == '!') && 56345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(2) == 'D') && (UPP(3) == 'O') && 56355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(4) == 'C') && (UPP(5) == 'T') && 56365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(6) == 'Y') && (UPP(7) == 'P') && 56375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (UPP(8) == 'E')) { 
56385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 56395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 56405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, 56425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Misplaced DOCTYPE declaration\n", 56435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "DOCTYPE" , NULL); 56445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocTypeDecl(ctxt); 56455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '!') && 56465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (in->cur[2] == '-') && (in->cur[3] == '-')) { 56475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 56485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence( 56495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt, '-', '-', '>', 1, 1) < 0)) 56505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 56525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 56535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing Comment\n"); 56545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 56555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseComment(ctxt); 56565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 56575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '?')) { 56585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 
56595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 56605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 56625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 56635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing PI\n"); 56645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 56655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParsePI(ctxt); 56665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 56675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '!') && (avail < 4)) { 56685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((cur == '<') && (next == '/')) { 56705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_END_TAG; 56715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 56725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 56735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 56745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering END_TAG\n"); 56755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 56765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 56775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (cur == '<') { 56785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_START_TAG; 56795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 56805821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles)#ifdef DEBUG_PUSH 56815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 56825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering START_TAG\n"); 56835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 56845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 56855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (cur == '&') { 56865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 56875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupChars(ctxt, 56885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "; >/", 4) < 0)) 56895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 56905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 56915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 56925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing Reference\n"); 56935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 56945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* TODO: check generation of subtrees if noent !!! */ 56955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseReference(ctxt); 56965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 56975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 56985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * check that the text sequence is complete 56995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * before handing out the data to the parser 57005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to avoid problems with erroneous end of 57015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * data detection. 
57025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 57035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 57045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0)) 57055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 57065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: Parsing char data\n"); 57105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseCharData(ctxt); 57125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 57135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 57145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cons == ctxt->nbChars) { 57155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->node != NULL) { 57165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 57175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "detected an error in element content\n", 57185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 57195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 57205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEXT; 57215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 57235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 57245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
57265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_END_TAG: 57275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (avail < 2) 57285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 57295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((!terminate) && 57305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 57315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto done; 57325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseEndTag(ctxt); 57335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->nameNr == 0) { 57345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EPILOG; 57355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 57365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 57375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 57385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 57425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_CDATA_SECTION: 57455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 57465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == CDATA\n", 57475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 
57485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 57495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 57535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_DTD: 57565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 57575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == DTD\n", 57585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 57595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 57605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 57645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_COMMENT: 57675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 57685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == COMMENT\n", 57695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, 
NULL); 57705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 57715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 57755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_PI: 57785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 57795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == PI\n", 57805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 57815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 57825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 57865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_ENTITY_DECL: 57895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 57905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == ENTITY_DECL\n", 57915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) NULL, NULL); 57925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 57935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 57945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 57955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 57965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 57975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 57985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 57995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_ENTITY_VALUE: 58005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 58015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == ENTITY_VALUE\n", 58025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 58035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 58045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 58055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 58065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 58075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering DTD\n"); 58085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 58095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 58105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_ATTRIBUTE_VALUE: 58115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 58125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == ATTRIBUTE_VALUE\n", 
58135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 58145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_START_TAG; 58155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 58165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 58175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 58185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering START_TAG\n"); 58195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 58205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 58215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_SYSTEM_LITERAL: 58225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 58235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n", 58245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 58255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 58265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 58275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 58285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 58295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 58305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 58315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 58325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_IGNORE: 58335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 58345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
"HPP: internal error, state == XML_PARSER_IGNORE\n", 58355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 58365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 58375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 58385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 58395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 58405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 58415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 58425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 58435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_PARSER_PUBLIC_LITERAL: 58445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 58455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: internal error, state == XML_PARSER_LITERAL\n", 58465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NULL, NULL); 58475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_CONTENT; 58485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 58495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 58505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, 58515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "HPP: entering CONTENT\n"); 58525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 58535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 58545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 58555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 58565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
58575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)done: 58585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((avail == 0) && (terminate)) { 58595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlAutoCloseOnEnd(ctxt); 58605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 58615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* 58625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * SAX: end of the document processing. 58635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 58645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_EOF; 58655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 58665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endDocument(ctxt->userData); 58675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 58685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 58695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->myDoc != NULL) && 58705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ((terminate) || (ctxt->instate == XML_PARSER_EOF) || 58715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate == XML_PARSER_EPILOG))) { 58725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlDtdPtr dtd; 58735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dtd = xmlGetIntSubset(ctxt->myDoc); 58745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (dtd == NULL) 58755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->myDoc->intSubset = 58765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", 58775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "-//W3C//DTD HTML 4.0 
Transitional//EN", 58785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); 58795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 58805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 58815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); 58825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 58835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 58845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 58855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 58865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 58875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseChunk: 58885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 58895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @chunk: an char array 58905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size: the size in byte of the chunk 58915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @terminate: last chunk indicator 58925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 58935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a Chunk of memory 58945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 58955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns zero if no error, the xmlParserErrors otherwise. 
58965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 58975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 58985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, 58995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int terminate) { 59005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt == NULL) || (ctxt->input == NULL)) { 59015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 59025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "htmlParseChunk: context error\n", NULL, NULL); 59035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(XML_ERR_INTERNAL_ERROR); 59045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 59065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 59075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int base = ctxt->input->base - ctxt->input->buf->buffer->content; 59085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cur = ctxt->input->cur - ctxt->input->base; 59095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int res; 59105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 59125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (res < 0) { 59135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = XML_PARSER_EOF; 59145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->disableSAX = 1; 59155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (XML_PARSER_EOF); 
59165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->base = ctxt->input->buf->buffer->content + base; 59185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur = ctxt->input->base + cur; 59195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->end = 59205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 59215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 59225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); 59235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 59245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if 0 59265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 59275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseTryOrFinish(ctxt, terminate); 59285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 59295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (ctxt->instate != XML_PARSER_EOF) { 59305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 59315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr in = ctxt->input->buf; 59325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((in->encoder != NULL) && (in->buffer != NULL) && 59335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (in->raw != NULL)) { 59345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nbchars; 59355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59365821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 59375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nbchars < 0) { 59385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 59395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "encoder error\n", NULL, NULL); 59405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(XML_ERR_INVALID_ENCODING); 59415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseTryOrFinish(ctxt, terminate); 59465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (terminate) { 59475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->instate != XML_PARSER_EOF) && 59485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate != XML_PARSER_EPILOG) && 59495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->instate != XML_PARSER_MISC)) { 59505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = XML_ERR_DOCUMENT_END; 59515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->wellFormed = 0; 59525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->instate != XML_PARSER_EOF) { 59545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 59555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->endDocument(ctxt->userData); 59565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = 
XML_PARSER_EOF; 59585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 59595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return((xmlParserErrors) ctxt->errNo); 59605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 59615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 59635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 59645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * User entry points * 59655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 59665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 59675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 59695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreatePushParserCtxt: 59705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @sax: a SAX handler 59715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @user_data: The user data returned on SAX callbacks 59725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @chunk: a pointer to an array of chars 59735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size: number of chars in the array 59745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename: an optional file name or URI 59755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @enc: an optional encoding 59765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 59775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for using the HTML parser in push mode 59785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @filename is used for fetching external 
entities 59795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and error/warning reports. 59805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 59815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL 59825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 59835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr 59845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, 59855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *chunk, int size, const char *filename, 59865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncoding enc) { 59875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 59885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserInputPtr inputStream; 59895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr buf; 59905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 59925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) buf = xmlAllocParserInputBuffer(enc); 59945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buf == NULL) return(NULL); 59955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 59965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlNewParserCtxt(); 59975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) { 59985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(buf); 59995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 60005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 
60015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder) 60025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->charset=XML_CHAR_ENCODING_UTF8; 60035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax != NULL) { 60045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler) 60055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(ctxt->sax); 60065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler)); 60075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax == NULL) { 60085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 60095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(ctxt); 60105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 60115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 60125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler)); 60135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (user_data != NULL) 60145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData = user_data; 60155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 60165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (filename == NULL) { 60175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->directory = NULL; 60185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 60195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->directory = xmlParserGetDirectory(filename); 60205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 60215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream = 
htmlNewInputStream(ctxt); 60235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (inputStream == NULL) { 60245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 60255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(buf); 60265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 60275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 60285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (filename == NULL) 60305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream->filename = NULL; 60315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 60325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream->filename = (char *) 60335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCanonicPath((const xmlChar *) filename); 60345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream->buf = buf; 60355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream->base = inputStream->buf->buffer->content; 60365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream->cur = inputStream->buf->buffer->content; 60375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream->end = 60385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 60395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, inputStream); 60415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 60435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->buf != NULL)) { 
60445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int base = ctxt->input->base - ctxt->input->buf->buffer->content; 60455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cur = ctxt->input->cur - ctxt->input->base; 60465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 60485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->base = ctxt->input->buf->buffer->content + base; 60505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->cur = ctxt->input->base + cur; 60515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->end = 60525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 60535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH 60545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); 60555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 60565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 60575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->progressive = 1; 60585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ctxt); 60605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 60615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* LIBXML_PUSH_ENABLED */ 60625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 60645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlSAXParseDoc: 60655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
* @cur: a pointer to an array of xmlChar 60665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: a free form C string describing the HTML document encoding, or NULL 60675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @sax: the SAX handler block 60685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @userData: if using SAX, this pointer will be provided on callbacks. 60695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 60705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks 60715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to handle parse events. If sax is NULL, fallback to the default DOM 60725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * behavior and return a tree. 60735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 60745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree unless SAX is NULL or the document is 60755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * not well formed. 
60765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 60775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 60795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) { 60805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlDocPtr ret; 60815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 60825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 60845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) return(NULL); 60865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlCreateDocParserCtxt(cur, encoding); 60895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) return(NULL); 60905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax != NULL) { 60915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax != NULL) xmlFree (ctxt->sax); 60925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax = sax; 60935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData = userData; 60945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 60955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 60965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocument(ctxt); 60975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = ctxt->myDoc; 60985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax != NULL) { 
60995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax = NULL; 61005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData = NULL; 61015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlFreeParserCtxt(ctxt); 61035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 61055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 61065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 61085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseDoc: 61095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur: a pointer to an array of xmlChar 61105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: a free form C string describing the HTML document encoding, or NULL 61115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 61125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML in-memory document and build a tree. 
61135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 61145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 61155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 61165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 61185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseDoc(xmlChar *cur, const char *encoding) { 61195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(htmlSAXParseDoc(cur, encoding, NULL, NULL)); 61205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 61215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 61245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreateFileParserCtxt: 61255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename: the filename 61265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: a free form C string describing the HTML document encoding, or NULL 61275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 61285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for a file content. 61295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Automatic support for ZLIB/Compress compressed document is provided 61305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * by default if found at compile-time. 
61315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 61325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL 61335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 61345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr 61355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreateFileParserCtxt(const char *filename, const char *encoding) 61365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 61375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 61385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserInputPtr inputStream; 61395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *canonicFilename; 61405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* htmlCharEncoding enc; */ 61415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlChar *content, *content_line = (xmlChar *) "charset="; 61425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (filename == NULL) 61445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 61455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlNewParserCtxt(); 61475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) { 61485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 61495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename); 61515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (canonicFilename == NULL) { 61525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_SAX1_ENABLED 
61535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (xmlDefaultSAXHandler.error != NULL) { 61545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlDefaultSAXHandler.error(NULL, "out of memory\n"); 61555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 61575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 61585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 61595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt); 61625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree(canonicFilename); 61635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (inputStream == NULL) { 61645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 61655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 61665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, inputStream); 61695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* set encoding */ 61715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (encoding) { 61725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1); 61735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (content) { 61745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) strcpy ((char *)content, (char *)content_line); 
61755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) strcat ((char *)content, (char *)encoding); 61765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCheckEncoding (ctxt, content); 61775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree (content); 61785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 61805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ctxt); 61825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 61835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 61845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 61855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlSAXParseFile: 61865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename: the filename 61875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: a free form C string describing the HTML document encoding, or NULL 61885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @sax: the SAX handler block 61895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @userData: if using SAX, this pointer will be provided on callbacks. 61905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 61915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML file and build a tree. Automatic support for ZLIB/Compress 61925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * compressed document is provided by default if found at compile-time. 61935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * It use the given SAX function block to handle the parsing callback. 61945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If sax is NULL, fallback to the default DOM tree building routines. 
61955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 61965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree unless SAX is NULL or the document is 61975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * not well formed. 61985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 61995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 62015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax, 62025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *userData) { 62035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlDocPtr ret; 62045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 62055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlSAXHandlerPtr oldsax = NULL; 62065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 62085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlCreateFileParserCtxt(filename, encoding); 62105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) return(NULL); 62115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax != NULL) { 62125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) oldsax = ctxt->sax; 62135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax = sax; 62145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData = userData; 62155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 62165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
htmlParseDocument(ctxt); 62185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = ctxt->myDoc; 62205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (sax != NULL) { 62215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax = oldsax; 62225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->userData = NULL; 62235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 62245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlFreeParserCtxt(ctxt); 62255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(ret); 62275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 62285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 62305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseFile: 62315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename: the filename 62325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: a free form C string describing the HTML document encoding, or NULL 62335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML file and build a tree. Automatic support for ZLIB/Compress 62355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * compressed document is provided by default if found at compile-time. 
62365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 62385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 62395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 62415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseFile(const char *filename, const char *encoding) { 62425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(htmlSAXParseFile(filename, encoding, NULL, NULL)); 62435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 62445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 62465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlHandleOmittedElem: 62475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @val: int 0 or 1 62485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Set and return the previous value for handling HTML omitted tags. 62505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the last value for 0 for no handling, 1 for auto insertion. 
62525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 62535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 62555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlHandleOmittedElem(int val) { 62565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int old = htmlOmittedDefaultValue; 62575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlOmittedDefaultValue = val; 62595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(old); 62605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 62615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 62635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlElementAllowedHere: 62645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @parent: HTML parent element 62655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element 62665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an HTML element may be a direct child of a parent element. 62685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Note - doesn't check for deprecated elements 62695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if allowed; 0 otherwise. 
62715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 62725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 62735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) { 62745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** p ; 62755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( ! elt || ! parent || ! parent->subelts ) 62775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0 ; 62785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( p = parent->subelts; *p; ++p ) 62805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( !xmlStrcmp((const xmlChar *)*p, elt) ) 62815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 1 ; 62825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 62835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0 ; 62845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 62855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 62865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlElementStatusHere: 62875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @parent: HTML parent element 62885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element 62895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an HTML element may be a direct child of a parent element. 62915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and if so whether it is valid or deprecated. 
62925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 62935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID 62945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 62955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlStatus 62965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) { 62975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( ! parent || ! elt ) 62985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_INVALID ; 62995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) ) 63005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_INVALID ; 63015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ( elt->dtd == 0 ) ? 
HTML_VALID : HTML_DEPRECATED ; 63035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 63045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 63055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAttrAllowed: 63065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element 63075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @attr: HTML attribute 63085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @legacy: whether to allow deprecated attributes 63095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 63105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an attribute is valid for an element 63115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Has full knowledge of Required and Deprecated attributes 63125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 63135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID 63145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 63155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlStatus 63165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) { 63175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** p ; 63185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( !elt || ! 
attr ) 63205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_INVALID ; 63215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( elt->attrs_req ) 63235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( p = elt->attrs_req; *p; ++p) 63245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( !xmlStrcmp((const xmlChar*)*p, attr) ) 63255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_REQUIRED ; 63265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( elt->attrs_opt ) 63285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( p = elt->attrs_opt; *p; ++p) 63295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( !xmlStrcmp((const xmlChar*)*p, attr) ) 63305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_VALID ; 63315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( legacy && elt->attrs_depr ) 63335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for ( p = elt->attrs_depr; *p; ++p) 63345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( !xmlStrcmp((const xmlChar*)*p, attr) ) 63355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_DEPRECATED ; 63365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_INVALID ; 63385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 63395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 63405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNodeStatus: 63415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @node: an htmlNodePtr in a tree 63425821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) * @legacy: whether to allow deprecated elements (YES is faster here 63435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * for Element nodes) 63445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 63455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether the tree node is valid. Experimental (the author 63465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * only uses the HTML enhancements in a SAX parser) 63475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 63485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Return: for Element nodes, a return from htmlElementAllowedHere (if 63495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * legacy allowed) or htmlElementStatusHere (otherwise). 63505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * for Attribute nodes, a return from htmlAttrAllowed 63515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * for other nodes, HTML_NA (no checks performed) 63525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 63535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlStatus 63545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNodeStatus(const htmlNodePtr node, int legacy) { 63555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ( ! node ) 63565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return HTML_INVALID ; 63575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch ( node->type ) { 63595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_ELEMENT_NODE: 63605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return legacy 63615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ? 
( htmlElementAllowedHere ( 63625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlTagLookup(node->parent->name) , node->name 63635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ) ? HTML_VALID : HTML_INVALID ) 63645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : htmlElementStatusHere( 63655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlTagLookup(node->parent->name) , 63665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlTagLookup(node->name) ) 63675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ; 63685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case XML_ATTRIBUTE_NODE: 63695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return htmlAttrAllowed( 63705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlTagLookup(node->parent->name) , node->name, legacy) ; 63715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: return HTML_NA ; 63725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 63735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 63745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************ 63755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 63765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * New set (2.6.0) of simpler and more flexible APIs * 63775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * * 63785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/ 63795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 63805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * DICT_FREE: 63815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str: a string 63825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
63835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Free a string if it is not owned by the "dict" dictionnary in the 63845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * current scope 63855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 63865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DICT_FREE(str) \ 63875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((str) && ((!dict) || \ 63885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 63895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree((char *)(str)); 63905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 63915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 63925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReset: 63935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 63945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 63955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Reset a parser context 63965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 63975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void 63985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReset(htmlParserCtxtPtr ctxt) 63995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 64005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr input; 64015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlDictPtr dict; 64025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 64045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 64055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
xmlInitParser(); 64075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dict = ctxt->dict; 64085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 64105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeInputStream(input); 64115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 64125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inputNr = 0; 64135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input = NULL; 64145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->spaceNr = 0; 64165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->spaceTab != NULL) { 64175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->spaceTab[0] = -1; 64185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->space = &ctxt->spaceTab[0]; 64195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 64205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->space = NULL; 64215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 64225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nodeNr = 0; 64255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->node = NULL; 64265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nameNr = 0; 64285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->name = NULL; 64295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DICT_FREE(ctxt->version); 
64315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->version = NULL; 64325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DICT_FREE(ctxt->encoding); 64335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->encoding = NULL; 64345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DICT_FREE(ctxt->directory); 64355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->directory = NULL; 64365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DICT_FREE(ctxt->extSubURI); 64375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->extSubURI = NULL; 64385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DICT_FREE(ctxt->extSubSystem); 64395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->extSubSystem = NULL; 64405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->myDoc != NULL) 64415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeDoc(ctxt->myDoc); 64425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->myDoc = NULL; 64435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->standalone = -1; 64455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->hasExternalSubset = 0; 64465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->hasPErefs = 0; 64475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->html = 1; 64485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->external = 0; 64495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->instate = XML_PARSER_START; 64505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->token = 0; 64515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->wellFormed = 1; 64535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) ctxt->nsWellFormed = 1; 64545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->valid = 1; 64555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.userData = ctxt; 64565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.error = xmlParserValidityError; 64575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.warning = xmlParserValidityWarning; 64585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->record_info = 0; 64595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->nbChars = 0; 64605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->checkIndex = 0; 64615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->inSubset = 0; 64625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->errNo = XML_ERR_OK; 64635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->depth = 0; 64645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->charset = XML_CHAR_ENCODING_NONE; 64655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->catalogs = NULL; 64665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitNodeInfoSeq(&ctxt->node_seq); 64675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->attsDefault != NULL) { 64695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 64705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->attsDefault = NULL; 64715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 64725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->attsSpecial != NULL) { 64735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlHashFree(ctxt->attsSpecial, NULL); 64745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->attsSpecial = 
NULL; 64755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 64765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 64775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 64795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtUseOptions: 64805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 64815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 64825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 64835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Applies the options to the parser context 64845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 64855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of success, the set of unknown or unimplemented options 64865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * in case of error. 
64875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 64885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int 64895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) 64905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 64915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 64925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(-1); 64935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 64945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & HTML_PARSE_NOWARNING) { 64955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->warning = NULL; 64965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.warning = NULL; 64975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= XML_PARSE_NOWARNING; 64985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->options |= XML_PARSE_NOWARNING; 64995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & HTML_PARSE_NOERROR) { 65015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->error = NULL; 65025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->vctxt.error = NULL; 65035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->fatalError = NULL; 65045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= XML_PARSE_NOERROR; 65055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->options |= XML_PARSE_NOERROR; 65065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & HTML_PARSE_PEDANTIC) { 65085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->pedantic = 1; 
65095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= XML_PARSE_PEDANTIC; 65105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->options |= XML_PARSE_PEDANTIC; 65115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 65125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->pedantic = 0; 65135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & XML_PARSE_NOBLANKS) { 65145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->keepBlanks = 0; 65155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 65165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= XML_PARSE_NOBLANKS; 65175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->options |= XML_PARSE_NOBLANKS; 65185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 65195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->keepBlanks = 1; 65205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & HTML_PARSE_RECOVER) { 65215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->recovery = 1; 65225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= HTML_PARSE_RECOVER; 65235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 65245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->recovery = 0; 65255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & HTML_PARSE_COMPACT) { 65265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->options |= HTML_PARSE_COMPACT; 65275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= HTML_PARSE_COMPACT; 65285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options & XML_PARSE_HUGE) { 65305821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) ctxt->options |= XML_PARSE_HUGE; 65315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options -= XML_PARSE_HUGE; 65325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->dictNames = 0; 65345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (options); 65355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 65365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 65375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 65385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlDoRead: 65395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 65405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 65415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 65425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 65435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @reuse: keep the context for reuse 65445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 65455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Common front-end for the htmlRead functions 65465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 65475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree or NULL 65485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 65495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlDocPtr 65505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 65515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options, int reuse) 65525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles){ 65535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlDocPtr ret; 65545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 65555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtUseOptions(ctxt, options); 65565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->html = 1; 65575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (encoding != NULL) { 65585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlCharEncodingHandlerPtr hdlr; 65595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 65605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) hdlr = xmlFindCharEncodingHandler(encoding); 65615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (hdlr != NULL) { 65625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlSwitchToEncoding(ctxt, hdlr); 65635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->input->encoding != NULL) 65645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFree((xmlChar *) ctxt->input->encoding); 65655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->encoding = xmlStrdup((xmlChar *)encoding); 65665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((URL != NULL) && (ctxt->input != NULL) && 65695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ctxt->input->filename == NULL)) 65705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 65715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParseDocument(ctxt); 65725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ret = ctxt->myDoc; 65735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->myDoc = NULL; 
65745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!reuse) { 65755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((ctxt->dictNames) && 65765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ret != NULL) && 65775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (ret->dict == ctxt->dict)) 65785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt->dict = NULL; 65795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 65805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 65815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (ret); 65825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 65835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 65845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 65855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadDoc: 65865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur: a pointer to a zero terminated string 65875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 65885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 65895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 65905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 65915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree. 
65925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 65935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 65945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 65955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 65965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 65975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 65985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 65995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) 66015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 66045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlCreateDocParserCtxt(cur, NULL); 66055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 66065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 0)); 66085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 66095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 66115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadFile: 66125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename: a file or URL 66135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 66145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 
66155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 66165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML file from the filesystem or the network. 66175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 66185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 66195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 66205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 66215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadFile(const char *filename, const char *encoding, int options) 66225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 66235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 66245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 66265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlCreateFileParserCtxt(filename, encoding); 66275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 66285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, NULL, NULL, options, 0)); 66305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 66315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 66335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadMemory: 66345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @buffer: a pointer to a char array 66355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size: the size of the array 66365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 66375821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) * @encoding: the document encoding, or NULL 66385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 66395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 66405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree. 66415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 66425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 66435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 66445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 66455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 66465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 66475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 66485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 66505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = xmlCreateMemoryParserCtxt(buffer, size); 66515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 66525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlDefaultSAXHandlerInit(); 66545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt->sax != NULL) 66555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1)); 66565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 0)); 66575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 66585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
66595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 66605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadFd: 66615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @fd: an open file descriptor 66625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 66635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 66645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 66655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 66665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML from a file descriptor and build a tree. 66675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 66685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 66695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 66705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 66715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadFd(int fd, const char *URL, const char *encoding, int options) 66725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 66735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 66745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr input; 66755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 66765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (fd < 0) 66785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 66815821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 66825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (input == NULL) 66835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = xmlNewParserCtxt(); 66855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) { 66865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 66875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 66895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 66905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 66915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 66925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 66935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 66945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 66955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 66965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 0)); 66975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 66985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 66995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 67005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadIO: 67015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioread: an I/O read function 67025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioclose: an I/O close function 67035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 
@ioctx: an I/O handler 67045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 67055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 67065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 67075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 67085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML document from I/O functions and source and build a tree. 67095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 67105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 67115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 67125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 67135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 67145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *ioctx, const char *URL, const char *encoding, int options) 67155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 67165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlParserCtxtPtr ctxt; 67175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr input; 67185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 67195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ioread == NULL) 67215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInitParser(); 67235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input = xmlParserInputBufferCreateIO(ioread, ioclose, 
ioctx, 67255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) XML_CHAR_ENCODING_NONE); 67265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (input == NULL) 67275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ctxt = htmlNewParserCtxt(); 67295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) { 67305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 67315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 67335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 67345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 67355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 67365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserCtxt(ctxt); 67375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 67395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 67405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 0)); 67415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 67425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 67445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadDoc: 67455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 67465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur: a pointer to a zero terminated string 
67475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 67485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 67495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 67505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 67515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree. 67525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context 67535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 67545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 67555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 67565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 67575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, 67585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, const char *encoding, int options) 67595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 67605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 67615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (cur == NULL) 67635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 67655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReset(ctxt); 67685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
67695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlNewStringInputStream(ctxt, cur); 67705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 67715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 67735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 67745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 1)); 67755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 67765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 67785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadFile: 67795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 67805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename: a file or URL 67815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 67825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 67835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 67845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML file from the filesystem or the network. 
67855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context 67865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 67875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 67885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 67895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 67905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, 67915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, int options) 67925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 67935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 67945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 67955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (filename == NULL) 67965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 67985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 67995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReset(ctxt); 68015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlLoadExternalEntity(filename, NULL, ctxt); 68035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 68045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 68065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 68075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, NULL, encoding, 
options, 1)); 68085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 68095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 68115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadMemory: 68125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 68135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @buffer: a pointer to a char array 68145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size: the size of the array 68155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 68165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 68175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 68185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 68195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree. 
68205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context 68215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 68225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 68235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 68245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 68255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, 68265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, const char *encoding, int options) 68275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 68285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr input; 68295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 68305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 68325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (buffer == NULL) 68345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReset(ctxt); 68375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 68395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (input == NULL) { 68405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 68415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 68425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) 68435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 68445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 68455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 68465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return(NULL); 68475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 68485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 68505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 1)); 68515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 68525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 68545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadFd: 68555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 68565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @fd: an open file descriptor 68575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 68585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 68595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 68605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 68615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML from a file descriptor and build a tree. 
68625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context 68635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 68645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 68655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 68665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 68675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, 68685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, const char *encoding, int options) 68695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 68705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr input; 68715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 68725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (fd < 0) 68745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 68765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReset(ctxt); 68795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 68825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (input == NULL) 68835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlNewIOInputStream(ctxt, input, 
XML_CHAR_ENCODING_NONE); 68855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 68865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 68875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 68885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 68895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 68905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 1)); 68915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 68925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 68935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 68945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadIO: 68955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context 68965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioread: an I/O read function 68975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioclose: an I/O close function 68985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioctx: an I/O handler 68995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL: the base URL to use for the document 69005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding: the document encoding, or NULL 69015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options: a combination of htmlParserOption(s) 69025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 69035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML document from I/O functions and source and build a tree. 
69045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context 69055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 69065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree 69075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 69085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr 69095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 69105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlInputCloseCallback ioclose, void *ioctx, 69115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *URL, 69125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *encoding, int options) 69135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 69145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputBufferPtr input; 69155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlParserInputPtr stream; 69165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 69175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ioread == NULL) 69185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 69195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ctxt == NULL) 69205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 69215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 69225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) htmlCtxtReset(ctxt); 69235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 69245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 69255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) XML_CHAR_ENCODING_NONE); 69265821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) if (input == NULL) 69275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 69285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 69295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (stream == NULL) { 69305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) xmlFreeParserInputBuffer(input); 69315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (NULL); 69325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 69335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inputPush(ctxt, stream); 69345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (htmlDoRead(ctxt, URL, encoding, options, 1)); 69355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 69365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 69375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define bottom_HTMLparser 69385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "elfgcchack.h" 69395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* LIBXML_HTML_ENABLED */ 6940