15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * HTMLparser.c : an HTML 4.0 non-verifying parser
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * See Copyright for the status of this software.
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * daniel@veillard.com
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define IN_LIBXML
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "libxml.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_HTML_ENABLED
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_CTYPE_H
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <ctype.h>
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_STDLIB_H
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdlib.h>
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_SYS_STAT_H
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <sys/stat.h>
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_FCNTL_H
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <fcntl.h>
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_UNISTD_H
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <unistd.h>
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef HAVE_ZLIB_H
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <zlib.h>
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlmemory.h>
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/tree.h>
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/parser.h>
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/parserInternals.h>
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlerror.h>
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/HTMLparser.h>
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/HTMLtree.h>
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/entities.h>
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/encoding.h>
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/valid.h>
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/xmlIO.h>
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/globals.h>
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <libxml/uri.h>
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * Size constants for this parser.
 * NOTE(review): the code consuming them is outside this excerpt; presumably
 * a cap on name length and two scratch-buffer sizes -- confirm at the users.
 */
#define HTML_MAX_NAMELEN 1000
#define HTML_PARSER_BIG_BUFFER_SIZE 1000
#define HTML_PARSER_BUFFER_SIZE 100

/* Debugging switches, disabled by default. */
/* #define DEBUG */
/* #define DEBUG_PUSH */

/*
 * File-local flag, initialised to 1.
 * NOTE(review): its readers are not visible in this excerpt.
 */
static int htmlOmittedDefaultValue = 1;

/* Prototypes needed before the corresponding definitions are seen. */
xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
			     xmlChar end, xmlChar  end2, xmlChar end3);
static void htmlParseComment(htmlParserCtxtPtr ctxt);
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *		Some factorized error routines				*
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlErrMemory:
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @extra:  extra informations
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle a redefinition of attribute error
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->instate == XML_PARSER_EOF))
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt != NULL) {
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->errNo = XML_ERR_NO_MEMORY;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->instate = XML_PARSER_EOF;
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->disableSAX = 1;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (extra)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        NULL, NULL, 0, 0,
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        "Memory allocation failed : %s\n", extra);
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        NULL, NULL, 0, 0, "Memory allocation failed\n");
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseErr:
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @error:  the error number
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @msg:  the error message
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str1:  string infor
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str2:  string infor
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle a fatal parser error, i.e. violating Well-Formedness constraints
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             const char *msg, const xmlChar *str1, const xmlChar *str2)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->instate == XML_PARSER_EOF))
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt != NULL)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->errNo = error;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    XML_ERR_ERROR, NULL, 0,
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (const char *) str1, (const char *) str2,
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    NULL, 0, 0,
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    msg, str1, str2);
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt != NULL)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->wellFormed = 0;
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseErrInt:
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @error:  the error number
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @msg:  the error message
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @val:  integer info
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Handle a fatal parser error, i.e. violating Well-Formedness constraints
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             const char *msg, int val)
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->instate == XML_PARSER_EOF))
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt != NULL)
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->errNo = error;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    XML_ERR_ERROR, NULL, 0, NULL, NULL,
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    NULL, val, 0, msg, val);
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt != NULL)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->wellFormed = 0;
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	Parser stacks related functions and macros		*
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlnamePush:
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value:  the element name
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pushes a new element name on top of the name stack
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of error, the index in the stack otherwise
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->html = 3;
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->html = 10;
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameNr >= ctxt->nameMax) {
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->nameMax *= 2;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->nameTab = (const xmlChar * *)
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         xmlRealloc((xmlChar * *)ctxt->nameTab,
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    ctxt->nameMax *
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    sizeof(ctxt->nameTab[0]));
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->nameTab == NULL) {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            htmlErrMemory(ctxt, NULL);
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return (0);
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameTab[ctxt->nameNr] = value;
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->name = value;
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (ctxt->nameNr++);
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlnamePop:
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pops the top element name from the name stack
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the name just removed
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar *
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlnamePop(htmlParserCtxtPtr ctxt)
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *ret;
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameNr <= 0)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameNr--;
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameNr < 0)
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameNr > 0)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->name = NULL;
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ret = ctxt->nameTab[ctxt->nameNr];
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameTab[ctxt->nameNr] = NULL;
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (ret);
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNodeInfoPush:
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value:  the node info
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pushes a new element name on top of the node info stack
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of error, the index in the stack otherwise
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->nodeInfoMax == 0)
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                ctxt->nodeInfoMax = 5;
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->nodeInfoMax *= 2;
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->nodeInfoTab = (htmlParserNodeInfo *)
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    ctxt->nodeInfoMax *
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    sizeof(ctxt->nodeInfoTab[0]));
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->nodeInfoTab == NULL) {
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            htmlErrMemory(ctxt, NULL);
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return (0);
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (ctxt->nodeInfoNr++);
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNodeInfoPop:
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Pops the top element name from the node info stack
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of error, the pointer to NodeInfo otherwise
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlParserNodeInfo *
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nodeInfoNr <= 0)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeInfoNr--;
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nodeInfoNr < 0)
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nodeInfoNr > 0)
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->nodeInfo = NULL;
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Macros for accessing the content. Those should be used only by the parser,
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and not exported.
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Dirty macros, i.e. one need to make assumption on the context to use them
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           in UNICODE mode. This should be used internally by the parser
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           only to compare to ASCII values otherwise it would break when
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   UPP(n)  returns the n'th next xmlChar converted to uppercase. Same as CUR
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           it should be used only to compare on ASCII based substring.
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           strings without newlines within the parser.
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   CURRENT Returns the current char value, with the full decoding of
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           UTF-8 if we are using this mode. It returns an int.
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   NEXT    Skip to the next character, this does the proper decoding
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * All the macros below expect a parser context variable named "ctxt" to be
 * in scope at the point of use.
 */

/* Current byte passed through toupper(). */
#define UPPER (toupper(*ctxt->input->cur))

/*
 * Advance the input by val bytes, updating the character counter and the
 * column by the same amount. Parenthesized so that it is a single
 * expression wherever it is expanded.
 */
#define SKIP(val) (ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val))

/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]

/* Same as NXT(val), passed through toupper(). */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

/* Current input pointer. */
#define CUR_PTR ctxt->input->cur

/*
 * Shrink the input once more than 2 * INPUT_CHUNK bytes lie behind the
 * current position and fewer than 2 * INPUT_CHUNK bytes remain ahead.
 * Wrapped in do/while(0) so that an "else" following the macro pairs with
 * the caller's "if" and not with the one hidden in the expansion.
 */
#define SHRINK do {							\
    if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) &&	\
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK))	\
	xmlParserInputShrink(ctxt->input);				\
  } while (0)

/*
 * When not in progressive mode, ask for INPUT_CHUNK more bytes as soon as
 * fewer than INPUT_CHUNK bytes remain ahead of the current position.
 * Wrapped in do/while(0) for the same reason as SHRINK.
 */
#define GROW do {							\
    if ((ctxt->progressive == 0) &&					\
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))		\
	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/* Current byte as an int. */
#define CURRENT ((int) (*ctxt->input->cur))

/* Delegates to htmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
/* Current byte as an int. */
#define CUR ((int) (*ctxt->input->cur))
/* Delegates to xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/* -1 while ctxt->token is non-zero, the current byte otherwise. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))


/*
 * Step over one character that occupies l bytes: a newline bumps the line
 * counter and resets the column to 1, anything else bumps the column;
 * ctxt->token is cleared and the character counter advances by one.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++;		\
  } while (0)
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    \
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************/
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/* Current character via htmlCurrentChar(); l receives its byte length. */
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &(l))
/* Current character of string s via xmlStringCurrentChar(); l receives its byte length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * Append character v, which occupies l bytes, to buffer b at index i and
 * advance i: a one byte character is stored directly, anything longer goes
 * through xmlCopyChar(). Arguments are parenthesized and the body is
 * wrapped in do/while(0) so that the macro behaves as a single statement
 * whatever expressions are passed in.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));				\
  } while (0)
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlFindEncoding:
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @the HTML parser context
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Ty to find and encoding in the current data available in the input
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * buffer this is needed to try to switch to the proper encoding when
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * one face a character error.
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * That's an heuristic, since it's operating outside of parsing it could
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * try to use a meta which had been commented out, that's the reason it
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * should only be used in case of error, not as a default.
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns an encoding string or NULL if not found, the string need to
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   be freed
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar *
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlFindEncoding(xmlParserCtxtPtr ctxt) {
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *start, *cur, *end;
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL) ||
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) ||
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->input->buf->encoder != NULL))
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL))
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    start = ctxt->input->cur;
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    end = ctxt->input->end;
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* we also expect the input buffer to be zero terminated */
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (*end != 0)
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV");
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL)
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = xmlStrcasestr(cur, BAD_CAST  "CONTENT");
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL)
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = xmlStrcasestr(cur, BAD_CAST  "CHARSET=");
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL)
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur += 8;
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    start = cur;
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (((*cur >= 'A') && (*cur <= 'Z')) ||
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((*cur >= 'a') && (*cur <= 'z')) ||
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((*cur >= '0') && (*cur <= '9')) ||
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/'))
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           cur++;
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == start)
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(xmlStrndup(start, cur - start));
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCurrentChar:
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  the HTML parser context
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @len:  pointer to the length of the char read
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The current char value, if using UTF-8 this may actually span multiple
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * bytes in the input buffer. Implement the end of line normalization:
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 2.11 End-of-Line Handling
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If the encoding is unspecified, in the case we find an ISO-Latin-1
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * char, then the encoding converter is plugged in automatically.
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the current char value and its length
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->instate == XML_PARSER_EOF)
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(0);
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->token != 0) {
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	*len = 0;
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(ctxt->token);
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * We are supposed to handle UTF8, check it's valid
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * From rfc2044: encoding of the Unicode values on UTF-8:
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 *
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * 0000 0000-0000 007F   0xxxxxxx
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 *
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Check for the 0x110000 limit too
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	const unsigned char *cur = ctxt->input->cur;
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	unsigned char c;
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	unsigned int val;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	c = *cur;
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (c & 0x80) {
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (cur[1] == 0) {
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                cur = ctxt->input->cur;
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((cur[1] & 0xc0) != 0x80)
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		goto encoding_error;
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((c & 0xe0) == 0xe0) {
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (cur[2] == 0) {
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    cur = ctxt->input->cur;
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                }
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((cur[2] & 0xc0) != 0x80)
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto encoding_error;
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((c & 0xf0) == 0xf0) {
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (cur[3] == 0) {
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        cur = ctxt->input->cur;
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    }
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (((c & 0xf8) != 0xf0) ||
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			((cur[3] & 0xc0) != 0x80))
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto encoding_error;
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    /* 4-byte code */
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *len = 4;
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val = (cur[0] & 0x7) << 18;
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val |= (cur[1] & 0x3f) << 12;
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val |= (cur[2] & 0x3f) << 6;
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val |= cur[3] & 0x3f;
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		  /* 3-byte code */
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *len = 3;
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val = (cur[0] & 0xf) << 12;
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val |= (cur[1] & 0x3f) << 6;
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    val |= cur[2] & 0x3f;
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      /* 2-byte code */
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		*len = 2;
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		val = (cur[0] & 0x1f) << 6;
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		val |= cur[1] & 0x3f;
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (!IS_CHAR(val)) {
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"Char 0x%X out of allowed range\n", val);
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(val);
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if ((*ctxt->input->cur == 0) &&
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                (ctxt->input->cur < ctxt->input->end)) {
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"Char 0x%X out of allowed range\n", 0);
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                *len = 1;
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return(' ');
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /* 1-byte code */
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *len = 1;
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return((int) *ctxt->input->cur);
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Assume it's a fixed length encoding (1) with
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * a compatible encoding for the ASCII set, since
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * XML constructs only use < 128 chars
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *len = 1;
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((int) *ctxt->input->cur < 0x80)
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return((int) *ctxt->input->cur);
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Humm this is bad, do an automatic flow conversion
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlChar * guess;
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlCharEncodingHandlerPtr handler;
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        guess = htmlFindEncoding(ctxt);
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (guess == NULL) {
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (ctxt->input->encoding != NULL)
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                xmlFree((xmlChar *) ctxt->input->encoding);
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->input->encoding = guess;
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            handler = xmlFindCharEncodingHandler((const char *) guess);
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (handler != NULL) {
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                xmlSwitchToEncoding(ctxt, handler);
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else {
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             "Unsupported encoding %s", guess, NULL);
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->charset = XML_CHAR_ENCODING_UTF8;
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(xmlCurrentChar(ctxt, len));
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)encoding_error:
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * If we detect an UTF8 error that probably mean that the
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * input encoding didn't get properly advertized in the
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * declaration header. Report the error and switch the encoding
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * to ISO-Latin-1 (if you don't like this policy, just declare the
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * encoding !)
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    {
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        char buffer[150];
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->input->end - ctxt->input->cur >= 4) {
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    ctxt->input->cur[0], ctxt->input->cur[1],
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    ctxt->input->cur[2], ctxt->input->cur[3]);
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "Input is not proper UTF-8, indicate encoding !\n",
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     BAD_CAST buffer, NULL);
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->charset = XML_CHAR_ENCODING_8859_1;
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *len = 1;
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return((int) *ctxt->input->cur);
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlSkipBlankChars:
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  the HTML parser context
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * skip all blanks character found at that point in the input streams.
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the number of space chars skipped
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int res = 0;
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (IS_BLANK_CH(*(ctxt->input->cur))) {
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((*ctxt->input->cur == 0) &&
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlPopInput(ctxt);
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (*(ctxt->input->cur) == '\n') {
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->input->line++; ctxt->input->col = 1;
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else ctxt->input->col++;
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->input->cur++;
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->nbChars++;
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (*ctxt->input->cur == 0)
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	res++;
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(res);
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	The list of HTML elements and their properties		*
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 *  Start Tag: 1 means the start tag can be omitted
 *  End Tag:   1 means the end tag can be omitted
 *             2 means it's forbidden (empty elements)
 *             3 means the tag is stylistic and should be closed easily
 *  Depr:      this element is deprecated
 *  DTD:       1 means that this element is valid only in the Loose DTD
 *             2 means that this element is valid only in the Frameset DTD
 *
 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
	, subElements , impliedsubelt , Attributes, userdata
 */
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* Definitions and a couple of vars for HTML Elements */
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * Element groups. Each group is a comma separated list of element names
 * meant to be spliced into an array initializer, paired with an NB_<group>
 * macro giving the number of names in the list.
 *
 * The NB_ sums are parenthesised so that they can be used safely inside
 * larger expressions (e.g. 2 * NB_FLOW).
 */
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define NB_FONTSTYLE 8
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define NB_PHRASE 10
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
#define NB_SPECIAL 16
#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
#define FORMCTRL "input", "select", "textarea", "label", "button"
#define NB_FORMCTRL 5
#define PCDATA
#define NB_PCDATA 0
#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
#define NB_HEADING 6
#define LIST "ul", "ol", "dir", "menu"
#define NB_LIST 4
#define MODIFIER
#define NB_MODIFIER 0
#define FLOW BLOCK,INLINE
#define NB_FLOW (NB_BLOCK + NB_INLINE)
#define EMPTY NULL


/* NULL terminated lists of the flow (block + inline) and inline elements */
static const char* const html_flow[] = { FLOW, NULL } ;
static const char* const html_inline[] = { INLINE, NULL } ;

/* placeholders: elts with content but no subelements */
static const char* const html_pcdata[] = { NULL } ;
#define html_cdata html_pcdata
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* ... and for HTML Attributes */
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * Attribute groups, following the same convention as the element groups:
 * a comma separated list of names plus an NB_<group> count.
 *
 * NB_ATTRS used to reference the non-existent NB_NB_COREATTRS, which would
 * have failed to compile as soon as the macro was expanded; it now sums
 * the three groups making up ATTRS and is parenthesised.
 */
#define COREATTRS "id", "class", "style", "title"
#define NB_COREATTRS 4
#define I18N "lang", "dir"
#define NB_I18N 2
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
#define NB_EVENTS 9
#define ATTRS COREATTRS,I18N,EVENTS
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
#define CELLHALIGN "align", "char", "charoff"
#define NB_CELLHALIGN 3
#define CELLVALIGN "valign"
#define NB_CELLVALIGN 1

/* NULL terminated attribute lists built from the groups above */
static const char* const html_attrs[] = { ATTRS, NULL } ;
static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
static const char* const core_attrs[] = { COREATTRS, NULL } ;
static const char* const i18n_attrs[] = { I18N, NULL } ;
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* Other declarations that should go inline ... */
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	"href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	"tabindex", "onfocus", "onblur", NULL } ;
/*
 * Small NULL-terminated attribute-name lists.  Within html40ElementTable,
 * target_attr is referenced by "a", "area", "base", "form" and "link";
 * alt_attr by "area"; src_alt_attrs by "img"; href_attrs by "base";
 * clear_attrs by "br".
 */
static const char *const target_attr[] = {
	"target",
	NULL
};
static const char *const rows_cols_attr[] = {
	"rows", "cols",
	NULL
};
static const char *const alt_attr[] = {
	"alt",
	NULL
};
static const char *const src_alt_attrs[] = {
	"src", "alt",
	NULL
};
static const char *const href_attrs[] = {
	"href",
	NULL
};
static const char *const clear_attrs[] = {
	"clear",
	NULL
};
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const inline_p[] = { INLINE, "p", NULL } ;
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const flow_param[] = { FLOW, "param", NULL } ;
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const applet_attrs[] = { COREATTRS , "codebase",
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"archive", "alt", "name", "height", "width", "align",
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"hspace", "vspace", NULL } ;
/* Attribute list referenced by the "area" entry of html40ElementTable. */
static const char *const area_attrs[] = {
	"shape", "coords", "href", "nohref",
	"tabindex", "accesskey", "onfocus", "onblur",
	NULL
};
/* Attribute list referenced by the "basefont" entry. */
static const char *const basefont_attrs[] = {
	"id", "size", "color", "face",
	NULL
};
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const quote_attrs[] = { ATTRS, "cite", NULL } ;
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const body_contents[] = { FLOW, "ins", "del", NULL } ;
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const body_depr[] = { "background", "bgcolor", "text",
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	"link", "vlink", "alink", NULL } ;
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const button_attrs[] = { ATTRS, "name", "value", "type",
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	"disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const col_elt[] = { "col", NULL } ;
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const compact_attrs[] = { ATTRS, "compact", NULL } ;
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const dl_contents[] = { "dt", "dd", NULL } ;
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const compact_attr[] = { "compact", NULL } ;
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const label_attr[] = { "label", NULL } ;
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const fieldset_contents[] = { FLOW, "legend" } ;
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const head_attrs[] = { I18N, "profile", NULL } ;
/* Content list referenced by the "head" entry. */
static const char *const head_contents[] = {
	"title", "isindex", "base", "script", "style", "meta", "link",
	"object",
	NULL
};
/* Attribute list referenced by the "hr" entry. */
static const char *const hr_depr[] = {
	"align", "noshade", "size", "width",
	NULL
};
/* Attribute and content lists referenced by the "html" entry. */
static const char *const version_attr[] = {
	"version",
	NULL
};
static const char *const html_content[] = {
	"head", "body", "frameset",
	NULL
};
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ;
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const align_attr[] = { "align", NULL } ;
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const map_contents[] = { BLOCK, "area", NULL } ;
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const name_attr[] = { "name", NULL } ;
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const action_attr[] = { "action", NULL } ;
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const content_attr[] = { "content", NULL } ;
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const type_attr[] = { "type", NULL } ;
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const object_contents[] = { FLOW, "param", NULL } ;
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
/* Second attribute list referenced by the "object" entry. */
static const char *const object_depr[] = {
	"align", "border", "hspace", "vspace",
	NULL
};
/* Attribute list referenced by the "ol" entry. */
static const char *const ol_attrs[] = {
	"type", "compact", "start",
	NULL
};
/* Content list referenced by the "optgroup" entry. */
static const char *const option_elt[] = {
	"option",
	NULL
};
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const width_attr[] = { "width", NULL } ;
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const language_attr[] = { "language", NULL } ;
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const select_content[] = { "optgroup", "option", NULL } ;
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const style_attrs[] = { I18N, "media", "title", NULL } ;
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const table_attrs[] = { ATTRS, "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;
/* NULL-terminated name lists for table markup. */
static const char *const table_depr[] = {
	"align", "bgcolor",
	NULL
};
static const char *const table_contents[] = {
	"caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr",
	NULL
};
static const char *const tr_elt[] = {
	"tr",
	NULL
};
7535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;
7545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;
7555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;
7565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;
/* Remaining short NULL-terminated name lists. */
static const char *const tr_contents[] = {
	"th", "td",
	NULL
};
static const char *const bgcolor_attr[] = {
	"bgcolor",
	NULL
};
/* Content list referenced by the "ol" entry of html40ElementTable. */
static const char *const li_elt[] = {
	"li",
	NULL
};
static const char *const ul_depr[] = {
	"type", "compact",
	NULL
};
/* Attribute list referenced by the "bdo" entry. */
static const char *const dir_attr[] = {
	"dir",
	NULL
};
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * Cast prefixed to the name lists when they are stored in
 * html40ElementTable.  The lists are declared "const char *const []",
 * so the cast to "const char **" drops the const on the element pointers.
 */
#define DECL	(const char **)
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const htmlElemDesc
7665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)html40ElementTable[] = {
7675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "a",		0, 0, 0, 0, 0, 0, 1, "anchor ",
7685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL
7695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "abbr",	0, 0, 0, 0, 0, 0, 1, "abbreviated form",
7715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
7725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "acronym",	0, 0, 0, 0, 0, 0, 1, "",
7745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
7755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "address",	0, 0, 0, 0, 0, 0, 0, "information on author ",
7775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL inline_p  , NULL , DECL html_attrs, NULL, NULL
7785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "applet",	0, 0, 0, 0, 1, 1, 2, "java applet ",
7805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL flow_param , NULL , NULL , DECL applet_attrs, NULL
7815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "area",	0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY ,  NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "b",		0, 3, 0, 0, 0, 0, 1, "bold text style",
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "base",	0, 2, 2, 1, 0, 0, 0, "document base uri ",
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "basefont",	0, 2, 2, 1, 1, 1, 1, "base font size " ,
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY , NULL , NULL, DECL basefont_attrs, NULL
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "bdo",	0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "big",	0, 3, 0, 0, 0, 0, 1, "large text style",
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "blockquote",	0, 0, 0, 0, 0, 0, 0, "long quotation ",
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow , NULL , DECL quote_attrs , NULL, NULL
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "body",	1, 1, 0, 0, 0, 0, 0, "document body ",
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "br",		0, 2, 2, 1, 0, 0, 1, "forced line break ",
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "button",	0, 0, 0, 0, 0, 0, 2, "push button ",
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "caption",	0, 0, 0, 0, 0, 0, 0, "table caption ",
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "center",	0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow , NULL , NULL, DECL html_attrs, NULL
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "cite",	0, 0, 0, 0, 0, 0, 1, "citation",
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
8205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "code",	0, 0, 0, 0, 0, 0, 1, "computer code fragment",
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "col",	0, 2, 2, 1, 0, 0, 0, "table column ",
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY , NULL , DECL col_attrs , NULL, NULL
8265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "colgroup",	0, 1, 0, 0, 0, 0, 0, "table column group ",
8285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL col_elt , "col" , DECL col_attrs , NULL, NULL
8295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dd",		0, 1, 0, 0, 0, 0, 0, "definition description ",
8315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow , NULL , DECL html_attrs, NULL, NULL
8325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "del",	0, 0, 0, 0, 0, 0, 2, "deleted text ",
8345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow , NULL , DECL edit_attrs , NULL, NULL
8355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dfn",	0, 0, 0, 0, 0, 0, 1, "instance definition",
8375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
8385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dir",	0, 0, 0, 0, 1, 1, 0, "directory list",
8405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL
8415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "div",	0, 0, 0, 0, 0, 0, 0, "generic language/style container",
8435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL
8445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dl",		0, 0, 0, 0, 0, 0, 0, "definition list ",
8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "dt",		0, 1, 0, 0, 0, 0, 0, "definition term ",
8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "em",		0, 3, 0, 0, 0, 0, 1, "emphasis",
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "embed",	0, 1, 0, 0, 1, 1, 1, "generic embedded object ",
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL embed_attrs, NULL, NULL
8565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "fieldset",	0, 0, 0, 0, 0, 0, 0, "form control group ",
8585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL
8595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "font",	0, 3, 0, 0, 1, 1, 1, "local change to font ",
8615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, NULL, DECL font_attrs, NULL
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "form",	0, 0, 0, 0, 0, 0, 0, "interactive form ",
8645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
8655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "frame",	0, 2, 2, 1, 0, 2, 0, "subwindow " ,
8675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, NULL, DECL frame_attrs, NULL
8685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "frameset",	0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h1",		0, 0, 0, 0, 0, 0, 0, "heading ",
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h2",		0, 0, 0, 0, 0, 0, 0, "heading ",
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h3",		0, 0, 0, 0, 0, 0, 0, "heading ",
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h4",		0, 0, 0, 0, 0, 0, 0, "heading ",
8825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
8835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h5",		0, 0, 0, 0, 0, 0, 0, "heading ",
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "h6",		0, 0, 0, 0, 0, 0, 0, "heading ",
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "head",	1, 1, 0, 0, 0, 0, 0, "document head ",
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL head_contents, NULL, DECL head_attrs, NULL, NULL
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "hr",		0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,
8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "html",	1, 1, 0, 0, 0, 0, 0, "document root element ",
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "i",		0, 3, 0, 0, 0, 0, 1, "italic text style",
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "iframe",	0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "img",	0, 2, 2, 1, 0, 0, 1, "embedded image ",
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "input",	0, 2, 2, 1, 0, 0, 1, "form control ",
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "ins",	0, 0, 0, 0, 0, 0, 2, "inserted text",
9125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, NULL, DECL edit_attrs, NULL, NULL
9135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "isindex",	0, 2, 2, 1, 1, 1, 0, "single line prompt ",
9155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, NULL, DECL prompt_attrs, NULL
9165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "kbd",	0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
9185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
9195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "label",	0, 0, 0, 0, 0, 0, 1, "form field label text ",
9215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL
9225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "legend",	0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "li",		0, 1, 1, 0, 0, 0, 0, "list item ",
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, NULL, DECL html_attrs, NULL, NULL
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "link",	0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "map",	0, 0, 0, 0, 0, 0, 2, "client-side image map ",
9335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr
9345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "menu",	0, 0, 0, 0, 1, 1, 0, "menu list ",
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "meta",	0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
9395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr
9405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "noframes",	0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL noframes_content, "body" , DECL html_attrs, NULL, NULL
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "noscript",	0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, "div", DECL html_attrs, NULL, NULL
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "object",	0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "ol",		0, 0, 0, 0, 0, 0, 0, "ordered list ",
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "optgroup",	0, 0, 0, 0, 0, 0, 0, "option group ",
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "option",	0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "p",		0, 1, 0, 0, 0, 0, 0, "paragraph ",
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "param",	0, 2, 2, 1, 0, 0, 0, "named property value ",
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "pre",	0, 0, 0, 0, 0, 0, 0, "preformatted text ",
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL
9675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "q",		0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
9695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL quote_attrs, NULL, NULL
9705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "s",		0, 3, 0, 0, 1, 1, 1, "strike-through text style",
9725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "samp",	0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "script",	0, 0, 0, 0, 0, 0, 2, "script statements ",
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "select",	0, 0, 0, 0, 0, 0, 1, "option selector ",
9815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL select_content, NULL, DECL select_attrs, NULL, NULL
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "small",	0, 3, 0, 0, 0, 0, 1, "small text style",
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "span",	0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "strike",	0, 3, 0, 0, 1, 1, 1, "strike-through text",
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "strong",	0, 3, 0, 0, 0, 0, 1, "strong emphasis",
9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "style",	0, 0, 0, 0, 0, 0, 0, "style info ",
9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr
9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "sub",	0, 3, 0, 0, 0, 0, 1, "subscript",
9995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
10005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "sup",	0, 3, 0, 0, 0, 0, 1, "superscript ",
10025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
10035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "table",	0, 0, 0, 0, 0, 0, 0, "",
10055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL
10065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tbody",	1, 0, 0, 0, 0, 0, 0, "table body ",
10085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "td",		0, 0, 0, 0, 0, 0, 0, "table data cell",
10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "textarea",	0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr
10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tfoot",	0, 1, 0, 0, 0, 0, 0, "table footer ",
10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "th",		0, 1, 0, 0, 0, 0, 0, "table header cell",
10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "thead",	0, 1, 0, 0, 0, 0, 0, "table header ",
10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "title",	0, 0, 0, 0, 0, 0, 0, "document title ",
10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL
10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tr",		0, 0, 0, 0, 0, 0, 0, "table row ",
10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL
10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "tt",		0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "u",		0, 3, 0, 0, 1, 1, 1, "underlined text style",
10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "ul",		0, 0, 0, 0, 0, 0, 0, "unordered list ",
10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL
10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)},
10405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ "var",	0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
10415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
10425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
10445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
10465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * start tags that imply the end of current element
10475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
10485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const char * const htmlStartClose[] = {
10495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"form",		"form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
10505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"dl", "ul", "ol", "menu", "dir", "address", "pre",
10515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"listing", "xmp", "head", NULL,
10525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"head",		"p", NULL,
10535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"title",	"p", NULL,
10545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"body",		"head", "style", "link", "title", "p", NULL,
10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"frameset",	"head", "style", "link", "title", "p", NULL,
10565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"li",		"p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"pre", "listing", "xmp", "head", "li", NULL,
10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"hr",		"p", "head", NULL,
10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h1",		"p", "head", NULL,
10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h2",		"p", "head", NULL,
10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h3",		"p", "head", NULL,
10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h4",		"p", "head", NULL,
10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h5",		"p", "head", NULL,
10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"h6",		"p", "head", NULL,
10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dir",		"p", "head", NULL,
10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"address",	"p", "head", "ul", NULL,
10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"pre",		"p", "head", "ul", NULL,
10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"listing",	"p", "head", NULL,
10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"xmp",		"p", "head", NULL,
10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"blockquote",	"p", "head", NULL,
10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dl",		"p", "dt", "menu", "dir", "address", "pre", "listing",
10725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"xmp", "head", NULL,
10735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dt",		"p", "menu", "dir", "address", "pre", "listing", "xmp",
10745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                "head", "dd", NULL,
10755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"dd",		"p", "menu", "dir", "address", "pre", "listing", "xmp",
10765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                "head", "dt", NULL,
10775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"ul",		"p", "head", "ol", "menu", "dir", "address", "pre",
10785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"listing", "xmp", NULL,
10795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"ol",		"p", "head", "ul", NULL,
10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"menu",		"p", "head", "ul", NULL,
10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"p",		"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"div",		"p", "head", NULL,
10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"noscript",	"p", "head", NULL,
10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"center",	"font", "b", "i", "p", "head", NULL,
10855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"a",		"a", NULL,
10865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"caption",	"p", NULL,
10875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"colgroup",	"caption", "colgroup", "col", "p", NULL,
10885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"col",		"caption", "col", "p", NULL,
10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"table",	"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"listing", "xmp", "a", NULL,
10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"th",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"td",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"tr",		"th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"thead",	"caption", "col", "colgroup", NULL,
10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"tfoot",	"th", "td", "tr", "caption", "col", "colgroup", "thead",
10965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"tbody", "p", NULL,
10975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"tbody",	"th", "td", "tr", "caption", "col", "colgroup", "thead",
10985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"tfoot", "tbody", "p", NULL,
10995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"optgroup",	"option", NULL,
11005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"option",	"option", NULL,
11015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"fieldset",	"legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
11025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		"pre", "listing", "xmp", "a", NULL,
11035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NULL
11045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
11055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * HTML elements that are not supposed to hold CDATA content directly and
 * inside which a p element will be implied instead.
 * The list is NULL-terminated.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *const htmlNoContentElements[] = {
    "html", "head",
    NULL
};
11185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * The list of HTML attributes which are of content %Script;
 * (the HTML 4 intrinsic event handlers).
 * NOTE: when adding ones, check htmlIsScriptAttribute() since
 *       it assumes the name starts with 'on'
 * The array carries no NULL terminator.
 */
static const char *const htmlScriptAttributes[] = {
    "onclick",
    "ondblclick",
    "onmousedown",
    "onmouseup",
    "onmouseover",
    "onmousemove",
    "onmouseout",
    "onkeypress",
    "onkeydown",
    "onkeyup",
    "onload",
    "onunload",
    "onfocus",
    "onblur",
    "onsubmit",
    "onreset",	/* was misspelled "onrest", so onreset was never matched */
    "onchange",
    "onselect"
};
11445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * Priority table consulted by the HTML parser when it has to cope with
 * broken pages. Giving elements different priorities lets the parser
 * decide what to do with stray end tags: an end tag may only close
 * elements whose priority is lower than or equal to its own.
 */

/* One (element name, end-tag priority) pair. */
typedef struct {
    const char *name;
    int priority;
} elementPriority;

/*
 * The list ends with an entry whose name is NULL; the priority stored in
 * that entry is the default used for every element not named above it.
 */
static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 },
    { "th",    160 },
    { "tr",    170 },
    { "thead", 180 },
    { "tbody", 180 },
    { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 },
    { "body",  200 },
    { "html",  220 },
    { NULL,    100 }	/* Default priority */
};
11725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * Lookup index over htmlStartClose, filled in by htmlInitAutoClose():
 * each used slot points at the first string of one group of that table,
 * and unused slots are NULL.
 */
static const char **htmlStartCloseIndex[100];
/* Non-zero once htmlInitAutoClose() has filled the index. */
static int htmlStartCloseIndexinitialized = 0;
11755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
11775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
11785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	functions to handle HTML specific data			*
11795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
11805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
11815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
11835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlInitAutoClose:
11845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
11855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
11865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is not reentrant. Call xmlInitParser() once before processing in
11875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * case of use in multithreaded programs.
11885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
11895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void
11905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlInitAutoClose(void) {
11915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int indx, i = 0;
11925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (htmlStartCloseIndexinitialized) return;
11945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL;
11965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    indx = 0;
11975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) {
11985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
11995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while (htmlStartClose[i] != NULL) i++;
12005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	i++;
12015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
12025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlStartCloseIndexinitialized = 1;
12035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
12045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
12065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlTagLookup:
12075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @tag:  The tag name in lowercase
12085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
12095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the HTML tag in the ElementTable
12105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
12115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the related htmlElemDescPtr or NULL if not found.
12125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
12135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlElemDesc *
12145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlTagLookup(const xmlChar *tag) {
12155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int i;
12165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = 0; i < (sizeof(html40ElementTable) /
12185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     sizeof(html40ElementTable[0]));i++) {
12195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))
12205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return((htmlElemDescPtr) &html40ElementTable[i]);
12215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
12225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(NULL);
12235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
12245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
12265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlGetEndPriority:
12275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name: The name of the element to look up the priority for.
12285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
12295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Return value: The "endtag" priority.
12305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) **/
12315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
12325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlGetEndPriority (const xmlChar *name) {
12335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i = 0;
12345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((htmlEndPriority[i].name != NULL) &&
12365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
12375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	i++;
12385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(htmlEndPriority[i].priority);
12405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
12415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
12445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckAutoClose:
12455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag:  The new tag name
12465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @oldtag:  The old tag name
12475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
12485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether the new tag is one of the registered valid tags for
12495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * closing old.
12505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
12515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
12525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 if no, 1 if yes.
12535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
12545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
12555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
12565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
12575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i, indx;
12585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const char **closed = NULL;
12595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (htmlStartCloseIndexinitialized == 0)
12615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlInitAutoClose();
12625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* inefficient, but not a big deal */
12645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (indx = 0; indx < 100; indx++) {
12655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        closed = htmlStartCloseIndex[indx];
12665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (closed == NULL)
12675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return (0);
12685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (xmlStrEqual(BAD_CAST * closed, newtag))
12695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            break;
12705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
12715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    i = closed - htmlStartClose;
12735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    i++;
12745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (htmlStartClose[i] != NULL) {
12755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) {
12765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return (1);
12775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
12785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        i++;
12795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
12805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (0);
12815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
12825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
12845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoCloseOnClose:
12855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
12865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag:  The new tag name
12875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @force:  force the tag closure
12885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
12895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows an ending tag to implicitly close other tags.
12905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
12915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
12925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
12935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
12945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const htmlElemDesc *info;
12955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i, priority;
12965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    priority = htmlGetEndPriority(newtag);
12985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
12995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = (ctxt->nameNr - 1); i >= 0; i--) {
13005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
13015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (xmlStrEqual(newtag, ctxt->nameTab[i]))
13025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            break;
13035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        /*
13045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         * A missplaced endtag can only close elements with lower
13055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         * or equal priority, so if we find an element with higher
13065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         * priority before we find an element with
13075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         * matching name, we just ignore this endtag
13085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         */
13095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
13105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return;
13115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
13125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (i < 0)
13135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
13145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
13155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (!xmlStrEqual(newtag, ctxt->name)) {
13165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        info = htmlTagLookup(ctxt->name);
13175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((info != NULL) && (info->endTag == 3)) {
13185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
13195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "Opening and ending tag mismatch: %s and %s\n",
13205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 newtag, ctxt->name);
13215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
13225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
13235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->sax->endElement(ctxt->userData, ctxt->name);
13245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
13255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
13265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
13275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
13285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
13295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoCloseOnEnd:
13305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
13315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
13325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Close all remaining tags at the end of the stream
13335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
13345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
13355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
13365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
13375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i;
13385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
13395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameNr == 0)
13405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
13415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = (ctxt->nameNr - 1); i >= 0; i--) {
13425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
13435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->sax->endElement(ctxt->userData, ctxt->name);
13445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
13455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
13465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
13475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
13485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
13495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoClose:
13505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
13515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag:  The new tag name or NULL
13525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
13535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to implicitly close other tags.
13545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list is kept in htmlStartClose array. This function is
13555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * called when a new tag has been detected and generates the
13565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * appropriates closes if possible/needed.
13575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If newtag is NULL this mean we are at the end of the resource
13585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and we should check
13595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
13605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
13615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
13625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
13635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((newtag != NULL) && (ctxt->name != NULL) &&
13645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           (htmlCheckAutoClose(newtag, ctxt->name))) {
13655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
13665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->sax->endElement(ctxt->userData, ctxt->name);
13675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
13685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
13695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (newtag == NULL) {
13705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlAutoCloseOnEnd(ctxt);
13715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
13725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
13735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((newtag == NULL) && (ctxt->name != NULL) &&
13745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
13755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
13765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
13775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
13785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->sax->endElement(ctxt->userData, ctxt->name);
13795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
13805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
13815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
13825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
13835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
13845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAutoCloseTag:
13855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @doc:  the HTML document
13865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name:  The tag name
13875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elem:  the HTML element
13885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
13895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to implicitly close other tags.
13905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list is kept in htmlStartClose array. This function checks
13915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if the element or one of it's children would autoclose the
13925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * given tag.
13935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
13945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if autoclose, 0 otherwise
13955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
13965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
13975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
13985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlNodePtr child;
13995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
14005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (elem == NULL) return(1);
14015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(name, elem->name)) return(0);
14025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (htmlCheckAutoClose(elem->name, name)) return(1);
14035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    child = elem->children;
14045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (child != NULL) {
14055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (htmlAutoCloseTag(doc, name, child)) return(1);
14065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	child = child->next;
14075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
14085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
14095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
14105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
14115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
14125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlIsAutoClosed:
14135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @doc:  the HTML document
14145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elem:  the HTML element
14155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
14165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to implicitly close other tags.
14175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The list is kept in htmlStartClose array. This function checks
14185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if a tag is autoclosed by one of it's child
14195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
14205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if autoclosed, 0 otherwise
14215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
14225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
14235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
14245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlNodePtr child;
14255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
14265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (elem == NULL) return(1);
14275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    child = elem->children;
14285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (child != NULL) {
14295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
14305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	child = child->next;
14315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
14325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
14335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
14345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
14355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
14365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckImplied:
14375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
14385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @newtag:  The new tag name
14395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
14405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The HTML DTD allows a tag to exists only implicitly
14415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * called when a new tag has been detected and generates the
14425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * appropriates implicit tags if missing
14435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
14445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
14455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
14465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i;
14475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
14485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->options & HTML_PARSE_NOIMPLIED)
14495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
14505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!htmlOmittedDefaultValue)
14515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
14525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(newtag, BAD_CAST"html"))
14535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
14545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameNr <= 0) {
14555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePush(ctxt, BAD_CAST"html");
14565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
14575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
14585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
14595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
14605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
14615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->nameNr <= 1) &&
14625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((xmlStrEqual(newtag, BAD_CAST"script")) ||
14635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (xmlStrEqual(newtag, BAD_CAST"style")) ||
14645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (xmlStrEqual(newtag, BAD_CAST"meta")) ||
14655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (xmlStrEqual(newtag, BAD_CAST"link")) ||
14665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (xmlStrEqual(newtag, BAD_CAST"title")) ||
14675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (xmlStrEqual(newtag, BAD_CAST"base")))) {
14685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->html >= 3) {
14695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            /* we already saw or generated an <head> before */
14705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return;
14715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
14725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        /*
14735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         * dropped OBJECT ... i you put it first BODY will be
14745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         * assumed !
14755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         */
14765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlnamePush(ctxt, BAD_CAST"head");
14775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
14785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
14795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
14805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       (!xmlStrEqual(newtag, BAD_CAST"frame")) &&
14815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
14825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->html >= 10) {
14835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            /* we already saw or generated a <body> before */
14845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return;
14855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
14865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for (i = 0;i < ctxt->nameNr;i++) {
14875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
14885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return;
14895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
14905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
14915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return;
14925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
14935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
14945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
14955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePush(ctxt, BAD_CAST"body");
14965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
14975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
14985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
14995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
15005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
15025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckParagraph
15035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
15045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
15055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check whether a p element need to be implied before inserting
15065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * characters in the current element.
15075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
15085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if a paragraph has been inserted, 0 if not and -1
15095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *         in case of error.
15105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
15115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
15135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
15145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *tag;
15155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i;
15165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
15185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(-1);
15195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tag = ctxt->name;
15205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (tag == NULL) {
15215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlAutoClose(ctxt, BAD_CAST"p");
15225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlCheckImplied(ctxt, BAD_CAST"p");
15235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePush(ctxt, BAD_CAST"p");
15245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
15255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
15265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(1);
15275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
15285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!htmlOmittedDefaultValue)
15295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(0);
15305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = 0; htmlNoContentElements[i] != NULL; i++) {
15315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
15325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlAutoClose(ctxt, BAD_CAST"p");
15335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlCheckImplied(ctxt, BAD_CAST"p");
15345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlnamePush(ctxt, BAD_CAST"p");
15355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
15365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
15375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(1);
15385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
15395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
15405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
15415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
15425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
15445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlIsScriptAttribute:
15455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name:  an attribute name
15465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
15475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Check if an attribute is of content type Script
15485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
15495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 is the attribute is a script 0 otherwise
15505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
15515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
15525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlIsScriptAttribute(const xmlChar *name) {
15535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int i;
15545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (name == NULL)
15565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return(0);
15575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
15585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * all script attributes start with 'on'
15595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
15605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((name[0] != 'o') || (name[1] != 'n'))
15615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return(0);
15625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = 0;
15635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
15645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 i++) {
15655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
15665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(1);
15675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
15685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
15695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
15705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
15725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
15735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	The list of HTML predefined entities			*
15745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
15755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
15765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const htmlEntityDesc  html40EntitiesTable[] = {
15795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
15805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the 4 absolute ones, plus apostrophe.
15815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
15825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 34,	"quot",	"quotation mark = APL quote, U+0022 ISOnum" },
15835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 38,	"amp",	"ampersand, U+0026 ISOnum" },
15845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 39,	"apos",	"single quote" },
15855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 60,	"lt",	"less-than sign, U+003C ISOnum" },
15865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 62,	"gt",	"greater-than sign, U+003E ISOnum" },
15875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
15885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
15895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * A bunch still in the 128-255 range
15905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Replacing them depend really on the charset used.
15915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
15925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 160,	"nbsp",	"no-break space = non-breaking space, U+00A0 ISOnum" },
15935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 161,	"iexcl","inverted exclamation mark, U+00A1 ISOnum" },
15945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 162,	"cent",	"cent sign, U+00A2 ISOnum" },
15955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 163,	"pound","pound sign, U+00A3 ISOnum" },
15965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 164,	"curren","currency sign, U+00A4 ISOnum" },
15975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 165,	"yen",	"yen sign = yuan sign, U+00A5 ISOnum" },
15985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 166,	"brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
15995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 167,	"sect",	"section sign, U+00A7 ISOnum" },
16005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 168,	"uml",	"diaeresis = spacing diaeresis, U+00A8 ISOdia" },
16015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 169,	"copy",	"copyright sign, U+00A9 ISOnum" },
16025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 170,	"ordf",	"feminine ordinal indicator, U+00AA ISOnum" },
16035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 171,	"laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
16045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 172,	"not",	"not sign, U+00AC ISOnum" },
16055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 173,	"shy",	"soft hyphen = discretionary hyphen, U+00AD ISOnum" },
16065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 174,	"reg",	"registered sign = registered trade mark sign, U+00AE ISOnum" },
16075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 175,	"macr",	"macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
16085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 176,	"deg",	"degree sign, U+00B0 ISOnum" },
16095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 177,	"plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
16105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 178,	"sup2",	"superscript two = superscript digit two = squared, U+00B2 ISOnum" },
16115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 179,	"sup3",	"superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
16125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 180,	"acute","acute accent = spacing acute, U+00B4 ISOdia" },
16135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 181,	"micro","micro sign, U+00B5 ISOnum" },
16145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 182,	"para",	"pilcrow sign = paragraph sign, U+00B6 ISOnum" },
16155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 183,	"middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
16165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 184,	"cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
16175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 185,	"sup1",	"superscript one = superscript digit one, U+00B9 ISOnum" },
16185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 186,	"ordm",	"masculine ordinal indicator, U+00BA ISOnum" },
16195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 187,	"raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
16205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 188,	"frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
16215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 189,	"frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
16225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 190,	"frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
16235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 191,	"iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
16245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 192,	"Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
16255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 193,	"Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
16265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 194,	"Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
16275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 195,	"Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
16285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 196,	"Auml",	"latin capital letter A with diaeresis, U+00C4 ISOlat1" },
16295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 197,	"Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
16305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 198,	"AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
16315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 199,	"Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
16325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 200,	"Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
16335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 201,	"Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
16345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 202,	"Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
16355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 203,	"Euml",	"latin capital letter E with diaeresis, U+00CB ISOlat1" },
16365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 204,	"Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
16375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 205,	"Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
16385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 206,	"Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
16395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 207,	"Iuml",	"latin capital letter I with diaeresis, U+00CF ISOlat1" },
16405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 208,	"ETH",	"latin capital letter ETH, U+00D0 ISOlat1" },
16415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 209,	"Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
16425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 210,	"Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
16435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 211,	"Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
16445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 212,	"Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
16455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 213,	"Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
16465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 214,	"Ouml",	"latin capital letter O with diaeresis, U+00D6 ISOlat1" },
16475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 215,	"times","multiplication sign, U+00D7 ISOnum" },
16485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 216,	"Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
16495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 217,	"Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
16505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 218,	"Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
16515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 219,	"Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
16525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 220,	"Uuml",	"latin capital letter U with diaeresis, U+00DC ISOlat1" },
16535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 221,	"Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
16545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 222,	"THORN","latin capital letter THORN, U+00DE ISOlat1" },
16555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 223,	"szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
16565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 224,	"agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
16575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 225,	"aacute","latin small letter a with acute, U+00E1 ISOlat1" },
16585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 226,	"acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
16595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 227,	"atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
16605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 228,	"auml",	"latin small letter a with diaeresis, U+00E4 ISOlat1" },
16615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 229,	"aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
16625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 230,	"aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
16635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 231,	"ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
16645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 232,	"egrave","latin small letter e with grave, U+00E8 ISOlat1" },
16655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 233,	"eacute","latin small letter e with acute, U+00E9 ISOlat1" },
16665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 234,	"ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
16675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 235,	"euml",	"latin small letter e with diaeresis, U+00EB ISOlat1" },
16685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 236,	"igrave","latin small letter i with grave, U+00EC ISOlat1" },
16695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 237,	"iacute","latin small letter i with acute, U+00ED ISOlat1" },
16705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 238,	"icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
16715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 239,	"iuml",	"latin small letter i with diaeresis, U+00EF ISOlat1" },
16725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 240,	"eth",	"latin small letter eth, U+00F0 ISOlat1" },
16735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 241,	"ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
16745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 242,	"ograve","latin small letter o with grave, U+00F2 ISOlat1" },
16755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 243,	"oacute","latin small letter o with acute, U+00F3 ISOlat1" },
16765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 244,	"ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
16775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 245,	"otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
16785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 246,	"ouml",	"latin small letter o with diaeresis, U+00F6 ISOlat1" },
16795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 247,	"divide","division sign, U+00F7 ISOnum" },
16805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 248,	"oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
16815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 249,	"ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
16825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 250,	"uacute","latin small letter u with acute, U+00FA ISOlat1" },
16835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 251,	"ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
16845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 252,	"uuml",	"latin small letter u with diaeresis, U+00FC ISOlat1" },
16855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 253,	"yacute","latin small letter y with acute, U+00FD ISOlat1" },
16865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 254,	"thorn","latin small letter thorn with, U+00FE ISOlat1" },
16875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 255,	"yuml",	"latin small letter y with diaeresis, U+00FF ISOlat1" },
16885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
16895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 338,	"OElig","latin capital ligature OE, U+0152 ISOlat2" },
16905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 339,	"oelig","latin small ligature oe, U+0153 ISOlat2" },
16915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 352,	"Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
16925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 353,	"scaron","latin small letter s with caron, U+0161 ISOlat2" },
16935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 376,	"Yuml",	"latin capital letter Y with diaeresis, U+0178 ISOlat2" },
16945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
16955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
16965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Anything below should really be kept as entities references
16975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
16985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 402,	"fnof",	"latin small f with hook = function = florin, U+0192 ISOtech" },
16995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 710,	"circ",	"modifier letter circumflex accent, U+02C6 ISOpub" },
17015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 732,	"tilde","small tilde, U+02DC ISOdia" },
17025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 913,	"Alpha","greek capital letter alpha, U+0391" },
17045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 914,	"Beta",	"greek capital letter beta, U+0392" },
17055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 915,	"Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
17065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 916,	"Delta","greek capital letter delta, U+0394 ISOgrk3" },
17075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 917,	"Epsilon","greek capital letter epsilon, U+0395" },
17085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 918,	"Zeta",	"greek capital letter zeta, U+0396" },
17095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 919,	"Eta",	"greek capital letter eta, U+0397" },
17105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 920,	"Theta","greek capital letter theta, U+0398 ISOgrk3" },
17115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 921,	"Iota",	"greek capital letter iota, U+0399" },
17125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 922,	"Kappa","greek capital letter kappa, U+039A" },
17135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 923,	"Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
17145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 924,	"Mu",	"greek capital letter mu, U+039C" },
17155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 925,	"Nu",	"greek capital letter nu, U+039D" },
17165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 926,	"Xi",	"greek capital letter xi, U+039E ISOgrk3" },
17175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 927,	"Omicron","greek capital letter omicron, U+039F" },
17185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 928,	"Pi",	"greek capital letter pi, U+03A0 ISOgrk3" },
17195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 929,	"Rho",	"greek capital letter rho, U+03A1" },
17205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 931,	"Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
17215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 932,	"Tau",	"greek capital letter tau, U+03A4" },
17225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 933,	"Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
17235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 934,	"Phi",	"greek capital letter phi, U+03A6 ISOgrk3" },
17245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 935,	"Chi",	"greek capital letter chi, U+03A7" },
17255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 936,	"Psi",	"greek capital letter psi, U+03A8 ISOgrk3" },
17265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 937,	"Omega","greek capital letter omega, U+03A9 ISOgrk3" },
17275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 945,	"alpha","greek small letter alpha, U+03B1 ISOgrk3" },
17295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 946,	"beta",	"greek small letter beta, U+03B2 ISOgrk3" },
17305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 947,	"gamma","greek small letter gamma, U+03B3 ISOgrk3" },
17315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 948,	"delta","greek small letter delta, U+03B4 ISOgrk3" },
17325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 949,	"epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
17335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 950,	"zeta",	"greek small letter zeta, U+03B6 ISOgrk3" },
17345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 951,	"eta",	"greek small letter eta, U+03B7 ISOgrk3" },
17355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 952,	"theta","greek small letter theta, U+03B8 ISOgrk3" },
17365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 953,	"iota",	"greek small letter iota, U+03B9 ISOgrk3" },
17375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 954,	"kappa","greek small letter kappa, U+03BA ISOgrk3" },
17385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 955,	"lambda","greek small letter lambda, U+03BB ISOgrk3" },
17395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 956,	"mu",	"greek small letter mu, U+03BC ISOgrk3" },
17405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 957,	"nu",	"greek small letter nu, U+03BD ISOgrk3" },
17415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 958,	"xi",	"greek small letter xi, U+03BE ISOgrk3" },
17425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 959,	"omicron","greek small letter omicron, U+03BF NEW" },
17435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 960,	"pi",	"greek small letter pi, U+03C0 ISOgrk3" },
17445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 961,	"rho",	"greek small letter rho, U+03C1 ISOgrk3" },
17455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 962,	"sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
17465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 963,	"sigma","greek small letter sigma, U+03C3 ISOgrk3" },
17475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 964,	"tau",	"greek small letter tau, U+03C4 ISOgrk3" },
17485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 965,	"upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
17495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 966,	"phi",	"greek small letter phi, U+03C6 ISOgrk3" },
17505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 967,	"chi",	"greek small letter chi, U+03C7 ISOgrk3" },
17515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 968,	"psi",	"greek small letter psi, U+03C8 ISOgrk3" },
17525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 969,	"omega","greek small letter omega, U+03C9 ISOgrk3" },
17535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 977,	"thetasym","greek small letter theta symbol, U+03D1 NEW" },
17545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 978,	"upsih","greek upsilon with hook symbol, U+03D2 NEW" },
17555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 982,	"piv",	"greek pi symbol, U+03D6 ISOgrk3" },
17565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8194,	"ensp",	"en space, U+2002 ISOpub" },
17585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8195,	"emsp",	"em space, U+2003 ISOpub" },
17595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8201,	"thinsp","thin space, U+2009 ISOpub" },
17605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8204,	"zwnj",	"zero width non-joiner, U+200C NEW RFC 2070" },
17615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8205,	"zwj",	"zero width joiner, U+200D NEW RFC 2070" },
17625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8206,	"lrm",	"left-to-right mark, U+200E NEW RFC 2070" },
17635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8207,	"rlm",	"right-to-left mark, U+200F NEW RFC 2070" },
17645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8211,	"ndash","en dash, U+2013 ISOpub" },
17655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8212,	"mdash","em dash, U+2014 ISOpub" },
17665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8216,	"lsquo","left single quotation mark, U+2018 ISOnum" },
17675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8217,	"rsquo","right single quotation mark, U+2019 ISOnum" },
17685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8218,	"sbquo","single low-9 quotation mark, U+201A NEW" },
17695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8220,	"ldquo","left double quotation mark, U+201C ISOnum" },
17705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8221,	"rdquo","right double quotation mark, U+201D ISOnum" },
17715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8222,	"bdquo","double low-9 quotation mark, U+201E NEW" },
17725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8224,	"dagger","dagger, U+2020 ISOpub" },
17735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8225,	"Dagger","double dagger, U+2021 ISOpub" },
17745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8226,	"bull",	"bullet = black small circle, U+2022 ISOpub" },
17765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8230,	"hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
17775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8240,	"permil","per mille sign, U+2030 ISOtech" },
17795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8242,	"prime","prime = minutes = feet, U+2032 ISOtech" },
17815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8243,	"Prime","double prime = seconds = inches, U+2033 ISOtech" },
17825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8249,	"lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
17845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8250,	"rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
17855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8254,	"oline","overline = spacing overscore, U+203E NEW" },
17875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8260,	"frasl","fraction slash, U+2044 NEW" },
17885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8364,	"euro",	"euro sign, U+20AC NEW" },
17905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
17915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8465,	"image","blackletter capital I = imaginary part, U+2111 ISOamso" },
17925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8472,	"weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
17935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8476,	"real",	"blackletter capital R = real part symbol, U+211C ISOamso" },
17945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8482,	"trade","trade mark sign, U+2122 ISOnum" },
17955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8501,	"alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
17965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8592,	"larr",	"leftwards arrow, U+2190 ISOnum" },
17975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8593,	"uarr",	"upwards arrow, U+2191 ISOnum" },
17985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8594,	"rarr",	"rightwards arrow, U+2192 ISOnum" },
17995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8595,	"darr",	"downwards arrow, U+2193 ISOnum" },
18005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8596,	"harr",	"left right arrow, U+2194 ISOamsa" },
18015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8629,	"crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
18025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8656,	"lArr",	"leftwards double arrow, U+21D0 ISOtech" },
18035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8657,	"uArr",	"upwards double arrow, U+21D1 ISOamsa" },
18045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8658,	"rArr",	"rightwards double arrow, U+21D2 ISOtech" },
18055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8659,	"dArr",	"downwards double arrow, U+21D3 ISOamsa" },
18065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8660,	"hArr",	"left right double arrow, U+21D4 ISOamsa" },
18075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
18085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8704,	"forall","for all, U+2200 ISOtech" },
18095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8706,	"part",	"partial differential, U+2202 ISOtech" },
18105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8707,	"exist","there exists, U+2203 ISOtech" },
18115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8709,	"empty","empty set = null set = diameter, U+2205 ISOamso" },
18125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8711,	"nabla","nabla = backward difference, U+2207 ISOtech" },
18135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8712,	"isin",	"element of, U+2208 ISOtech" },
18145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8713,	"notin","not an element of, U+2209 ISOtech" },
18155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8715,	"ni",	"contains as member, U+220B ISOtech" },
18165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8719,	"prod",	"n-ary product = product sign, U+220F ISOamsb" },
18175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8721,	"sum",	"n-ary summation, U+2211 ISOamsb" },
18185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8722,	"minus","minus sign, U+2212 ISOtech" },
18195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8727,	"lowast","asterisk operator, U+2217 ISOtech" },
18205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8730,	"radic","square root = radical sign, U+221A ISOtech" },
18215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8733,	"prop",	"proportional to, U+221D ISOtech" },
18225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8734,	"infin","infinity, U+221E ISOtech" },
18235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8736,	"ang",	"angle, U+2220 ISOamso" },
18245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8743,	"and",	"logical and = wedge, U+2227 ISOtech" },
18255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8744,	"or",	"logical or = vee, U+2228 ISOtech" },
18265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8745,	"cap",	"intersection = cap, U+2229 ISOtech" },
18275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8746,	"cup",	"union = cup, U+222A ISOtech" },
18285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8747,	"int",	"integral, U+222B ISOtech" },
18295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8756,	"there4","therefore, U+2234 ISOtech" },
18305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8764,	"sim",	"tilde operator = varies with = similar to, U+223C ISOtech" },
18315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8773,	"cong",	"approximately equal to, U+2245 ISOtech" },
18325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8776,	"asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
18335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8800,	"ne",	"not equal to, U+2260 ISOtech" },
18345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8801,	"equiv","identical to, U+2261 ISOtech" },
18355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8804,	"le",	"less-than or equal to, U+2264 ISOtech" },
18365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8805,	"ge",	"greater-than or equal to, U+2265 ISOtech" },
18375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8834,	"sub",	"subset of, U+2282 ISOtech" },
18385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8835,	"sup",	"superset of, U+2283 ISOtech" },
18395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8836,	"nsub",	"not a subset of, U+2284 ISOamsn" },
18405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8838,	"sube",	"subset of or equal to, U+2286 ISOtech" },
18415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8839,	"supe",	"superset of or equal to, U+2287 ISOtech" },
18425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8853,	"oplus","circled plus = direct sum, U+2295 ISOamsb" },
18435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8855,	"otimes","circled times = vector product, U+2297 ISOamsb" },
18445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8869,	"perp",	"up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
18455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8901,	"sdot",	"dot operator, U+22C5 ISOamsb" },
18465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8968,	"lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
18475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8969,	"rceil","right ceiling, U+2309 ISOamsc" },
18485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8970,	"lfloor","left floor = apl downstile, U+230A ISOamsc" },
18495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 8971,	"rfloor","right floor, U+230B ISOamsc" },
18505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9001,	"lang",	"left-pointing angle bracket = bra, U+2329 ISOtech" },
18515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9002,	"rang",	"right-pointing angle bracket = ket, U+232A ISOtech" },
18525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9674,	"loz",	"lozenge, U+25CA ISOpub" },
18535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
18545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9824,	"spades","black spade suit, U+2660 ISOpub" },
18555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9827,	"clubs","black club suit = shamrock, U+2663 ISOpub" },
18565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9829,	"hearts","black heart suit = valentine, U+2665 ISOpub" },
18575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 9830,	"diams","black diamond suit, U+2666 ISOpub" },
18585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
18595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
18605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
18615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
18625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
18635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *		Commodity functions to handle entities			*
18645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
18655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
18665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
 * growBuffer:
 * Doubles the companion size variable buffer##_size and reallocates
 * `buffer` to hold that many xmlChar using xmlRealloc().
 *
 * On success `buffer` is updated to the reallocated block. On failure the
 * error is reported through htmlErrMemory(ctxt, ...), the old `buffer` is
 * released with xmlFree() and the macro executes return(NULL) in the
 * function it was expanded in.
 *
 * The expansion site must therefore have both `ctxt` and a variable named
 * <buffer>_size in scope.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp != NULL) {							\
	buffer = tmp;							\
    } else {								\
	htmlErrMemory(ctxt, "growing buffer\n");			\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
}
18815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
18825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
18835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlEntityLookup:
18845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @name: the entity name
18855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
18865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the given entity in EntitiesTable
18875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
18885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: the linear scan is really ugly, an hash table is really needed.
18895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
18905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
18915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
18925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlEntityDesc *
18935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlEntityLookup(const xmlChar *name) {
18945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int i;
18955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
18965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = 0;i < (sizeof(html40EntitiesTable)/
18975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    sizeof(html40EntitiesTable[0]));i++) {
18985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
18995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return((htmlEntityDescPtr) &html40EntitiesTable[i]);
19005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
19015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
19025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(NULL);
19035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
19045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
19065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlEntityValueLookup:
19075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value: the entity's unicode value
19085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
19095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Lookup the given entity in EntitiesTable
19105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
19115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: the linear scan is really ugly, an hash table is really needed.
19125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
19135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
19145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
19155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlEntityDesc *
19165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlEntityValueLookup(unsigned int value) {
19175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int i;
19185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = 0;i < (sizeof(html40EntitiesTable)/
19205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    sizeof(html40EntitiesTable[0]));i++) {
19215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (html40EntitiesTable[i].value >= value) {
19225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (html40EntitiesTable[i].value > value)
19235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
19245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return((htmlEntityDescPtr) &html40EntitiesTable[i]);
19255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
19265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
19275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(NULL);
19285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
19295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
19315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * UTF8ToHtml:
19325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @out:  a pointer to an array of bytes to store the result
19335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @outlen:  the length of @out
19345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @in:  a pointer to an array of UTF-8 chars
19355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @inlen:  the length of @in
19365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
19375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Take a block of UTF-8 chars in and try to convert it to an ASCII
19385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * plus HTML entities block of chars out.
19395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
19405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
19415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @inlen after return is the number of octets consumed
19425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *     as the return value is positive, else unpredictable.
19435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @outlen after return is the number of octets consumed.
19445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
19455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
19465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)UTF8ToHtml(unsigned char* out, int *outlen,
19475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              const unsigned char* in, int *inlen) {
19485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* processed = in;
19495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* outend;
19505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* outstart = out;
19515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* instart = in;
19525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* inend;
19535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int c, d;
19545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int trailing;
19555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
19575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (in == NULL) {
19585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        /*
19595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * initialization nothing to do
19605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
19615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	*outlen = 0;
19625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	*inlen = 0;
19635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(0);
19645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
19655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inend = in + (*inlen);
19665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    outend = out + (*outlen);
19675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (in < inend) {
19685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	d = *in++;
19695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if      (d < 0x80)  { c= d; trailing= 0; }
19705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else if (d < 0xC0) {
19715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /* trailing byte in leading position */
19725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *outlen = out - outstart;
19735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *inlen = processed - instart;
19745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(-2);
19755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
19765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
19775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
19785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else {
19795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /* no chance for this in Ascii */
19805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *outlen = out - outstart;
19815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *inlen = processed - instart;
19825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(-2);
19835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
19845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (inend - in < trailing) {
19865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    break;
19875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
19885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for ( ; trailing; trailing--) {
19905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
19915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
19925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    c <<= 6;
19935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    c |= d & 0x3F;
19945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
19955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
19965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/* assertion: c is a single UTF-4 value */
19975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (c < 0x80) {
19985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (out + 1 >= outend)
19995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
20005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *out++ = c;
20015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
20025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    int len;
20035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    const htmlEntityDesc * ent;
20045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    const char *cp;
20055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    char nbuf[16];
20065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
20085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Try to lookup a predefined HTML entity for it
20095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
20105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ent = htmlEntityValueLookup(c);
20125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ent == NULL) {
20135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      snprintf(nbuf, sizeof(nbuf), "#%u", c);
20145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      cp = nbuf;
20155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
20165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else
20175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      cp = ent->name;
20185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    len = strlen(cp);
20195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (out + 2 + len >= outend)
20205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
20215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *out++ = '&';
20225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    memcpy(out, cp, len);
20235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    out += len;
20245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *out++ = ';';
20255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
20265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	processed = in;
20275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
20285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *outlen = out - outstart;
20295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *inlen = processed - instart;
20305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
20315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
20325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
20345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlEncodeEntities:
20355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @out:  a pointer to an array of bytes to store the result
20365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @outlen:  the length of @out
20375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @in:  a pointer to an array of UTF-8 chars
20385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @inlen:  the length of @in
20395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @quoteChar: the quote character to escape (' or ") or zero.
20405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
20415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Take a block of UTF-8 chars in and try to convert it to an ASCII
20425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * plus HTML entities block of chars out.
20435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
20445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
20455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @inlen after return is the number of octets consumed
20465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *     as the return value is positive, else unpredictable.
20475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @outlen after return is the number of octets consumed.
20485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
20495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
20505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlEncodeEntities(unsigned char* out, int *outlen,
20515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		   const unsigned char* in, int *inlen, int quoteChar) {
20525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* processed = in;
20535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* outend;
20545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* outstart = out;
20555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* instart = in;
20565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const unsigned char* inend;
20575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int c, d;
20585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int trailing;
20595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
20615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(-1);
20625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    outend = out + (*outlen);
20635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inend = in + (*inlen);
20645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (in < inend) {
20655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	d = *in++;
20665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if      (d < 0x80)  { c= d; trailing= 0; }
20675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else if (d < 0xC0) {
20685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /* trailing byte in leading position */
20695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *outlen = out - outstart;
20705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *inlen = processed - instart;
20715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(-2);
20725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
20735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
20745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
20755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else {
20765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /* no chance for this in Ascii */
20775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *outlen = out - outstart;
20785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *inlen = processed - instart;
20795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(-2);
20805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
20815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (inend - in < trailing)
20835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    break;
20845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while (trailing--) {
20865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (((d= *in++) & 0xC0) != 0x80) {
20875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		*outlen = out - outstart;
20885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		*inlen = processed - instart;
20895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return(-2);
20905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
20915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    c <<= 6;
20925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    c |= d & 0x3F;
20935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
20945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/* assertion: c is a single UTF-4 value */
20965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
20975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (c != '&') && (c != '<') && (c != '>')) {
20985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (out >= outend)
20995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
21005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *out++ = c;
21015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
21025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    const htmlEntityDesc * ent;
21035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    const char *cp;
21045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    char nbuf[16];
21055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    int len;
21065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
21085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Try to lookup a predefined HTML entity for it
21095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
21105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ent = htmlEntityValueLookup(c);
21115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ent == NULL) {
21125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		snprintf(nbuf, sizeof(nbuf), "#%u", c);
21135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cp = nbuf;
21145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
21155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else
21165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cp = ent->name;
21175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    len = strlen(cp);
21185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (out + 2 + len > outend)
21195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
21205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *out++ = '&';
21215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    memcpy(out, cp, len);
21225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    out += len;
21235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    *out++ = ';';
21245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
21255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	processed = in;
21265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
21275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *outlen = out - outstart;
21285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *inlen = processed - instart;
21295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
21305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
21315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
21335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
21345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *		Commodity functions to handle streams			*
21355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
21365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
21375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
21395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewInputStream:
21405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
21415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
21425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a new input stream structure
21435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new input stream or NULL
21445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
21455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlParserInputPtr
21465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewInputStream(htmlParserCtxtPtr ctxt) {
21475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserInputPtr input;
21485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
21505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL) {
21515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
21525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
21535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
21545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memset(input, 0, sizeof(htmlParserInput));
21555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->filename = NULL;
21565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->directory = NULL;
21575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->base = NULL;
21585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->cur = NULL;
21595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->buf = NULL;
21605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->line = 1;
21615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->col = 1;
21625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->buf = NULL;
21635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->free = NULL;
21645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->version = NULL;
21655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->consumed = 0;
21665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->length = 0;
21675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(input);
21685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
21695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
21725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
21735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *		Commodity functions, cleanup needed ?			*
21745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
21755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
/*
 * All tags allowing PCDATA, taken from the HTML 4.01 loose DTD.
 * NOTE: it might be more appropriate to integrate this information
 * into the html40ElementTable array, but that would risk a binary
 * incompatibility.
 *
 * Entries are grouped by initial letter, one group per line.
 */
static const char *allowPCData[] = {
    "a", "abbr", "acronym", "address", "applet",
    "b", "bdo", "big", "blockquote", "body", "button",
    "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt",
    "em",
    "font", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "i", "iframe", "ins",
    "kbd",
    "label", "legend", "li",
    "noframes", "noscript",
    "object",
    "p", "pre",
    "q",
    "s", "samp", "small", "span", "strike", "strong",
    "td", "th", "tt",
    "u",
    "var"
};
21905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
21915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
21925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * areBlanks:
21935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
21945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str:  a xmlChar *
21955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @len:  the size of @str
21965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
21975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Is this a sequence of blank chars that one can ignore ?
21985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
21995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if ignorable 0 otherwise.
22005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
22015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
22035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned int i;
22045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int j;
22055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlNodePtr lastChild;
22065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlDtdPtr dtd;
22075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (j = 0;j < len;j++)
22095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (!(IS_BLANK_CH(str[j]))) return(0);
22105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == 0) return(1);
22125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR != '<') return(0);
22135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->name == NULL)
22145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(1);
22155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
22165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(1);
22175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
22185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(1);
22195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Only strip CDATA children of the body tag for strict HTML DTDs */
22215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
22225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        dtd = xmlGetIntSubset(ctxt->myDoc);
22235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (dtd != NULL && dtd->ExternalID != NULL) {
22245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") ||
22255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
22265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return(1);
22275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
22285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
22295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->node == NULL) return(0);
22315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lastChild = xmlGetLastChild(ctxt->node);
22325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
22335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	lastChild = lastChild->prev;
22345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (lastChild == NULL) {
22355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
22365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            (ctxt->node->content != NULL)) return(0);
22375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/* keep ws in constructs like ...<b> </b>...
22385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   for all tags "b" allowing PCDATA */
22395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
22405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
22415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return(0);
22425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
22435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
22445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (xmlNodeIsText(lastChild)) {
22455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(0);
22465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
22475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
22485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   for all tags "p" allowing PCDATA */
22495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
22505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
22515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return(0);
22525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
22535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
22545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
22555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(1);
22565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
22575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
22595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewDocNoDtD:
22605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URI:  URI for the dtd, or NULL
22615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ExternalID:  the external ID of the DTD, or NULL
22625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
22635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Creates a new HTML document without a DTD node if @URI and @ExternalID
22645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * are NULL
22655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
22665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns a new document, do not initialize the DTD if not provided
22675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
22685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
22695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
22705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlDocPtr cur;
22715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
22735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Allocate a new document and fill the fields.
22745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
22755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
22765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL) {
22775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlErrMemory(NULL, "HTML document creation failed\n");
22785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
22795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
22805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memset(cur, 0, sizeof(xmlDoc));
22815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
22825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->type = XML_HTML_DOCUMENT_NODE;
22835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->version = NULL;
22845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->intSubset = NULL;
22855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->doc = cur;
22865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->name = NULL;
22875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->children = NULL;
22885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->extSubset = NULL;
22895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->oldNs = NULL;
22905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->encoding = NULL;
22915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->standalone = 1;
22925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->compression = 0;
22935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->ids = NULL;
22945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->refs = NULL;
22955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->_private = NULL;
22965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->charset = XML_CHAR_ENCODING_UTF8;
22975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT;
22985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ExternalID != NULL) ||
22995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(URI != NULL))
23005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
23015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(cur);
23025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
23035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
23055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewDoc:
23065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URI:  URI for the dtd, or NULL
23075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ExternalID:  the external ID of the DTD, or NULL
23085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
23095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Creates a new HTML document
23105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
23115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns a new document
23125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
23135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
23145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
23155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((URI == NULL) && (ExternalID == NULL))
23165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(htmlNewDocNoDtD(
23175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
23185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
23195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(htmlNewDocNoDtD(URI, ExternalID));
23215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
23225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
23255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
23265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *			The parser itself				*
23275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	Relates to http://www.w3.org/TR/html40				*
23285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
23295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
23305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
23325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
23335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *			The parser itself				*
23345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
23355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
23365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
23385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
23405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseHTMLName:
23415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
23425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
23435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML tag or attribute name, note that we convert it to lowercase
23445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * since HTML names are not case-sensitive.
23455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
23465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the Tag Name parsed or NULL
23475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
23485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar *
23505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
23515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i = 0;
23525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar loc[HTML_PARSER_BUFFER_SIZE];
23535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
23555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (CUR != ':') && (CUR != '.')) return(NULL);
23565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((i < HTML_PARSER_BUFFER_SIZE) &&
23585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
23595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   (CUR == ':') || (CUR == '-') || (CUR == '_') ||
23605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           (CUR == '.'))) {
23615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
23625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else loc[i] = CUR;
23635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	i++;
23645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	NEXT;
23665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
23675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(xmlDictLookup(ctxt->dict, loc, i));
23695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
23705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
23735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseHTMLName_nonInvasive:
23745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
23755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
23765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML tag or attribute name, note that we convert it to lowercase
23775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * since HTML names are not case-sensitive, this doesn't consume the data
23785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * from the stream, it's a look-ahead
23795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
23805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the Tag Name parsed or NULL
23815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
23825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar *
23845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
23855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i = 0;
23865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar loc[HTML_PARSER_BUFFER_SIZE];
23875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&
23895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (NXT(1) != ':')) return(NULL);
23905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((i < HTML_PARSER_BUFFER_SIZE) &&
23925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) ||
23935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) {
23945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;
23955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else loc[i] = NXT(1+i);
23965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	i++;
23975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
23985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
23995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(xmlDictLookup(ctxt->dict, loc, i));
24005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
24015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
24045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseName:
24055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
24065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
24075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML name, this routine is case sensitive.
24085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
24095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the Name parsed or NULL
24105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
24115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar *
24135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseName(htmlParserCtxtPtr ctxt) {
24145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *in;
24155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *ret;
24165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int count = 0;
24175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    GROW;
24195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
24215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Accelerator for simple ASCII names
24225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
24235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    in = ctxt->input->cur;
24245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (((*in >= 0x61) && (*in <= 0x7A)) ||
24255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	((*in >= 0x41) && (*in <= 0x5A)) ||
24265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(*in == '_') || (*in == ':')) {
24275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	in++;
24285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while (((*in >= 0x61) && (*in <= 0x7A)) ||
24295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       ((*in >= 0x41) && (*in <= 0x5A)) ||
24305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       ((*in >= 0x30) && (*in <= 0x39)) ||
24315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       (*in == '_') || (*in == '-') ||
24325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       (*in == ':') || (*in == '.'))
24335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    in++;
24345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((*in > 0) && (*in < 0x80)) {
24355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    count = in - ctxt->input->cur;
24365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
24375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->input->cur = in;
24385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->nbChars += count;
24395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->input->col += count;
24405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(ret);
24415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
24425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
24435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(htmlParseNameComplex(ctxt));
24445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
24455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar *
24475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
24485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int len = 0, l;
24495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int c;
24505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int count = 0;
24515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
24535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Handler for more complex cases
24545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
24555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    GROW;
24565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c = CUR_CHAR(l);
24575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
24585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(!IS_LETTER(c) && (c != '_') &&
24595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         (c != ':'))) {
24605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
24615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
24625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
24645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
24655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            (c == '.') || (c == '-') ||
24665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (c == '_') || (c == ':') ||
24675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (IS_COMBINING(c)) ||
24685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (IS_EXTENDER(c)))) {
24695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (count++ > 100) {
24705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    count = 0;
24715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    GROW;
24725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
24735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	len += l;
24745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	NEXTL(l);
24755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	c = CUR_CHAR(l);
24765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
24775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
24785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
24795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
24825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseHTMLAttribute:
24835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
24845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @stop:  a char stop value
24855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
24865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML attribute value till the stop (quote), if
24875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * stop is 0 then it stops at the first space
24885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
24895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the attribute parsed or NULL
24905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
24915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
24925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar *
24935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
24945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *buffer = NULL;
24955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int buffer_size = 0;
24965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *out = NULL;
24975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name = NULL;
24985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *cur = NULL;
24995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const htmlEntityDesc * ent;
25005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
25025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * allocate a translation buffer.
25035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
25045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buffer_size = HTML_PARSER_BUFFER_SIZE;
25055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
25065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buffer == NULL) {
25075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlErrMemory(ctxt, "buffer allocation failed\n");
25085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
25095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
25105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    out = buffer;
25115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
25135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Ok loop until we reach one of the ending chars
25145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
25155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((CUR != 0) && (CUR != stop)) {
25165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((stop == 0) && (CUR == '>')) break;
25175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
25185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (CUR == '&') {
25195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (NXT(1) == '#') {
25205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		unsigned int c;
25215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		int bits;
25225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		c = htmlParseCharRef(ctxt);
25245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if      (c <    0x80)
25255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        { *out++  = c;                bits= -6; }
25265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else if (c <   0x800)
25275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
25285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else if (c < 0x10000)
25295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
25305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else
25315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
25325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		for ( ; bits >= 0; bits-= 6) {
25345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *out++  = ((c >> bits) & 0x3F) | 0x80;
25355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
25365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (out - buffer > buffer_size - 100) {
25385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			int indx = out - buffer;
25395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			growBuffer(buffer);
25415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			out = &buffer[indx];
25425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
25435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
25445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ent = htmlParseEntityRef(ctxt, &name);
25455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (name == NULL) {
25465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *out++ = '&';
25475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (out - buffer > buffer_size - 100) {
25485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			int indx = out - buffer;
25495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			growBuffer(buffer);
25515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			out = &buffer[indx];
25525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
25535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else if (ent == NULL) {
25545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *out++ = '&';
25555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    cur = name;
25565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    while (*cur != 0) {
25575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if (out - buffer > buffer_size - 100) {
25585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    int indx = out - buffer;
25595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    growBuffer(buffer);
25615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    out = &buffer[indx];
25625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			}
25635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			*out++ = *cur++;
25645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
25655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
25665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    unsigned int c;
25675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    int bits;
25685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (out - buffer > buffer_size - 100) {
25705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			int indx = out - buffer;
25715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			growBuffer(buffer);
25735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			out = &buffer[indx];
25745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
25755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    c = ent->value;
25765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if      (c <    0x80)
25775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			{ *out++  = c;                bits= -6; }
25785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    else if (c <   0x800)
25795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			{ *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
25805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    else if (c < 0x10000)
25815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			{ *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
25825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    else
25835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			{ *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
25845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    for ( ; bits >= 0; bits-= 6) {
25865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			*out++  = ((c >> bits) & 0x3F) | 0x80;
25875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
25885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
25895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
25905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
25915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    unsigned int c;
25925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    int bits, l;
25935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (out - buffer > buffer_size - 100) {
25955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		int indx = out - buffer;
25965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		growBuffer(buffer);
25985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		out = &buffer[indx];
25995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
26005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    c = CUR_CHAR(l);
26015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if      (c <    0x80)
26025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    { *out++  = c;                bits= -6; }
26035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (c <   0x800)
26045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
26055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (c < 0x10000)
26065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
26075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else
26085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
26095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    for ( ; bits >= 0; bits-= 6) {
26115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		*out++  = ((c >> bits) & 0x3F) | 0x80;
26125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
26135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
26145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
26155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
26165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *out = 0;
26175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(buffer);
26185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
26195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
26215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseEntityRef:
26225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
26235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @str:  location to store the entity name
26245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
26255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML ENTITY references
26265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
26275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [68] EntityRef ::= '&' Name ';'
26285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
26295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
26305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *         if non-NULL *str will have to be freed by the caller.
26315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
26325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const htmlEntityDesc *
26335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
26345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
26355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const htmlEntityDesc * ent = NULL;
26365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (str != NULL) *str = NULL;
26385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
26395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '&') {
26415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
26425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        name = htmlParseName(ctxt);
26435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (name == NULL) {
26445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
26455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "htmlParseEntityRef: no name\n", NULL, NULL);
26465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
26475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    GROW;
26485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (CUR == ';') {
26495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (str != NULL)
26505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *str = name;
26515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
26535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Lookup the entity in the table.
26545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
26555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ent = htmlEntityLookup(name);
26565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ent != NULL) /* OK that's ugly !!! */
26575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    NEXT;
26585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
26595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
26605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "htmlParseEntityRef: expecting ';'\n",
26615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
26625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (str != NULL)
26635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    *str = name;
26645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
26655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
26665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
26675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ent);
26685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
26695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
26715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseAttValue:
26725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
26735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
26745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a value for an attribute
26755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Note: the parser won't do substitution of entities here, this
26765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * will be handled later in xmlStringGetNodeList, unless it was
26775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * asked for ctxt->replaceEntities != 0
26785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
26795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the AttValue parsed or NULL.
26805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
26815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar *
26835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseAttValue(htmlParserCtxtPtr ctxt) {
26845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *ret = NULL;
26855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '"') {
26875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
26885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ret = htmlParseHTMLAttribute(ctxt, '"');
26895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (CUR != '"') {
26905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
26915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "AttValue: \" expected\n", NULL, NULL);
26925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else
26935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
26945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (CUR == '\'') {
26955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
26965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ret = htmlParseHTMLAttribute(ctxt, '\'');
26975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (CUR != '\'') {
26985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
26995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "AttValue: ' expected\n", NULL, NULL);
27005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else
27015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
27035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        /*
27045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * That's an HTMLism, the attribute value may not be quoted
27055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
27065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ret = htmlParseHTMLAttribute(ctxt, 0);
27075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ret == NULL) {
27085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
27095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "AttValue: no value found\n", NULL, NULL);
27105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
27115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
27125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ret);
27135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
27145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
27165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseSystemLiteral:
27175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
27185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
27195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML Literal
27205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
27215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
27225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
27235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the SystemLiteral parsed or NULL
27245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
27255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar *
27275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
27285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *q;
27295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *ret = NULL;
27305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '"') {
27325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
27335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	q = CUR_PTR;
27345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while ((IS_CHAR_CH(CUR)) && (CUR != '"'))
27355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (!IS_CHAR_CH(CUR)) {
27375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
27385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 "Unfinished SystemLiteral\n", NULL, NULL);
27395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
27405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ret = xmlStrndup(q, CUR_PTR - q);
27415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
27435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (CUR == '\'') {
27445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
27455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	q = CUR_PTR;
27465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while ((IS_CHAR_CH(CUR)) && (CUR != '\''))
27475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (!IS_CHAR_CH(CUR)) {
27495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
27505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 "Unfinished SystemLiteral\n", NULL, NULL);
27515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
27525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ret = xmlStrndup(q, CUR_PTR - q);
27535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
27555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
27565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
27575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             " or ' expected\n", NULL, NULL);
27585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
27595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ret);
27615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
27625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
27645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParsePubidLiteral:
27655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
27665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
27675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML public literal
27685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
27695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
27705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
27715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the PubidLiteral parsed or NULL.
27725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
27735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
27745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar *
27755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
27765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *q;
27775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *ret = NULL;
27785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
27795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Name ::= (Letter | '_') (NameChar)*
27805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
27815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '"') {
27825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
27835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	q = CUR_PTR;
27845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while (IS_PUBIDCHAR_CH(CUR)) NEXT;
27855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (CUR != '"') {
27865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
27875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "Unfinished PubidLiteral\n", NULL, NULL);
27885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
27895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ret = xmlStrndup(q, CUR_PTR - q);
27905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
27925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (CUR == '\'') {
27935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
27945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	q = CUR_PTR;
27955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\''))
27965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
27975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (CUR != '\'') {
27985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
27995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "Unfinished PubidLiteral\n", NULL, NULL);
28005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
28015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ret = xmlStrndup(q, CUR_PTR - q);
28025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
28035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
28045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
28055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
28065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "PubidLiteral \" or ' expected\n", NULL, NULL);
28075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
28085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
28095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ret);
28105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
28115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
28125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
28135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseScript:
28145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
28155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
28165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse the content of an HTML SCRIPT or STYLE element
28175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/sgml/dtd.html#Script
28185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
28195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/types.html#type-script
28205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/types.html#h-6.15
28215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
28225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
28235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Script data ( %Script; in the DTD) can be the content of the SCRIPT
28245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * element and the value of intrinsic event attributes. User agents must
28255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * not evaluate script data as HTML markup but instead must pass it on as
28265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * data to a script engine.
28275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * NOTES:
28285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * - The content is passed like CDATA
28295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * - the attributes for style and scripting "onXXX" are also described
28305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   as CDATA but SGML allows entities references in attributes so their
28315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *   processing is identical as other attributes
28325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
28335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
28345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseScript(htmlParserCtxtPtr ctxt) {
28355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
28365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int nbchar = 0;
28375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int cur,l;
28385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
28395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SHRINK;
28405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = CUR_CHAR(l);
28415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (IS_CHAR_CH(cur)) {
28425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((cur == '<') && (NXT(1) == '/')) {
28435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            /*
28445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * One should break here, the specification is clear:
28455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * Authors should therefore escape "</" within the content.
28465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * Escape mechanisms are specific to each scripting or
28475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * style sheet language.
28485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             *
28495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * In recovery mode, only break if end tag match the
28505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * current tag, effectively ignoring all tags inside the
28515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * script/style block and treating the entire block as
28525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             * CDATA.
28535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             */
28545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (ctxt->recovery) {
28555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
28565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				   xmlStrlen(ctxt->name)) == 0)
28575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                {
28585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    break; /* while */
28595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                } else {
28605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
28615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				 "Element %s embeds close tag\n",
28625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                 ctxt->name, NULL);
28635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
28645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else {
28655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) ||
28665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    ((NXT(2) >= 'a') && (NXT(2) <= 'z')))
28675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                {
28685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    break; /* while */
28695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                }
28705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
28715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
28725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	COPY_BUF(l,buf,nbchar,cur);
28735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
28745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ctxt->sax->cdataBlock!= NULL) {
28755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
28765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
28775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
28785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
28795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else if (ctxt->sax->characters != NULL) {
28805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->characters(ctxt->userData, buf, nbchar);
28815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
28825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    nbchar = 0;
28835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
28845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	GROW;
28855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	NEXTL(l);
28865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	cur = CUR_CHAR(l);
28875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
28885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
28895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
28905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
28915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                "Invalid char in CDATA 0x%X\n", cur);
28925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	NEXT;
28935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
28945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
28955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
28965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->sax->cdataBlock!= NULL) {
28975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
28985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
28995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
29005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
29015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else if (ctxt->sax->characters != NULL) {
29025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->characters(ctxt->userData, buf, nbchar);
29035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
29045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
29055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
29065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
29095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseCharData:
29105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
29115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
29125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a CharData section.
29135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * if we are within a CDATA section ']]>' marks an end of section.
29145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
29155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
29165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
29175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
29195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseCharData(htmlParserCtxtPtr ctxt) {
29205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
29215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int nbchar = 0;
29225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int cur, l;
29235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int chunk = 0;
29245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SHRINK;
29265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = CUR_CHAR(l);
29275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (((cur != '<') || (ctxt->token == '<')) &&
29285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((cur != '&') || (ctxt->token == '&')) &&
29295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   (cur != 0)) {
29305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (!(IS_CHAR(cur))) {
29315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
29325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                "Invalid char in CDATA 0x%X\n", cur);
29335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
29345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    COPY_BUF(l,buf,nbchar,cur);
29355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
29365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
29375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
29385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Ok the segment is to be consumed as chars.
29395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
29405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
29415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (areBlanks(ctxt, buf, nbchar)) {
29425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (ctxt->sax->ignorableWhitespace != NULL)
29435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->sax->ignorableWhitespace(ctxt->userData,
29445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			                               buf, nbchar);
29455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
29465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlCheckParagraph(ctxt);
29475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (ctxt->sax->characters != NULL)
29485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->sax->characters(ctxt->userData, buf, nbchar);
29495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
29505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
29515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    nbchar = 0;
29525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
29535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	NEXTL(l);
29545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        chunk++;
29555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (chunk > HTML_PARSER_BUFFER_SIZE) {
29565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            chunk = 0;
29575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            SHRINK;
29585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            GROW;
29595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
29605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	cur = CUR_CHAR(l);
29615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (cur == 0) {
29625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    SHRINK;
29635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    GROW;
29645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    cur = CUR_CHAR(l);
29655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
29665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
29675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (nbchar != 0) {
29685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        buf[nbchar] = 0;
29695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
29715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Ok the segment is to be consumed as chars.
29725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
29735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
29745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (areBlanks(ctxt, buf, nbchar)) {
29755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ctxt->sax->ignorableWhitespace != NULL)
29765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
29775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
29785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlCheckParagraph(ctxt);
29795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ctxt->sax->characters != NULL)
29805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
29815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
29825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
29835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
29845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
29855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Loop detection
29865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
29875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (cur == 0)
29885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->instate = XML_PARSER_EOF;
29895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
29905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
29915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
29935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseExternalID:
29945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
29955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @publicID:  a xmlChar** receiving PubidLiteral
29965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
29975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse an External ID or a Public ID
29985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
29995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
30005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
30015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
30025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [83] PublicID ::= 'PUBLIC' S PubidLiteral
30035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
30045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the function returns SystemLiteral and in the second
30055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *                case publicID receives PubidLiteral, is strict is off
30065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *                it is possible to return NULL and have publicID set.
30075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
30085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
30095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static xmlChar *
30105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
30115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *URI = NULL;
30125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
30135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((UPPER == 'S') && (UPP(1) == 'Y') &&
30145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         (UPP(2) == 'S') && (UPP(3) == 'T') &&
30155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (UPP(4) == 'E') && (UPP(5) == 'M')) {
30165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP(6);
30175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (!IS_BLANK_CH(CUR)) {
30185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
30195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "Space required after 'SYSTEM'\n", NULL, NULL);
30205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
30215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP_BLANKS;
30225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	URI = htmlParseSystemLiteral(ctxt);
30235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (URI == NULL) {
30245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
30255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
30265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
30275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
30285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       (UPP(2) == 'B') && (UPP(3) == 'L') &&
30295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	       (UPP(4) == 'I') && (UPP(5) == 'C')) {
30305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP(6);
30315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (!IS_BLANK_CH(CUR)) {
30325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
30335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "Space required after 'PUBLIC'\n", NULL, NULL);
30345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
30355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP_BLANKS;
30365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	*publicID = htmlParsePubidLiteral(ctxt);
30375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (*publicID == NULL) {
30385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
30395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "htmlParseExternalID: PUBLIC, no Public Identifier\n",
30405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 NULL, NULL);
30415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
30425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP_BLANKS;
30435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((CUR == '"') || (CUR == '\'')) {
30445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    URI = htmlParseSystemLiteral(ctxt);
30455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
30465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
30475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(URI);
30485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
30495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
30505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
30515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * xmlParsePI:
30525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an XML parser context
30535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
30545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML Processing Instruction.
30555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
30565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
30575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
30585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
30595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParsePI(htmlParserCtxtPtr ctxt) {
30605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *buf = NULL;
30615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int len = 0;
30625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int size = HTML_PARSER_BUFFER_SIZE;
30635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int cur, l;
30645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *target;
30655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputState state;
30665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int count = 0;
30675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
30685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((RAW == '<') && (NXT(1) == '?')) {
30695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	state = ctxt->instate;
30705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->instate = XML_PARSER_PI;
30715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
30725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * this is a Processing Instruction.
30735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
30745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP(2);
30755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SHRINK;
30765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
30775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
30785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Parse the target name and check for special support like
30795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * namespace.
30805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
30815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        target = htmlParseName(ctxt);
30825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (target != NULL) {
30835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (RAW == '>') {
30845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		SKIP(1);
30855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
30865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
30875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * SAX: PI detected.
30885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
30895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((ctxt->sax) && (!ctxt->disableSAX) &&
30905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (ctxt->sax->processingInstruction != NULL))
30915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->processingInstruction(ctxt->userData,
30925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                                     target, NULL);
30935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = state;
30945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return;
30955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
30965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
30975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (buf == NULL) {
30985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlErrMemory(ctxt, NULL);
30995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = state;
31005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return;
31015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
31025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    cur = CUR;
31035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (!IS_BLANK(cur)) {
31045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
31055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			  "ParsePI: PI %s space expected\n", target, NULL);
31065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
31075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            SKIP_BLANKS;
31085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    cur = CUR_CHAR(l);
31095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    while (IS_CHAR(cur) && (cur != '>')) {
31105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (len + 5 >= size) {
31115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlChar *tmp;
31125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
31135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    size *= 2;
31145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
31155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (tmp == NULL) {
31165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlErrMemory(ctxt, NULL);
31175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlFree(buf);
31185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->instate = state;
31195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			return;
31205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
31215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    buf = tmp;
31225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
31235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		count++;
31245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (count > 50) {
31255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    GROW;
31265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    count = 0;
31275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
31285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		COPY_BUF(l,buf,len,cur);
31295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		NEXTL(l);
31305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = CUR_CHAR(l);
31315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (cur == 0) {
31325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    SHRINK;
31335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    GROW;
31345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    cur = CUR_CHAR(l);
31355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
31365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
31375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    buf[len] = 0;
31385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (cur != '>') {
31395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
31405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		      "ParsePI: PI %s never end ...\n", target, NULL);
31415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
31425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		SKIP(1);
31435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
31445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
31455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * SAX: PI detected.
31465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
31475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((ctxt->sax) && (!ctxt->disableSAX) &&
31485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (ctxt->sax->processingInstruction != NULL))
31495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->processingInstruction(ctxt->userData,
31505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                                     target, buf);
31515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
31525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree(buf);
31535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
31545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
31555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         "PI is not started correctly", NULL, NULL);
31565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
31575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->instate = state;
31585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
31595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
31605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
31615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
31625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseComment:
31635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
31645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
31655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse an XML (SGML) comment <!-- .... -->
31665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
31675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
31685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
31695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
31705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseComment(htmlParserCtxtPtr ctxt) {
31715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *buf = NULL;
31725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int len;
31735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int size = HTML_PARSER_BUFFER_SIZE;
31745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int q, ql;
31755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int r, rl;
31765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int cur, l;
31775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputState state;
31785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
31795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
31805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check that there is a comment right here.
31815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
31825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((RAW != '<') || (NXT(1) != '!') ||
31835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (NXT(2) != '-') || (NXT(3) != '-')) return;
31845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
31855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    state = ctxt->instate;
31865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->instate = XML_PARSER_COMMENT;
31875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SHRINK;
31885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP(4);
31895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
31905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buf == NULL) {
31915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(ctxt, "buffer allocation failed\n");
31925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->instate = state;
31935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
31945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
31955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    q = CUR_CHAR(ql);
31965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEXTL(ql);
31975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    r = CUR_CHAR(rl);
31985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEXTL(rl);
31995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cur = CUR_CHAR(l);
32005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    len = 0;
32015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (IS_CHAR(cur) &&
32025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           ((cur != '>') ||
32035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (r != '-') || (q != '-'))) {
32045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (len + 5 >= size) {
32055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlChar *tmp;
32065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
32075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    size *= 2;
32085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
32095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (tmp == NULL) {
32105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        xmlFree(buf);
32115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlErrMemory(ctxt, "growing buffer failed\n");
32125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = state;
32135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return;
32145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
32155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    buf = tmp;
32165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
32175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	COPY_BUF(ql,buf,len,q);
32185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	q = r;
32195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ql = rl;
32205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	r = cur;
32215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	rl = l;
32225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	NEXTL(l);
32235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	cur = CUR_CHAR(l);
32245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (cur == 0) {
32255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    SHRINK;
32265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    GROW;
32275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    cur = CUR_CHAR(l);
32285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
32295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
32305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buf[len] = 0;
32315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!IS_CHAR(cur)) {
32325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
32335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Comment not terminated \n<!--%.50s\n", buf, NULL);
32345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFree(buf);
32355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
32365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
32375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
32385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (!ctxt->disableSAX))
32395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->comment(ctxt->userData, buf);
32405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFree(buf);
32415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
32425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->instate = state;
32435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
32445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
32455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
32465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseCharRef:
32475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
32485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
32495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse Reference declarations
32505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
32515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [66] CharRef ::= '&#' [0-9]+ ';' |
32525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *                  '&#x' [0-9a-fA-F]+ ';'
32535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
32545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the value parsed (as an int)
32555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
32565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
32575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseCharRef(htmlParserCtxtPtr ctxt) {
32585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int val = 0;
32595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
32605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) {
32615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
32625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "htmlParseCharRef: context error\n",
32635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     NULL, NULL);
32645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(0);
32655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
32665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((CUR == '&') && (NXT(1) == '#') &&
32675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((NXT(2) == 'x') || NXT(2) == 'X')) {
32685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP(3);
32695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while (CUR != ';') {
32705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((CUR >= '0') && (CUR <= '9'))
32715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        val = val * 16 + (CUR - '0');
32725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if ((CUR >= 'a') && (CUR <= 'f'))
32735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        val = val * 16 + (CUR - 'a') + 10;
32745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if ((CUR >= 'A') && (CUR <= 'F'))
32755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        val = val * 16 + (CUR - 'A') + 10;
32765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else {
32775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
32785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "htmlParseCharRef: missing semicolumn\n",
32795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
32805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
32815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
32825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
32835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
32845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (CUR == ';')
32855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
32865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if  ((CUR == '&') && (NXT(1) == '#')) {
32875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP(2);
32885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while (CUR != ';') {
32895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((CUR >= '0') && (CUR <= '9'))
32905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        val = val * 10 + (CUR - '0');
32915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else {
32925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
32935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "htmlParseCharRef: missing semicolumn\n",
32945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
32955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
32965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
32975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
32985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
32995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (CUR == ';')
33005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
33015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
33025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
33035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "htmlParseCharRef: invalid value\n", NULL, NULL);
33045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
33055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check the value IS_CHAR ...
33075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (IS_CHAR(val)) {
33095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(val);
33105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
33115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
33125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"htmlParseCharRef: invalid xmlChar value %d\n",
33135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			val);
33145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
33155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
33165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
33175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
33205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseDocTypeDecl:
33215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
33225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
33235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse a DOCTYPE declaration
33245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
33255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
33265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
33275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
33285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
33305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
33315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
33325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *ExternalID = NULL;
33335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *URI = NULL;
33345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * We know that '<!DOCTYPE' has been detected.
33375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP(9);
33395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
33415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Parse the DOCTYPE name.
33445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    name = htmlParseName(ctxt);
33465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (name == NULL) {
33475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
33485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "htmlParseDocTypeDecl : no DOCTYPE name !\n",
33495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     NULL, NULL);
33505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
33515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check that upper(name) == "HTML" !!!!!!!!!!!!!
33535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
33565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for SystemID and ExternalID
33595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    URI = htmlParseExternalID(ctxt, &ExternalID);
33615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
33625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * We should be at the end of the DOCTYPE declaration.
33655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR != '>') {
33675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
33685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "DOCTYPE improperly terminated\n", NULL, NULL);
33695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        /* We shouldn't try to resynchronize ... */
33705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
33715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEXT;
33725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Create or update the document accordingly to the DOCTYPE
33755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
33775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(!ctxt->disableSAX))
33785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
33795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
33815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Cleanup, since we don't use all those identifiers
33825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
33835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (URI != NULL) xmlFree(URI);
33845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ExternalID != NULL) xmlFree(ExternalID);
33855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
33865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
33875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
33885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseAttribute:
33895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
33905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @value:  a xmlChar ** used to store the value of the attribute
33915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
33925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an attribute
33935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
33945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [41] Attribute ::= Name Eq AttValue
33955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
33965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [25] Eq ::= S? '=' S?
33975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
33985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With namespace:
33995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
34005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 11] Attribute ::= QName Eq AttValue
34015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
34025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Also the case QName == xmlns:??? is handled independently as a namespace
34035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * definition.
34045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
34055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the attribute name, and the value in *value.
34065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
34075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const xmlChar *
34095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
34105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
34115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *val = NULL;
34125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *value = NULL;
34145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    name = htmlParseHTMLName(ctxt);
34155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (name == NULL) {
34165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
34175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "error parsing attribute name\n", NULL, NULL);
34185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
34195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
34205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
34225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * read the value
34235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
34245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
34255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '=') {
34265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
34275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP_BLANKS;
34285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	val = htmlParseAttValue(ctxt);
34295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
34305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *value = val;
34325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(name);
34335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
34345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
34365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckEncoding:
34375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
34385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @attvalue: the attribute value
34395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
34405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks an http-equiv attribute from a Meta tag to detect
34415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the encoding
34425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If a new encoding is detected the parser is switched to decode
34435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * it and pass UTF8
34445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
34455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
34465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
34475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *encoding;
34485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (attvalue == NULL))
34505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
34515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* do not change encoding */
34535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->input->encoding != NULL)
34545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
34555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
34575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (encoding != NULL) {
34585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	encoding += 8;
34595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
34605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");
34615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (encoding != NULL)
34625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    encoding += 9;
34635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
34645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (encoding != NULL) {
34655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlCharEncoding enc;
34665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlCharEncodingHandlerPtr handler;
34675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
34695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->input->encoding != NULL)
34715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree((xmlChar *) ctxt->input->encoding);
34725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->encoding = xmlStrdup(encoding);
34735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
34745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	enc = xmlParseCharEncoding((const char *) encoding);
34755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
34765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * registered set of known encodings
34775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
34785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (enc != XML_CHAR_ENCODING_ERROR) {
34795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (((enc == XML_CHAR_ENCODING_UTF16LE) ||
34805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	         (enc == XML_CHAR_ENCODING_UTF16BE) ||
34815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 (enc == XML_CHAR_ENCODING_UCS4LE) ||
34825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 (enc == XML_CHAR_ENCODING_UCS4BE)) &&
34835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(ctxt->input->buf != NULL) &&
34845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(ctxt->input->buf->encoder == NULL)) {
34855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
34865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "htmlCheckEncoding: wrong encoding meta\n",
34875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
34885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
34895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlSwitchEncoding(ctxt, enc);
34905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
34915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
34925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
34935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
34945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * fallback for unknown encodings
34955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
34965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    handler = xmlFindCharEncodingHandler((const char *) encoding);
34975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (handler != NULL) {
34985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlSwitchToEncoding(ctxt, handler);
34995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->charset = XML_CHAR_ENCODING_UTF8;
35005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
35015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
35025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
35035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
35045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->input->buf != NULL) &&
35065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ctxt->input->buf->encoder != NULL) &&
35075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ctxt->input->buf->raw != NULL) &&
35085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ctxt->input->buf->buffer != NULL)) {
35095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    int nbchars;
35105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    int processed;
35115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
35135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * convert as much as possible to the parser reading buffer.
35145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
35155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    processed = ctxt->input->cur - ctxt->input->base;
35165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlBufferShrink(ctxt->input->buf->buffer, processed);
35175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
35185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                       ctxt->input->buf->buffer,
35195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				       ctxt->input->buf->raw);
35205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (nbchars < 0) {
35215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
35225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "htmlCheckEncoding: encoder error\n",
35235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
35245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
35255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->input->base =
35265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->input->cur = ctxt->input->buf->buffer->content;
35275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->input->end =
35285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          &ctxt->input->base[ctxt->input->buf->buffer->use];
35295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
35305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
35315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
35325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
35345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCheckMeta:
35355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
35365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @atts:  the attributes values
35375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks an attributes from a Meta tag
35395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
35405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
35415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
35425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i;
35435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *att, *value;
35445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int http = 0;
35455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *content = NULL;
35465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (atts == NULL))
35485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
35495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    i = 0;
35515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    att = atts[i++];
35525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (att != NULL) {
35535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	value = atts[i++];
35545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
35555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
35565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    http = 1;
35575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
35585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    content = value;
35595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	att = atts[i++];
35605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
35615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((http) && (content != NULL))
35625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlCheckEncoding(ctxt, content);
35635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
35655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
35675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseStartTag:
35685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
35695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
 * Parse a start tag, either for rule element or
 * EmptyElement. In both cases we don't parse the tag closing chars.
35725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [40] STag ::= '<' Name (S Attribute)* S? '>'
35745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
35765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With namespace:
35785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
35805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
35825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
35835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of success, -1 in case of error and 1 if discarded
35845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
35855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
35875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseStartTag(htmlParserCtxtPtr ctxt) {
35885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
35895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *attname;
35905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *attvalue;
35915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar **atts;
35925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int nbatts = 0;
35935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int maxatts;
35945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int meta = 0;
35955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i;
35965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int discardtag = 0;
35975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->instate == XML_PARSER_EOF)
35995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(-1);
36005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) {
36015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
36025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "htmlParseStartTag: context error\n", NULL, NULL);
36035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return -1;
36045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
36055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR != '<') return -1;
36065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEXT;
36075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    atts = ctxt->atts;
36095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    maxatts = ctxt->maxatts;
36105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    GROW;
36125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    name = htmlParseHTMLName(ctxt);
36135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (name == NULL) {
36145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
36155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "htmlParseStartTag: invalid element name\n",
36165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     NULL, NULL);
36175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/* Dump the bogus tag like browsers do */
36185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&
36195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               (ctxt->instate != XML_PARSER_EOF))
36205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
36215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return -1;
36225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
36235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(name, BAD_CAST"meta"))
36245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	meta = 1;
36255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
36275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for auto-closure of HTML elements.
36285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
36295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlAutoClose(ctxt, name);
36305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
36325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for implied HTML elements.
36335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
36345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCheckImplied(ctxt, name);
36355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
36375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Avoid html at any level > 0, head at any level != 1
36385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * or any attempt to recurse body
36395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
36405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
36415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
36425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "htmlParseStartTag: misplaced <html> tag\n",
36435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     name, NULL);
36445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	discardtag = 1;
36455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->depth++;
36465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
36475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->nameNr != 1) &&
36485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(xmlStrEqual(name, BAD_CAST"head"))) {
36495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
36505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "htmlParseStartTag: misplaced <head> tag\n",
36515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     name, NULL);
36525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	discardtag = 1;
36535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->depth++;
36545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
36555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (xmlStrEqual(name, BAD_CAST"body")) {
36565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int indx;
36575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	for (indx = 0;indx < ctxt->nameNr;indx++) {
36585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
36595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
36605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "htmlParseStartTag: misplaced <body> tag\n",
36615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     name, NULL);
36625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		discardtag = 1;
36635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->depth++;
36645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
36655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
36665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
36675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
36695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Now parse the attributes, it ends up with the ending
36705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     *
36715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * (S Attribute)* S?
36725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
36735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
36745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((IS_CHAR_CH(CUR)) &&
36755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           (CUR != '>') &&
36765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   ((CUR != '/') || (NXT(1) != '>'))) {
36775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	long cons = ctxt->nbChars;
36785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	GROW;
36805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	attname = htmlParseAttribute(ctxt, &attvalue);
36815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (attname != NULL) {
36825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
36845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Well formedness requires at most one declaration of an attribute
36855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
36865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    for (i = 0; i < nbatts;i += 2) {
36875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (xmlStrEqual(atts[i], attname)) {
36885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
36895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                 "Attribute %s redefined\n", attname, NULL);
36905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (attvalue != NULL)
36915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlFree(attvalue);
36925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto failed;
36935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
36945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
36955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
36975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Add the pair to atts
36985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
36995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (atts == NULL) {
37005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        maxatts = 22; /* allow for 10 attrs by default */
37015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        atts = (const xmlChar **)
37025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		       xmlMalloc(maxatts * sizeof(xmlChar *));
37035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (atts == NULL) {
37045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlErrMemory(ctxt, NULL);
37055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (attvalue != NULL)
37065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlFree(attvalue);
37075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto failed;
37085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
37095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->atts = atts;
37105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->maxatts = maxatts;
37115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else if (nbatts + 4 > maxatts) {
37125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        const xmlChar **n;
37135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        maxatts *= 2;
37155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        n = (const xmlChar **) xmlRealloc((void *) atts,
37165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					     maxatts * sizeof(const xmlChar *));
37175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (n == NULL) {
37185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlErrMemory(ctxt, NULL);
37195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (attvalue != NULL)
37205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlFree(attvalue);
37215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto failed;
37225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
37235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		atts = n;
37245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->atts = atts;
37255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->maxatts = maxatts;
37265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
37275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    atts[nbatts++] = attname;
37285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    atts[nbatts++] = attvalue;
37295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    atts[nbatts] = NULL;
37305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    atts[nbatts + 1] = NULL;
37315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
37325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else {
37335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (attvalue != NULL)
37345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        xmlFree(attvalue);
37355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /* Dump the bogus attribute string up to the next blank or
37365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * the end of the tag. */
37375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    while ((IS_CHAR_CH(CUR)) &&
37385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	           !(IS_BLANK_CH(CUR)) && (CUR != '>') &&
37395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		   ((CUR != '/') || (NXT(1) != '>')))
37405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		NEXT;
37415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
37425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)failed:
37445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP_BLANKS;
37455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (cons == ctxt->nbChars) {
37465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
37475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "htmlParseStartTag: problem parsing attributes\n",
37485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 NULL, NULL);
37495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    break;
37505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
37515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
37525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
37545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Handle specific association to the META tag
37555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
37565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (meta && (nbatts != 0))
37575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlCheckMeta(ctxt, atts);
37585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
37605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * SAX: Start of Element !
37615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
37625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!discardtag) {
37635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePush(ctxt, name);
37645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
37655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (nbatts != 0)
37665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->startElement(ctxt->userData, name, atts);
37675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else
37685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->startElement(ctxt->userData, name, NULL);
37695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
37705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
37715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (atts != NULL) {
37735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (i = 1;i < nbatts;i += 2) {
37745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (atts[i] != NULL)
37755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlFree((xmlChar *) atts[i]);
37765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
37775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
37785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(discardtag);
37805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
37815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
37835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseEndTag:
37845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
37855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
37865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an end of tag
37875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
37885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [42] ETag ::= '</' Name S? '>'
37895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
37905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * With namespace
37915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
37925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [NS 9] ETag ::= '</' QName S? '>'
37935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
37945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if the current level should be closed.
37955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
37965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
37975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
37985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseEndTag(htmlParserCtxtPtr ctxt)
37995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
38005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
38015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *oldname;
38025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i, ret;
38035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((CUR != '<') || (NXT(1) != '/')) {
38055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
38065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "htmlParseEndTag: '</' not found\n", NULL, NULL);
38075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (0);
38085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
38095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP(2);
38105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    name = htmlParseHTMLName(ctxt);
38125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (name == NULL)
38135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (0);
38145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
38155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * We should definitely be at the ending "S? '>'" part
38165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
38175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
38185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) {
38195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
38205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "End tag : expected '>'\n", NULL, NULL);
38215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->recovery) {
38225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
38235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * We're not at the ending > !!
38245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Error, unless in recover mode where we search forwards
38255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * until we find a >
38265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
38275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    while (CUR != '\0' && CUR != '>') NEXT;
38285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
38295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
38305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else
38315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
38325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
38345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * if we ignored misplaced tags in htmlParseStartTag don't pop them
38355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * out now.
38365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
38375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->depth > 0) &&
38385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (xmlStrEqual(name, BAD_CAST "html") ||
38395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         xmlStrEqual(name, BAD_CAST "body") ||
38405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 xmlStrEqual(name, BAD_CAST "head"))) {
38415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->depth--;
38425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return (0);
38435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
38445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
38465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * If the name read is not one of the element in the parsing stack
38475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * then return, it's just an error.
38485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
38495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (i = (ctxt->nameNr - 1); i >= 0; i--) {
38505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (xmlStrEqual(name, ctxt->nameTab[i]))
38515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            break;
38525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
38535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (i < 0) {
38545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
38555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Unexpected end tag : %s\n", name, NULL);
38565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (0);
38575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
38585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
38615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for auto-closure of HTML elements.
38625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
38635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlAutoCloseOnClose(ctxt, name);
38655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
38675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Well formedness constraints, opening and closing must match.
38685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * With the exception that the autoclose may have popped stuff out
38695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * of the stack.
38705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
38715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!xmlStrEqual(name, ctxt->name)) {
38725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
38735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
38745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	                 "Opening and ending tag mismatch: %s and %s\n",
38755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 name, ctxt->name);
38765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
38775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
38785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
38805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * SAX: End of Tag
38815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
38825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    oldname = ctxt->name;
38835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
38845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->sax->endElement(ctxt->userData, name);
38865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlnamePop(ctxt);
38875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ret = 1;
38885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
38895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ret = 0;
38905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
38915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (ret);
38935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
38945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
38965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
38975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseReference:
38985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
38995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
39005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse and handle entity references in content,
39015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * this will end-up in a call to character() since this is either a
39025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * CharRef, or a predefined entity.
39035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
39045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
39055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseReference(htmlParserCtxtPtr ctxt) {
39065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const htmlEntityDesc * ent;
39075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar out[6];
39085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
39095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR != '&') return;
39105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (NXT(1) == '#') {
39125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	unsigned int c;
39135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int bits, i = 0;
39145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	c = htmlParseCharRef(ctxt);
39165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (c == 0)
39175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return;
39185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if      (c <    0x80) { out[i++]= c;                bits= -6; }
39205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else if (c <   0x800) { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
39215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
39225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        else                  { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
39235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for ( ; bits >= 0; bits-= 6) {
39255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            out[i++]= ((c >> bits) & 0x3F) | 0x80;
39265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
39275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	out[i] = 0;
39285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlCheckParagraph(ctxt);
39305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
39315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->characters(ctxt->userData, out, i);
39325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
39335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ent = htmlParseEntityRef(ctxt, &name);
39345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (name == NULL) {
39355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlCheckParagraph(ctxt);
39365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
39375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
39385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return;
39395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
39405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ent == NULL) || !(ent->value > 0)) {
39415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlCheckParagraph(ctxt);
39425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
39435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
39445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
39455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
39465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
39475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
39485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    unsigned int c;
39495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    int bits, i = 0;
39505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    c = ent->value;
39525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if      (c <    0x80)
39535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            { out[i++]= c;                bits= -6; }
39545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (c <   0x800)
39555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
39565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (c < 0x10000)
39575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
39585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else
39595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
39605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    for ( ; bits >= 0; bits-= 6) {
39625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		out[i++]= ((c >> bits) & 0x3F) | 0x80;
39635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
39645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    out[i] = 0;
39655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlCheckParagraph(ctxt);
39675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
39685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->characters(ctxt->userData, out, i);
39695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
39705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
39715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
39725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
39745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseContent:
39755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
39765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
39775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a content: comment, sub-element, reference or text.
39785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Kept for compatibility with old code
39795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
39805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
39825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseContent(htmlParserCtxtPtr ctxt) {
39835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *currentNode;
39845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int depth;
39855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
39865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    currentNode = xmlStrdup(ctxt->name);
39885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    depth = ctxt->nameNr;
39895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (1) {
39905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	long cons = ctxt->nbChars;
39915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GROW;
39935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->instate == XML_PARSER_EOF)
39955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            break;
39965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
39975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
39985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Our tag or one of it's parent or children is ending.
39995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
40005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((CUR == '<') && (NXT(1) == '/')) {
40015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (htmlParseEndTag(ctxt) &&
40025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		((currentNode != NULL) || (ctxt->nameNr == 0))) {
40035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (currentNode != NULL)
40045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlFree(currentNode);
40055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		return;
40065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    continue; /* while */
40085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
40095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else if ((CUR == '<') &&
40115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	         ((IS_ASCII_LETTER(NXT(1))) ||
40125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		  (NXT(1) == '_') || (NXT(1) == ':'))) {
40135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    name = htmlParseHTMLName_nonInvasive(ctxt);
40145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (name == NULL) {
40155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
40165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 "htmlParseStartTag: invalid element name\n",
40175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 NULL, NULL);
40185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        /* Dump the bogus tag like browsers do */
40195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
40205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            NEXT;
40215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (currentNode != NULL)
40235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            xmlFree(currentNode);
40245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        return;
40255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ctxt->name != NULL) {
40285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (htmlCheckAutoClose(name, ctxt->name) == 1) {
40295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            htmlAutoClose(ctxt, name);
40305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            continue;
40315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        }
40325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
40345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
40365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Has this node been popped out during parsing of
40375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * the next element
40385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
40395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
40405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (!xmlStrEqual(currentNode, ctxt->name)))
40415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     {
40425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (currentNode != NULL) xmlFree(currentNode);
40435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return;
40445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
40455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
40475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (xmlStrEqual(currentNode, BAD_CAST"style")))) {
40485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Handle SCRIPT/STYLE separately
40505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
40515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseScript(ctxt);
40525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
40535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Sometimes DOCTYPE arrives in the middle of the document
40555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
40565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((CUR == '<') && (NXT(1) == '!') &&
40575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(2) == 'D') && (UPP(3) == 'O') &&
40585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(4) == 'C') && (UPP(5) == 'T') &&
40595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(6) == 'Y') && (UPP(7) == 'P') &&
40605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(8) == 'E')) {
40615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
40625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "Misplaced DOCTYPE declaration\n",
40635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     BAD_CAST "DOCTYPE" , NULL);
40645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseDocTypeDecl(ctxt);
40655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * First case :  a comment
40695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
40705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((CUR == '<') && (NXT(1) == '!') &&
40715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(NXT(2) == '-') && (NXT(3) == '-')) {
40725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseComment(ctxt);
40735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Second case : a Processing Instruction.
40775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
40785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if ((CUR == '<') && (NXT(1) == '?')) {
40795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParsePI(ctxt);
40805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Third case :  a sub-element.
40845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
40855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (CUR == '<') {
40865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseElement(ctxt);
40875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Fourth case : a reference. If if has not been resolved,
40915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     *    parsing returns it's Name, create the node
40925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
40935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (CUR == '&') {
40945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseReference(ctxt);
40955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
40965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
40975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
40985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Fifth case : end of the resource
40995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
41005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (CUR == 0) {
41015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlAutoCloseOnEnd(ctxt);
41025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
41035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
41045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
41065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Last case, text. Note that References are handled directly.
41075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
41085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else {
41095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseCharData(ctxt);
41105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
41115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (cons == ctxt->nbChars) {
41135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ctxt->node != NULL) {
41145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
41155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                 "detected an error in element content\n",
41165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				 NULL, NULL);
41175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
41185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
41195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
41205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
41215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GROW;
41225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
41235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (currentNode != NULL) xmlFree(currentNode);
41245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
41255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
41275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseElement:
41285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
41295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
41305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML element, this is highly recursive
41315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * this is kept for compatibility with previous code versions
41325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
41335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [39] element ::= EmptyElemTag | STag content ETag
41345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
41355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [41] Attribute ::= Name Eq AttValue
41365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
41375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void
41395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseElement(htmlParserCtxtPtr ctxt) {
41405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
41415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *currentNode = NULL;
41425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const htmlElemDesc * info;
41435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserNodeInfo node_info;
41445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int failed;
41455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int depth;
41465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *oldptr;
41475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) {
41495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
41505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "htmlParseElement: context error\n", NULL, NULL);
41515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
41525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
41535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->instate == XML_PARSER_EOF)
41555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
41565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Capture start position */
41585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->record_info) {
41595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        node_info.begin_pos = ctxt->input->consumed +
41605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          (CUR_PTR - ctxt->input->base);
41615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	node_info.begin_line = ctxt->input->line;
41625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
41635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    failed = htmlParseStartTag(ctxt);
41655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    name = ctxt->name;
41665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((failed == -1) || (name == NULL)) {
41675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (CUR == '>')
41685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
41695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
41705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
41715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
41735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Lookup the info for that element.
41745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
41755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    info = htmlTagLookup(name);
41765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info == NULL) {
41775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
41785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Tag %s invalid\n", name, NULL);
41795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
41805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
41825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for an Empty Element labeled the XML/SGML way
41835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
41845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((CUR == '/') && (NXT(1) == '>')) {
41855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP(2);
41865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
41875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->endElement(ctxt->userData, name);
41885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
41895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
41905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
41915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '>') {
41935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
41945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
41955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
41965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Couldn't find end of Start Tag %s\n", name, NULL);
41975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
41995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * end of parsing of this node.
42005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
42015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (xmlStrEqual(name, ctxt->name)) {
42025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    nodePop(ctxt);
42035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlnamePop(ctxt);
42045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
42055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
42075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Capture end position and add node
42085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
42095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->record_info) {
42105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   node_info.end_pos = ctxt->input->consumed +
42115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			      (CUR_PTR - ctxt->input->base);
42125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   node_info.end_line = ctxt->input->line;
42135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   node_info.node = ctxt->node;
42145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   xmlParserAddNodeInfo(ctxt, &node_info);
42155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
42165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
42175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
42205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for an Empty Element from DTD definition
42215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
42225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((info != NULL) && (info->empty)) {
42235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
42245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->endElement(ctxt->userData, name);
42255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
42265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
42275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
42305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Parse the content of the element:
42315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
42325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    currentNode = xmlStrdup(ctxt->name);
42335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    depth = ctxt->nameNr;
42345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (IS_CHAR_CH(CUR)) {
42355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	oldptr = ctxt->input->cur;
42365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseContent(ctxt);
42375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (oldptr==ctxt->input->cur) break;
42385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->nameNr < depth) break;
42395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
42425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Capture end position and add node
42435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
42445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ( currentNode != NULL && ctxt->record_info ) {
42455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       node_info.end_pos = ctxt->input->consumed +
42465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          (CUR_PTR - ctxt->input->base);
42475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       node_info.end_line = ctxt->input->line;
42485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       node_info.node = ctxt->node;
42495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       xmlParserAddNodeInfo(ctxt, &node_info);
42505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!IS_CHAR_CH(CUR)) {
42525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlAutoCloseOnEnd(ctxt);
42535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (currentNode != NULL)
42565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFree(currentNode);
42575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
42585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
42605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
42615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
42625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Capture end position and add node
42635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
42645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ( ctxt->node != NULL && ctxt->record_info ) {
42655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ctxt->nodeInfo->end_pos = ctxt->input->consumed +
42665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                (CUR_PTR - ctxt->input->base);
42675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ctxt->nodeInfo->end_line = ctxt->input->line;
42685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ctxt->nodeInfo->node = ctxt->node;
42695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
42705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       htmlNodeInfoPop(ctxt);
42715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!IS_CHAR_CH(CUR)) {
42735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       htmlAutoCloseOnEnd(ctxt);
42745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
42755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
42765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
42785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseElementInternal:
42795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
42805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
42815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML element, new version, non recursive
42825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
42835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [39] element ::= EmptyElemTag | STag content ETag
42845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
42855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * [41] Attribute ::= Name Eq AttValue
42865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
42875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
42895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
42905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
42915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const htmlElemDesc * info;
42925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserNodeInfo node_info;
42935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int failed;
42945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
42955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) {
42965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
42975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "htmlParseElementInternal: context error\n", NULL, NULL);
42985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
42995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->instate == XML_PARSER_EOF)
43025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
43035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Capture start position */
43055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->record_info) {
43065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        node_info.begin_pos = ctxt->input->consumed +
43075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          (CUR_PTR - ctxt->input->base);
43085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	node_info.begin_line = ctxt->input->line;
43095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    failed = htmlParseStartTag(ctxt);
43125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    name = ctxt->name;
43135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((failed == -1) || (name == NULL)) {
43145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (CUR == '>')
43155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    NEXT;
43165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
43175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
43205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Lookup the info for that element.
43215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
43225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    info = htmlTagLookup(name);
43235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (info == NULL) {
43245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
43255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Tag %s invalid\n", name, NULL);
43265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
43295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for an Empty Element labeled the XML/SGML way
43305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
43315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((CUR == '/') && (NXT(1) == '>')) {
43325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SKIP(2);
43335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
43345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->endElement(ctxt->userData, name);
43355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
43365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
43375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == '>') {
43405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        NEXT;
43415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
43425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
43435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Couldn't find end of Start Tag %s\n", name, NULL);
43445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
43465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * end of parsing of this node.
43475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
43485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (xmlStrEqual(name, ctxt->name)) {
43495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    nodePop(ctxt);
43505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlnamePop(ctxt);
43515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
43525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->record_info)
43545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            htmlNodeInfoPush(ctxt, &node_info);
43555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParserFinishElementParsing(ctxt);
43565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
43575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
43605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Check for an Empty Element from DTD definition
43615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
43625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((info != NULL) && (info->empty)) {
43635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
43645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->sax->endElement(ctxt->userData, name);
43655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlnamePop(ctxt);
43665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return;
43675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
43685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->record_info)
43705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlNodeInfoPush(ctxt, &node_info);
43715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
43725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
43745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseContentInternal:
43755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
43765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
43775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a content: comment, sub-element, reference or text.
43785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * New version for non recursive htmlParseElementInternal
43795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
43805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void
43825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
43835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *currentNode;
43845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int depth;
43855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *name;
43865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    currentNode = xmlStrdup(ctxt->name);
43885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    depth = ctxt->nameNr;
43895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (1) {
43905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	long cons = ctxt->nbChars;
43915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GROW;
43935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->instate == XML_PARSER_EOF)
43955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            break;
43965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
43975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
43985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Our tag or one of it's parent or children is ending.
43995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
44005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((CUR == '<') && (NXT(1) == '/')) {
44015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (htmlParseEndTag(ctxt) &&
44025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		((currentNode != NULL) || (ctxt->nameNr == 0))) {
44035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (currentNode != NULL)
44045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlFree(currentNode);
44055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        currentNode = xmlStrdup(ctxt->name);
44075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        depth = ctxt->nameNr;
44085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
44095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    continue; /* while */
44105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
44115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else if ((CUR == '<') &&
44135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	         ((IS_ASCII_LETTER(NXT(1))) ||
44145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		  (NXT(1) == '_') || (NXT(1) == ':'))) {
44155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    name = htmlParseHTMLName_nonInvasive(ctxt);
44165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (name == NULL) {
44175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
44185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 "htmlParseStartTag: invalid element name\n",
44195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 NULL, NULL);
44205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        /* Dump the bogus tag like browsers do */
44215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
44225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            NEXT;
44235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        htmlParserFinishElementParsing(ctxt);
44255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (currentNode != NULL)
44265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            xmlFree(currentNode);
44275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        currentNode = xmlStrdup(ctxt->name);
44295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        depth = ctxt->nameNr;
44305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        continue;
44315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
44325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ctxt->name != NULL) {
44345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (htmlCheckAutoClose(name, ctxt->name) == 1) {
44355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            htmlAutoClose(ctxt, name);
44365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            continue;
44375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        }
44385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
44395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
44405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
44425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Has this node been popped out during parsing of
44435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * the next element
44445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
44455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
44465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (!xmlStrEqual(currentNode, ctxt->name)))
44475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     {
44485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParserFinishElementParsing(ctxt);
44495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (currentNode != NULL) xmlFree(currentNode);
44505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    currentNode = xmlStrdup(ctxt->name);
44525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    depth = ctxt->nameNr;
44535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    continue;
44545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
44555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
44575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (xmlStrEqual(currentNode, BAD_CAST"style")))) {
44585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
44595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Handle SCRIPT/STYLE separately
44605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
44615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseScript(ctxt);
44625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
44635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
44645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Sometimes DOCTYPE arrives in the middle of the document
44655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
44665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((CUR == '<') && (NXT(1) == '!') &&
44675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(2) == 'D') && (UPP(3) == 'O') &&
44685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(4) == 'C') && (UPP(5) == 'T') &&
44695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(6) == 'Y') && (UPP(7) == 'P') &&
44705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(UPP(8) == 'E')) {
44715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
44725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "Misplaced DOCTYPE declaration\n",
44735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     BAD_CAST "DOCTYPE" , NULL);
44745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseDocTypeDecl(ctxt);
44755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
44765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
44785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * First case :  a comment
44795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
44805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((CUR == '<') && (NXT(1) == '!') &&
44815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		(NXT(2) == '-') && (NXT(3) == '-')) {
44825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseComment(ctxt);
44835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
44845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
44865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Second case : a Processing Instruction.
44875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
44885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if ((CUR == '<') && (NXT(1) == '?')) {
44895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParsePI(ctxt);
44905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
44915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
44935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Third case :  a sub-element.
44945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
44955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (CUR == '<') {
44965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseElementInternal(ctxt);
44975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (currentNode != NULL) xmlFree(currentNode);
44985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		currentNode = xmlStrdup(ctxt->name);
45005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		depth = ctxt->nameNr;
45015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
45025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
45045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Fourth case : a reference. If if has not been resolved,
45055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     *    parsing returns it's Name, create the node
45065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
45075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (CUR == '&') {
45085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseReference(ctxt);
45095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
45105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
45125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Fifth case : end of the resource
45135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
45145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else if (CUR == 0) {
45155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlAutoCloseOnEnd(ctxt);
45165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
45175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
45185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
45205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * Last case, text. Note that References are handled directly.
45215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
45225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    else {
45235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseCharData(ctxt);
45245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
45255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (cons == ctxt->nbChars) {
45275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ctxt->node != NULL) {
45285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
45295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                 "detected an error in element content\n",
45305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				 NULL, NULL);
45315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
45325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
45335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
45345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
45355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        GROW;
45365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
45375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (currentNode != NULL) xmlFree(currentNode);
45385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
45395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
45415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseContent:
45425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
45435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a content: comment, sub-element, reference or text.
45455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is the entry point when called from parser.c
45465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
45475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void
45495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)__htmlParseContent(void *ctxt) {
45505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt != NULL)
45515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
45525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
45535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
45555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseDocument:
45565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
45575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML document (and build a tree if using the standard SAX
45595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * interface).
45605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0, -1 in case of error. the parser context is augmented
45625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *                as a result of the parsing.
45635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
45645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
45665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseDocument(htmlParserCtxtPtr ctxt) {
45675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar start[4];
45685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlCharEncoding enc;
45695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlDtdPtr dtd;
45705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
45725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlDefaultSAXHandlerInit();
45745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) {
45765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
45775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "htmlParseDocument: context error\n", NULL, NULL);
45785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(XML_ERR_INTERNAL_ERROR);
45795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
45805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->html = 1;
45815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->linenumbers = 1;
45825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    GROW;
45835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
45845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * SAX: beginning of the document processing.
45855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
45865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
45875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
45885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
45905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
45915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
45925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * Get the 4 first bytes and decode the charset
45935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * if enc != XML_CHAR_ENCODING_NONE
45945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * plug some encoding conversion routines.
45955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
45965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	start[0] = RAW;
45975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	start[1] = NXT(1);
45985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	start[2] = NXT(2);
45995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	start[3] = NXT(3);
46005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	enc = xmlDetectCharEncoding(&start[0], 4);
46015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (enc != XML_CHAR_ENCODING_NONE) {
46025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlSwitchEncoding(ctxt, enc);
46035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
46045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
46055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Wipe out everything which is before the first '<'
46085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
46105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == 0) {
46115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
46125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	             "Document is empty\n", NULL, NULL);
46135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
46145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
46165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->sax->startDocument(ctxt->userData);
46175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Parse possible comments and PIs before any content
46215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (((CUR == '<') && (NXT(1) == '!') &&
46235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            (NXT(2) == '-') && (NXT(3) == '-')) ||
46245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   ((CUR == '<') && (NXT(1) == '?'))) {
46255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParseComment(ctxt);
46265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParsePI(ctxt);
46275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP_BLANKS;
46285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
46295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Then possibly doc type declaration(s) and more Misc
46335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * (doctypedecl Misc*)?
46345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((CUR == '<') && (NXT(1) == '!') &&
46365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(UPP(2) == 'D') && (UPP(3) == 'O') &&
46375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(UPP(4) == 'C') && (UPP(5) == 'T') &&
46385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
46395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	(UPP(8) == 'E')) {
46405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseDocTypeDecl(ctxt);
46415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
46425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SKIP_BLANKS;
46435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Parse possible comments and PIs before any content
46465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (((CUR == '<') && (NXT(1) == '!') &&
46485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            (NXT(2) == '-') && (NXT(3) == '-')) ||
46495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	   ((CUR == '<') && (NXT(1) == '?'))) {
46505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParseComment(ctxt);
46515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlParsePI(ctxt);
46525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	SKIP_BLANKS;
46535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
46545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * Time to start parsing the tree itself
46575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParseContentInternal(ctxt);
46595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * autoclose
46625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (CUR == 0)
46645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlAutoCloseOnEnd(ctxt);
46655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /*
46685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     * SAX: end of the document processing.
46695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     */
46705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
46715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax->endDocument(ctxt->userData);
46725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->myDoc != NULL) {
46745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	dtd = xmlGetIntSubset(ctxt->myDoc);
46755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (dtd == NULL)
46765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->myDoc->intSubset =
46775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
46785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
46795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
46805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
46815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (! ctxt->wellFormed) return(-1);
46825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
46835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
46845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
46875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
46885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *			Parser contexts handling			*
46895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
46905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
46915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
46925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
46935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlInitParserCtxt:
46945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
46955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
46965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Initialize a parser context
46975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
46985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of success and -1 in case of error
46995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
47005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
47025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
47035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
47045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlSAXHandler *sax;
47055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) return(-1);
47075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memset(ctxt, 0, sizeof(htmlParserCtxt));
47085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->dict = xmlDictCreate();
47105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->dict == NULL) {
47115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
47125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(-1);
47135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
47145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
47155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax == NULL) {
47165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
47175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(-1);
47185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
47195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
47205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        memset(sax, 0, sizeof(htmlSAXHandler));
47215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Allocate the Input stack */
47235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->inputTab = (htmlParserInputPtr *)
47245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      xmlMalloc(5 * sizeof(htmlParserInputPtr));
47255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->inputTab == NULL) {
47265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
47275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->inputNr = 0;
47285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->inputMax = 0;
47295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input = NULL;
47305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(-1);
47315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
47325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->inputNr = 0;
47335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->inputMax = 5;
47345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->input = NULL;
47355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->version = NULL;
47365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->encoding = NULL;
47375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->standalone = -1;
47385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->instate = XML_PARSER_START;
47395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Allocate the Node stack */
47415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
47425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nodeTab == NULL) {
47435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
47445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->nodeNr = 0;
47455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->nodeMax = 0;
47465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->node = NULL;
47475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->inputNr = 0;
47485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->inputMax = 0;
47495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input = NULL;
47505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(-1);
47515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
47525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeNr = 0;
47535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeMax = 10;
47545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->node = NULL;
47555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Allocate the Name stack */
47575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
47585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->nameTab == NULL) {
47595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
47605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->nameNr = 0;
47615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->nameMax = 0;
47625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->name = NULL;
47635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->nodeNr = 0;
47645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->nodeMax = 0;
47655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->node = NULL;
47665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->inputNr = 0;
47675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->inputMax = 0;
47685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input = NULL;
47695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(-1);
47705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
47715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameNr = 0;
47725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameMax = 10;
47735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->name = NULL;
47745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeInfoTab = NULL;
47765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeInfoNr  = 0;
47775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeInfoMax = 0;
47785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
47795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
47805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else {
47815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax = sax;
47825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
47835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
47845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->userData = ctxt;
47855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->myDoc = NULL;
47865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->wellFormed = 1;
47875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->replaceEntities = 0;
47885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->linenumbers = xmlLineNumbersDefaultValue;
47895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->html = 1;
47905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
47915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.userData = ctxt;
47925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.error = xmlParserValidityError;
47935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.warning = xmlParserValidityWarning;
47945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->record_info = 0;
47955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->validate = 0;
47965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nbChars = 0;
47975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->checkIndex = 0;
47985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->catalogs = NULL;
47995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitNodeInfoSeq(&ctxt->node_seq);
48005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(0);
48015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
48025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
48045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlFreeParserCtxt:
48055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
48065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Free all the memory used by a parser context. However the parsed
48085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * document in ctxt->myDoc is not freed.
48095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
48105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void
48125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
48135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
48145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlFreeParserCtxt(ctxt);
48155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
48165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
48185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNewParserCtxt:
48195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Allocate and initialize a new parser context.
48215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the htmlParserCtxtPtr or NULL in case of allocation error
48235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
48245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr
48265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNewParserCtxt(void)
48275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
48285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserCtxtPtr ctxt;
48295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
48315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) {
48325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlErrMemory(NULL, "NewParserCtxt: out of memory\n");
48335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
48345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
48355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memset(ctxt, 0, sizeof(xmlParserCtxt));
48365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (htmlInitParserCtxt(ctxt) < 0) {
48375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        htmlFreeParserCtxt(ctxt);
48385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
48395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
48405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ctxt);
48415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
48425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
48445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreateMemoryParserCtxt:
48455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @buffer:  a pointer to a char array
48465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size:  the size of the array
48475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for an HTML in-memory document.
48495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL
48515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
48525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr
48535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreateMemoryParserCtxt(const char *buffer, int size) {
48545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserCtxtPtr ctxt;
48555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr input;
48565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr buf;
48575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buffer == NULL)
48595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
48605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (size <= 0)
48615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
48625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlNewParserCtxt();
48645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
48655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
48665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
48685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buf == NULL) return(NULL);
48695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = xmlNewInputStream(ctxt);
48715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL) {
48725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
48735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
48745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
48755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->filename = NULL;
48775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->buf = buf;
48785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->base = input->buf->buffer->content;
48795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->cur = input->buf->buffer->content;
48805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input->end = &input->buf->buffer->content[input->buf->buffer->use];
48815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, input);
48835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ctxt);
48845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
48855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
48865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
48875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreateDocParserCtxt:
48885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur:  a pointer to an array of xmlChar
48895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  a free form C string describing the HTML document encoding, or NULL
48905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for an HTML document.
48925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * TODO: check the need to add encoding handling there
48945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
48955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL
48965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
48975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlParserCtxtPtr
48985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) {
48995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int len;
49005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
49015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL)
49035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
49045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    len = xmlStrlen(cur);
49055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
49065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
49075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
49085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (encoding != NULL) {
49105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlCharEncoding enc;
49115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlCharEncodingHandlerPtr handler;
49125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->input->encoding != NULL)
49145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree((xmlChar *) ctxt->input->encoding);
49155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
49165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	enc = xmlParseCharEncoding(encoding);
49185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	/*
49195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 * registered set of known encodings
49205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 */
49215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (enc != XML_CHAR_ENCODING_ERROR) {
49225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlSwitchEncoding(ctxt, enc);
49235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
49245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
49255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "Unsupported encoding %s\n",
49265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     (const xmlChar *) encoding, NULL);
49275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
49285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	} else {
49295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
49305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * fallback for unknown encodings
49315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
49325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    handler = xmlFindCharEncodingHandler((const char *) encoding);
49335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (handler != NULL) {
49345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlSwitchToEncoding(ctxt, handler);
49355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    } else {
49365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
49375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		             "Unsupported encoding %s\n",
49385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     (const xmlChar *) encoding, NULL);
49395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
49405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
49415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
49425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ctxt);
49435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
49445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_PUSH_ENABLED
49465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
49475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
49485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	Progressive parsing interfaces				*
49495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
49505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
49515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
49535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseLookupSequence:
49545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
49555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @first:  the first char to lookup
49565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @next:  the next char to lookup or zero
49575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @third:  the next char to lookup or zero
49585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @comment: flag to force checking inside comments
49595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
49605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to find if a sequence (first, next, third) or  just (first next) or
49615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * (first) is available in the input stream.
49625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This function has a side effect of (possibly) incrementing ctxt->checkIndex
49635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to avoid rescanning sequences of bytes, it DOES change the state of the
49645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parser, do not use liberally.
49655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This is basically similar to xmlParseLookupSequence()
49665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
49675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the index to the current parsing point if the full sequence
49685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *      is available, -1 otherwise.
49695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
49705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
49715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
49725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        xmlChar next, xmlChar third, int iscomment,
49735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        int ignoreattrval)
49745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
49755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int base, len;
49765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserInputPtr in;
49775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *buf;
49785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int incomment = 0;
49795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int invalue = 0;
49805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char valdellim = 0x0;
49815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    in = ctxt->input;
49835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (in == NULL)
49845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (-1);
49855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base = in->cur - in->base;
49875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (base < 0)
49885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (-1);
49895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->checkIndex > base)
49915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base = ctxt->checkIndex;
49925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
49935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (in->buf == NULL) {
49945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        buf = in->base;
49955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        len = in->length;
49965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
49975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        buf = in->buf->buffer->content;
49985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        len = in->buf->buffer->use;
49995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
50005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
50015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* take into account the sequence length */
50025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (third)
50035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        len -= 2;
50045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else if (next)
50055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        len--;
50065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (; base < len; base++) {
50075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((!incomment) && (base + 4 < len) && (!iscomment)) {
50085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if ((buf[base] == '<') && (buf[base + 1] == '!') &&
50095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
50105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                incomment = 1;
50115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                /* do not increment past <! - some people use <!--> */
50125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                base += 2;
50135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
50145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
50155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ignoreattrval) {
50165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (buf[base] == '"' || buf[base] == '\'') {
50175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if (invalue) {
50185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    if (buf[base] == valdellim) {
50195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        invalue = 0;
50205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        continue;
50215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    }
50225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                } else {
50235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    valdellim = buf[base];
50245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    invalue = 1;
50255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue;
50265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                }
50275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else if (invalue) {
50285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                continue;
50295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
50305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
50315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (incomment) {
50325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (base + 3 > len)
50335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return (-1);
50345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if ((buf[base] == '-') && (buf[base + 1] == '-') &&
50355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                (buf[base + 2] == '>')) {
50365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                incomment = 0;
50375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                base += 2;
50385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
50395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            continue;
50405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
50415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (buf[base] == first) {
50425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (third != 0) {
50435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if ((buf[base + 1] != next) || (buf[base + 2] != third))
50445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue;
50455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else if (next != 0) {
50465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if (buf[base + 1] != next)
50475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    continue;
50485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
50495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->checkIndex = 0;
50505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
50515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (next == 0)
50525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                xmlGenericError(xmlGenericErrorContext,
50535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                "HPP: lookup '%c' found at %d\n",
50545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                first, base);
50555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            else if (third == 0)
50565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                xmlGenericError(xmlGenericErrorContext,
50575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                "HPP: lookup '%c%c' found at %d\n",
50585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                first, next, base);
50595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            else
50605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                xmlGenericError(xmlGenericErrorContext,
50615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                "HPP: lookup '%c%c%c' found at %d\n",
50625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                first, next, third, base);
50635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
50645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            return (base - (in->cur - in->base));
50655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
50665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
50675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((!incomment) && (!invalue))
50685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->checkIndex = base;
50695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
50705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (next == 0)
50715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlGenericError(xmlGenericErrorContext,
50725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        "HPP: lookup '%c' failed\n", first);
50735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else if (third == 0)
50745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlGenericError(xmlGenericErrorContext,
50755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        "HPP: lookup '%c%c' failed\n", first, next);
50765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
50775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlGenericError(xmlGenericErrorContext,
50785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        "HPP: lookup '%c%c%c' failed\n", first, next,
50795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        third);
50805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
50815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (-1);
50825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
50835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
50845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
50855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseLookupChars:
50865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context
50875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @stop: Array of chars, which stop the lookup.
50885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @stopLen: Length of stop-Array
50895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
50905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to find if any char of the stop-Array is available in the input
50915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * stream.
50925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This function has a side effect of (possibly) incrementing ctxt->checkIndex
50935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to avoid rescanning sequences of bytes, it DOES change the state of the
50945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parser, do not use liberally.
50955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
50965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the index to the current parsing point if a stopChar
50975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *      is available, -1 otherwise.
50985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
50995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
51005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
51015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     int stopLen)
51025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
51035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int base, len;
51045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserInputPtr in;
51055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const xmlChar *buf;
51065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int incomment = 0;
51075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i;
51085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    in = ctxt->input;
51105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (in == NULL)
51115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (-1);
51125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base = in->cur - in->base;
51145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (base < 0)
51155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (-1);
51165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->checkIndex > base)
51185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base = ctxt->checkIndex;
51195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (in->buf == NULL) {
51215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        buf = in->base;
51225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        len = in->length;
51235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
51245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        buf = in->buf->buffer->content;
51255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        len = in->buf->buffer->use;
51265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
51275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (; base < len; base++) {
51295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (!incomment && (base + 4 < len)) {
51305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if ((buf[base] == '<') && (buf[base + 1] == '!') &&
51315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
51325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                incomment = 1;
51335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                /* do not increment past <! - some people use <!--> */
51345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                base += 2;
51355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
51365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
51375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (incomment) {
51385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (base + 3 > len)
51395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return (-1);
51405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if ((buf[base] == '-') && (buf[base + 1] == '-') &&
51415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                (buf[base + 2] == '>')) {
51425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                incomment = 0;
51435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                base += 2;
51445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
51455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            continue;
51465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
51475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (i = 0; i < stopLen; ++i) {
51485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (buf[base] == stop[i]) {
51495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                ctxt->checkIndex = 0;
51505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return (base - (in->cur - in->base));
51515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
51525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
51535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
51545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->checkIndex = base;
51555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (-1);
51565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
51575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
51595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseTryOrFinish:
51605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
51615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @terminate:  last chunk indicator
51625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
51635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Try to progress on parsing
51645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
51655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns zero if no parsing was possible
51665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
51675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int
51685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
51695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int ret = 0;
51705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserInputPtr in;
51715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int avail = 0;
51725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar cur, next;
51735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
51755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (ctxt->instate) {
51765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_EOF:
51775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try EOF\n"); break;
51795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_START:
51805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try START\n"); break;
51825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_MISC:
51835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try MISC\n");break;
51855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_COMMENT:
51865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try COMMENT\n");break;
51885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_PROLOG:
51895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try PROLOG\n");break;
51915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_START_TAG:
51925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try START_TAG\n");break;
51945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_CONTENT:
51955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try CONTENT\n");break;
51975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_CDATA_SECTION:
51985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
51995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try CDATA_SECTION\n");break;
52005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_END_TAG:
52015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try END_TAG\n");break;
52035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_ENTITY_DECL:
52045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try ENTITY_DECL\n");break;
52065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_ENTITY_VALUE:
52075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try ENTITY_VALUE\n");break;
52095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_ATTRIBUTE_VALUE:
52105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try ATTRIBUTE_VALUE\n");break;
52125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_DTD:
52135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try DTD\n");break;
52155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_EPILOG:
52165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try EPILOG\n");break;
52185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_PI:
52195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try PI\n");break;
52215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	case XML_PARSER_SYSTEM_LITERAL:
52225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlGenericError(xmlGenericErrorContext,
52235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: try SYSTEM_LITERAL\n");break;
52245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
52255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
52265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (1) {
52285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	in = ctxt->input;
52305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (in == NULL) break;
52315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (in->buf == NULL)
52325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    avail = in->length - (in->cur - in->base);
52335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	else
52345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    avail = in->buf->buffer->use - (in->cur - in->base);
52355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((avail == 0) && (terminate)) {
52365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlAutoCloseOnEnd(ctxt);
52375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
52385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
52395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * SAX: end of the document processing.
52405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
52415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_EOF;
52425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
52435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->endDocument(ctxt->userData);
52445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
52455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
52465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (avail < 1)
52475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    goto done;
52485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	cur = in->cur[0];
52495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (cur == 0) {
52505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    SKIP(1);
52515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    continue;
52525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
52535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        switch (ctxt->instate) {
52555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_EOF:
52565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        /*
52575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Document parsing is done !
52585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
52595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        goto done;
52605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_START:
52615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        /*
52625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Very first chars read from the document flow.
52635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
52645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
52655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (IS_BLANK_CH(cur)) {
52665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    SKIP_BLANKS;
52675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (in->buf == NULL)
52685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			avail = in->length - (in->cur - in->base);
52695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    else
52705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			avail = in->buf->buffer->use - (in->cur - in->base);
52715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
52725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
52735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->setDocumentLocator(ctxt->userData,
52745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)						  &xmlDefaultSAXLocator);
52755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((ctxt->sax) && (ctxt->sax->startDocument) &&
52765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	            (!ctxt->disableSAX))
52775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->sax->startDocument(ctxt->userData);
52785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
52805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		next = in->cur[1];
52815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((cur == '<') && (next == '!') &&
52825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(2) == 'D') && (UPP(3) == 'O') &&
52835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(4) == 'C') && (UPP(5) == 'T') &&
52845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
52855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(8) == 'E')) {
52865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
52875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
52885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
52895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
52905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
52915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing internal subset\n");
52925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
52935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseDocTypeDecl(ctxt);
52945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_PROLOG;
52955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
52965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
52975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering PROLOG\n");
52985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
52995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                } else {
53005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_MISC;
53015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering MISC\n");
53045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
53065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
53075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_MISC:
53085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		SKIP_BLANKS;
53095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (in->buf == NULL)
53105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    avail = in->length - (in->cur - in->base);
53115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else
53125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    avail = in->buf->buffer->use - (in->cur - in->base);
53135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 2)
53145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
53155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
53165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		next = in->cur[1];
53175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if ((cur == '<') && (next == '!') &&
53185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (in->cur[2] == '-') && (in->cur[3] == '-')) {
53195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
53205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
53215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
53225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing Comment\n");
53255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseComment(ctxt);
53275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_MISC;
53285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        } else if ((cur == '<') && (next == '?')) {
53295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
53305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
53315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
53325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing PI\n");
53355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParsePI(ctxt);
53375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_MISC;
53385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else if ((cur == '<') && (next == '!') &&
53395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(2) == 'D') && (UPP(3) == 'O') &&
53405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(4) == 'C') && (UPP(5) == 'T') &&
53415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
53425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (UPP(8) == 'E')) {
53435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
53445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
53455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
53465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing internal subset\n");
53495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseDocTypeDecl(ctxt);
53515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_PROLOG;
53525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering PROLOG\n");
53555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else if ((cur == '<') && (next == '!') &&
53575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		           (avail < 9)) {
53585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
53595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
53605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_START_TAG;
53615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering START_TAG\n");
53645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
53665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
53675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_PROLOG:
53685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		SKIP_BLANKS;
53695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (in->buf == NULL)
53705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    avail = in->length - (in->cur - in->base);
53715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else
53725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    avail = in->buf->buffer->use - (in->cur - in->base);
53735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 2)
53745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
53755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
53765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		next = in->cur[1];
53775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((cur == '<') && (next == '!') &&
53785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (in->cur[2] == '-') && (in->cur[3] == '-')) {
53795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
53805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
53815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
53825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing Comment\n");
53855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseComment(ctxt);
53875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_PROLOG;
53885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        } else if ((cur == '<') && (next == '?')) {
53895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
53905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
53915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
53925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
53935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
53945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing PI\n");
53955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
53965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParsePI(ctxt);
53975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_PROLOG;
53985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else if ((cur == '<') && (next == '!') &&
53995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		           (avail < 4)) {
54005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
54025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_START_TAG;
54035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
54045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
54055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering START_TAG\n");
54065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
54075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
54085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
54095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_EPILOG:
54105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (in->buf == NULL)
54115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    avail = in->length - (in->cur - in->base);
54125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		else
54135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    avail = in->buf->buffer->use - (in->cur - in->base);
54145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 1)
54155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
54175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (IS_BLANK_CH(cur)) {
54185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseCharData(ctxt);
54195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
54215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 2)
54225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		next = in->cur[1];
54245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if ((cur == '<') && (next == '!') &&
54255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (in->cur[2] == '-') && (in->cur[3] == '-')) {
54265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
54275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
54285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
54295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
54305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
54315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing Comment\n");
54325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
54335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseComment(ctxt);
54345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_EPILOG;
54355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        } else if ((cur == '<') && (next == '?')) {
54365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((!terminate) &&
54375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
54385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
54395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
54405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
54415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: Parsing PI\n");
54425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
54435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParsePI(ctxt);
54445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_EPILOG;
54455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else if ((cur == '<') && (next == '!') &&
54465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		           (avail < 4)) {
54475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
54495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->errNo = XML_ERR_DOCUMENT_END;
54505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->wellFormed = 0;
54515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_EOF;
54525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
54535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
54545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering EOF\n");
54555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
54565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
54575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->sax->endDocument(ctxt->userData);
54585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
54605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
54615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_START_TAG: {
54625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        const xmlChar *name;
54635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		int failed;
54645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		const htmlElemDesc * info;
54655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
54665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 2)
54675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
54695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        if (cur != '<') {
54705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_CONTENT;
54715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
54725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
54735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering CONTENT\n");
54745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
54755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    break;
54765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
54775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (in->cur[1] == '/') {
54785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_END_TAG;
54795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->checkIndex = 0;
54805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
54815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
54825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering END_TAG\n");
54835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
54845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    break;
54855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
54865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((!terminate) &&
54875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
54885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
54895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
54905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		failed = htmlParseStartTag(ctxt);
54915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		name = ctxt->name;
54925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((failed == -1) ||
54935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (name == NULL)) {
54945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (CUR == '>')
54955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			NEXT;
54965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    break;
54975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
54985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
54995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
55005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Lookup the info for that element.
55015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
55025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		info = htmlTagLookup(name);
55035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (info == NULL) {
55045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
55055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                 "Tag %s invalid\n", name, NULL);
55065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
55075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
55095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Check for an Empty Element labeled the XML/SGML way
55105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
55115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((CUR == '/') && (NXT(1) == '>')) {
55125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    SKIP(2);
55135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
55145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->sax->endElement(ctxt->userData, name);
55155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlnamePop(ctxt);
55165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_CONTENT;
55175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
55185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
55195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering CONTENT\n");
55205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
55215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    break;
55225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
55235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (CUR == '>') {
55255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    NEXT;
55265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
55275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
55285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		                 "Couldn't find end of Start Tag %s\n",
55295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				 name, NULL);
55305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    /*
55325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     * end of parsing of this node.
55335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     */
55345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (xmlStrEqual(name, ctxt->name)) {
55355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			nodePop(ctxt);
55365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlnamePop(ctxt);
55375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
55385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_CONTENT;
55405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
55415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlGenericError(xmlGenericErrorContext,
55425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    "HPP: entering CONTENT\n");
55435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
55445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    break;
55455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
55465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		/*
55485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Check for an Empty Element from DTD definition
55495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
55505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((info != NULL) && (info->empty)) {
55515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
55525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->sax->endElement(ctxt->userData, name);
55535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlnamePop(ctxt);
55545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
55555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
55565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
55575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
55585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
55595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
55605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                break;
55615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
55625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_CONTENT: {
55635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		long cons;
55645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                /*
55655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 * Handle preparsed entities and charRef
55665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		 */
55675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ctxt->token != 0) {
55685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    xmlChar chr[2] = { 0 , 0 } ;
55695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    chr[0] = (xmlChar) ctxt->token;
55715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlCheckParagraph(ctxt);
55725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
55735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->sax->characters(ctxt->userData, chr, 1);
55745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->token = 0;
55755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->checkIndex = 0;
55765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
55775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((avail == 1) && (terminate)) {
55785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    cur = in->cur[0];
55795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((cur != '<') && (cur != '&')) {
55805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if (ctxt->sax != NULL) {
55815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    if (IS_BLANK_CH(cur)) {
55825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				if (ctxt->sax->ignorableWhitespace != NULL)
55835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				    ctxt->sax->ignorableWhitespace(
55845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					    ctxt->userData, &cur, 1);
55855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    } else {
55865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				htmlCheckParagraph(ctxt);
55875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				if (ctxt->sax->characters != NULL)
55885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				    ctxt->sax->characters(
55895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)					    ctxt->userData, &cur, 1);
55905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    }
55915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			}
55925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->token = 0;
55935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->checkIndex = 0;
55945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			in->cur++;
55955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			break;
55965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
55975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
55985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 2)
55995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
56005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cur = in->cur[0];
56015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		next = in->cur[1];
56025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		cons = ctxt->nbChars;
56035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
56045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
56055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    /*
56065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     * Handle SCRIPT/STYLE separately
56075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     */
56085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (!terminate) {
56095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        int idx;
56105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlChar val;
56115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
56125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1);
56135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if (idx < 0)
56145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
56155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        val = in->cur[idx + 2];
56165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if (val == 0) /* bad cut of input */
56175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
56185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
56195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseScript(ctxt);
56205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((cur == '<') && (next == '/')) {
56215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->instate = XML_PARSER_END_TAG;
56225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->checkIndex = 0;
56235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
56245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
56255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: entering END_TAG\n");
56265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
56275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			break;
56285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
56295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
56305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    /*
56315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     * Sometimes DOCTYPE arrives in the middle of the document
56325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     */
56335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if ((cur == '<') && (next == '!') &&
56345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			(UPP(2) == 'D') && (UPP(3) == 'O') &&
56355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			(UPP(4) == 'C') && (UPP(5) == 'T') &&
56365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			(UPP(6) == 'Y') && (UPP(7) == 'P') &&
56375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			(UPP(8) == 'E')) {
56385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if ((!terminate) &&
56395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
56405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
56415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
56425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			             "Misplaced DOCTYPE declaration\n",
56435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				     BAD_CAST "DOCTYPE" , NULL);
56445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParseDocTypeDecl(ctxt);
56455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else if ((cur == '<') && (next == '!') &&
56465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			(in->cur[2] == '-') && (in->cur[3] == '-')) {
56475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if ((!terminate) &&
56485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    (htmlParseLookupSequence(
56495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				ctxt, '-', '-', '>', 1, 1) < 0))
56505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
56515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
56525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
56535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: Parsing Comment\n");
56545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
56555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParseComment(ctxt);
56565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->instate = XML_PARSER_CONTENT;
56575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else if ((cur == '<') && (next == '?')) {
56585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if ((!terminate) &&
56595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
56605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
56615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
56625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
56635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: Parsing PI\n");
56645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
56655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParsePI(ctxt);
56665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->instate = XML_PARSER_CONTENT;
56675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else if ((cur == '<') && (next == '!') && (avail < 4)) {
56685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			goto done;
56695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else if ((cur == '<') && (next == '/')) {
56705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->instate = XML_PARSER_END_TAG;
56715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->checkIndex = 0;
56725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
56735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
56745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: entering END_TAG\n");
56755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
56765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			break;
56775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else if (cur == '<') {
56785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->instate = XML_PARSER_START_TAG;
56795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->checkIndex = 0;
56805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
56815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
56825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: entering START_TAG\n");
56835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
56845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			break;
56855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else if (cur == '&') {
56865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if ((!terminate) &&
56875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    (htmlParseLookupChars(ctxt,
56885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                  BAD_CAST "; >/", 4) < 0))
56895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
56905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
56915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
56925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: Parsing Reference\n");
56935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
56945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			/* TODO: check generation of subtrees if noent !!! */
56955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParseReference(ctxt);
56965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    } else {
56975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		        /*
56985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 * check that the text sequence is complete
56995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 * before handing out the data to the parser
57005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 * to avoid problems with erroneous end of
57015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 * data detection.
57025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 */
57035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			if ((!terminate) &&
57045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
57055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			    goto done;
57065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			ctxt->checkIndex = 0;
57075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			xmlGenericError(xmlGenericErrorContext,
57095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				"HPP: Parsing char data\n");
57105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParseCharData(ctxt);
57125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
57135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
57145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (cons == ctxt->nbChars) {
57155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    if (ctxt->node != NULL) {
57165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
57175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			             "detected an error in element content\n",
57185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)				     NULL, NULL);
57195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    }
57205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    NEXT;
57215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    break;
57225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
57235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
57245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
57255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
57265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_END_TAG:
57275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (avail < 2)
57285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
57295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if ((!terminate) &&
57305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
57315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    goto done;
57325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseEndTag(ctxt);
57335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (ctxt->nameNr == 0) {
57345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_EPILOG;
57355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		} else {
57365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    ctxt->instate = XML_PARSER_CONTENT;
57375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
57385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
57395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
57415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
57425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	        break;
57445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_CDATA_SECTION:
57455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
57465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == CDATA\n",
57475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
57485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
57495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
57505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
57525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
57535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
57555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_DTD:
57565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
57575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == DTD\n",
57585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
57595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
57605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
57615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
57635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
57645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
57665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_COMMENT:
57675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
57685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == COMMENT\n",
57695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
57705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
57715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
57725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
57745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
57755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
57775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_PI:
57785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
57795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == PI\n",
57805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
57815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
57825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
57835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
57855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
57865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
57885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_ENTITY_DECL:
57895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
57905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == ENTITY_DECL\n",
57915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
57925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
57935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
57945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
57955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
57965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
57975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
57985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
57995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_ENTITY_VALUE:
58005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
58015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == ENTITY_VALUE\n",
58025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
58035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
58045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
58055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
58065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
58075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering DTD\n");
58085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
58095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
58105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            case XML_PARSER_ATTRIBUTE_VALUE:
58115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
58125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == ATTRIBUTE_VALUE\n",
58135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
58145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_START_TAG;
58155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
58165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
58175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
58185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering START_TAG\n");
58195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
58205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
58215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    case XML_PARSER_SYSTEM_LITERAL:
58225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
58235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
58245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
58255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
58265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
58275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
58285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
58295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
58305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
58315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
58325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    case XML_PARSER_IGNORE:
58335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
58345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == XML_PARSER_IGNORE\n",
58355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
58365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
58375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
58385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
58395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
58405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
58415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
58425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
58435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    case XML_PARSER_PUBLIC_LITERAL:
58445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
58455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: internal error, state == XML_PARSER_LITERAL\n",
58465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			     NULL, NULL);
58475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->instate = XML_PARSER_CONTENT;
58485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->checkIndex = 0;
58495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
58505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlGenericError(xmlGenericErrorContext,
58515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			"HPP: entering CONTENT\n");
58525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
58535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		break;
58545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
58555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
58565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
58575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)done:
58585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((avail == 0) && (terminate)) {
58595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlAutoCloseOnEnd(ctxt);
58605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
58615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    /*
58625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     * SAX: end of the document processing.
58635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	     */
58645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->instate = XML_PARSER_EOF;
58655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
58665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->endDocument(ctxt->userData);
58675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
58685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
58695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt->myDoc != NULL) &&
58705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
58715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 (ctxt->instate == XML_PARSER_EPILOG))) {
58725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlDtdPtr dtd;
58735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	dtd = xmlGetIntSubset(ctxt->myDoc);
58745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (dtd == NULL)
58755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->myDoc->intSubset =
58765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
58775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
58785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
58795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
58805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
58815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
58825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
58835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ret);
58845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
58855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
58865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
58875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseChunk:
58885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
58895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @chunk:  an char array
58905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size:  the size in byte of the chunk
58915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @terminate:  last chunk indicator
58925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
58935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse a Chunk of memory
58945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
58955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns zero if no error, the xmlParserErrors otherwise.
58965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
58975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
58985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
58995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              int terminate) {
59005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ctxt == NULL) || (ctxt->input == NULL)) {
59015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
59025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		     "htmlParseChunk: context error\n", NULL, NULL);
59035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(XML_ERR_INTERNAL_ERROR);
59045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
59055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
59065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
59075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
59085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int cur = ctxt->input->cur - ctxt->input->base;
59095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int res;
59105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
59125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (res < 0) {
59135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->errNo = XML_PARSER_EOF;
59145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->disableSAX = 1;
59155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return (XML_PARSER_EOF);
59165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
59175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->base = ctxt->input->buf->buffer->content + base;
59185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->cur = ctxt->input->base + cur;
59195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->end =
59205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	  &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
59215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
59225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
59235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
59245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if 0
59265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((terminate) || (ctxt->input->buf->buffer->use > 80))
59275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    htmlParseTryOrFinish(ctxt, terminate);
59285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
59295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (ctxt->instate != XML_PARSER_EOF) {
59305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
59315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlParserInputBufferPtr in = ctxt->input->buf;
59325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
59335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    (in->raw != NULL)) {
59345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		int nbchars;
59355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
59375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		if (nbchars < 0) {
59385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
59395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			         "encoder error\n", NULL, NULL);
59405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		    return(XML_ERR_INVALID_ENCODING);
59415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		}
59425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    }
59435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
59445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
59455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParseTryOrFinish(ctxt, terminate);
59465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (terminate) {
59475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if ((ctxt->instate != XML_PARSER_EOF) &&
59485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ctxt->instate != XML_PARSER_EPILOG) &&
59495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ctxt->instate != XML_PARSER_MISC)) {
59505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->errNo = XML_ERR_DOCUMENT_END;
59515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->wellFormed = 0;
59525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
59535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->instate != XML_PARSER_EOF) {
59545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
59555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		ctxt->sax->endDocument(ctxt->userData);
59565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
59575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->instate = XML_PARSER_EOF;
59585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
59595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return((xmlParserErrors) ctxt->errNo);
59605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
59615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
59635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
59645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *			User entry points				*
59655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
59665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
59675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
59695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreatePushParserCtxt:
59705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @sax:  a SAX handler
59715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @user_data:  The user data returned on SAX callbacks
59725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @chunk:  a pointer to an array of chars
59735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size:  number of chars in the array
59745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename:  an optional file name or URI
59755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @enc:  an optional encoding
59765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
59775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for using the HTML parser in push mode
59785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The value of @filename is used for fetching external entities
59795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and error/warning reports.
59805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
59815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL
59825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
59835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr
59845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
59855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         const char *chunk, int size, const char *filename,
59865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)			 xmlCharEncoding enc) {
59875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
59885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserInputPtr inputStream;
59895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr buf;
59905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
59925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    buf = xmlAllocParserInputBuffer(enc);
59945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buf == NULL) return(NULL);
59955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlNewParserCtxt();
59975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) {
59985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserInputBuffer(buf);
59995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
60005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
60015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder)
60025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->charset=XML_CHAR_ENCODING_UTF8;
60035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax != NULL) {
60045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
60055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree(ctxt->sax);
60065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
60075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (ctxt->sax == NULL) {
60085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree(buf);
60095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree(ctxt);
60105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    return(NULL);
60115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
60125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
60135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (user_data != NULL)
60145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->userData = user_data;
60155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
60165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (filename == NULL) {
60175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->directory = NULL;
60185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
60195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->directory = xmlParserGetDirectory(filename);
60205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
60215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputStream = htmlNewInputStream(ctxt);
60235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (inputStream == NULL) {
60245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
60255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFree(buf);
60265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
60275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
60285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (filename == NULL)
60305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	inputStream->filename = NULL;
60315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
60325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	inputStream->filename = (char *)
60335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlCanonicPath((const xmlChar *) filename);
60345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputStream->buf = buf;
60355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputStream->base = inputStream->buf->buffer->content;
60365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputStream->cur = inputStream->buf->buffer->content;
60375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputStream->end =
60385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
60395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, inputStream);
60415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
60435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->input->buf != NULL))  {
60445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
60455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	int cur = ctxt->input->cur - ctxt->input->base;
60465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
60485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->base = ctxt->input->buf->buffer->content + base;
60505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->cur = ctxt->input->base + cur;
60515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->input->end =
60525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
60535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef DEBUG_PUSH
60545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
60555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
60565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
60575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->progressive = 1;
60585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ctxt);
60605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
60615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* LIBXML_PUSH_ENABLED */
60625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
60645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlSAXParseDoc:
60655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur:  a pointer to an array of xmlChar
60665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  a free form C string describing the HTML document encoding, or NULL
60675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @sax:  the SAX handler block
60685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @userData: if using SAX, this pointer will be provided on callbacks.
60695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
60705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
60715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to handle parse events. If sax is NULL, fallback to the default DOM
60725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * behavior and return a tree.
60735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
60745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree unless SAX is NULL or the document is
60755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *     not well formed.
60765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
60775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
60795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) {
60805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlDocPtr ret;
60815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
60825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
60845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL) return(NULL);
60865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlCreateDocParserCtxt(cur, encoding);
60895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) return(NULL);
60905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax != NULL) {
60915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ctxt->sax != NULL) xmlFree (ctxt->sax);
60925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax = sax;
60935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->userData = userData;
60945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
60955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
60965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParseDocument(ctxt);
60975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ret = ctxt->myDoc;
60985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax != NULL) {
60995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->sax = NULL;
61005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->userData = NULL;
61015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
61025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlFreeParserCtxt(ctxt);
61035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ret);
61055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
61065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
61085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseDoc:
61095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur:  a pointer to an array of xmlChar
61105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  a free form C string describing the HTML document encoding, or NULL
61115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
61125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML in-memory document and build a tree.
61135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
61145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
61155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
61165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
61185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseDoc(xmlChar *cur, const char *encoding) {
61195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
61205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
61215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
61245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCreateFileParserCtxt:
61255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename:  the filename
61265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  a free form C string describing the HTML document encoding, or NULL
61275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
61285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Create a parser context for a file content.
61295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Automatic support for ZLIB/Compress compressed document is provided
61305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * by default if found at compile-time.
61315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
61325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the new parser context or NULL
61335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
61345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParserCtxtPtr
61355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCreateFileParserCtxt(const char *filename, const char *encoding)
61365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
61375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
61385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserInputPtr inputStream;
61395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char *canonicFilename;
61405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* htmlCharEncoding enc; */
61415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlChar *content, *content_line = (xmlChar *) "charset=";
61425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (filename == NULL)
61445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(NULL);
61455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlNewParserCtxt();
61475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) {
61485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
61495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
61505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
61515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (canonicFilename == NULL) {
61525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef LIBXML_SAX1_ENABLED
61535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (xmlDefaultSAXHandler.error != NULL) {
61545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlDefaultSAXHandler.error(NULL, "out of memory\n");
61555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
61565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
61575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
61585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
61595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
61605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
61625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlFree(canonicFilename);
61635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (inputStream == NULL) {
61645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
61655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
61665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
61675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, inputStream);
61695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* set encoding */
61715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (encoding) {
61725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);
61735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (content) {
61745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    strcpy ((char *)content, (char *)content_line);
61755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            strcat ((char *)content, (char *)encoding);
61765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            htmlCheckEncoding (ctxt, content);
61775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlFree (content);
61785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	}
61795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
61805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ctxt);
61825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
61835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
61845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
61855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlSAXParseFile:
61865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename:  the filename
61875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  a free form C string describing the HTML document encoding, or NULL
61885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @sax:  the SAX handler block
61895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @userData: if using SAX, this pointer will be provided on callbacks.
61905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
61915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
61925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * compressed document is provided by default if found at compile-time.
61935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * It use the given SAX function block to handle the parsing callback.
61945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * If sax is NULL, fallback to the default DOM tree building routines.
61955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
61965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree unless SAX is NULL or the document is
61975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *     not well formed.
61985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
61995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
62015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax,
62025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 void *userData) {
62035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlDocPtr ret;
62045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
62055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlSAXHandlerPtr oldsax = NULL;
62065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
62085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlCreateFileParserCtxt(filename, encoding);
62105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) return(NULL);
62115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax != NULL) {
62125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	oldsax = ctxt->sax;
62135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax = sax;
62145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->userData = userData;
62155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
62165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParseDocument(ctxt);
62185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ret = ctxt->myDoc;
62205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (sax != NULL) {
62215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax = oldsax;
62225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->userData = NULL;
62235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
62245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlFreeParserCtxt(ctxt);
62255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(ret);
62275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
62285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
62305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlParseFile:
62315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename:  the filename
62325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  a free form C string describing the HTML document encoding, or NULL
62335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
62355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * compressed document is provided by default if found at compile-time.
62365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
62385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
62395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
62415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlParseFile(const char *filename, const char *encoding) {
62425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(htmlSAXParseFile(filename, encoding, NULL, NULL));
62435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
62445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
62465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlHandleOmittedElem:
62475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @val:  int 0 or 1
62485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Set and return the previous value for handling HTML omitted tags.
62505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the last value for 0 for no handling, 1 for auto insertion.
62525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
62535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
62555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlHandleOmittedElem(int val) {
62565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int old = htmlOmittedDefaultValue;
62575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlOmittedDefaultValue = val;
62595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return(old);
62605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
62615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
62635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlElementAllowedHere:
62645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @parent: HTML parent element
62655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element
62665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an HTML element may be a direct child of a parent element.
62685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Note - doesn't check for deprecated elements
62695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 1 if allowed; 0 otherwise.
62715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
62725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
62735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) {
62745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char** p ;
62755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( ! elt || ! parent || ! parent->subelts )
62775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return 0 ;
62785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for ( p = parent->subelts; *p; ++p )
62805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ( !xmlStrcmp((const xmlChar *)*p, elt) )
62815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 1 ;
62825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
62835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 0 ;
62845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
62855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
62865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlElementStatusHere:
62875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @parent: HTML parent element
62885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element
62895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an HTML element may be a direct child of a parent element.
62915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * and if so whether it is valid or deprecated.
62925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
62935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID
62945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
62955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlStatus
62965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) {
62975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( ! parent || ! elt )
62985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return HTML_INVALID ;
62995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) )
63005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return HTML_INVALID ;
63015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
63035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
63045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
63055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlAttrAllowed:
63065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @elt: HTML element
63075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @attr: HTML attribute
63085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @legacy: whether to allow deprecated attributes
63095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
63105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether an attribute is valid for an element
63115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Has full knowledge of Required and Deprecated attributes
63125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
63135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID
63145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
63155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlStatus
63165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) {
63175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char** p ;
63185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( !elt || ! attr )
63205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return HTML_INVALID ;
63215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( elt->attrs_req )
63235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for ( p = elt->attrs_req; *p; ++p)
63245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ( !xmlStrcmp((const xmlChar*)*p, attr) )
63255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return HTML_REQUIRED ;
63265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( elt->attrs_opt )
63285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for ( p = elt->attrs_opt; *p; ++p)
63295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ( !xmlStrcmp((const xmlChar*)*p, attr) )
63305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return HTML_VALID ;
63315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( legacy && elt->attrs_depr )
63335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for ( p = elt->attrs_depr; *p; ++p)
63345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ( !xmlStrcmp((const xmlChar*)*p, attr) )
63355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return HTML_DEPRECATED ;
63365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return HTML_INVALID ;
63385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
63395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
63405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlNodeStatus:
63415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @node: an htmlNodePtr in a tree
63425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @legacy: whether to allow deprecated elements (YES is faster here
63435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	for Element nodes)
63445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
63455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Checks whether the tree node is valid.  Experimental (the author
63465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *     only uses the HTML enhancements in a SAX parser)
63475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
63485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Return: for Element nodes, a return from htmlElementAllowedHere (if
63495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	legacy allowed) or htmlElementStatusHere (otherwise).
63505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	for Attribute nodes, a return from htmlAttrAllowed
63515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	for other nodes, HTML_NA (no checks performed)
63525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
63535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlStatus
63545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlNodeStatus(const htmlNodePtr node, int legacy) {
63555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ( ! node )
63565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return HTML_INVALID ;
63575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch ( node->type ) {
63595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case XML_ELEMENT_NODE:
63605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return legacy
63615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	? ( htmlElementAllowedHere (
63625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlTagLookup(node->parent->name) , node->name
63635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		) ? HTML_VALID : HTML_INVALID )
63645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	: htmlElementStatusHere(
63655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlTagLookup(node->parent->name) ,
63665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)		htmlTagLookup(node->name) )
63675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	;
63685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case XML_ATTRIBUTE_NODE:
63695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return htmlAttrAllowed(
63705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	htmlTagLookup(node->parent->name) , node->name, legacy) ;
63715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default: return HTML_NA ;
63725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
63735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
63745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/************************************************************************
63755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
63765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *	New set (2.6.0) of simpler and more flexible APIs		*
63775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *									*
63785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. Nothing is done when @str is NULL. A variable named
 * "dict" must be visible where the macro is used; when it is NULL the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, including as the body of an if/else; callers
 * terminate it with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
63905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
63915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
63925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReset:
63935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context
63945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
63955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Reset a parser context
63965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
63975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void
63985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReset(htmlParserCtxtPtr ctxt)
63995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
64005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr input;
64015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlDictPtr dict;
64025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
64045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return;
64055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
64075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dict = ctxt->dict;
64085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
64105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeInputStream(input);
64115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
64125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->inputNr = 0;
64135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->input = NULL;
64145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->spaceNr = 0;
64165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->spaceTab != NULL) {
64175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->spaceTab[0] = -1;
64185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->space = &ctxt->spaceTab[0];
64195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
64205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->space = NULL;
64215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
64225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nodeNr = 0;
64255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->node = NULL;
64265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nameNr = 0;
64285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->name = NULL;
64295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DICT_FREE(ctxt->version);
64315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->version = NULL;
64325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DICT_FREE(ctxt->encoding);
64335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->encoding = NULL;
64345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DICT_FREE(ctxt->directory);
64355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->directory = NULL;
64365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DICT_FREE(ctxt->extSubURI);
64375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->extSubURI = NULL;
64385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DICT_FREE(ctxt->extSubSystem);
64395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->extSubSystem = NULL;
64405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->myDoc != NULL)
64415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeDoc(ctxt->myDoc);
64425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->myDoc = NULL;
64435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->standalone = -1;
64455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->hasExternalSubset = 0;
64465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->hasPErefs = 0;
64475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->html = 1;
64485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->external = 0;
64495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->instate = XML_PARSER_START;
64505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->token = 0;
64515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->wellFormed = 1;
64535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nsWellFormed = 1;
64545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->valid = 1;
64555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.userData = ctxt;
64565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.error = xmlParserValidityError;
64575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->vctxt.warning = xmlParserValidityWarning;
64585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->record_info = 0;
64595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->nbChars = 0;
64605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->checkIndex = 0;
64615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->inSubset = 0;
64625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->errNo = XML_ERR_OK;
64635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->depth = 0;
64645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->charset = XML_CHAR_ENCODING_NONE;
64655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->catalogs = NULL;
64665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitNodeInfoSeq(&ctxt->node_seq);
64675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->attsDefault != NULL) {
64695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
64705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->attsDefault = NULL;
64715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
64725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->attsSpecial != NULL) {
64735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlHashFree(ctxt->attsSpecial, NULL);
64745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->attsSpecial = NULL;
64755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
64765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
64775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
64795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtUseOptions:
64805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt: an HTML parser context
64815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
64825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
64835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Applies the options to the parser context
64845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
64855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns 0 in case of success, the set of unknown or unimplemented options
64865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *         in case of error.
64875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
64885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int
64895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
64905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
64915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
64925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return(-1);
64935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
64945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & HTML_PARSE_NOWARNING) {
64955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax->warning = NULL;
64965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->vctxt.warning = NULL;
64975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        options -= XML_PARSE_NOWARNING;
64985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->options |= XML_PARSE_NOWARNING;
64995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
65005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & HTML_PARSE_NOERROR) {
65015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax->error = NULL;
65025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->vctxt.error = NULL;
65035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax->fatalError = NULL;
65045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        options -= XML_PARSE_NOERROR;
65055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->options |= XML_PARSE_NOERROR;
65065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
65075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & HTML_PARSE_PEDANTIC) {
65085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->pedantic = 1;
65095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        options -= XML_PARSE_PEDANTIC;
65105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->options |= XML_PARSE_PEDANTIC;
65115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else
65125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->pedantic = 0;
65135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & XML_PARSE_NOBLANKS) {
65145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->keepBlanks = 0;
65155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
65165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        options -= XML_PARSE_NOBLANKS;
65175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->options |= XML_PARSE_NOBLANKS;
65185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else
65195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->keepBlanks = 1;
65205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & HTML_PARSE_RECOVER) {
65215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->recovery = 1;
65225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	options -= HTML_PARSE_RECOVER;
65235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else
65245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->recovery = 0;
65255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & HTML_PARSE_COMPACT) {
65265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->options |= HTML_PARSE_COMPACT;
65275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        options -= HTML_PARSE_COMPACT;
65285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
65295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (options & XML_PARSE_HUGE) {
65305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	ctxt->options |= XML_PARSE_HUGE;
65315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        options -= XML_PARSE_HUGE;
65325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
65335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->dictNames = 0;
65345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (options);
65355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
65365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
65375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
65385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlDoRead:
65395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
65405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
65415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
65425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
65435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @reuse:  keep the context for reuse
65445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
65455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Common front-end for the htmlRead functions
65465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
65475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree or NULL
65485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
65495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static htmlDocPtr
65505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
65515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          int options, int reuse)
65525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
65535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlDocPtr ret;
65545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
65555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCtxtUseOptions(ctxt, options);
65565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->html = 1;
65575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (encoding != NULL) {
65585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlCharEncodingHandlerPtr hdlr;
65595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
65605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	hdlr = xmlFindCharEncodingHandler(encoding);
65615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	if (hdlr != NULL) {
65625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    xmlSwitchToEncoding(ctxt, hdlr);
65635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    if (ctxt->input->encoding != NULL)
65645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      xmlFree((xmlChar *) ctxt->input->encoding);
65655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
65665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
65675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
65685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((URL != NULL) && (ctxt->input != NULL) &&
65695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ctxt->input->filename == NULL))
65705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
65715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParseDocument(ctxt);
65725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ret = ctxt->myDoc;
65735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt->myDoc = NULL;
65745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!reuse) {
65755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((ctxt->dictNames) &&
65765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ret != NULL) &&
65775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    (ret->dict == ctxt->dict))
65785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	    ctxt->dict = NULL;
65795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
65805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
65815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (ret);
65825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
65835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
65845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
65855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadDoc:
65865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur:  a pointer to a zero terminated string
65875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
65885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
65895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
65905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
65915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree.
65925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
65935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
65945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
65955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
65965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
65975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
65985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
65995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL)
66015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
66045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlCreateDocParserCtxt(cur, NULL);
66055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
66065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 0));
66085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
66095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
66115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadFile:
66125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename:  a file or URL
66135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
66145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
66155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
66165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML file from the filesystem or the network.
66175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
66185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
66195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
66205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
66215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadFile(const char *filename, const char *encoding, int options)
66225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
66235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
66245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
66265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlCreateFileParserCtxt(filename, encoding);
66275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
66285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, NULL, NULL, options, 0));
66305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
66315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
66335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadMemory:
66345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @buffer:  a pointer to a char array
66355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size:  the size of the array
66365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
66375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
66385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
66395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
66405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree.
66415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
66425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
66435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
66445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
66455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
66465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
66475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
66485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
66505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
66515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
66525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlDefaultSAXHandlerInit();
66545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt->sax != NULL)
66555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
66565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 0));
66575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
66585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
66605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadFd:
66615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @fd:  an open file descriptor
66625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
66635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
66645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
66655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
66665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML from a file descriptor and build a tree.
66675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
66685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
66695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
66705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
66715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadFd(int fd, const char *URL, const char *encoding, int options)
66725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
66735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
66745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr input;
66755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
66765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (fd < 0)
66785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
66815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
66825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL)
66835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = xmlNewParserCtxt();
66855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) {
66865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeParserInputBuffer(input);
66875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
66895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
66905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
66915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeParserInputBuffer(input);
66925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
66935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
66945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
66955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
66965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 0));
66975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
66985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
66995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
67005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlReadIO:
67015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioread:  an I/O read function
67025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioclose:  an I/O close function
67035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioctx:  an I/O handler
67045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
67055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
67065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
67075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
67085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML document from I/O functions and source and build a tree.
67095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
67105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
67115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
67125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
67135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
67145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          void *ioctx, const char *URL, const char *encoding, int options)
67155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
67165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlParserCtxtPtr ctxt;
67175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr input;
67185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
67195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ioread == NULL)
67215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlInitParser();
67235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
67255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         XML_CHAR_ENCODING_NONE);
67265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL)
67275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ctxt = htmlNewParserCtxt();
67295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL) {
67305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeParserInputBuffer(input);
67315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
67335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
67345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
67355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeParserInputBuffer(input);
67365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserCtxt(ctxt);
67375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
67395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
67405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 0));
67415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
67425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
67445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadDoc:
67455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
67465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @cur:  a pointer to a zero terminated string
67475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
67485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
67495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
67505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
67515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree.
67525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context
67535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
67545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
67555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
67565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
67575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
67585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               const char *URL, const char *encoding, int options)
67595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
67605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
67615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cur == NULL)
67635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
67655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCtxtReset(ctxt);
67685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlNewStringInputStream(ctxt, cur);
67705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
67715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
67735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
67745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 1));
67755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
67765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
67785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadFile:
67795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
67805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @filename:  a file or URL
67815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
67825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
67835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
67845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML file from the filesystem or the network.
67855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context
67865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
67875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
67885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
67895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
67905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
67915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                const char *encoding, int options)
67925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
67935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
67945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
67955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (filename == NULL)
67965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
67985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
67995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCtxtReset(ctxt);
68015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
68035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
68045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
68065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
68075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, NULL, encoding, options, 1));
68085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
68095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
68115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadMemory:
68125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
68135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @buffer:  a pointer to a char array
68145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @size:  the size of the array
68155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
68165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
68175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
68185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
68195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML in-memory document and build a tree.
68205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context
68215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
68225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
68235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
68245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
68255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
68265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  const char *URL, const char *encoding, int options)
68275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
68285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr input;
68295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
68305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
68325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buffer == NULL)
68345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCtxtReset(ctxt);
68375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
68395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL) {
68405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
68415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
68425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
68445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
68455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	xmlFreeParserInputBuffer(input);
68465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	return(NULL);
68475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
68485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
68505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 1));
68515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
68525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
68545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadFd:
68555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
68565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @fd:  an open file descriptor
68575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
68585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
68595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
68605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
68615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an XML from a file descriptor and build a tree.
68625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context
68635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
68645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
68655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
68665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
68675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
68685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              const char *URL, const char *encoding, int options)
68695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
68705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr input;
68715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
68725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (fd < 0)
68745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
68765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCtxtReset(ctxt);
68795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
68825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL)
68835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
68855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
68865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeParserInputBuffer(input);
68875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
68885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
68895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
68905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 1));
68915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
68925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
68935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
68945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * htmlCtxtReadIO:
68955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ctxt:  an HTML parser context
68965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioread:  an I/O read function
68975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioclose:  an I/O close function
68985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @ioctx:  an I/O handler
68995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @URL:  the base URL to use for the document
69005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @encoding:  the document encoding, or NULL
69015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * @options:  a combination of htmlParserOption(s)
69025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
69035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * parse an HTML document from I/O functions and source and build a tree.
69045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * This reuses the existing @ctxt parser context
69055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
69065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Returns the resulting document tree
69075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
69085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlDocPtr
69095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
69105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              xmlInputCloseCallback ioclose, void *ioctx,
69115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	      const char *URL,
69125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              const char *encoding, int options)
69135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
69145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputBufferPtr input;
69155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlParserInputPtr stream;
69165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
69175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ioread == NULL)
69185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
69195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ctxt == NULL)
69205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
69215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
69225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    htmlCtxtReset(ctxt);
69235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
69245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
69255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         XML_CHAR_ENCODING_NONE);
69265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (input == NULL)
69275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
69285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
69295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (stream == NULL) {
69305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        xmlFreeParserInputBuffer(input);
69315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return (NULL);
69325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
69335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    inputPush(ctxt, stream);
69345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (htmlDoRead(ctxt, URL, encoding, options, 1));
69355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
69365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
69375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define bottom_HTMLparser
69385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "elfgcchack.h"
69395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* LIBXML_HTML_ENABLED */
6940