DOCBparser.c revision cbaf399537a1fb69ef97b079e4cb553869aaa4d9
1/*
2 * DOCBparser.c : an attempt to parse SGML Docbook documents
3 *
4 * This is extremely hackish. It also adds one extension
5 *    <?sgml-declaration encoding="ISO-8859-1"?>
6 * allowing to store the encoding of the document within the instance.
7 *
8 * See Copyright for the status of this software.
9 *
10 * daniel@veillard.com
11 */
12
13#include "libxml.h"
14#ifdef LIBXML_DOCB_ENABLED
15
16#include <string.h>
17#ifdef HAVE_CTYPE_H
18#include <ctype.h>
19#endif
20#ifdef HAVE_STDLIB_H
21#include <stdlib.h>
22#endif
23#ifdef HAVE_SYS_STAT_H
24#include <sys/stat.h>
25#endif
26#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32#ifdef HAVE_ZLIB_H
33#include <zlib.h>
34#endif
35
36#include <libxml/xmlmemory.h>
37#include <libxml/tree.h>
38#include <libxml/SAX.h>
39#include <libxml/parser.h>
40#include <libxml/parserInternals.h>
41#include <libxml/xmlerror.h>
42#include <libxml/DOCBparser.h>
43#include <libxml/entities.h>
44#include <libxml/encoding.h>
45#include <libxml/valid.h>
46#include <libxml/xmlIO.h>
47#include <libxml/uri.h>
48#include <libxml/globals.h>
49
50/*
51 * DocBook XML current versions
52 */
53
54#define XML_DOCBOOK_XML_PUBLIC (const xmlChar *)			\
55             "-//OASIS//DTD DocBook XML V4.1.2//EN"
56#define XML_DOCBOOK_XML_SYSTEM (const xmlChar *)			\
57             "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd"
58
/*
 * Internal description of an SGML entity: one record ties a character
 * value to the entity name that stands for it, plus a free-form
 * description.
 */
typedef struct _docbEntityDesc docbEntityDesc, *docbEntityDescPtr;

struct _docbEntityDesc {
    int value;          /* the UNICODE value for the character */
    const char *name;   /* the entity name */
    const char *desc;   /* the description */
};
69
70#if 0
71docbElemDescPtr        docbTagLookup   (const xmlChar *tag);
72docbEntityDescPtr      docbEntityLookup(const xmlChar *name);
73docbEntityDescPtr      docbEntityValueLookup(int value);
74
75int                    docbIsAutoClosed(docbDocPtr doc,
76                                        docbNodePtr elem);
77int                    docbAutoCloseTag(docbDocPtr doc,
78                                        const xmlChar *name,
79                                        docbNodePtr elem);
80
81#endif
82static int             docbParseCharRef(docbParserCtxtPtr ctxt);
83static xmlEntityPtr    docbParseEntityRef(docbParserCtxtPtr ctxt,
84                                        xmlChar **str);
85static void            docbParseElement(docbParserCtxtPtr ctxt);
86static void            docbParseContent(docbParserCtxtPtr ctxt);
87
/*
 * Internal description of an SGML element.
 *
 * The field order is significant: docbookElementTable in this file
 * initialises its entries positionally.
 */
typedef struct _docbElemDesc docbElemDesc, *docbElemDescPtr;

struct _docbElemDesc {
    const char *name;   /* the tag name */
    int startTag;       /* whether the start tag can be implied */
    int endTag;         /* whether the end tag can be implied; the element
                         * table's legend also uses 2 for "forbidden" */
    int empty;          /* is this an empty element ? */
    int depr;           /* is this a deprecated element ?
                         * NOTE(review): the element table stores values
                         * 0..9 in this slot, so it looks like more than a
                         * boolean - confirm the intended meaning */
    int dtd;            /* 1: only in Loose DTD, 2: only Frameset one */
    const char *desc;   /* the description */
};
102
103
104#define DOCB_MAX_NAMELEN 1000
105#define DOCB_PARSER_BIG_BUFFER_SIZE 1000
106#define DOCB_PARSER_BUFFER_SIZE 100
107
108/* #define DEBUG */
109/* #define DEBUG_PUSH */
110
111/************************************************************************
112 *                                                                     *
113 *             Parser stacks related functions and macros              *
114 *                                                                     *
115 ************************************************************************/
116
117/*
118 * Generic function for accessing stacks in the Parser Context
119 */
120
121#define PUSH_AND_POP(scope, type, name)                                        \
122scope int docb##name##Push(docbParserCtxtPtr ctxt, type value) {       \
123    if (ctxt->name##Nr >= ctxt->name##Max) {                           \
124       ctxt->name##Max *= 2;                                           \
125        ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,         \
126                    ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
127        if (ctxt->name##Tab == NULL) {                                 \
128           xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");                      \
129           return(0);                                                  \
130       }                                                               \
131    }                                                                  \
132    ctxt->name##Tab[ctxt->name##Nr] = value;                           \
133    ctxt->name = value;                                                        \
134    return(ctxt->name##Nr++);                                          \
135}                                                                      \
136scope type docb##name##Pop(docbParserCtxtPtr ctxt) {                   \
137    type ret;                                                          \
138    if (ctxt->name##Nr < 0) return(0);                                 \
139    ctxt->name##Nr--;                                                  \
140    if (ctxt->name##Nr < 0) return(0);                                 \
141    if (ctxt->name##Nr > 0)                                            \
142       ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
143    else                                                               \
144        ctxt->name = NULL;                                             \
145    ret = ctxt->name##Tab[ctxt->name##Nr];                             \
146    ctxt->name##Tab[ctxt->name##Nr] = 0;                               \
147    return(ret);                                                       \
148}                                                                      \
149
150/* PUSH_AND_POP(static, xmlNodePtr, node) */
151PUSH_AND_POP(static, xmlChar*, name)
152
153/*
154 * Macros for accessing the content. Those should be used only by the parser,
155 * and not exported.
156 *
157 * Dirty macros, i.e. one need to make assumption on the context to use them
158 *
159 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
160 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
161 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
162 *           in UNICODE mode. This should be used internally by the parser
163 *           only to compare to ASCII values otherwise it would break when
164 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
167 *   UPP(n)  returns the n'th next xmlChar converted to uppercase. Same as CUR
168 *           it should be used only to compare on ASCII based substring.
169 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
170 *           strings within the parser.
171 *
172 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
173 *
174 *   CURRENT Returns the current char value, with the full decoding of
175 *           UTF-8 if we are using this mode. It returns an int.
176 *   NEXT    Skip to the next character, this does the proper decoding
177 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
178 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
179 */
180
/*
 * All of these expect a parser context pointer named `ctxt` to be in scope
 * at the point of use. Every expansion is fully parenthesised so that it
 * can be used safely as an operand of a larger expression.
 */

/* The current input byte converted to uppercase. */
#define UPPER (toupper(*ctxt->input->cur))

/* Advance the input cursor by val bytes, adding val to the char count. */
#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val))

/* The input byte val positions after the cursor. */
#define NXT(val) (ctxt->input->cur[(val)])

/* Same as NXT(val), converted to uppercase. */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

/* The input cursor itself (still usable as an lvalue). */
#define CUR_PTR (ctxt->input->cur)

#define SHRINK  xmlParserInputShrink(ctxt->input)

#define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

/* The current input byte as an int. */
#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS docbSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))

/* Step to the next character via xmlNextChar() and count it. */
#define NEXT (xmlNextChar(ctxt), ctxt->nbChars++)

/* The current input byte, or -1 while ctxt->token is non-zero. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
208
209
/*
 * NEXTL(l): step over the current character, which occupies l bytes.
 * A newline bumps the line counter and resets the column to 1; anything
 * else bumps the column. Any pending ctxt->token is cleared and the
 * character count grows by one.
 */
#define NEXTL(l) do {                                                  \
    if (*(ctxt->input->cur) == '\n') {                                 \
        ctxt->input->line++;                                           \
        ctxt->input->col = 1;                                          \
    } else {                                                           \
        ctxt->input->col++;                                            \
    }                                                                  \
    ctxt->token = 0;                                                   \
    ctxt->input->cur += (l);                                           \
    ctxt->nbChars++;                                                   \
  } while (0)
216
217/************
218    \
219    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);    \
220    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
221 ************/
222
/* Current character of the parser input; its byte length is stored in l. */
#define CUR_CHAR(l) docbCurrentChar(ctxt, &(l))
/* Current character of the string s; its byte length is stored in l. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, whose encoded length is l bytes,
 * to buffer b at index i and advance i. A one-byte character is stored
 * directly; anything longer goes through xmlCopyChar(). Arguments are
 * parenthesised and the body is a single statement, so the macro behaves
 * like a function call wherever it is used. Note that l and i are still
 * evaluated more than once.
 */
#define COPY_BUF(l,b,i,v)                                              \
    do {                                                               \
        if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                      \
        else (i) += xmlCopyChar((l), &(b)[(i)], (v));                  \
    } while (0)
229
230/**
231 * docbCurrentChar:
232 * @ctxt:  the DocBook SGML parser context
233 * @len:  pointer to the length of the char read
234 *
235 * The current char value, if using UTF-8 this may actually span multiple
236 * bytes in the input buffer. Implement the end of line normalization:
237 * 2.11 End-of-Line Handling
238 * If the encoding is unspecified, in the case we find an ISO-Latin-1
239 * char, then the encoding converter is plugged in automatically.
240 *
241 * Returns the current char value and its length
242 */
243
244static int
245docbCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
246    if (ctxt->instate == XML_PARSER_EOF)
247       return(0);
248
249    if (ctxt->token != 0) {
250       *len = 0;
251       return(ctxt->token);
252    }
253    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
254       /*
255        * We are supposed to handle UTF8, check it's valid
256        * From rfc2044: encoding of the Unicode values on UTF-8:
257        *
258        * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
259        * 0000 0000-0000 007F   0xxxxxxx
260        * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
261        * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
262        *
263        * Check for the 0x110000 limit too
264        */
265       const unsigned char *cur = ctxt->input->cur;
266       unsigned char c;
267       unsigned int val;
268
269       c = *cur;
270       if (c & 0x80) {
271           if (cur[1] == 0)
272               xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
273           if ((cur[1] & 0xc0) != 0x80)
274               goto encoding_error;
275           if ((c & 0xe0) == 0xe0) {
276
277               if (cur[2] == 0)
278                   xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
279               if ((cur[2] & 0xc0) != 0x80)
280                   goto encoding_error;
281               if ((c & 0xf0) == 0xf0) {
282                   if (cur[3] == 0)
283                       xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
284                   if (((c & 0xf8) != 0xf0) ||
285                       ((cur[3] & 0xc0) != 0x80))
286                       goto encoding_error;
287                   /* 4-byte code */
288                   *len = 4;
289                   val = (cur[0] & 0x7) << 18;
290                   val |= (cur[1] & 0x3f) << 12;
291                   val |= (cur[2] & 0x3f) << 6;
292                   val |= cur[3] & 0x3f;
293               } else {
294                 /* 3-byte code */
295                   *len = 3;
296                   val = (cur[0] & 0xf) << 12;
297                   val |= (cur[1] & 0x3f) << 6;
298                   val |= cur[2] & 0x3f;
299               }
300           } else {
301             /* 2-byte code */
302               *len = 2;
303               val = (cur[0] & 0x1f) << 6;
304               val |= cur[1] & 0x3f;
305           }
306           if (!IS_CHAR(val)) {
307               ctxt->errNo = XML_ERR_INVALID_ENCODING;
308               if ((ctxt->sax != NULL) &&
309                   (ctxt->sax->error != NULL))
310                   ctxt->sax->error(ctxt->userData,
311                                    "Char 0x%X out of allowed range\n", val);
312               ctxt->wellFormed = 0;
313               ctxt->disableSAX = 1;
314           }
315           return(val);
316       } else {
317           /* 1-byte code */
318           *len = 1;
319           return((int) *ctxt->input->cur);
320       }
321    }
322    /*
323     * Assume it's a fixed length encoding (1) with
324     * a compatible encoding for the ASCII set, since
325     * XML constructs only use < 128 chars
326     */
327    *len = 1;
328    if ((int) *ctxt->input->cur < 0x80)
329       return((int) *ctxt->input->cur);
330
331    /*
332     * Humm this is bad, do an automatic flow conversion
333     */
334    xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
335    ctxt->charset = XML_CHAR_ENCODING_UTF8;
336    return(xmlCurrentChar(ctxt, len));
337
338encoding_error:
339    /*
340     * If we detect an UTF8 error that probably mean that the
341     * input encoding didn't get properly advertized in the
342     * declaration header. Report the error and switch the encoding
343     * to ISO-Latin-1 (if you don't like this policy, just declare the
344     * encoding !)
345     */
346    ctxt->errNo = XML_ERR_INVALID_ENCODING;
347    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
348       ctxt->sax->error(ctxt->userData,
349                        "Input is not proper UTF-8, indicate encoding !\n");
350       ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
351                       ctxt->input->cur[0], ctxt->input->cur[1],
352                       ctxt->input->cur[2], ctxt->input->cur[3]);
353    }
354
355    ctxt->charset = XML_CHAR_ENCODING_8859_1;
356    *len = 1;
357    return((int) *ctxt->input->cur);
358}
359
#if 0
/**
 * sgmlNextChar:
 * @ctxt:  the DocBook SGML parser context
 *
 * Move the input cursor forward by one byte, keeping the line, column
 * and character counters in step. When the cursor sits on a 0 byte and
 * no more input can be read, the current input is popped instead.
 * This function is currently compiled out.
 */

static void
sgmlNextChar(docbParserCtxtPtr ctxt) {
    if (ctxt->instate == XML_PARSER_EOF)
        return;

    /* exhausted input that cannot be refilled: drop back one input level */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
        xmlPopInput(ctxt);
        return;
    }

    if (*(ctxt->input->cur) == '\n') {
        ctxt->input->line++;
        ctxt->input->col = 1;
    } else {
        ctxt->input->col++;
    }
    ctxt->input->cur++;
    ctxt->nbChars++;

    /* landed on a 0 byte: try to pull in more data */
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
#endif
386
387/**
388 * docbSkipBlankChars:
389 * @ctxt:  the DocBook SGML parser context
390 *
391 * skip all blanks character found at that point in the input streams.
392 *
393 * Returns the number of space chars skipped
394 */
395
396static int
397docbSkipBlankChars(xmlParserCtxtPtr ctxt) {
398    int res = 0;
399
400    while (IS_BLANK(*(ctxt->input->cur))) {
401       if ((*ctxt->input->cur == 0) &&
402           (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
403               xmlPopInput(ctxt);
404       } else {
405           if (*(ctxt->input->cur) == '\n') {
406               ctxt->input->line++; ctxt->input->col = 1;
407           } else ctxt->input->col++;
408           ctxt->input->cur++;
409           ctxt->nbChars++;
410           if (*ctxt->input->cur == 0)
411               xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
412       }
413       res++;
414    }
415    return(res);
416}
417
418
419
420/************************************************************************
421 *                                                                     *
422 *             The list of SGML elements and their properties          *
423 *                                                                     *
424 ************************************************************************/
425
426/*
 *  Start Tag: 1 means the start tag can be omitted
 *  End Tag:   1 means the end tag can be omitted
429 *             2 means it's forbidden (empty elements)
430 *  Depr:      this element is deprecated
431 *  DTD:       1 means that this element is valid only in the Loose DTD
432 *             2 means that this element is valid only in the Frameset DTD
433 *
434 * Name,Start Tag,End Tag,  Empty,  Depr.,    DTD, Description
435 */
436static docbElemDesc
437docbookElementTable[] = {
438{ "abbrev",    0,      0,      0,      3,      0, "" }, /* word */
439{ "abstract",  0,      0,      0,      9,      0, "" }, /* title */
440{ "accel",     0,      0,      0,      7,      0, "" }, /* smallcptr */
441{ "ackno",     0,      0,      0,      4,      0, "" }, /* docinfo */
442{ "acronym",   0,      0,      0,      3,      0, "" }, /* word */
443{ "action",    0,      0,      0,      7,      0, "" }, /* smallcptr */
444{ "address",   0,      0,      0,      1,      0, "" },
445{ "affiliation",0,     0,      0,      9,      0, "" }, /* shortaffil */
446{ "alt",       0,      0,      0,      1,      0, "" },
447{ "anchor",    0,      2,      1,      0,      0, "" },
448{ "answer",    0,      0,      0,      9,      0, "" }, /* label */
449{ "appendix",  0,      0,      0,      9,      0, "" }, /* appendixinfo */
450{ "appendixinfo",0,    0,      0,      9,      0, "" }, /* graphic */
451{ "application",0,     0,      0,      2,      0, "" }, /* para */
452{ "area",      0,      2,      1,      0,      0, "" },
453{ "areaset",   0,      0,      0,      9,      0, "" }, /* area */
454{ "areaspec",  0,      0,      0,      9,      0, "" }, /* area */
455{ "arg",       0,      0,      0,      1,      0, "" },
456{ "artheader", 0,      0,      0,      9,      0, "" },
457{ "article",   0,      0,      0,      9,      0, "" }, /* div.title.content */
458{ "articleinfo",0,     0,      0,      9,      0, "" }, /* graphic */
459{ "artpagenums",0,     0,      0,      4,      0, "" }, /* docinfo */
460{ "attribution",0,     0,      0,      2,      0, "" }, /* para */
461{ "audiodata", 0,      2,      1,      0,      0, "" },
462{ "audioobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
463{ "authorblurb",0,     0,      0,      9,      0, "" }, /* title */
464{ "authorgroup",0,     0,      0,      9,      0, "" }, /* author */
465{ "authorinitials",0,  0,      0,      4,      0, "" }, /* docinfo */
466{ "author",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
467{ "beginpage", 0,      2,      1,      0,      0, "" },
468{ "bibliodiv", 0,      0,      0,      9,      0, "" }, /* sect.title.content */
469{ "biblioentry",0,     0,      0,      9,      0, "" }, /* articleinfo */
470{ "bibliography",0,    0,      0,      9,      0, "" }, /* bibliographyinfo */
471{ "bibliographyinfo",0,        0,      0,      9,      0, "" }, /* graphic */
472{ "bibliomisc",        0,      0,      0,      2,      0, "" }, /* para */
473{ "bibliomixed",0,     0,      0,      1,      0, "" }, /* %bibliocomponent.mix, bibliomset) */
474{ "bibliomset",        0,      0,      0,      1,      0, "" }, /* %bibliocomponent.mix; | bibliomset) */
475{ "biblioset", 0,      0,      0,      9,      0, "" }, /* bibliocomponent.mix */
476{ "blockquote",        0,      0,      0,      9,      0, "" }, /* title */
477{ "book",      0,      0,      0,      9,      0, "" }, /* div.title.content */
478{ "bookinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
479{ "bridgehead",        0,      0,      0,      8,      0, "" }, /* title */
480{ "callout",   0,      0,      0,      9,      0, "" }, /* component.mix */
481{ "calloutlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
482{ "caption",   0,      0,      0,      9,      0, "" }, /* textobject.mix */
483{ "caution",   0,      0,      0,      9,      0, "" }, /* title */
484{ "chapter",   0,      0,      0,      9,      0, "" }, /* chapterinfo */
485{ "chapterinfo",0,     0,      0,      9,      0, "" }, /* graphic */
486{ "citation",  0,      0,      0,      2,      0, "" }, /* para */
487{ "citerefentry",0,    0,      0,      9,      0, "" }, /* refentrytitle */
488{ "citetitle", 0,      0,      0,      2,      0, "" }, /* para */
489{ "city",      0,      0,      0,      4,      0, "" }, /* docinfo */
490{ "classname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
491{ "classsynopsisinfo",0,0,     0,      9,      0, "" }, /* cptr */
492{ "classsynopsis",0,   0,      0,      9,      0, "" }, /* ooclass */
493{ "cmdsynopsis",0,     0,      0,      9,      0, "" }, /* command */
494{ "co",                0,      2,      1,      0,      0, "" },
495{ "collab",    0,      0,      0,      9,      0, "" }, /* collabname */
496{ "collabname",        0,      0,      0,      4,      0, "" }, /* docinfo */
497{ "colophon",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
498{ "colspec",   0,      2,      1,      0,      0, "" },
499{ "colspec",   0,      2,      1,      0,      0, "" },
500{ "command",   0,      0,      0,      9,      0, "" }, /* cptr */
501{ "computeroutput",0,  0,      0,      9,      0, "" }, /* cptr */
502{ "confdates", 0,      0,      0,      4,      0, "" }, /* docinfo */
503{ "confgroup", 0,      0,      0,      9,      0, "" }, /* confdates */
504{ "confnum",   0,      0,      0,      4,      0, "" }, /* docinfo */
505{ "confsponsor",0,     0,      0,      4,      0, "" }, /* docinfo */
506{ "conftitle", 0,      0,      0,      4,      0, "" }, /* docinfo */
507{ "constant",  0,      0,      0,      7,      0, "" }, /* smallcptr */
508{ "constructorsynopsis",0,0,   0,      9,      0, "" }, /* modifier */
509{ "contractnum",0,     0,      0,      4,      0, "" }, /* docinfo */
510{ "contractsponsor",0, 0,      0,      4,      0, "" }, /* docinfo */
511{ "contrib",   0,      0,      0,      4,      0, "" }, /* docinfo */
512{ "copyright", 0,      0,      0,      9,      0, "" }, /* year */
513{ "corpauthor",        0,      0,      0,      4,      0, "" }, /* docinfo */
514{ "corpname",  0,      0,      0,      4,      0, "" }, /* docinfo */
515{ "country",   0,      0,      0,      4,      0, "" }, /* docinfo */
516{ "database",  0,      0,      0,      7,      0, "" }, /* smallcptr */
517{ "date",      0,      0,      0,      4,      0, "" }, /* docinfo */
518{ "dedication",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
519{ "destructorsynopsis",0,0,    0,      9,      0, "" }, /* modifier */
520{ "docinfo",   0,      0,      0,      9,      0, "" },
521{ "edition",   0,      0,      0,      4,      0, "" }, /* docinfo */
522{ "editor",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
523{ "email",     0,      0,      0,      4,      0, "" }, /* docinfo */
524{ "emphasis",  0,      0,      0,      2,      0, "" }, /* para */
525{ "entry",     0,      0,      0,      9,      0, "" }, /* tbl.entry.mdl */
526{ "entrytbl",  0,      0,      0,      9,      0, "" }, /* tbl.entrytbl.mdl */
527{ "envar",     0,      0,      0,      7,      0, "" }, /* smallcptr */
528{ "epigraph",  0,      0,      0,      9,      0, "" }, /* attribution */
529{ "equation",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
530{ "errorcode", 0,      0,      0,      7,      0, "" }, /* smallcptr */
531{ "errorname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
532{ "errortype", 0,      0,      0,      7,      0, "" }, /* smallcptr */
533{ "example",   0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
534{ "exceptionname",0,   0,      0,      7,      0, "" }, /* smallcptr */
535{ "fax",       0,      0,      0,      4,      0, "" }, /* docinfo */
536{ "fieldsynopsis",     0,      0,      0,      9,      0, "" }, /* modifier */
537{ "figure",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
538{ "filename",  0,      0,      0,      7,      0, "" }, /* smallcptr */
539{ "firstname", 0,      0,      0,      4,      0, "" }, /* docinfo */
540{ "firstterm", 0,      0,      0,      3,      0, "" }, /* word */
541{ "footnote",  0,      0,      0,      9,      0, "" }, /* footnote.mix */
542{ "footnoteref",0,     2,      1,      0,      0, "" },
543{ "foreignphrase",0,   0,      0,      2,      0, "" }, /* para */
544{ "formalpara",        0,      0,      0,      9,      0, "" }, /* title */
545{ "funcdef",   0,      0,      0,      1,      0, "" },
546{ "funcparams",        0,      0,      0,      9,      0, "" }, /* cptr */
547{ "funcprototype",0,   0,      0,      9,      0, "" }, /* funcdef */
548{ "funcsynopsis",0,    0,      0,      9,      0, "" }, /* funcsynopsisinfo */
549{ "funcsynopsisinfo",  0,      0,      0,      9,      0, "" }, /* cptr */
550{ "function",  0,      0,      0,      9,      0, "" }, /* cptr */
551{ "glossary",  0,      0,      0,      9,      0, "" }, /* glossaryinfo */
552{ "glossaryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
553{ "glossdef",  0,      0,      0,      9,      0, "" }, /* glossdef.mix */
554{ "glossdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
555{ "glossentry",        0,      0,      0,      9,      0, "" }, /* glossterm */
556{ "glosslist", 0,      0,      0,      9,      0, "" }, /* glossentry */
557{ "glossseealso",0,    0,      1,      2,      0, "" }, /* para */
558{ "glosssee",  0,      0,      1,      2,      0, "" }, /* para */
559{ "glossterm", 0,      0,      0,      2,      0, "" }, /* para */
560{ "graphic",   0,      0,      0,      9,      0, "" },
561{ "graphicco", 0,      0,      0,      9,      0, "" }, /* areaspec */
562{ "group",     0,      0,      0,      9,      0, "" }, /* arg */
563{ "guibutton", 0,      0,      0,      7,      0, "" }, /* smallcptr */
564{ "guiicon",   0,      0,      0,      7,      0, "" }, /* smallcptr */
565{ "guilabel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
566{ "guimenuitem",0,     0,      0,      7,      0, "" }, /* smallcptr */
567{ "guimenu",   0,      0,      0,      7,      0, "" }, /* smallcptr */
568{ "guisubmenu",        0,      0,      0,      7,      0, "" }, /* smallcptr */
569{ "hardware",  0,      0,      0,      7,      0, "" }, /* smallcptr */
570{ "highlights",        0,      0,      0,      9,      0, "" }, /* highlights.mix */
571{ "holder",    0,      0,      0,      4,      0, "" }, /* docinfo */
572{ "honorific", 0,      0,      0,      4,      0, "" }, /* docinfo */
573{ "imagedata", 0,      2,      1,      0,      0, "" },
574{ "imageobjectco",0,   0,      0,      9,      0, "" }, /* areaspec */
575{ "imageobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
576{ "important", 0,      0,      0,      9,      0, "" }, /* title */
577{ "indexdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
578{ "indexentry",        0,      0,      0,      9,      0, "" }, /* primaryie */
579{ "index",     0,      0,      0,      9,      0, "" }, /* indexinfo */
580{ "indexinfo", 0,      0,      0,      9,      0, "" }, /* graphic */
581{ "indexterm", 0,      0,      0,      9,      0, "" }, /* primary */
582{ "informalequation",0,        0,      0,      9,      0, "" }, /* equation.content */
583{ "informalexample",0, 0,      0,      9,      0, "" }, /* example.mix */
584{ "informalfigure",0,  0,      0,      9,      0, "" }, /* figure.mix */
585{ "informaltable",0,   0,      0,      9,      0, "" }, /* graphic */
586{ "initializer",0,     0,      0,      7,      0, "" }, /* smallcptr */
587{ "inlineequation",0,  0,      0,      9,      0, "" }, /* inlineequation.content */
588{ "inlinegraphic",0,   0,      0,      9,      0, "" },
589{ "inlinemediaobject",0,0,     0,      9,      0, "" }, /* objectinfo */
590{ "interfacename",0,   0,      0,      7,      0, "" }, /* smallcptr */
591{ "interface", 0,      0,      0,      7,      0, "" }, /* smallcptr */
592{ "invpartnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
593{ "isbn",      0,      0,      0,      4,      0, "" }, /* docinfo */
594{ "issn",      0,      0,      0,      4,      0, "" }, /* docinfo */
595{ "issuenum",  0,      0,      0,      4,      0, "" }, /* docinfo */
596{ "itemizedlist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
597{ "itermset",  0,      0,      0,      9,      0, "" }, /* indexterm */
598{ "jobtitle",  0,      0,      0,      4,      0, "" }, /* docinfo */
599{ "keycap",    0,      0,      0,      7,      0, "" }, /* smallcptr */
600{ "keycode",   0,      0,      0,      7,      0, "" }, /* smallcptr */
601{ "keycombo",  0,      0,      0,      9,      0, "" }, /* keycap */
602{ "keysym",    0,      0,      0,      7,      0, "" }, /* smallcptr */
603{ "keyword",   0,      0,      0,      1,      0, "" },
604{ "keywordset",        0,      0,      0,      9,      0, "" }, /* keyword */
605{ "label",     0,      0,      0,      3,      0, "" }, /* word */
606{ "legalnotice",0,     0,      0,      9,      0, "" }, /* title */
607{ "lineage",   0,      0,      0,      4,      0, "" }, /* docinfo */
608{ "lineannotation",0,  0,      0,      2,      0, "" }, /* para */
609{ "link",      0,      0,      0,      2,      0, "" }, /* para */
610{ "listitem",  0,      0,      0,      9,      0, "" }, /* component.mix */
611{ "literal",   0,      0,      0,      9,      0, "" }, /* cptr */
612{ "literallayout",0,   0,      0,      2,      0, "" }, /* para */
613{ "lot",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
614{ "lotentry",  0,      0,      0,      2,      0, "" }, /* para */
615{ "manvolnum", 0,      0,      0,      3,      0, "" }, /* word */
616{ "markup",    0,      0,      0,      7,      0, "" }, /* smallcptr */
617{ "medialabel",        0,      0,      0,      7,      0, "" }, /* smallcptr */
618{ "mediaobjectco",0,   0,      0,      9,      0, "" }, /* objectinfo */
619{ "mediaobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
620{ "member",    0,      0,      0,      2,      0, "" }, /* para */
621{ "menuchoice",        0,      0,      0,      9,      0, "" }, /* shortcut */
622{ "methodname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
623{ "methodparam",0,     0,      0,      9,      0, "" }, /* modifier */
624{ "methodsynopsis",0,  0,      0,      9,      0, "" }, /* modifier */
625{ "modespec",  0,      0,      0,      4,      0, "" }, /* docinfo */
626{ "modifier",  0,      0,      0,      7,      0, "" }, /* smallcptr */
627{ "mousebutton",0,     0,      0,      7,      0, "" }, /* smallcptr */
628{ "msgaud",    0,      0,      0,      2,      0, "" }, /* para */
629{ "msgentry",  0,      0,      0,      9,      0, "" }, /* msg */
630{ "msgexplan", 0,      0,      0,      9,      0, "" }, /* title */
631{ "msginfo",   0,      0,      0,      9,      0, "" }, /* msglevel */
632{ "msglevel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
633{ "msgmain",   0,      0,      0,      9,      0, "" }, /* title */
634{ "msgorig",   0,      0,      0,      7,      0, "" }, /* smallcptr */
635{ "msgrel",    0,      0,      0,      9,      0, "" }, /* title */
636{ "msgset",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
637{ "msgsub",    0,      0,      0,      9,      0, "" }, /* title */
638{ "msgtext",   0,      0,      0,      9,      0, "" }, /* component.mix */
639{ "msg",       0,      0,      0,      9,      0, "" }, /* title */
640{ "note",      0,      0,      0,      9,      0, "" }, /* title */
641{ "objectinfo",        0,      0,      0,      9,      0, "" }, /* graphic */
642{ "olink",     0,      0,      0,      2,      0, "" }, /* para */
643{ "ooclass",   0,      0,      0,      9,      0, "" }, /* modifier */
644{ "ooexception",0,     0,      0,      9,      0, "" }, /* modifier */
645{ "oointerface",0,     0,      0,      9,      0, "" }, /* modifier */
646{ "optional",  0,      0,      0,      9,      0, "" }, /* cptr */
647{ "option",    0,      0,      0,      7,      0, "" }, /* smallcptr */
648{ "orderedlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
649{ "orgdiv",    0,      0,      0,      4,      0, "" }, /* docinfo */
650{ "orgname",   0,      0,      0,      4,      0, "" }, /* docinfo */
651{ "otheraddr", 0,      0,      0,      4,      0, "" }, /* docinfo */
652{ "othercredit",0,     0,      0,      9,      0, "" }, /* person.ident.mix */
653{ "othername", 0,      0,      0,      4,      0, "" }, /* docinfo */
654{ "pagenums",  0,      0,      0,      4,      0, "" }, /* docinfo */
655{ "paramdef",  0,      0,      0,      1,      0, "" },
656{ "parameter", 0,      0,      0,      7,      0, "" }, /* smallcptr */
657{ "para",      0,      0,      0,      2,      0, "" }, /* para */
658{ "partinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
659{ "partintro", 0,      0,      0,      9,      0, "" }, /* div.title.content */
660{ "part",      0,      0,      0,      9,      0, "" }, /* partinfo */
661{ "phone",     0,      0,      0,      4,      0, "" }, /* docinfo */
662{ "phrase",    0,      0,      0,      2,      0, "" }, /* para */
663{ "pob",       0,      0,      0,      4,      0, "" }, /* docinfo */
664{ "postcode",  0,      0,      0,      4,      0, "" }, /* docinfo */
665{ "prefaceinfo",0,     0,      0,      9,      0, "" }, /* graphic */
666{ "preface",   0,      0,      0,      9,      0, "" }, /* prefaceinfo */
667{ "primaryie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
668{ "primary", 0,      0,      0,      9,      0, "" }, /* ndxterm */
669{ "printhistory",0,    0,      0,      9,      0, "" }, /* para.class */
670{ "procedure", 0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
671{ "productname",0,     0,      0,      2,      0, "" }, /* para */
672{ "productnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
673{ "programlistingco",0,        0,      0,      9,      0, "" }, /* areaspec */
674{ "programlisting",0,  0,      0,      2,      0, "" }, /* para */
675{ "prompt",    0,      0,      0,      7,      0, "" }, /* smallcptr */
676{ "property",  0,      0,      0,      7,      0, "" }, /* smallcptr */
677{ "pubdate",   0,      0,      0,      4,      0, "" }, /* docinfo */
678{ "publishername",0,   0,      0,      4,      0, "" }, /* docinfo */
679{ "publisher", 0,      0,      0,      9,      0, "" }, /* publishername */
680{ "pubsnumber",        0,      0,      0,      4,      0, "" }, /* docinfo */
681{ "qandadiv",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
682{ "qandaentry",        0,      0,      0,      9,      0, "" }, /* revhistory */
683{ "qandaset",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
684{ "question",  0,      0,      0,      9,      0, "" }, /* label */
685{ "quote",     0,      0,      0,      2,      0, "" }, /* para */
686{ "refclass",  0,      0,      0,      9,      0, "" }, /* refclass.char.mix */
687{ "refdescriptor",0,   0,      0,      9,      0, "" }, /* refname.char.mix */
688{ "refentryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
689{ "refentry",  0,      0,      0,      9,      0, "" }, /* ndxterm.class */
690{ "refentrytitle",0,   0,      0,      2,      0, "" }, /* para */
691{ "referenceinfo",0,   0,      0,      9,      0, "" }, /* graphic */
692{ "reference", 0,      0,      0,      9,      0, "" }, /* referenceinfo */
693{ "refmeta",   0,      0,      0,      9,      0, "" }, /* ndxterm.class */
694{ "refmiscinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
695{ "refnamediv",        0,      0,      0,      9,      0, "" }, /* refdescriptor */
696{ "refname",   0,      0,      0,      9,      0, "" }, /* refname.char.mix */
697{ "refpurpose",        0,      0,      0,      9,      0, "" }, /* refinline.char.mix */
698{ "refsect1info",0,    0,      0,      9,      0, "" }, /* graphic */
699{ "refsect1",  0,      0,      0,      9,      0, "" }, /* refsect */
700{ "refsect2info",0,    0,      0,      9,      0, "" }, /* graphic */
701{ "refsect2",  0,      0,      0,      9,      0, "" }, /* refsect */
702{ "refsect3info",0,    0,      0,      9,      0, "" }, /* graphic */
703{ "refsect3",  0,      0,      0,      9,      0, "" }, /* refsect */
704{ "refsynopsisdivinfo",0,0,    0,      9,      0, "" }, /* graphic */
705{ "refsynopsisdiv",0,  0,      0,      9,      0, "" }, /* refsynopsisdivinfo */
706{ "releaseinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
707{ "remark",    0,      0,      0,      2,      0, "" }, /* para */
708{ "replaceable",0,     0,      0,      1,      0, "" },
709{ "returnvalue",0,     0,      0,      7,      0, "" }, /* smallcptr */
710{ "revdescription",0,  0,      0,      9,      0, "" }, /* revdescription.mix */
711{ "revhistory",        0,      0,      0,      9,      0, "" }, /* revision */
712{ "revision",  0,      0,      0,      9,      0, "" }, /* revnumber */
713{ "revnumber", 0,      0,      0,      4,      0, "" }, /* docinfo */
714{ "revremark", 0,      0,      0,      4,      0, "" }, /* docinfo */
715{ "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
716{ "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
717{ "sbr",       0,      2,      1,      0,      0, "" },
718{ "screenco",  0,      0,      0,      9,      0, "" }, /* areaspec */
719{ "screeninfo",        0,      0,      0,      2,      0, "" }, /* para */
720{ "screen",    0,      0,      0,      2,      0, "" }, /* para */
721{ "screenshot",        0,      0,      0,      9,      0, "" }, /* screeninfo */
722{ "secondaryie",0,     0,      0,      4,      0, "" }, /* ndxterm */
723{ "secondary", 0,      0,      0,      4,      0, "" }, /* ndxterm */
724{ "sect1info", 0,      0,      0,      9,      0, "" }, /* graphic */
725{ "sect1",     0,      0,      0,      9,      0, "" }, /* sect */
726{ "sect2info", 0,      0,      0,      9,      0, "" }, /* graphic */
727{ "sect2",     0,      0,      0,      9,      0, "" }, /* sect */
728{ "sect3info", 0,      0,      0,      9,      0, "" }, /* graphic */
729{ "sect3",     0,      0,      0,      9,      0, "" }, /* sect */
730{ "sect4info", 0,      0,      0,      9,      0, "" }, /* graphic */
731{ "sect4",     0,      0,      0,      9,      0, "" }, /* sect */
732{ "sect5info", 0,      0,      0,      9,      0, "" }, /* graphic */
733{ "sect5",     0,      0,      0,      9,      0, "" }, /* sect */
734{ "sectioninfo",0,     0,      0,      9,      0, "" }, /* graphic */
735{ "section",   0,      0,      0,      9,      0, "" }, /* sectioninfo */
736{ "seealsoie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
737{ "seealso",   0,      0,      0,      4,      0, "" }, /* ndxterm */
738{ "seeie",     0,      0,      0,      4,      0, "" }, /* ndxterm */
739{ "see",       0,      0,      0,      4,      0, "" }, /* ndxterm */
740{ "seglistitem",0,     0,      0,      9,      0, "" }, /* seg */
741{ "segmentedlist",0,   0,      0,      9,      0, "" }, /* formalobject.title.content */
742{ "seg",       0,      0,      0,      2,      0, "" }, /* para */
743{ "segtitle",  0,      0,      0,      8,      0, "" }, /* title */
744{ "seriesvolnums",     0,      0,      0,      4,      0, "" }, /* docinfo */
745{ "set",       0,      0,      0,      9,      0, "" }, /* div.title.content */
746{ "setindexinfo",0,    0,      0,      9,      0, "" }, /* graphic */
747{ "setindex",  0,      0,      0,      9,      0, "" }, /* setindexinfo */
748{ "setinfo",   0,      0,      0,      9,      0, "" }, /* graphic */
749{ "sgmltag",   0,      0,      0,      7,      0, "" }, /* smallcptr */
750{ "shortaffil",        0,      0,      0,      4,      0, "" }, /* docinfo */
751{ "shortcut",  0,      0,      0,      9,      0, "" }, /* keycap */
752{ "sidebarinfo",0,     0,      0,      9,      0, "" }, /* graphic */
753{ "sidebar",   0,      0,      0,      9,      0, "" }, /* sidebarinfo */
754{ "simpara",   0,      0,      0,      2,      0, "" }, /* para */
755{ "simplelist",        0,      0,      0,      9,      0, "" }, /* member */
756{ "simplemsgentry",    0,      0,      0,      9,      0, "" }, /* msgtext */
757{ "simplesect",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
758{ "spanspec",  0,      2,      1,      0,      0, "" },
759{ "state",     0,      0,      0,      4,      0, "" }, /* docinfo */
760{ "step",      0,      0,      0,      9,      0, "" }, /* title */
761{ "street",    0,      0,      0,      4,      0, "" }, /* docinfo */
762{ "structfield",0,     0,      0,      7,      0, "" }, /* smallcptr */
763{ "structname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
764{ "subjectset",        0,      0,      0,      9,      0, "" }, /* subject */
765{ "subject",   0,      0,      0,      9,      0, "" }, /* subjectterm */
766{ "subjectterm",0,     0,      0,      1,      0, "" },
767{ "subscript", 0,      0,      0,      1,      0, "" },
768{ "substeps",  0,      0,      0,      9,      0, "" }, /* step */
769{ "subtitle",  0,      0,      0,      8,      0, "" }, /* title */
770{ "superscript",       0,      0,      0,      1,      0, "" },
771{ "surname",   0,      0,      0,      4,      0, "" }, /* docinfo */
772{ "symbol",    0,      0,      0,      7,      0, "" }, /* smallcptr */
773{ "synopfragment",     0,      0,      0,      9,      0, "" }, /* arg */
774{ "synopfragmentref",  0,      0,      0,      1,      0, "" },
775{ "synopsis",  0,      0,      0,      2,      0, "" }, /* para */
776{ "systemitem",        0,      0,      0,      7,      0, "" }, /* smallcptr */
777{ "table",     0,      0,      0,      9,      0, "" }, /* tbl.table.mdl */
778/* { "%tbl.table.name;",       0,      0,      0,      9,      0, "" },*/ /* tbl.table.mdl */
779{ "tbody",     0,      0,      0,      9,      0, "" }, /* row */
780{ "tbody",     0,      0,      0,      9,      0, "" }, /* row */
781{ "term",      0,      0,      0,      2,      0, "" }, /* para */
782{ "tertiaryie",        0,      0,      0,      4,      0, "" }, /* ndxterm */
783{ "tertiary ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
784{ "textobject",        0,      0,      0,      9,      0, "" }, /* objectinfo */
785{ "tfoot",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
786{ "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
787{ "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
788{ "thead",     0,      0,      0,      9,      0, "" }, /* row */
789{ "thead",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
790{ "tip",       0,      0,      0,      9,      0, "" }, /* title */
791{ "titleabbrev",0,     0,      0,      8,      0, "" }, /* title */
792{ "title",     0,      0,      0,      8,      0, "" }, /* title */
793{ "tocback",   0,      0,      0,      2,      0, "" }, /* para */
794{ "toc",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
795{ "tocchap",   0,      0,      0,      9,      0, "" }, /* tocentry */
796{ "tocentry",  0,      0,      0,      2,      0, "" }, /* para */
797{ "tocfront",  0,      0,      0,      2,      0, "" }, /* para */
798{ "toclevel1", 0,      0,      0,      9,      0, "" }, /* tocentry */
799{ "toclevel2", 0,      0,      0,      9,      0, "" }, /* tocentry */
800{ "toclevel3", 0,      0,      0,      9,      0, "" }, /* tocentry */
801{ "toclevel4", 0,      0,      0,      9,      0, "" }, /* tocentry */
802{ "toclevel5", 0,      0,      0,      9,      0, "" }, /* tocentry */
803{ "tocpart",   0,      0,      0,      9,      0, "" }, /* tocentry */
804{ "token",     0,      0,      0,      7,      0, "" }, /* smallcptr */
805{ "trademark", 0,      0,      0,      1,      0, "" },
806{ "type",      0,      0,      0,      7,      0, "" }, /* smallcptr */
807{ "ulink",     0,      0,      0,      2,      0, "" }, /* para */
808{ "userinput", 0,      0,      0,      9,      0, "" }, /* cptr */
809{ "varargs",   0,      2,      1,      0,      0, "" },
810{ "variablelist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
811{ "varlistentry",0,    0,      0,      9,      0, "" }, /* term */
812{ "varname",   0,      0,      0,      7,      0, "" }, /* smallcptr */
813{ "videodata", 0,      2,      1,      0,      0, "" },
814{ "videoobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
815{ "void",      0,      2,      1,      0,      0, "" },
816{ "volumenum", 0,      0,      0,      4,      0, "" }, /* docinfo */
817{ "warning",   0,      0,      0,      9,      0, "" }, /* title */
818{ "wordasword",        0,      0,      0,      3,      0, "" }, /* word */
819{ "xref",      0,      2,      1,      0,      0, "" },
820{ "year",      0,      0,      0,      4,      0, "" }, /* docinfo */
821};
822
#if 0
/*
 * Groups of start tags that imply the end of the current element.
 * Each group is terminated by NULL and the whole table by an extra NULL:
 * a start tag belonging to a group ends the current element when the
 * current element's type belongs to that same group.
 *
 * NOTE(review): the entries look like HTML element names and the table is
 * compiled out; presumably a leftover from the HTML parser -- confirm
 * before enabling.
 */
static const char *docbEquEnd[] = {
    "dt", "dd", "li", "option",
    NULL,
    "h1", "h2", "h3", "h4", "h5", "h6",
    NULL,
    "ol", "menu", "dir", "address", "pre", "listing", "xmp",
    NULL,
    NULL
};
#endif
836
/*
 * According to the SGML DTD, HR should be added to the 2nd line of the
 * table above, as it is not allowed within a H1, H2, H3, etc. But we should
 * tolerate that case because many documents contain rules in headings...
 */
842
/*
 * Start tags that imply the end of the current element.
 *
 * Layout expected by docbInitAutoClose() and docbCheckAutoClose():
 * a sequence of NULL-terminated groups, the first name of a group being
 * the newly opened tag and the following names the open tags it closes.
 * A NULL found where a group should start ends the table.
 * The table currently holds no group at all.
 */
static const char *docbStartClose[] = { NULL };
849
/*
 * NULL-terminated list of SGML elements which are not supposed to hold
 * CDATA content directly and inside which a p element gets implied
 * (see docbCheckParagraph()). The list is currently empty.
 *
 * TODO: extend that list by reading the SGML DTD on implied paragraphs.
 */
static char *docbNoContentElements[] = { NULL };
860
861
/* One pointer per group of docbStartClose, filled by docbInitAutoClose(). */
static const char **docbStartCloseIndex[100];
/* Tested by docbInitAutoClose() and docbCheckAutoClose() to skip the setup. */
static int docbStartCloseIndexinitialized = 0;
864
865/************************************************************************
866 *                                                                     *
867 *             functions to handle SGML specific data                  *
868 *                                                                     *
869 ************************************************************************/
870
871/**
872 * docbInitAutoClose:
873 *
874 * Initialize the docbStartCloseIndex for fast lookup of closing tags names.
875 *
876 */
877static void
878docbInitAutoClose(void) {
879    int indx, i = 0;
880
881    if (docbStartCloseIndexinitialized) return;
882
883    for (indx = 0;indx < 100;indx ++) docbStartCloseIndex[indx] = NULL;
884    indx = 0;
885    while ((docbStartClose[i] != NULL) && (indx < 100 - 1)) {
886        docbStartCloseIndex[indx++] = &docbStartClose[i];
887       while (docbStartClose[i] != NULL) i++;
888       i++;
889    }
890}
891
892/**
893 * docbTagLookup:
894 * @tag:  The tag name
895 *
896 * Lookup the SGML tag in the ElementTable
897 *
898 * Returns the related docbElemDescPtr or NULL if not found.
899 */
900static docbElemDescPtr
901docbTagLookup(const xmlChar *tag) {
902    unsigned int i;
903
904    for (i = 0; i < (sizeof(docbookElementTable) /
905                     sizeof(docbookElementTable[0]));i++) {
906        if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
907           return(&docbookElementTable[i]);
908    }
909    return(NULL);
910}
911
912/**
913 * docbCheckAutoClose:
914 * @newtag:  The new tag name
915 * @oldtag:  The old tag name
916 *
917 * Checks whether the new tag is one of the registered valid tags for
918 * closing old.
919 * Initialize the docbStartCloseIndex for fast lookup of closing tags names.
920 *
921 * Returns 0 if no, 1 if yes.
922 */
923static int
924docbCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
925    int i, indx;
926    const char **closed = NULL;
927
928    if (docbStartCloseIndexinitialized == 0) docbInitAutoClose();
929
930    /* inefficient, but not a big deal */
931    for (indx = 0; indx < 100;indx++) {
932        closed = docbStartCloseIndex[indx];
933       if (closed == NULL) return(0);
934       if (xmlStrEqual(BAD_CAST *closed, newtag)) break;
935    }
936
937    i = closed - docbStartClose;
938    i++;
939    while (docbStartClose[i] != NULL) {
940        if (xmlStrEqual(BAD_CAST docbStartClose[i], oldtag)) {
941           return(1);
942       }
943       i++;
944    }
945    return(0);
946}
947
948/**
949 * docbAutoCloseOnClose:
950 * @ctxt:  an SGML parser context
951 * @newtag:  The new tag name
952 *
953 * The DocBook DTD allows an ending tag to implicitly close other tags.
954 */
955static void
956docbAutoCloseOnClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
957    docbElemDescPtr info;
958    xmlChar *oldname;
959    int i;
960
961    if ((newtag[0] == '/') && (newtag[1] == 0))
962       return;
963
964#ifdef DEBUG
965    xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
966    for (i = 0;i < ctxt->nameNr;i++)
967        xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
968#endif
969
970    for (i = (ctxt->nameNr - 1);i >= 0;i--) {
971        if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
972    }
973    if (i < 0) return;
974
975    while (!xmlStrEqual(newtag, ctxt->name)) {
976       info = docbTagLookup(ctxt->name);
977       if ((info == NULL) || (info->endTag == 1)) {
978#ifdef DEBUG
979           xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
980#endif
981        } else {
982           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983               ctxt->sax->error(ctxt->userData,
984                "Opening and ending tag mismatch: %s and %s\n",
985                                newtag, ctxt->name);
986           ctxt->wellFormed = 0;
987       }
988       if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
989           ctxt->sax->endElement(ctxt->userData, ctxt->name);
990       oldname = docbnamePop(ctxt);
991       if (oldname != NULL) {
992#ifdef DEBUG
993           xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: popped %s\n", oldname);
994#endif
995           xmlFree(oldname);
996       }
997    }
998}
999
1000/**
1001 * docbAutoClose:
1002 * @ctxt:  an SGML parser context
1003 * @newtag:  The new tag name or NULL
1004 *
1005 * The DocBook DTD allows a tag to implicitly close other tags.
1006 * The list is kept in docbStartClose array. This function is
1007 * called when a new tag has been detected and generates the
1008 * appropriates closes if possible/needed.
1009 * If newtag is NULL this mean we are at the end of the resource
1010 * and we should check
1011 */
1012static void
1013docbAutoClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) {
1014    xmlChar *oldname;
1015    while ((newtag != NULL) && (ctxt->name != NULL) &&
1016           (docbCheckAutoClose(newtag, ctxt->name))) {
1017#ifdef DEBUG
1018       xmlGenericError(xmlGenericErrorContext,"docbAutoClose: %s closes %s\n", newtag, ctxt->name);
1019#endif
1020       if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1021           ctxt->sax->endElement(ctxt->userData, ctxt->name);
1022       oldname = docbnamePop(ctxt);
1023       if (oldname != NULL) {
1024#ifdef DEBUG
1025           xmlGenericError(xmlGenericErrorContext,"docbAutoClose: popped %s\n", oldname);
1026#endif
1027           xmlFree(oldname);
1028        }
1029    }
1030}
1031
1032/**
1033 * docbAutoCloseTag:
1034 * @doc:  the SGML document
1035 * @name:  The tag name
1036 * @elem:  the SGML element
1037 *
1038 * The DocBook DTD allows a tag to implicitly close other tags.
1039 * The list is kept in docbStartClose array. This function checks
1040 * if the element or one of it's children would autoclose the
1041 * given tag.
1042 *
1043 * Returns 1 if autoclose, 0 otherwise
1044 */
1045static int
1046docbAutoCloseTag(docbDocPtr doc, const xmlChar *name, docbNodePtr elem) {
1047    docbNodePtr child;
1048
1049    if (elem == NULL) return(1);
1050    if (xmlStrEqual(name, elem->name)) return(0);
1051    if (docbCheckAutoClose(elem->name, name)) return(1);
1052    child = elem->children;
1053    while (child != NULL) {
1054        if (docbAutoCloseTag(doc, name, child)) return(1);
1055       child = child->next;
1056    }
1057    return(0);
1058}
1059
#if 0
/**
 * docbIsAutoClosed:
 * @doc:  the SGML document
 * @elem:  the SGML element
 *
 * The list is kept in docbStartClose array. This function checks
 * if a tag is autoclosed by one of its children, by running
 * docbAutoCloseTag() on each direct child with the name of @elem.
 * A NULL @elem is reported as autoclosed.
 *
 * Returns 1 if autoclosed, 0 otherwise
 */
static int
docbIsAutoClosed(docbDocPtr doc, docbNodePtr elem) {
    docbNodePtr cur;

    if (elem == NULL)
        return(1);

    for (cur = elem->children; cur != NULL; cur = cur->next) {
        if (docbAutoCloseTag(doc, elem->name, cur))
            return(1);
    }
    return(0);
}
#endif
1084
1085/**
1086 * docbCheckParagraph
1087 * @ctxt:  an SGML parser context
1088 *
1089 * Check whether a p element need to be implied before inserting
1090 * characters in the current element.
1091 *
1092 * Returns 1 if a paragraph has been inserted, 0 if not and -1
1093 *         in case of error.
1094 */
1095
1096static int
1097docbCheckParagraph(docbParserCtxtPtr ctxt) {
1098    const xmlChar *tag;
1099    int i;
1100
1101    if (ctxt == NULL)
1102       return(-1);
1103    tag = ctxt->name;
1104    if (tag == NULL) {
1105       docbAutoClose(ctxt, BAD_CAST"p");
1106       docbnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
1107       if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1108           ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1109       return(1);
1110    }
1111    for (i = 0; docbNoContentElements[i] != NULL; i++) {
1112       if (xmlStrEqual(tag, BAD_CAST docbNoContentElements[i])) {
1113#ifdef DEBUG
1114           xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n");
1115#endif
1116           docbAutoClose(ctxt, BAD_CAST"p");
1117           docbnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
1118           if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1119               ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1120           return(1);
1121       }
1122    }
1123    return(0);
1124}
1125
1126/************************************************************************
1127 *                                                                     *
1128 *             The list of SGML predefined entities                    *
1129 *                                                                     *
1130 ************************************************************************/
1131
1132
1133static docbEntityDesc
1134docbookEntitiesTable[] = {
1135/*
1136 * the 4 absolute ones, plus apostrophe.
1137 */
1138{ 0x0026, "amp", "AMPERSAND" },
1139{ 0x003C, "lt",        "LESS-THAN SIGN" },
1140
1141/*
1142 * Converted with VI macros from docbook ent files
1143 */
1144{ 0x0021, "excl", "EXCLAMATION MARK" },
1145{ 0x0022, "quot", "QUOTATION MARK" },
1146{ 0x0023, "num", "NUMBER SIGN" },
1147{ 0x0024, "dollar", "DOLLAR SIGN" },
1148{ 0x0025, "percnt", "PERCENT SIGN" },
1149{ 0x0027, "apos", "APOSTROPHE" },
1150{ 0x0028, "lpar", "LEFT PARENTHESIS" },
1151{ 0x0029, "rpar", "RIGHT PARENTHESIS" },
1152{ 0x002A, "ast", "ASTERISK OPERATOR" },
1153{ 0x002B, "plus", "PLUS SIGN" },
1154{ 0x002C, "comma", "COMMA" },
1155{ 0x002D, "hyphen", "HYPHEN-MINUS" },
1156{ 0x002E, "period", "FULL STOP" },
1157{ 0x002F, "sol", "SOLIDUS" },
1158{ 0x003A, "colon", "COLON" },
1159{ 0x003B, "semi", "SEMICOLON" },
1160{ 0x003D, "equals", "EQUALS SIGN" },
1161{ 0x003E, "gt", "GREATER-THAN SIGN" },
1162{ 0x003F, "quest", "QUESTION MARK" },
1163{ 0x0040, "commat", "COMMERCIAL AT" },
1164{ 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
1165{ 0x005C, "bsol", "REVERSE SOLIDUS" },
1166{ 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
1167{ 0x005E, "circ", "RING OPERATOR" },
1168{ 0x005F, "lowbar", "LOW LINE" },
1169{ 0x0060, "grave", "GRAVE ACCENT" },
1170{ 0x007B, "lcub", "LEFT CURLY BRACKET" },
1171{ 0x007C, "verbar", "VERTICAL LINE" },
1172{ 0x007D, "rcub", "RIGHT CURLY BRACKET" },
1173{ 0x00A0, "nbsp", "NO-BREAK SPACE" },
1174{ 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
1175{ 0x00A2, "cent", "CENT SIGN" },
1176{ 0x00A3, "pound", "POUND SIGN" },
1177{ 0x00A4, "curren", "CURRENCY SIGN" },
1178{ 0x00A5, "yen", "YEN SIGN" },
1179{ 0x00A6, "brvbar", "BROKEN BAR" },
1180{ 0x00A7, "sect", "SECTION SIGN" },
1181{ 0x00A8, "die", "" },
1182{ 0x00A8, "Dot", "" },
1183{ 0x00A8, "uml", "" },
1184{ 0x00A9, "copy", "COPYRIGHT SIGN" },
1185{ 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
1186{ 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
1187{ 0x00AC, "not", "NOT SIGN" },
1188{ 0x00AD, "shy", "SOFT HYPHEN" },
1189{ 0x00AE, "reg", "REG TRADE MARK SIGN" },
1190{ 0x00AF, "macr", "MACRON" },
1191{ 0x00B0, "deg", "DEGREE SIGN" },
1192{ 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
1193{ 0x00B2, "sup2", "SUPERSCRIPT TWO" },
1194{ 0x00B3, "sup3", "SUPERSCRIPT THREE" },
1195{ 0x00B4, "acute", "ACUTE ACCENT" },
1196{ 0x00B5, "micro", "MICRO SIGN" },
1197{ 0x00B6, "para", "PILCROW SIGN" },
1198{ 0x00B7, "middot", "MIDDLE DOT" },
1199{ 0x00B8, "cedil", "CEDILLA" },
1200{ 0x00B9, "sup1", "SUPERSCRIPT ONE" },
1201{ 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
1202{ 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
1203{ 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
1204{ 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
1205{ 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
1206{ 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
1207{ 0x00BF, "iquest", "INVERTED QUESTION MARK" },
1208{ 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
1209{ 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
1210{ 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
1211{ 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
1212{ 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
1213{ 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
1214{ 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
1215{ 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
1216{ 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
1217{ 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
1218{ 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
1219{ 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
1220{ 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
1221{ 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
1222{ 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
1223{ 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
1224{ 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
1225{ 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
1226{ 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
1227{ 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
1228{ 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
1229{ 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
1230{ 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
1231{ 0x00D7, "times", "MULTIPLICATION SIGN" },
1232{ 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
1233{ 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
1234{ 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
1235{ 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
1236{ 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
1237{ 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
1238{ 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
1239{ 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
1240{ 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
1241{ 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
1242{ 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
1243{ 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
1244{ 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
1245{ 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
1246{ 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
1247{ 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
1248{ 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
1249{ 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
1250{ 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
1251{ 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
1252{ 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
1253{ 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
1254{ 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
1255{ 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
1256{ 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
1257{ 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
1258{ 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
1259{ 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
1260{ 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
1261{ 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
1262{ 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
1263{ 0x00F7, "divide", "DIVISION SIGN" },
1264{ 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
1265{ 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
1266{ 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
1267{ 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
1268{ 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
1269{ 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
1270{ 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
1271{ 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
1272{ 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
1273{ 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
1274{ 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
1275{ 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
1276{ 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
1277{ 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
1278{ 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
1279{ 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
1280{ 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
1281{ 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
1282{ 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
1283{ 0x010B, "cdot", "DOT OPERATOR" },
1284{ 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
1285{ 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
1286{ 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
1287{ 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
1288{ 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
1289{ 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
1290{ 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
1291{ 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
1292{ 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
1293{ 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
1294{ 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
1295{ 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
1296{ 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
1297{ 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
1298{ 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
1299{ 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
1300{ 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
1301{ 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
1302{ 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
1303{ 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
1304{ 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
1305{ 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
1306{ 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
1307{ 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
1308{ 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
1309{ 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
1310{ 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
1311{ 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
1312{ 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
1313{ 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
1314{ 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
1315{ 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
1316{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
1317{ 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
1318{ 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
1319{ 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
1320{ 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
1321{ 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
1322{ 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
1323{ 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
1324{ 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
1325{ 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
1326{ 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
1327{ 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
1328{ 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
1329{ 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
1330{ 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
1331{ 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
1332{ 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
1333{ 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
1334{ 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
1335{ 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
1336{ 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
1337{ 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
1338{ 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
1339{ 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
1340{ 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
1341{ 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
1342{ 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
1343{ 0x014B, "eng", "LATIN SMALL LETTER ENG" },
1344{ 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
1345{ 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
1346{ 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
1347{ 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
1348{ 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
1349{ 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
1350{ 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
1351{ 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
1352{ 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
1353{ 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
1354{ 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
1355{ 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
1356{ 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
1357{ 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
1358{ 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
1359{ 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
1360{ 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
1361{ 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
1362{ 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
1363{ 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
1364{ 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
1365{ 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
1366{ 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
1367{ 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
1368{ 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
1369{ 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
1370{ 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
1371{ 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
1372{ 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
1373{ 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
1374{ 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
1375{ 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
1376{ 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
1377{ 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
1378{ 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
1379{ 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
1380{ 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
1381{ 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
1382{ 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
1383{ 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
1384{ 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
1385{ 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
1386{ 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
1387{ 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
1388{ 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
1389{ 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
1390{ 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
1391{ 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
1392{ 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
1393{ 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
1394{ 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
1395{ 0x02C7, "caron", "CARON" },
1396{ 0x02D8, "breve", "BREVE" },
1397{ 0x02D9, "dot", "DOT ABOVE" },
1398{ 0x02DA, "ring", "RING ABOVE" },
1399{ 0x02DB, "ogon", "OGONEK" },
1400{ 0x02DC, "tilde", "TILDE" },
1401{ 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
1402{ 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
1403{ 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
1404{ 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
1405{ 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
1406{ 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
1407{ 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
1408{ 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
1409{ 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
1410{ 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
1411{ 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
1412{ 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
1413{ 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
1414{ 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
1415{ 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
1416{ 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
1417{ 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
1418{ 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
1419{ 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
1420{ 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
1421{ 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
1422{ 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
1423{ 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
1424{ 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
1425{ 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
1426{ 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
1427{ 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
1428{ 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
1429{ 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
1430{ 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
1431{ 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
1432{ 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
1433{ 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
1434{ 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
1435{ 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
1436{ 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
1437{ 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
1438{ 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
1439{ 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
1440{ 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
1441{ 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
1442{ 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
1443{ 0x03A5, "Ugr", "" },
1444{ 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
1445{ 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
1446{ 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
1447{ 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
1448{ 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
1449{ 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
1450{ 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
1451{ 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
1452{ 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
1453{ 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
1454{ 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
1455{ 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
1456{ 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
1457{ 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
1458{ 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
1459{ 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
1460{ 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
1461{ 0x03B1, "agr", "" },
1462{ 0x03B1, "alpha", "" },
1463{ 0x03B1, "b.alpha", "" },
1464{ 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
1465{ 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
1466{ 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
1467{ 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
1468{ 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
1469{ 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
1470{ 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
1471{ 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
1472{ 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
1473{ 0x03B5, "b.epsi", "" },
1474{ 0x03B5, "b.epsis", "" },
1475{ 0x03B5, "b.epsiv", "" },
1476{ 0x03B5, "egr", "" },
1477{ 0x03B5, "epsiv", "" },
1478{ 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
1479{ 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
1480{ 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
1481{ 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
1482{ 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
1483{ 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
1484{ 0x03B8, "b.thetas", "" },
1485{ 0x03B8, "thetas", "" },
1486{ 0x03B8, "thgr", "" },
1487{ 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
1488{ 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
1489{ 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
1490{ 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
1491{ 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
1492{ 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
1493{ 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
1494{ 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
1495{ 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
1496{ 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
1497{ 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
1498{ 0x03BC, "mu", "GREEK SMALL LETTER MU" },
1499{ 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
1500{ 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
1501{ 0x03BD, "nu", "GREEK SMALL LETTER NU" },
1502{ 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
1503{ 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
1504{ 0x03BE, "xi", "GREEK SMALL LETTER XI" },
1505{ 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
1506{ 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
1507{ 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
1508{ 0x03C0, "pi", "GREEK SMALL LETTER PI" },
1509{ 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
1510{ 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
1511{ 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
1512{ 0x03C2, "b.sigmav", "" },
1513{ 0x03C2, "sfgr", "" },
1514{ 0x03C2, "sigmav", "" },
1515{ 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
1516{ 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
1517{ 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
1518{ 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
1519{ 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
1520{ 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
1521{ 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
1522{ 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
1523{ 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
1524{ 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
1525{ 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
1526{ 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
1527{ 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
1528{ 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
1529{ 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
1530{ 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
1531{ 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
1532{ 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
1533{ 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
1534{ 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
1535{ 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
1536{ 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
1537{ 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
1538{ 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
1539{ 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
1540{ 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
1541{ 0x03D1, "b.thetav", "" },
1542{ 0x03D1, "thetav", "" },
1543{ 0x03D2, "b.Upsi", "" },
1544{ 0x03D2, "Upsi", "" },
1545{ 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
1546{ 0x03D5, "phiv", "GREEK PHI SYMBOL" },
1547{ 0x03D6, "b.piv", "GREEK PI SYMBOL" },
1548{ 0x03D6, "piv", "GREEK PI SYMBOL" },
1549{ 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
1550{ 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
1551{ 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
1552{ 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
1553{ 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
1554{ 0x03F1, "rhov", "GREEK RHO SYMBOL" },
1555{ 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
1556{ 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
1557{ 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
1558{ 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
1559{ 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
1560{ 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
1561{ 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
1562{ 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
1563{ 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
1564{ 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
1565{ 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
1566{ 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
1567{ 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
1568{ 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
1569{ 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
1570{ 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
1571{ 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
1572{ 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
1573{ 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
1574{ 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
1575{ 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
1576{ 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
1577{ 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
1578{ 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
1579{ 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
1580{ 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
1581{ 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
1582{ 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
1583{ 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
1584{ 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
1585{ 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
1586{ 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
1587{ 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
1588{ 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
1589{ 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
1590{ 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
1591{ 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
1592{ 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
1593{ 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
1594{ 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
1595{ 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
1596{ 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
1597{ 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
1598{ 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
1599{ 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
1600{ 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
1601{ 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
1602{ 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
1603{ 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
1604{ 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
1605{ 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
1606{ 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
1607{ 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
1608{ 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
1609{ 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
1610{ 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
1611{ 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
1612{ 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
1613{ 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
1614{ 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
1615{ 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
1616{ 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
1617{ 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
1618{ 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
1619{ 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
1620{ 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
1621{ 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
1622{ 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
1623{ 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
1624{ 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
1625{ 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
1626{ 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
1627{ 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
1628{ 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
1629{ 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
1630{ 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
1631{ 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
1632{ 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
1633{ 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
1634{ 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
1635{ 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
1636{ 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
1637{ 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
1638{ 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
1639{ 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
1640{ 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
1641{ 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
1642{ 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
1643{ 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
1644{ 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
1645{ 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
1646{ 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
1647{ 0x2002, "ensp", "EN SPACE" },
1648{ 0x2003, "emsp", "EM SPACE" },
1649{ 0x2004, "emsp13", "THREE-PER-EM SPACE" },
1650{ 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
1651{ 0x2007, "numsp", "FIGURE SPACE" },
1652{ 0x2008, "puncsp", "PUNCTUATION SPACE" },
1653{ 0x2009, "thinsp", "THIN SPACE" },
1654{ 0x200A, "hairsp", "HAIR SPACE" },
1655{ 0x2010, "dash", "HYPHEN" },
1656{ 0x2013, "ndash", "EN DASH" },
1657{ 0x2014, "mdash", "EM DASH" },
1658{ 0x2015, "horbar", "HORIZONTAL BAR" },
1659{ 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
1660{ 0x2018, "lsquo", "" },
1661{ 0x2018, "rsquor", "" },
1662{ 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
1663{ 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
1664{ 0x201C, "ldquo", "" },
1665{ 0x201C, "rdquor", "" },
1666{ 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
1667{ 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
1668{ 0x2020, "dagger", "DAGGER" },
1669{ 0x2021, "Dagger", "DOUBLE DAGGER" },
1670{ 0x2022, "bull", "BULLET" },
1671{ 0x2025, "nldr", "TWO DOT LEADER" },
1672{ 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
1673{ 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
1674{ 0x2030, "permil", "PER MILLE SIGN" },
1675{ 0x2032, "prime", "PRIME" },
1676{ 0x2032, "vprime", "PRIME" },
1677{ 0x2033, "Prime", "DOUBLE PRIME" },
1678{ 0x2034, "tprime", "TRIPLE PRIME" },
1679{ 0x2035, "bprime", "REVERSED PRIME" },
1680{ 0x2041, "caret", "CARET" },
1681{ 0x2043, "hybull", "HYPHEN BULLET" },
1682{ 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
1683{ 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
1684{ 0x2105, "incare", "CARE OF" },
1685{ 0x210B, "hamilt", "SCRIPT CAPITAL H" },
1686{ 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
1687{ 0x2111, "image", "BLACK-LETTER CAPITAL I" },
1688{ 0x2112, "lagran", "SCRIPT CAPITAL L" },
1689{ 0x2113, "ell", "SCRIPT SMALL L" },
1690{ 0x2116, "numero", "NUMERO SIGN" },
1691{ 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
1692{ 0x2118, "weierp", "SCRIPT CAPITAL P" },
1693{ 0x211C, "real", "BLACK-LETTER CAPITAL R" },
1694{ 0x211E, "rx", "PRESCRIPTION TAKE" },
1695{ 0x2122, "trade", "TRADE MARK SIGN" },
1696{ 0x2126, "ohm", "OHM SIGN" },
1697{ 0x212B, "angst", "ANGSTROM SIGN" },
1698{ 0x212C, "bernou", "SCRIPT CAPITAL B" },
1699{ 0x2133, "phmmat", "SCRIPT CAPITAL M" },
1700{ 0x2134, "order", "SCRIPT SMALL O" },
1701{ 0x2135, "aleph", "ALEF SYMBOL" },
1702{ 0x2136, "beth", "BET SYMBOL" },
1703{ 0x2137, "gimel", "GIMEL SYMBOL" },
1704{ 0x2138, "daleth", "DALET SYMBOL" },
1705{ 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
1706{ 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
1707{ 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
1708{ 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
1709{ 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
1710{ 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
1711{ 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
1712{ 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
1713{ 0x215B, "frac18", "" },
1714{ 0x215C, "frac38", "" },
1715{ 0x215D, "frac58", "" },
1716{ 0x215E, "frac78", "" },
1717{ 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
1718{ 0x2191, "uarr", "UPWARDS ARROW" },
1719{ 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
1720{ 0x2193, "darr", "DOWNWARDS ARROW" },
1721{ 0x2194, "harr", "LEFT RIGHT ARROW" },
1722{ 0x2194, "xhArr", "LEFT RIGHT ARROW" },
1723{ 0x2194, "xharr", "LEFT RIGHT ARROW" },
1724{ 0x2195, "varr", "UP DOWN ARROW" },
1725{ 0x2196, "nwarr", "NORTH WEST ARROW" },
1726{ 0x2197, "nearr", "NORTH EAST ARROW" },
1727{ 0x2198, "drarr", "SOUTH EAST ARROW" },
1728{ 0x2199, "dlarr", "SOUTH WEST ARROW" },
1729{ 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
1730{ 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
1731{ 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
1732{ 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
1733{ 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
1734{ 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
1735{ 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
1736{ 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
1737{ 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
1738{ 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
1739{ 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
1740{ 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
1741{ 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
1742{ 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
1743{ 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
1744{ 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
1745{ 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
1746{ 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
1747{ 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
1748{ 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
1749{ 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
1750{ 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
1751{ 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
1752{ 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
1753{ 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
1754{ 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
1755{ 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
1756{ 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
1757{ 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
1758{ 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
1759{ 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
1760{ 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
1761{ 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
1762{ 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
1763{ 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
1764{ 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
1765{ 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
1766{ 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
1767{ 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
1768{ 0x21D0, "lArr", "LEFTWARDS ARROW" },
1769{ 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
1770{ 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
1771{ 0x21D2, "rArr", "RIGHTWARDS ARROW" },
1772{ 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
1773{ 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
1774{ 0x21D4, "hArr", "" },
1775{ 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
1776{ 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
1777{ 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
1778{ 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
1779{ 0x2200, "forall", "" },
1780{ 0x2201, "comp", "COMPLEMENT" },
1781{ 0x2202, "part", "" },
1782{ 0x2203, "exist", "" },
1783{ 0x2204, "nexist", "THERE DOES NOT EXIST" },
1784{ 0x2205, "empty", "" },
1785{ 0x2207, "nabla", "NABLA" },
1786{ 0x2209, "notin", "" },
1787{ 0x220A, "epsi", "" },
1788{ 0x220A, "epsis", "" },
1789{ 0x220A, "isin", "" },
1790{ 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
1791{ 0x220D, "ni", "" },
1792{ 0x220F, "prod", "N-ARY PRODUCT" },
1793{ 0x2210, "amalg", "N-ARY COPRODUCT" },
1794{ 0x2210, "coprod", "N-ARY COPRODUCT" },
1795{ 0x2210, "samalg", "" },
1796{ 0x2211, "sum", "N-ARY SUMMATION" },
1797{ 0x2212, "minus", "MINUS SIGN" },
1798{ 0x2213, "mnplus", "" },
1799{ 0x2214, "plusdo", "DOT PLUS" },
1800{ 0x2216, "setmn", "SET MINUS" },
1801{ 0x2216, "ssetmn", "SET MINUS" },
1802{ 0x2217, "lowast", "ASTERISK OPERATOR" },
1803{ 0x2218, "compfn", "RING OPERATOR" },
1804{ 0x221A, "radic", "" },
1805{ 0x221D, "prop", "" },
1806{ 0x221D, "vprop", "" },
1807{ 0x221E, "infin", "" },
1808{ 0x221F, "ang90", "RIGHT ANGLE" },
1809{ 0x2220, "ang", "ANGLE" },
1810{ 0x2221, "angmsd", "MEASURED ANGLE" },
1811{ 0x2222, "angsph", "" },
1812{ 0x2223, "mid", "" },
1813{ 0x2224, "nmid", "DOES NOT DIVIDE" },
1814{ 0x2225, "par", "PARALLEL TO" },
1815{ 0x2225, "spar", "PARALLEL TO" },
1816{ 0x2226, "npar", "NOT PARALLEL TO" },
1817{ 0x2226, "nspar", "NOT PARALLEL TO" },
1818{ 0x2227, "and", "" },
1819{ 0x2228, "or", "" },
1820{ 0x2229, "cap", "" },
1821{ 0x222A, "cup", "" },
1822{ 0x222B, "int", "" },
1823{ 0x222E, "conint", "" },
1824{ 0x2234, "there4", "" },
1825{ 0x2235, "becaus", "BECAUSE" },
1826{ 0x223C, "sim", "" },
1827{ 0x223C, "thksim", "TILDE OPERATOR" },
1828{ 0x223D, "bsim", "" },
1829{ 0x2240, "wreath", "WREATH PRODUCT" },
1830{ 0x2241, "nsim", "" },
1831{ 0x2243, "sime", "" },
1832{ 0x2244, "nsime", "" },
1833{ 0x2245, "cong", "" },
1834{ 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
1835{ 0x2248, "ap", "" },
1836{ 0x2248, "thkap", "ALMOST EQUAL TO" },
1837{ 0x2249, "nap", "NOT ALMOST EQUAL TO" },
1838{ 0x224A, "ape", "" },
1839{ 0x224C, "bcong", "ALL EQUAL TO" },
1840{ 0x224D, "asymp", "EQUIVALENT TO" },
1841{ 0x224E, "bump", "" },
1842{ 0x224F, "bumpe", "" },
1843{ 0x2250, "esdot", "" },
1844{ 0x2251, "eDot", "" },
1845{ 0x2252, "efDot", "" },
1846{ 0x2253, "erDot", "" },
1847{ 0x2254, "colone", "" },
1848{ 0x2255, "ecolon", "" },
1849{ 0x2256, "ecir", "" },
1850{ 0x2257, "cire", "" },
1851{ 0x2259, "wedgeq", "ESTIMATES" },
1852{ 0x225C, "trie", "" },
1853{ 0x2260, "ne", "" },
1854{ 0x2261, "equiv", "" },
1855{ 0x2262, "nequiv", "NOT IDENTICAL TO" },
1856{ 0x2264, "le", "" },
1857{ 0x2264, "les", "LESS-THAN OR EQUAL TO" },
1858{ 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
1859{ 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
1860{ 0x2266, "lE", "" },
1861{ 0x2267, "gE", "" },
1862{ 0x2268, "lnE", "" },
1863{ 0x2268, "lne", "" },
1864{ 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
1865{ 0x2269, "gnE", "" },
1866{ 0x2269, "gne", "" },
1867{ 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
1868{ 0x226A, "Lt", "MUCH LESS-THAN" },
1869{ 0x226B, "Gt", "MUCH GREATER-THAN" },
1870{ 0x226C, "twixt", "BETWEEN" },
1871{ 0x226E, "nlt", "NOT LESS-THAN" },
1872{ 0x226F, "ngt", "NOT GREATER-THAN" },
1873{ 0x2270, "nlE", "" },
1874{ 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
1875{ 0x2270, "nles", "" },
1876{ 0x2271, "ngE", "" },
1877{ 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
1878{ 0x2271, "nges", "" },
1879{ 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
1880{ 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
1881{ 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
1882{ 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
1883{ 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
1884{ 0x2277, "gl", "" },
1885{ 0x227A, "pr", "" },
1886{ 0x227B, "sc", "" },
1887{ 0x227C, "cupre", "" },
1888{ 0x227C, "pre", "" },
1889{ 0x227D, "sccue", "" },
1890{ 0x227D, "sce", "" },
1891{ 0x227E, "prap", "" },
1892{ 0x227E, "prsim", "" },
1893{ 0x227F, "scap", "" },
1894{ 0x227F, "scsim", "" },
1895{ 0x2280, "npr", "DOES NOT PRECEDE" },
1896{ 0x2281, "nsc", "DOES NOT SUCCEED" },
1897{ 0x2282, "sub", "" },
1898{ 0x2283, "sup", "" },
1899{ 0x2284, "nsub", "NOT A SUBSET OF" },
1900{ 0x2285, "nsup", "NOT A SUPERSET OF" },
1901{ 0x2286, "subE", "" },
1902{ 0x2286, "sube", "" },
1903{ 0x2287, "supE", "" },
1904{ 0x2287, "supe", "" },
1905{ 0x2288, "nsubE", "" },
1906{ 0x2288, "nsube", "" },
1907{ 0x2289, "nsupE", "" },
1908{ 0x2289, "nsupe", "" },
1909{ 0x228A, "subne", "" },
1910{ 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
1911{ 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
1912{ 0x228B, "supnE", "" },
1913{ 0x228B, "supne", "" },
1914{ 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
1915{ 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
1916{ 0x228E, "uplus", "MULTISET UNION" },
1917{ 0x228F, "sqsub", "" },
1918{ 0x2290, "sqsup", "" },
1919{ 0x2291, "sqsube", "" },
1920{ 0x2292, "sqsupe", "" },
1921{ 0x2293, "sqcap", "SQUARE CAP" },
1922{ 0x2294, "sqcup", "SQUARE CUP" },
1923{ 0x2295, "oplus", "CIRCLED PLUS" },
1924{ 0x2296, "ominus", "CIRCLED MINUS" },
1925{ 0x2297, "otimes", "CIRCLED TIMES" },
1926{ 0x2298, "osol", "CIRCLED DIVISION SLASH" },
1927{ 0x2299, "odot", "CIRCLED DOT OPERATOR" },
1928{ 0x229A, "ocir", "CIRCLED RING OPERATOR" },
1929{ 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
1930{ 0x229D, "odash", "CIRCLED DASH" },
1931{ 0x229E, "plusb", "SQUARED PLUS" },
1932{ 0x229F, "minusb", "SQUARED MINUS" },
1933{ 0x22A0, "timesb", "SQUARED TIMES" },
1934{ 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
1935{ 0x22A2, "vdash", "" },
1936{ 0x22A3, "dashv", "" },
1937{ 0x22A4, "top", "DOWN TACK" },
1938{ 0x22A5, "bottom", "" },
1939{ 0x22A5, "perp", "" },
1940{ 0x22A7, "models", "MODELS" },
1941{ 0x22A8, "vDash", "" },
1942{ 0x22A9, "Vdash", "" },
1943{ 0x22AA, "Vvdash", "" },
1944{ 0x22AC, "nvdash", "DOES NOT PROVE" },
1945{ 0x22AD, "nvDash", "NOT TRUE" },
1946{ 0x22AE, "nVdash", "DOES NOT FORCE" },
1947{ 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
1948{ 0x22B2, "vltri", "" },
1949{ 0x22B3, "vrtri", "" },
1950{ 0x22B4, "ltrie", "" },
1951{ 0x22B5, "rtrie", "" },
1952{ 0x22B8, "mumap", "MULTIMAP" },
1953{ 0x22BA, "intcal", "INTERCALATE" },
1954{ 0x22BB, "veebar", "" },
1955{ 0x22BC, "barwed", "NAND" },
1956{ 0x22C4, "diam", "DIAMOND OPERATOR" },
1957{ 0x22C5, "sdot", "DOT OPERATOR" },
1958{ 0x22C6, "sstarf", "STAR OPERATOR" },
1959{ 0x22C6, "star", "STAR OPERATOR" },
1960{ 0x22C7, "divonx", "DIVISION TIMES" },
1961{ 0x22C8, "bowtie", "" },
1962{ 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
1963{ 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
1964{ 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
1965{ 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
1966{ 0x22CD, "bsime", "" },
1967{ 0x22CE, "cuvee", "CURLY LOGICAL OR" },
1968{ 0x22CF, "cuwed", "CURLY LOGICAL AND" },
1969{ 0x22D0, "Sub", "" },
1970{ 0x22D1, "Sup", "" },
1971{ 0x22D2, "Cap", "DOUBLE INTERSECTION" },
1972{ 0x22D3, "Cup", "DOUBLE UNION" },
1973{ 0x22D4, "fork", "" },
1974{ 0x22D6, "ldot", "" },
1975{ 0x22D7, "gsdot", "" },
1976{ 0x22D8, "Ll", "" },
1977{ 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
1978{ 0x22DA, "lEg", "" },
1979{ 0x22DA, "leg", "" },
1980{ 0x22DB, "gEl", "" },
1981{ 0x22DB, "gel", "" },
1982{ 0x22DC, "els", "" },
1983{ 0x22DD, "egs", "" },
1984{ 0x22DE, "cuepr", "" },
1985{ 0x22DF, "cuesc", "" },
1986{ 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
1987{ 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
1988{ 0x22E6, "lnsim", "" },
1989{ 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
1990{ 0x22E8, "prnap", "" },
1991{ 0x22E8, "prnsim", "" },
1992{ 0x22E9, "scnap", "" },
1993{ 0x22E9, "scnsim", "" },
1994{ 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
1995{ 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
1996{ 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
1997{ 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
1998{ 0x22EE, "vellip", "" },
1999{ 0x2306, "Barwed", "PERSPECTIVE" },
2000{ 0x2308, "lceil", "LEFT CEILING" },
2001{ 0x2309, "rceil", "RIGHT CEILING" },
2002{ 0x230A, "lfloor", "LEFT FLOOR" },
2003{ 0x230B, "rfloor", "RIGHT FLOOR" },
2004{ 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
2005{ 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
2006{ 0x230E, "urcrop", "TOP RIGHT CROP" },
2007{ 0x230F, "ulcrop", "TOP LEFT CROP" },
2008{ 0x2315, "telrec", "TELEPHONE RECORDER" },
2009{ 0x2316, "target", "POSITION INDICATOR" },
2010{ 0x231C, "ulcorn", "TOP LEFT CORNER" },
2011{ 0x231D, "urcorn", "TOP RIGHT CORNER" },
2012{ 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
2013{ 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
2014{ 0x2322, "frown", "" },
2015{ 0x2322, "sfrown", "FROWN" },
2016{ 0x2323, "smile", "" },
2017{ 0x2323, "ssmile", "SMILE" },
2018{ 0x2423, "blank", "OPEN BOX" },
2019{ 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
2020{ 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
2021{ 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
2022{ 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
2023{ 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
2024{ 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
2025{ 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
2026{ 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
2027{ 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
2028{ 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
2029{ 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
2030{ 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
2031{ 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
2032{ 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
2033{ 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
2034{ 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
2035{ 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
2036{ 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
2037{ 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
2038{ 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
2039{ 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
2040{ 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
2041{ 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
2042{ 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
2043{ 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
2044{ 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
2045{ 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
2046{ 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
2047{ 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
2048{ 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
2049{ 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
2050{ 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
2051{ 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
2052{ 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
2053{ 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
2054{ 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
2055{ 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
2056{ 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
2057{ 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
2058{ 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
2059{ 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
2060{ 0x2580, "uhblk", "UPPER HALF BLOCK" },
2061{ 0x2584, "lhblk", "LOWER HALF BLOCK" },
2062{ 0x2588, "block", "FULL BLOCK" },
2063{ 0x2591, "blk14", "LIGHT SHADE" },
2064{ 0x2592, "blk12", "MEDIUM SHADE" },
2065{ 0x2593, "blk34", "DARK SHADE" },
2066{ 0x25A1, "square", "WHITE SQUARE" },
2067{ 0x25A1, "squ", "WHITE SQUARE" },
2068{ 0x25AA, "squf", "" },
2069{ 0x25AD, "rect", "WHITE RECTANGLE" },
2070{ 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
2071{ 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
2072{ 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
2073{ 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
2074{ 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
2075{ 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
2076{ 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
2077{ 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
2078{ 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
2079{ 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
2080{ 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
2081{ 0x25CA, "loz", "LOZENGE" },
2082{ 0x25CB, "cir", "WHITE CIRCLE" },
2083{ 0x25CB, "xcirc", "WHITE CIRCLE" },
2084{ 0x2605, "starf", "BLACK STAR" },
2085{ 0x260E, "phone", "TELEPHONE SIGN" },
2086{ 0x2640, "female", "" },
2087{ 0x2642, "male", "MALE SIGN" },
2088{ 0x2660, "spades", "BLACK SPADE SUIT" },
2089{ 0x2663, "clubs", "BLACK CLUB SUIT" },
2090{ 0x2665, "hearts", "BLACK HEART SUIT" },
2091{ 0x2666, "diams", "BLACK DIAMOND SUIT" },
2092{ 0x2669, "sung", "" },
2093{ 0x266D, "flat", "MUSIC FLAT SIGN" },
2094{ 0x266E, "natur", "MUSIC NATURAL SIGN" },
2095{ 0x266F, "sharp", "MUSIC SHARP SIGN" },
2096{ 0x2713, "check", "CHECK MARK" },
2097{ 0x2717, "cross", "BALLOT X" },
2098{ 0x2720, "malt", "MALTESE CROSS" },
2099{ 0x2726, "lozf", "" },
2100{ 0x2736, "sext", "SIX POINTED BLACK STAR" },
2101{ 0x3008, "lang", "" },
2102{ 0x3009, "rang", "" },
2103{ 0xE291, "rpargt", "" },
2104{ 0xE2A2, "lnap", "" },
2105{ 0xE2AA, "nsmid", "" },
2106{ 0xE2B3, "prnE", "" },
2107{ 0xE2B5, "scnE", "" },
2108{ 0xE2B8, "vsubnE", "" },
2109{ 0xE301, "smid", "" },
2110{ 0xE411, "gnap", "" },
2111{ 0xFB00, "fflig", "" },
2112{ 0xFB01, "filig", "" },
2113{ 0xFB02, "fllig", "" },
2114{ 0xFB03, "ffilig", "" },
2115{ 0xFB04, "ffllig", "" },
2116{ 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
2117};
2118
2119/************************************************************************
2120 *                                                                     *
2121 *             Commodity functions to handle entities                  *
2122 *                                                                     *
2123 ************************************************************************/
2124
/*
 * growBuffer:
 * @buffer:  the name of an xmlChar * variable; an integer variable named
 *           <buffer>_size holding its allocated size must be in scope too
 *
 * Macro used to grow the current buffer: the allocated size is doubled.
 *
 * It may only be expanded inside a function returning a pointer: when the
 * reallocation fails the previous allocation is released (nothing else
 * could free it once the function has returned) and the enclosing
 * function returns NULL.
 *
 * The block may move, so pointers into the old buffer have to be
 * recomputed by the caller from a saved index.
 */
#define growBuffer(buffer) {                                           \
    xmlChar *buffer##_grown;                                           \
    buffer##_size *= 2;                                                \
    buffer##_grown = (xmlChar *) xmlRealloc(buffer,                    \
                                 buffer##_size * sizeof(xmlChar));     \
    if (buffer##_grown == NULL) {                                      \
        perror("realloc failed");                                      \
        xmlFree(buffer);                                               \
        return(NULL);                                                  \
    }                                                                  \
    buffer = buffer##_grown;                                           \
}
2136
2137/**
2138 * docbEntityLookup:
2139 * @name: the entity name
2140 *
2141 * Lookup the given entity in EntitiesTable
2142 *
2143 * TODO: the linear scan is really ugly, an hash table is really needed.
2144 *
2145 * Returns the associated docbEntityDescPtr if found, NULL otherwise.
2146 */
2147static docbEntityDescPtr
2148docbEntityLookup(const xmlChar *name) {
2149    unsigned int i;
2150
2151    for (i = 0;i < (sizeof(docbookEntitiesTable)/
2152                    sizeof(docbookEntitiesTable[0]));i++) {
2153        if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
2154#ifdef DEBUG
2155            xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name);
2156#endif
2157            return(&docbookEntitiesTable[i]);
2158       }
2159    }
2160    return(NULL);
2161}
2162
2163/**
2164 * docbEntityValueLookup:
2165 * @value: the entity's unicode value
2166 *
2167 * Lookup the given entity in EntitiesTable
2168 *
2169 * TODO: the linear scan is really ugly, an hash table is really needed.
2170 *
2171 * Returns the associated docbEntityDescPtr if found, NULL otherwise.
2172 */
2173static docbEntityDescPtr
2174docbEntityValueLookup(int value) {
2175    unsigned int i;
2176#ifdef DEBUG
2177    int lv = 0;
2178#endif
2179
2180    for (i = 0;i < (sizeof(docbookEntitiesTable)/
2181                    sizeof(docbookEntitiesTable[0]));i++) {
2182        if (docbookEntitiesTable[i].value >= value) {
2183           if (docbookEntitiesTable[i].value > value)
2184               break;
2185#ifdef DEBUG
2186           xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name);
2187#endif
2188            return(&docbookEntitiesTable[i]);
2189       }
2190#ifdef DEBUG
2191       if (lv > docbookEntitiesTable[i].value) {
2192           xmlGenericError(xmlGenericErrorContext,
2193                   "docbookEntitiesTable[] is not sorted (%d > %d)!\n",
2194                   lv, docbookEntitiesTable[i].value);
2195       }
2196       lv = docbookEntitiesTable[i].value;
2197#endif
2198    }
2199    return(NULL);
2200}
2201
#if 0
/*
 * NOTE: this function is currently compiled out.
 */
/**
 * UTF8ToSgml:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * plus SGML entities block of chars out. Characters above 0x7F are
 * replaced by "&name;" using docbEntityValueLookup(); a character with
 * no known entity makes the conversion fail.
 *
 * The conversion stops early, still returning 0, when @out is full or
 * when @in ends in the middle of a multi-byte sequence.
 *
 * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
 *         character with no SGML entity).
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8ToSgml(unsigned char* out, int *outlen,
           const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }

        /* incomplete sequence at the end of the block: wait for more */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * not a continuation byte: reject the input instead of
                 * emitting a half decoded value
                 */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (out + 1 >= outend)
                break;
            *out++ = c;
        } else {
            int len;
            docbEntityDescPtr ent;

            /*
             * Try to lookup a predefined SGML entity for it
             */

            ent = docbEntityValueLookup(c);
            if (ent == NULL) {
                /* no chance for this in Ascii */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            len = strlen(ent->name);
            /* room for '&', the name and ';' */
            if (out + 2 + len >= outend)
                break;
            *out++ = '&';
            memcpy(out, ent->name, len);
            out += len;
            *out++ = ';';
        }
        /* only fully converted characters count as consumed */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(0);
}
#endif
2303
2304/**
2305 * docbEncodeEntities:
2306 * @out:  a pointer to an array of bytes to store the result
2307 * @outlen:  the length of @out
2308 * @in:  a pointer to an array of UTF-8 chars
2309 * @inlen:  the length of @in
2310 * @quoteChar: the quote character to escape (' or ") or zero.
2311 *
2312 * Take a block of UTF-8 chars in and try to convert it to an ASCII
2313 * plus SGML entities block of chars out.
2314 *
2315 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2316 * The value of @inlen after return is the number of octets consumed
2317 *     as the return value is positive, else unpredictable.
2318 * The value of @outlen after return is the number of octets consumed.
2319 */
2320int
2321docbEncodeEntities(unsigned char* out, int *outlen,
2322                  const unsigned char* in, int *inlen, int quoteChar) {
2323    const unsigned char* processed = in;
2324    const unsigned char* outend = out + (*outlen);
2325    const unsigned char* outstart = out;
2326    const unsigned char* instart = in;
2327    const unsigned char* inend = in + (*inlen);
2328    unsigned int c, d;
2329    int trailing;
2330
2331    while (in < inend) {
2332       d = *in++;
2333       if      (d < 0x80)  { c= d; trailing= 0; }
2334       else if (d < 0xC0) {
2335           /* trailing byte in leading position */
2336           *outlen = out - outstart;
2337           *inlen = processed - instart;
2338           return(-2);
2339        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
2340        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
2341        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
2342       else {
2343           /* no chance for this in Ascii */
2344           *outlen = out - outstart;
2345           *inlen = processed - instart;
2346           return(-2);
2347       }
2348
2349       if (inend - in < trailing)
2350           break;
2351
2352       while (trailing--) {
2353           if (((d= *in++) & 0xC0) != 0x80) {
2354               *outlen = out - outstart;
2355               *inlen = processed - instart;
2356               return(-2);
2357           }
2358           c <<= 6;
2359           c |= d & 0x3F;
2360       }
2361
2362       /* assertion: c is a single UTF-4 value */
2363       if (c < 0x80 && c != (unsigned int) quoteChar && c != '&' && c != '<' && c != '>') {
2364           if (out >= outend)
2365               break;
2366           *out++ = c;
2367       } else {
2368           docbEntityDescPtr ent;
2369           const char *cp;
2370           char nbuf[16];
2371           int len;
2372
2373           /*
2374            * Try to lookup a predefined SGML entity for it
2375            */
2376           ent = docbEntityValueLookup(c);
2377           if (ent == NULL) {
2378               sprintf(nbuf, "#%u", c);
2379               cp = nbuf;
2380           }
2381           else
2382               cp = ent->name;
2383           len = strlen(cp);
2384           if (out + 2 + len > outend)
2385               break;
2386           *out++ = '&';
2387           memcpy(out, cp, len);
2388           out += len;
2389           *out++ = ';';
2390       }
2391       processed = in;
2392    }
2393    *outlen = out - outstart;
2394    *inlen = processed - instart;
2395    return(0);
2396}
2397
2398
2399/************************************************************************
2400 *                                                                     *
2401 *             Commodity functions to handle streams                   *
2402 *                                                                     *
2403 ************************************************************************/
2404
2405/**
2406 * docbNewInputStream:
2407 * @ctxt:  an SGML parser context
2408 *
2409 * Create a new input stream structure
2410 * Returns the new input stream or NULL
2411 */
2412static docbParserInputPtr
2413docbNewInputStream(docbParserCtxtPtr ctxt) {
2414    docbParserInputPtr input;
2415
2416    input = (xmlParserInputPtr) xmlMalloc(sizeof(docbParserInput));
2417    if (input == NULL) {
2418        ctxt->errNo = XML_ERR_NO_MEMORY;
2419       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2420           ctxt->sax->error(ctxt->userData,
2421                            "malloc: couldn't allocate a new input stream\n");
2422       return(NULL);
2423    }
2424    memset(input, 0, sizeof(docbParserInput));
2425    input->filename = NULL;
2426    input->directory = NULL;
2427    input->base = NULL;
2428    input->cur = NULL;
2429    input->buf = NULL;
2430    input->line = 1;
2431    input->col = 1;
2432    input->buf = NULL;
2433    input->free = NULL;
2434    input->version = NULL;
2435    input->consumed = 0;
2436    input->length = 0;
2437    return(input);
2438}
2439
2440
2441/************************************************************************
2442 *                                                                     *
2443 *             Commodity functions, cleanup needed ?                   *
2444 *                                                                     *
2445 ************************************************************************/
2446
2447/**
2448 * areBlanks:
2449 * @ctxt:  an SGML parser context
2450 * @str:  a xmlChar *
2451 * @len:  the size of @str
2452 *
2453 * Is this a sequence of blank chars that one can ignore ?
2454 *
2455 * Returns 1 if ignorable 0 otherwise.
2456 */
2457
2458static int areBlanks(docbParserCtxtPtr ctxt, const xmlChar *str, int len) {
2459    int i;
2460    xmlNodePtr lastChild;
2461
2462    for (i = 0;i < len;i++)
2463        if (!(IS_BLANK(str[i]))) return(0);
2464
2465    if (CUR == 0) return(1);
2466    if (CUR != '<') return(0);
2467    if (ctxt->name == NULL)
2468       return(1);
2469    if (ctxt->node == NULL) return(0);
2470    lastChild = xmlGetLastChild(ctxt->node);
2471    if (lastChild == NULL) {
2472        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2473	    (ctxt->node->content != NULL)) return(0);
2474    } else if (xmlNodeIsText(lastChild))
2475        return(0);
2476    return(1);
2477}
2478
2479/************************************************************************
2480 *									*
2481 *                     External entities support			*
2482 *									*
2483 ************************************************************************/
2484
2485/**
2486 * docbParseCtxtExternalEntity:
2487 * @ctx:  the existing parsing context
2488 * @URL:  the URL for the entity to load
2489 * @ID:  the System ID for the entity to load
2490 * @list:  the return value for the set of parsed nodes
2491 *
2492 * Parse an external general entity within an existing parsing context
2493 *
2494 * Returns 0 if the entity is well formed, -1 in case of args problem and
2495 *    the parser error code otherwise
2496 */
2497
2498static int
2499docbParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
2500	                    const xmlChar *ID, xmlNodePtr *list) {
2501    xmlParserCtxtPtr ctxt;
2502    xmlDocPtr newDoc;
2503    xmlSAXHandlerPtr oldsax = NULL;
2504    int ret = 0;
2505
2506    if (ctx->depth > 40) {
2507	return(XML_ERR_ENTITY_LOOP);
2508    }
2509
2510    if (list != NULL)
2511        *list = NULL;
2512    if ((URL == NULL) && (ID == NULL))
2513	return(-1);
2514    if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
2515	return(-1);
2516
2517
2518    ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
2519    if (ctxt == NULL) return(-1);
2520    ctxt->userData = ctxt;
2521    oldsax = ctxt->sax;
2522    ctxt->sax = ctx->sax;
2523    newDoc = xmlNewDoc(BAD_CAST "1.0");
2524    if (newDoc == NULL) {
2525	xmlFreeParserCtxt(ctxt);
2526	return(-1);
2527    }
2528    if (ctx->myDoc != NULL) {
2529	newDoc->intSubset = ctx->myDoc->intSubset;
2530	newDoc->extSubset = ctx->myDoc->extSubset;
2531    }
2532    if (ctx->myDoc->URL != NULL) {
2533	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
2534    }
2535    newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
2536    if (newDoc->children == NULL) {
2537	ctxt->sax = oldsax;
2538	xmlFreeParserCtxt(ctxt);
2539	newDoc->intSubset = NULL;
2540	newDoc->extSubset = NULL;
2541        xmlFreeDoc(newDoc);
2542	return(-1);
2543    }
2544    nodePush(ctxt, newDoc->children);
2545    if (ctx->myDoc == NULL) {
2546	ctxt->myDoc = newDoc;
2547    } else {
2548	ctxt->myDoc = ctx->myDoc;
2549	newDoc->children->doc = ctx->myDoc;
2550    }
2551
2552    /*
2553     * Parse a possible text declaration first
2554     */
2555    GROW;
2556    if ((RAW == '<') && (NXT(1) == '?') &&
2557	(NXT(2) == 'x') && (NXT(3) == 'm') &&
2558	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
2559	xmlParseTextDecl(ctxt);
2560    }
2561
2562    /*
2563     * Doing validity checking on chunk doesn't make sense
2564     */
2565    ctxt->instate = XML_PARSER_CONTENT;
2566    ctxt->validate = ctx->validate;
2567    ctxt->loadsubset = ctx->loadsubset;
2568    ctxt->depth = ctx->depth + 1;
2569    ctxt->replaceEntities = ctx->replaceEntities;
2570    if (ctxt->validate) {
2571	ctxt->vctxt.error = ctx->vctxt.error;
2572	ctxt->vctxt.warning = ctx->vctxt.warning;
2573	/* Allocate the Node stack */
2574	ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2575	if (ctxt->vctxt.nodeTab == NULL) {
2576	    xmlGenericError(xmlGenericErrorContext,
2577		    "docbParseCtxtExternalEntity: out of memory\n");
2578	    ctxt->validate = 0;
2579	    ctxt->vctxt.error = NULL;
2580	    ctxt->vctxt.warning = NULL;
2581	} else {
2582	    ctxt->vctxt.nodeNr = 0;
2583	    ctxt->vctxt.nodeMax = 4;
2584	    ctxt->vctxt.node = NULL;
2585	}
2586    } else {
2587	ctxt->vctxt.error = NULL;
2588	ctxt->vctxt.warning = NULL;
2589    }
2590
2591    docbParseContent(ctxt);
2592
2593    if ((RAW == '<') && (NXT(1) == '/')) {
2594	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
2595	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2596	    ctxt->sax->error(ctxt->userData,
2597		"chunk is not well balanced\n");
2598	ctxt->wellFormed = 0;
2599	ctxt->disableSAX = 1;
2600    } else if (RAW != 0) {
2601	ctxt->errNo = XML_ERR_EXTRA_CONTENT;
2602	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2603	    ctxt->sax->error(ctxt->userData,
2604		"extra content at the end of well balanced chunk\n");
2605	ctxt->wellFormed = 0;
2606	ctxt->disableSAX = 1;
2607    }
2608    if (ctxt->node != newDoc->children) {
2609	ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
2610	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2611	    ctxt->sax->error(ctxt->userData,
2612		"chunk is not well balanced\n");
2613	ctxt->wellFormed = 0;
2614	ctxt->disableSAX = 1;
2615    }
2616
2617    if (!ctxt->wellFormed) {
2618        if (ctxt->errNo == 0)
2619	    ret = 1;
2620	else
2621	    ret = ctxt->errNo;
2622    } else {
2623	if (list != NULL) {
2624	    xmlNodePtr cur;
2625
2626	    /*
2627	     * Return the newly created nodeset after unlinking it from
2628	     * they pseudo parent.
2629	     */
2630	    cur = newDoc->children->children;
2631	    *list = cur;
2632	    while (cur != NULL) {
2633		cur->parent = NULL;
2634		cur = cur->next;
2635	    }
2636            newDoc->children->children = NULL;
2637	}
2638	ret = 0;
2639    }
2640    ctxt->sax = oldsax;
2641    xmlFreeParserCtxt(ctxt);
2642    newDoc->intSubset = NULL;
2643    newDoc->extSubset = NULL;
2644    xmlFreeDoc(newDoc);
2645
2646    return(ret);
2647}
2648
2649/************************************************************************
2650 *									*
2651 *			The parser itself				*
2652 *									*
2653 ************************************************************************/
2654
2655/**
2656 * docbParseSGMLName:
2657 * @ctxt:  an SGML parser context
2658 *
2659 * parse an SGML tag or attribute name, note that we convert it to lowercase
2660 * since SGML names are not case-sensitive.
2661 *
2662 * Returns the Tag Name parsed or NULL
2663 */
2664
2665static xmlChar *
2666docbParseSGMLName(docbParserCtxtPtr ctxt) {
2667    xmlChar *ret = NULL;
2668    int i = 0;
2669    xmlChar loc[DOCB_PARSER_BUFFER_SIZE];
2670
2671    if (!IS_LETTER(CUR) && (CUR != '_') &&
2672        (CUR != ':')) return(NULL);
2673
2674    while ((i < DOCB_PARSER_BUFFER_SIZE) &&
2675           ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2676          (CUR == ':') || (CUR == '_'))) {
2677       if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
2678        else loc[i] = CUR;
2679       i++;
2680
2681       NEXT;
2682    }
2683
2684    ret = xmlStrndup(loc, i);
2685
2686    return(ret);
2687}
2688
2689/**
2690 * docbParseName:
2691 * @ctxt:  an SGML parser context
2692 *
2693 * parse an SGML name, this routine is case sensitive.
2694 *
2695 * Returns the Name parsed or NULL
2696 */
2697
2698static xmlChar *
2699docbParseName(docbParserCtxtPtr ctxt) {
2700    xmlChar buf[DOCB_MAX_NAMELEN];
2701    int len = 0;
2702
2703    GROW;
2704    if (!IS_LETTER(CUR) && (CUR != '_')) {
2705       return(NULL);
2706    }
2707
2708    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2709           (CUR == '.') || (CUR == '-') ||
2710          (CUR == '_') || (CUR == ':') ||
2711          (IS_COMBINING(CUR)) ||
2712          (IS_EXTENDER(CUR))) {
2713       buf[len++] = CUR;
2714       NEXT;
2715       if (len >= DOCB_MAX_NAMELEN) {
2716           xmlGenericError(xmlGenericErrorContext,
2717              "docbParseName: reached DOCB_MAX_NAMELEN limit\n");
2718           while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2719                  (CUR == '.') || (CUR == '-') ||
2720                  (CUR == '_') || (CUR == ':') ||
2721                  (IS_COMBINING(CUR)) ||
2722                  (IS_EXTENDER(CUR)))
2723                NEXT;
2724           break;
2725       }
2726    }
2727    return(xmlStrndup(buf, len));
2728}
2729
2730/**
2731 * docbParseSGMLAttribute:
2732 * @ctxt:  an SGML parser context
2733 * @stop:  a char stop value
2734 *
2735 * parse an SGML attribute value till the stop (quote), if
2736 * stop is 0 then it stops at the first space
2737 *
2738 * Returns the attribute parsed or NULL
2739 */
2740
2741static xmlChar *
2742docbParseSGMLAttribute(docbParserCtxtPtr ctxt, const xmlChar stop) {
2743    xmlChar *buffer = NULL;
2744    int buffer_size = 0;
2745    xmlChar *out = NULL;
2746    xmlChar *name = NULL;
2747
2748    xmlChar *cur = NULL;
2749    xmlEntityPtr xent;
2750    docbEntityDescPtr ent;
2751
2752    /*
2753     * allocate a translation buffer.
2754     */
2755    buffer_size = DOCB_PARSER_BIG_BUFFER_SIZE;
2756    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2757    if (buffer == NULL) {
2758       perror("docbParseSGMLAttribute: malloc failed");
2759       return(NULL);
2760    }
2761    out = buffer;
2762
2763    /*
2764     * Ok loop until we reach one of the ending chars
2765     */
2766    while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
2767       if ((stop == 0) && (IS_BLANK(CUR))) break;
2768        if (CUR == '&') {
2769           if (NXT(1) == '#') {
2770               unsigned int c;
2771               int bits;
2772
2773               c = docbParseCharRef(ctxt);
2774               if      (c <    0x80)
2775                       { *out++  = c;                bits= -6; }
2776               else if (c <   0x800)
2777                       { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
2778               else if (c < 0x10000)
2779                       { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
2780               else
2781                       { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
2782
2783               for ( ; bits >= 0; bits-= 6) {
2784                   *out++  = ((c >> bits) & 0x3F) | 0x80;
2785               }
2786           } else {
2787               xent = docbParseEntityRef(ctxt, &name);
2788               if (name == NULL) {
2789                   *out++ = '&';
2790                   if (out - buffer > buffer_size - 100) {
2791                       int indx = out - buffer;
2792
2793                       growBuffer(buffer);
2794                       out = &buffer[indx];
2795                   }
2796                   *out++ = '&';
2797               } else {
2798		   ent = docbEntityLookup(name);
2799		   if (ent == NULL) {
2800		       *out++ = '&';
2801		       cur = name;
2802		       while (*cur != 0) {
2803			   if (out - buffer > buffer_size - 100) {
2804			       int indx = out - buffer;
2805
2806			       growBuffer(buffer);
2807			       out = &buffer[indx];
2808			   }
2809			   *out++ = *cur++;
2810		       }
2811		       xmlFree(name);
2812		   } else {
2813		       unsigned int c;
2814		       int bits;
2815
2816		       if (out - buffer > buffer_size - 100) {
2817			   int indx = out - buffer;
2818
2819			   growBuffer(buffer);
2820			   out = &buffer[indx];
2821		       }
2822		       c = (xmlChar)ent->value;
2823		       if      (c <    0x80)
2824			   { *out++  = c;                bits= -6; }
2825		       else if (c <   0x800)
2826			   { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
2827		       else if (c < 0x10000)
2828			   { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
2829		       else
2830			   { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
2831
2832		       for ( ; bits >= 0; bits-= 6) {
2833			   *out++  = ((c >> bits) & 0x3F) | 0x80;
2834		       }
2835		       xmlFree(name);
2836		   }
2837	       }
2838           }
2839       } else {
2840           unsigned int c;
2841           int bits;
2842
2843           if (out - buffer > buffer_size - 100) {
2844               int indx = out - buffer;
2845
2846               growBuffer(buffer);
2847               out = &buffer[indx];
2848           }
2849           c = CUR;
2850           if      (c <    0x80)
2851                   { *out++  = c;                bits= -6; }
2852           else if (c <   0x800)
2853                   { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
2854           else if (c < 0x10000)
2855                   { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
2856           else
2857                   { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
2858
2859           for ( ; bits >= 0; bits-= 6) {
2860               *out++  = ((c >> bits) & 0x3F) | 0x80;
2861           }
2862           NEXT;
2863       }
2864    }
2865    *out++ = 0;
2866    return(buffer);
2867}
2868
2869
2870/**
2871 * docbParseEntityRef:
2872 * @ctxt:  an SGML parser context
2873 * @str:  location to store the entity name
2874 *
2875 * parse an SGML ENTITY references
2876 *
2877 * [68] EntityRef ::= '&' Name ';'
2878 *
2879 * Returns the associated xmlEntityPtr if found, or NULL otherwise,
2880 *         if non-NULL *str will have to be freed by the caller.
2881 */
2882static xmlEntityPtr
2883docbParseEntityRef(docbParserCtxtPtr ctxt, xmlChar **str) {
2884    xmlChar *name;
2885    xmlEntityPtr ent = NULL;
2886    *str = NULL;
2887
2888    if (CUR == '&') {
2889        NEXT;
2890        name = docbParseName(ctxt);
2891        if (name == NULL) {
2892            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2893                ctxt->sax->error(ctxt->userData,
2894			   "docbParseEntityRef: no name\n");
2895            ctxt->wellFormed = 0;
2896        } else {
2897           GROW;
2898            if (CUR == ';') {
2899                *str = name;
2900
2901		/*
2902		 * Ask first SAX for entity resolution, otherwise try the
2903		 * predefined set.
2904		 */
2905		if (ctxt->sax != NULL) {
2906		    if (ctxt->sax->getEntity != NULL)
2907			ent = ctxt->sax->getEntity(ctxt->userData, name);
2908		    if (ent == NULL)
2909		        ent = xmlGetPredefinedEntity(name);
2910		}
2911	        NEXT;
2912            } else {
2913                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2914                    ctxt->sax->error(ctxt->userData,
2915                                    "docbParseEntityRef: expecting ';'\n");
2916                *str = name;
2917            }
2918        }
2919    }
2920    return(ent);
2921}
2922
2923/**
2924 * docbParseAttValue:
2925 * @ctxt:  an SGML parser context
2926 *
2927 * parse a value for an attribute
2928 * Note: the parser won't do substitution of entities here, this
2929 * will be handled later in xmlStringGetNodeList, unless it was
2930 * asked for ctxt->replaceEntities != 0
2931 *
2932 * Returns the AttValue parsed or NULL.
2933 */
2934
2935static xmlChar *
2936docbParseAttValue(docbParserCtxtPtr ctxt) {
2937    xmlChar *ret = NULL;
2938
2939    if (CUR == '"') {
2940        NEXT;
2941       ret = docbParseSGMLAttribute(ctxt, '"');
2942        if (CUR != '"') {
2943           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2944               ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2945           ctxt->wellFormed = 0;
2946       } else
2947           NEXT;
2948    } else if (CUR == '\'') {
2949        NEXT;
2950       ret = docbParseSGMLAttribute(ctxt, '\'');
2951        if (CUR != '\'') {
2952           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2953               ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2954           ctxt->wellFormed = 0;
2955       } else
2956           NEXT;
2957    } else {
2958        /*
2959        * That's an SGMLism, the attribute value may not be quoted
2960        */
2961       ret = docbParseSGMLAttribute(ctxt, 0);
2962       if (ret == NULL) {
2963           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2964               ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
2965           ctxt->wellFormed = 0;
2966       }
2967    }
2968    return(ret);
2969}
2970
2971/**
2972 * docbParseSystemLiteral:
2973 * @ctxt:  an SGML parser context
2974 *
2975 * parse an SGML Literal
2976 *
2977 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2978 *
2979 * Returns the SystemLiteral parsed or NULL
2980 */
2981
2982static xmlChar *
2983docbParseSystemLiteral(docbParserCtxtPtr ctxt) {
2984    const xmlChar *q;
2985    xmlChar *ret = NULL;
2986
2987    if (CUR == '"') {
2988        NEXT;
2989       q = CUR_PTR;
2990       while ((IS_CHAR(CUR)) && (CUR != '"'))
2991           NEXT;
2992       if (!IS_CHAR(CUR)) {
2993           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2994               ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2995           ctxt->wellFormed = 0;
2996       } else {
2997           ret = xmlStrndup(q, CUR_PTR - q);
2998           NEXT;
2999        }
3000    } else if (CUR == '\'') {
3001        NEXT;
3002       q = CUR_PTR;
3003       while ((IS_CHAR(CUR)) && (CUR != '\''))
3004           NEXT;
3005       if (!IS_CHAR(CUR)) {
3006           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3007               ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3008           ctxt->wellFormed = 0;
3009       } else {
3010           ret = xmlStrndup(q, CUR_PTR - q);
3011           NEXT;
3012        }
3013    } else {
3014       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3015           ctxt->sax->error(ctxt->userData,
3016                            "SystemLiteral \" or ' expected\n");
3017       ctxt->wellFormed = 0;
3018    }
3019
3020    return(ret);
3021}
3022
3023/**
3024 * docbParsePubidLiteral:
3025 * @ctxt:  an SGML parser context
3026 *
3027 * parse an SGML public literal
3028 *
3029 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3030 *
3031 * Returns the PubidLiteral parsed or NULL.
3032 */
3033
3034static xmlChar *
3035docbParsePubidLiteral(docbParserCtxtPtr ctxt) {
3036    const xmlChar *q;
3037    xmlChar *ret = NULL;
3038    /*
3039     * Name ::= (Letter | '_') (NameChar)*
3040     */
3041    if (CUR == '"') {
3042        NEXT;
3043       q = CUR_PTR;
3044       while (IS_PUBIDCHAR(CUR)) NEXT;
3045       if (CUR != '"') {
3046           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047               ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3048           ctxt->wellFormed = 0;
3049       } else {
3050           ret = xmlStrndup(q, CUR_PTR - q);
3051           NEXT;
3052       }
3053    } else if (CUR == '\'') {
3054        NEXT;
3055       q = CUR_PTR;
3056       while ((IS_LETTER(CUR)) && (CUR != '\''))
3057           NEXT;
3058       if (!IS_LETTER(CUR)) {
3059           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3060               ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3061           ctxt->wellFormed = 0;
3062       } else {
3063           ret = xmlStrndup(q, CUR_PTR - q);
3064           NEXT;
3065       }
3066    } else {
3067       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3068           ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
3069       ctxt->wellFormed = 0;
3070    }
3071
3072    return(ret);
3073}
3074
3075/**
3076 * docbParseCharData:
3077 * @ctxt:  an SGML parser context
3078 * @cdata:  int indicating whether we are within a CDATA section
3079 *
3080 * parse a CharData section.
3081 * if we are within a CDATA section ']]>' marks an end of section.
3082 *
3083 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3084 */
3085
3086static void
3087docbParseCharData(docbParserCtxtPtr ctxt) {
3088    xmlChar buf[DOCB_PARSER_BIG_BUFFER_SIZE + 5];
3089    int nbchar = 0;
3090    int cur, l;
3091
3092    SHRINK;
3093    cur = CUR_CHAR(l);
3094    while (((cur != '<') || (ctxt->token == '<')) &&
3095           ((cur != '&') || (ctxt->token == '&')) &&
3096          (IS_CHAR(cur))) {
3097       COPY_BUF(l,buf,nbchar,cur);
3098       if (nbchar >= DOCB_PARSER_BIG_BUFFER_SIZE) {
3099           /*
3100            * Ok the segment is to be consumed as chars.
3101            */
3102           if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3103               if (areBlanks(ctxt, buf, nbchar)) {
3104                   if (ctxt->sax->ignorableWhitespace != NULL)
3105                       ctxt->sax->ignorableWhitespace(ctxt->userData,
3106                                                      buf, nbchar);
3107               } else {
3108                   docbCheckParagraph(ctxt);
3109                   if (ctxt->sax->characters != NULL)
3110                       ctxt->sax->characters(ctxt->userData, buf, nbchar);
3111               }
3112           }
3113           nbchar = 0;
3114       }
3115       NEXTL(l);
3116       cur = CUR_CHAR(l);
3117    }
3118    if (nbchar != 0) {
3119       /*
3120        * Ok the segment is to be consumed as chars.
3121        */
3122       if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3123           if (areBlanks(ctxt, buf, nbchar)) {
3124               if (ctxt->sax->ignorableWhitespace != NULL)
3125                   ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3126           } else {
3127               docbCheckParagraph(ctxt);
3128               if (ctxt->sax->characters != NULL)
3129                   ctxt->sax->characters(ctxt->userData, buf, nbchar);
3130           }
3131       }
3132    }
3133}
3134
3135/**
3136 * docbParseExternalID:
3137 * @ctxt:  an SGML parser context
3138 * @publicID:  a xmlChar** receiving PubidLiteral
3139 *
3140 * Parse an External ID or a Public ID
3141 *
3142 * Returns the function returns SystemLiteral and in the second
3143 *                case publicID receives PubidLiteral,
3144 *                it is possible to return NULL and have publicID set.
3145 */
3146
3147static xmlChar *
3148docbParseExternalID(docbParserCtxtPtr ctxt, xmlChar **publicID) {
3149    xmlChar *URI = NULL;
3150
3151    if ((UPPER == 'S') && (UPP(1) == 'Y') &&
3152         (UPP(2) == 'S') && (UPP(3) == 'T') &&
3153        (UPP(4) == 'E') && (UPP(5) == 'M')) {
3154        SKIP(6);
3155       if (!IS_BLANK(CUR)) {
3156           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3157               ctxt->sax->error(ctxt->userData,
3158                   "Space required after 'SYSTEM'\n");
3159           ctxt->wellFormed = 0;
3160       }
3161        SKIP_BLANKS;
3162       URI = docbParseSystemLiteral(ctxt);
3163       if (URI == NULL) {
3164           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3165               ctxt->sax->error(ctxt->userData,
3166                 "docbParseExternalID: SYSTEM, no URI\n");
3167           ctxt->wellFormed = 0;
3168        }
3169    } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
3170              (UPP(2) == 'B') && (UPP(3) == 'L') &&
3171              (UPP(4) == 'I') && (UPP(5) == 'C')) {
3172        SKIP(6);
3173       if (!IS_BLANK(CUR)) {
3174           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175               ctxt->sax->error(ctxt->userData,
3176                   "Space required after 'PUBLIC'\n");
3177           ctxt->wellFormed = 0;
3178       }
3179        SKIP_BLANKS;
3180       *publicID = docbParsePubidLiteral(ctxt);
3181       if (*publicID == NULL) {
3182           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3183               ctxt->sax->error(ctxt->userData,
3184                 "docbParseExternalID: PUBLIC, no Public Identifier\n");
3185           ctxt->wellFormed = 0;
3186       }
3187        SKIP_BLANKS;
3188        if ((CUR == '"') || (CUR == '\'')) {
3189           URI = docbParseSystemLiteral(ctxt);
3190       }
3191    }
3192    return(URI);
3193}
3194
3195/**
3196 * docbParsePI:
3197 * @ctxt:  an XML parser context
3198 *
3199 * parse an XML Processing Instruction.
3200 *
3201 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3202 *
3203 * The processing is transfered to SAX once parsed.
3204 */
3205
3206static void
3207docbParsePI(xmlParserCtxtPtr ctxt) {
3208    xmlChar *buf = NULL;
3209    int len = 0;
3210    int size = DOCB_PARSER_BUFFER_SIZE;
3211    int cur, l;
3212    xmlChar *target;
3213    xmlParserInputState state;
3214    int count = 0;
3215
3216    if ((RAW == '<') && (NXT(1) == '?')) {
3217	xmlParserInputPtr input = ctxt->input;
3218	state = ctxt->instate;
3219        ctxt->instate = XML_PARSER_PI;
3220	/*
3221	 * this is a Processing Instruction.
3222	 */
3223	SKIP(2);
3224	SHRINK;
3225
3226	/*
3227	 * Parse the target name and check for special support like
3228	 * namespace.
3229	 */
3230	target = xmlParseName(ctxt);
3231	if (target != NULL) {
3232	    xmlChar *encoding = NULL;
3233
3234	    if ((RAW == '?') && (NXT(1) == '>')) {
3235		if (input != ctxt->input) {
3236		    ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3237		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238			ctxt->sax->error(ctxt->userData,
3239    "PI declaration doesn't start and stop in the same entity\n");
3240		    ctxt->wellFormed = 0;
3241		    ctxt->disableSAX = 1;
3242		}
3243		SKIP(2);
3244
3245		/*
3246		 * SAX: PI detected.
3247		 */
3248		if ((ctxt->sax) && (!ctxt->disableSAX) &&
3249		    (ctxt->sax->processingInstruction != NULL))
3250		    ctxt->sax->processingInstruction(ctxt->userData,
3251		                                     target, NULL);
3252		ctxt->instate = state;
3253		xmlFree(target);
3254		return;
3255	    }
3256	    if (xmlStrEqual(target, BAD_CAST "sgml-declaration")) {
3257
3258		encoding = xmlParseEncodingDecl(ctxt);
3259		if (encoding == NULL) {
3260		    xmlGenericError(xmlGenericErrorContext,
3261			"sgml-declaration: failed to find/handle encoding\n");
3262#ifdef DEBUG
3263		} else {
3264		    xmlGenericError(xmlGenericErrorContext,
3265			    "switched to encoding %s\n", encoding);
3266#endif
3267		}
3268
3269	    }
3270	    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3271	    if (buf == NULL) {
3272		xmlGenericError(xmlGenericErrorContext,
3273			"malloc of %d byte failed\n", size);
3274		ctxt->instate = state;
3275		return;
3276	    }
3277	    cur = CUR;
3278	    if (encoding != NULL) {
3279		len = snprintf((char *) buf, size - 1,
3280			       " encoding = \"%s\"", encoding);
3281		if (len < 0)
3282		    len = size;
3283	    } else {
3284		if (!IS_BLANK(cur)) {
3285		    ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3286		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287			ctxt->sax->error(ctxt->userData,
3288			  "docbParsePI: PI %s space expected\n", target);
3289		    ctxt->wellFormed = 0;
3290		    ctxt->disableSAX = 1;
3291		}
3292		SKIP_BLANKS;
3293	    }
3294	    cur = CUR_CHAR(l);
3295	    while (IS_CHAR(cur) && /* checked */
3296		   ((cur != '?') || (NXT(1) != '>'))) {
3297		if (len + 5 >= size) {
3298		    size *= 2;
3299		    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3300		    if (buf == NULL) {
3301			xmlGenericError(xmlGenericErrorContext,
3302				"realloc of %d byte failed\n", size);
3303			ctxt->instate = state;
3304			return;
3305		    }
3306		}
3307		count++;
3308		if (count > 50) {
3309		    GROW;
3310		    count = 0;
3311		}
3312		COPY_BUF(l,buf,len,cur);
3313		NEXTL(l);
3314		cur = CUR_CHAR(l);
3315		if (cur == 0) {
3316		    SHRINK;
3317		    GROW;
3318		    cur = CUR_CHAR(l);
3319		}
3320	    }
3321	    buf[len] = 0;
3322	    if (cur != '?') {
3323		ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3324		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3325		    ctxt->sax->error(ctxt->userData,
3326		      "docbParsePI: PI %s never end ...\n", target);
3327		ctxt->wellFormed = 0;
3328		ctxt->disableSAX = 1;
3329	    } else {
3330		if (input != ctxt->input) {
3331		    ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3332		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333			ctxt->sax->error(ctxt->userData,
3334    "PI declaration doesn't start and stop in the same entity\n");
3335		    ctxt->wellFormed = 0;
3336		    ctxt->disableSAX = 1;
3337		}
3338		SKIP(2);
3339
3340		/*
3341		 * SAX: PI detected.
3342		 */
3343		if ((ctxt->sax) && (!ctxt->disableSAX) &&
3344		    (ctxt->sax->processingInstruction != NULL))
3345		    ctxt->sax->processingInstruction(ctxt->userData,
3346		                                     target, buf);
3347	    }
3348	    xmlFree(buf);
3349	    xmlFree(target);
3350	} else {
3351	    ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3352	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353	        ctxt->sax->error(ctxt->userData,
3354		       "docbParsePI : no target name\n");
3355	    ctxt->wellFormed = 0;
3356	    ctxt->disableSAX = 1;
3357	}
3358	ctxt->instate = state;
3359    }
3360}
3361
3362/**
3363 * docbParseComment:
3364 * @ctxt:  an SGML parser context
3365 *
3366 * Parse an XML (SGML) comment <!-- .... -->
3367 *
3368 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3369 */
3370static void
3371docbParseComment(docbParserCtxtPtr ctxt) {
3372    xmlChar *buf = NULL;
3373    int len;
3374    int size = DOCB_PARSER_BUFFER_SIZE;
3375    int q, ql;
3376    int r, rl;
3377    int cur, l;
3378    xmlParserInputState state;
3379
3380    /*
3381     * Check that there is a comment right here.
3382     */
3383    if ((RAW != '<') || (NXT(1) != '!') ||
3384        (NXT(2) != '-') || (NXT(3) != '-')) return;
3385
3386    state = ctxt->instate;
3387    ctxt->instate = XML_PARSER_COMMENT;
3388    SHRINK;
3389    SKIP(4);
3390    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3391    if (buf == NULL) {
3392       xmlGenericError(xmlGenericErrorContext,
3393               "malloc of %d byte failed\n", size);
3394       ctxt->instate = state;
3395       return;
3396    }
3397    q = CUR_CHAR(ql);
3398    NEXTL(ql);
3399    r = CUR_CHAR(rl);
3400    NEXTL(rl);
3401    cur = CUR_CHAR(l);
3402    len = 0;
3403    while (IS_CHAR(cur) &&
3404           ((cur != '>') ||
3405           (r != '-') || (q != '-'))) {
3406       if (len + 5 >= size) {
3407           size *= 2;
3408           buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3409           if (buf == NULL) {
3410               xmlGenericError(xmlGenericErrorContext,
3411                       "realloc of %d byte failed\n", size);
3412               ctxt->instate = state;
3413               return;
3414           }
3415       }
3416       COPY_BUF(ql,buf,len,q);
3417       q = r;
3418       ql = rl;
3419       r = cur;
3420       rl = l;
3421       NEXTL(l);
3422       cur = CUR_CHAR(l);
3423       if (cur == 0) {
3424           SHRINK;
3425           GROW;
3426           cur = CUR_CHAR(l);
3427       }
3428    }
3429    buf[len] = 0;
3430    if (!IS_CHAR(cur)) {
3431       ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3432       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3433           ctxt->sax->error(ctxt->userData,
3434                            "Comment not terminated \n<!--%.50s\n", buf);
3435       ctxt->wellFormed = 0;
3436       xmlFree(buf);
3437    } else {
3438        NEXT;
3439       if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3440           (!ctxt->disableSAX))
3441           ctxt->sax->comment(ctxt->userData, buf);
3442       xmlFree(buf);
3443    }
3444    ctxt->instate = state;
3445}
3446
3447/**
3448 * docbParseCharRef:
3449 * @ctxt:  an SGML parser context
3450 *
3451 * parse Reference declarations
3452 *
3453 * [66] CharRef ::= '&#' [0-9]+ ';' |
3454 *                  '&#x' [0-9a-fA-F]+ ';'
3455 *
3456 * Returns the value parsed (as an int)
3457 */
3458static int
3459docbParseCharRef(docbParserCtxtPtr ctxt) {
3460    int val = 0;
3461
3462    if ((CUR == '&') && (NXT(1) == '#') &&
3463        (NXT(2) == 'x')) {
3464       SKIP(3);
3465       while (CUR != ';') {
3466           if ((CUR >= '0') && (CUR <= '9'))
3467               val = val * 16 + (CUR - '0');
3468           else if ((CUR >= 'a') && (CUR <= 'f'))
3469               val = val * 16 + (CUR - 'a') + 10;
3470           else if ((CUR >= 'A') && (CUR <= 'F'))
3471               val = val * 16 + (CUR - 'A') + 10;
3472           else {
3473               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3474                   ctxt->sax->error(ctxt->userData,
3475                        "docbParseCharRef: invalid hexadecimal value\n");
3476               ctxt->wellFormed = 0;
3477               val = 0;
3478               break;
3479           }
3480           NEXT;
3481       }
3482       if (CUR == ';')
3483           NEXT;
3484    } else if  ((CUR == '&') && (NXT(1) == '#')) {
3485       SKIP(2);
3486       while (CUR != ';') {
3487           if ((CUR >= '0') && (CUR <= '9'))
3488               val = val * 10 + (CUR - '0');
3489           else {
3490               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491                   ctxt->sax->error(ctxt->userData,
3492                        "docbParseCharRef: invalid decimal value\n");
3493               ctxt->wellFormed = 0;
3494               val = 0;
3495               break;
3496           }
3497           NEXT;
3498       }
3499       if (CUR == ';')
3500           NEXT;
3501    } else {
3502       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3503           ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid value\n");
3504       ctxt->wellFormed = 0;
3505    }
3506    /*
3507     * Check the value IS_CHAR ...
3508     */
3509    if (IS_CHAR(val)) {
3510        return(val);
3511    } else {
3512       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3513           ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid xmlChar value %d\n",
3514                            val);
3515       ctxt->wellFormed = 0;
3516    }
3517    return(0);
3518}
3519
3520
3521/**
3522 * docbParseDocTypeDecl :
3523 * @ctxt:  an SGML parser context
3524 *
3525 * parse a DOCTYPE declaration
3526 *
3527 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3528 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
3529 */
3530
3531static void
3532docbParseDocTypeDecl(docbParserCtxtPtr ctxt) {
3533    xmlChar *name;
3534    xmlChar *ExternalID = NULL;
3535    xmlChar *URI = NULL;
3536
3537    /*
3538     * We know that '<!DOCTYPE' has been detected.
3539     */
3540    SKIP(9);
3541
3542    SKIP_BLANKS;
3543
3544    /*
3545     * Parse the DOCTYPE name.
3546     */
3547    name = docbParseName(ctxt);
3548    if (name == NULL) {
3549       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3550           ctxt->sax->error(ctxt->userData, "docbParseDocTypeDecl : no DOCTYPE name !\n");
3551       ctxt->wellFormed = 0;
3552    }
3553    /*
3554     * Check that upper(name) == "SGML" !!!!!!!!!!!!!
3555     */
3556
3557    SKIP_BLANKS;
3558
3559    /*
3560     * Check for SystemID and ExternalID
3561     */
3562    URI = docbParseExternalID(ctxt, &ExternalID);
3563    SKIP_BLANKS;
3564
3565    /*
3566     * Create or update the document accordingly to the DOCTYPE
3567     * But use the predefined PUBLIC and SYSTEM ID of DocBook XML
3568     */
3569    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3570       (!ctxt->disableSAX))
3571       ctxt->sax->internalSubset(ctxt->userData, name,
3572	                         XML_DOCBOOK_XML_PUBLIC,
3573				 XML_DOCBOOK_XML_SYSTEM);
3574
3575    if (RAW != '>') {
3576       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577           ctxt->sax->error(ctxt->userData,
3578		   "docbParseDocTypeDecl : internal subset not handled\n");
3579    } else {
3580	NEXT;
3581    }
3582
3583    /*
3584     * Cleanup, since we don't use all those identifiers
3585     */
3586    if (URI != NULL) xmlFree(URI);
3587    if (ExternalID != NULL) xmlFree(ExternalID);
3588    if (name != NULL) xmlFree(name);
3589}
3590
3591/**
3592 * docbParseAttribute:
3593 * @ctxt:  an SGML parser context
3594 * @value:  a xmlChar ** used to store the value of the attribute
3595 *
3596 * parse an attribute
3597 *
3598 * [41] Attribute ::= Name Eq AttValue
3599 *
3600 * [25] Eq ::= S? '=' S?
3601 *
3602 * With namespace:
3603 *
3604 * [NS 11] Attribute ::= QName Eq AttValue
3605 *
3606 * Also the case QName == xmlns:??? is handled independently as a namespace
3607 * definition.
3608 *
3609 * Returns the attribute name, and the value in *value.
3610 */
3611
3612static xmlChar *
3613docbParseAttribute(docbParserCtxtPtr ctxt, xmlChar **value) {
3614    xmlChar *name, *val = NULL;
3615
3616    *value = NULL;
3617    name = docbParseName(ctxt);
3618    if (name == NULL) {
3619       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620           ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
3621       ctxt->wellFormed = 0;
3622        return(NULL);
3623    }
3624
3625    /*
3626     * read the value
3627     */
3628    SKIP_BLANKS;
3629    if (CUR == '=') {
3630        NEXT;
3631       SKIP_BLANKS;
3632       val = docbParseAttValue(ctxt);
3633       /******
3634    } else {
3635        * TODO : some attribute must have values, some may not
3636       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3637           ctxt->sax->warning(ctxt->userData,
3638              "No value for attribute %s\n", name); */
3639    }
3640
3641    *value = val;
3642    return(name);
3643}
3644
3645/**
3646 * docbCheckEncoding:
3647 * @ctxt:  an SGML parser context
3648 * @attvalue: the attribute value
3649 *
3650 * Checks an http-equiv attribute from a Meta tag to detect
3651 * the encoding
3652 * If a new encoding is detected the parser is switched to decode
3653 * it and pass UTF8
3654 */
3655static void
3656docbCheckEncoding(docbParserCtxtPtr ctxt, const xmlChar *attvalue) {
3657    const xmlChar *encoding;
3658
3659    if ((ctxt == NULL) || (attvalue == NULL))
3660       return;
3661
3662    encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
3663    if (encoding == NULL)
3664       encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
3665    if (encoding == NULL)
3666       encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
3667    if (encoding != NULL) {
3668       encoding += 8;
3669    } else {
3670       encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
3671       if (encoding == NULL)
3672           encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
3673       if (encoding == NULL)
3674           encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
3675       if (encoding != NULL)
3676           encoding += 9;
3677    }
3678    /*
3679     * Restricted from 2.3.5 */
3680    if (encoding != NULL) {
3681       xmlCharEncoding enc;
3682
3683       if (ctxt->input->encoding != NULL)
3684           xmlFree((xmlChar *) ctxt->input->encoding);
3685       ctxt->input->encoding = encoding;
3686
3687       enc = xmlParseCharEncoding((const char *) encoding);
3688       if (enc == XML_CHAR_ENCODING_8859_1) {
3689           ctxt->charset = XML_CHAR_ENCODING_8859_1;
3690       } else if (enc != XML_CHAR_ENCODING_UTF8) {
3691           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3692               ctxt->sax->error(ctxt->userData,
3693                    "Unsupported encoding %s\n", encoding);
3694           /* xmlFree(encoding); */
3695           ctxt->wellFormed = 0;
3696           ctxt->disableSAX = 1;
3697           ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
3698       }
3699    }
3700}
3701
3702/**
3703 * docbCheckMeta:
3704 * @ctxt:  an SGML parser context
3705 * @atts:  the attributes values
3706 *
3707 * Checks an attributes from a Meta tag
3708 */
3709static void
3710docbCheckMeta(docbParserCtxtPtr ctxt, const xmlChar **atts) {
3711    int i;
3712    const xmlChar *att, *value;
3713    int http = 0;
3714    const xmlChar *content = NULL;
3715
3716    if ((ctxt == NULL) || (atts == NULL))
3717       return;
3718
3719    i = 0;
3720    att = atts[i++];
3721    while (att != NULL) {
3722       value = atts[i++];
3723       if ((value != NULL) &&
3724           ((xmlStrEqual(att, BAD_CAST"http-equiv")) ||
3725            (xmlStrEqual(att, BAD_CAST"Http-Equiv")) ||
3726            (xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
3727           ((xmlStrEqual(value, BAD_CAST"Content-Type")) ||
3728            (xmlStrEqual(value, BAD_CAST"content-type")) ||
3729            (xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
3730           http = 1;
3731       else if ((value != NULL) &&
3732                ((xmlStrEqual(att, BAD_CAST"content")) ||
3733                 (xmlStrEqual(att, BAD_CAST"Content")) ||
3734                 (xmlStrEqual(att, BAD_CAST"CONTENT"))))
3735           content = value;
3736       att = atts[i++];
3737    }
3738    if ((http) && (content != NULL))
3739       docbCheckEncoding(ctxt, content);
3740
3741}
3742
3743/**
3744 * docbParseStartTag:
3745 * @ctxt:  an SGML parser context
3746 *
3747 * parse a start of tag either for rule element or
3748 * EmptyElement. In both case we don't parse the tag closing chars.
3749 *
3750 * [40] STag ::= '<' Name (S Attribute)* S? '>'
3751 *
3752 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3753 *
3754 * With namespace:
3755 *
3756 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3757 *
3758 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
3759 *
3760 */
3761
3762static void
3763docbParseStartTag(docbParserCtxtPtr ctxt) {
3764    xmlChar *name;
3765    xmlChar *attname;
3766    xmlChar *attvalue;
3767    const xmlChar **atts = NULL;
3768    int nbatts = 0;
3769    int maxatts = 0;
3770    int meta = 0;
3771    int i;
3772
3773    if (CUR != '<') return;
3774    NEXT;
3775
3776    GROW;
3777    name = docbParseSGMLName(ctxt);
3778    if (name == NULL) {
3779       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3780           ctxt->sax->error(ctxt->userData,
3781            "docbParseStartTag: invalid element name\n");
3782       ctxt->wellFormed = 0;
3783        return;
3784    }
3785    if (xmlStrEqual(name, BAD_CAST"meta"))
3786       meta = 1;
3787
3788    /*
3789     * Check for auto-closure of SGML elements.
3790     */
3791    docbAutoClose(ctxt, name);
3792
3793    /*
3794     * Now parse the attributes, it ends up with the ending
3795     *
3796     * (S Attribute)* S?
3797     */
3798    SKIP_BLANKS;
3799    while ((IS_CHAR(CUR)) &&
3800           (CUR != '>') &&
3801          ((CUR != '/') || (NXT(1) != '>'))) {
3802       long cons = ctxt->nbChars;
3803
3804       GROW;
3805       attname = docbParseAttribute(ctxt, &attvalue);
3806        if (attname != NULL) {
3807
3808           /*
3809            * Well formedness requires at most one declaration of an attribute
3810            */
3811           for (i = 0; i < nbatts;i += 2) {
3812               if (xmlStrEqual(atts[i], attname)) {
3813                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3814                       ctxt->sax->error(ctxt->userData,
3815                                        "Attribute %s redefined\n",
3816                                        attname);
3817                   ctxt->wellFormed = 0;
3818                   xmlFree(attname);
3819                   if (attvalue != NULL)
3820                       xmlFree(attvalue);
3821                   goto failed;
3822               }
3823           }
3824
3825           /*
3826            * Add the pair to atts
3827            */
3828           if (atts == NULL) {
3829               maxatts = 10;
3830               atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
3831               if (atts == NULL) {
3832                   xmlGenericError(xmlGenericErrorContext,
3833                           "malloc of %ld byte failed\n",
3834                           maxatts * (long)sizeof(xmlChar *));
3835                   if (name != NULL) xmlFree(name);
3836                   return;
3837               }
3838           } else if (nbatts + 4 > maxatts) {
3839               maxatts *= 2;
3840               atts = (const xmlChar **) xmlRealloc((void *)atts, maxatts * sizeof(xmlChar *));
3841               if (atts == NULL) {
3842                   xmlGenericError(xmlGenericErrorContext,
3843                           "realloc of %ld byte failed\n",
3844                           maxatts * (long)sizeof(xmlChar *));
3845                   if (name != NULL) xmlFree(name);
3846                   return;
3847               }
3848           }
3849           atts[nbatts++] = attname;
3850           atts[nbatts++] = attvalue;
3851           atts[nbatts] = NULL;
3852           atts[nbatts + 1] = NULL;
3853       }
3854
3855failed:
3856       SKIP_BLANKS;
3857        if (cons == ctxt->nbChars) {
3858           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3859               ctxt->sax->error(ctxt->userData,
3860                "docbParseStartTag: problem parsing attributes\n");
3861           ctxt->wellFormed = 0;
3862           break;
3863       }
3864    }
3865
3866    /*
3867     * Handle specific association to the META tag
3868     */
3869    if (meta)
3870       docbCheckMeta(ctxt, atts);
3871
3872    /*
3873     * SAX: Start of Element !
3874     */
3875    docbnamePush(ctxt, xmlStrdup(name));
3876#ifdef DEBUG
3877    xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name);
3878#endif
3879    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
3880        ctxt->sax->startElement(ctxt->userData, name, atts);
3881
3882    if (atts != NULL) {
3883        for (i = 0;i < nbatts;i++) {
3884           if (atts[i] != NULL)
3885               xmlFree((xmlChar *) atts[i]);
3886       }
3887       xmlFree((void *) atts);
3888    }
3889    if (name != NULL) xmlFree(name);
3890}
3891
3892/**
3893 * docbParseEndTag:
3894 * @ctxt:  an SGML parser context
3895 *
3896 * parse an end of tag
3897 *
3898 * [42] ETag ::= '</' Name S? '>'
3899 *
3900 * With namespace
3901 *
3902 * [NS 9] ETag ::= '</' QName S? '>'
3903 */
3904
3905static void
3906docbParseEndTag(docbParserCtxtPtr ctxt) {
3907    xmlChar *name;
3908    xmlChar *oldname;
3909    int i;
3910
3911    if ((CUR != '<') || (NXT(1) != '/')) {
3912       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3913           ctxt->sax->error(ctxt->userData, "docbParseEndTag: '</' not found\n");
3914       ctxt->wellFormed = 0;
3915       return;
3916    }
3917    SKIP(2);
3918
3919    name = docbParseSGMLName(ctxt);
3920    if (name == NULL) {
3921       if (CUR == '>') {
3922           NEXT;
3923           oldname = docbnamePop(ctxt);
3924           if (oldname != NULL) {
3925               if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3926                   ctxt->sax->endElement(ctxt->userData, name);
3927#ifdef DEBUG
3928               xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname);
3929#endif
3930               xmlFree(oldname);
3931#ifdef DEBUG
3932           } else {
3933               xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n");
3934#endif
3935           }
3936           return;
3937       } else
3938           return;
3939    }
3940
3941    /*
3942     * We should definitely be at the ending "S? '>'" part
3943     */
3944    SKIP_BLANKS;
3945    if ((!IS_CHAR(CUR)) || (CUR != '>')) {
3946       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3947           ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
3948       ctxt->wellFormed = 0;
3949    } else
3950       NEXT;
3951
3952    /*
3953     * If the name read is not one of the element in the parsing stack
3954     * then return, it's just an error.
3955     */
3956    for (i = (ctxt->nameNr - 1);i >= 0;i--) {
3957        if (xmlStrEqual(name, ctxt->nameTab[i])) break;
3958    }
3959    if (i < 0) {
3960       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3961           ctxt->sax->error(ctxt->userData,
3962            "Unexpected end tag : %s\n", name);
3963       xmlFree(name);
3964       ctxt->wellFormed = 0;
3965       return;
3966    }
3967
3968
3969    /*
3970     * Check for auto-closure of SGML elements.
3971     */
3972
3973    docbAutoCloseOnClose(ctxt, name);
3974
3975    /*
3976     * Well formedness constraints, opening and closing must match.
3977     * With the exception that the autoclose may have popped stuff out
3978     * of the stack.
3979     */
3980    if (((name[0] != '/') || (name[1] != 0)) &&
3981       (!xmlStrEqual(name, ctxt->name))) {
3982#ifdef DEBUG
3983       xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
3984#endif
3985        if ((ctxt->name != NULL) &&
3986           (!xmlStrEqual(ctxt->name, name))) {
3987           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3988               ctxt->sax->error(ctxt->userData,
3989                "Opening and ending tag mismatch: %s and %s\n",
3990                                name, ctxt->name);
3991           ctxt->wellFormed = 0;
3992        }
3993    }
3994
3995    /*
3996     * SAX: End of Tag
3997     */
3998    oldname = ctxt->name;
3999    if (((name[0] == '/') && (name[1] == 0)) ||
4000       ((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
4001       if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4002           ctxt->sax->endElement(ctxt->userData, name);
4003       oldname = docbnamePop(ctxt);
4004       if (oldname != NULL) {
4005#ifdef DEBUG
4006           xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname);
4007#endif
4008           xmlFree(oldname);
4009#ifdef DEBUG
4010       } else {
4011           xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name);
4012#endif
4013       }
4014    }
4015
4016    if (name != NULL)
4017       xmlFree(name);
4018
4019    return;
4020}
4021
4022
4023/**
4024 * docbParseReference:
4025 * @ctxt:  an SGML parser context
4026 *
4027 * parse and handle entity references in content,
4028 * this will end-up in a call to character() since this is either a
4029 * CharRef, or a predefined entity.
4030 */
4031static void
4032docbParseReference(docbParserCtxtPtr ctxt) {
4033    docbEntityDescPtr ent;
4034    xmlEntityPtr xent;
4035    xmlChar out[6];
4036    xmlChar *name;
4037    if (CUR != '&') return;
4038
4039    if (NXT(1) == '#') {
4040       unsigned int c;
4041       int bits, i = 0;
4042
4043       c = docbParseCharRef(ctxt);
4044        if      (c <    0x80) { out[i++]= c;                bits= -6; }
4045        else if (c <   0x800) { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
4046        else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
4047        else                  { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
4048
4049        for ( ; bits >= 0; bits-= 6) {
4050            out[i++]= ((c >> bits) & 0x3F) | 0x80;
4051        }
4052       out[i] = 0;
4053
4054       docbCheckParagraph(ctxt);
4055       if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4056           ctxt->sax->characters(ctxt->userData, out, i);
4057    } else {
4058	/*
4059	 * Lookup the entity in the table.
4060	 */
4061       xent = docbParseEntityRef(ctxt, &name);
4062       if (xent != NULL) {
4063	    if (((ctxt->replaceEntities) || (ctxt->loadsubset)) &&
4064		((xent->children == NULL) &&
4065		(xent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))) {
4066		    /*
4067		     * we really need to fetch and parse the external entity
4068		     */
4069		    int parse;
4070		    xmlNodePtr children = NULL;
4071
4072		    parse = docbParseCtxtExternalEntity(ctxt,
4073			       xent->SystemID, xent->ExternalID, &children);
4074		    xmlAddChildList((xmlNodePtr) xent, children);
4075	    }
4076	    if (ctxt->replaceEntities) {
4077		if ((ctxt->node != NULL) && (xent->children != NULL)) {
4078		    /*
4079		     * Seems we are generating the DOM content, do
4080		     * a simple tree copy
4081		     */
4082		    xmlNodePtr new;
4083		    new = xmlCopyNodeList(xent->children);
4084
4085		    xmlAddChildList(ctxt->node, new);
4086		    /*
4087		     * This is to avoid a nasty side effect, see
4088		     * characters() in SAX.c
4089		     */
4090		    ctxt->nodemem = 0;
4091		    ctxt->nodelen = 0;
4092		}
4093	    } else {
4094		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4095		    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4096		    /*
4097		     * Create a node.
4098		     */
4099		    ctxt->sax->reference(ctxt->userData, xent->name);
4100		}
4101	    }
4102       } else if (name != NULL) {
4103	   ent = docbEntityLookup(name);
4104	   if ((ent == NULL) || (ent->value <= 0)) {
4105	       docbCheckParagraph(ctxt);
4106	       if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
4107		   ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
4108		   ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
4109		   /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
4110	       }
4111	   } else {
4112	       unsigned int c;
4113	       int bits, i = 0;
4114
4115	       c = ent->value;
4116	       if      (c <    0x80)
4117		       { out[i++]= c;                bits= -6; }
4118	       else if (c <   0x800)
4119		       { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
4120	       else if (c < 0x10000)
4121		       { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
4122	       else
4123		       { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
4124
4125	       for ( ; bits >= 0; bits-= 6) {
4126		   out[i++]= ((c >> bits) & 0x3F) | 0x80;
4127	       }
4128	       out[i] = 0;
4129
4130	       docbCheckParagraph(ctxt);
4131	       if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4132		   ctxt->sax->characters(ctxt->userData, out, i);
4133	   }
4134       } else {
4135           docbCheckParagraph(ctxt);
4136           if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4137               ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
4138           return;
4139       }
4140       if (name != NULL)
4141	   xmlFree(name);
4142    }
4143}
4144
4145/**
4146 * docbParseContent:
4147 * @ctxt:  an SGML parser context
4148 * @name:  the node name
4149 *
4150 * Parse a content: comment, sub-element, reference or text.
4151 *
4152 */
4153static void
4154docbParseContent(docbParserCtxtPtr ctxt)
4155{
4156    xmlChar *currentNode;
4157    int depth;
4158
4159    currentNode = xmlStrdup(ctxt->name);
4160    depth = ctxt->nameNr;
4161    while (1) {
4162        long cons = ctxt->nbChars;
4163
4164        GROW;
4165        /*
4166         * Our tag or one of it's parent or children is ending.
4167         */
4168        if ((CUR == '<') && (NXT(1) == '/')) {
4169            docbParseEndTag(ctxt);
4170            if (currentNode != NULL)
4171                xmlFree(currentNode);
4172            return;
4173        }
4174
4175        /*
4176         * Has this node been popped out during parsing of
4177         * the next element
4178         */
4179        if ((!xmlStrEqual(currentNode, ctxt->name)) &&
4180            (depth >= ctxt->nameNr)) {
4181            if (currentNode != NULL)
4182                xmlFree(currentNode);
4183            return;
4184        }
4185
4186        /*
4187         * Sometimes DOCTYPE arrives in the middle of the document
4188         */
4189        if ((CUR == '<') && (NXT(1) == '!') &&
4190            (UPP(2) == 'D') && (UPP(3) == 'O') &&
4191            (UPP(4) == 'C') && (UPP(5) == 'T') &&
4192            (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) {
4193            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4194                ctxt->sax->error(ctxt->userData,
4195                                 "Misplaced DOCTYPE declaration\n");
4196            ctxt->wellFormed = 0;
4197            docbParseDocTypeDecl(ctxt);
4198        }
4199
4200        /*
4201         * First case :  a comment
4202         */
4203        if ((CUR == '<') && (NXT(1) == '!') &&
4204            (NXT(2) == '-') && (NXT(3) == '-')) {
4205            docbParseComment(ctxt);
4206        }
4207
4208        /*
4209         * Second case :  a PI
4210         */
4211	else if ((RAW == '<') && (NXT(1) == '?')) {
4212            docbParsePI(ctxt);
4213        }
4214
4215        /*
4216         * Third case :  a sub-element.
4217         */
4218        else if (CUR == '<') {
4219            docbParseElement(ctxt);
4220        }
4221
4222        /*
4223         * Fourth case : a reference. If if has not been resolved,
4224         *    parsing returns it's Name, create the node
4225         */
4226        else if (CUR == '&') {
4227            docbParseReference(ctxt);
4228        }
4229
4230        /*
4231         * Fifth : end of the resource
4232         */
4233        else if (CUR == 0) {
4234            docbAutoClose(ctxt, NULL);
4235            if (ctxt->nameNr == 0)
4236                break;
4237        }
4238
4239        /*
4240         * Last case, text. Note that References are handled directly.
4241         */
4242        else {
4243            docbParseCharData(ctxt);
4244        }
4245
4246        if (cons == ctxt->nbChars) {
4247            if (ctxt->node != NULL) {
4248                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249                    ctxt->sax->error(ctxt->userData,
4250                                     "detected an error in element content\n");
4251                ctxt->wellFormed = 0;
4252            }
4253            break;
4254        }
4255
4256        GROW;
4257    }
4258    if (currentNode != NULL)
4259        xmlFree(currentNode);
4260}
4261
4262/**
4263 * docbParseElement:
4264 * @ctxt:  an SGML parser context
4265 *
4266 * parse an SGML element, this is highly recursive
4267 *
4268 * [39] element ::= EmptyElemTag | STag content ETag
4269 *
4270 * [41] Attribute ::= Name Eq AttValue
4271 */
4272
4273static void
4274docbParseElement(docbParserCtxtPtr ctxt) {
4275    xmlChar *name;
4276    xmlChar *currentNode = NULL;
4277    docbElemDescPtr info;
4278    docbParserNodeInfo node_info;
4279    xmlChar *oldname;
4280    int depth = ctxt->nameNr;
4281
4282    /* Capture start position */
4283    if (ctxt->record_info) {
4284        node_info.begin_pos = ctxt->input->consumed +
4285                          (CUR_PTR - ctxt->input->base);
4286       node_info.begin_line = ctxt->input->line;
4287    }
4288
4289    oldname = xmlStrdup(ctxt->name);
4290    docbParseStartTag(ctxt);
4291    name = ctxt->name;
4292#ifdef DEBUG
4293    if (oldname == NULL)
4294       xmlGenericError(xmlGenericErrorContext,
4295               "Start of element %s\n", name);
4296    else if (name == NULL)
4297       xmlGenericError(xmlGenericErrorContext,
4298               "Start of element failed, was %s\n", oldname);
4299    else
4300       xmlGenericError(xmlGenericErrorContext,
4301               "Start of element %s, was %s\n", name, oldname);
4302#endif
4303    if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) ||
4304        (name == NULL)) {
4305       if (CUR == '>')
4306           NEXT;
4307       if (oldname != NULL)
4308           xmlFree(oldname);
4309        return;
4310    }
4311    if (oldname != NULL)
4312       xmlFree(oldname);
4313
4314    /*
4315     * Lookup the info for that element.
4316     */
4317    info = docbTagLookup(name);
4318    if (info == NULL) {
4319       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4320           ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
4321                            name);
4322       ctxt->wellFormed = 0;
4323    } else if (info->depr) {
4324/***************************
4325       if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4326           ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
4327                              name);
4328 ***************************/
4329    }
4330
4331    /*
4332     * Check for an Empty Element labeled the XML/SGML way
4333     */
4334    if ((CUR == '/') && (NXT(1) == '>')) {
4335        SKIP(2);
4336       if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4337           ctxt->sax->endElement(ctxt->userData, name);
4338       oldname = docbnamePop(ctxt);
4339#ifdef DEBUG
4340        xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname);
4341#endif
4342       if (oldname != NULL)
4343           xmlFree(oldname);
4344       return;
4345    }
4346
4347    if (CUR == '>') {
4348        NEXT;
4349    } else {
4350       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4351           ctxt->sax->error(ctxt->userData,
4352                            "Couldn't find end of Start Tag %s\n",
4353                            name);
4354       ctxt->wellFormed = 0;
4355
4356       /*
4357        * end of parsing of this node.
4358        */
4359       if (xmlStrEqual(name, ctxt->name)) {
4360           nodePop(ctxt);
4361           oldname = docbnamePop(ctxt);
4362#ifdef DEBUG
4363           xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname);
4364#endif
4365           if (oldname != NULL)
4366               xmlFree(oldname);
4367       }
4368
4369       /*
4370        * Capture end position and add node
4371        */
4372       if ( currentNode != NULL && ctxt->record_info ) {
4373          node_info.end_pos = ctxt->input->consumed +
4374                             (CUR_PTR - ctxt->input->base);
4375          node_info.end_line = ctxt->input->line;
4376          node_info.node = ctxt->node;
4377          xmlParserAddNodeInfo(ctxt, &node_info);
4378       }
4379       return;
4380    }
4381
4382    /*
4383     * Check for an Empty Element from DTD definition
4384     */
4385    if ((info != NULL) && (info->empty)) {
4386       if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4387           ctxt->sax->endElement(ctxt->userData, name);
4388       oldname = docbnamePop(ctxt);
4389#ifdef DEBUG
4390       xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
4391#endif
4392       if (oldname != NULL)
4393           xmlFree(oldname);
4394       return;
4395    }
4396
4397    /*
4398     * Parse the content of the element:
4399     */
4400    currentNode = xmlStrdup(ctxt->name);
4401    depth = ctxt->nameNr;
4402    while (IS_CHAR(CUR)) {
4403       docbParseContent(ctxt);
4404       if (ctxt->nameNr < depth) break;
4405    }
4406
4407    if (!IS_CHAR(CUR)) {
4408       /************
4409       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4410           ctxt->sax->error(ctxt->userData,
4411                "Premature end of data in tag %s\n", currentNode);
4412       ctxt->wellFormed = 0;
4413        *************/
4414
4415       /*
4416        * end of parsing of this node.
4417        */
4418       nodePop(ctxt);
4419       oldname = docbnamePop(ctxt);
4420#ifdef DEBUG
4421       xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname);
4422#endif
4423       if (oldname != NULL)
4424           xmlFree(oldname);
4425       if (currentNode != NULL)
4426           xmlFree(currentNode);
4427       return;
4428    }
4429
4430    /*
4431     * Capture end position and add node
4432     */
4433    if ( currentNode != NULL && ctxt->record_info ) {
4434       node_info.end_pos = ctxt->input->consumed +
4435                          (CUR_PTR - ctxt->input->base);
4436       node_info.end_line = ctxt->input->line;
4437       node_info.node = ctxt->node;
4438       xmlParserAddNodeInfo(ctxt, &node_info);
4439    }
4440    if (currentNode != NULL)
4441       xmlFree(currentNode);
4442}
4443
4444/**
4445 * docbParseEntityDecl:
4446 * @ctxt:  an SGML parser context
4447 *
4448 * parse <!ENTITY declarations
4449 *
4450 */
4451
4452static void
4453docbParseEntityDecl(xmlParserCtxtPtr ctxt) {
4454    xmlChar *name = NULL;
4455    xmlChar *value = NULL;
4456    xmlChar *URI = NULL, *literal = NULL;
4457    xmlChar *ndata = NULL;
4458    int isParameter = 0;
4459    xmlChar *orig = NULL;
4460
4461    GROW;
4462    if ((RAW == '<') && (NXT(1) == '!') &&
4463        (UPP(2) == 'E') && (UPP(3) == 'N') &&
4464        (UPP(4) == 'T') && (UPP(5) == 'I') &&
4465        (UPP(6) == 'T') && (UPP(7) == 'Y')) {
4466       xmlParserInputPtr input = ctxt->input;
4467       ctxt->instate = XML_PARSER_ENTITY_DECL;
4468       SHRINK;
4469       SKIP(8);
4470       if (!IS_BLANK(CUR)) {
4471           ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4472           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473               ctxt->sax->error(ctxt->userData,
4474                                "Space required after '<!ENTITY'\n");
4475           ctxt->wellFormed = 0;
4476           ctxt->disableSAX = 1;
4477       }
4478       SKIP_BLANKS;
4479
4480       if (RAW == '%') {
4481           NEXT;
4482           if (!IS_BLANK(CUR)) {
4483               ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4484               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4485                   ctxt->sax->error(ctxt->userData,
4486                                    "Space required after '%'\n");
4487               ctxt->wellFormed = 0;
4488               ctxt->disableSAX = 1;
4489           }
4490           SKIP_BLANKS;
4491           isParameter = 1;
4492       }
4493
4494        name = xmlParseName(ctxt);
4495       if (name == NULL) {
4496           ctxt->errNo = XML_ERR_NAME_REQUIRED;
4497           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4498               ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
4499           ctxt->wellFormed = 0;
4500           ctxt->disableSAX = 1;
4501            return;
4502       }
4503       if (!IS_BLANK(CUR)) {
4504           ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4505           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4506               ctxt->sax->error(ctxt->userData,
4507                    "Space required after the entity name\n");
4508           ctxt->wellFormed = 0;
4509           ctxt->disableSAX = 1;
4510       }
4511        SKIP_BLANKS;
4512
4513       /*
4514        * handle the various case of definitions...
4515        */
4516       if (isParameter) {
4517           if ((RAW == '"') || (RAW == '\'')) {
4518               value = xmlParseEntityValue(ctxt, &orig);
4519               if (value) {
4520                   if ((ctxt->sax != NULL) &&
4521                       (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4522                       ctxt->sax->entityDecl(ctxt->userData, name,
4523                                   XML_INTERNAL_PARAMETER_ENTITY,
4524                                   NULL, NULL, value);
4525               }
4526           } else {
4527               URI = xmlParseExternalID(ctxt, &literal, 1);
4528               if ((URI == NULL) && (literal == NULL)) {
4529                   ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4530                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4531                       ctxt->sax->error(ctxt->userData,
4532                           "Entity value required\n");
4533                   ctxt->wellFormed = 0;
4534                   ctxt->disableSAX = 1;
4535               }
4536               if (URI) {
4537                   xmlURIPtr uri;
4538
4539                   uri = xmlParseURI((const char *) URI);
4540                   if (uri == NULL) {
4541                       ctxt->errNo = XML_ERR_INVALID_URI;
4542                       if ((ctxt->sax != NULL) &&
4543                           (!ctxt->disableSAX) &&
4544                           (ctxt->sax->error != NULL))
4545                           ctxt->sax->error(ctxt->userData,
4546                                       "Invalid URI: %s\n", URI);
4547                       ctxt->wellFormed = 0;
4548                   } else {
4549                       if (uri->fragment != NULL) {
4550                           ctxt->errNo = XML_ERR_URI_FRAGMENT;
4551                           if ((ctxt->sax != NULL) &&
4552                               (!ctxt->disableSAX) &&
4553                               (ctxt->sax->error != NULL))
4554                               ctxt->sax->error(ctxt->userData,
4555                                           "Fragment not allowed: %s\n", URI);
4556                           ctxt->wellFormed = 0;
4557                       } else {
4558                           if ((ctxt->sax != NULL) &&
4559                               (!ctxt->disableSAX) &&
4560                               (ctxt->sax->entityDecl != NULL))
4561                               ctxt->sax->entityDecl(ctxt->userData, name,
4562                                           XML_EXTERNAL_PARAMETER_ENTITY,
4563                                           literal, URI, NULL);
4564                       }
4565                       xmlFreeURI(uri);
4566                   }
4567               }
4568           }
4569       } else {
4570           if ((RAW == '"') || (RAW == '\'')) {
4571               value = xmlParseEntityValue(ctxt, &orig);
4572               if ((ctxt->sax != NULL) &&
4573                   (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4574                   ctxt->sax->entityDecl(ctxt->userData, name,
4575                               XML_INTERNAL_GENERAL_ENTITY,
4576                               NULL, NULL, value);
4577           } else {
4578               URI = xmlParseExternalID(ctxt, &literal, 1);
4579               if ((URI == NULL) && (literal == NULL)) {
4580                   ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4581                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4582                       ctxt->sax->error(ctxt->userData,
4583                           "Entity value required\n");
4584                   ctxt->wellFormed = 0;
4585                   ctxt->disableSAX = 1;
4586               }
4587               if (URI) {
4588                   xmlURIPtr uri;
4589
4590                   uri = xmlParseURI((const char *)URI);
4591                   if (uri == NULL) {
4592                       ctxt->errNo = XML_ERR_INVALID_URI;
4593                       if ((ctxt->sax != NULL) &&
4594                           (!ctxt->disableSAX) &&
4595                           (ctxt->sax->error != NULL))
4596                           ctxt->sax->error(ctxt->userData,
4597                                       "Invalid URI: %s\n", URI);
4598                       ctxt->wellFormed = 0;
4599                   } else {
4600                       if (uri->fragment != NULL) {
4601                           ctxt->errNo = XML_ERR_URI_FRAGMENT;
4602                           if ((ctxt->sax != NULL) &&
4603                               (!ctxt->disableSAX) &&
4604                               (ctxt->sax->error != NULL))
4605                               ctxt->sax->error(ctxt->userData,
4606                                           "Fragment not allowed: %s\n", URI);
4607                           ctxt->wellFormed = 0;
4608                       }
4609                       xmlFreeURI(uri);
4610                   }
4611               }
4612               if ((RAW != '>') && (!IS_BLANK(CUR))) {
4613                   ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4614                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4615                       ctxt->sax->error(ctxt->userData,
4616                           "Space required before content model\n");
4617                   ctxt->wellFormed = 0;
4618                   ctxt->disableSAX = 1;
4619               }
4620               SKIP_BLANKS;
4621
4622               /*
4623                * SGML specific: here we can get the content model
4624                */
4625               if (RAW != '>') {
4626                   xmlChar *contmod;
4627
4628                   contmod = xmlParseName(ctxt);
4629
4630                   if (contmod == NULL) {
4631                       ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4632                       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4633                           ctxt->sax->error(ctxt->userData,
4634                               "Could not parse entity content model\n");
4635                       ctxt->wellFormed = 0;
4636                       ctxt->disableSAX = 1;
4637                   } else {
4638                       if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
4639                           if (!IS_BLANK(CUR)) {
4640                               ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4641                               if ((ctxt->sax != NULL) &&
4642                                   (ctxt->sax->error != NULL))
4643                                   ctxt->sax->error(ctxt->userData,
4644                                       "Space required after 'NDATA'\n");
4645                               ctxt->wellFormed = 0;
4646                               ctxt->disableSAX = 1;
4647                           }
4648                           SKIP_BLANKS;
4649                           ndata = xmlParseName(ctxt);
4650                           if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4651                               (ctxt->sax->unparsedEntityDecl != NULL)) {
4652                               ctxt->sax->unparsedEntityDecl(ctxt->userData,
4653                                       name, literal, URI, ndata);
4654                           }
4655                       } else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
4656                           if ((ctxt->sax != NULL) &&
4657                               (ctxt->sax->warning != NULL))
4658                               ctxt->sax->warning(ctxt->userData,
4659                                   "SUBDOC entities are not supported\n");
4660                           SKIP_BLANKS;
4661                           ndata = xmlParseName(ctxt);
4662                           if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4663                               (ctxt->sax->unparsedEntityDecl != NULL)) {
4664                               ctxt->sax->unparsedEntityDecl(ctxt->userData,
4665                                       name, literal, URI, ndata);
4666                           }
4667                       } else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
4668                           if ((ctxt->sax != NULL) &&
4669                               (ctxt->sax->warning != NULL))
4670                               ctxt->sax->warning(ctxt->userData,
4671                                   "CDATA entities are not supported\n");
4672                           SKIP_BLANKS;
4673                           ndata = xmlParseName(ctxt);
4674                           if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4675                               (ctxt->sax->unparsedEntityDecl != NULL)) {
4676                               ctxt->sax->unparsedEntityDecl(ctxt->userData,
4677                                       name, literal, URI, ndata);
4678                           }
4679                       }
4680                       xmlFree(contmod);
4681                   }
4682               } else {
4683                   if ((ctxt->sax != NULL) &&
4684                       (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4685                       ctxt->sax->entityDecl(ctxt->userData, name,
4686                                   XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4687                                   literal, URI, NULL);
4688               }
4689           }
4690       }
4691       SKIP_BLANKS;
4692       if (RAW != '>') {
4693           ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
4694           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4695               ctxt->sax->error(ctxt->userData,
4696                   "docbParseEntityDecl: entity %s not terminated\n", name);
4697           ctxt->wellFormed = 0;
4698           ctxt->disableSAX = 1;
4699       } else {
4700           if (input != ctxt->input) {
4701               ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4702               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4703                   ctxt->sax->error(ctxt->userData,
4704"Entity declaration doesn't start and stop in the same entity\n");
4705               ctxt->wellFormed = 0;
4706               ctxt->disableSAX = 1;
4707           }
4708           NEXT;
4709       }
4710       if (orig != NULL) {
4711           /*
4712            * Ugly mechanism to save the raw entity value.
4713            */
4714           xmlEntityPtr cur = NULL;
4715
4716           if (isParameter) {
4717               if ((ctxt->sax != NULL) &&
4718                   (ctxt->sax->getParameterEntity != NULL))
4719                   cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4720           } else {
4721               if ((ctxt->sax != NULL) &&
4722                   (ctxt->sax->getEntity != NULL))
4723                   cur = ctxt->sax->getEntity(ctxt->userData, name);
4724           }
4725            if (cur != NULL) {
4726               if (cur->orig != NULL)
4727                   xmlFree(orig);
4728               else
4729                   cur->orig = orig;
4730           } else
4731               xmlFree(orig);
4732       }
4733       if (name != NULL) xmlFree(name);
4734       if (value != NULL) xmlFree(value);
4735       if (URI != NULL) xmlFree(URI);
4736       if (literal != NULL) xmlFree(literal);
4737       if (ndata != NULL) xmlFree(ndata);
4738    }
4739}
4740
4741/**
4742 * docbParseMarkupDecl:
4743 * @ctxt:  an SGML parser context
4744 *
4745 * parse Markup declarations
4746 *
4747 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4748 *                     NotationDecl | PI | Comment
4749 */
4750static void
4751docbParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4752    GROW;
4753    xmlParseElementDecl(ctxt);
4754    xmlParseAttributeListDecl(ctxt);
4755    docbParseEntityDecl(ctxt);
4756    xmlParseNotationDecl(ctxt);
4757    docbParsePI(ctxt);
4758    xmlParseComment(ctxt);
4759    /*
4760     * This is only for internal subset. On external entities,
4761     * the replacement is done before parsing stage
4762     */
4763    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4764       xmlParsePEReference(ctxt);
4765    ctxt->instate = XML_PARSER_DTD;
4766}
4767
4768/**
4769 * docbParseInternalSubset:
4770 * @ctxt:  an SGML parser context
4771 *
4772 * parse the internal subset declaration
4773 *
4774 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
4775 */
4776
4777static void
4778docbParseInternalSubset(xmlParserCtxtPtr ctxt) {
4779    /*
4780     * Is there any DTD definition ?
4781     */
4782    if (RAW == '[') {
4783        ctxt->instate = XML_PARSER_DTD;
4784        NEXT;
4785       /*
4786        * Parse the succession of Markup declarations and
4787        * PEReferences.
4788        * Subsequence (markupdecl | PEReference | S)*
4789        */
4790       while (RAW != ']') {
4791           const xmlChar *check = CUR_PTR;
4792           int cons = ctxt->input->consumed;
4793
4794           SKIP_BLANKS;
4795           docbParseMarkupDecl(ctxt);
4796           xmlParsePEReference(ctxt);
4797
4798           /*
4799            * Pop-up of finished entities.
4800            */
4801           while ((RAW == 0) && (ctxt->inputNr > 1))
4802               xmlPopInput(ctxt);
4803
4804           if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4805               ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4806               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807                   ctxt->sax->error(ctxt->userData,
4808            "docbParseInternalSubset: error detected in Markup declaration\n");
4809               ctxt->wellFormed = 0;
4810               ctxt->disableSAX = 1;
4811               break;
4812           }
4813       }
4814       if (RAW == ']') {
4815           NEXT;
4816           SKIP_BLANKS;
4817       }
4818    }
4819
4820    /*
4821     * We should be at the end of the DOCTYPE declaration.
4822     */
4823    if (RAW != '>') {
4824       ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
4825       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4826           ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
4827       ctxt->wellFormed = 0;
4828       ctxt->disableSAX = 1;
4829    }
4830    NEXT;
4831}
4832
4833/**
4834 * docbParseMisc:
4835 * @ctxt:  an XML parser context
4836 *
4837 * parse an XML Misc* optional field.
4838 *
4839 * [27] Misc ::= Comment | PI |  S
4840 */
4841
4842static void
4843docbParseMisc(xmlParserCtxtPtr ctxt) {
4844    while (((RAW == '<') && (NXT(1) == '?')) ||
4845           ((RAW == '<') && (NXT(1) == '!') &&
4846           (NXT(2) == '-') && (NXT(3) == '-')) ||
4847           IS_BLANK(CUR)) {
4848        if ((RAW == '<') && (NXT(1) == '?')) {
4849            docbParsePI(ctxt);
4850        } else if (IS_BLANK(CUR)) {
4851            NEXT;
4852        } else
4853            xmlParseComment(ctxt);
4854    }
4855}
4856
4857/**
4858 * docbParseDocument :
4859 * @ctxt:  an SGML parser context
4860 *
4861 * parse an SGML document (and build a tree if using the standard SAX
4862 * interface).
4863 *
4864 * Returns 0, -1 in case of error. the parser context is augmented
4865 *                as a result of the parsing.
4866 */
4867
4868int
4869docbParseDocument(docbParserCtxtPtr ctxt) {
4870    xmlChar start[4];
4871    xmlCharEncoding enc;
4872    xmlDtdPtr dtd;
4873
4874    docbDefaultSAXHandlerInit();
4875    ctxt->html = 2;
4876
4877    GROW;
4878    /*
4879     * SAX: beginning of the document processing.
4880     */
4881    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4882        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4883
4884    /*
4885     * Get the 4 first bytes and decode the charset
4886     * if enc != XML_CHAR_ENCODING_NONE
4887     * plug some encoding conversion routines.
4888     */
4889    start[0] = RAW;
4890    start[1] = NXT(1);
4891    start[2] = NXT(2);
4892    start[3] = NXT(3);
4893    enc = xmlDetectCharEncoding(start, 4);
4894    if (enc != XML_CHAR_ENCODING_NONE) {
4895        xmlSwitchEncoding(ctxt, enc);
4896    }
4897
4898    /*
4899     * Wipe out everything which is before the first '<'
4900     */
4901    SKIP_BLANKS;
4902    if (CUR == 0) {
4903       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4904           ctxt->sax->error(ctxt->userData, "Document is empty\n");
4905       ctxt->wellFormed = 0;
4906    }
4907
4908    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4909       ctxt->sax->startDocument(ctxt->userData);
4910
4911
4912    /*
4913     * The Misc part of the Prolog
4914     */
4915    GROW;
4916    docbParseMisc(ctxt);
4917
4918    /*
4919     * Then possibly doc type declaration(s) and more Misc
4920     * (doctypedecl Misc*)?
4921     */
4922    GROW;
4923    if ((RAW == '<') && (NXT(1) == '!') &&
4924       (UPP(2) == 'D') && (UPP(3) == 'O') &&
4925       (UPP(4) == 'C') && (UPP(5) == 'T') &&
4926       (UPP(6) == 'Y') && (UPP(7) == 'P') &&
4927       (UPP(8) == 'E')) {
4928
4929       ctxt->inSubset = 1;
4930       docbParseDocTypeDecl(ctxt);
4931       if (RAW == '[') {
4932           ctxt->instate = XML_PARSER_DTD;
4933           docbParseInternalSubset(ctxt);
4934       }
4935
4936       /*
4937        * Create and update the external subset.
4938        */
4939       ctxt->inSubset = 2;
4940       if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
4941           (!ctxt->disableSAX))
4942           ctxt->sax->internalSubset(ctxt->userData, ctxt->intSubName,
4943                                     ctxt->extSubSystem, ctxt->extSubURI);
4944       ctxt->inSubset = 0;
4945
4946
4947       ctxt->instate = XML_PARSER_PROLOG;
4948       docbParseMisc(ctxt);
4949    }
4950
4951    /*
4952     * Time to start parsing the tree itself
4953     */
4954    docbParseContent(ctxt);
4955
4956    /*
4957     * autoclose
4958     */
4959    if (CUR == 0)
4960       docbAutoClose(ctxt, NULL);
4961
4962
4963    /*
4964     * SAX: end of the document processing.
4965     */
4966    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4967        ctxt->sax->endDocument(ctxt->userData);
4968
4969    if (ctxt->myDoc != NULL) {
4970       dtd = ctxt->myDoc->intSubset;
4971       ctxt->myDoc->standalone = -1;
4972       if (dtd == NULL)
4973           ctxt->myDoc->intSubset =
4974               xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
4975                   BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
4976                   BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
4977    }
4978    if (! ctxt->wellFormed) return(-1);
4979    return(0);
4980}
4981
4982
4983/************************************************************************
4984 *                                                                     *
4985 *                     Parser contexts handling                        *
4986 *                                                                     *
4987 ************************************************************************/
4988
4989/**
4990 * docbInitParserCtxt:
4991 * @ctxt:  an SGML parser context
4992 *
4993 * Initialize a parser context
4994 */
4995
4996static void
4997docbInitParserCtxt(docbParserCtxtPtr ctxt)
4998{
4999    docbSAXHandler *sax;
5000
5001    if (ctxt == NULL) return;
5002    memset(ctxt, 0, sizeof(docbParserCtxt));
5003
5004    sax = (docbSAXHandler *) xmlMalloc(sizeof(docbSAXHandler));
5005    if (sax == NULL) {
5006        xmlGenericError(xmlGenericErrorContext,
5007               "docbInitParserCtxt: out of memory\n");
5008    }
5009    memset(sax, 0, sizeof(docbSAXHandler));
5010
5011    /* Allocate the Input stack */
5012    ctxt->inputTab = (docbParserInputPtr *)
5013                      xmlMalloc(5 * sizeof(docbParserInputPtr));
5014    if (ctxt->inputTab == NULL) {
5015        xmlGenericError(xmlGenericErrorContext,
5016               "docbInitParserCtxt: out of memory\n");
5017    }
5018    ctxt->inputNr = 0;
5019    ctxt->inputMax = 5;
5020    ctxt->input = NULL;
5021    ctxt->version = NULL;
5022    ctxt->encoding = NULL;
5023    ctxt->standalone = -1;
5024    ctxt->instate = XML_PARSER_START;
5025
5026    /* Allocate the Node stack */
5027    ctxt->nodeTab = (docbNodePtr *) xmlMalloc(10 * sizeof(docbNodePtr));
5028    ctxt->nodeNr = 0;
5029    ctxt->nodeMax = 10;
5030    ctxt->node = NULL;
5031
5032    /* Allocate the Name stack */
5033    ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
5034    ctxt->nameNr = 0;
5035    ctxt->nameMax = 10;
5036    ctxt->name = NULL;
5037
5038    if (sax == NULL) ctxt->sax = &docbDefaultSAXHandler;
5039    else {
5040        ctxt->sax = sax;
5041       memcpy(sax, &docbDefaultSAXHandler, sizeof(docbSAXHandler));
5042    }
5043    ctxt->userData = ctxt;
5044    ctxt->myDoc = NULL;
5045    ctxt->wellFormed = 1;
5046    ctxt->linenumbers = xmlLineNumbersDefaultValue;
5047    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
5048    ctxt->html = 2;
5049    ctxt->record_info = 0;
5050    ctxt->validate = 0;
5051    ctxt->nbChars = 0;
5052    ctxt->checkIndex = 0;
5053    xmlInitNodeInfoSeq(&ctxt->node_seq);
5054}
5055
/**
 * docbFreeParserCtxt:
 * @ctxt:  an SGML parser context
 *
 * Free all the memory used by a parser context. However the parsed
 * document in ctxt->myDoc is not freed.
 *
 * This is a thin wrapper: the context is handed as is to
 * xmlFreeParserCtxt(), which does all the work.
 */

void
docbFreeParserCtxt(docbParserCtxtPtr ctxt)
{
    xmlFreeParserCtxt(ctxt);
}
5069
5070/**
5071 * docbCreateDocParserCtxt :
5072 * @cur:  a pointer to an array of xmlChar
5073 * @encoding: the SGML document encoding, or NULL
5074 *
5075 * Create a parser context for an SGML document.
5076 *
5077 * Returns the new parser context or NULL
5078 */
5079static docbParserCtxtPtr
5080docbCreateDocParserCtxt(xmlChar *cur, const char *encoding ATTRIBUTE_UNUSED) {
5081    docbParserCtxtPtr ctxt;
5082    docbParserInputPtr input;
5083    /* sgmlCharEncoding enc; */
5084
5085    ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
5086    if (ctxt == NULL) {
5087        perror("malloc");
5088       return(NULL);
5089    }
5090    docbInitParserCtxt(ctxt);
5091    input = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
5092    if (input == NULL) {
5093        perror("malloc");
5094       xmlFree(ctxt);
5095       return(NULL);
5096    }
5097    memset(input, 0, sizeof(docbParserInput));
5098
5099    input->line = 1;
5100    input->col = 1;
5101    input->base = cur;
5102    input->cur = cur;
5103
5104    inputPush(ctxt, input);
5105    return(ctxt);
5106}
5107
5108/************************************************************************
5109 *                                                                     *
5110 *             Progressive parsing interfaces                          *
5111 *                                                                     *
5112 ************************************************************************/
5113
5114/**
5115 * docbParseLookupSequence:
5116 * @ctxt:  an SGML parser context
5117 * @first:  the first char to lookup
5118 * @next:  the next char to lookup or zero
5119 * @third:  the next char to lookup or zero
5120 *
5121 * Try to find if a sequence (first, next, third) or  just (first next) or
5122 * (first) is available in the input stream.
5123 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
5124 * to avoid rescanning sequences of bytes, it DOES change the state of the
5125 * parser, do not use liberally.
5126 * This is basically similar to xmlParseLookupSequence()
5127 *
5128 * Returns the index to the current parsing point if the full sequence
5129 *      is available, -1 otherwise.
5130 */
5131static int
5132docbParseLookupSequence(docbParserCtxtPtr ctxt, xmlChar first,
5133                       xmlChar next, xmlChar third) {
5134    int base, len;
5135    docbParserInputPtr in;
5136    const xmlChar *buf;
5137
5138    in = ctxt->input;
5139    if (in == NULL) return(-1);
5140    base = in->cur - in->base;
5141    if (base < 0) return(-1);
5142    if (ctxt->checkIndex > base)
5143        base = ctxt->checkIndex;
5144    if (in->buf == NULL) {
5145       buf = in->base;
5146       len = in->length;
5147    } else {
5148       buf = in->buf->buffer->content;
5149       len = in->buf->buffer->use;
5150    }
5151    /* take into account the sequence length */
5152    if (third) len -= 2;
5153    else if (next) len --;
5154    for (;base < len;base++) {
5155        if (buf[base] == first) {
5156           if (third != 0) {
5157               if ((buf[base + 1] != next) ||
5158                   (buf[base + 2] != third)) continue;
5159           } else if (next != 0) {
5160               if (buf[base + 1] != next) continue;
5161           }
5162           ctxt->checkIndex = 0;
5163#ifdef DEBUG_PUSH
5164           if (next == 0)
5165               xmlGenericError(xmlGenericErrorContext,
5166                       "HPP: lookup '%c' found at %d\n",
5167                       first, base);
5168           else if (third == 0)
5169               xmlGenericError(xmlGenericErrorContext,
5170                       "HPP: lookup '%c%c' found at %d\n",
5171                       first, next, base);
5172           else
5173               xmlGenericError(xmlGenericErrorContext,
5174                       "HPP: lookup '%c%c%c' found at %d\n",
5175                       first, next, third, base);
5176#endif
5177           return(base - (in->cur - in->base));
5178       }
5179    }
5180    ctxt->checkIndex = base;
5181#ifdef DEBUG_PUSH
5182    if (next == 0)
5183       xmlGenericError(xmlGenericErrorContext,
5184               "HPP: lookup '%c' failed\n", first);
5185    else if (third == 0)
5186       xmlGenericError(xmlGenericErrorContext,
5187               "HPP: lookup '%c%c' failed\n", first, next);
5188    else
5189       xmlGenericError(xmlGenericErrorContext,
5190               "HPP: lookup '%c%c%c' failed\n", first, next, third);
5191#endif
5192    return(-1);
5193}
5194
5195/**
5196 * docbParseTryOrFinish:
5197 * @ctxt:  an SGML parser context
5198 * @terminate:  last chunk indicator
5199 *
5200 * Try to progress on parsing
5201 *
5202 * Returns zero if no parsing was possible
5203 */
5204static int
5205docbParseTryOrFinish(docbParserCtxtPtr ctxt, int terminate) {
5206    int ret = 0;
5207    docbParserInputPtr in;
5208    int avail = 0;
5209    xmlChar cur, next;
5210
5211#ifdef DEBUG_PUSH
5212    switch (ctxt->instate) {
5213       case XML_PARSER_EOF:
5214           xmlGenericError(xmlGenericErrorContext,
5215                   "HPP: try EOF\n"); break;
5216       case XML_PARSER_START:
5217           xmlGenericError(xmlGenericErrorContext,
5218                   "HPP: try START\n"); break;
5219       case XML_PARSER_MISC:
5220           xmlGenericError(xmlGenericErrorContext,
5221                   "HPP: try MISC\n");break;
5222       case XML_PARSER_COMMENT:
5223           xmlGenericError(xmlGenericErrorContext,
5224                   "HPP: try COMMENT\n");break;
5225       case XML_PARSER_PROLOG:
5226           xmlGenericError(xmlGenericErrorContext,
5227                   "HPP: try PROLOG\n");break;
5228       case XML_PARSER_START_TAG:
5229           xmlGenericError(xmlGenericErrorContext,
5230                   "HPP: try START_TAG\n");break;
5231       case XML_PARSER_CONTENT:
5232           xmlGenericError(xmlGenericErrorContext,
5233                   "HPP: try CONTENT\n");break;
5234       case XML_PARSER_CDATA_SECTION:
5235           xmlGenericError(xmlGenericErrorContext,
5236                   "HPP: try CDATA_SECTION\n");break;
5237       case XML_PARSER_END_TAG:
5238           xmlGenericError(xmlGenericErrorContext,
5239                   "HPP: try END_TAG\n");break;
5240       case XML_PARSER_ENTITY_DECL:
5241           xmlGenericError(xmlGenericErrorContext,
5242                   "HPP: try ENTITY_DECL\n");break;
5243       case XML_PARSER_ENTITY_VALUE:
5244           xmlGenericError(xmlGenericErrorContext,
5245                   "HPP: try ENTITY_VALUE\n");break;
5246       case XML_PARSER_ATTRIBUTE_VALUE:
5247           xmlGenericError(xmlGenericErrorContext,
5248                   "HPP: try ATTRIBUTE_VALUE\n");break;
5249       case XML_PARSER_DTD:
5250           xmlGenericError(xmlGenericErrorContext,
5251                   "HPP: try DTD\n");break;
5252       case XML_PARSER_EPILOG:
5253           xmlGenericError(xmlGenericErrorContext,
5254                   "HPP: try EPILOG\n");break;
5255       case XML_PARSER_PI:
5256           xmlGenericError(xmlGenericErrorContext,
5257                   "HPP: try PI\n");break;
5258    }
5259#endif
5260
5261    while (1) {
5262
5263       in = ctxt->input;
5264       if (in == NULL) break;
5265       if (in->buf == NULL)
5266           avail = in->length - (in->cur - in->base);
5267       else
5268           avail = in->buf->buffer->use - (in->cur - in->base);
5269       if ((avail == 0) && (terminate)) {
5270           docbAutoClose(ctxt, NULL);
5271           if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5272               /*
5273                * SAX: end of the document processing.
5274                */
5275               ctxt->instate = XML_PARSER_EOF;
5276               if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5277                   ctxt->sax->endDocument(ctxt->userData);
5278           }
5279       }
5280        if (avail < 1)
5281           goto done;
5282        switch (ctxt->instate) {
5283            case XML_PARSER_EOF:
5284               /*
5285                * Document parsing is done !
5286                */
5287               goto done;
5288            case XML_PARSER_START:
5289               /*
5290                * Very first chars read from the document flow.
5291                */
5292               cur = in->cur[0];
5293               if (IS_BLANK(cur)) {
5294                   SKIP_BLANKS;
5295                   if (in->buf == NULL)
5296                       avail = in->length - (in->cur - in->base);
5297                   else
5298                       avail = in->buf->buffer->use - (in->cur - in->base);
5299               }
5300               if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5301                   ctxt->sax->setDocumentLocator(ctxt->userData,
5302                                                 &xmlDefaultSAXLocator);
5303               if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5304                   (!ctxt->disableSAX))
5305                   ctxt->sax->startDocument(ctxt->userData);
5306
5307               cur = in->cur[0];
5308               next = in->cur[1];
5309               if ((cur == '<') && (next == '!') &&
5310                   (UPP(2) == 'D') && (UPP(3) == 'O') &&
5311                   (UPP(4) == 'C') && (UPP(5) == 'T') &&
5312                   (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5313                   (UPP(8) == 'E')) {
5314                   if ((!terminate) &&
5315                       (docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
5316                       goto done;
5317#ifdef DEBUG_PUSH
5318                   xmlGenericError(xmlGenericErrorContext,
5319                           "HPP: Parsing internal subset\n");
5320#endif
5321                   docbParseDocTypeDecl(ctxt);
5322                   ctxt->instate = XML_PARSER_PROLOG;
5323#ifdef DEBUG_PUSH
5324                   xmlGenericError(xmlGenericErrorContext,
5325                           "HPP: entering PROLOG\n");
5326#endif
5327                } else {
5328                   ctxt->instate = XML_PARSER_MISC;
5329               }
5330#ifdef DEBUG_PUSH
5331               xmlGenericError(xmlGenericErrorContext,
5332                       "HPP: entering MISC\n");
5333#endif
5334               break;
5335            case XML_PARSER_MISC:
5336               SKIP_BLANKS;
5337               if (in->buf == NULL)
5338                   avail = in->length - (in->cur - in->base);
5339               else
5340                   avail = in->buf->buffer->use - (in->cur - in->base);
5341               if (avail < 2)
5342                   goto done;
5343               cur = in->cur[0];
5344               next = in->cur[1];
5345               if ((cur == '<') && (next == '!') &&
5346                   (in->cur[2] == '-') && (in->cur[3] == '-')) {
5347                   if ((!terminate) &&
5348                       (docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
5349                       goto done;
5350#ifdef DEBUG_PUSH
5351                   xmlGenericError(xmlGenericErrorContext,
5352                           "HPP: Parsing Comment\n");
5353#endif
5354                   docbParseComment(ctxt);
5355                   ctxt->instate = XML_PARSER_MISC;
5356               } else if ((cur == '<') && (next == '!') &&
5357                   (UPP(2) == 'D') && (UPP(3) == 'O') &&
5358                   (UPP(4) == 'C') && (UPP(5) == 'T') &&
5359                   (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5360                   (UPP(8) == 'E')) {
5361                   if ((!terminate) &&
5362                       (docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
5363                       goto done;
5364#ifdef DEBUG_PUSH
5365                   xmlGenericError(xmlGenericErrorContext,
5366                           "HPP: Parsing internal subset\n");
5367#endif
5368                   docbParseDocTypeDecl(ctxt);
5369                   ctxt->instate = XML_PARSER_PROLOG;
5370#ifdef DEBUG_PUSH
5371                   xmlGenericError(xmlGenericErrorContext,
5372                           "HPP: entering PROLOG\n");
5373#endif
5374               } else if ((cur == '<') && (next == '!') &&
5375                          (avail < 9)) {
5376                   goto done;
5377               } else {
5378                   ctxt->instate = XML_PARSER_START_TAG;
5379#ifdef DEBUG_PUSH
5380                   xmlGenericError(xmlGenericErrorContext,
5381                           "HPP: entering START_TAG\n");
5382#endif
5383               }
5384               break;
5385            case XML_PARSER_PROLOG:
5386               SKIP_BLANKS;
5387               if (in->buf == NULL)
5388                   avail = in->length - (in->cur - in->base);
5389               else
5390                   avail = in->buf->buffer->use - (in->cur - in->base);
5391               if (avail < 2)
5392                   goto done;
5393               cur = in->cur[0];
5394               next = in->cur[1];
5395               if ((cur == '<') && (next == '!') &&
5396                   (in->cur[2] == '-') && (in->cur[3] == '-')) {
5397                   if ((!terminate) &&
5398                       (docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
5399                       goto done;
5400#ifdef DEBUG_PUSH
5401                   xmlGenericError(xmlGenericErrorContext,
5402                           "HPP: Parsing Comment\n");
5403#endif
5404                   docbParseComment(ctxt);
5405                   ctxt->instate = XML_PARSER_PROLOG;
5406               } else if ((cur == '<') && (next == '!') &&
5407                          (avail < 4)) {
5408                   goto done;
5409               } else {
5410                   ctxt->instate = XML_PARSER_START_TAG;
5411#ifdef DEBUG_PUSH
5412                   xmlGenericError(xmlGenericErrorContext,
5413                           "HPP: entering START_TAG\n");
5414#endif
5415               }
5416               break;
5417            case XML_PARSER_EPILOG:
5418               if (in->buf == NULL)
5419                   avail = in->length - (in->cur - in->base);
5420               else
5421                   avail = in->buf->buffer->use - (in->cur - in->base);
5422               if (avail < 1)
5423                   goto done;
5424               cur = in->cur[0];
5425               if (IS_BLANK(cur)) {
5426                   docbParseCharData(ctxt);
5427                   goto done;
5428               }
5429               if (avail < 2)
5430                   goto done;
5431               next = in->cur[1];
5432               if ((cur == '<') && (next == '!') &&
5433                   (in->cur[2] == '-') && (in->cur[3] == '-')) {
5434                   if ((!terminate) &&
5435                       (docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
5436                       goto done;
5437#ifdef DEBUG_PUSH
5438                   xmlGenericError(xmlGenericErrorContext,
5439                           "HPP: Parsing Comment\n");
5440#endif
5441                   docbParseComment(ctxt);
5442                   ctxt->instate = XML_PARSER_EPILOG;
5443               } else if ((cur == '<') && (next == '!') &&
5444                          (avail < 4)) {
5445                   goto done;
5446               } else {
5447                   ctxt->errNo = XML_ERR_DOCUMENT_END;
5448                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5449                       ctxt->sax->error(ctxt->userData,
5450                           "Extra content at the end of the document\n");
5451                   ctxt->wellFormed = 0;
5452                   ctxt->instate = XML_PARSER_EOF;
5453#ifdef DEBUG_PUSH
5454                   xmlGenericError(xmlGenericErrorContext,
5455                           "HPP: entering EOF\n");
5456#endif
5457                   if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5458                       ctxt->sax->endDocument(ctxt->userData);
5459                   goto done;
5460               }
5461               break;
5462            case XML_PARSER_START_TAG: {
5463               xmlChar *name, *oldname;
5464               int depth = ctxt->nameNr;
5465               docbElemDescPtr info;
5466
5467               if (avail < 2)
5468                   goto done;
5469               cur = in->cur[0];
5470               if (cur != '<') {
5471                   ctxt->instate = XML_PARSER_CONTENT;
5472#ifdef DEBUG_PUSH
5473                   xmlGenericError(xmlGenericErrorContext,
5474                           "HPP: entering CONTENT\n");
5475#endif
5476                   break;
5477               }
5478               if ((!terminate) &&
5479                   (docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
5480                   goto done;
5481
5482               oldname = xmlStrdup(ctxt->name);
5483               docbParseStartTag(ctxt);
5484               name = ctxt->name;
5485#ifdef DEBUG
5486               if (oldname == NULL)
5487                   xmlGenericError(xmlGenericErrorContext,
5488                           "Start of element %s\n", name);
5489               else if (name == NULL)
5490                   xmlGenericError(xmlGenericErrorContext,
5491                           "Start of element failed, was %s\n",
5492                           oldname);
5493               else
5494                   xmlGenericError(xmlGenericErrorContext,
5495                           "Start of element %s, was %s\n",
5496                           name, oldname);
5497#endif
5498               if (((depth == ctxt->nameNr) &&
5499                    (xmlStrEqual(oldname, ctxt->name))) ||
5500                   (name == NULL)) {
5501                   if (CUR == '>')
5502                       NEXT;
5503                   if (oldname != NULL)
5504                       xmlFree(oldname);
5505                   break;
5506               }
5507               if (oldname != NULL)
5508                   xmlFree(oldname);
5509
5510               /*
5511                * Lookup the info for that element.
5512                */
5513               info = docbTagLookup(name);
5514               if (info == NULL) {
5515                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5516                       ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
5517                                        name);
5518                   ctxt->wellFormed = 0;
5519               } else if (info->depr) {
5520                   /***************************
5521                   if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5522                       ctxt->sax->warning(ctxt->userData,
5523                                          "Tag %s is deprecated\n",
5524                                          name);
5525                    ***************************/
5526               }
5527
5528               /*
5529                * Check for an Empty Element labeled the XML/SGML way
5530                */
5531               if ((CUR == '/') && (NXT(1) == '>')) {
5532                   SKIP(2);
5533                   if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5534                       ctxt->sax->endElement(ctxt->userData, name);
5535                   oldname = docbnamePop(ctxt);
5536#ifdef DEBUG
5537                   xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n",
5538                           oldname);
5539#endif
5540                   if (oldname != NULL)
5541                       xmlFree(oldname);
5542                   ctxt->instate = XML_PARSER_CONTENT;
5543#ifdef DEBUG_PUSH
5544                   xmlGenericError(xmlGenericErrorContext,
5545                           "HPP: entering CONTENT\n");
5546#endif
5547                   break;
5548               }
5549
5550               if (CUR == '>') {
5551                   NEXT;
5552               } else {
5553                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5554                       ctxt->sax->error(ctxt->userData,
5555                                        "Couldn't find end of Start Tag %s\n",
5556                                        name);
5557                   ctxt->wellFormed = 0;
5558
5559                   /*
5560                    * end of parsing of this node.
5561                    */
5562                   if (xmlStrEqual(name, ctxt->name)) {
5563                       nodePop(ctxt);
5564                       oldname = docbnamePop(ctxt);
5565#ifdef DEBUG
5566                       xmlGenericError(xmlGenericErrorContext,
5567                        "End of start tag problem: popping out %s\n", oldname);
5568#endif
5569                       if (oldname != NULL)
5570                           xmlFree(oldname);
5571                   }
5572
5573                   ctxt->instate = XML_PARSER_CONTENT;
5574#ifdef DEBUG_PUSH
5575                   xmlGenericError(xmlGenericErrorContext,
5576                           "HPP: entering CONTENT\n");
5577#endif
5578                   break;
5579               }
5580
5581               /*
5582                * Check for an Empty Element from DTD definition
5583                */
5584               if ((info != NULL) && (info->empty)) {
5585                   if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5586                       ctxt->sax->endElement(ctxt->userData, name);
5587                   oldname = docbnamePop(ctxt);
5588#ifdef DEBUG
5589                   xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
5590#endif
5591                   if (oldname != NULL)
5592                       xmlFree(oldname);
5593               }
5594               ctxt->instate = XML_PARSER_CONTENT;
5595#ifdef DEBUG_PUSH
5596               xmlGenericError(xmlGenericErrorContext,
5597                       "HPP: entering CONTENT\n");
5598#endif
5599                break;
5600           }
5601            case XML_PARSER_CONTENT: {
5602               long cons;
5603                /*
5604                * Handle preparsed entities and charRef
5605                */
5606               if (ctxt->token != 0) {
5607                   xmlChar chr[2] = { 0 , 0 } ;
5608
5609                   chr[0] = (xmlChar) ctxt->token;
5610                   docbCheckParagraph(ctxt);
5611                   if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5612                       ctxt->sax->characters(ctxt->userData, chr, 1);
5613                   ctxt->token = 0;
5614                   ctxt->checkIndex = 0;
5615               }
5616               if ((avail == 1) && (terminate)) {
5617                   cur = in->cur[0];
5618                   if ((cur != '<') && (cur != '&')) {
5619                       if (ctxt->sax != NULL) {
5620                           if (IS_BLANK(cur)) {
5621                               if (ctxt->sax->ignorableWhitespace != NULL)
5622                                   ctxt->sax->ignorableWhitespace(
5623                                           ctxt->userData, &cur, 1);
5624                           } else {
5625                               docbCheckParagraph(ctxt);
5626                               if (ctxt->sax->characters != NULL)
5627                                   ctxt->sax->characters(
5628                                           ctxt->userData, &cur, 1);
5629                           }
5630                       }
5631                       ctxt->token = 0;
5632                       ctxt->checkIndex = 0;
5633                       NEXT;
5634                   }
5635                   break;
5636               }
5637               if (avail < 2)
5638                   goto done;
5639               cur = in->cur[0];
5640               next = in->cur[1];
5641               cons = ctxt->nbChars;
5642               /*
5643                * Sometimes DOCTYPE arrives in the middle of the document
5644                */
5645               if ((cur == '<') && (next == '!') &&
5646                   (UPP(2) == 'D') && (UPP(3) == 'O') &&
5647                   (UPP(4) == 'C') && (UPP(5) == 'T') &&
5648                   (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5649                   (UPP(8) == 'E')) {
5650                   if ((!terminate) &&
5651                       (docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
5652                       goto done;
5653                   if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5654                       ctxt->sax->error(ctxt->userData,
5655                            "Misplaced DOCTYPE declaration\n");
5656                   ctxt->wellFormed = 0;
5657                   docbParseDocTypeDecl(ctxt);
5658               } else if ((cur == '<') && (next == '!') &&
5659                   (in->cur[2] == '-') && (in->cur[3] == '-')) {
5660                   if ((!terminate) &&
5661                       (docbParseLookupSequence(ctxt, '-', '-', '>') < 0))
5662                       goto done;
5663#ifdef DEBUG_PUSH
5664                   xmlGenericError(xmlGenericErrorContext,
5665                           "HPP: Parsing Comment\n");
5666#endif
5667                   docbParseComment(ctxt);
5668                   ctxt->instate = XML_PARSER_CONTENT;
5669               } else if ((cur == '<') && (next == '!') && (avail < 4)) {
5670                   goto done;
5671               } else if ((cur == '<') && (next == '/')) {
5672                   ctxt->instate = XML_PARSER_END_TAG;
5673                   ctxt->checkIndex = 0;
5674#ifdef DEBUG_PUSH
5675                   xmlGenericError(xmlGenericErrorContext,
5676                           "HPP: entering END_TAG\n");
5677#endif
5678                   break;
5679               } else if (cur == '<') {
5680                   ctxt->instate = XML_PARSER_START_TAG;
5681                   ctxt->checkIndex = 0;
5682#ifdef DEBUG_PUSH
5683                   xmlGenericError(xmlGenericErrorContext,
5684                           "HPP: entering START_TAG\n");
5685#endif
5686                   break;
5687               } else if (cur == '&') {
5688                   if ((!terminate) &&
5689                       (docbParseLookupSequence(ctxt, ';', 0, 0) < 0))
5690                       goto done;
5691#ifdef DEBUG_PUSH
5692                   xmlGenericError(xmlGenericErrorContext,
5693                           "HPP: Parsing Reference\n");
5694#endif
5695                   /* TODO: check generation of subtrees if noent !!! */
5696                   docbParseReference(ctxt);
5697               } else {
5698                   /* TODO Avoid the extra copy, handle directly !!!!!! */
5699                   /*
5700                    * Goal of the following test is :
5701                    *  - minimize calls to the SAX 'character' callback
5702                    *    when they are mergeable
5703                    */
5704                   if ((ctxt->inputNr == 1) &&
5705                       (avail < DOCB_PARSER_BIG_BUFFER_SIZE)) {
5706                       if ((!terminate) &&
5707                           (docbParseLookupSequence(ctxt, '<', 0, 0) < 0))
5708                           goto done;
5709                    }
5710                   ctxt->checkIndex = 0;
5711#ifdef DEBUG_PUSH
5712                   xmlGenericError(xmlGenericErrorContext,
5713                           "HPP: Parsing char data\n");
5714#endif
5715                   docbParseCharData(ctxt);
5716               }
5717               if (cons == ctxt->nbChars) {
5718                   if (ctxt->node != NULL) {
5719                       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5720                           ctxt->sax->error(ctxt->userData,
5721                                "detected an error in element content\n");
5722                       ctxt->wellFormed = 0;
5723                       NEXT;
5724                   }
5725                   break;
5726               }
5727
5728               break;
5729           }
5730            case XML_PARSER_END_TAG:
5731               if (avail < 2)
5732                   goto done;
5733               if ((!terminate) &&
5734                   (docbParseLookupSequence(ctxt, '>', 0, 0) < 0))
5735                   goto done;
5736               docbParseEndTag(ctxt);
5737               if (ctxt->nameNr == 0) {
5738                   ctxt->instate = XML_PARSER_EPILOG;
5739               } else {
5740                   ctxt->instate = XML_PARSER_CONTENT;
5741               }
5742               ctxt->checkIndex = 0;
5743#ifdef DEBUG_PUSH
5744               xmlGenericError(xmlGenericErrorContext,
5745                       "HPP: entering CONTENT\n");
5746#endif
5747               break;
5748            case XML_PARSER_CDATA_SECTION:
5749               xmlGenericError(xmlGenericErrorContext,
5750                       "HPP: internal error, state == CDATA\n");
5751               ctxt->instate = XML_PARSER_CONTENT;
5752               ctxt->checkIndex = 0;
5753#ifdef DEBUG_PUSH
5754               xmlGenericError(xmlGenericErrorContext,
5755                       "HPP: entering CONTENT\n");
5756#endif
5757               break;
5758            case XML_PARSER_DTD:
5759               xmlGenericError(xmlGenericErrorContext,
5760                       "HPP: internal error, state == DTD\n");
5761               ctxt->instate = XML_PARSER_CONTENT;
5762               ctxt->checkIndex = 0;
5763#ifdef DEBUG_PUSH
5764               xmlGenericError(xmlGenericErrorContext,
5765                       "HPP: entering CONTENT\n");
5766#endif
5767               break;
5768            case XML_PARSER_COMMENT:
5769               xmlGenericError(xmlGenericErrorContext,
5770                       "HPP: internal error, state == COMMENT\n");
5771               ctxt->instate = XML_PARSER_CONTENT;
5772               ctxt->checkIndex = 0;
5773#ifdef DEBUG_PUSH
5774               xmlGenericError(xmlGenericErrorContext,
5775                       "HPP: entering CONTENT\n");
5776#endif
5777               break;
5778            case XML_PARSER_PI:
5779               xmlGenericError(xmlGenericErrorContext,
5780                       "HPP: internal error, state == PI\n");
5781               ctxt->instate = XML_PARSER_CONTENT;
5782               ctxt->checkIndex = 0;
5783#ifdef DEBUG_PUSH
5784               xmlGenericError(xmlGenericErrorContext,
5785                       "HPP: entering CONTENT\n");
5786#endif
5787               break;
5788            case XML_PARSER_ENTITY_DECL:
5789               xmlGenericError(xmlGenericErrorContext,
5790                       "HPP: internal error, state == ENTITY_DECL\n");
5791               ctxt->instate = XML_PARSER_CONTENT;
5792               ctxt->checkIndex = 0;
5793#ifdef DEBUG_PUSH
5794               xmlGenericError(xmlGenericErrorContext,
5795                       "HPP: entering CONTENT\n");
5796#endif
5797               break;
5798            case XML_PARSER_ENTITY_VALUE:
5799               xmlGenericError(xmlGenericErrorContext,
5800                       "HPP: internal error, state == ENTITY_VALUE\n");
5801               ctxt->instate = XML_PARSER_CONTENT;
5802               ctxt->checkIndex = 0;
5803#ifdef DEBUG_PUSH
5804               xmlGenericError(xmlGenericErrorContext,
5805                       "HPP: entering DTD\n");
5806#endif
5807               break;
5808            case XML_PARSER_ATTRIBUTE_VALUE:
5809               xmlGenericError(xmlGenericErrorContext,
5810                       "HPP: internal error, state == ATTRIBUTE_VALUE\n");
5811               ctxt->instate = XML_PARSER_START_TAG;
5812               ctxt->checkIndex = 0;
5813#ifdef DEBUG_PUSH
5814               xmlGenericError(xmlGenericErrorContext,
5815                       "HPP: entering START_TAG\n");
5816#endif
5817               break;
5818           case XML_PARSER_SYSTEM_LITERAL:
5819               xmlGenericError(xmlGenericErrorContext,
5820                       "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
5821               ctxt->instate = XML_PARSER_CONTENT;
5822               ctxt->checkIndex = 0;
5823#ifdef DEBUG_PUSH
5824               xmlGenericError(xmlGenericErrorContext,
5825                       "HPP: entering CONTENT\n");
5826#endif
5827               break;
5828
5829           case XML_PARSER_IGNORE:
5830               xmlGenericError(xmlGenericErrorContext,
5831                       "HPP: internal error, state == XML_PARSER_IGNORE\n");
5832               ctxt->instate = XML_PARSER_CONTENT;
5833               ctxt->checkIndex = 0;
5834#ifdef DEBUG_PUSH
5835               xmlGenericError(xmlGenericErrorContext,
5836                       "HPP: entering CONTENT\n");
5837#endif
5838               break;
5839       }
5840    }
5841done:
5842    if ((avail == 0) && (terminate)) {
5843       docbAutoClose(ctxt, NULL);
5844       if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5845           /*
5846            * SAX: end of the document processing.
5847            */
5848           ctxt->instate = XML_PARSER_EOF;
5849           if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5850               ctxt->sax->endDocument(ctxt->userData);
5851       }
5852    }
5853    if ((ctxt->myDoc != NULL) &&
5854       ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5855        (ctxt->instate == XML_PARSER_EPILOG))) {
5856       xmlDtdPtr dtd;
5857       dtd = ctxt->myDoc->intSubset;
5858       if (dtd == NULL)
5859           ctxt->myDoc->intSubset =
5860               xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
5861                   BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
5862                   BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
5863    }
5864#ifdef DEBUG_PUSH
5865    xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
5866#endif
5867    return(ret);
5868}
5869
5870/**
5871 * docbParseChunk:
5872 * @ctxt:  an XML parser context
5873 * @chunk:  an char array
5874 * @size:  the size in byte of the chunk
5875 * @terminate:  last chunk indicator
5876 *
5877 * Parse a Chunk of memory
5878 *
5879 * Returns zero if no error, the xmlParserErrors otherwise.
5880 */
5881int
5882docbParseChunk(docbParserCtxtPtr ctxt, const char *chunk, int size,
5883              int terminate) {
5884    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5885        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
5886       int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5887       int cur = ctxt->input->cur - ctxt->input->base;
5888
5889       xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5890       ctxt->input->base = ctxt->input->buf->buffer->content + base;
5891       ctxt->input->cur = ctxt->input->base + cur;
5892#ifdef DEBUG_PUSH
5893       xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
5894#endif
5895
5896       if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5897           docbParseTryOrFinish(ctxt, terminate);
5898    } else if (ctxt->instate != XML_PARSER_EOF) {
5899       xmlParserInputBufferPush(ctxt->input->buf, 0, "");
5900        docbParseTryOrFinish(ctxt, terminate);
5901    }
5902    if (terminate) {
5903       if ((ctxt->instate != XML_PARSER_EOF) &&
5904           (ctxt->instate != XML_PARSER_EPILOG) &&
5905           (ctxt->instate != XML_PARSER_MISC)) {
5906           ctxt->errNo = XML_ERR_DOCUMENT_END;
5907           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5908               ctxt->sax->error(ctxt->userData,
5909                   "Extra content at the end of the document\n");
5910           ctxt->wellFormed = 0;
5911       }
5912       if (ctxt->instate != XML_PARSER_EOF) {
5913           if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5914               ctxt->sax->endDocument(ctxt->userData);
5915       }
5916       ctxt->instate = XML_PARSER_EOF;
5917    }
5918    return((xmlParserErrors) ctxt->errNo);
5919}
5920
5921/************************************************************************
5922 *                                                                     *
5923 *                     User entry points                               *
5924 *                                                                     *
5925 ************************************************************************/
5926
5927/**
5928 * docbCreatePushParserCtxt :
5929 * @sax:  a SAX handler
5930 * @user_data:  The user data returned on SAX callbacks
5931 * @chunk:  a pointer to an array of chars
5932 * @size:  number of chars in the array
5933 * @filename:  an optional file name or URI
5934 * @enc:  an optional encoding
5935 *
5936 * Create a parser context for using the DocBook SGML parser in push mode
5937 * To allow content encoding detection, @size should be >= 4
5938 * The value of @filename is used for fetching external entities
5939 * and error/warning reports.
5940 *
5941 * Returns the new parser context or NULL
5942 */
5943docbParserCtxtPtr
5944docbCreatePushParserCtxt(docbSAXHandlerPtr sax, void *user_data,
5945                         const char *chunk, int size, const char *filename,
5946                        xmlCharEncoding enc) {
5947    docbParserCtxtPtr ctxt;
5948    docbParserInputPtr inputStream;
5949    xmlParserInputBufferPtr buf;
5950
5951    buf = xmlAllocParserInputBuffer(enc);
5952    if (buf == NULL) return(NULL);
5953
5954    ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
5955    if (ctxt == NULL) {
5956       xmlFree(buf);
5957       return(NULL);
5958    }
5959    memset(ctxt, 0, sizeof(docbParserCtxt));
5960    docbInitParserCtxt(ctxt);
5961    if (sax != NULL) {
5962       if (ctxt->sax != &docbDefaultSAXHandler)
5963           xmlFree(ctxt->sax);
5964       ctxt->sax = (docbSAXHandlerPtr) xmlMalloc(sizeof(docbSAXHandler));
5965       if (ctxt->sax == NULL) {
5966           xmlFree(buf);
5967           xmlFree(ctxt);
5968           return(NULL);
5969       }
5970       memcpy(ctxt->sax, sax, sizeof(docbSAXHandler));
5971       if (user_data != NULL)
5972           ctxt->userData = user_data;
5973    }
5974    if (filename == NULL) {
5975       ctxt->directory = NULL;
5976    } else {
5977        ctxt->directory = xmlParserGetDirectory(filename);
5978    }
5979
5980    inputStream = docbNewInputStream(ctxt);
5981    if (inputStream == NULL) {
5982       xmlFreeParserCtxt(ctxt);
5983       return(NULL);
5984    }
5985
5986    if (filename == NULL)
5987       inputStream->filename = NULL;
5988    else
5989       inputStream->filename = xmlMemStrdup(filename);
5990    inputStream->buf = buf;
5991    inputStream->base = inputStream->buf->buffer->content;
5992    inputStream->cur = inputStream->buf->buffer->content;
5993
5994    inputPush(ctxt, inputStream);
5995
5996    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5997        (ctxt->input->buf != NULL))  {
5998       xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5999#ifdef DEBUG_PUSH
6000       xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
6001#endif
6002    }
6003
6004    return(ctxt);
6005}
6006
6007/**
6008 * docbSAXParseDoc :
6009 * @cur:  a pointer to an array of xmlChar
6010 * @encoding:  a free form C string describing the SGML document encoding, or NULL
6011 * @sax:  the SAX handler block
6012 * @userData: if using SAX, this pointer will be provided on callbacks.
6013 *
6014 * parse an SGML in-memory document and build a tree.
6015 * It use the given SAX function block to handle the parsing callback.
6016 * If sax is NULL, fallback to the default DOM tree building routines.
6017 *
6018 * Returns the resulting document tree
6019 */
6020
6021docbDocPtr
6022docbSAXParseDoc(xmlChar *cur, const char *encoding, docbSAXHandlerPtr sax, void *userData) {
6023    docbDocPtr ret;
6024    docbParserCtxtPtr ctxt;
6025
6026    if (cur == NULL) return(NULL);
6027
6028
6029    ctxt = docbCreateDocParserCtxt(cur, encoding);
6030    if (ctxt == NULL) return(NULL);
6031    if (sax != NULL) {
6032        ctxt->sax = sax;
6033        ctxt->userData = userData;
6034    }
6035
6036    docbParseDocument(ctxt);
6037    ret = ctxt->myDoc;
6038    if (sax != NULL) {
6039       ctxt->sax = NULL;
6040       ctxt->userData = NULL;
6041    }
6042    docbFreeParserCtxt(ctxt);
6043
6044    return(ret);
6045}
6046
6047/**
6048 * docbParseDoc :
6049 * @cur:  a pointer to an array of xmlChar
6050 * @encoding:  a free form C string describing the SGML document encoding, or NULL
6051 *
6052 * parse an SGML in-memory document and build a tree.
6053 *
6054 * Returns the resulting document tree
6055 */
6056
6057docbDocPtr
6058docbParseDoc(xmlChar *cur, const char *encoding) {
6059    return(docbSAXParseDoc(cur, encoding, NULL, NULL));
6060}
6061
6062
6063/**
6064 * docbCreateFileParserCtxt :
6065 * @filename:  the filename
6066 * @encoding:  the SGML document encoding, or NULL
6067 *
6068 * Create a parser context for a file content.
6069 * Automatic support for ZLIB/Compress compressed document is provided
6070 * by default if found at compile-time.
6071 *
6072 * Returns the new parser context or NULL
6073 */
6074docbParserCtxtPtr
6075docbCreateFileParserCtxt(const char *filename,
6076	                 const char *encoding ATTRIBUTE_UNUSED)
6077{
6078    docbParserCtxtPtr ctxt;
6079    docbParserInputPtr inputStream;
6080    xmlParserInputBufferPtr buf;
6081    /* sgmlCharEncoding enc; */
6082
6083    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
6084    if (buf == NULL) return(NULL);
6085
6086    ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt));
6087    if (ctxt == NULL) {
6088        perror("malloc");
6089       return(NULL);
6090    }
6091    memset(ctxt, 0, sizeof(docbParserCtxt));
6092    docbInitParserCtxt(ctxt);
6093    inputStream = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput));
6094    if (inputStream == NULL) {
6095        perror("malloc");
6096       xmlFree(ctxt);
6097       return(NULL);
6098    }
6099    memset(inputStream, 0, sizeof(docbParserInput));
6100
6101    inputStream->filename = xmlMemStrdup(filename);
6102    inputStream->line = 1;
6103    inputStream->col = 1;
6104    inputStream->buf = buf;
6105    inputStream->directory = NULL;
6106
6107    inputStream->base = inputStream->buf->buffer->content;
6108    inputStream->cur = inputStream->buf->buffer->content;
6109    inputStream->free = NULL;
6110
6111    inputPush(ctxt, inputStream);
6112    return(ctxt);
6113}
6114
6115/**
6116 * docbSAXParseFile :
6117 * @filename:  the filename
6118 * @encoding:  a free form C string describing the SGML document encoding, or NULL
6119 * @sax:  the SAX handler block
6120 * @userData: if using SAX, this pointer will be provided on callbacks.
6121 *
6122 * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
6123 * compressed document is provided by default if found at compile-time.
6124 * It use the given SAX function block to handle the parsing callback.
6125 * If sax is NULL, fallback to the default DOM tree building routines.
6126 *
6127 * Returns the resulting document tree
6128 */
6129
6130docbDocPtr
6131docbSAXParseFile(const char *filename, const char *encoding, docbSAXHandlerPtr sax,
6132                 void *userData) {
6133    docbDocPtr ret;
6134    docbParserCtxtPtr ctxt;
6135    docbSAXHandlerPtr oldsax = NULL;
6136
6137    ctxt = docbCreateFileParserCtxt(filename, encoding);
6138    if (ctxt == NULL) return(NULL);
6139    if (sax != NULL) {
6140       oldsax = ctxt->sax;
6141        ctxt->sax = sax;
6142        ctxt->userData = userData;
6143    }
6144
6145    docbParseDocument(ctxt);
6146
6147    ret = ctxt->myDoc;
6148    if (sax != NULL) {
6149        ctxt->sax = oldsax;
6150        ctxt->userData = NULL;
6151    }
6152    docbFreeParserCtxt(ctxt);
6153
6154    return(ret);
6155}
6156
6157/**
6158 * docbParseFile :
6159 * @filename:  the filename
6160 * @encoding:  a free form C string describing document encoding, or NULL
6161 *
6162 * parse a Docbook SGML file and build a tree. Automatic support for
6163 * ZLIB/Compress compressed document is provided by default if found
6164 * at compile-time.
6165 *
6166 * Returns the resulting document tree
6167 */
6168
6169docbDocPtr
6170docbParseFile(const char *filename, const char *encoding) {
6171    return(docbSAXParseFile(filename, encoding, NULL, NULL));
6172}
6173
6174#endif /* LIBXML_DOCB_ENABLED */
6175