1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2   See the file COPYING for copying permission.
3*/
4
5#include <stddef.h>
6
7#ifdef COMPILED_FROM_DSP
8#include "winconfig.h"
9#elif defined(MACOS_CLASSIC)
10#include "macconfig.h"
11#elif defined(__amigaos__)
12#include "amigaconfig.h"
13#elif defined(__WATCOMC__)
14#include "watcomconfig.h"
15#else
16#ifdef HAVE_EXPAT_CONFIG_H
17#include <expat_config.h>
18#endif
19#endif /* ndef COMPILED_FROM_DSP */
20
21#include "expat_external.h"
22#include "internal.h"
23#include "xmlrole.h"
24#include "ascii.h"
25
/* Doesn't check:

 that ',' and '|' are not mixed in a model group
 the content of literals

*/
32
/* Keyword spellings recognized by the handlers below.  Each table is a
   NUL-terminated sequence of ASCII_* character constants and is compared
   against a token's name with XmlNameMatchesAscii().  They are written as
   ASCII_* constants rather than string literals, presumably so the bytes
   are ASCII whatever the compiler's execution character set is.
   KW_IGNORE and KW_INCLUDE are only defined when XML_DTD is defined. */
static const char KW_ANY[] = {
    ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_ATTLIST[] = {
    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_CDATA[] = {
    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_DOCTYPE[] = {
    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_ELEMENT[] = {
    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_EMPTY[] = {
    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_ENTITIES[] = {
    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
    '\0' };
static const char KW_ENTITY[] = {
    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_FIXED[] = {
    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_ID[] = {
    ASCII_I, ASCII_D, '\0' };
static const char KW_IDREF[] = {
    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
static const char KW_IDREFS[] = {
    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
#ifdef XML_DTD
static const char KW_IGNORE[] = {
    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
#endif
static const char KW_IMPLIED[] = {
    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
#ifdef XML_DTD
static const char KW_INCLUDE[] = {
    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
#endif
static const char KW_NDATA[] = {
    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_NMTOKEN[] = {
    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
static const char KW_NMTOKENS[] = {
    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
    '\0' };
static const char KW_NOTATION[] =
    { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
      '\0' };
static const char KW_PCDATA[] = {
    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_PUBLIC[] = {
    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
static const char KW_REQUIRED[] = {
    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
    '\0' };
static const char KW_SYSTEM[] = {
    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
87
/* Default definition: read the encoding's minBytesPerChar field.  A
   definition supplied before this point takes precedence.  The handlers
   below multiply it by a character count to step over the leading
   characters of a token before comparing the keyword that follows. */
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
91
/* Reset state->handler to the top-level subset handler; the handlers below
   use it when a declaration is closed.  With XML_DTD the choice depends on
   state->documentEntity: internalSubset when it is non-zero, externalSubset1
   otherwise.  Without XML_DTD it is always internalSubset.  The macro is an
   assignment expression and evaluates `state` more than once. */
#ifdef XML_DTD
#define setTopLevel(state) \
  ((state)->handler = ((state)->documentEntity \
                       ? internalSubset \
                       : externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
100
/* Function type shared by every state handler in this file.  A handler
   receives the current state, the token code, the token's [ptr, end) byte
   range and its encoding; it returns an XML_ROLE_* value and may assign
   state->handler to select the handler for the next token. */
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
                                   int tok,
                                   const char *ptr,
                                   const char *end,
                                   const ENCODING *enc);

/* Forward declarations of all state handlers, so that each one can name
   any other as its successor regardless of definition order.  The
   external-subset and conditional-section handlers exist only when
   XML_DTD is defined. */
static PROLOG_HANDLER
  prolog0, prolog1, prolog2,
  doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
  internalSubset,
  entity0, entity1, entity2, entity3, entity4, entity5, entity6,
  entity7, entity8, entity9, entity10,
  notation0, notation1, notation2, notation3, notation4,
  attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
  attlist7, attlist8, attlist9,
  element0, element1, element2, element3, element4, element5, element6,
  element7,
#ifdef XML_DTD
  externalSubset0, externalSubset1,
  condSect0, condSect1, condSect2,
#endif /* XML_DTD */
  declClose,
  error;

/* Fallback that every handler calls for a token it does not handle itself
   (defined elsewhere in this file). */
static int FASTCALL common(PROLOG_STATE *state, int tok);
126
127static int PTRCALL
128prolog0(PROLOG_STATE *state,
129        int tok,
130        const char *ptr,
131        const char *end,
132        const ENCODING *enc)
133{
134  switch (tok) {
135  case XML_TOK_PROLOG_S:
136    state->handler = prolog1;
137    return XML_ROLE_NONE;
138  case XML_TOK_XML_DECL:
139    state->handler = prolog1;
140    return XML_ROLE_XML_DECL;
141  case XML_TOK_PI:
142    state->handler = prolog1;
143    return XML_ROLE_PI;
144  case XML_TOK_COMMENT:
145    state->handler = prolog1;
146    return XML_ROLE_COMMENT;
147  case XML_TOK_BOM:
148    return XML_ROLE_NONE;
149  case XML_TOK_DECL_OPEN:
150    if (!XmlNameMatchesAscii(enc,
151                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
152                             end,
153                             KW_DOCTYPE))
154      break;
155    state->handler = doctype0;
156    return XML_ROLE_DOCTYPE_NONE;
157  case XML_TOK_INSTANCE_START:
158    state->handler = error;
159    return XML_ROLE_INSTANCE_START;
160  }
161  return common(state, tok);
162}
163
164static int PTRCALL
165prolog1(PROLOG_STATE *state,
166        int tok,
167        const char *ptr,
168        const char *end,
169        const ENCODING *enc)
170{
171  switch (tok) {
172  case XML_TOK_PROLOG_S:
173    return XML_ROLE_NONE;
174  case XML_TOK_PI:
175    return XML_ROLE_PI;
176  case XML_TOK_COMMENT:
177    return XML_ROLE_COMMENT;
178  case XML_TOK_BOM:
179    return XML_ROLE_NONE;
180  case XML_TOK_DECL_OPEN:
181    if (!XmlNameMatchesAscii(enc,
182                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
183                             end,
184                             KW_DOCTYPE))
185      break;
186    state->handler = doctype0;
187    return XML_ROLE_DOCTYPE_NONE;
188  case XML_TOK_INSTANCE_START:
189    state->handler = error;
190    return XML_ROLE_INSTANCE_START;
191  }
192  return common(state, tok);
193}
194
195static int PTRCALL
196prolog2(PROLOG_STATE *state,
197        int tok,
198        const char *ptr,
199        const char *end,
200        const ENCODING *enc)
201{
202  switch (tok) {
203  case XML_TOK_PROLOG_S:
204    return XML_ROLE_NONE;
205  case XML_TOK_PI:
206    return XML_ROLE_PI;
207  case XML_TOK_COMMENT:
208    return XML_ROLE_COMMENT;
209  case XML_TOK_INSTANCE_START:
210    state->handler = error;
211    return XML_ROLE_INSTANCE_START;
212  }
213  return common(state, tok);
214}
215
216static int PTRCALL
217doctype0(PROLOG_STATE *state,
218         int tok,
219         const char *ptr,
220         const char *end,
221         const ENCODING *enc)
222{
223  switch (tok) {
224  case XML_TOK_PROLOG_S:
225    return XML_ROLE_DOCTYPE_NONE;
226  case XML_TOK_NAME:
227  case XML_TOK_PREFIXED_NAME:
228    state->handler = doctype1;
229    return XML_ROLE_DOCTYPE_NAME;
230  }
231  return common(state, tok);
232}
233
234static int PTRCALL
235doctype1(PROLOG_STATE *state,
236         int tok,
237         const char *ptr,
238         const char *end,
239         const ENCODING *enc)
240{
241  switch (tok) {
242  case XML_TOK_PROLOG_S:
243    return XML_ROLE_DOCTYPE_NONE;
244  case XML_TOK_OPEN_BRACKET:
245    state->handler = internalSubset;
246    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
247  case XML_TOK_DECL_CLOSE:
248    state->handler = prolog2;
249    return XML_ROLE_DOCTYPE_CLOSE;
250  case XML_TOK_NAME:
251    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
252      state->handler = doctype3;
253      return XML_ROLE_DOCTYPE_NONE;
254    }
255    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
256      state->handler = doctype2;
257      return XML_ROLE_DOCTYPE_NONE;
258    }
259    break;
260  }
261  return common(state, tok);
262}
263
264static int PTRCALL
265doctype2(PROLOG_STATE *state,
266         int tok,
267         const char *ptr,
268         const char *end,
269         const ENCODING *enc)
270{
271  switch (tok) {
272  case XML_TOK_PROLOG_S:
273    return XML_ROLE_DOCTYPE_NONE;
274  case XML_TOK_LITERAL:
275    state->handler = doctype3;
276    return XML_ROLE_DOCTYPE_PUBLIC_ID;
277  }
278  return common(state, tok);
279}
280
281static int PTRCALL
282doctype3(PROLOG_STATE *state,
283         int tok,
284         const char *ptr,
285         const char *end,
286         const ENCODING *enc)
287{
288  switch (tok) {
289  case XML_TOK_PROLOG_S:
290    return XML_ROLE_DOCTYPE_NONE;
291  case XML_TOK_LITERAL:
292    state->handler = doctype4;
293    return XML_ROLE_DOCTYPE_SYSTEM_ID;
294  }
295  return common(state, tok);
296}
297
298static int PTRCALL
299doctype4(PROLOG_STATE *state,
300         int tok,
301         const char *ptr,
302         const char *end,
303         const ENCODING *enc)
304{
305  switch (tok) {
306  case XML_TOK_PROLOG_S:
307    return XML_ROLE_DOCTYPE_NONE;
308  case XML_TOK_OPEN_BRACKET:
309    state->handler = internalSubset;
310    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
311  case XML_TOK_DECL_CLOSE:
312    state->handler = prolog2;
313    return XML_ROLE_DOCTYPE_CLOSE;
314  }
315  return common(state, tok);
316}
317
318static int PTRCALL
319doctype5(PROLOG_STATE *state,
320         int tok,
321         const char *ptr,
322         const char *end,
323         const ENCODING *enc)
324{
325  switch (tok) {
326  case XML_TOK_PROLOG_S:
327    return XML_ROLE_DOCTYPE_NONE;
328  case XML_TOK_DECL_CLOSE:
329    state->handler = prolog2;
330    return XML_ROLE_DOCTYPE_CLOSE;
331  }
332  return common(state, tok);
333}
334
335static int PTRCALL
336internalSubset(PROLOG_STATE *state,
337               int tok,
338               const char *ptr,
339               const char *end,
340               const ENCODING *enc)
341{
342  switch (tok) {
343  case XML_TOK_PROLOG_S:
344    return XML_ROLE_NONE;
345  case XML_TOK_DECL_OPEN:
346    if (XmlNameMatchesAscii(enc,
347                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
348                            end,
349                            KW_ENTITY)) {
350      state->handler = entity0;
351      return XML_ROLE_ENTITY_NONE;
352    }
353    if (XmlNameMatchesAscii(enc,
354                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
355                            end,
356                            KW_ATTLIST)) {
357      state->handler = attlist0;
358      return XML_ROLE_ATTLIST_NONE;
359    }
360    if (XmlNameMatchesAscii(enc,
361                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
362                            end,
363                            KW_ELEMENT)) {
364      state->handler = element0;
365      return XML_ROLE_ELEMENT_NONE;
366    }
367    if (XmlNameMatchesAscii(enc,
368                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
369                            end,
370                            KW_NOTATION)) {
371      state->handler = notation0;
372      return XML_ROLE_NOTATION_NONE;
373    }
374    break;
375  case XML_TOK_PI:
376    return XML_ROLE_PI;
377  case XML_TOK_COMMENT:
378    return XML_ROLE_COMMENT;
379  case XML_TOK_PARAM_ENTITY_REF:
380    return XML_ROLE_PARAM_ENTITY_REF;
381  case XML_TOK_CLOSE_BRACKET:
382    state->handler = doctype5;
383    return XML_ROLE_DOCTYPE_NONE;
384  case XML_TOK_NONE:
385    return XML_ROLE_NONE;
386  }
387  return common(state, tok);
388}
389
390#ifdef XML_DTD
391
392static int PTRCALL
393externalSubset0(PROLOG_STATE *state,
394                int tok,
395                const char *ptr,
396                const char *end,
397                const ENCODING *enc)
398{
399  state->handler = externalSubset1;
400  if (tok == XML_TOK_XML_DECL)
401    return XML_ROLE_TEXT_DECL;
402  return externalSubset1(state, tok, ptr, end, enc);
403}
404
405static int PTRCALL
406externalSubset1(PROLOG_STATE *state,
407                int tok,
408                const char *ptr,
409                const char *end,
410                const ENCODING *enc)
411{
412  switch (tok) {
413  case XML_TOK_COND_SECT_OPEN:
414    state->handler = condSect0;
415    return XML_ROLE_NONE;
416  case XML_TOK_COND_SECT_CLOSE:
417    if (state->includeLevel == 0)
418      break;
419    state->includeLevel -= 1;
420    return XML_ROLE_NONE;
421  case XML_TOK_PROLOG_S:
422    return XML_ROLE_NONE;
423  case XML_TOK_CLOSE_BRACKET:
424    break;
425  case XML_TOK_NONE:
426    if (state->includeLevel)
427      break;
428    return XML_ROLE_NONE;
429  default:
430    return internalSubset(state, tok, ptr, end, enc);
431  }
432  return common(state, tok);
433}
434
435#endif /* XML_DTD */
436
437static int PTRCALL
438entity0(PROLOG_STATE *state,
439        int tok,
440        const char *ptr,
441        const char *end,
442        const ENCODING *enc)
443{
444  switch (tok) {
445  case XML_TOK_PROLOG_S:
446    return XML_ROLE_ENTITY_NONE;
447  case XML_TOK_PERCENT:
448    state->handler = entity1;
449    return XML_ROLE_ENTITY_NONE;
450  case XML_TOK_NAME:
451    state->handler = entity2;
452    return XML_ROLE_GENERAL_ENTITY_NAME;
453  }
454  return common(state, tok);
455}
456
457static int PTRCALL
458entity1(PROLOG_STATE *state,
459        int tok,
460        const char *ptr,
461        const char *end,
462        const ENCODING *enc)
463{
464  switch (tok) {
465  case XML_TOK_PROLOG_S:
466    return XML_ROLE_ENTITY_NONE;
467  case XML_TOK_NAME:
468    state->handler = entity7;
469    return XML_ROLE_PARAM_ENTITY_NAME;
470  }
471  return common(state, tok);
472}
473
474static int PTRCALL
475entity2(PROLOG_STATE *state,
476        int tok,
477        const char *ptr,
478        const char *end,
479        const ENCODING *enc)
480{
481  switch (tok) {
482  case XML_TOK_PROLOG_S:
483    return XML_ROLE_ENTITY_NONE;
484  case XML_TOK_NAME:
485    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
486      state->handler = entity4;
487      return XML_ROLE_ENTITY_NONE;
488    }
489    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
490      state->handler = entity3;
491      return XML_ROLE_ENTITY_NONE;
492    }
493    break;
494  case XML_TOK_LITERAL:
495    state->handler = declClose;
496    state->role_none = XML_ROLE_ENTITY_NONE;
497    return XML_ROLE_ENTITY_VALUE;
498  }
499  return common(state, tok);
500}
501
502static int PTRCALL
503entity3(PROLOG_STATE *state,
504        int tok,
505        const char *ptr,
506        const char *end,
507        const ENCODING *enc)
508{
509  switch (tok) {
510  case XML_TOK_PROLOG_S:
511    return XML_ROLE_ENTITY_NONE;
512  case XML_TOK_LITERAL:
513    state->handler = entity4;
514    return XML_ROLE_ENTITY_PUBLIC_ID;
515  }
516  return common(state, tok);
517}
518
519static int PTRCALL
520entity4(PROLOG_STATE *state,
521        int tok,
522        const char *ptr,
523        const char *end,
524        const ENCODING *enc)
525{
526  switch (tok) {
527  case XML_TOK_PROLOG_S:
528    return XML_ROLE_ENTITY_NONE;
529  case XML_TOK_LITERAL:
530    state->handler = entity5;
531    return XML_ROLE_ENTITY_SYSTEM_ID;
532  }
533  return common(state, tok);
534}
535
536static int PTRCALL
537entity5(PROLOG_STATE *state,
538        int tok,
539        const char *ptr,
540        const char *end,
541        const ENCODING *enc)
542{
543  switch (tok) {
544  case XML_TOK_PROLOG_S:
545    return XML_ROLE_ENTITY_NONE;
546  case XML_TOK_DECL_CLOSE:
547    setTopLevel(state);
548    return XML_ROLE_ENTITY_COMPLETE;
549  case XML_TOK_NAME:
550    if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
551      state->handler = entity6;
552      return XML_ROLE_ENTITY_NONE;
553    }
554    break;
555  }
556  return common(state, tok);
557}
558
559static int PTRCALL
560entity6(PROLOG_STATE *state,
561        int tok,
562        const char *ptr,
563        const char *end,
564        const ENCODING *enc)
565{
566  switch (tok) {
567  case XML_TOK_PROLOG_S:
568    return XML_ROLE_ENTITY_NONE;
569  case XML_TOK_NAME:
570    state->handler = declClose;
571    state->role_none = XML_ROLE_ENTITY_NONE;
572    return XML_ROLE_ENTITY_NOTATION_NAME;
573  }
574  return common(state, tok);
575}
576
577static int PTRCALL
578entity7(PROLOG_STATE *state,
579        int tok,
580        const char *ptr,
581        const char *end,
582        const ENCODING *enc)
583{
584  switch (tok) {
585  case XML_TOK_PROLOG_S:
586    return XML_ROLE_ENTITY_NONE;
587  case XML_TOK_NAME:
588    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
589      state->handler = entity9;
590      return XML_ROLE_ENTITY_NONE;
591    }
592    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
593      state->handler = entity8;
594      return XML_ROLE_ENTITY_NONE;
595    }
596    break;
597  case XML_TOK_LITERAL:
598    state->handler = declClose;
599    state->role_none = XML_ROLE_ENTITY_NONE;
600    return XML_ROLE_ENTITY_VALUE;
601  }
602  return common(state, tok);
603}
604
605static int PTRCALL
606entity8(PROLOG_STATE *state,
607        int tok,
608        const char *ptr,
609        const char *end,
610        const ENCODING *enc)
611{
612  switch (tok) {
613  case XML_TOK_PROLOG_S:
614    return XML_ROLE_ENTITY_NONE;
615  case XML_TOK_LITERAL:
616    state->handler = entity9;
617    return XML_ROLE_ENTITY_PUBLIC_ID;
618  }
619  return common(state, tok);
620}
621
622static int PTRCALL
623entity9(PROLOG_STATE *state,
624        int tok,
625        const char *ptr,
626        const char *end,
627        const ENCODING *enc)
628{
629  switch (tok) {
630  case XML_TOK_PROLOG_S:
631    return XML_ROLE_ENTITY_NONE;
632  case XML_TOK_LITERAL:
633    state->handler = entity10;
634    return XML_ROLE_ENTITY_SYSTEM_ID;
635  }
636  return common(state, tok);
637}
638
639static int PTRCALL
640entity10(PROLOG_STATE *state,
641         int tok,
642         const char *ptr,
643         const char *end,
644         const ENCODING *enc)
645{
646  switch (tok) {
647  case XML_TOK_PROLOG_S:
648    return XML_ROLE_ENTITY_NONE;
649  case XML_TOK_DECL_CLOSE:
650    setTopLevel(state);
651    return XML_ROLE_ENTITY_COMPLETE;
652  }
653  return common(state, tok);
654}
655
656static int PTRCALL
657notation0(PROLOG_STATE *state,
658          int tok,
659          const char *ptr,
660          const char *end,
661          const ENCODING *enc)
662{
663  switch (tok) {
664  case XML_TOK_PROLOG_S:
665    return XML_ROLE_NOTATION_NONE;
666  case XML_TOK_NAME:
667    state->handler = notation1;
668    return XML_ROLE_NOTATION_NAME;
669  }
670  return common(state, tok);
671}
672
673static int PTRCALL
674notation1(PROLOG_STATE *state,
675          int tok,
676          const char *ptr,
677          const char *end,
678          const ENCODING *enc)
679{
680  switch (tok) {
681  case XML_TOK_PROLOG_S:
682    return XML_ROLE_NOTATION_NONE;
683  case XML_TOK_NAME:
684    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
685      state->handler = notation3;
686      return XML_ROLE_NOTATION_NONE;
687    }
688    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
689      state->handler = notation2;
690      return XML_ROLE_NOTATION_NONE;
691    }
692    break;
693  }
694  return common(state, tok);
695}
696
697static int PTRCALL
698notation2(PROLOG_STATE *state,
699          int tok,
700          const char *ptr,
701          const char *end,
702          const ENCODING *enc)
703{
704  switch (tok) {
705  case XML_TOK_PROLOG_S:
706    return XML_ROLE_NOTATION_NONE;
707  case XML_TOK_LITERAL:
708    state->handler = notation4;
709    return XML_ROLE_NOTATION_PUBLIC_ID;
710  }
711  return common(state, tok);
712}
713
714static int PTRCALL
715notation3(PROLOG_STATE *state,
716          int tok,
717          const char *ptr,
718          const char *end,
719          const ENCODING *enc)
720{
721  switch (tok) {
722  case XML_TOK_PROLOG_S:
723    return XML_ROLE_NOTATION_NONE;
724  case XML_TOK_LITERAL:
725    state->handler = declClose;
726    state->role_none = XML_ROLE_NOTATION_NONE;
727    return XML_ROLE_NOTATION_SYSTEM_ID;
728  }
729  return common(state, tok);
730}
731
732static int PTRCALL
733notation4(PROLOG_STATE *state,
734          int tok,
735          const char *ptr,
736          const char *end,
737          const ENCODING *enc)
738{
739  switch (tok) {
740  case XML_TOK_PROLOG_S:
741    return XML_ROLE_NOTATION_NONE;
742  case XML_TOK_LITERAL:
743    state->handler = declClose;
744    state->role_none = XML_ROLE_NOTATION_NONE;
745    return XML_ROLE_NOTATION_SYSTEM_ID;
746  case XML_TOK_DECL_CLOSE:
747    setTopLevel(state);
748    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
749  }
750  return common(state, tok);
751}
752
753static int PTRCALL
754attlist0(PROLOG_STATE *state,
755         int tok,
756         const char *ptr,
757         const char *end,
758         const ENCODING *enc)
759{
760  switch (tok) {
761  case XML_TOK_PROLOG_S:
762    return XML_ROLE_ATTLIST_NONE;
763  case XML_TOK_NAME:
764  case XML_TOK_PREFIXED_NAME:
765    state->handler = attlist1;
766    return XML_ROLE_ATTLIST_ELEMENT_NAME;
767  }
768  return common(state, tok);
769}
770
771static int PTRCALL
772attlist1(PROLOG_STATE *state,
773         int tok,
774         const char *ptr,
775         const char *end,
776         const ENCODING *enc)
777{
778  switch (tok) {
779  case XML_TOK_PROLOG_S:
780    return XML_ROLE_ATTLIST_NONE;
781  case XML_TOK_DECL_CLOSE:
782    setTopLevel(state);
783    return XML_ROLE_ATTLIST_NONE;
784  case XML_TOK_NAME:
785  case XML_TOK_PREFIXED_NAME:
786    state->handler = attlist2;
787    return XML_ROLE_ATTRIBUTE_NAME;
788  }
789  return common(state, tok);
790}
791
792static int PTRCALL
793attlist2(PROLOG_STATE *state,
794         int tok,
795         const char *ptr,
796         const char *end,
797         const ENCODING *enc)
798{
799  switch (tok) {
800  case XML_TOK_PROLOG_S:
801    return XML_ROLE_ATTLIST_NONE;
802  case XML_TOK_NAME:
803    {
804      static const char * const types[] = {
805        KW_CDATA,
806        KW_ID,
807        KW_IDREF,
808        KW_IDREFS,
809        KW_ENTITY,
810        KW_ENTITIES,
811        KW_NMTOKEN,
812        KW_NMTOKENS,
813      };
814      int i;
815      for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
816        if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
817          state->handler = attlist8;
818          return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
819        }
820    }
821    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
822      state->handler = attlist5;
823      return XML_ROLE_ATTLIST_NONE;
824    }
825    break;
826  case XML_TOK_OPEN_PAREN:
827    state->handler = attlist3;
828    return XML_ROLE_ATTLIST_NONE;
829  }
830  return common(state, tok);
831}
832
833static int PTRCALL
834attlist3(PROLOG_STATE *state,
835         int tok,
836         const char *ptr,
837         const char *end,
838         const ENCODING *enc)
839{
840  switch (tok) {
841  case XML_TOK_PROLOG_S:
842    return XML_ROLE_ATTLIST_NONE;
843  case XML_TOK_NMTOKEN:
844  case XML_TOK_NAME:
845  case XML_TOK_PREFIXED_NAME:
846    state->handler = attlist4;
847    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
848  }
849  return common(state, tok);
850}
851
852static int PTRCALL
853attlist4(PROLOG_STATE *state,
854         int tok,
855         const char *ptr,
856         const char *end,
857         const ENCODING *enc)
858{
859  switch (tok) {
860  case XML_TOK_PROLOG_S:
861    return XML_ROLE_ATTLIST_NONE;
862  case XML_TOK_CLOSE_PAREN:
863    state->handler = attlist8;
864    return XML_ROLE_ATTLIST_NONE;
865  case XML_TOK_OR:
866    state->handler = attlist3;
867    return XML_ROLE_ATTLIST_NONE;
868  }
869  return common(state, tok);
870}
871
872static int PTRCALL
873attlist5(PROLOG_STATE *state,
874         int tok,
875         const char *ptr,
876         const char *end,
877         const ENCODING *enc)
878{
879  switch (tok) {
880  case XML_TOK_PROLOG_S:
881    return XML_ROLE_ATTLIST_NONE;
882  case XML_TOK_OPEN_PAREN:
883    state->handler = attlist6;
884    return XML_ROLE_ATTLIST_NONE;
885  }
886  return common(state, tok);
887}
888
889static int PTRCALL
890attlist6(PROLOG_STATE *state,
891         int tok,
892         const char *ptr,
893         const char *end,
894         const ENCODING *enc)
895{
896  switch (tok) {
897  case XML_TOK_PROLOG_S:
898    return XML_ROLE_ATTLIST_NONE;
899  case XML_TOK_NAME:
900    state->handler = attlist7;
901    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
902  }
903  return common(state, tok);
904}
905
906static int PTRCALL
907attlist7(PROLOG_STATE *state,
908         int tok,
909         const char *ptr,
910         const char *end,
911         const ENCODING *enc)
912{
913  switch (tok) {
914  case XML_TOK_PROLOG_S:
915    return XML_ROLE_ATTLIST_NONE;
916  case XML_TOK_CLOSE_PAREN:
917    state->handler = attlist8;
918    return XML_ROLE_ATTLIST_NONE;
919  case XML_TOK_OR:
920    state->handler = attlist6;
921    return XML_ROLE_ATTLIST_NONE;
922  }
923  return common(state, tok);
924}
925
926/* default value */
927static int PTRCALL
928attlist8(PROLOG_STATE *state,
929         int tok,
930         const char *ptr,
931         const char *end,
932         const ENCODING *enc)
933{
934  switch (tok) {
935  case XML_TOK_PROLOG_S:
936    return XML_ROLE_ATTLIST_NONE;
937  case XML_TOK_POUND_NAME:
938    if (XmlNameMatchesAscii(enc,
939                            ptr + MIN_BYTES_PER_CHAR(enc),
940                            end,
941                            KW_IMPLIED)) {
942      state->handler = attlist1;
943      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
944    }
945    if (XmlNameMatchesAscii(enc,
946                            ptr + MIN_BYTES_PER_CHAR(enc),
947                            end,
948                            KW_REQUIRED)) {
949      state->handler = attlist1;
950      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
951    }
952    if (XmlNameMatchesAscii(enc,
953                            ptr + MIN_BYTES_PER_CHAR(enc),
954                            end,
955                            KW_FIXED)) {
956      state->handler = attlist9;
957      return XML_ROLE_ATTLIST_NONE;
958    }
959    break;
960  case XML_TOK_LITERAL:
961    state->handler = attlist1;
962    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
963  }
964  return common(state, tok);
965}
966
967static int PTRCALL
968attlist9(PROLOG_STATE *state,
969         int tok,
970         const char *ptr,
971         const char *end,
972         const ENCODING *enc)
973{
974  switch (tok) {
975  case XML_TOK_PROLOG_S:
976    return XML_ROLE_ATTLIST_NONE;
977  case XML_TOK_LITERAL:
978    state->handler = attlist1;
979    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
980  }
981  return common(state, tok);
982}
983
984static int PTRCALL
985element0(PROLOG_STATE *state,
986         int tok,
987         const char *ptr,
988         const char *end,
989         const ENCODING *enc)
990{
991  switch (tok) {
992  case XML_TOK_PROLOG_S:
993    return XML_ROLE_ELEMENT_NONE;
994  case XML_TOK_NAME:
995  case XML_TOK_PREFIXED_NAME:
996    state->handler = element1;
997    return XML_ROLE_ELEMENT_NAME;
998  }
999  return common(state, tok);
1000}
1001
1002static int PTRCALL
1003element1(PROLOG_STATE *state,
1004         int tok,
1005         const char *ptr,
1006         const char *end,
1007         const ENCODING *enc)
1008{
1009  switch (tok) {
1010  case XML_TOK_PROLOG_S:
1011    return XML_ROLE_ELEMENT_NONE;
1012  case XML_TOK_NAME:
1013    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1014      state->handler = declClose;
1015      state->role_none = XML_ROLE_ELEMENT_NONE;
1016      return XML_ROLE_CONTENT_EMPTY;
1017    }
1018    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1019      state->handler = declClose;
1020      state->role_none = XML_ROLE_ELEMENT_NONE;
1021      return XML_ROLE_CONTENT_ANY;
1022    }
1023    break;
1024  case XML_TOK_OPEN_PAREN:
1025    state->handler = element2;
1026    state->level = 1;
1027    return XML_ROLE_GROUP_OPEN;
1028  }
1029  return common(state, tok);
1030}
1031
1032static int PTRCALL
1033element2(PROLOG_STATE *state,
1034         int tok,
1035         const char *ptr,
1036         const char *end,
1037         const ENCODING *enc)
1038{
1039  switch (tok) {
1040  case XML_TOK_PROLOG_S:
1041    return XML_ROLE_ELEMENT_NONE;
1042  case XML_TOK_POUND_NAME:
1043    if (XmlNameMatchesAscii(enc,
1044                            ptr + MIN_BYTES_PER_CHAR(enc),
1045                            end,
1046                            KW_PCDATA)) {
1047      state->handler = element3;
1048      return XML_ROLE_CONTENT_PCDATA;
1049    }
1050    break;
1051  case XML_TOK_OPEN_PAREN:
1052    state->level = 2;
1053    state->handler = element6;
1054    return XML_ROLE_GROUP_OPEN;
1055  case XML_TOK_NAME:
1056  case XML_TOK_PREFIXED_NAME:
1057    state->handler = element7;
1058    return XML_ROLE_CONTENT_ELEMENT;
1059  case XML_TOK_NAME_QUESTION:
1060    state->handler = element7;
1061    return XML_ROLE_CONTENT_ELEMENT_OPT;
1062  case XML_TOK_NAME_ASTERISK:
1063    state->handler = element7;
1064    return XML_ROLE_CONTENT_ELEMENT_REP;
1065  case XML_TOK_NAME_PLUS:
1066    state->handler = element7;
1067    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1068  }
1069  return common(state, tok);
1070}
1071
1072static int PTRCALL
1073element3(PROLOG_STATE *state,
1074         int tok,
1075         const char *ptr,
1076         const char *end,
1077         const ENCODING *enc)
1078{
1079  switch (tok) {
1080  case XML_TOK_PROLOG_S:
1081    return XML_ROLE_ELEMENT_NONE;
1082  case XML_TOK_CLOSE_PAREN:
1083    state->handler = declClose;
1084    state->role_none = XML_ROLE_ELEMENT_NONE;
1085    return XML_ROLE_GROUP_CLOSE;
1086  case XML_TOK_CLOSE_PAREN_ASTERISK:
1087    state->handler = declClose;
1088    state->role_none = XML_ROLE_ELEMENT_NONE;
1089    return XML_ROLE_GROUP_CLOSE_REP;
1090  case XML_TOK_OR:
1091    state->handler = element4;
1092    return XML_ROLE_ELEMENT_NONE;
1093  }
1094  return common(state, tok);
1095}
1096
1097static int PTRCALL
1098element4(PROLOG_STATE *state,
1099         int tok,
1100         const char *ptr,
1101         const char *end,
1102         const ENCODING *enc)
1103{
1104  switch (tok) {
1105  case XML_TOK_PROLOG_S:
1106    return XML_ROLE_ELEMENT_NONE;
1107  case XML_TOK_NAME:
1108  case XML_TOK_PREFIXED_NAME:
1109    state->handler = element5;
1110    return XML_ROLE_CONTENT_ELEMENT;
1111  }
1112  return common(state, tok);
1113}
1114
1115static int PTRCALL
1116element5(PROLOG_STATE *state,
1117         int tok,
1118         const char *ptr,
1119         const char *end,
1120         const ENCODING *enc)
1121{
1122  switch (tok) {
1123  case XML_TOK_PROLOG_S:
1124    return XML_ROLE_ELEMENT_NONE;
1125  case XML_TOK_CLOSE_PAREN_ASTERISK:
1126    state->handler = declClose;
1127    state->role_none = XML_ROLE_ELEMENT_NONE;
1128    return XML_ROLE_GROUP_CLOSE_REP;
1129  case XML_TOK_OR:
1130    state->handler = element4;
1131    return XML_ROLE_ELEMENT_NONE;
1132  }
1133  return common(state, tok);
1134}
1135
1136static int PTRCALL
1137element6(PROLOG_STATE *state,
1138         int tok,
1139         const char *ptr,
1140         const char *end,
1141         const ENCODING *enc)
1142{
1143  switch (tok) {
1144  case XML_TOK_PROLOG_S:
1145    return XML_ROLE_ELEMENT_NONE;
1146  case XML_TOK_OPEN_PAREN:
1147    state->level += 1;
1148    return XML_ROLE_GROUP_OPEN;
1149  case XML_TOK_NAME:
1150  case XML_TOK_PREFIXED_NAME:
1151    state->handler = element7;
1152    return XML_ROLE_CONTENT_ELEMENT;
1153  case XML_TOK_NAME_QUESTION:
1154    state->handler = element7;
1155    return XML_ROLE_CONTENT_ELEMENT_OPT;
1156  case XML_TOK_NAME_ASTERISK:
1157    state->handler = element7;
1158    return XML_ROLE_CONTENT_ELEMENT_REP;
1159  case XML_TOK_NAME_PLUS:
1160    state->handler = element7;
1161    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1162  }
1163  return common(state, tok);
1164}
1165
1166static int PTRCALL
1167element7(PROLOG_STATE *state,
1168         int tok,
1169         const char *ptr,
1170         const char *end,
1171         const ENCODING *enc)
1172{
1173  switch (tok) {
1174  case XML_TOK_PROLOG_S:
1175    return XML_ROLE_ELEMENT_NONE;
1176  case XML_TOK_CLOSE_PAREN:
1177    state->level -= 1;
1178    if (state->level == 0) {
1179      state->handler = declClose;
1180      state->role_none = XML_ROLE_ELEMENT_NONE;
1181    }
1182    return XML_ROLE_GROUP_CLOSE;
1183  case XML_TOK_CLOSE_PAREN_ASTERISK:
1184    state->level -= 1;
1185    if (state->level == 0) {
1186      state->handler = declClose;
1187      state->role_none = XML_ROLE_ELEMENT_NONE;
1188    }
1189    return XML_ROLE_GROUP_CLOSE_REP;
1190  case XML_TOK_CLOSE_PAREN_QUESTION:
1191    state->level -= 1;
1192    if (state->level == 0) {
1193      state->handler = declClose;
1194      state->role_none = XML_ROLE_ELEMENT_NONE;
1195    }
1196    return XML_ROLE_GROUP_CLOSE_OPT;
1197  case XML_TOK_CLOSE_PAREN_PLUS:
1198    state->level -= 1;
1199    if (state->level == 0) {
1200      state->handler = declClose;
1201      state->role_none = XML_ROLE_ELEMENT_NONE;
1202    }
1203    return XML_ROLE_GROUP_CLOSE_PLUS;
1204  case XML_TOK_COMMA:
1205    state->handler = element6;
1206    return XML_ROLE_GROUP_SEQUENCE;
1207  case XML_TOK_OR:
1208    state->handler = element6;
1209    return XML_ROLE_GROUP_CHOICE;
1210  }
1211  return common(state, tok);
1212}
1213
1214#ifdef XML_DTD
1215
1216static int PTRCALL
1217condSect0(PROLOG_STATE *state,
1218          int tok,
1219          const char *ptr,
1220          const char *end,
1221          const ENCODING *enc)
1222{
1223  switch (tok) {
1224  case XML_TOK_PROLOG_S:
1225    return XML_ROLE_NONE;
1226  case XML_TOK_NAME:
1227    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1228      state->handler = condSect1;
1229      return XML_ROLE_NONE;
1230    }
1231    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1232      state->handler = condSect2;
1233      return XML_ROLE_NONE;
1234    }
1235    break;
1236  }
1237  return common(state, tok);
1238}
1239
1240static int PTRCALL
1241condSect1(PROLOG_STATE *state,
1242          int tok,
1243          const char *ptr,
1244          const char *end,
1245          const ENCODING *enc)
1246{
1247  switch (tok) {
1248  case XML_TOK_PROLOG_S:
1249    return XML_ROLE_NONE;
1250  case XML_TOK_OPEN_BRACKET:
1251    state->handler = externalSubset1;
1252    state->includeLevel += 1;
1253    return XML_ROLE_NONE;
1254  }
1255  return common(state, tok);
1256}
1257
1258static int PTRCALL
1259condSect2(PROLOG_STATE *state,
1260          int tok,
1261          const char *ptr,
1262          const char *end,
1263          const ENCODING *enc)
1264{
1265  switch (tok) {
1266  case XML_TOK_PROLOG_S:
1267    return XML_ROLE_NONE;
1268  case XML_TOK_OPEN_BRACKET:
1269    state->handler = externalSubset1;
1270    return XML_ROLE_IGNORE_SECT;
1271  }
1272  return common(state, tok);
1273}
1274
1275#endif /* XML_DTD */
1276
1277static int PTRCALL
1278declClose(PROLOG_STATE *state,
1279          int tok,
1280          const char *ptr,
1281          const char *end,
1282          const ENCODING *enc)
1283{
1284  switch (tok) {
1285  case XML_TOK_PROLOG_S:
1286    return state->role_none;
1287  case XML_TOK_DECL_CLOSE:
1288    setTopLevel(state);
1289    return state->role_none;
1290  }
1291  return common(state, tok);
1292}
1293
1294static int PTRCALL
1295error(PROLOG_STATE *state,
1296      int tok,
1297      const char *ptr,
1298      const char *end,
1299      const ENCODING *enc)
1300{
1301  return XML_ROLE_NONE;
1302}
1303
1304static int FASTCALL
1305common(PROLOG_STATE *state, int tok)
1306{
1307#ifdef XML_DTD
1308  if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1309    return XML_ROLE_INNER_PARAM_ENTITY_REF;
1310#endif
1311  state->handler = error;
1312  return XML_ROLE_ERROR;
1313}
1314
1315void
1316XmlPrologStateInit(PROLOG_STATE *state)
1317{
1318  state->handler = prolog0;
1319#ifdef XML_DTD
1320  state->documentEntity = 1;
1321  state->includeLevel = 0;
1322  state->inEntityValue = 0;
1323#endif /* XML_DTD */
1324}
1325
1326#ifdef XML_DTD
1327
1328void
1329XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1330{
1331  state->handler = externalSubset0;
1332  state->documentEntity = 0;
1333  state->includeLevel = 0;
1334}
1335
1336#endif /* XML_DTD */
1337