1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2   See the file COPYING for copying permission.
3*/
4
5/* This file is included! */
6#ifdef XML_TOK_IMPL_C
7
/* Fallback definition: when nothing has defined IS_INVALID_CHAR before
   this point, no multi-byte sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
11
/* Expands to the switch case for the lead byte of an n-byte character.
   Relies on `enc` and `end` being in scope at the point of expansion.
   - fewer than n bytes left:        returns XML_TOK_PARTIAL_CHAR
   - IS_INVALID_CHAR says invalid:   stores ptr in *nextTokPtr and
                                     returns XML_TOK_INVALID
   - otherwise:                      advances ptr by n and leaves the switch
   `n` is used for token pasting and must be a bare literal. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - (ptr) < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_INVALID_CHAR(enc, ptr, n)) { \
        (ptr) += n; \
        break; \
      } \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID;
22
/* Expands to the switch cases shared by every scanner that walks over
   arbitrary character data: the 2-, 3- and 4-byte lead cases (see
   INVALID_LEAD_CASE) plus the byte types that are rejected outright.
   For the latter, ptr is stored in *nextTokPtr and XML_TOK_INVALID is
   returned. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
32
/* Expands to the switch case for the lead byte of an n-byte character
   that is expected to be a name character.
   - fewer than n bytes left:  returns XML_TOK_PARTIAL_CHAR
   - the sequence is malformed (IS_INVALID_CHAR) or is not a name
     character (IS_NAME_CHAR): stores ptr in *nextTokPtr and returns
     XML_TOK_INVALID
   - otherwise:                advances ptr by n and leaves the switch
   The IS_INVALID_CHAR test comes first so that a malformed sequence is
   rejected before the name-character lookup is consulted; without it an
   ill-formed sequence could be accepted as part of a name.
   `n` is used for token pasting and must be a bare literal. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;
43
/* Expands to every switch case that accepts one name character at ptr.
   Minimum-width characters are advanced over by MINBPC(enc); a
   BT_NONASCII character is first checked with IS_NAME_CHAR_MINBPC and
   rejected (ptr stored in *nextTokPtr, XML_TOK_INVALID returned) when it
   is not a name character.  Multi-byte lead bytes are handled by
   CHECK_NAME_CASE.  Every accepting case ends by leaving the switch. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_MINUS: \
  case BT_NAME: \
  case BT_DIGIT: \
  case BT_HEX: \
  case BT_NMSTRT: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
60
/* Expands to the switch case for the lead byte of an n-byte character
   that is expected to be a name-start character.
   - fewer than n bytes left:  returns XML_TOK_PARTIAL_CHAR
   - the sequence is malformed (IS_INVALID_CHAR) or is not a name-start
     character (IS_NMSTRT_CHAR): stores ptr in *nextTokPtr and returns
     XML_TOK_INVALID
   - otherwise:                advances ptr by n and leaves the switch
   The IS_INVALID_CHAR test comes first so that a malformed sequence is
   rejected before the name-start lookup is consulted; without it an
   ill-formed sequence could be accepted as the start of a name.
   `n` is used for token pasting and must be a bare literal. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;
71
/* Expands to every switch case that accepts one name-start character at
   ptr.  Minimum-width characters are advanced over by MINBPC(enc); a
   BT_NONASCII character is first checked with IS_NMSTRT_CHAR_MINBPC and
   rejected (ptr stored in *nextTokPtr, XML_TOK_INVALID returned) when it
   cannot start a name.  Multi-byte lead bytes are handled by
   CHECK_NMSTRT_CASE.  Every accepting case ends by leaving the switch. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_HEX: \
  case BT_NMSTRT: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
85
/* Name-decoration hook for the functions below: unless PREFIX has
   already been defined, identifiers are used unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
89
90/* ptr points to character following "<!-" */
91
92static int PTRCALL
93PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
94                    const char *end, const char **nextTokPtr)
95{
96  if (ptr != end) {
97    if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
98      *nextTokPtr = ptr;
99      return XML_TOK_INVALID;
100    }
101    ptr += MINBPC(enc);
102    while (ptr != end) {
103      switch (BYTE_TYPE(enc, ptr)) {
104      INVALID_CASES(ptr, nextTokPtr)
105      case BT_MINUS:
106        if ((ptr += MINBPC(enc)) == end)
107          return XML_TOK_PARTIAL;
108        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109          if ((ptr += MINBPC(enc)) == end)
110            return XML_TOK_PARTIAL;
111          if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
112            *nextTokPtr = ptr;
113            return XML_TOK_INVALID;
114          }
115          *nextTokPtr = ptr + MINBPC(enc);
116          return XML_TOK_COMMENT;
117        }
118        break;
119      default:
120        ptr += MINBPC(enc);
121        break;
122      }
123    }
124  }
125  return XML_TOK_PARTIAL;
126}
127
128/* ptr points to character following "<!" */
129
130static int PTRCALL
131PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
132                 const char *end, const char **nextTokPtr)
133{
134  if (ptr == end)
135    return XML_TOK_PARTIAL;
136  switch (BYTE_TYPE(enc, ptr)) {
137  case BT_MINUS:
138    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
139  case BT_LSQB:
140    *nextTokPtr = ptr + MINBPC(enc);
141    return XML_TOK_COND_SECT_OPEN;
142  case BT_NMSTRT:
143  case BT_HEX:
144    ptr += MINBPC(enc);
145    break;
146  default:
147    *nextTokPtr = ptr;
148    return XML_TOK_INVALID;
149  }
150  while (ptr != end) {
151    switch (BYTE_TYPE(enc, ptr)) {
152    case BT_PERCNT:
153      if (ptr + MINBPC(enc) == end)
154        return XML_TOK_PARTIAL;
155      /* don't allow <!ENTITY% foo "whatever"> */
156      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157      case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
158        *nextTokPtr = ptr;
159        return XML_TOK_INVALID;
160      }
161      /* fall through */
162    case BT_S: case BT_CR: case BT_LF:
163      *nextTokPtr = ptr;
164      return XML_TOK_DECL_OPEN;
165    case BT_NMSTRT:
166    case BT_HEX:
167      ptr += MINBPC(enc);
168      break;
169    default:
170      *nextTokPtr = ptr;
171      return XML_TOK_INVALID;
172    }
173  }
174  return XML_TOK_PARTIAL;
175}
176
177static int PTRCALL
178PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
179                      const char *end, int *tokPtr)
180{
181  int upper = 0;
182  *tokPtr = XML_TOK_PI;
183  if (end - ptr != MINBPC(enc)*3)
184    return 1;
185  switch (BYTE_TO_ASCII(enc, ptr)) {
186  case ASCII_x:
187    break;
188  case ASCII_X:
189    upper = 1;
190    break;
191  default:
192    return 1;
193  }
194  ptr += MINBPC(enc);
195  switch (BYTE_TO_ASCII(enc, ptr)) {
196  case ASCII_m:
197    break;
198  case ASCII_M:
199    upper = 1;
200    break;
201  default:
202    return 1;
203  }
204  ptr += MINBPC(enc);
205  switch (BYTE_TO_ASCII(enc, ptr)) {
206  case ASCII_l:
207    break;
208  case ASCII_L:
209    upper = 1;
210    break;
211  default:
212    return 1;
213  }
214  if (upper)
215    return 0;
216  *tokPtr = XML_TOK_XML_DECL;
217  return 1;
218}
219
220/* ptr points to character following "<?" */
221
222static int PTRCALL
223PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
224               const char *end, const char **nextTokPtr)
225{
226  int tok;
227  const char *target = ptr;
228  if (ptr == end)
229    return XML_TOK_PARTIAL;
230  switch (BYTE_TYPE(enc, ptr)) {
231  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
232  default:
233    *nextTokPtr = ptr;
234    return XML_TOK_INVALID;
235  }
236  while (ptr != end) {
237    switch (BYTE_TYPE(enc, ptr)) {
238    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239    case BT_S: case BT_CR: case BT_LF:
240      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
241        *nextTokPtr = ptr;
242        return XML_TOK_INVALID;
243      }
244      ptr += MINBPC(enc);
245      while (ptr != end) {
246        switch (BYTE_TYPE(enc, ptr)) {
247        INVALID_CASES(ptr, nextTokPtr)
248        case BT_QUEST:
249          ptr += MINBPC(enc);
250          if (ptr == end)
251            return XML_TOK_PARTIAL;
252          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253            *nextTokPtr = ptr + MINBPC(enc);
254            return tok;
255          }
256          break;
257        default:
258          ptr += MINBPC(enc);
259          break;
260        }
261      }
262      return XML_TOK_PARTIAL;
263    case BT_QUEST:
264      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265        *nextTokPtr = ptr;
266        return XML_TOK_INVALID;
267      }
268      ptr += MINBPC(enc);
269      if (ptr == end)
270        return XML_TOK_PARTIAL;
271      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272        *nextTokPtr = ptr + MINBPC(enc);
273        return tok;
274      }
275      /* fall through */
276    default:
277      *nextTokPtr = ptr;
278      return XML_TOK_INVALID;
279    }
280  }
281  return XML_TOK_PARTIAL;
282}
283
284static int PTRCALL
285PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
286                         const char *end, const char **nextTokPtr)
287{
288  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
289                                     ASCII_T, ASCII_A, ASCII_LSQB };
290  int i;
291  /* CDATA[ */
292  if (end - ptr < 6 * MINBPC(enc))
293    return XML_TOK_PARTIAL;
294  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
295    if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
296      *nextTokPtr = ptr;
297      return XML_TOK_INVALID;
298    }
299  }
300  *nextTokPtr = ptr;
301  return XML_TOK_CDATA_SECT_OPEN;
302}
303
304static int PTRCALL
305PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
306                        const char *end, const char **nextTokPtr)
307{
308  if (ptr == end)
309    return XML_TOK_NONE;
310  if (MINBPC(enc) > 1) {
311    size_t n = end - ptr;
312    if (n & (MINBPC(enc) - 1)) {
313      n &= ~(MINBPC(enc) - 1);
314      if (n == 0)
315        return XML_TOK_PARTIAL;
316      end = ptr + n;
317    }
318  }
319  switch (BYTE_TYPE(enc, ptr)) {
320  case BT_RSQB:
321    ptr += MINBPC(enc);
322    if (ptr == end)
323      return XML_TOK_PARTIAL;
324    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
325      break;
326    ptr += MINBPC(enc);
327    if (ptr == end)
328      return XML_TOK_PARTIAL;
329    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
330      ptr -= MINBPC(enc);
331      break;
332    }
333    *nextTokPtr = ptr + MINBPC(enc);
334    return XML_TOK_CDATA_SECT_CLOSE;
335  case BT_CR:
336    ptr += MINBPC(enc);
337    if (ptr == end)
338      return XML_TOK_PARTIAL;
339    if (BYTE_TYPE(enc, ptr) == BT_LF)
340      ptr += MINBPC(enc);
341    *nextTokPtr = ptr;
342    return XML_TOK_DATA_NEWLINE;
343  case BT_LF:
344    *nextTokPtr = ptr + MINBPC(enc);
345    return XML_TOK_DATA_NEWLINE;
346  INVALID_CASES(ptr, nextTokPtr)
347  default:
348    ptr += MINBPC(enc);
349    break;
350  }
351  while (ptr != end) {
352    switch (BYTE_TYPE(enc, ptr)) {
353#define LEAD_CASE(n) \
354    case BT_LEAD ## n: \
355      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
356        *nextTokPtr = ptr; \
357        return XML_TOK_DATA_CHARS; \
358      } \
359      ptr += n; \
360      break;
361    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
362#undef LEAD_CASE
363    case BT_NONXML:
364    case BT_MALFORM:
365    case BT_TRAIL:
366    case BT_CR:
367    case BT_LF:
368    case BT_RSQB:
369      *nextTokPtr = ptr;
370      return XML_TOK_DATA_CHARS;
371    default:
372      ptr += MINBPC(enc);
373      break;
374    }
375  }
376  *nextTokPtr = ptr;
377  return XML_TOK_DATA_CHARS;
378}
379
380/* ptr points to character following "</" */
381
382static int PTRCALL
383PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
384                   const char *end, const char **nextTokPtr)
385{
386  if (ptr == end)
387    return XML_TOK_PARTIAL;
388  switch (BYTE_TYPE(enc, ptr)) {
389  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
390  default:
391    *nextTokPtr = ptr;
392    return XML_TOK_INVALID;
393  }
394  while (ptr != end) {
395    switch (BYTE_TYPE(enc, ptr)) {
396    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
397    case BT_S: case BT_CR: case BT_LF:
398      for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
399        switch (BYTE_TYPE(enc, ptr)) {
400        case BT_S: case BT_CR: case BT_LF:
401          break;
402        case BT_GT:
403          *nextTokPtr = ptr + MINBPC(enc);
404          return XML_TOK_END_TAG;
405        default:
406          *nextTokPtr = ptr;
407          return XML_TOK_INVALID;
408        }
409      }
410      return XML_TOK_PARTIAL;
411#ifdef XML_NS
412    case BT_COLON:
413      /* no need to check qname syntax here,
414         since end-tag must match exactly */
415      ptr += MINBPC(enc);
416      break;
417#endif
418    case BT_GT:
419      *nextTokPtr = ptr + MINBPC(enc);
420      return XML_TOK_END_TAG;
421    default:
422      *nextTokPtr = ptr;
423      return XML_TOK_INVALID;
424    }
425  }
426  return XML_TOK_PARTIAL;
427}
428
429/* ptr points to character following "&#X" */
430
431static int PTRCALL
432PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
433                       const char *end, const char **nextTokPtr)
434{
435  if (ptr != end) {
436    switch (BYTE_TYPE(enc, ptr)) {
437    case BT_DIGIT:
438    case BT_HEX:
439      break;
440    default:
441      *nextTokPtr = ptr;
442      return XML_TOK_INVALID;
443    }
444    for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
445      switch (BYTE_TYPE(enc, ptr)) {
446      case BT_DIGIT:
447      case BT_HEX:
448        break;
449      case BT_SEMI:
450        *nextTokPtr = ptr + MINBPC(enc);
451        return XML_TOK_CHAR_REF;
452      default:
453        *nextTokPtr = ptr;
454        return XML_TOK_INVALID;
455      }
456    }
457  }
458  return XML_TOK_PARTIAL;
459}
460
461/* ptr points to character following "&#" */
462
463static int PTRCALL
464PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
465                    const char *end, const char **nextTokPtr)
466{
467  if (ptr != end) {
468    if (CHAR_MATCHES(enc, ptr, ASCII_x))
469      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
470    switch (BYTE_TYPE(enc, ptr)) {
471    case BT_DIGIT:
472      break;
473    default:
474      *nextTokPtr = ptr;
475      return XML_TOK_INVALID;
476    }
477    for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
478      switch (BYTE_TYPE(enc, ptr)) {
479      case BT_DIGIT:
480        break;
481      case BT_SEMI:
482        *nextTokPtr = ptr + MINBPC(enc);
483        return XML_TOK_CHAR_REF;
484      default:
485        *nextTokPtr = ptr;
486        return XML_TOK_INVALID;
487      }
488    }
489  }
490  return XML_TOK_PARTIAL;
491}
492
493/* ptr points to character following "&" */
494
495static int PTRCALL
496PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497                const char **nextTokPtr)
498{
499  if (ptr == end)
500    return XML_TOK_PARTIAL;
501  switch (BYTE_TYPE(enc, ptr)) {
502  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
503  case BT_NUM:
504    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505  default:
506    *nextTokPtr = ptr;
507    return XML_TOK_INVALID;
508  }
509  while (ptr != end) {
510    switch (BYTE_TYPE(enc, ptr)) {
511    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
512    case BT_SEMI:
513      *nextTokPtr = ptr + MINBPC(enc);
514      return XML_TOK_ENTITY_REF;
515    default:
516      *nextTokPtr = ptr;
517      return XML_TOK_INVALID;
518    }
519  }
520  return XML_TOK_PARTIAL;
521}
522
523/* ptr points to character following first character of attribute name */
524
525static int PTRCALL
526PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527                 const char **nextTokPtr)
528{
529#ifdef XML_NS
530  int hadColon = 0;
531#endif
532  while (ptr != end) {
533    switch (BYTE_TYPE(enc, ptr)) {
534    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
535#ifdef XML_NS
536    case BT_COLON:
537      if (hadColon) {
538        *nextTokPtr = ptr;
539        return XML_TOK_INVALID;
540      }
541      hadColon = 1;
542      ptr += MINBPC(enc);
543      if (ptr == end)
544        return XML_TOK_PARTIAL;
545      switch (BYTE_TYPE(enc, ptr)) {
546      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
547      default:
548        *nextTokPtr = ptr;
549        return XML_TOK_INVALID;
550      }
551      break;
552#endif
553    case BT_S: case BT_CR: case BT_LF:
554      for (;;) {
555        int t;
556
557        ptr += MINBPC(enc);
558        if (ptr == end)
559          return XML_TOK_PARTIAL;
560        t = BYTE_TYPE(enc, ptr);
561        if (t == BT_EQUALS)
562          break;
563        switch (t) {
564        case BT_S:
565        case BT_LF:
566        case BT_CR:
567          break;
568        default:
569          *nextTokPtr = ptr;
570          return XML_TOK_INVALID;
571        }
572      }
573    /* fall through */
574    case BT_EQUALS:
575      {
576        int open;
577#ifdef XML_NS
578        hadColon = 0;
579#endif
580        for (;;) {
581          ptr += MINBPC(enc);
582          if (ptr == end)
583            return XML_TOK_PARTIAL;
584          open = BYTE_TYPE(enc, ptr);
585          if (open == BT_QUOT || open == BT_APOS)
586            break;
587          switch (open) {
588          case BT_S:
589          case BT_LF:
590          case BT_CR:
591            break;
592          default:
593            *nextTokPtr = ptr;
594            return XML_TOK_INVALID;
595          }
596        }
597        ptr += MINBPC(enc);
598        /* in attribute value */
599        for (;;) {
600          int t;
601          if (ptr == end)
602            return XML_TOK_PARTIAL;
603          t = BYTE_TYPE(enc, ptr);
604          if (t == open)
605            break;
606          switch (t) {
607          INVALID_CASES(ptr, nextTokPtr)
608          case BT_AMP:
609            {
610              int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
611              if (tok <= 0) {
612                if (tok == XML_TOK_INVALID)
613                  *nextTokPtr = ptr;
614                return tok;
615              }
616              break;
617            }
618          case BT_LT:
619            *nextTokPtr = ptr;
620            return XML_TOK_INVALID;
621          default:
622            ptr += MINBPC(enc);
623            break;
624          }
625        }
626        ptr += MINBPC(enc);
627        if (ptr == end)
628          return XML_TOK_PARTIAL;
629        switch (BYTE_TYPE(enc, ptr)) {
630        case BT_S:
631        case BT_CR:
632        case BT_LF:
633          break;
634        case BT_SOL:
635          goto sol;
636        case BT_GT:
637          goto gt;
638        default:
639          *nextTokPtr = ptr;
640          return XML_TOK_INVALID;
641        }
642        /* ptr points to closing quote */
643        for (;;) {
644          ptr += MINBPC(enc);
645          if (ptr == end)
646            return XML_TOK_PARTIAL;
647          switch (BYTE_TYPE(enc, ptr)) {
648          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
649          case BT_S: case BT_CR: case BT_LF:
650            continue;
651          case BT_GT:
652          gt:
653            *nextTokPtr = ptr + MINBPC(enc);
654            return XML_TOK_START_TAG_WITH_ATTS;
655          case BT_SOL:
656          sol:
657            ptr += MINBPC(enc);
658            if (ptr == end)
659              return XML_TOK_PARTIAL;
660            if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
661              *nextTokPtr = ptr;
662              return XML_TOK_INVALID;
663            }
664            *nextTokPtr = ptr + MINBPC(enc);
665            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
666          default:
667            *nextTokPtr = ptr;
668            return XML_TOK_INVALID;
669          }
670          break;
671        }
672        break;
673      }
674    default:
675      *nextTokPtr = ptr;
676      return XML_TOK_INVALID;
677    }
678  }
679  return XML_TOK_PARTIAL;
680}
681
682/* ptr points to character following "<" */
683
684static int PTRCALL
685PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
686               const char **nextTokPtr)
687{
688#ifdef XML_NS
689  int hadColon;
690#endif
691  if (ptr == end)
692    return XML_TOK_PARTIAL;
693  switch (BYTE_TYPE(enc, ptr)) {
694  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
695  case BT_EXCL:
696    if ((ptr += MINBPC(enc)) == end)
697      return XML_TOK_PARTIAL;
698    switch (BYTE_TYPE(enc, ptr)) {
699    case BT_MINUS:
700      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
701    case BT_LSQB:
702      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
703                                      end, nextTokPtr);
704    }
705    *nextTokPtr = ptr;
706    return XML_TOK_INVALID;
707  case BT_QUEST:
708    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709  case BT_SOL:
710    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711  default:
712    *nextTokPtr = ptr;
713    return XML_TOK_INVALID;
714  }
715#ifdef XML_NS
716  hadColon = 0;
717#endif
718  /* we have a start-tag */
719  while (ptr != end) {
720    switch (BYTE_TYPE(enc, ptr)) {
721    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
722#ifdef XML_NS
723    case BT_COLON:
724      if (hadColon) {
725        *nextTokPtr = ptr;
726        return XML_TOK_INVALID;
727      }
728      hadColon = 1;
729      ptr += MINBPC(enc);
730      if (ptr == end)
731        return XML_TOK_PARTIAL;
732      switch (BYTE_TYPE(enc, ptr)) {
733      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734      default:
735        *nextTokPtr = ptr;
736        return XML_TOK_INVALID;
737      }
738      break;
739#endif
740    case BT_S: case BT_CR: case BT_LF:
741      {
742        ptr += MINBPC(enc);
743        while (ptr != end) {
744          switch (BYTE_TYPE(enc, ptr)) {
745          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
746          case BT_GT:
747            goto gt;
748          case BT_SOL:
749            goto sol;
750          case BT_S: case BT_CR: case BT_LF:
751            ptr += MINBPC(enc);
752            continue;
753          default:
754            *nextTokPtr = ptr;
755            return XML_TOK_INVALID;
756          }
757          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
758        }
759        return XML_TOK_PARTIAL;
760      }
761    case BT_GT:
762    gt:
763      *nextTokPtr = ptr + MINBPC(enc);
764      return XML_TOK_START_TAG_NO_ATTS;
765    case BT_SOL:
766    sol:
767      ptr += MINBPC(enc);
768      if (ptr == end)
769        return XML_TOK_PARTIAL;
770      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
771        *nextTokPtr = ptr;
772        return XML_TOK_INVALID;
773      }
774      *nextTokPtr = ptr + MINBPC(enc);
775      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
776    default:
777      *nextTokPtr = ptr;
778      return XML_TOK_INVALID;
779    }
780  }
781  return XML_TOK_PARTIAL;
782}
783
784static int PTRCALL
785PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786                   const char **nextTokPtr)
787{
788  if (ptr == end)
789    return XML_TOK_NONE;
790  if (MINBPC(enc) > 1) {
791    size_t n = end - ptr;
792    if (n & (MINBPC(enc) - 1)) {
793      n &= ~(MINBPC(enc) - 1);
794      if (n == 0)
795        return XML_TOK_PARTIAL;
796      end = ptr + n;
797    }
798  }
799  switch (BYTE_TYPE(enc, ptr)) {
800  case BT_LT:
801    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802  case BT_AMP:
803    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804  case BT_CR:
805    ptr += MINBPC(enc);
806    if (ptr == end)
807      return XML_TOK_TRAILING_CR;
808    if (BYTE_TYPE(enc, ptr) == BT_LF)
809      ptr += MINBPC(enc);
810    *nextTokPtr = ptr;
811    return XML_TOK_DATA_NEWLINE;
812  case BT_LF:
813    *nextTokPtr = ptr + MINBPC(enc);
814    return XML_TOK_DATA_NEWLINE;
815  case BT_RSQB:
816    ptr += MINBPC(enc);
817    if (ptr == end)
818      return XML_TOK_TRAILING_RSQB;
819    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
820      break;
821    ptr += MINBPC(enc);
822    if (ptr == end)
823      return XML_TOK_TRAILING_RSQB;
824    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
825      ptr -= MINBPC(enc);
826      break;
827    }
828    *nextTokPtr = ptr;
829    return XML_TOK_INVALID;
830  INVALID_CASES(ptr, nextTokPtr)
831  default:
832    ptr += MINBPC(enc);
833    break;
834  }
835  while (ptr != end) {
836    switch (BYTE_TYPE(enc, ptr)) {
837#define LEAD_CASE(n) \
838    case BT_LEAD ## n: \
839      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
840        *nextTokPtr = ptr; \
841        return XML_TOK_DATA_CHARS; \
842      } \
843      ptr += n; \
844      break;
845    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
846#undef LEAD_CASE
847    case BT_RSQB:
848      if (ptr + MINBPC(enc) != end) {
849         if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
850           ptr += MINBPC(enc);
851           break;
852         }
853         if (ptr + 2*MINBPC(enc) != end) {
854           if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
855             ptr += MINBPC(enc);
856             break;
857           }
858           *nextTokPtr = ptr + 2*MINBPC(enc);
859           return XML_TOK_INVALID;
860         }
861      }
862      /* fall through */
863    case BT_AMP:
864    case BT_LT:
865    case BT_NONXML:
866    case BT_MALFORM:
867    case BT_TRAIL:
868    case BT_CR:
869    case BT_LF:
870      *nextTokPtr = ptr;
871      return XML_TOK_DATA_CHARS;
872    default:
873      ptr += MINBPC(enc);
874      break;
875    }
876  }
877  *nextTokPtr = ptr;
878  return XML_TOK_DATA_CHARS;
879}
880
881/* ptr points to character following "%" */
882
883static int PTRCALL
884PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885                    const char **nextTokPtr)
886{
887  if (ptr == end)
888    return XML_TOK_PARTIAL;
889  switch (BYTE_TYPE(enc, ptr)) {
890  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
892    *nextTokPtr = ptr;
893    return XML_TOK_PERCENT;
894  default:
895    *nextTokPtr = ptr;
896    return XML_TOK_INVALID;
897  }
898  while (ptr != end) {
899    switch (BYTE_TYPE(enc, ptr)) {
900    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
901    case BT_SEMI:
902      *nextTokPtr = ptr + MINBPC(enc);
903      return XML_TOK_PARAM_ENTITY_REF;
904    default:
905      *nextTokPtr = ptr;
906      return XML_TOK_INVALID;
907    }
908  }
909  return XML_TOK_PARTIAL;
910}
911
912static int PTRCALL
913PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914                      const char **nextTokPtr)
915{
916  if (ptr == end)
917    return XML_TOK_PARTIAL;
918  switch (BYTE_TYPE(enc, ptr)) {
919  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
920  default:
921    *nextTokPtr = ptr;
922    return XML_TOK_INVALID;
923  }
924  while (ptr != end) {
925    switch (BYTE_TYPE(enc, ptr)) {
926    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927    case BT_CR: case BT_LF: case BT_S:
928    case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
929      *nextTokPtr = ptr;
930      return XML_TOK_POUND_NAME;
931    default:
932      *nextTokPtr = ptr;
933      return XML_TOK_INVALID;
934    }
935  }
936  return -XML_TOK_POUND_NAME;
937}
938
939static int PTRCALL
940PREFIX(scanLit)(int open, const ENCODING *enc,
941                const char *ptr, const char *end,
942                const char **nextTokPtr)
943{
944  while (ptr != end) {
945    int t = BYTE_TYPE(enc, ptr);
946    switch (t) {
947    INVALID_CASES(ptr, nextTokPtr)
948    case BT_QUOT:
949    case BT_APOS:
950      ptr += MINBPC(enc);
951      if (t != open)
952        break;
953      if (ptr == end)
954        return -XML_TOK_LITERAL;
955      *nextTokPtr = ptr;
956      switch (BYTE_TYPE(enc, ptr)) {
957      case BT_S: case BT_CR: case BT_LF:
958      case BT_GT: case BT_PERCNT: case BT_LSQB:
959        return XML_TOK_LITERAL;
960      default:
961        return XML_TOK_INVALID;
962      }
963    default:
964      ptr += MINBPC(enc);
965      break;
966    }
967  }
968  return XML_TOK_PARTIAL;
969}
970
971static int PTRCALL
972PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973                  const char **nextTokPtr)
974{
975  int tok;
976  if (ptr == end)
977    return XML_TOK_NONE;
978  if (MINBPC(enc) > 1) {
979    size_t n = end - ptr;
980    if (n & (MINBPC(enc) - 1)) {
981      n &= ~(MINBPC(enc) - 1);
982      if (n == 0)
983        return XML_TOK_PARTIAL;
984      end = ptr + n;
985    }
986  }
987  switch (BYTE_TYPE(enc, ptr)) {
988  case BT_QUOT:
989    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
990  case BT_APOS:
991    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
992  case BT_LT:
993    {
994      ptr += MINBPC(enc);
995      if (ptr == end)
996        return XML_TOK_PARTIAL;
997      switch (BYTE_TYPE(enc, ptr)) {
998      case BT_EXCL:
999        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000      case BT_QUEST:
1001        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1002      case BT_NMSTRT:
1003      case BT_HEX:
1004      case BT_NONASCII:
1005      case BT_LEAD2:
1006      case BT_LEAD3:
1007      case BT_LEAD4:
1008        *nextTokPtr = ptr - MINBPC(enc);
1009        return XML_TOK_INSTANCE_START;
1010      }
1011      *nextTokPtr = ptr;
1012      return XML_TOK_INVALID;
1013    }
1014  case BT_CR:
1015    if (ptr + MINBPC(enc) == end) {
1016      *nextTokPtr = end;
1017      /* indicate that this might be part of a CR/LF pair */
1018      return -XML_TOK_PROLOG_S;
1019    }
1020    /* fall through */
1021  case BT_S: case BT_LF:
1022    for (;;) {
1023      ptr += MINBPC(enc);
1024      if (ptr == end)
1025        break;
1026      switch (BYTE_TYPE(enc, ptr)) {
1027      case BT_S: case BT_LF:
1028        break;
1029      case BT_CR:
1030        /* don't split CR/LF pair */
1031        if (ptr + MINBPC(enc) != end)
1032          break;
1033        /* fall through */
1034      default:
1035        *nextTokPtr = ptr;
1036        return XML_TOK_PROLOG_S;
1037      }
1038    }
1039    *nextTokPtr = ptr;
1040    return XML_TOK_PROLOG_S;
1041  case BT_PERCNT:
1042    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1043  case BT_COMMA:
1044    *nextTokPtr = ptr + MINBPC(enc);
1045    return XML_TOK_COMMA;
1046  case BT_LSQB:
1047    *nextTokPtr = ptr + MINBPC(enc);
1048    return XML_TOK_OPEN_BRACKET;
1049  case BT_RSQB:
1050    ptr += MINBPC(enc);
1051    if (ptr == end)
1052      return -XML_TOK_CLOSE_BRACKET;
1053    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1054      if (ptr + MINBPC(enc) == end)
1055        return XML_TOK_PARTIAL;
1056      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1057        *nextTokPtr = ptr + 2*MINBPC(enc);
1058        return XML_TOK_COND_SECT_CLOSE;
1059      }
1060    }
1061    *nextTokPtr = ptr;
1062    return XML_TOK_CLOSE_BRACKET;
1063  case BT_LPAR:
1064    *nextTokPtr = ptr + MINBPC(enc);
1065    return XML_TOK_OPEN_PAREN;
1066  case BT_RPAR:
1067    ptr += MINBPC(enc);
1068    if (ptr == end)
1069      return -XML_TOK_CLOSE_PAREN;
1070    switch (BYTE_TYPE(enc, ptr)) {
1071    case BT_AST:
1072      *nextTokPtr = ptr + MINBPC(enc);
1073      return XML_TOK_CLOSE_PAREN_ASTERISK;
1074    case BT_QUEST:
1075      *nextTokPtr = ptr + MINBPC(enc);
1076      return XML_TOK_CLOSE_PAREN_QUESTION;
1077    case BT_PLUS:
1078      *nextTokPtr = ptr + MINBPC(enc);
1079      return XML_TOK_CLOSE_PAREN_PLUS;
1080    case BT_CR: case BT_LF: case BT_S:
1081    case BT_GT: case BT_COMMA: case BT_VERBAR:
1082    case BT_RPAR:
1083      *nextTokPtr = ptr;
1084      return XML_TOK_CLOSE_PAREN;
1085    }
1086    *nextTokPtr = ptr;
1087    return XML_TOK_INVALID;
1088  case BT_VERBAR:
1089    *nextTokPtr = ptr + MINBPC(enc);
1090    return XML_TOK_OR;
1091  case BT_GT:
1092    *nextTokPtr = ptr + MINBPC(enc);
1093    return XML_TOK_DECL_CLOSE;
1094  case BT_NUM:
1095    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1096#define LEAD_CASE(n) \
1097  case BT_LEAD ## n: \
1098    if (end - ptr < n) \
1099      return XML_TOK_PARTIAL_CHAR; \
1100    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1101      ptr += n; \
1102      tok = XML_TOK_NAME; \
1103      break; \
1104    } \
1105    if (IS_NAME_CHAR(enc, ptr, n)) { \
1106      ptr += n; \
1107      tok = XML_TOK_NMTOKEN; \
1108      break; \
1109    } \
1110    *nextTokPtr = ptr; \
1111    return XML_TOK_INVALID;
1112    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1113#undef LEAD_CASE
1114  case BT_NMSTRT:
1115  case BT_HEX:
1116    tok = XML_TOK_NAME;
1117    ptr += MINBPC(enc);
1118    break;
1119  case BT_DIGIT:
1120  case BT_NAME:
1121  case BT_MINUS:
1122#ifdef XML_NS
1123  case BT_COLON:
1124#endif
1125    tok = XML_TOK_NMTOKEN;
1126    ptr += MINBPC(enc);
1127    break;
1128  case BT_NONASCII:
1129    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1130      ptr += MINBPC(enc);
1131      tok = XML_TOK_NAME;
1132      break;
1133    }
1134    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1135      ptr += MINBPC(enc);
1136      tok = XML_TOK_NMTOKEN;
1137      break;
1138    }
1139    /* fall through */
1140  default:
1141    *nextTokPtr = ptr;
1142    return XML_TOK_INVALID;
1143  }
1144  while (ptr != end) {
1145    switch (BYTE_TYPE(enc, ptr)) {
1146    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1147    case BT_GT: case BT_RPAR: case BT_COMMA:
1148    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1149    case BT_S: case BT_CR: case BT_LF:
1150      *nextTokPtr = ptr;
1151      return tok;
1152#ifdef XML_NS
1153    case BT_COLON:
1154      ptr += MINBPC(enc);
1155      switch (tok) {
1156      case XML_TOK_NAME:
1157        if (ptr == end)
1158          return XML_TOK_PARTIAL;
1159        tok = XML_TOK_PREFIXED_NAME;
1160        switch (BYTE_TYPE(enc, ptr)) {
1161        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1162        default:
1163          tok = XML_TOK_NMTOKEN;
1164          break;
1165        }
1166        break;
1167      case XML_TOK_PREFIXED_NAME:
1168        tok = XML_TOK_NMTOKEN;
1169        break;
1170      }
1171      break;
1172#endif
1173    case BT_PLUS:
1174      if (tok == XML_TOK_NMTOKEN)  {
1175        *nextTokPtr = ptr;
1176        return XML_TOK_INVALID;
1177      }
1178      *nextTokPtr = ptr + MINBPC(enc);
1179      return XML_TOK_NAME_PLUS;
1180    case BT_AST:
1181      if (tok == XML_TOK_NMTOKEN)  {
1182        *nextTokPtr = ptr;
1183        return XML_TOK_INVALID;
1184      }
1185      *nextTokPtr = ptr + MINBPC(enc);
1186      return XML_TOK_NAME_ASTERISK;
1187    case BT_QUEST:
1188      if (tok == XML_TOK_NMTOKEN)  {
1189        *nextTokPtr = ptr;
1190        return XML_TOK_INVALID;
1191      }
1192      *nextTokPtr = ptr + MINBPC(enc);
1193      return XML_TOK_NAME_QUESTION;
1194    default:
1195      *nextTokPtr = ptr;
1196      return XML_TOK_INVALID;
1197    }
1198  }
1199  return -tok;
1200}
1201
1202static int PTRCALL
1203PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1204                          const char *end, const char **nextTokPtr)
1205{
1206  const char *start;
1207  if (ptr == end)
1208    return XML_TOK_NONE;
1209  start = ptr;
1210  while (ptr != end) {
1211    switch (BYTE_TYPE(enc, ptr)) {
1212#define LEAD_CASE(n) \
1213    case BT_LEAD ## n: ptr += n; break;
1214    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1215#undef LEAD_CASE
1216    case BT_AMP:
1217      if (ptr == start)
1218        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1219      *nextTokPtr = ptr;
1220      return XML_TOK_DATA_CHARS;
1221    case BT_LT:
1222      /* this is for inside entity references */
1223      *nextTokPtr = ptr;
1224      return XML_TOK_INVALID;
1225    case BT_LF:
1226      if (ptr == start) {
1227        *nextTokPtr = ptr + MINBPC(enc);
1228        return XML_TOK_DATA_NEWLINE;
1229      }
1230      *nextTokPtr = ptr;
1231      return XML_TOK_DATA_CHARS;
1232    case BT_CR:
1233      if (ptr == start) {
1234        ptr += MINBPC(enc);
1235        if (ptr == end)
1236          return XML_TOK_TRAILING_CR;
1237        if (BYTE_TYPE(enc, ptr) == BT_LF)
1238          ptr += MINBPC(enc);
1239        *nextTokPtr = ptr;
1240        return XML_TOK_DATA_NEWLINE;
1241      }
1242      *nextTokPtr = ptr;
1243      return XML_TOK_DATA_CHARS;
1244    case BT_S:
1245      if (ptr == start) {
1246        *nextTokPtr = ptr + MINBPC(enc);
1247        return XML_TOK_ATTRIBUTE_VALUE_S;
1248      }
1249      *nextTokPtr = ptr;
1250      return XML_TOK_DATA_CHARS;
1251    default:
1252      ptr += MINBPC(enc);
1253      break;
1254    }
1255  }
1256  *nextTokPtr = ptr;
1257  return XML_TOK_DATA_CHARS;
1258}
1259
1260static int PTRCALL
1261PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1262                       const char *end, const char **nextTokPtr)
1263{
1264  const char *start;
1265  if (ptr == end)
1266    return XML_TOK_NONE;
1267  start = ptr;
1268  while (ptr != end) {
1269    switch (BYTE_TYPE(enc, ptr)) {
1270#define LEAD_CASE(n) \
1271    case BT_LEAD ## n: ptr += n; break;
1272    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1273#undef LEAD_CASE
1274    case BT_AMP:
1275      if (ptr == start)
1276        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1277      *nextTokPtr = ptr;
1278      return XML_TOK_DATA_CHARS;
1279    case BT_PERCNT:
1280      if (ptr == start) {
1281        int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1282                                       end, nextTokPtr);
1283        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1284      }
1285      *nextTokPtr = ptr;
1286      return XML_TOK_DATA_CHARS;
1287    case BT_LF:
1288      if (ptr == start) {
1289        *nextTokPtr = ptr + MINBPC(enc);
1290        return XML_TOK_DATA_NEWLINE;
1291      }
1292      *nextTokPtr = ptr;
1293      return XML_TOK_DATA_CHARS;
1294    case BT_CR:
1295      if (ptr == start) {
1296        ptr += MINBPC(enc);
1297        if (ptr == end)
1298          return XML_TOK_TRAILING_CR;
1299        if (BYTE_TYPE(enc, ptr) == BT_LF)
1300          ptr += MINBPC(enc);
1301        *nextTokPtr = ptr;
1302        return XML_TOK_DATA_NEWLINE;
1303      }
1304      *nextTokPtr = ptr;
1305      return XML_TOK_DATA_CHARS;
1306    default:
1307      ptr += MINBPC(enc);
1308      break;
1309    }
1310  }
1311  *nextTokPtr = ptr;
1312  return XML_TOK_DATA_CHARS;
1313}
1314
1315#ifdef XML_DTD
1316
1317static int PTRCALL
1318PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1319                         const char *end, const char **nextTokPtr)
1320{
1321  int level = 0;
1322  if (MINBPC(enc) > 1) {
1323    size_t n = end - ptr;
1324    if (n & (MINBPC(enc) - 1)) {
1325      n &= ~(MINBPC(enc) - 1);
1326      end = ptr + n;
1327    }
1328  }
1329  while (ptr != end) {
1330    switch (BYTE_TYPE(enc, ptr)) {
1331    INVALID_CASES(ptr, nextTokPtr)
1332    case BT_LT:
1333      if ((ptr += MINBPC(enc)) == end)
1334        return XML_TOK_PARTIAL;
1335      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1336        if ((ptr += MINBPC(enc)) == end)
1337          return XML_TOK_PARTIAL;
1338        if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1339          ++level;
1340          ptr += MINBPC(enc);
1341        }
1342      }
1343      break;
1344    case BT_RSQB:
1345      if ((ptr += MINBPC(enc)) == end)
1346        return XML_TOK_PARTIAL;
1347      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1348        if ((ptr += MINBPC(enc)) == end)
1349          return XML_TOK_PARTIAL;
1350        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1351          ptr += MINBPC(enc);
1352          if (level == 0) {
1353            *nextTokPtr = ptr;
1354            return XML_TOK_IGNORE_SECT;
1355          }
1356          --level;
1357        }
1358      }
1359      break;
1360    default:
1361      ptr += MINBPC(enc);
1362      break;
1363    }
1364  }
1365  return XML_TOK_PARTIAL;
1366}
1367
1368#endif /* XML_DTD */
1369
1370static int PTRCALL
1371PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1372                   const char **badPtr)
1373{
1374  ptr += MINBPC(enc);
1375  end -= MINBPC(enc);
1376  for (; ptr != end; ptr += MINBPC(enc)) {
1377    switch (BYTE_TYPE(enc, ptr)) {
1378    case BT_DIGIT:
1379    case BT_HEX:
1380    case BT_MINUS:
1381    case BT_APOS:
1382    case BT_LPAR:
1383    case BT_RPAR:
1384    case BT_PLUS:
1385    case BT_COMMA:
1386    case BT_SOL:
1387    case BT_EQUALS:
1388    case BT_QUEST:
1389    case BT_CR:
1390    case BT_LF:
1391    case BT_SEMI:
1392    case BT_EXCL:
1393    case BT_AST:
1394    case BT_PERCNT:
1395    case BT_NUM:
1396#ifdef XML_NS
1397    case BT_COLON:
1398#endif
1399      break;
1400    case BT_S:
1401      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1402        *badPtr = ptr;
1403        return 0;
1404      }
1405      break;
1406    case BT_NAME:
1407    case BT_NMSTRT:
1408      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1409        break;
1410    default:
1411      switch (BYTE_TO_ASCII(enc, ptr)) {
1412      case 0x24: /* $ */
1413      case 0x40: /* @ */
1414        break;
1415      default:
1416        *badPtr = ptr;
1417        return 0;
1418      }
1419      break;
1420    }
1421  }
1422  return 1;
1423}
1424
1425/* This must only be called for a well-formed start-tag or empty
1426   element tag.  Returns the number of attributes.  Pointers to the
1427   first attsMax attributes are stored in atts.
1428*/
1429
1430static int PTRCALL
1431PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1432                int attsMax, ATTRIBUTE *atts)
1433{
1434  enum { other, inName, inValue } state = inName;
1435  int nAtts = 0;
1436  int open = 0; /* defined when state == inValue;
1437                   initialization just to shut up compilers */
1438
1439  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1440    switch (BYTE_TYPE(enc, ptr)) {
1441#define START_NAME \
1442      if (state == other) { \
1443        if (nAtts < attsMax) { \
1444          atts[nAtts].name = ptr; \
1445          atts[nAtts].normalized = 1; \
1446        } \
1447        state = inName; \
1448      }
1449#define LEAD_CASE(n) \
1450    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1451    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1452#undef LEAD_CASE
1453    case BT_NONASCII:
1454    case BT_NMSTRT:
1455    case BT_HEX:
1456      START_NAME
1457      break;
1458#undef START_NAME
1459    case BT_QUOT:
1460      if (state != inValue) {
1461        if (nAtts < attsMax)
1462          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1463        state = inValue;
1464        open = BT_QUOT;
1465      }
1466      else if (open == BT_QUOT) {
1467        state = other;
1468        if (nAtts < attsMax)
1469          atts[nAtts].valueEnd = ptr;
1470        nAtts++;
1471      }
1472      break;
1473    case BT_APOS:
1474      if (state != inValue) {
1475        if (nAtts < attsMax)
1476          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1477        state = inValue;
1478        open = BT_APOS;
1479      }
1480      else if (open == BT_APOS) {
1481        state = other;
1482        if (nAtts < attsMax)
1483          atts[nAtts].valueEnd = ptr;
1484        nAtts++;
1485      }
1486      break;
1487    case BT_AMP:
1488      if (nAtts < attsMax)
1489        atts[nAtts].normalized = 0;
1490      break;
1491    case BT_S:
1492      if (state == inName)
1493        state = other;
1494      else if (state == inValue
1495               && nAtts < attsMax
1496               && atts[nAtts].normalized
1497               && (ptr == atts[nAtts].valuePtr
1498                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1499                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1500                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1501        atts[nAtts].normalized = 0;
1502      break;
1503    case BT_CR: case BT_LF:
1504      /* This case ensures that the first attribute name is counted
1505         Apart from that we could just change state on the quote. */
1506      if (state == inName)
1507        state = other;
1508      else if (state == inValue && nAtts < attsMax)
1509        atts[nAtts].normalized = 0;
1510      break;
1511    case BT_GT:
1512    case BT_SOL:
1513      if (state != inValue)
1514        return nAtts;
1515      break;
1516    default:
1517      break;
1518    }
1519  }
1520  /* not reached */
1521}
1522
1523static int PTRFASTCALL
1524PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1525{
1526  int result = 0;
1527  /* skip &# */
1528  ptr += 2*MINBPC(enc);
1529  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530    for (ptr += MINBPC(enc);
1531         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1532         ptr += MINBPC(enc)) {
1533      int c = BYTE_TO_ASCII(enc, ptr);
1534      switch (c) {
1535      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1536      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1537        result <<= 4;
1538        result |= (c - ASCII_0);
1539        break;
1540      case ASCII_A: case ASCII_B: case ASCII_C:
1541      case ASCII_D: case ASCII_E: case ASCII_F:
1542        result <<= 4;
1543        result += 10 + (c - ASCII_A);
1544        break;
1545      case ASCII_a: case ASCII_b: case ASCII_c:
1546      case ASCII_d: case ASCII_e: case ASCII_f:
1547        result <<= 4;
1548        result += 10 + (c - ASCII_a);
1549        break;
1550      }
1551      if (result >= 0x110000)
1552        return -1;
1553    }
1554  }
1555  else {
1556    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1557      int c = BYTE_TO_ASCII(enc, ptr);
1558      result *= 10;
1559      result += (c - ASCII_0);
1560      if (result >= 0x110000)
1561        return -1;
1562    }
1563  }
1564  return checkCharRefNumber(result);
1565}
1566
1567static int PTRCALL
1568PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1569                             const char *end)
1570{
1571  switch ((end - ptr)/MINBPC(enc)) {
1572  case 2:
1573    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1574      switch (BYTE_TO_ASCII(enc, ptr)) {
1575      case ASCII_l:
1576        return ASCII_LT;
1577      case ASCII_g:
1578        return ASCII_GT;
1579      }
1580    }
1581    break;
1582  case 3:
1583    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1584      ptr += MINBPC(enc);
1585      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1586        ptr += MINBPC(enc);
1587        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1588          return ASCII_AMP;
1589      }
1590    }
1591    break;
1592  case 4:
1593    switch (BYTE_TO_ASCII(enc, ptr)) {
1594    case ASCII_q:
1595      ptr += MINBPC(enc);
1596      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1597        ptr += MINBPC(enc);
1598        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1599          ptr += MINBPC(enc);
1600          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1601            return ASCII_QUOT;
1602        }
1603      }
1604      break;
1605    case ASCII_a:
1606      ptr += MINBPC(enc);
1607      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1608        ptr += MINBPC(enc);
1609        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1610          ptr += MINBPC(enc);
1611          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1612            return ASCII_APOS;
1613        }
1614      }
1615      break;
1616    }
1617  }
1618  return 0;
1619}
1620
1621static int PTRCALL
1622PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1623{
1624  for (;;) {
1625    switch (BYTE_TYPE(enc, ptr1)) {
1626#define LEAD_CASE(n) \
1627    case BT_LEAD ## n: \
1628      if (*ptr1++ != *ptr2++) \
1629        return 0;
1630    LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1631#undef LEAD_CASE
1632      /* fall through */
1633      if (*ptr1++ != *ptr2++)
1634        return 0;
1635      break;
1636    case BT_NONASCII:
1637    case BT_NMSTRT:
1638#ifdef XML_NS
1639    case BT_COLON:
1640#endif
1641    case BT_HEX:
1642    case BT_DIGIT:
1643    case BT_NAME:
1644    case BT_MINUS:
1645      if (*ptr2++ != *ptr1++)
1646        return 0;
1647      if (MINBPC(enc) > 1) {
1648        if (*ptr2++ != *ptr1++)
1649          return 0;
1650        if (MINBPC(enc) > 2) {
1651          if (*ptr2++ != *ptr1++)
1652            return 0;
1653          if (MINBPC(enc) > 3) {
1654            if (*ptr2++ != *ptr1++)
1655              return 0;
1656          }
1657        }
1658      }
1659      break;
1660    default:
1661      if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1662        return 1;
1663      switch (BYTE_TYPE(enc, ptr2)) {
1664      case BT_LEAD2:
1665      case BT_LEAD3:
1666      case BT_LEAD4:
1667      case BT_NONASCII:
1668      case BT_NMSTRT:
1669#ifdef XML_NS
1670      case BT_COLON:
1671#endif
1672      case BT_HEX:
1673      case BT_DIGIT:
1674      case BT_NAME:
1675      case BT_MINUS:
1676        return 0;
1677      default:
1678        return 1;
1679      }
1680    }
1681  }
1682  /* not reached */
1683}
1684
1685static int PTRCALL
1686PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1687                         const char *end1, const char *ptr2)
1688{
1689  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1690    if (ptr1 == end1)
1691      return 0;
1692    if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1693      return 0;
1694  }
1695  return ptr1 == end1;
1696}
1697
1698static int PTRFASTCALL
1699PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1700{
1701  const char *start = ptr;
1702  for (;;) {
1703    switch (BYTE_TYPE(enc, ptr)) {
1704#define LEAD_CASE(n) \
1705    case BT_LEAD ## n: ptr += n; break;
1706    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1707#undef LEAD_CASE
1708    case BT_NONASCII:
1709    case BT_NMSTRT:
1710#ifdef XML_NS
1711    case BT_COLON:
1712#endif
1713    case BT_HEX:
1714    case BT_DIGIT:
1715    case BT_NAME:
1716    case BT_MINUS:
1717      ptr += MINBPC(enc);
1718      break;
1719    default:
1720      return (int)(ptr - start);
1721    }
1722  }
1723}
1724
1725static const char * PTRFASTCALL
1726PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1727{
1728  for (;;) {
1729    switch (BYTE_TYPE(enc, ptr)) {
1730    case BT_LF:
1731    case BT_CR:
1732    case BT_S:
1733      ptr += MINBPC(enc);
1734      break;
1735    default:
1736      return ptr;
1737    }
1738  }
1739}
1740
1741static void PTRCALL
1742PREFIX(updatePosition)(const ENCODING *enc,
1743                       const char *ptr,
1744                       const char *end,
1745                       POSITION *pos)
1746{
1747  while (ptr < end) {
1748    switch (BYTE_TYPE(enc, ptr)) {
1749#define LEAD_CASE(n) \
1750    case BT_LEAD ## n: \
1751      ptr += n; \
1752      break;
1753    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1754#undef LEAD_CASE
1755    case BT_LF:
1756      pos->columnNumber = (XML_Size)-1;
1757      pos->lineNumber++;
1758      ptr += MINBPC(enc);
1759      break;
1760    case BT_CR:
1761      pos->lineNumber++;
1762      ptr += MINBPC(enc);
1763      if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1764        ptr += MINBPC(enc);
1765      pos->columnNumber = (XML_Size)-1;
1766      break;
1767    default:
1768      ptr += MINBPC(enc);
1769      break;
1770    }
1771    pos->columnNumber++;
1772  }
1773}
1774
1775#undef DO_LEAD_CASE
1776#undef MULTIBYTE_CASES
1777#undef INVALID_CASES
1778#undef CHECK_NAME_CASE
1779#undef CHECK_NAME_CASES
1780#undef CHECK_NMSTRT_CASE
1781#undef CHECK_NMSTRT_CASES
1782
1783#endif /* XML_TOK_IMPL_C */
1784