1/**********************************************************************
2  regparse.c -  Oniguruma (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include "regparse.h"
33#include "st.h"
34
35#define WARN_BUFSIZE    256
36
37#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
38
39
40OnigSyntaxType OnigSyntaxRuby = {
41  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
42     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
43     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
44     ONIG_SYN_OP_ESC_C_CONTROL )
45   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
46  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
47      ONIG_SYN_OP2_OPTION_RUBY |
48      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
49      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
50      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
51      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
52      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
53      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
54      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
55      ONIG_SYN_OP2_ESC_H_XDIGIT )
56  , ( SYN_GNU_REGEX_BV |
57      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
58      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
59      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
60      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
61      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
62      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
63      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
64  , ONIG_OPTION_NONE
65  ,
66  {
67      (OnigCodePoint )'\\'                       /* esc */
68    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
69    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
70    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
71    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
72    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
73  }
74};
75
76OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
77
78extern void onig_null_warn(const char* s ARG_UNUSED) { }
79
80#ifdef DEFAULT_WARN_FUNCTION
81static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
82#else
83static OnigWarnFunc onig_warn = onig_null_warn;
84#endif
85
86#ifdef DEFAULT_VERB_WARN_FUNCTION
87static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
88#else
89static OnigWarnFunc onig_verb_warn = onig_null_warn;
90#endif
91
92extern void onig_set_warn_func(OnigWarnFunc f)
93{
94  onig_warn = f;
95}
96
97extern void onig_set_verb_warn_func(OnigWarnFunc f)
98{
99  onig_verb_warn = f;
100}
101
102static void
103bbuf_free(BBuf* bbuf)
104{
105  if (IS_NOT_NULL(bbuf)) {
106    if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
107    xfree(bbuf);
108  }
109}
110
111static int
112bbuf_clone(BBuf** rto, BBuf* from)
113{
114  int r;
115  BBuf *to;
116
117  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
118  CHECK_NULL_RETURN_MEMERR(to);
119  r = BBUF_INIT(to, from->alloc);
120  if (r != 0) return r;
121  to->used = from->used;
122  xmemcpy(to->p, from->p, from->used);
123  return 0;
124}
125
126#define BACKREF_REL_TO_ABS(rel_no, env) \
127  ((env)->num_mem + 1 + (rel_no))
128
129#define ONOFF(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f))
130
131#define MBCODE_START_POS(enc) \
132  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
133
134#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
135  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
136
137#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
138  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
139    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
140    if (r) return r;\
141  }\
142} while (0)
143
144
145#define BITSET_IS_EMPTY(bs,empty) do {\
146  int i;\
147  empty = 1;\
148  for (i = 0; i < (int )BITSET_SIZE; i++) {\
149    if ((bs)[i] != 0) {\
150      empty = 0; break;\
151    }\
152  }\
153} while (0)
154
155static void
156bitset_set_range(BitSetRef bs, int from, int to)
157{
158  int i;
159  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
160    BITSET_SET_BIT(bs, i);
161  }
162}
163
164#if 0
165static void
166bitset_set_all(BitSetRef bs)
167{
168  int i;
169  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
170}
171#endif
172
173static void
174bitset_invert(BitSetRef bs)
175{
176  int i;
177  for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
178}
179
180static void
181bitset_invert_to(BitSetRef from, BitSetRef to)
182{
183  int i;
184  for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
185}
186
187static void
188bitset_and(BitSetRef dest, BitSetRef bs)
189{
190  int i;
191  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
192}
193
194static void
195bitset_or(BitSetRef dest, BitSetRef bs)
196{
197  int i;
198  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
199}
200
201static void
202bitset_copy(BitSetRef dest, BitSetRef bs)
203{
204  int i;
205  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
206}
207
208extern int
209onig_strncmp(const UChar* s1, const UChar* s2, int n)
210{
211  int x;
212
213  while (n-- > 0) {
214    x = *s2++ - *s1++;
215    if (x) return x;
216  }
217  return 0;
218}
219
220extern void
221onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
222{
223  int len = (int)(end - src);
224  if (len > 0) {
225    xmemcpy(dest, src, len);
226    dest[len] = (UChar )0;
227  }
228}
229
230#ifdef USE_NAMED_GROUP
231static UChar*
232strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
233{
234  int slen, term_len, i;
235  UChar *r;
236
237  slen = (int)(end - s);
238  term_len = ONIGENC_MBC_MINLEN(enc);
239
240  r = (UChar* )xmalloc(slen + term_len);
241  CHECK_NULL_RETURN(r);
242  xmemcpy(r, s, slen);
243
244  for (i = 0; i < term_len; i++)
245    r[slen + i] = (UChar )0;
246
247  return r;
248}
249#endif
250
251/* scan pattern methods */
252#define PEND_VALUE   0
253
254#define PFETCH_READY  UChar* pfetch_prev
255#define PEND         (p < end ?  0 : 1)
256#define PUNFETCH     p = pfetch_prev
257#define PINC       do { \
258  pfetch_prev = p; \
259  p += ONIGENC_MBC_ENC_LEN(enc, p); \
260} while (0)
261#define PFETCH(c)  do { \
262  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
263  pfetch_prev = p; \
264  p += ONIGENC_MBC_ENC_LEN(enc, p); \
265} while (0)
266
267#define PINC_S     do { \
268  p += ONIGENC_MBC_ENC_LEN(enc, p); \
269} while (0)
270#define PFETCH_S(c) do { \
271  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
272  p += ONIGENC_MBC_ENC_LEN(enc, p); \
273} while (0)
274
275#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
276#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
277
278static UChar*
279strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
280	      int capa, int oldCapa)
281{
282  UChar* r;
283
284  if (dest)
285    r = (UChar* )xrealloc(dest, capa + 1, oldCapa);
286  else
287    r = (UChar* )xmalloc(capa + 1);
288
289  CHECK_NULL_RETURN(r);
290  onig_strcpy(r + (dest_end - dest), src, src_end);
291  return r;
292}
293
294/* dest on static area */
295static UChar*
296strcat_capa_from_static(UChar* dest, UChar* dest_end,
297			const UChar* src, const UChar* src_end, int capa)
298{
299  UChar* r;
300
301  r = (UChar* )xmalloc(capa + 1);
302  CHECK_NULL_RETURN(r);
303  onig_strcpy(r, dest, dest_end);
304  onig_strcpy(r + (dest_end - dest), src, src_end);
305  return r;
306}
307
308
309#ifdef USE_ST_LIBRARY
310
311typedef struct {
312  UChar* s;
313  UChar* end;
314} st_str_end_key;
315
316static int
317str_end_cmp(st_str_end_key* x, st_str_end_key* y)
318{
319  UChar *p, *q;
320  int c;
321
322  if ((x->end - x->s) != (y->end - y->s))
323    return 1;
324
325  p = x->s;
326  q = y->s;
327  while (p < x->end) {
328    c = (int )*p - (int )*q;
329    if (c != 0) return c;
330
331    p++; q++;
332  }
333
334  return 0;
335}
336
337static int
338str_end_hash(st_str_end_key* x)
339{
340  UChar *p;
341  int val = 0;
342
343  p = x->s;
344  while (p < x->end) {
345    val = val * 997 + (int )*p++;
346  }
347
348  return val + (val >> 5);
349}
350
351extern hash_table_type*
352onig_st_init_strend_table_with_size(int size)
353{
354  static struct st_hash_type hashType = {
355    str_end_cmp,
356    str_end_hash,
357  };
358
359  return (hash_table_type* )
360           onig_st_init_table_with_size(&hashType, size);
361}
362
363extern int
364onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
365		      const UChar* end_key, hash_data_type *value)
366{
367  st_str_end_key key;
368
369  key.s   = (UChar* )str_key;
370  key.end = (UChar* )end_key;
371
372  return onig_st_lookup(table, (st_data_t )(UINTN)(&key), value);
373}
374
375extern int
376onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
377		      const UChar* end_key, hash_data_type value)
378{
379  st_str_end_key* key;
380  int result;
381
382  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
383  CHECK_NULL_RETURN_MEMERR(key);
384  key->s   = (UChar* )str_key;
385  key->end = (UChar* )end_key;
386  result = onig_st_insert(table, (st_data_t )(UINTN)key, value);
387  if (result) {
388    xfree(key);
389  }
390  return result;
391}
392
393#endif /* USE_ST_LIBRARY */
394
395
396#ifdef USE_NAMED_GROUP
397
398#define INIT_NAME_BACKREFS_ALLOC_NUM   8
399
400typedef struct {
401  UChar* name;
402  int    name_len;   /* byte length */
403  int    back_num;   /* number of backrefs */
404  int    back_alloc;
405  int    back_ref1;
406  int*   back_refs;
407} NameEntry;
408
409#ifdef USE_ST_LIBRARY
410
411typedef st_table  NameTable;
412typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
413
414#define NAMEBUF_SIZE    24
415#define NAMEBUF_SIZE_1  25
416
417#ifdef ONIG_DEBUG
418static int
419i_print_name_entry(UChar* key, NameEntry* e, void* arg)
420{
421  int i;
422  FILE* fp = (FILE* )arg;
423
424  fprintf(fp, "%s: ", e->name);
425  if (e->back_num == 0)
426    fputs("-", fp);
427  else if (e->back_num == 1)
428    fprintf(fp, "%d", e->back_ref1);
429  else {
430    for (i = 0; i < e->back_num; i++) {
431      if (i > 0) fprintf(fp, ", ");
432      fprintf(fp, "%d", e->back_refs[i]);
433    }
434  }
435  fputs("\n", fp);
436  return ST_CONTINUE;
437}
438
439extern int
440onig_print_names(FILE* fp, regex_t* reg)
441{
442  NameTable* t = (NameTable* )reg->name_table;
443
444  if (IS_NOT_NULL(t)) {
445    fprintf(fp, "name table\n");
446    onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
447    fputs("\n", fp);
448  }
449  return 0;
450}
451#endif /* ONIG_DEBUG */
452
453static int
454i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
455{
456  xfree(e->name);
457  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
458  xfree(key);
459  xfree(e);
460  return ST_DELETE;
461}
462
463static int
464names_clear(regex_t* reg)
465{
466  NameTable* t = (NameTable* )reg->name_table;
467
468  if (IS_NOT_NULL(t)) {
469    onig_st_foreach(t, i_free_name_entry, 0);
470  }
471  return 0;
472}
473
474extern int
475onig_names_free(regex_t* reg)
476{
477  int r;
478  NameTable* t;
479
480  r = names_clear(reg);
481  if (r) return r;
482
483  t = (NameTable* )reg->name_table;
484  if (IS_NOT_NULL(t)) onig_st_free_table(t);
485  reg->name_table = (void* )NULL;
486  return 0;
487}
488
489static NameEntry*
490name_find(regex_t* reg, const UChar* name, const UChar* name_end)
491{
492  NameEntry* e;
493  NameTable* t = (NameTable* )reg->name_table;
494
495  e = (NameEntry* )NULL;
496  if (IS_NOT_NULL(t)) {
497    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
498  }
499  return e;
500}
501
502typedef struct {
503  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
504  regex_t* reg;
505  void* arg;
506  int ret;
507  OnigEncoding enc;
508} INamesArg;
509
510static int
511i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
512{
513  int r = (*(arg->func))(e->name,
514                         e->name + e->name_len,
515                         e->back_num,
516			 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
517			 arg->reg, arg->arg);
518  if (r != 0) {
519    arg->ret = r;
520    return ST_STOP;
521  }
522  return ST_CONTINUE;
523}
524
525extern int
526onig_foreach_name(regex_t* reg,
527  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
528{
529  INamesArg narg;
530  NameTable* t = (NameTable* )reg->name_table;
531
532  narg.ret = 0;
533  if (IS_NOT_NULL(t)) {
534    narg.func = func;
535    narg.reg  = reg;
536    narg.arg  = arg;
537    narg.enc  = reg->enc; /* should be pattern encoding. */
538    onig_st_foreach(t, i_names, (HashDataType )(UINTN)&narg);
539  }
540  return narg.ret;
541}
542
543static int
544i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
545{
546  int i;
547
548  if (e->back_num > 1) {
549    for (i = 0; i < e->back_num; i++) {
550      e->back_refs[i] = map[e->back_refs[i]].new_val;
551    }
552  }
553  else if (e->back_num == 1) {
554    e->back_ref1 = map[e->back_ref1].new_val;
555  }
556
557  return ST_CONTINUE;
558}
559
560extern int
561onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
562{
563  NameTable* t = (NameTable* )reg->name_table;
564
565  if (IS_NOT_NULL(t)) {
566    onig_st_foreach(t, i_renumber_name, (HashDataType )(UINTN)map);
567  }
568  return 0;
569}
570
571
572extern int
573onig_number_of_names(regex_t* reg)
574{
575  NameTable* t = (NameTable* )reg->name_table;
576
577  if (IS_NOT_NULL(t))
578    return t->num_entries;
579  else
580    return 0;
581}
582
583#else  /* USE_ST_LIBRARY */
584
585#define INIT_NAMES_ALLOC_NUM    8
586
587typedef struct {
588  NameEntry* e;
589  int        num;
590  int        alloc;
591} NameTable;
592
593#ifdef ONIG_DEBUG
594extern int
595onig_print_names(FILE* fp, regex_t* reg)
596{
597  int i, j;
598  NameEntry* e;
599  NameTable* t = (NameTable* )reg->name_table;
600
601  if (IS_NOT_NULL(t) && t->num > 0) {
602    fprintf(fp, "name table\n");
603    for (i = 0; i < t->num; i++) {
604      e = &(t->e[i]);
605      fprintf(fp, "%s: ", e->name);
606      if (e->back_num == 0) {
607	fputs("-", fp);
608      }
609      else if (e->back_num == 1) {
610	fprintf(fp, "%d", e->back_ref1);
611      }
612      else {
613	for (j = 0; j < e->back_num; j++) {
614	  if (j > 0) fprintf(fp, ", ");
615	  fprintf(fp, "%d", e->back_refs[j]);
616	}
617      }
618      fputs("\n", fp);
619    }
620    fputs("\n", fp);
621  }
622  return 0;
623}
624#endif
625
626static int
627names_clear(regex_t* reg)
628{
629  int i;
630  NameEntry* e;
631  NameTable* t = (NameTable* )reg->name_table;
632
633  if (IS_NOT_NULL(t)) {
634    for (i = 0; i < t->num; i++) {
635      e = &(t->e[i]);
636      if (IS_NOT_NULL(e->name)) {
637	xfree(e->name);
638	e->name       = NULL;
639	e->name_len   = 0;
640	e->back_num   = 0;
641	e->back_alloc = 0;
642	if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
643	e->back_refs = (int* )NULL;
644      }
645    }
646    if (IS_NOT_NULL(t->e)) {
647      xfree(t->e);
648      t->e = NULL;
649    }
650    t->num = 0;
651  }
652  return 0;
653}
654
655extern int
656onig_names_free(regex_t* reg)
657{
658  int r;
659  NameTable* t;
660
661  r = names_clear(reg);
662  if (r) return r;
663
664  t = (NameTable* )reg->name_table;
665  if (IS_NOT_NULL(t)) xfree(t);
666  reg->name_table = NULL;
667  return 0;
668}
669
670static NameEntry*
671name_find(regex_t* reg, UChar* name, UChar* name_end)
672{
673  int i, len;
674  NameEntry* e;
675  NameTable* t = (NameTable* )reg->name_table;
676
677  if (IS_NOT_NULL(t)) {
678    len = name_end - name;
679    for (i = 0; i < t->num; i++) {
680      e = &(t->e[i]);
681      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
682	return e;
683    }
684  }
685  return (NameEntry* )NULL;
686}
687
688extern int
689onig_foreach_name(regex_t* reg,
690  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
691{
692  int i, r;
693  NameEntry* e;
694  NameTable* t = (NameTable* )reg->name_table;
695
696  if (IS_NOT_NULL(t)) {
697    for (i = 0; i < t->num; i++) {
698      e = &(t->e[i]);
699      r = (*func)(e->name, e->name + e->name_len, e->back_num,
700		  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
701		  reg, arg);
702      if (r != 0) return r;
703    }
704  }
705  return 0;
706}
707
708extern int
709onig_number_of_names(regex_t* reg)
710{
711  NameTable* t = (NameTable* )reg->name_table;
712
713  if (IS_NOT_NULL(t))
714    return t->num;
715  else
716    return 0;
717}
718
719#endif /* else USE_ST_LIBRARY */
720
721static int
722name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
723{
724  int alloc;
725  NameEntry* e;
726  NameTable* t = (NameTable* )reg->name_table;
727
728  if (name_end - name <= 0)
729    return ONIGERR_EMPTY_GROUP_NAME;
730
731  e = name_find(reg, name, name_end);
732  if (IS_NULL(e)) {
733#ifdef USE_ST_LIBRARY
734    if (IS_NULL(t)) {
735      t = onig_st_init_strend_table_with_size(5);
736      CHECK_NULL_RETURN_MEMERR(t);
737      reg->name_table = (void* )t;
738    }
739    e = (NameEntry* )xmalloc(sizeof(NameEntry));
740    CHECK_NULL_RETURN_MEMERR(e);
741
742    e->name = strdup_with_null(reg->enc, name, name_end);
743    if (IS_NULL(e->name)) {
744      xfree(e);  return ONIGERR_MEMORY;
745    }
746    onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
747                          (HashDataType )(UINTN)e);
748
749    e->name_len   = (int)(name_end - name);
750    e->back_num   = 0;
751    e->back_alloc = 0;
752    e->back_refs  = (int* )NULL;
753
754#else
755
756    if (IS_NULL(t)) {
757      alloc = INIT_NAMES_ALLOC_NUM;
758      t = (NameTable* )xmalloc(sizeof(NameTable));
759      CHECK_NULL_RETURN_MEMERR(t);
760      t->e     = NULL;
761      t->alloc = 0;
762      t->num   = 0;
763
764      t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
765      if (IS_NULL(t->e)) {
766	xfree(t);
767	return ONIGERR_MEMORY;
768      }
769      t->alloc = alloc;
770      reg->name_table = t;
771      goto clear;
772    }
773    else if (t->num == t->alloc) {
774      int i;
775
776      alloc = t->alloc * 2;
777      t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
778      CHECK_NULL_RETURN_MEMERR(t->e);
779      t->alloc = alloc;
780
781    clear:
782      for (i = t->num; i < t->alloc; i++) {
783	t->e[i].name       = NULL;
784	t->e[i].name_len   = 0;
785	t->e[i].back_num   = 0;
786	t->e[i].back_alloc = 0;
787	t->e[i].back_refs  = (int* )NULL;
788      }
789    }
790    e = &(t->e[t->num]);
791    t->num++;
792    e->name = strdup_with_null(reg->enc, name, name_end);
793    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
794    e->name_len = name_end - name;
795#endif
796  }
797
798  if (e->back_num >= 1 &&
799      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
800    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
801				    name, name_end);
802    return ONIGERR_MULTIPLEX_DEFINED_NAME;
803  }
804
805  e->back_num++;
806  if (e->back_num == 1) {
807    e->back_ref1 = backref;
808  }
809  else {
810    if (e->back_num == 2) {
811      alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
812      e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
813      CHECK_NULL_RETURN_MEMERR(e->back_refs);
814      e->back_alloc = alloc;
815      e->back_refs[0] = e->back_ref1;
816      e->back_refs[1] = backref;
817    }
818    else {
819      if (e->back_num > e->back_alloc) {
820	alloc = e->back_alloc * 2;
821	e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);
822	CHECK_NULL_RETURN_MEMERR(e->back_refs);
823	e->back_alloc = alloc;
824      }
825      e->back_refs[e->back_num - 1] = backref;
826    }
827  }
828
829  return 0;
830}
831
832extern int
833onig_name_to_group_numbers(regex_t* reg, const UChar* name,
834			   const UChar* name_end, int** nums)
835{
836  NameEntry* e = name_find(reg, name, name_end);
837
838  if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
839
840  switch (e->back_num) {
841  case 0:
842    break;
843  case 1:
844    *nums = &(e->back_ref1);
845    break;
846  default:
847    *nums = e->back_refs;
848    break;
849  }
850  return e->back_num;
851}
852
853extern int
854onig_name_to_backref_number(regex_t* reg, const UChar* name,
855			    const UChar* name_end, OnigRegion *region)
856{
857  int i, n, *nums;
858
859  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
860  if (n < 0)
861    return n;
862  else if (n == 0)
863    return ONIGERR_PARSER_BUG;
864  else if (n == 1)
865    return nums[0];
866  else {
867    if (IS_NOT_NULL(region)) {
868      for (i = n - 1; i >= 0; i--) {
869	if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
870	  return nums[i];
871      }
872    }
873    return nums[n - 1];
874  }
875}
876
877#else /* USE_NAMED_GROUP */
878
879extern int
880onig_name_to_group_numbers(regex_t* reg, const UChar* name,
881			   const UChar* name_end, int** nums)
882{
883  return ONIG_NO_SUPPORT_CONFIG;
884}
885
886extern int
887onig_name_to_backref_number(regex_t* reg, const UChar* name,
888			    const UChar* name_end, OnigRegion* region)
889{
890  return ONIG_NO_SUPPORT_CONFIG;
891}
892
893extern int
894onig_foreach_name(regex_t* reg,
895  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
896{
897  return ONIG_NO_SUPPORT_CONFIG;
898}
899
900extern int
901onig_number_of_names(regex_t* reg)
902{
903  return 0;
904}
905#endif /* else USE_NAMED_GROUP */
906
907extern int
908onig_noname_group_capture_is_active(regex_t* reg)
909{
910  if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
911    return 0;
912
913#ifdef USE_NAMED_GROUP
914  if (onig_number_of_names(reg) > 0 &&
915      IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
916      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
917    return 0;
918  }
919#endif
920
921  return 1;
922}
923
924
925#define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
926
927static void
928scan_env_clear(ScanEnv* env)
929{
930  int i;
931
932  BIT_STATUS_CLEAR(env->capture_history);
933  BIT_STATUS_CLEAR(env->bt_mem_start);
934  BIT_STATUS_CLEAR(env->bt_mem_end);
935  BIT_STATUS_CLEAR(env->backrefed_mem);
936  env->error      = (UChar* )NULL;
937  env->error_end  = (UChar* )NULL;
938  env->num_call   = 0;
939  env->num_mem    = 0;
940#ifdef USE_NAMED_GROUP
941  env->num_named  = 0;
942#endif
943  env->mem_alloc         = 0;
944  env->mem_nodes_dynamic = (Node** )NULL;
945
946  for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
947    env->mem_nodes_static[i] = NULL_NODE;
948
949#ifdef USE_COMBINATION_EXPLOSION_CHECK
950  env->num_comb_exp_check  = 0;
951  env->comb_exp_max_regnum = 0;
952  env->curr_max_regnum     = 0;
953  env->has_recursion       = 0;
954#endif
955}
956
957static int
958scan_env_add_mem_entry(ScanEnv* env)
959{
960  int i, need, alloc;
961  Node** p;
962
963  need = env->num_mem + 1;
964  if (need >= SCANENV_MEMNODES_SIZE) {
965    if (env->mem_alloc <= need) {
966      if (IS_NULL(env->mem_nodes_dynamic)) {
967	alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
968	p = (Node** )xmalloc(sizeof(Node*) * alloc);
969    CHECK_NULL_RETURN_MEMERR(p);
970
971	xmemcpy(p, env->mem_nodes_static,
972		sizeof(Node*) * SCANENV_MEMNODES_SIZE);
973      }
974      else {
975	alloc = env->mem_alloc * 2;
976	p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc, sizeof(Node*) * env->mem_alloc);
977      }
978      CHECK_NULL_RETURN_MEMERR(p);
979
980      for (i = env->num_mem + 1; i < alloc; i++)
981	p[i] = NULL_NODE;
982
983      env->mem_nodes_dynamic = p;
984      env->mem_alloc = alloc;
985    }
986  }
987
988  env->num_mem++;
989  return env->num_mem;
990}
991
992static int
993scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
994{
995  if (env->num_mem >= num)
996    SCANENV_MEM_NODES(env)[num] = node;
997  else
998    return ONIGERR_PARSER_BUG;
999  return 0;
1000}
1001
1002
1003#ifdef USE_PARSE_TREE_NODE_RECYCLE
1004typedef struct _FreeNode {
1005  struct _FreeNode* next;
1006} FreeNode;
1007
1008static FreeNode* FreeNodeList = (FreeNode* )NULL;
1009#endif
1010
1011extern void
1012onig_node_free(Node* node)
1013{
1014 start:
1015  if (IS_NULL(node)) return ;
1016
1017  switch (NTYPE(node)) {
1018  case NT_STR:
1019    if (NSTR(node)->capa != 0 &&
1020	IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1021      xfree(NSTR(node)->s);
1022    }
1023    break;
1024
1025  case NT_LIST:
1026  case NT_ALT:
1027    onig_node_free(NCAR(node));
1028    {
1029      Node* next_node = NCDR(node);
1030
1031#ifdef USE_PARSE_TREE_NODE_RECYCLE
1032      {
1033	FreeNode* n = (FreeNode* )node;
1034
1035        THREAD_ATOMIC_START;
1036	n->next = FreeNodeList;
1037	FreeNodeList = n;
1038        THREAD_ATOMIC_END;
1039      }
1040#else
1041      xfree(node);
1042#endif
1043      node = next_node;
1044      goto start;
1045    }
1046    break;
1047
1048  case NT_CCLASS:
1049    {
1050      CClassNode* cc = NCCLASS(node);
1051
1052      if (IS_NCCLASS_SHARE(cc)) return ;
1053      if (cc->mbuf)
1054        bbuf_free(cc->mbuf);
1055    }
1056    break;
1057
1058  case NT_QTFR:
1059    if (NQTFR(node)->target)
1060      onig_node_free(NQTFR(node)->target);
1061    break;
1062
1063  case NT_ENCLOSE:
1064    if (NENCLOSE(node)->target)
1065      onig_node_free(NENCLOSE(node)->target);
1066    break;
1067
1068  case NT_BREF:
1069    if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1070      xfree(NBREF(node)->back_dynamic);
1071    break;
1072
1073  case NT_ANCHOR:
1074    if (NANCHOR(node)->target)
1075      onig_node_free(NANCHOR(node)->target);
1076    break;
1077  }
1078
1079#ifdef USE_PARSE_TREE_NODE_RECYCLE
1080  {
1081    FreeNode* n = (FreeNode* )node;
1082
1083    THREAD_ATOMIC_START;
1084    n->next = FreeNodeList;
1085    FreeNodeList = n;
1086    THREAD_ATOMIC_END;
1087  }
1088#else
1089  xfree(node);
1090#endif
1091}
1092
1093#ifdef USE_PARSE_TREE_NODE_RECYCLE
1094extern int
1095onig_free_node_list(void)
1096{
1097  FreeNode* n;
1098
1099  /* THREAD_ATOMIC_START; */
1100  while (IS_NOT_NULL(FreeNodeList)) {
1101    n = FreeNodeList;
1102    FreeNodeList = FreeNodeList->next;
1103    xfree(n);
1104  }
1105  /* THREAD_ATOMIC_END; */
1106  return 0;
1107}
1108#endif
1109
1110static Node*
1111node_new(void)
1112{
1113  Node* node;
1114
1115#ifdef USE_PARSE_TREE_NODE_RECYCLE
1116  THREAD_ATOMIC_START;
1117  if (IS_NOT_NULL(FreeNodeList)) {
1118    node = (Node* )FreeNodeList;
1119    FreeNodeList = FreeNodeList->next;
1120    THREAD_ATOMIC_END;
1121    return node;
1122  }
1123  THREAD_ATOMIC_END;
1124#endif
1125
1126  node = (Node* )xmalloc(sizeof(Node));
1127  /* xmemset(node, 0, sizeof(Node)); */
1128  return node;
1129}
1130
1131
1132static void
1133initialize_cclass(CClassNode* cc)
1134{
1135  BITSET_CLEAR(cc->bs);
1136  /* cc->base.flags = 0; */
1137  cc->flags = 0;
1138  cc->mbuf  = NULL;
1139}
1140
1141static Node*
1142node_new_cclass(void)
1143{
1144  Node* node = node_new();
1145  CHECK_NULL_RETURN(node);
1146
1147  SET_NTYPE(node, NT_CCLASS);
1148  initialize_cclass(NCCLASS(node));
1149  return node;
1150}
1151
1152static Node*
1153node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
1154				   const OnigCodePoint ranges[])
1155{
1156  int n, i;
1157  CClassNode* cc;
1158  OnigCodePoint j;
1159
1160  Node* node = node_new_cclass();
1161  CHECK_NULL_RETURN(node);
1162
1163  cc = NCCLASS(node);
1164  if (not != 0) NCCLASS_SET_NOT(cc);
1165
1166  BITSET_CLEAR(cc->bs);
1167  if (sb_out > 0 && IS_NOT_NULL(ranges)) {
1168    n = ONIGENC_CODE_RANGE_NUM(ranges);
1169    for (i = 0; i < n; i++) {
1170      for (j  = ONIGENC_CODE_RANGE_FROM(ranges, i);
1171           j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
1172	if (j >= sb_out) goto sb_end;
1173
1174        BITSET_SET_BIT(cc->bs, j);
1175      }
1176    }
1177  }
1178
1179 sb_end:
1180  if (IS_NULL(ranges)) {
1181  is_null:
1182    cc->mbuf = NULL;
1183  }
1184  else {
1185    BBuf* bbuf;
1186
1187    n = ONIGENC_CODE_RANGE_NUM(ranges);
1188    if (n == 0) goto is_null;
1189
1190    bbuf = (BBuf* )xmalloc(sizeof(BBuf));
1191    CHECK_NULL_RETURN(bbuf);
1192    bbuf->alloc = n + 1;
1193    bbuf->used  = n + 1;
1194    bbuf->p     = (UChar* )((void* )ranges);
1195
1196    cc->mbuf = bbuf;
1197  }
1198
1199  return node;
1200}
1201
1202static Node*
1203node_new_ctype(int type, int not)
1204{
1205  Node* node = node_new();
1206  CHECK_NULL_RETURN(node);
1207
1208  SET_NTYPE(node, NT_CTYPE);
1209  NCTYPE(node)->ctype = type;
1210  NCTYPE(node)->not   = not;
1211  return node;
1212}
1213
1214static Node*
1215node_new_anychar(void)
1216{
1217  Node* node = node_new();
1218  CHECK_NULL_RETURN(node);
1219
1220  SET_NTYPE(node, NT_CANY);
1221  return node;
1222}
1223
1224static Node*
1225node_new_list(Node* left, Node* right)
1226{
1227  Node* node = node_new();
1228  CHECK_NULL_RETURN(node);
1229
1230  SET_NTYPE(node, NT_LIST);
1231  NCAR(node)  = left;
1232  NCDR(node) = right;
1233  return node;
1234}
1235
1236extern Node*
1237onig_node_new_list(Node* left, Node* right)
1238{
1239  return node_new_list(left, right);
1240}
1241
1242extern Node*
1243onig_node_list_add(Node* list, Node* x)
1244{
1245  Node *n;
1246
1247  n = onig_node_new_list(x, NULL);
1248  if (IS_NULL(n)) return NULL_NODE;
1249
1250  if (IS_NOT_NULL(list)) {
1251    while (IS_NOT_NULL(NCDR(list)))
1252      list = NCDR(list);
1253
1254    NCDR(list) = n;
1255  }
1256
1257  return n;
1258}
1259
1260extern Node*
1261onig_node_new_alt(Node* left, Node* right)
1262{
1263  Node* node = node_new();
1264  CHECK_NULL_RETURN(node);
1265
1266  SET_NTYPE(node, NT_ALT);
1267  NCAR(node)  = left;
1268  NCDR(node) = right;
1269  return node;
1270}
1271
1272extern Node*
1273onig_node_new_anchor(int type)
1274{
1275  Node* node = node_new();
1276  CHECK_NULL_RETURN(node);
1277
1278  SET_NTYPE(node, NT_ANCHOR);
1279  NANCHOR(node)->type     = type;
1280  NANCHOR(node)->target   = NULL;
1281  NANCHOR(node)->char_len = -1;
1282  return node;
1283}
1284
1285static Node*
1286node_new_backref(int back_num, int* backrefs, int by_name,
1287#ifdef USE_BACKREF_WITH_LEVEL
1288		 int exist_level, int nest_level,
1289#endif
1290		 ScanEnv* env)
1291{
1292  int i;
1293  Node* node = node_new();
1294
1295  CHECK_NULL_RETURN(node);
1296
1297  SET_NTYPE(node, NT_BREF);
1298  NBREF(node)->state    = 0;
1299  NBREF(node)->back_num = back_num;
1300  NBREF(node)->back_dynamic = (int* )NULL;
1301  if (by_name != 0)
1302    NBREF(node)->state |= NST_NAME_REF;
1303
1304#ifdef USE_BACKREF_WITH_LEVEL
1305  if (exist_level != 0) {
1306    NBREF(node)->state |= NST_NEST_LEVEL;
1307    NBREF(node)->nest_level  = nest_level;
1308  }
1309#endif
1310
1311  for (i = 0; i < back_num; i++) {
1312    if (backrefs[i] <= env->num_mem &&
1313	IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1314      NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */
1315      break;
1316    }
1317  }
1318
1319  if (back_num <= NODE_BACKREFS_SIZE) {
1320    for (i = 0; i < back_num; i++)
1321      NBREF(node)->back_static[i] = backrefs[i];
1322  }
1323  else {
1324    int* p = (int* )xmalloc(sizeof(int) * back_num);
1325    if (IS_NULL(p)) {
1326      onig_node_free(node);
1327      return NULL;
1328    }
1329    NBREF(node)->back_dynamic = p;
1330    for (i = 0; i < back_num; i++)
1331      p[i] = backrefs[i];
1332  }
1333  return node;
1334}
1335
1336#ifdef USE_SUBEXP_CALL
1337static Node*
1338node_new_call(UChar* name, UChar* name_end, int gnum)
1339{
1340  Node* node = node_new();
1341  CHECK_NULL_RETURN(node);
1342
1343  SET_NTYPE(node, NT_CALL);
1344  NCALL(node)->state     = 0;
1345  NCALL(node)->target    = NULL_NODE;
1346  NCALL(node)->name      = name;
1347  NCALL(node)->name_end  = name_end;
1348  NCALL(node)->group_num = gnum;  /* call by number if gnum != 0 */
1349  return node;
1350}
1351#endif
1352
1353static Node*
1354node_new_quantifier(int lower, int upper, int by_number)
1355{
1356  Node* node = node_new();
1357  CHECK_NULL_RETURN(node);
1358
1359  SET_NTYPE(node, NT_QTFR);
1360  NQTFR(node)->state  = 0;
1361  NQTFR(node)->target = NULL;
1362  NQTFR(node)->lower  = lower;
1363  NQTFR(node)->upper  = upper;
1364  NQTFR(node)->greedy = 1;
1365  NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1366  NQTFR(node)->head_exact        = NULL_NODE;
1367  NQTFR(node)->next_head_exact   = NULL_NODE;
1368  NQTFR(node)->is_refered        = 0;
1369  if (by_number != 0)
1370    NQTFR(node)->state |= NST_BY_NUMBER;
1371
1372#ifdef USE_COMBINATION_EXPLOSION_CHECK
1373  NQTFR(node)->comb_exp_check_num = 0;
1374#endif
1375
1376  return node;
1377}
1378
1379static Node*
1380node_new_enclose(int type)
1381{
1382  Node* node = node_new();
1383  CHECK_NULL_RETURN(node);
1384
1385  SET_NTYPE(node, NT_ENCLOSE);
1386  NENCLOSE(node)->type      = type;
1387  NENCLOSE(node)->state     =  0;
1388  NENCLOSE(node)->regnum    =  0;
1389  NENCLOSE(node)->option    =  0;
1390  NENCLOSE(node)->target    = NULL;
1391  NENCLOSE(node)->call_addr = -1;
1392  NENCLOSE(node)->opt_count =  0;
1393  return node;
1394}
1395
1396extern Node*
1397onig_node_new_enclose(int type)
1398{
1399  return node_new_enclose(type);
1400}
1401
1402static Node*
1403node_new_enclose_memory(OnigOptionType option, int is_named)
1404{
1405  Node* node = node_new_enclose(ENCLOSE_MEMORY);
1406  CHECK_NULL_RETURN(node);
1407  if (is_named != 0)
1408    SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
1409
1410#ifdef USE_SUBEXP_CALL
1411  NENCLOSE(node)->option = option;
1412#endif
1413  return node;
1414}
1415
1416static Node*
1417node_new_option(OnigOptionType option)
1418{
1419  Node* node = node_new_enclose(ENCLOSE_OPTION);
1420  CHECK_NULL_RETURN(node);
1421  NENCLOSE(node)->option = option;
1422  return node;
1423}
1424
1425extern int
1426onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1427{
1428  int addlen = (int)(end - s);
1429
1430  if (addlen > 0) {
1431    int len  = (int)(NSTR(node)->end - NSTR(node)->s);
1432
1433    if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1434      UChar* p;
1435      int capa = len + addlen + NODE_STR_MARGIN;
1436
1437      if (capa <= NSTR(node)->capa) {
1438	onig_strcpy(NSTR(node)->s + len, s, end);
1439      }
1440      else {
1441	if (NSTR(node)->s == NSTR(node)->buf)
1442	  p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1443				      s, end, capa);
1444	else
1445	  p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa, NSTR(node)->capa);
1446
1447	CHECK_NULL_RETURN_MEMERR(p);
1448	NSTR(node)->s    = p;
1449	NSTR(node)->capa = capa;
1450      }
1451    }
1452    else {
1453      onig_strcpy(NSTR(node)->s + len, s, end);
1454    }
1455    NSTR(node)->end = NSTR(node)->s + len + addlen;
1456  }
1457
1458  return 0;
1459}
1460
1461extern int
1462onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1463{
1464  onig_node_str_clear(node);
1465  return onig_node_str_cat(node, s, end);
1466}
1467
1468static int
1469node_str_cat_char(Node* node, UChar c)
1470{
1471  UChar s[1];
1472
1473  s[0] = c;
1474  return onig_node_str_cat(node, s, s + 1);
1475}
1476
1477extern void
1478onig_node_conv_to_str_node(Node* node, int flag)
1479{
1480  SET_NTYPE(node, NT_STR);
1481  NSTR(node)->flag = flag;
1482  NSTR(node)->capa = 0;
1483  NSTR(node)->s    = NSTR(node)->buf;
1484  NSTR(node)->end  = NSTR(node)->buf;
1485}
1486
1487extern void
1488onig_node_str_clear(Node* node)
1489{
1490  if (NSTR(node)->capa != 0 &&
1491      IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1492    xfree(NSTR(node)->s);
1493  }
1494
1495  NSTR(node)->capa = 0;
1496  NSTR(node)->flag = 0;
1497  NSTR(node)->s    = NSTR(node)->buf;
1498  NSTR(node)->end  = NSTR(node)->buf;
1499}
1500
1501static Node*
1502node_new_str(const UChar* s, const UChar* end)
1503{
1504  Node* node = node_new();
1505  CHECK_NULL_RETURN(node);
1506
1507  SET_NTYPE(node, NT_STR);
1508  NSTR(node)->capa = 0;
1509  NSTR(node)->flag = 0;
1510  NSTR(node)->s    = NSTR(node)->buf;
1511  NSTR(node)->end  = NSTR(node)->buf;
1512  if (onig_node_str_cat(node, s, end)) {
1513    onig_node_free(node);
1514    return NULL;
1515  }
1516  return node;
1517}
1518
1519extern Node*
1520onig_node_new_str(const UChar* s, const UChar* end)
1521{
1522  return node_new_str(s, end);
1523}
1524
1525static Node*
1526node_new_str_raw(UChar* s, UChar* end)
1527{
1528  Node* node = node_new_str(s, end);
1529  CHECK_NULL_RETURN(node);
1530  NSTRING_SET_RAW(node);
1531  return node;
1532}
1533
1534static Node*
1535node_new_empty(void)
1536{
1537  return node_new_str(NULL, NULL);
1538}
1539
1540static Node*
1541node_new_str_raw_char(UChar c)
1542{
1543  UChar p[1];
1544
1545  p[0] = c;
1546  return node_new_str_raw(p, p + 1);
1547}
1548
1549static Node*
1550str_node_split_last_char(StrNode* sn, OnigEncoding enc)
1551{
1552  const UChar *p;
1553  Node* n = NULL_NODE;
1554
1555  if (sn->end > sn->s) {
1556    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
1557    if (p && p > sn->s) { /* can be splitted. */
1558      n = node_new_str(p, sn->end);
1559      CHECK_NULL_RETURN(n);
1560      if ((sn->flag & NSTR_RAW) != 0)
1561	NSTRING_SET_RAW(n);
1562      sn->end = (UChar* )p;
1563    }
1564  }
1565  return n;
1566}
1567
1568static int
1569str_node_can_be_split(StrNode* sn, OnigEncoding enc)
1570{
1571  if (sn->end > sn->s) {
1572    return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
1573  }
1574  return 0;
1575}
1576
1577#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
1578static int
1579node_str_head_pad(StrNode* sn, int num, UChar val)
1580{
1581  UChar buf[NODE_STR_BUF_SIZE];
1582  int i, len;
1583
1584  len = sn->end - sn->s;
1585  onig_strcpy(buf, sn->s, sn->end);
1586  onig_strcpy(&(sn->s[num]), buf, buf + len);
1587  sn->end += num;
1588
1589  for (i = 0; i < num; i++) {
1590    sn->s[i] = val;
1591  }
1592}
1593#endif
1594
1595extern int
1596onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
1597{
1598  unsigned int num, val;
1599  OnigCodePoint c;
1600  UChar* p = *src;
1601  PFETCH_READY;
1602
1603  num = 0;
1604  while (!PEND) {
1605    PFETCH(c);
1606    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1607      val = (unsigned int )DIGITVAL(c);
1608      if ((INT_MAX_LIMIT - val) / 10UL < num)
1609	return -1;  /* overflow */
1610
1611      num = num * 10 + val;
1612    }
1613    else {
1614      PUNFETCH;
1615      break;
1616    }
1617  }
1618  *src = p;
1619  return num;
1620}
1621
1622static int
1623scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
1624				 OnigEncoding enc)
1625{
1626  OnigCodePoint c;
1627  unsigned int num, val;
1628  UChar* p = *src;
1629  PFETCH_READY;
1630
1631  num = 0;
1632  while (!PEND && maxlen-- != 0) {
1633    PFETCH(c);
1634    if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1635      val = (unsigned int )XDIGITVAL(enc,c);
1636      if ((INT_MAX_LIMIT - val) / 16UL < num)
1637	return -1;  /* overflow */
1638
1639      num = (num << 4) + XDIGITVAL(enc,c);
1640    }
1641    else {
1642      PUNFETCH;
1643      break;
1644    }
1645  }
1646  *src = p;
1647  return num;
1648}
1649
1650static int
1651scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
1652			   OnigEncoding enc)
1653{
1654  OnigCodePoint c;
1655  unsigned int num, val;
1656  UChar* p = *src;
1657  PFETCH_READY;
1658
1659  num = 0;
1660  while (!PEND && maxlen-- != 0) {
1661    PFETCH(c);
1662    if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1663      val = ODIGITVAL(c);
1664      if ((INT_MAX_LIMIT - val) / 8UL < num)
1665	return -1;  /* overflow */
1666
1667      num = (num << 3) + val;
1668    }
1669    else {
1670      PUNFETCH;
1671      break;
1672    }
1673  }
1674  *src = p;
1675  return num;
1676}
1677
1678
1679#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1680    BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1681
1682/* data format:
1683     [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1684     (all data size is OnigCodePoint)
1685 */
1686static int
1687new_code_range(BBuf** pbuf)
1688{
1689#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
1690  int r;
1691  OnigCodePoint n;
1692  BBuf* bbuf;
1693
1694  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1695  CHECK_NULL_RETURN_MEMERR(*pbuf);
1696  r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
1697  if (r) return r;
1698
1699  n = 0;
1700  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1701  return 0;
1702}
1703
1704static int
1705add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
1706{
1707  int r, inc_n, pos;
1708  int low, high, bound, x;
1709  OnigCodePoint n, *data;
1710  BBuf* bbuf;
1711
1712  if (from > to) {
1713    n = from; from = to; to = n;
1714  }
1715
1716  if (IS_NULL(*pbuf)) {
1717    r = new_code_range(pbuf);
1718    if (r) return r;
1719    bbuf = *pbuf;
1720    n = 0;
1721  }
1722  else {
1723    bbuf = *pbuf;
1724    GET_CODE_POINT(n, bbuf->p);
1725  }
1726  data = (OnigCodePoint* )(bbuf->p);
1727  data++;
1728
1729  for (low = 0, bound = n; low < bound; ) {
1730    x = (low + bound) >> 1;
1731    if (from > data[x*2 + 1])
1732      low = x + 1;
1733    else
1734      bound = x;
1735  }
1736
1737  for (high = low, bound = n; high < bound; ) {
1738    x = (high + bound) >> 1;
1739    if (to >= data[x*2] - 1)
1740      high = x + 1;
1741    else
1742      bound = x;
1743  }
1744
1745  inc_n = low + 1 - high;
1746  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1747    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
1748
1749  if (inc_n != 1) {
1750    if (from > data[low*2])
1751      from = data[low*2];
1752    if (to < data[(high - 1)*2 + 1])
1753      to = data[(high - 1)*2 + 1];
1754  }
1755
1756  if (inc_n != 0 && (OnigCodePoint )high < n) {
1757    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1758    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1759    int size = (n - high) * 2 * SIZE_CODE_POINT;
1760
1761    if (inc_n > 0) {
1762      BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1763    }
1764    else {
1765      BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1766    }
1767  }
1768
1769  pos = SIZE_CODE_POINT * (1 + low * 2);
1770  BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1771  BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1772  BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1773  n += inc_n;
1774  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1775
1776  return 0;
1777}
1778
1779static int
1780add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1781{
1782  if (from > to) {
1783    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
1784      return 0;
1785    else
1786      return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
1787  }
1788
1789  return add_code_range_to_buf(pbuf, from, to);
1790}
1791
1792static int
1793not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
1794{
1795  int r, i, n;
1796  OnigCodePoint pre, from, *data, to = 0;
1797
1798  *pbuf = (BBuf* )NULL;
1799  if (IS_NULL(bbuf)) {
1800  set_all:
1801    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1802  }
1803
1804  data = (OnigCodePoint* )(bbuf->p);
1805  GET_CODE_POINT(n, data);
1806  data++;
1807  if (n <= 0) goto set_all;
1808
1809  r = 0;
1810  pre = MBCODE_START_POS(enc);
1811  for (i = 0; i < n; i++) {
1812    from = data[i*2];
1813    to   = data[i*2+1];
1814    if (pre <= from - 1) {
1815      r = add_code_range_to_buf(pbuf, pre, from - 1);
1816      if (r != 0) return r;
1817    }
1818    if (to == ~((OnigCodePoint )0)) break;
1819    pre = to + 1;
1820  }
1821  if (to < ~((OnigCodePoint )0)) {
1822    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
1823  }
1824  return r;
1825}
1826
1827#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1828  BBuf *tbuf; \
1829  int  tnot; \
1830  tnot = not1;  not1  = not2;  not2  = tnot; \
1831  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1832} while (0)
1833
1834static int
1835or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
1836                  BBuf* bbuf2, int not2, BBuf** pbuf)
1837{
1838  int r;
1839  OnigCodePoint i, n1, *data1;
1840  OnigCodePoint from, to;
1841
1842  *pbuf = (BBuf* )NULL;
1843  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1844    if (not1 != 0 || not2 != 0)
1845      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1846    return 0;
1847  }
1848
1849  r = 0;
1850  if (IS_NULL(bbuf2))
1851    SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1852
1853  if (IS_NULL(bbuf1)) {
1854    if (not1 != 0) {
1855      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1856    }
1857    else {
1858      if (not2 == 0) {
1859	return bbuf_clone(pbuf, bbuf2);
1860      }
1861      else {
1862	return not_code_range_buf(enc, bbuf2, pbuf);
1863      }
1864    }
1865  }
1866
1867  if (not1 != 0)
1868    SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1869
1870  data1 = (OnigCodePoint* )(bbuf1->p);
1871  GET_CODE_POINT(n1, data1);
1872  data1++;
1873
1874  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1875    r = bbuf_clone(pbuf, bbuf2);
1876  }
1877  else if (not1 == 0) { /* 1 OR (not 2) */
1878    r = not_code_range_buf(enc, bbuf2, pbuf);
1879  }
1880  if (r != 0) return r;
1881
1882  for (i = 0; i < n1; i++) {
1883    from = data1[i*2];
1884    to   = data1[i*2+1];
1885    r = add_code_range_to_buf(pbuf, from, to);
1886    if (r != 0) return r;
1887  }
1888  return 0;
1889}
1890
1891static int
1892and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
1893	        OnigCodePoint* data, int n)
1894{
1895  int i, r;
1896  OnigCodePoint from2, to2;
1897
1898  for (i = 0; i < n; i++) {
1899    from2 = data[i*2];
1900    to2   = data[i*2+1];
1901    if (from2 < from1) {
1902      if (to2 < from1) continue;
1903      else {
1904	from1 = to2 + 1;
1905      }
1906    }
1907    else if (from2 <= to1) {
1908      if (to2 < to1) {
1909	if (from1 <= from2 - 1) {
1910	  r = add_code_range_to_buf(pbuf, from1, from2-1);
1911	  if (r != 0) return r;
1912	}
1913	from1 = to2 + 1;
1914      }
1915      else {
1916	to1 = from2 - 1;
1917      }
1918    }
1919    else {
1920      from1 = from2;
1921    }
1922    if (from1 > to1) break;
1923  }
1924  if (from1 <= to1) {
1925    r = add_code_range_to_buf(pbuf, from1, to1);
1926    if (r != 0) return r;
1927  }
1928  return 0;
1929}
1930
1931static int
1932and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
1933{
1934  int r;
1935  OnigCodePoint i, j, n1, n2, *data1, *data2;
1936  OnigCodePoint from, to, from1, to1, from2, to2;
1937
1938  *pbuf = (BBuf* )NULL;
1939  if (IS_NULL(bbuf1)) {
1940    if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1941      return bbuf_clone(pbuf, bbuf2);
1942    return 0;
1943  }
1944  else if (IS_NULL(bbuf2)) {
1945    if (not2 != 0)
1946      return bbuf_clone(pbuf, bbuf1);
1947    return 0;
1948  }
1949
1950  if (not1 != 0)
1951    SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1952
1953  data1 = (OnigCodePoint* )(bbuf1->p);
1954  data2 = (OnigCodePoint* )(bbuf2->p);
1955  GET_CODE_POINT(n1, data1);
1956  GET_CODE_POINT(n2, data2);
1957  data1++;
1958  data2++;
1959
1960  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
1961    for (i = 0; i < n1; i++) {
1962      from1 = data1[i*2];
1963      to1   = data1[i*2+1];
1964      for (j = 0; j < n2; j++) {
1965	from2 = data2[j*2];
1966	to2   = data2[j*2+1];
1967	if (from2 > to1) break;
1968	if (to2 < from1) continue;
1969	from = MAX(from1, from2);
1970	to   = MIN(to1, to2);
1971	r = add_code_range_to_buf(pbuf, from, to);
1972	if (r != 0) return r;
1973      }
1974    }
1975  }
1976  else if (not1 == 0) { /* 1 AND (not 2) */
1977    for (i = 0; i < n1; i++) {
1978      from1 = data1[i*2];
1979      to1   = data1[i*2+1];
1980      r = and_code_range1(pbuf, from1, to1, data2, n2);
1981      if (r != 0) return r;
1982    }
1983  }
1984
1985  return 0;
1986}
1987
1988static int
1989and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
1990{
1991  int r, not1, not2;
1992  BBuf *buf1, *buf2, *pbuf;
1993  BitSetRef bsr1, bsr2;
1994  BitSet bs1, bs2;
1995
1996  not1 = IS_NCCLASS_NOT(dest);
1997  bsr1 = dest->bs;
1998  buf1 = dest->mbuf;
1999  not2 = IS_NCCLASS_NOT(cc);
2000  bsr2 = cc->bs;
2001  buf2 = cc->mbuf;
2002
2003  if (not1 != 0) {
2004    bitset_invert_to(bsr1, bs1);
2005    bsr1 = bs1;
2006  }
2007  if (not2 != 0) {
2008    bitset_invert_to(bsr2, bs2);
2009    bsr2 = bs2;
2010  }
2011  bitset_and(bsr1, bsr2);
2012  if (bsr1 != dest->bs) {
2013    bitset_copy(dest->bs, bsr1);
2014    bsr1 = dest->bs;
2015  }
2016  if (not1 != 0) {
2017    bitset_invert(dest->bs);
2018  }
2019
2020  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2021    if (not1 != 0 && not2 != 0) {
2022      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
2023    }
2024    else {
2025      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
2026      if (r == 0 && not1 != 0) {
2027	BBuf *tbuf;
2028	r = not_code_range_buf(enc, pbuf, &tbuf);
2029	if (r != 0) {
2030	  bbuf_free(pbuf);
2031	  return r;
2032	}
2033	bbuf_free(pbuf);
2034	pbuf = tbuf;
2035      }
2036    }
2037    if (r != 0) return r;
2038
2039    dest->mbuf = pbuf;
2040    bbuf_free(buf1);
2041    return r;
2042  }
2043  return 0;
2044}
2045
2046static int
2047or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
2048{
2049  int r, not1, not2;
2050  BBuf *buf1, *buf2, *pbuf;
2051  BitSetRef bsr1, bsr2;
2052  BitSet bs1, bs2;
2053
2054  not1 = IS_NCCLASS_NOT(dest);
2055  bsr1 = dest->bs;
2056  buf1 = dest->mbuf;
2057  not2 = IS_NCCLASS_NOT(cc);
2058  bsr2 = cc->bs;
2059  buf2 = cc->mbuf;
2060
2061  if (not1 != 0) {
2062    bitset_invert_to(bsr1, bs1);
2063    bsr1 = bs1;
2064  }
2065  if (not2 != 0) {
2066    bitset_invert_to(bsr2, bs2);
2067    bsr2 = bs2;
2068  }
2069  bitset_or(bsr1, bsr2);
2070  if (bsr1 != dest->bs) {
2071    bitset_copy(dest->bs, bsr1);
2072    bsr1 = dest->bs;
2073  }
2074  if (not1 != 0) {
2075    bitset_invert(dest->bs);
2076  }
2077
2078  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2079    if (not1 != 0 && not2 != 0) {
2080      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
2081    }
2082    else {
2083      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
2084      if (r == 0 && not1 != 0) {
2085	BBuf *tbuf;
2086	r = not_code_range_buf(enc, pbuf, &tbuf);
2087	if (r != 0) {
2088	  bbuf_free(pbuf);
2089	  return r;
2090	}
2091	bbuf_free(pbuf);
2092	pbuf = tbuf;
2093      }
2094    }
2095    if (r != 0) return r;
2096
2097    dest->mbuf = pbuf;
2098    bbuf_free(buf1);
2099    return r;
2100  }
2101  else
2102    return 0;
2103}
2104
2105static int
2106conv_backslash_value(int c, ScanEnv* env)
2107{
2108  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
2109    switch (c) {
2110    case 'n': return '\n';
2111    case 't': return '\t';
2112    case 'r': return '\r';
2113    case 'f': return '\f';
2114    case 'a': return '\007';
2115    case 'b': return '\010';
2116    case 'e': return '\033';
2117    case 'v':
2118      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
2119	return '\v';
2120      break;
2121
2122    default:
2123      break;
2124    }
2125  }
2126  return c;
2127}
2128
2129static int
2130is_invalid_quantifier_target(Node* node)
2131{
2132  switch (NTYPE(node)) {
2133  case NT_ANCHOR:
2134    return 1;
2135    break;
2136
2137  case NT_ENCLOSE:
2138    /* allow enclosed elements */
2139    /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2140    break;
2141
2142  case NT_LIST:
2143    do {
2144      if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2145    } while (IS_NOT_NULL(node = NCDR(node)));
2146    return 0;
2147    break;
2148
2149  case NT_ALT:
2150    do {
2151      if (is_invalid_quantifier_target(NCAR(node))) return 1;
2152    } while (IS_NOT_NULL(node = NCDR(node)));
2153    break;
2154
2155  default:
2156    break;
2157  }
2158  return 0;
2159}
2160
2161/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2162static int
2163popular_quantifier_num(QtfrNode* q)
2164{
2165  if (q->greedy) {
2166    if (q->lower == 0) {
2167      if (q->upper == 1) return 0;
2168      else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2169    }
2170    else if (q->lower == 1) {
2171      if (IS_REPEAT_INFINITE(q->upper)) return 2;
2172    }
2173  }
2174  else {
2175    if (q->lower == 0) {
2176      if (q->upper == 1) return 3;
2177      else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2178    }
2179    else if (q->lower == 1) {
2180      if (IS_REPEAT_INFINITE(q->upper)) return 5;
2181    }
2182  }
2183  return -1;
2184}
2185
2186
2187enum ReduceType {
2188  RQ_ASIS = 0, /* as is */
2189  RQ_DEL  = 1, /* delete parent */
2190  RQ_A,        /* to '*'    */
2191  RQ_AQ,       /* to '*?'   */
2192  RQ_QQ,       /* to '??'   */
2193  RQ_P_QQ,     /* to '+)??' */
2194  RQ_PQ_Q      /* to '+?)?' */
2195};
2196
2197static enum ReduceType ReduceTypeTable[6][6] = {
2198  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
2199  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
2200  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
2201  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
2202  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
2203  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
2204};
2205
2206extern void
2207onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
2208{
2209  int pnum, cnum;
2210  QtfrNode *p, *c;
2211
2212  p = NQTFR(pnode);
2213  c = NQTFR(cnode);
2214  pnum = popular_quantifier_num(p);
2215  cnum = popular_quantifier_num(c);
2216  if (pnum < 0 || cnum < 0) return ;
2217
2218  switch(ReduceTypeTable[cnum][pnum]) {
2219  case RQ_DEL:
2220    CopyMem (pnode, cnode, sizeof (Node));
2221    break;
2222  case RQ_A:
2223    p->target = c->target;
2224    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
2225    break;
2226  case RQ_AQ:
2227    p->target = c->target;
2228    p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
2229    break;
2230  case RQ_QQ:
2231    p->target = c->target;
2232    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
2233    break;
2234  case RQ_P_QQ:
2235    p->target = cnode;
2236    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
2237    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
2238    return ;
2239    break;
2240  case RQ_PQ_Q:
2241    p->target = cnode;
2242    p->lower  = 0;  p->upper = 1;  p->greedy = 1;
2243    c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
2244    return ;
2245    break;
2246  case RQ_ASIS:
2247    p->target = cnode;
2248    return ;
2249    break;
2250  }
2251
2252  c->target = NULL_NODE;
2253  onig_node_free(cnode);
2254}
2255
2256
2257enum TokenSyms {
2258  TK_EOT      = 0,   /* end of token */
2259  TK_RAW_BYTE = 1,
2260  TK_CHAR,
2261  TK_STRING,
2262  TK_CODE_POINT,
2263  TK_ANYCHAR,
2264  TK_CHAR_TYPE,
2265  TK_BACKREF,
2266  TK_CALL,
2267  TK_ANCHOR,
2268  TK_OP_REPEAT,
2269  TK_INTERVAL,
2270  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
2271  TK_ALT,
2272  TK_SUBEXP_OPEN,
2273  TK_SUBEXP_CLOSE,
2274  TK_CC_OPEN,
2275  TK_QUOTE_OPEN,
2276  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
2277  /* in cc */
2278  TK_CC_CLOSE,
2279  TK_CC_RANGE,
2280  TK_POSIX_BRACKET_OPEN,
2281  TK_CC_AND,             /* && */
2282  TK_CC_CC_OPEN          /* [ */
2283};
2284
2285typedef struct {
2286  enum TokenSyms type;
2287  int escaped;
2288  int base;   /* is number: 8, 16 (used in [....]) */
2289  UChar* backp;
2290  union {
2291    UChar* s;
2292    int   c;
2293    OnigCodePoint code;
2294    int   anchor;
2295    int   subtype;
2296    struct {
2297      int lower;
2298      int upper;
2299      int greedy;
2300      int possessive;
2301    } repeat;
2302    struct {
2303      int  num;
2304      int  ref1;
2305      int* refs;
2306      int  by_name;
2307#ifdef USE_BACKREF_WITH_LEVEL
2308      int  exist_level;
2309      int  level;   /* \k<name+n> */
2310#endif
2311    } backref;
2312    struct {
2313      UChar* name;
2314      UChar* name_end;
2315      int    gnum;
2316    } call;
2317    struct {
2318      int ctype;
2319      int not;
2320    } prop;
2321  } u;
2322} OnigToken;
2323
2324
2325static int
2326fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
2327{
2328  int low, up, syn_allow, non_low = 0;
2329  int r = 0;
2330  OnigCodePoint c;
2331  OnigEncoding enc = env->enc;
2332  UChar* p = *src;
2333  PFETCH_READY;
2334
2335  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
2336
2337  if (PEND) {
2338    if (syn_allow)
2339      return 1;  /* "....{" : OK! */
2340    else
2341      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
2342  }
2343
2344  if (! syn_allow) {
2345    c = PPEEK;
2346    if (c == ')' || c == '(' || c == '|') {
2347      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
2348    }
2349  }
2350
2351  low = onig_scan_unsigned_number(&p, end, env->enc);
2352  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2353  if (low > ONIG_MAX_REPEAT_NUM)
2354    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2355
2356  if (p == *src) { /* can't read low */
2357    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
2358      /* allow {,n} as {0,n} */
2359      low = 0;
2360      non_low = 1;
2361    }
2362    else
2363      goto invalid;
2364  }
2365
2366  if (PEND) goto invalid;
2367  PFETCH(c);
2368  if (c == ',') {
2369    UChar* prev = p;
2370    up = onig_scan_unsigned_number(&p, end, env->enc);
2371    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2372    if (up > ONIG_MAX_REPEAT_NUM)
2373      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2374
2375    if (p == prev) {
2376      if (non_low != 0)
2377	goto invalid;
2378      up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
2379    }
2380  }
2381  else {
2382    if (non_low != 0)
2383      goto invalid;
2384
2385    PUNFETCH;
2386    up = low;  /* {n} : exact n times */
2387    r = 2;     /* fixed */
2388  }
2389
2390  if (PEND) goto invalid;
2391  PFETCH(c);
2392  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
2393    if (c != MC_ESC(env->syntax)) goto invalid;
2394    PFETCH(c);
2395  }
2396  if (c != '}') goto invalid;
2397
2398  if (!IS_REPEAT_INFINITE(up) && low > up) {
2399    return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
2400  }
2401
2402  tok->type = TK_INTERVAL;
2403  tok->u.repeat.lower = low;
2404  tok->u.repeat.upper = up;
2405  *src = p;
2406  return r; /* 0: normal {n,m}, 2: fixed {n} */
2407
2408 invalid:
2409  if (syn_allow)
2410    return 1;  /* OK */
2411  else
2412    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
2413}
2414
2415/* \M-, \C-, \c, or \... */
2416static int
2417fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
2418{
2419  int v;
2420  OnigCodePoint c;
2421  OnigEncoding enc = env->enc;
2422  UChar* p = *src;
2423
2424  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2425
2426  PFETCH_S(c);
2427  switch (c) {
2428  case 'M':
2429    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
2430      if (PEND) return ONIGERR_END_PATTERN_AT_META;
2431      PFETCH_S(c);
2432      if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2433      if (PEND) return ONIGERR_END_PATTERN_AT_META;
2434      PFETCH_S(c);
2435      if (c == MC_ESC(env->syntax)) {
2436        v = fetch_escaped_value(&p, end, env);
2437        if (v < 0) return v;
2438        c = (OnigCodePoint )v;
2439      }
2440      c = ((c & 0xff) | 0x80);
2441    }
2442    else
2443      goto backslash;
2444    break;
2445
2446  case 'C':
2447    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
2448      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
2449      PFETCH_S(c);
2450      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2451      goto control;
2452    }
2453    else
2454      goto backslash;
2455
2456  case 'c':
2457    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
2458    control:
2459      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
2460      PFETCH_S(c);
2461      if (c == '?') {
2462        c = 0177;
2463      }
2464      else {
2465        if (c == MC_ESC(env->syntax)) {
2466          v = fetch_escaped_value(&p, end, env);
2467          if (v < 0) return v;
2468          c = (OnigCodePoint )v;
2469        }
2470        c &= 0x9f;
2471      }
2472      break;
2473    }
2474    /* fall through */
2475
2476  default:
2477    {
2478    backslash:
2479      c = conv_backslash_value(c, env);
2480    }
2481    break;
2482  }
2483
2484  *src = p;
2485  return c;
2486}
2487
2488static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2489
2490static OnigCodePoint
2491get_name_end_code_point(OnigCodePoint start)
2492{
2493  switch (start) {
2494  case '<':  return (OnigCodePoint )'>'; break;
2495  case '\'': return (OnigCodePoint )'\''; break;
2496  default:
2497    break;
2498  }
2499
2500  return (OnigCodePoint )0;
2501}
2502
2503#ifdef USE_NAMED_GROUP
2504#ifdef USE_BACKREF_WITH_LEVEL
2505/*
2506   \k<name+n>, \k<name-n>
2507   \k<num+n>,  \k<num-n>
2508   \k<-num+n>, \k<-num-n>
2509*/
2510static int
2511fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
2512		      UChar** rname_end, ScanEnv* env,
2513		      int* rback_num, int* rlevel)
2514{
2515  int r, sign, is_num, exist_level;
2516  OnigCodePoint end_code;
2517  OnigCodePoint c = 0;
2518  OnigEncoding enc = env->enc;
2519  UChar *name_end;
2520  UChar *pnum_head;
2521  UChar *p = *src;
2522  PFETCH_READY;
2523
2524  *rback_num = 0;
2525  is_num = exist_level = 0;
2526  sign = 1;
2527  pnum_head = *src;
2528
2529  end_code = get_name_end_code_point(start_code);
2530
2531  name_end = end;
2532  r = 0;
2533  if (PEND) {
2534    return ONIGERR_EMPTY_GROUP_NAME;
2535  }
2536  else {
2537    PFETCH(c);
2538    if (c == end_code)
2539      return ONIGERR_EMPTY_GROUP_NAME;
2540
2541    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2542      is_num = 1;
2543    }
2544    else if (c == '-') {
2545      is_num = 2;
2546      sign = -1;
2547      pnum_head = p;
2548    }
2549    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2550      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2551    }
2552  }
2553
2554  while (!PEND) {
2555    name_end = p;
2556    PFETCH(c);
2557    if (c == end_code || c == ')' || c == '+' || c == '-') {
2558      if (is_num == 2) 	r = ONIGERR_INVALID_GROUP_NAME;
2559      break;
2560    }
2561
2562    if (is_num != 0) {
2563      if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2564        is_num = 1;
2565      }
2566      else {
2567        r = ONIGERR_INVALID_GROUP_NAME;
2568        is_num = 0;
2569      }
2570    }
2571    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2572      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2573    }
2574  }
2575
2576  if (r == 0 && c != end_code) {
2577    if (c == '+' || c == '-') {
2578      int level;
2579      int flag = (c == '-' ? -1 : 1);
2580
2581      PFETCH(c);
2582      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2583      PUNFETCH;
2584      level = onig_scan_unsigned_number(&p, end, enc);
2585      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2586      *rlevel = (level * flag);
2587      exist_level = 1;
2588
2589      PFETCH(c);
2590      if (c == end_code)
2591	goto end;
2592    }
2593
2594  err:
2595    r = ONIGERR_INVALID_GROUP_NAME;
2596    name_end = end;
2597  }
2598
2599 end:
2600  if (r == 0) {
2601    if (is_num != 0) {
2602      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2603      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2604      else if (*rback_num == 0) goto err;
2605
2606      *rback_num *= sign;
2607    }
2608
2609    *rname_end = name_end;
2610    *src = p;
2611    return (exist_level ? 1 : 0);
2612  }
2613  else {
2614    onig_scan_env_set_error_string(env, r, *src, name_end);
2615    return r;
2616  }
2617}
2618#endif /* USE_BACKREF_WITH_LEVEL */
2619
2620/*
2621  def: 0 -> define name    (don't allow number name)
2622       1 -> reference name (allow number name)
2623*/
2624static int
2625fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2626	   UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2627{
2628  int r, is_num, sign;
2629  OnigCodePoint end_code;
2630  OnigCodePoint c = 0;
2631  OnigEncoding enc = env->enc;
2632  UChar *name_end;
2633  UChar *pnum_head;
2634  UChar *p = *src;
2635
2636  *rback_num = 0;
2637
2638  end_code = get_name_end_code_point(start_code);
2639
2640  name_end = end;
2641  pnum_head = *src;
2642  r = 0;
2643  is_num = 0;
2644  sign = 1;
2645  if (PEND) {
2646    return ONIGERR_EMPTY_GROUP_NAME;
2647  }
2648  else {
2649    PFETCH_S(c);
2650    if (c == end_code)
2651      return ONIGERR_EMPTY_GROUP_NAME;
2652
2653    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2654      if (ref == 1)
2655        is_num = 1;
2656      else {
2657        r = ONIGERR_INVALID_GROUP_NAME;
2658        is_num = 0;
2659      }
2660    }
2661    else if (c == '-') {
2662      if (ref == 1) {
2663        is_num = 2;
2664        sign = -1;
2665        pnum_head = p;
2666      }
2667      else {
2668        r = ONIGERR_INVALID_GROUP_NAME;
2669        is_num = 0;
2670      }
2671    }
2672    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2673      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2674    }
2675  }
2676
2677  if (r == 0) {
2678    while (!PEND) {
2679      name_end = p;
2680      PFETCH_S(c);
2681      if (c == end_code || c == ')') {
2682        if (is_num == 2) 	r = ONIGERR_INVALID_GROUP_NAME;
2683        break;
2684      }
2685
2686      if (is_num != 0) {
2687        if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2688          is_num = 1;
2689        }
2690        else {
2691          if (!ONIGENC_IS_CODE_WORD(enc, c))
2692            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2693          else
2694            r = ONIGERR_INVALID_GROUP_NAME;
2695          is_num = 0;
2696        }
2697      }
2698      else {
2699        if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2700          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2701        }
2702      }
2703    }
2704
2705    if (c != end_code) {
2706      r = ONIGERR_INVALID_GROUP_NAME;
2707      name_end = end;
2708    }
2709
2710    if (is_num != 0) {
2711      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2712      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2713      else if (*rback_num == 0) {
2714        r = ONIGERR_INVALID_GROUP_NAME;
2715        goto err;
2716      }
2717
2718      *rback_num *= sign;
2719    }
2720
2721    *rname_end = name_end;
2722    *src = p;
2723    return 0;
2724  }
2725  else {
2726    while (!PEND) {
2727      name_end = p;
2728      PFETCH_S(c);
2729      if (c == end_code || c == ')')
2730        break;
2731    }
2732    if (PEND)
2733      name_end = end;
2734
2735  err:
2736    onig_scan_env_set_error_string(env, r, *src, name_end);
2737    return r;
2738  }
2739}
2740#else
2741static int
2742fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2743	   UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2744{
2745  int r, is_num, sign;
2746  OnigCodePoint end_code;
2747  OnigCodePoint c = 0;
2748  UChar *name_end;
2749  OnigEncoding enc = env->enc;
2750  UChar *pnum_head;
2751  UChar *p = *src;
2752  PFETCH_READY;
2753
2754  *rback_num = 0;
2755
2756  end_code = get_name_end_code_point(start_code);
2757
2758  *rname_end = name_end = end;
2759  r = 0;
2760  pnum_head = *src;
2761  is_num = 0;
2762  sign = 1;
2763
2764  if (PEND) {
2765    return ONIGERR_EMPTY_GROUP_NAME;
2766  }
2767  else {
2768    PFETCH(c);
2769    if (c == end_code)
2770      return ONIGERR_EMPTY_GROUP_NAME;
2771
2772    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2773      is_num = 1;
2774    }
2775    else if (c == '-') {
2776      is_num = 2;
2777      sign = -1;
2778      pnum_head = p;
2779    }
2780    else {
2781      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2782    }
2783  }
2784
2785  while (!PEND) {
2786    name_end = p;
2787
2788    PFETCH(c);
2789    if (c == end_code || c == ')') break;
2790    if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2791      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2792  }
2793  if (r == 0 && c != end_code) {
2794    r = ONIGERR_INVALID_GROUP_NAME;
2795    name_end = end;
2796  }
2797
2798  if (r == 0) {
2799    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2800    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2801    else if (*rback_num == 0) {
2802      r = ONIGERR_INVALID_GROUP_NAME;
2803      goto err;
2804    }
2805    *rback_num *= sign;
2806
2807    *rname_end = name_end;
2808    *src = p;
2809    return 0;
2810  }
2811  else {
2812  err:
2813    onig_scan_env_set_error_string(env, r, *src, name_end);
2814    return r;
2815  }
2816}
2817#endif /* USE_NAMED_GROUP */
2818
2819static void
2820CC_ESC_WARN(ScanEnv* env, UChar *c)
2821{
2822  if (onig_warn == onig_null_warn) return ;
2823
2824  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
2825      IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
2826    UChar buf[WARN_BUFSIZE];
2827    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
2828		env->pattern, env->pattern_end,
2829                (UChar* )"character class has '%s' without escape", c);
2830    (*onig_warn)((char* )buf);
2831  }
2832}
2833
2834static void
2835CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
2836{
2837  if (onig_warn == onig_null_warn) return ;
2838
2839  if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
2840    UChar buf[WARN_BUFSIZE];
2841    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
2842		(env)->pattern, (env)->pattern_end,
2843		(UChar* )"regular expression has '%s' without escape", c);
2844    (*onig_warn)((char* )buf);
2845  }
2846}
2847
2848static UChar*
2849find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
2850		  UChar **next, OnigEncoding enc)
2851{
2852  int i;
2853  OnigCodePoint x;
2854  UChar *q;
2855  UChar *p = from;
2856
2857  while (p < to) {
2858    x = ONIGENC_MBC_TO_CODE(enc, p, to);
2859    q = p + enclen(enc, p);
2860    if (x == s[0]) {
2861      for (i = 1; i < n && q < to; i++) {
2862	x = ONIGENC_MBC_TO_CODE(enc, q, to);
2863	if (x != s[i]) break;
2864	q += enclen(enc, q);
2865      }
2866      if (i >= n) {
2867	if (IS_NOT_NULL(next))
2868	  *next = q;
2869	return p;
2870      }
2871    }
2872    p = q;
2873  }
2874  return NULL_UCHARP;
2875}
2876
2877static int
2878str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
2879		 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
2880{
2881  int i, in_esc;
2882  OnigCodePoint x;
2883  UChar *q;
2884  UChar *p = from;
2885
2886  in_esc = 0;
2887  while (p < to) {
2888    if (in_esc) {
2889      in_esc = 0;
2890      p += enclen(enc, p);
2891    }
2892    else {
2893      x = ONIGENC_MBC_TO_CODE(enc, p, to);
2894      q = p + enclen(enc, p);
2895      if (x == s[0]) {
2896	for (i = 1; i < n && q < to; i++) {
2897	  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2898	  if (x != s[i]) break;
2899	  q += enclen(enc, q);
2900	}
2901	if (i >= n) return 1;
2902	p += enclen(enc, p);
2903      }
2904      else {
2905	x = ONIGENC_MBC_TO_CODE(enc, p, to);
2906	if (x == bad) return 0;
2907	else if (x == MC_ESC(syn)) in_esc = 1;
2908	p = q;
2909      }
2910    }
2911  }
2912  return 0;
2913}
2914
2915static int
2916fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
2917{
2918  int num;
2919  OnigCodePoint c, c2;
2920  OnigSyntaxType* syn = env->syntax;
2921  OnigEncoding enc = env->enc;
2922  UChar* prev;
2923  UChar* p = *src;
2924  PFETCH_READY;
2925
2926  if (PEND) {
2927    tok->type = TK_EOT;
2928    return tok->type;
2929  }
2930
2931  PFETCH(c);
2932  tok->type = TK_CHAR;
2933  tok->base = 0;
2934  tok->u.c  = c;
2935  tok->escaped = 0;
2936
2937  if (c == ']') {
2938    tok->type = TK_CC_CLOSE;
2939  }
2940  else if (c == '-') {
2941    tok->type = TK_CC_RANGE;
2942  }
2943  else if (c == MC_ESC(syn)) {
2944    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
2945      goto end;
2946
2947    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2948
2949    PFETCH(c);
2950    tok->escaped = 1;
2951    tok->u.c = c;
2952    switch (c) {
2953    case 'w':
2954      tok->type = TK_CHAR_TYPE;
2955      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
2956      tok->u.prop.not   = 0;
2957      break;
2958    case 'W':
2959      tok->type = TK_CHAR_TYPE;
2960      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
2961      tok->u.prop.not   = 1;
2962      break;
2963    case 'd':
2964      tok->type = TK_CHAR_TYPE;
2965      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
2966      tok->u.prop.not   = 0;
2967      break;
2968    case 'D':
2969      tok->type = TK_CHAR_TYPE;
2970      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
2971      tok->u.prop.not   = 1;
2972      break;
2973    case 's':
2974      tok->type = TK_CHAR_TYPE;
2975      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
2976      tok->u.prop.not   = 0;
2977      break;
2978    case 'S':
2979      tok->type = TK_CHAR_TYPE;
2980      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
2981      tok->u.prop.not   = 1;
2982      break;
2983    case 'h':
2984      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
2985      tok->type = TK_CHAR_TYPE;
2986      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
2987      tok->u.prop.not   = 0;
2988      break;
2989    case 'H':
2990      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
2991      tok->type = TK_CHAR_TYPE;
2992      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
2993      tok->u.prop.not   = 1;
2994      break;
2995
2996    case 'p':
2997    case 'P':
2998      c2 = PPEEK;
2999      if (c2 == '{' &&
3000	  IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
3001	PINC;
3002	tok->type = TK_CHAR_PROPERTY;
3003	tok->u.prop.not = (c == 'P' ? 1 : 0);
3004
3005	if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
3006	  PFETCH(c2);
3007	  if (c2 == '^') {
3008	    tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3009	  }
3010	  else
3011	    PUNFETCH;
3012	}
3013      }
3014      break;
3015
3016    case 'x':
3017      if (PEND) break;
3018
3019      prev = p;
3020      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
3021	PINC;
3022	num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
3023	if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3024	if (!PEND) {
3025          c2 = PPEEK;
3026          if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3027            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3028        }
3029
3030	if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
3031	  PINC;
3032	  tok->type   = TK_CODE_POINT;
3033	  tok->base   = 16;
3034	  tok->u.code = (OnigCodePoint )num;
3035	}
3036	else {
3037	  /* can't read nothing or invalid format */
3038	  p = prev;
3039	}
3040      }
3041      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3042	num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
3043	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3044	if (p == prev) {  /* can't read nothing. */
3045	  num = 0; /* but, it's not error */
3046	}
3047	tok->type = TK_RAW_BYTE;
3048	tok->base = 16;
3049	tok->u.c  = num;
3050      }
3051      break;
3052
3053    case 'u':
3054      if (PEND) break;
3055
3056      prev = p;
3057      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
3058	num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
3059	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3060	if (p == prev) {  /* can't read nothing. */
3061	  num = 0; /* but, it's not error */
3062	}
3063	tok->type   = TK_CODE_POINT;
3064	tok->base   = 16;
3065	tok->u.code = (OnigCodePoint )num;
3066      }
3067      break;
3068
3069    case '0':
3070    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
3071      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
3072	PUNFETCH;
3073	prev = p;
3074	num = scan_unsigned_octal_number(&p, end, 3, enc);
3075	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3076	if (p == prev) {  /* can't read nothing. */
3077	  num = 0; /* but, it's not error */
3078	}
3079	tok->type = TK_RAW_BYTE;
3080	tok->base = 8;
3081	tok->u.c  = num;
3082      }
3083      break;
3084
3085    default:
3086      PUNFETCH;
3087      num = fetch_escaped_value(&p, end, env);
3088      if (num < 0) return num;
3089      if (tok->u.c != num) {
3090	tok->u.code = (OnigCodePoint )num;
3091	tok->type   = TK_CODE_POINT;
3092      }
3093      break;
3094    }
3095  }
3096  else if (c == '[') {
3097    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
3098      OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3099      tok->backp = p; /* point at '[' is readed */
3100      PINC;
3101      if (str_exist_check_with_esc(send, 2, p, end,
3102                                   (OnigCodePoint )']', enc, syn)) {
3103	tok->type = TK_POSIX_BRACKET_OPEN;
3104      }
3105      else {
3106	PUNFETCH;
3107	goto cc_in_cc;
3108      }
3109    }
3110    else {
3111    cc_in_cc:
3112      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
3113	tok->type = TK_CC_CC_OPEN;
3114      }
3115      else {
3116	CC_ESC_WARN(env, (UChar* )"[");
3117      }
3118    }
3119  }
3120  else if (c == '&') {
3121    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
3122	!PEND && (PPEEK_IS('&'))) {
3123      PINC;
3124      tok->type = TK_CC_AND;
3125    }
3126  }
3127
3128 end:
3129  *src = p;
3130  return tok->type;
3131}
3132
3133static int
3134fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
3135{
3136  int r, num;
3137  OnigCodePoint c;
3138  OnigEncoding enc = env->enc;
3139  OnigSyntaxType* syn = env->syntax;
3140  UChar* prev;
3141  UChar* p = *src;
3142  PFETCH_READY;
3143
3144 start:
3145  if (PEND) {
3146    tok->type = TK_EOT;
3147    return tok->type;
3148  }
3149
3150  tok->type  = TK_STRING;
3151  tok->base  = 0;
3152  tok->backp = p;
3153
3154  PFETCH(c);
3155  if (IS_MC_ESC_CODE(c, syn)) {
3156    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3157
3158    tok->backp = p;
3159    PFETCH(c);
3160
3161    tok->u.c = c;
3162    tok->escaped = 1;
3163    switch (c) {
3164    case '*':
3165      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
3166      tok->type = TK_OP_REPEAT;
3167      tok->u.repeat.lower = 0;
3168      tok->u.repeat.upper = REPEAT_INFINITE;
3169      goto greedy_check;
3170      break;
3171
3172    case '+':
3173      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3174      tok->type = TK_OP_REPEAT;
3175      tok->u.repeat.lower = 1;
3176      tok->u.repeat.upper = REPEAT_INFINITE;
3177      goto greedy_check;
3178      break;
3179
3180    case '?':
3181      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
3182      tok->type = TK_OP_REPEAT;
3183      tok->u.repeat.lower = 0;
3184      tok->u.repeat.upper = 1;
3185    greedy_check:
3186      if (!PEND && PPEEK_IS('?') &&
3187	  IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
3188	PFETCH(c);
3189	tok->u.repeat.greedy     = 0;
3190	tok->u.repeat.possessive = 0;
3191      }
3192      else {
3193      possessive_check:
3194	if (!PEND && PPEEK_IS('+') &&
3195	    ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
3196	      tok->type != TK_INTERVAL)  ||
3197	     (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
3198	      tok->type == TK_INTERVAL))) {
3199	  PFETCH(c);
3200	  tok->u.repeat.greedy     = 1;
3201	  tok->u.repeat.possessive = 1;
3202	}
3203	else {
3204	  tok->u.repeat.greedy     = 1;
3205	  tok->u.repeat.possessive = 0;
3206	}
3207      }
3208      break;
3209
3210    case '{':
3211      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
3212      r = fetch_range_quantifier(&p, end, tok, env);
3213      if (r < 0) return r;  /* error */
3214      if (r == 0) goto greedy_check;
3215      else if (r == 2) { /* {n} */
3216	if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
3217	  goto possessive_check;
3218
3219	goto greedy_check;
3220      }
3221      /* r == 1 : normal char */
3222      break;
3223
3224    case '|':
3225      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3226      tok->type = TK_ALT;
3227      break;
3228
3229    case '(':
3230      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3231      tok->type = TK_SUBEXP_OPEN;
3232      break;
3233
3234    case ')':
3235      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3236      tok->type = TK_SUBEXP_CLOSE;
3237      break;
3238
3239    case 'w':
3240      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3241      tok->type = TK_CHAR_TYPE;
3242      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3243      tok->u.prop.not   = 0;
3244      break;
3245
3246    case 'W':
3247      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3248      tok->type = TK_CHAR_TYPE;
3249      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3250      tok->u.prop.not   = 1;
3251      break;
3252
3253    case 'b':
3254      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3255      tok->type = TK_ANCHOR;
3256      tok->u.anchor = ANCHOR_WORD_BOUND;
3257      break;
3258
3259    case 'B':
3260      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3261      tok->type = TK_ANCHOR;
3262      tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
3263      break;
3264
3265#ifdef USE_WORD_BEGIN_END
3266    case '<':
3267      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
3268      tok->type = TK_ANCHOR;
3269      tok->u.anchor = ANCHOR_WORD_BEGIN;
3270      break;
3271
3272    case '>':
3273      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
3274      tok->type = TK_ANCHOR;
3275      tok->u.anchor = ANCHOR_WORD_END;
3276      break;
3277#endif
3278
3279    case 's':
3280      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3281      tok->type = TK_CHAR_TYPE;
3282      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3283      tok->u.prop.not   = 0;
3284      break;
3285
3286    case 'S':
3287      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3288      tok->type = TK_CHAR_TYPE;
3289      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3290      tok->u.prop.not   = 1;
3291      break;
3292
3293    case 'd':
3294      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3295      tok->type = TK_CHAR_TYPE;
3296      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3297      tok->u.prop.not   = 0;
3298      break;
3299
3300    case 'D':
3301      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3302      tok->type = TK_CHAR_TYPE;
3303      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3304      tok->u.prop.not   = 1;
3305      break;
3306
3307    case 'h':
3308      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3309      tok->type = TK_CHAR_TYPE;
3310      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3311      tok->u.prop.not   = 0;
3312      break;
3313
3314    case 'H':
3315      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3316      tok->type = TK_CHAR_TYPE;
3317      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3318      tok->u.prop.not   = 1;
3319      break;
3320
3321    case 'A':
3322      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3323    begin_buf:
3324      tok->type = TK_ANCHOR;
3325      tok->u.subtype = ANCHOR_BEGIN_BUF;
3326      break;
3327
3328    case 'Z':
3329      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3330      tok->type = TK_ANCHOR;
3331      tok->u.subtype = ANCHOR_SEMI_END_BUF;
3332      break;
3333
3334    case 'z':
3335      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3336    end_buf:
3337      tok->type = TK_ANCHOR;
3338      tok->u.subtype = ANCHOR_END_BUF;
3339      break;
3340
3341    case 'G':
3342      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
3343      tok->type = TK_ANCHOR;
3344      tok->u.subtype = ANCHOR_BEGIN_POSITION;
3345      break;
3346
3347    case '`':
3348      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
3349      goto begin_buf;
3350      break;
3351
3352    case '\'':
3353      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
3354      goto end_buf;
3355      break;
3356
3357    case 'x':
3358      if (PEND) break;
3359
3360      prev = p;
3361      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
3362	PINC;
3363	num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
3364	if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3365	if (!PEND) {
3366          if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3367            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3368        }
3369
3370	if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
3371	  PINC;
3372	  tok->type   = TK_CODE_POINT;
3373	  tok->u.code = (OnigCodePoint )num;
3374	}
3375	else {
3376	  /* can't read nothing or invalid format */
3377	  p = prev;
3378	}
3379      }
3380      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3381	num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
3382	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3383	if (p == prev) {  /* can't read nothing. */
3384	  num = 0; /* but, it's not error */
3385	}
3386	tok->type = TK_RAW_BYTE;
3387	tok->base = 16;
3388	tok->u.c  = num;
3389      }
3390      break;
3391
3392    case 'u':
3393      if (PEND) break;
3394
3395      prev = p;
3396      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
3397	num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
3398	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3399	if (p == prev) {  /* can't read nothing. */
3400	  num = 0; /* but, it's not error */
3401	}
3402	tok->type   = TK_CODE_POINT;
3403	tok->base   = 16;
3404	tok->u.code = (OnigCodePoint )num;
3405      }
3406      break;
3407
3408    case '1': case '2': case '3': case '4':
3409    case '5': case '6': case '7': case '8': case '9':
3410      PUNFETCH;
3411      prev = p;
3412      num = onig_scan_unsigned_number(&p, end, enc);
3413      if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3414        goto skip_backref;
3415      }
3416
3417      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
3418	  (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3419	if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
3420	  if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3421	    return ONIGERR_INVALID_BACKREF;
3422	}
3423
3424	tok->type = TK_BACKREF;
3425	tok->u.backref.num     = 1;
3426	tok->u.backref.ref1    = num;
3427	tok->u.backref.by_name = 0;
3428#ifdef USE_BACKREF_WITH_LEVEL
3429	tok->u.backref.exist_level = 0;
3430#endif
3431	break;
3432      }
3433
3434    skip_backref:
3435      if (c == '8' || c == '9') {
3436	/* normal char */
3437	p = prev; PINC;
3438	break;
3439      }
3440
3441      p = prev;
3442      /* fall through */
3443    case '0':
3444      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
3445	prev = p;
3446	num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3447	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3448	if (p == prev) {  /* can't read nothing. */
3449	  num = 0; /* but, it's not error */
3450	}
3451	tok->type = TK_RAW_BYTE;
3452	tok->base = 8;
3453	tok->u.c  = num;
3454      }
3455      else if (c != '0') {
3456	PINC;
3457      }
3458      break;
3459
3460#ifdef USE_NAMED_GROUP
3461    case 'k':
3462      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
3463	PFETCH(c);
3464	if (c == '<' || c == '\'') {
3465	  UChar* name_end;
3466	  int* backs;
3467	  int back_num;
3468
3469	  prev = p;
3470
3471#ifdef USE_BACKREF_WITH_LEVEL
3472	  name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3473	  r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
3474				    env, &back_num, &tok->u.backref.level);
3475	  if (r == 1) tok->u.backref.exist_level = 1;
3476	  else        tok->u.backref.exist_level = 0;
3477#else
3478	  r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3479#endif
3480	  if (r < 0) return r;
3481
3482	  if (back_num != 0) {
3483	    if (back_num < 0) {
3484	      back_num = BACKREF_REL_TO_ABS(back_num, env);
3485	      if (back_num <= 0)
3486		return ONIGERR_INVALID_BACKREF;
3487	    }
3488
3489	    if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
3490	      if (back_num > env->num_mem ||
3491		  IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3492		return ONIGERR_INVALID_BACKREF;
3493	    }
3494	    tok->type = TK_BACKREF;
3495	    tok->u.backref.by_name = 0;
3496	    tok->u.backref.num  = 1;
3497	    tok->u.backref.ref1 = back_num;
3498	  }
3499	  else {
3500	    num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3501	    if (num <= 0) {
3502	      onig_scan_env_set_error_string(env,
3503			     ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3504	      return ONIGERR_UNDEFINED_NAME_REFERENCE;
3505	    }
3506	    if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
3507	      int i;
3508	      for (i = 0; i < num; i++) {
3509		if (backs[i] > env->num_mem ||
3510		    IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3511		  return ONIGERR_INVALID_BACKREF;
3512	      }
3513	    }
3514
3515	    tok->type = TK_BACKREF;
3516	    tok->u.backref.by_name = 1;
3517	    if (num == 1) {
3518	      tok->u.backref.num  = 1;
3519	      tok->u.backref.ref1 = backs[0];
3520	    }
3521	    else {
3522	      tok->u.backref.num  = num;
3523	      tok->u.backref.refs = backs;
3524	    }
3525	  }
3526	}
3527	else
3528	  PUNFETCH;
3529      }
3530      break;
3531#endif
3532
3533#ifdef USE_SUBEXP_CALL
3534    case 'g':
3535      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
3536	PFETCH(c);
3537	if (c == '<' || c == '\'') {
3538	  int gnum;
3539	  UChar* name_end;
3540
3541	  prev = p;
3542	  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3543	  if (r < 0) return r;
3544
3545	  tok->type = TK_CALL;
3546	  tok->u.call.name     = prev;
3547	  tok->u.call.name_end = name_end;
3548	  tok->u.call.gnum     = gnum;
3549	}
3550	else
3551	  PUNFETCH;
3552      }
3553      break;
3554#endif
3555
3556    case 'Q':
3557      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
3558	tok->type = TK_QUOTE_OPEN;
3559      }
3560      break;
3561
3562    case 'p':
3563    case 'P':
3564      if (PPEEK_IS('{') &&
3565	  IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
3566	PINC;
3567	tok->type = TK_CHAR_PROPERTY;
3568	tok->u.prop.not = (c == 'P' ? 1 : 0);
3569
3570	if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
3571	  PFETCH(c);
3572	  if (c == '^') {
3573	    tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3574	  }
3575	  else
3576	    PUNFETCH;
3577	}
3578      }
3579      break;
3580
3581    default:
3582      PUNFETCH;
3583      num = fetch_escaped_value(&p, end, env);
3584      if (num < 0) return num;
3585      /* set_raw: */
3586      if (tok->u.c != num) {
3587	tok->type = TK_CODE_POINT;
3588	tok->u.code = (OnigCodePoint )num;
3589      }
3590      else { /* string */
3591	p = tok->backp + enclen(enc, tok->backp);
3592      }
3593      break;
3594    }
3595  }
3596  else {
3597    tok->u.c = c;
3598    tok->escaped = 0;
3599
3600#ifdef USE_VARIABLE_META_CHARS
3601    if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3602	IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
3603      if (c == MC_ANYCHAR(syn))
3604	goto any_char;
3605      else if (c == MC_ANYTIME(syn))
3606	goto anytime;
3607      else if (c == MC_ZERO_OR_ONE_TIME(syn))
3608	goto zero_or_one_time;
3609      else if (c == MC_ONE_OR_MORE_TIME(syn))
3610	goto one_or_more_time;
3611      else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3612	tok->type = TK_ANYCHAR_ANYTIME;
3613	goto out;
3614      }
3615    }
3616#endif
3617
3618    switch (c) {
3619    case '.':
3620      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3621#ifdef USE_VARIABLE_META_CHARS
3622    any_char:
3623#endif
3624      tok->type = TK_ANYCHAR;
3625      break;
3626
3627    case '*':
3628      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
3629#ifdef USE_VARIABLE_META_CHARS
3630    anytime:
3631#endif
3632      tok->type = TK_OP_REPEAT;
3633      tok->u.repeat.lower = 0;
3634      tok->u.repeat.upper = REPEAT_INFINITE;
3635      goto greedy_check;
3636      break;
3637
3638    case '+':
3639      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3640#ifdef USE_VARIABLE_META_CHARS
3641    one_or_more_time:
3642#endif
3643      tok->type = TK_OP_REPEAT;
3644      tok->u.repeat.lower = 1;
3645      tok->u.repeat.upper = REPEAT_INFINITE;
3646      goto greedy_check;
3647      break;
3648
3649    case '?':
3650      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3651#ifdef USE_VARIABLE_META_CHARS
3652    zero_or_one_time:
3653#endif
3654      tok->type = TK_OP_REPEAT;
3655      tok->u.repeat.lower = 0;
3656      tok->u.repeat.upper = 1;
3657      goto greedy_check;
3658      break;
3659
3660    case '{':
3661      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3662      r = fetch_range_quantifier(&p, end, tok, env);
3663      if (r < 0) return r;  /* error */
3664      if (r == 0) goto greedy_check;
3665      else if (r == 2) { /* {n} */
3666	if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
3667	  goto possessive_check;
3668
3669	goto greedy_check;
3670      }
3671      /* r == 1 : normal char */
3672      break;
3673
3674    case '|':
3675      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3676      tok->type = TK_ALT;
3677      break;
3678
3679    case '(':
3680      if (PPEEK_IS('?') &&
3681          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
3682        PINC;
3683        if (PPEEK_IS('#')) {
3684          PFETCH(c);
3685          while (1) {
3686            if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
3687            PFETCH(c);
3688            if (c == MC_ESC(syn)) {
3689              if (!PEND) PFETCH(c);
3690            }
3691            else {
3692              if (c == ')') break;
3693            }
3694          }
3695          goto start;
3696        }
3697        PUNFETCH;
3698      }
3699
3700      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3701      tok->type = TK_SUBEXP_OPEN;
3702      break;
3703
3704    case ')':
3705      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3706      tok->type = TK_SUBEXP_CLOSE;
3707      break;
3708
3709    case '^':
3710      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3711      tok->type = TK_ANCHOR;
3712      tok->u.subtype = (IS_SINGLELINE(env->option)
3713			? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
3714      break;
3715
3716    case '$':
3717      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3718      tok->type = TK_ANCHOR;
3719      tok->u.subtype = (IS_SINGLELINE(env->option)
3720			? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
3721      break;
3722
3723    case '[':
3724      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
3725      tok->type = TK_CC_OPEN;
3726      break;
3727
3728    case ']':
3729      if (*src > env->pattern)   /* /].../ is allowed. */
3730	CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
3731      break;
3732
3733    case '#':
3734      if (IS_EXTEND(env->option)) {
3735	while (!PEND) {
3736	  PFETCH(c);
3737	  if (ONIGENC_IS_CODE_NEWLINE(enc, c))
3738	    break;
3739	}
3740	goto start;
3741	break;
3742      }
3743      break;
3744
3745    case ' ': case '\t': case '\n': case '\r': case '\f':
3746      if (IS_EXTEND(env->option))
3747	goto start;
3748      break;
3749
3750    default:
3751      /* string */
3752      break;
3753    }
3754  }
3755
3756#ifdef USE_VARIABLE_META_CHARS
3757 out:
3758#endif
3759  *src = p;
3760  return tok->type;
3761}
3762
3763static int
3764add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
3765			 OnigEncoding enc ARG_UNUSED,
3766                         OnigCodePoint sb_out, const OnigCodePoint mbr[])
3767{
3768  int i, r;
3769  OnigCodePoint j;
3770
3771  int n = ONIGENC_CODE_RANGE_NUM(mbr);
3772
3773  if (not == 0) {
3774    for (i = 0; i < n; i++) {
3775      for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
3776           j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
3777	if (j >= sb_out) {
3778	  if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
3779	  else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
3780	    r = add_code_range_to_buf(&(cc->mbuf), j,
3781				      ONIGENC_CODE_RANGE_TO(mbr, i));
3782	    if (r != 0) return r;
3783	    i++;
3784	  }
3785
3786	  goto sb_end;
3787	}
3788        BITSET_SET_BIT(cc->bs, j);
3789      }
3790    }
3791
3792  sb_end:
3793    for ( ; i < n; i++) {
3794      r = add_code_range_to_buf(&(cc->mbuf),
3795                                ONIGENC_CODE_RANGE_FROM(mbr, i),
3796                                ONIGENC_CODE_RANGE_TO(mbr, i));
3797      if (r != 0) return r;
3798    }
3799  }
3800  else {
3801    OnigCodePoint prev = 0;
3802
3803    for (i = 0; i < n; i++) {
3804      for (j = prev;
3805	   j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
3806	if (j >= sb_out) {
3807	  goto sb_end2;
3808	}
3809	BITSET_SET_BIT(cc->bs, j);
3810      }
3811      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
3812    }
3813    for (j = prev; j < sb_out; j++) {
3814      BITSET_SET_BIT(cc->bs, j);
3815    }
3816
3817  sb_end2:
3818    prev = sb_out;
3819
3820    for (i = 0; i < n; i++) {
3821      if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
3822	r = add_code_range_to_buf(&(cc->mbuf), prev,
3823                                  ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
3824	if (r != 0) return r;
3825      }
3826      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
3827    }
3828    if (prev < 0x7fffffff) {
3829      r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
3830      if (r != 0) return r;
3831    }
3832  }
3833
3834  return 0;
3835}
3836
3837static int
3838add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
3839{
3840  int c, r;
3841  const OnigCodePoint *ranges;
3842  OnigCodePoint sb_out;
3843  OnigEncoding enc = env->enc;
3844
3845  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
3846  if (r == 0) {
3847    return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
3848  }
3849  else if (r != ONIG_NO_SUPPORT_CONFIG) {
3850    return r;
3851  }
3852
3853  r = 0;
3854  switch (ctype) {
3855  case ONIGENC_CTYPE_ALPHA:
3856  case ONIGENC_CTYPE_BLANK:
3857  case ONIGENC_CTYPE_CNTRL:
3858  case ONIGENC_CTYPE_DIGIT:
3859  case ONIGENC_CTYPE_LOWER:
3860  case ONIGENC_CTYPE_PUNCT:
3861  case ONIGENC_CTYPE_SPACE:
3862  case ONIGENC_CTYPE_UPPER:
3863  case ONIGENC_CTYPE_XDIGIT:
3864  case ONIGENC_CTYPE_ASCII:
3865  case ONIGENC_CTYPE_ALNUM:
3866    if (not != 0) {
3867      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3868	if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3869	  BITSET_SET_BIT(cc->bs, c);
3870      }
3871      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
3872    }
3873    else {
3874      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3875	if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3876	  BITSET_SET_BIT(cc->bs, c);
3877      }
3878    }
3879    break;
3880
3881  case ONIGENC_CTYPE_GRAPH:
3882  case ONIGENC_CTYPE_PRINT:
3883    if (not != 0) {
3884      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3885	if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3886	  BITSET_SET_BIT(cc->bs, c);
3887      }
3888    }
3889    else {
3890      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3891	if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3892	  BITSET_SET_BIT(cc->bs, c);
3893      }
3894      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
3895    }
3896    break;
3897
3898  case ONIGENC_CTYPE_WORD:
3899    if (not == 0) {
3900      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3901	if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
3902      }
3903      ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
3904    }
3905    else {
3906      for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3907        if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
3908	    && ! ONIGENC_IS_CODE_WORD(enc, c))
3909	  BITSET_SET_BIT(cc->bs, c);
3910      }
3911    }
3912    break;
3913
3914  default:
3915    return ONIGERR_PARSER_BUG;
3916    break;
3917  }
3918
3919  return r;
3920}
3921
3922static int
3923parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
3924{
3925#define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
3926#define POSIX_BRACKET_NAME_MIN_LEN         4
3927
3928  static PosixBracketEntryType PBS[] = {
3929    { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
3930    { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
3931    { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
3932    { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
3933    { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
3934    { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
3935    { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
3936    { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
3937    { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
3938    { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
3939    { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
3940    { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
3941    { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
3942    { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
3943    { (UChar* )NULL,     -1, 0 }
3944  };
3945
3946  PosixBracketEntryType *pb;
3947  int not, i, r;
3948  OnigCodePoint c;
3949  OnigEncoding enc = env->enc;
3950  UChar *p = *src;
3951
3952  if (PPEEK_IS('^')) {
3953    PINC_S;
3954    not = 1;
3955  }
3956  else
3957    not = 0;
3958
3959  if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
3960    goto not_posix_bracket;
3961
3962  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
3963    if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
3964      p = (UChar* )onigenc_step(enc, p, end, pb->len);
3965      if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
3966        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
3967
3968      r = add_ctype_to_cc(cc, pb->ctype, not, env);
3969      if (r != 0) return r;
3970
3971      PINC_S; PINC_S;
3972      *src = p;
3973      return 0;
3974    }
3975  }
3976
3977 not_posix_bracket:
3978  c = 0;
3979  i = 0;
3980  while (!PEND && ((c = PPEEK) != ':') && c != ']') {
3981    PINC_S;
3982    if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
3983  }
3984  if (c == ':' && ! PEND) {
3985    PINC_S;
3986    if (! PEND) {
3987      PFETCH_S(c);
3988      if (c == ']')
3989        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
3990    }
3991  }
3992
3993  return 1;  /* 1: is not POSIX bracket, but no error. */
3994}
3995
3996static int
3997fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
3998{
3999  int r;
4000  OnigCodePoint c;
4001  OnigEncoding enc = env->enc;
4002  UChar *prev, *start, *p = *src;
4003
4004  r = 0;
4005  start = prev = p;
4006
4007  while (!PEND) {
4008    prev = p;
4009    PFETCH_S(c);
4010    if (c == '}') {
4011      r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4012      if (r < 0) break;
4013
4014      *src = p;
4015      return r;
4016    }
4017    else if (c == '(' || c == ')' || c == '{' || c == '|') {
4018      r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
4019      break;
4020    }
4021  }
4022
4023  onig_scan_env_set_error_string(env, r, *src, prev);
4024  return r;
4025}
4026
4027static int
4028parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
4029		    ScanEnv* env)
4030{
4031  int r, ctype;
4032  CClassNode* cc;
4033
4034  ctype = fetch_char_property_to_ctype(src, end, env);
4035  if (ctype < 0) return ctype;
4036
4037  *np = node_new_cclass();
4038  CHECK_NULL_RETURN_MEMERR(*np);
4039  cc = NCCLASS(*np);
4040  r = add_ctype_to_cc(cc, ctype, 0, env);
4041  if (r != 0) return r;
4042  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4043
4044  return 0;
4045}
4046
4047
4048enum CCSTATE {
4049  CCS_VALUE,
4050  CCS_RANGE,
4051  CCS_COMPLETE,
4052  CCS_START
4053};
4054
4055enum CCVALTYPE {
4056  CCV_SB,
4057  CCV_CODE_POINT,
4058  CCV_CLASS
4059};
4060
4061static int
4062next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
4063		 enum CCSTATE* state, ScanEnv* env)
4064{
4065  int r;
4066
4067  if (*state == CCS_RANGE)
4068    return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
4069
4070  if (*state == CCS_VALUE && *type != CCV_CLASS) {
4071    if (*type == CCV_SB)
4072      BITSET_SET_BIT(cc->bs, (int )(*vs));
4073    else if (*type == CCV_CODE_POINT) {
4074      r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4075      if (r < 0) return r;
4076    }
4077  }
4078
4079  *state = CCS_VALUE;
4080  *type  = CCV_CLASS;
4081  return 0;
4082}
4083
4084static int
4085next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
4086	       int* vs_israw, int v_israw,
4087	       enum CCVALTYPE intype, enum CCVALTYPE* type,
4088	       enum CCSTATE* state, ScanEnv* env)
4089{
4090  int r;
4091
4092  switch (*state) {
4093  case CCS_VALUE:
4094    if (*type == CCV_SB)
4095      BITSET_SET_BIT(cc->bs, (int )(*vs));
4096    else if (*type == CCV_CODE_POINT) {
4097      r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4098      if (r < 0) return r;
4099    }
4100    break;
4101
4102  case CCS_RANGE:
4103    if (intype == *type) {
4104      if (intype == CCV_SB) {
4105        if (*vs > 0xff || v > 0xff)
4106          return ONIGERR_INVALID_CODE_POINT_VALUE;
4107
4108	if (*vs > v) {
4109	  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
4110	    goto ccs_range_end;
4111	  else
4112	    return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
4113	}
4114	bitset_set_range(cc->bs, (int )*vs, (int )v);
4115      }
4116      else {
4117	r = add_code_range(&(cc->mbuf), env, *vs, v);
4118	if (r < 0) return r;
4119      }
4120    }
4121    else {
4122#if 0
4123      if (intype == CCV_CODE_POINT && *type == CCV_SB) {
4124#endif
4125	if (*vs > v) {
4126	  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
4127	    goto ccs_range_end;
4128	  else
4129	    return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
4130	}
4131	bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
4132	r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
4133	if (r < 0) return r;
4134#if 0
4135      }
4136      else
4137	return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
4138#endif
4139    }
4140  ccs_range_end:
4141    *state = CCS_COMPLETE;
4142    break;
4143
4144  case CCS_COMPLETE:
4145  case CCS_START:
4146    *state = CCS_VALUE;
4147    break;
4148
4149  default:
4150    break;
4151  }
4152
4153  *vs_israw = v_israw;
4154  *vs       = v;
4155  *type     = intype;
4156  return 0;
4157}
4158
4159static int
4160code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4161		 ScanEnv* env)
4162{
4163  int in_esc;
4164  OnigCodePoint code;
4165  OnigEncoding enc = env->enc;
4166  UChar* p = from;
4167
4168  in_esc = 0;
4169  while (! PEND) {
4170    if (ignore_escaped && in_esc) {
4171      in_esc = 0;
4172    }
4173    else {
4174      PFETCH_S(code);
4175      if (code == c) return 1;
4176      if (code == MC_ESC(env->syntax)) in_esc = 1;
4177    }
4178  }
4179  return 0;
4180}
4181
4182static int
4183parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
4184		 ScanEnv* env)
4185{
4186  int r, neg, len, fetched, and_start;
4187  OnigCodePoint v, vs;
4188  UChar *p;
4189  Node* node;
4190  CClassNode *cc, *prev_cc;
4191  CClassNode work_cc;
4192
4193  enum CCSTATE state;
4194  enum CCVALTYPE val_type, in_type;
4195  int val_israw, in_israw;
4196
4197  prev_cc = (CClassNode* )NULL;
4198  *np = NULL_NODE;
4199  r = fetch_token_in_cc(tok, src, end, env);
4200  if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4201    neg = 1;
4202    r = fetch_token_in_cc(tok, src, end, env);
4203  }
4204  else {
4205    neg = 0;
4206  }
4207
4208  if (r < 0) return r;
4209  if (r == TK_CC_CLOSE) {
4210    if (! code_exist_check((OnigCodePoint )']',
4211                           *src, env->pattern_end, 1, env))
4212      return ONIGERR_EMPTY_CHAR_CLASS;
4213
4214    CC_ESC_WARN(env, (UChar* )"]");
4215    r = tok->type = TK_CHAR;  /* allow []...] */
4216  }
4217
4218  *np = node = node_new_cclass();
4219  CHECK_NULL_RETURN_MEMERR(node);
4220  cc = NCCLASS(node);
4221
4222  and_start = 0;
4223  state = CCS_START;
4224  p = *src;
4225  while (r != TK_CC_CLOSE) {
4226    fetched = 0;
4227    switch (r) {
4228    case TK_CHAR:
4229      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
4230      if (len > 1) {
4231	in_type = CCV_CODE_POINT;
4232      }
4233      else if (len < 0) {
4234	r = len;
4235	goto err;
4236      }
4237      else {
4238      sb_char:
4239	in_type = CCV_SB;
4240      }
4241      v = (OnigCodePoint )tok->u.c;
4242      in_israw = 0;
4243      goto val_entry2;
4244      break;
4245
4246    case TK_RAW_BYTE:
4247      /* tok->base != 0 : octal or hexadec. */
4248      if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
4249	UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4250	UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
4251	UChar* psave = p;
4252	int i, base = tok->base;
4253
4254	buf[0] = (UChar)tok->u.c;
4255	for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4256	  r = fetch_token_in_cc(tok, &p, end, env);
4257	  if (r < 0) goto err;
4258	  if (r != TK_RAW_BYTE || tok->base != base) {
4259	    fetched = 1;
4260	    break;
4261	  }
4262	  buf[i] = (UChar)tok->u.c;
4263	}
4264
4265	if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4266	  r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
4267	  goto err;
4268	}
4269
4270	len = enclen(env->enc, buf);
4271	if (i < len) {
4272	  r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
4273	  goto err;
4274	}
4275	else if (i > len) { /* fetch back */
4276	  p = psave;
4277	  for (i = 1; i < len; i++) {
4278	    r = fetch_token_in_cc(tok, &p, end, env);
4279	  }
4280	  fetched = 0;
4281	}
4282
4283	if (i == 1) {
4284	  v = (OnigCodePoint )buf[0];
4285	  goto raw_single;
4286	}
4287	else {
4288	  v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4289	  in_type = CCV_CODE_POINT;
4290	}
4291      }
4292      else {
4293	v = (OnigCodePoint )tok->u.c;
4294      raw_single:
4295	in_type = CCV_SB;
4296      }
4297      in_israw = 1;
4298      goto val_entry2;
4299      break;
4300
4301    case TK_CODE_POINT:
4302      v = tok->u.code;
4303      in_israw = 1;
4304    val_entry:
4305      len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
4306      if (len < 0) {
4307	r = len;
4308	goto err;
4309      }
4310      in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4311    val_entry2:
4312      r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4313			 &state, env);
4314      if (r != 0) goto err;
4315      break;
4316
4317    case TK_POSIX_BRACKET_OPEN:
4318      r = parse_posix_bracket(cc, &p, end, env);
4319      if (r < 0) goto err;
4320      if (r == 1) {  /* is not POSIX bracket */
4321	CC_ESC_WARN(env, (UChar* )"[");
4322	p = tok->backp;
4323	v = (OnigCodePoint )tok->u.c;
4324	in_israw = 0;
4325	goto val_entry;
4326      }
4327      goto next_class;
4328      break;
4329
4330    case TK_CHAR_TYPE:
4331      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
4332      if (r != 0) return r;
4333
4334    next_class:
4335      r = next_state_class(cc, &vs, &val_type, &state, env);
4336      if (r != 0) goto err;
4337      break;
4338
4339    case TK_CHAR_PROPERTY:
4340      {
4341	int ctype;
4342
4343	ctype = fetch_char_property_to_ctype(&p, end, env);
4344	if (ctype < 0) return ctype;
4345	r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
4346	if (r != 0) return r;
4347	goto next_class;
4348      }
4349      break;
4350
4351    case TK_CC_RANGE:
4352      if (state == CCS_VALUE) {
4353	r = fetch_token_in_cc(tok, &p, end, env);
4354	if (r < 0) goto err;
4355	fetched = 1;
4356	if (r == TK_CC_CLOSE) { /* allow [x-] */
4357	range_end_val:
4358	  v = (OnigCodePoint )'-';
4359	  in_israw = 0;
4360	  goto val_entry;
4361	}
4362	else if (r == TK_CC_AND) {
4363	  CC_ESC_WARN(env, (UChar* )"-");
4364	  goto range_end_val;
4365	}
4366	state = CCS_RANGE;
4367      }
4368      else if (state == CCS_START) {
4369	/* [-xa] is allowed */
4370	v = (OnigCodePoint )tok->u.c;
4371	in_israw = 0;
4372
4373	r = fetch_token_in_cc(tok, &p, end, env);
4374	if (r < 0) goto err;
4375	fetched = 1;
4376	/* [--x] or [a&&-x] is warned. */
4377	if (r == TK_CC_RANGE || and_start != 0)
4378	  CC_ESC_WARN(env, (UChar* )"-");
4379
4380	goto val_entry;
4381      }
4382      else if (state == CCS_RANGE) {
4383	CC_ESC_WARN(env, (UChar* )"-");
4384	goto sb_char;  /* [!--x] is allowed */
4385      }
4386      else { /* CCS_COMPLETE */
4387	r = fetch_token_in_cc(tok, &p, end, env);
4388	if (r < 0) goto err;
4389	fetched = 1;
4390	if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4391	else if (r == TK_CC_AND) {
4392	  CC_ESC_WARN(env, (UChar* )"-");
4393	  goto range_end_val;
4394	}
4395
4396	if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
4397	  CC_ESC_WARN(env, (UChar* )"-");
4398	  goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
4399	}
4400	r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
4401	goto err;
4402      }
4403      break;
4404
4405    case TK_CC_CC_OPEN: /* [ */
4406      {
4407	Node *anode;
4408	CClassNode* acc;
4409
4410	r = parse_char_class(&anode, tok, &p, end, env);
4411	if (r != 0) goto cc_open_err;
4412	acc = NCCLASS(anode);
4413	r = or_cclass(cc, acc, env->enc);
4414
4415	onig_node_free(anode);
4416      cc_open_err:
4417	if (r != 0) goto err;
4418      }
4419      break;
4420
4421    case TK_CC_AND: /* && */
4422      {
4423	if (state == CCS_VALUE) {
4424	  r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
4425			     &val_type, &state, env);
4426	  if (r != 0) goto err;
4427	}
4428	/* initialize local variables */
4429	and_start = 1;
4430	state = CCS_START;
4431
4432	if (IS_NOT_NULL(prev_cc)) {
4433	  r = and_cclass(prev_cc, cc, env->enc);
4434	  if (r != 0) goto err;
4435	  bbuf_free(cc->mbuf);
4436	}
4437	else {
4438	  prev_cc = cc;
4439	  cc = &work_cc;
4440	}
4441	initialize_cclass(cc);
4442      }
4443      break;
4444
4445    case TK_EOT:
4446      r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
4447      goto err;
4448      break;
4449    default:
4450      r = ONIGERR_PARSER_BUG;
4451      goto err;
4452      break;
4453    }
4454
4455    if (fetched)
4456      r = tok->type;
4457    else {
4458      r = fetch_token_in_cc(tok, &p, end, env);
4459      if (r < 0) goto err;
4460    }
4461  }
4462
4463  if (state == CCS_VALUE) {
4464    r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
4465		       &val_type, &state, env);
4466    if (r != 0) goto err;
4467  }
4468
4469  if (IS_NOT_NULL(prev_cc)) {
4470    r = and_cclass(prev_cc, cc, env->enc);
4471    if (r != 0) goto err;
4472    bbuf_free(cc->mbuf);
4473    cc = prev_cc;
4474  }
4475
4476  if (neg != 0)
4477    NCCLASS_SET_NOT(cc);
4478  else
4479    NCCLASS_CLEAR_NOT(cc);
4480  if (IS_NCCLASS_NOT(cc) &&
4481      IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
4482    int is_empty;
4483
4484    is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4485    if (is_empty != 0)
4486      BITSET_IS_EMPTY(cc->bs, is_empty);
4487
4488    if (is_empty == 0) {
4489#define NEWLINE_CODE    0x0a
4490
4491      if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
4492        if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4493          BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
4494        else
4495          add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4496      }
4497    }
4498  }
4499  *src = p;
4500  return 0;
4501
4502 err:
4503  if (cc != NCCLASS(*np))
4504    bbuf_free(cc->mbuf);
4505  onig_node_free(*np);
4506  return r;
4507}
4508
4509static int parse_subexp(Node** top, OnigToken* tok, int term,
4510			UChar** src, UChar* end, ScanEnv* env);
4511
4512static int
4513parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4514	      ScanEnv* env)
4515{
4516  int r, num;
4517  Node *target;
4518  OnigOptionType option;
4519  OnigCodePoint c;
4520  OnigEncoding enc = env->enc;
4521
4522#ifdef USE_NAMED_GROUP
4523  int list_capture;
4524#endif
4525
4526  UChar* p = *src;
4527  PFETCH_READY;
4528
4529  *np = NULL;
4530  if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
4531
4532  option = env->option;
4533  if (PPEEK_IS('?') &&
4534      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
4535    PINC;
4536    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4537
4538    PFETCH(c);
4539    switch (c) {
4540    case ':':   /* (?:...) grouping only */
4541    group:
4542      r = fetch_token(tok, &p, end, env);
4543      if (r < 0) return r;
4544      r = parse_subexp(np, tok, term, &p, end, env);
4545      if (r < 0) return r;
4546      *src = p;
4547      return 1; /* group */
4548      break;
4549
4550    case '=':
4551      *np = onig_node_new_anchor(ANCHOR_PREC_READ);
4552      break;
4553    case '!':  /*         preceding read */
4554      *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
4555      break;
4556    case '>':            /* (?>...) stop backtrack */
4557      *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
4558      break;
4559
4560#ifdef USE_NAMED_GROUP
4561    case '\'':
4562      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
4563	goto named_group1;
4564      }
4565      else
4566	return ONIGERR_UNDEFINED_GROUP_OPTION;
4567      break;
4568#endif
4569
4570    case '<':   /* look behind (?<=...), (?<!...) */
4571      PFETCH(c);
4572      if (c == '=')
4573	*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
4574      else if (c == '!')
4575	*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
4576#ifdef USE_NAMED_GROUP
4577      else {
4578	if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
4579	  UChar *name;
4580	  UChar *name_end;
4581
4582	  PUNFETCH;
4583	  c = '<';
4584
4585	named_group1:
4586	  list_capture = 0;
4587
4588	named_group2:
4589	  name = p;
4590	  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
4591	  if (r < 0) return r;
4592
4593	  num = scan_env_add_mem_entry(env);
4594	  if (num < 0) return num;
4595	  if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
4596	    return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
4597
4598	  r = name_add(env->reg, name, name_end, num, env);
4599	  if (r != 0) return r;
4600	  *np = node_new_enclose_memory(env->option, 1);
4601	  CHECK_NULL_RETURN_MEMERR(*np);
4602	  NENCLOSE(*np)->regnum = num;
4603	  if (list_capture != 0)
4604	    BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
4605	  env->num_named++;
4606	}
4607	else {
4608	  return ONIGERR_UNDEFINED_GROUP_OPTION;
4609	}
4610      }
4611#else
4612      else {
4613	return ONIGERR_UNDEFINED_GROUP_OPTION;
4614      }
4615#endif
4616      break;
4617
4618    case '@':
4619      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
4620#ifdef USE_NAMED_GROUP
4621	if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
4622	  PFETCH(c);
4623	  if (c == '<' || c == '\'') {
4624	    list_capture = 1;
4625	    goto named_group2; /* (?@<name>...) */
4626	  }
4627	  PUNFETCH;
4628	}
4629#endif
4630	*np = node_new_enclose_memory(env->option, 0);
4631	CHECK_NULL_RETURN_MEMERR(*np);
4632	num = scan_env_add_mem_entry(env);
4633	if (num < 0) {
4634	  onig_node_free(*np);
4635	  return num;
4636	}
4637	else if (num >= (int )BIT_STATUS_BITS_NUM) {
4638	  onig_node_free(*np);
4639	  return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
4640	}
4641	NENCLOSE(*np)->regnum = num;
4642	BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
4643      }
4644      else {
4645	return ONIGERR_UNDEFINED_GROUP_OPTION;
4646      }
4647      break;
4648
4649#ifdef USE_POSIXLINE_OPTION
4650    case 'p':
4651#endif
4652    case '-': case 'i': case 'm': case 's': case 'x':
4653      {
4654	int neg = 0;
4655
4656	while (1) {
4657	  switch (c) {
4658	  case ':':
4659	  case ')':
4660	  break;
4661
4662	  case '-':  neg = 1; break;
4663	  case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
4664	  case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
4665	  case 's':
4666	    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
4667	      ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
4668	    }
4669	    else
4670	      return ONIGERR_UNDEFINED_GROUP_OPTION;
4671	    break;
4672
4673	  case 'm':
4674	    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
4675	      ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
4676	    }
4677	    else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
4678	      ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
4679	    }
4680	    else
4681	      return ONIGERR_UNDEFINED_GROUP_OPTION;
4682	    break;
4683#ifdef USE_POSIXLINE_OPTION
4684	  case 'p':
4685	    ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
4686	    break;
4687#endif
4688	  default:
4689	    return ONIGERR_UNDEFINED_GROUP_OPTION;
4690	  }
4691
4692	  if (c == ')') {
4693	    *np = node_new_option(option);
4694	    CHECK_NULL_RETURN_MEMERR(*np);
4695	    *src = p;
4696	    return 2; /* option only */
4697	  }
4698	  else if (c == ':') {
4699	    OnigOptionType prev = env->option;
4700
4701	    env->option     = option;
4702	    r = fetch_token(tok, &p, end, env);
4703	    if (r < 0) return r;
4704	    r = parse_subexp(&target, tok, term, &p, end, env);
4705	    env->option = prev;
4706	    if (r < 0) return r;
4707	    *np = node_new_option(option);
4708	    CHECK_NULL_RETURN_MEMERR(*np);
4709	    NENCLOSE(*np)->target = target;
4710	    *src = p;
4711	    return 0;
4712	  }
4713
4714	  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4715	  PFETCH(c);
4716	}
4717      }
4718      break;
4719
4720    default:
4721      return ONIGERR_UNDEFINED_GROUP_OPTION;
4722    }
4723  }
4724  else {
4725    if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
4726      goto group;
4727
4728    *np = node_new_enclose_memory(env->option, 0);
4729    CHECK_NULL_RETURN_MEMERR(*np);
4730    num = scan_env_add_mem_entry(env);
4731    if (num < 0) return num;
4732    NENCLOSE(*np)->regnum = num;
4733  }
4734
4735  CHECK_NULL_RETURN_MEMERR(*np);
4736  r = fetch_token(tok, &p, end, env);
4737  if (r < 0) return r;
4738  r = parse_subexp(&target, tok, term, &p, end, env);
4739  if (r < 0) return r;
4740
4741  if (NTYPE(*np) == NT_ANCHOR)
4742    NANCHOR(*np)->target = target;
4743  else {
4744    NENCLOSE(*np)->target = target;
4745    if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
4746      /* Don't move this to previous of parse_subexp() */
4747      r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
4748      if (r != 0) return r;
4749    }
4750  }
4751
4752  *src = p;
4753  return 0;
4754}
4755
4756static const char* PopularQStr[] = {
4757  "?", "*", "+", "??", "*?", "+?"
4758};
4759
4760static const char* ReduceQStr[] = {
4761  "", "", "*", "*?", "??", "+ and ??", "+? and ?"
4762};
4763
4764static int
4765set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
4766{
4767  QtfrNode* qn;
4768
4769  qn = NQTFR(qnode);
4770  if (qn->lower == 1 && qn->upper == 1) {
4771    return 1;
4772  }
4773
4774  switch (NTYPE(target)) {
4775  case NT_STR:
4776    if (! group) {
4777      StrNode* sn = NSTR(target);
4778      if (str_node_can_be_split(sn, env->enc)) {
4779	Node* n = str_node_split_last_char(sn, env->enc);
4780	if (IS_NOT_NULL(n)) {
4781	  qn->target = n;
4782	  return 2;
4783	}
4784      }
4785    }
4786    break;
4787
4788  case NT_QTFR:
4789    { /* check redundant double repeat. */
4790      /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
4791      QtfrNode* qnt   = NQTFR(target);
4792      int nestq_num   = popular_quantifier_num(qn);
4793      int targetq_num = popular_quantifier_num(qnt);
4794      if (nestq_num < 0 || targetq_num < 0) {
4795        return ONIGERR_TYPE_BUG;
4796      }
4797
4798#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
4799      if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
4800	  IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
4801        UChar buf[WARN_BUFSIZE];
4802
4803        switch(ReduceTypeTable[targetq_num][nestq_num]) {
4804        case RQ_ASIS:
4805          break;
4806
4807        case RQ_DEL:
4808          if (onig_verb_warn != onig_null_warn) {
4809            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4810                                 env->pattern, env->pattern_end,
4811                                 (UChar* )"redundant nested repeat operator");
4812            (*onig_verb_warn)((char* )buf);
4813          }
4814          goto warn_exit;
4815          break;
4816
4817        default:
4818          if (onig_verb_warn != onig_null_warn) {
4819            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4820                                       env->pattern, env->pattern_end,
4821            (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
4822            PopularQStr[targetq_num], PopularQStr[nestq_num],
4823            ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
4824            (*onig_verb_warn)((char* )buf);
4825          }
4826          goto warn_exit;
4827          break;
4828        }
4829      }
4830
4831    warn_exit:
4832#endif
4833      if (targetq_num >= 0) {
4834	if (nestq_num >= 0) {
4835	  onig_reduce_nested_quantifier(qnode, target);
4836	  goto q_exit;
4837	}
4838	else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
4839	  /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
4840	  if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
4841	    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
4842	  }
4843	}
4844      }
4845    }
4846    break;
4847
4848  default:
4849    break;
4850  }
4851
4852  qn->target = target;
4853 q_exit:
4854  return 0;
4855}
4856
4857
4858#ifdef USE_SHARED_CCLASS_TABLE
4859
4860#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
4861
4862/* for ctype node hash table */
4863
4864typedef struct {
4865  OnigEncoding enc;
4866  int not;
4867  int type;
4868} type_cclass_key;
4869
4870static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
4871{
4872  if (x->type != y->type) return 1;
4873  if (x->enc  != y->enc)  return 1;
4874  if (x->not  != y->not)  return 1;
4875  return 0;
4876}
4877
4878static int type_cclass_hash(type_cclass_key* key)
4879{
4880  int i, val;
4881  UChar *p;
4882
4883  val = 0;
4884
4885  p = (UChar* )&(key->enc);
4886  for (i = 0; i < (int )sizeof(key->enc); i++) {
4887    val = val * 997 + (int )*p++;
4888  }
4889
4890  p = (UChar* )(&key->type);
4891  for (i = 0; i < (int )sizeof(key->type); i++) {
4892    val = val * 997 + (int )*p++;
4893  }
4894
4895  val += key->not;
4896  return val + (val >> 5);
4897}
4898
4899static struct st_hash_type type_type_cclass_hash = {
4900    type_cclass_cmp,
4901    type_cclass_hash,
4902};
4903
4904static st_table* OnigTypeCClassTable;
4905
4906
4907static int
4908i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
4909{
4910  if (IS_NOT_NULL(node)) {
4911    CClassNode* cc = NCCLASS(node);
4912    if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
4913    xfree(node);
4914  }
4915
4916  if (IS_NOT_NULL(key)) xfree(key);
4917  return ST_DELETE;
4918}
4919
4920extern int
4921onig_free_shared_cclass_table(void)
4922{
4923  if (IS_NOT_NULL(OnigTypeCClassTable)) {
4924    onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
4925    onig_st_free_table(OnigTypeCClassTable);
4926    OnigTypeCClassTable = NULL;
4927  }
4928
4929  return 0;
4930}
4931
4932#endif /* USE_SHARED_CCLASS_TABLE */
4933
4934
4935#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
4936static int
4937clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
4938{
4939  BBuf *tbuf;
4940  int r;
4941
4942  if (IS_NCCLASS_NOT(cc)) {
4943    bitset_invert(cc->bs);
4944
4945    if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4946      r = not_code_range_buf(enc, cc->mbuf, &tbuf);
4947      if (r != 0) return r;
4948
4949      bbuf_free(cc->mbuf);
4950      cc->mbuf = tbuf;
4951    }
4952
4953    NCCLASS_CLEAR_NOT(cc);
4954  }
4955
4956  return 0;
4957}
4958#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
4959
4960typedef struct {
4961  ScanEnv*    env;
4962  CClassNode* cc;
4963  Node*       alt_root;
4964  Node**      ptail;
4965} IApplyCaseFoldArg;
4966
4967static int
4968i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
4969		  int to_len, void* arg)
4970{
4971  IApplyCaseFoldArg* iarg;
4972  ScanEnv* env;
4973  CClassNode* cc;
4974  BitSetRef bs;
4975
4976  iarg = (IApplyCaseFoldArg* )arg;
4977  env = iarg->env;
4978  cc  = iarg->cc;
4979  bs = cc->bs;
4980
4981  if (to_len == 1) {
4982    int is_in = onig_is_code_in_cc(env->enc, from, cc);
4983#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
4984    if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
4985	(is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
4986      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
4987	add_code_range(&(cc->mbuf), env, *to, *to);
4988      }
4989      else {
4990	BITSET_SET_BIT(bs, *to);
4991      }
4992    }
4993#else
4994    if (is_in != 0) {
4995      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
4996	if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
4997	add_code_range(&(cc->mbuf), env, *to, *to);
4998      }
4999      else {
5000	if (IS_NCCLASS_NOT(cc)) {
5001	  BITSET_CLEAR_BIT(bs, *to);
5002	}
5003	else
5004	  BITSET_SET_BIT(bs, *to);
5005      }
5006    }
5007#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5008  }
5009  else {
5010    int r, i, len;
5011    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5012    Node *snode = NULL_NODE;
5013
5014    if (onig_is_code_in_cc(env->enc, from, cc)
5015#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5016	&& !IS_NCCLASS_NOT(cc)
5017#endif
5018	) {
5019      for (i = 0; i < to_len; i++) {
5020	len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5021	if (i == 0) {
5022	  snode = onig_node_new_str(buf, buf + len);
5023	  CHECK_NULL_RETURN_MEMERR(snode);
5024
5025	  /* char-class expanded multi-char only
5026	     compare with string folded at match time. */
5027	  NSTRING_SET_AMBIG(snode);
5028	}
5029	else {
5030	  r = onig_node_str_cat(snode, buf, buf + len);
5031	  if (r < 0) {
5032	    onig_node_free(snode);
5033	    return r;
5034	  }
5035	}
5036      }
5037
5038      *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5039      CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
5040      iarg->ptail = &(NCDR((*(iarg->ptail))));
5041    }
5042  }
5043
5044  return 0;
5045}
5046
5047static int
5048parse_exp(Node** np, OnigToken* tok, int term,
5049	  UChar** src, UChar* end, ScanEnv* env)
5050{
5051  int r, len, group = 0;
5052  Node* qn;
5053  Node** targetp;
5054
5055  *np = NULL;
5056  if (tok->type == (enum TokenSyms )term)
5057    goto end_of_token;
5058
5059  switch (tok->type) {
5060  case TK_ALT:
5061  case TK_EOT:
5062  end_of_token:
5063  *np = node_new_empty();
5064  return tok->type;
5065  break;
5066
5067  case TK_SUBEXP_OPEN:
5068    r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
5069    if (r < 0) return r;
5070    if (r == 1) group = 1;
5071    else if (r == 2) { /* option only */
5072      Node* target;
5073      OnigOptionType prev = env->option;
5074
5075      env->option = NENCLOSE(*np)->option;
5076      r = fetch_token(tok, src, end, env);
5077      if (r < 0) return r;
5078      r = parse_subexp(&target, tok, term, src, end, env);
5079      env->option = prev;
5080      if (r < 0) return r;
5081      NENCLOSE(*np)->target = target;
5082      return tok->type;
5083    }
5084    break;
5085
5086  case TK_SUBEXP_CLOSE:
5087    if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
5088      return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
5089
5090    if (tok->escaped) goto tk_raw_byte;
5091    else goto tk_byte;
5092    break;
5093
5094  case TK_STRING:
5095  tk_byte:
5096    {
5097      *np = node_new_str(tok->backp, *src);
5098      CHECK_NULL_RETURN_MEMERR(*np);
5099
5100      while (1) {
5101	r = fetch_token(tok, src, end, env);
5102	if (r < 0) return r;
5103	if (r != TK_STRING) break;
5104
5105	r = onig_node_str_cat(*np, tok->backp, *src);
5106	if (r < 0) return r;
5107      }
5108
5109    string_end:
5110      targetp = np;
5111      goto repeat;
5112    }
5113    break;
5114
5115  case TK_RAW_BYTE:
5116  tk_raw_byte:
5117    {
5118      *np = node_new_str_raw_char((UChar )tok->u.c);
5119      CHECK_NULL_RETURN_MEMERR(*np);
5120      len = 1;
5121      while (1) {
5122	if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
5123	  if (len == enclen(env->enc, NSTR(*np)->s)) {
5124	    r = fetch_token(tok, src, end, env);
5125	    NSTRING_CLEAR_RAW(*np);
5126	    goto string_end;
5127	  }
5128	}
5129
5130	r = fetch_token(tok, src, end, env);
5131	if (r < 0) return r;
5132	if (r != TK_RAW_BYTE) {
5133	  /* Don't use this, it is wrong for little endian encodings. */
5134#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
5135	  int rem;
5136	  if (len < ONIGENC_MBC_MINLEN(env->enc)) {
5137	    rem = ONIGENC_MBC_MINLEN(env->enc) - len;
5138	    (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
5139	    if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
5140	      NSTRING_CLEAR_RAW(*np);
5141	      goto string_end;
5142	    }
5143	  }
5144#endif
5145	  return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
5146	}
5147
5148	r = node_str_cat_char(*np, (UChar )tok->u.c);
5149	if (r < 0) return r;
5150
5151	len++;
5152      }
5153    }
5154    break;
5155
5156  case TK_CODE_POINT:
5157    {
5158      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5159      int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
5160      if (num < 0) return num;
5161#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
5162      *np = node_new_str_raw(buf, buf + num);
5163#else
5164      *np = node_new_str(buf, buf + num);
5165#endif
5166      CHECK_NULL_RETURN_MEMERR(*np);
5167    }
5168    break;
5169
5170  case TK_QUOTE_OPEN:
5171    {
5172      OnigCodePoint end_op[2];
5173      UChar *qstart, *qend, *nextp;
5174
5175      end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
5176      end_op[1] = (OnigCodePoint )'E';
5177      qstart = *src;
5178      qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
5179      if (IS_NULL(qend)) {
5180	nextp = qend = end;
5181      }
5182      *np = node_new_str(qstart, qend);
5183      CHECK_NULL_RETURN_MEMERR(*np);
5184      *src = nextp;
5185    }
5186    break;
5187
5188  case TK_CHAR_TYPE:
5189    {
5190      switch (tok->u.prop.ctype) {
5191      case ONIGENC_CTYPE_WORD:
5192	*np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
5193	CHECK_NULL_RETURN_MEMERR(*np);
5194	break;
5195
5196      case ONIGENC_CTYPE_SPACE:
5197      case ONIGENC_CTYPE_DIGIT:
5198      case ONIGENC_CTYPE_XDIGIT:
5199	{
5200	  CClassNode* cc;
5201
5202#ifdef USE_SHARED_CCLASS_TABLE
5203          const OnigCodePoint *mbr;
5204	  OnigCodePoint sb_out;
5205
5206          r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
5207					   &sb_out, &mbr);
5208          if (r == 0 &&
5209              ONIGENC_CODE_RANGE_NUM(mbr)
5210              >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
5211            type_cclass_key  key;
5212            type_cclass_key* new_key;
5213
5214            key.enc  = env->enc;
5215            key.not  = tok->u.prop.not;
5216            key.type = tok->u.prop.ctype;
5217
5218            THREAD_ATOMIC_START;
5219
5220            if (IS_NULL(OnigTypeCClassTable)) {
5221              OnigTypeCClassTable
5222                = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
5223              if (IS_NULL(OnigTypeCClassTable)) {
5224                THREAD_ATOMIC_END;
5225                return ONIGERR_MEMORY;
5226              }
5227            }
5228            else {
5229              if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )(UINTN)&key,
5230                                 (st_data_t* )np)) {
5231                THREAD_ATOMIC_END;
5232                break;
5233              }
5234            }
5235
5236            *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
5237						     sb_out, mbr);
5238            if (IS_NULL(*np)) {
5239              THREAD_ATOMIC_END;
5240              return ONIGERR_MEMORY;
5241            }
5242
5243            cc = NCCLASS(*np);
5244            NCCLASS_SET_SHARE(cc);
5245            new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
5246            CHECK_NULL_RETURN_MEMERR(new_key);
5247	    xmemcpy(new_key, &key, sizeof(type_cclass_key));
5248            onig_st_add_direct(OnigTypeCClassTable, (st_data_t )(UINTN)new_key,
5249                               (st_data_t )(UINTN)*np);
5250
5251            THREAD_ATOMIC_END;
5252          }
5253          else {
5254#endif
5255            *np = node_new_cclass();
5256            CHECK_NULL_RETURN_MEMERR(*np);
5257            cc = NCCLASS(*np);
5258            add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
5259            if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
5260#ifdef USE_SHARED_CCLASS_TABLE
5261          }
5262#endif
5263	}
5264	break;
5265
5266      default:
5267	return ONIGERR_PARSER_BUG;
5268	break;
5269      }
5270    }
5271    break;
5272
5273  case TK_CHAR_PROPERTY:
5274    r = parse_char_property(np, tok, src, end, env);
5275    if (r != 0) return r;
5276    break;
5277
5278  case TK_CC_OPEN:
5279    {
5280      CClassNode* cc;
5281
5282      r = parse_char_class(np, tok, src, end, env);
5283      if (r != 0) return r;
5284
5285      cc = NCCLASS(*np);
5286      if (IS_IGNORECASE(env->option)) {
5287	IApplyCaseFoldArg iarg;
5288
5289	iarg.env      = env;
5290	iarg.cc       = cc;
5291	iarg.alt_root = NULL_NODE;
5292	iarg.ptail    = &(iarg.alt_root);
5293
5294	r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
5295					i_apply_case_fold, &iarg);
5296	if (r != 0) {
5297	  onig_node_free(iarg.alt_root);
5298	  return r;
5299	}
5300	if (IS_NOT_NULL(iarg.alt_root)) {
5301          Node* work = onig_node_new_alt(*np, iarg.alt_root);
5302          if (IS_NULL(work)) {
5303            onig_node_free(iarg.alt_root);
5304            return ONIGERR_MEMORY;
5305          }
5306          *np = work;
5307	}
5308      }
5309    }
5310    break;
5311
5312  case TK_ANYCHAR:
5313    *np = node_new_anychar();
5314    CHECK_NULL_RETURN_MEMERR(*np);
5315    break;
5316
5317  case TK_ANYCHAR_ANYTIME:
5318    *np = node_new_anychar();
5319    CHECK_NULL_RETURN_MEMERR(*np);
5320    qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
5321    CHECK_NULL_RETURN_MEMERR(qn);
5322    NQTFR(qn)->target = *np;
5323    *np = qn;
5324    break;
5325
5326  case TK_BACKREF:
5327    len = tok->u.backref.num;
5328    *np = node_new_backref(len,
5329		   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
5330			   tok->u.backref.by_name,
5331#ifdef USE_BACKREF_WITH_LEVEL
5332			   tok->u.backref.exist_level,
5333			   tok->u.backref.level,
5334#endif
5335			   env);
5336    CHECK_NULL_RETURN_MEMERR(*np);
5337    break;
5338
5339#ifdef USE_SUBEXP_CALL
5340  case TK_CALL:
5341    {
5342      int gnum = tok->u.call.gnum;
5343
5344      if (gnum < 0) {
5345	gnum = BACKREF_REL_TO_ABS(gnum, env);
5346	if (gnum <= 0)
5347	  return ONIGERR_INVALID_BACKREF;
5348      }
5349      *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
5350      CHECK_NULL_RETURN_MEMERR(*np);
5351      env->num_call++;
5352    }
5353    break;
5354#endif
5355
5356  case TK_ANCHOR:
5357    *np = onig_node_new_anchor(tok->u.anchor);
5358    CHECK_NULL_RETURN_MEMERR(*np);
5359    break;
5360
5361  case TK_OP_REPEAT:
5362  case TK_INTERVAL:
5363    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
5364      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
5365	return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
5366      else
5367	*np = node_new_empty();
5368  CHECK_NULL_RETURN_MEMERR(*np);
5369    }
5370    else {
5371      goto tk_byte;
5372    }
5373    break;
5374
5375  default:
5376    return ONIGERR_PARSER_BUG;
5377    break;
5378  }
5379
5380  {
5381    targetp = np;
5382
5383  re_entry:
5384    r = fetch_token(tok, src, end, env);
5385    if (r < 0) return r;
5386
5387  repeat:
5388    if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
5389      if (is_invalid_quantifier_target(*targetp))
5390	return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
5391
5392      qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
5393			       (r == TK_INTERVAL ? 1 : 0));
5394      CHECK_NULL_RETURN_MEMERR(qn);
5395      NQTFR(qn)->greedy = tok->u.repeat.greedy;
5396      r = set_quantifier(qn, *targetp, group, env);
5397      if (r < 0) {
5398	onig_node_free(qn);
5399	return r;
5400      }
5401
5402      if (tok->u.repeat.possessive != 0) {
5403	Node* en;
5404	en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
5405	if (IS_NULL(en)) {
5406	  onig_node_free(qn);
5407	  return ONIGERR_MEMORY;
5408	}
5409	NENCLOSE(en)->target = qn;
5410	qn = en;
5411      }
5412
5413      if (r == 0) {
5414	*targetp = qn;
5415      }
5416      else if (r == 1) {
5417	onig_node_free(qn);
5418      }
5419      else if (r == 2) { /* split case: /abc+/ */
5420	Node *tmp;
5421
5422	*targetp = node_new_list(*targetp, NULL);
5423	if (IS_NULL(*targetp)) {
5424	  onig_node_free(qn);
5425	  return ONIGERR_MEMORY;
5426	}
5427	tmp = NCDR(*targetp) = node_new_list(qn, NULL);
5428	if (IS_NULL(tmp)) {
5429	  onig_node_free(qn);
5430	  return ONIGERR_MEMORY;
5431	}
5432	targetp = &(NCAR(tmp));
5433      }
5434      goto re_entry;
5435    }
5436  }
5437
5438  return r;
5439}
5440
5441static int
5442parse_branch(Node** top, OnigToken* tok, int term,
5443	     UChar** src, UChar* end, ScanEnv* env)
5444{
5445  int r;
5446  Node *node, **headp;
5447
5448  *top = NULL;
5449  r = parse_exp(&node, tok, term, src, end, env);
5450  if (r < 0) return r;
5451
5452  if (r == TK_EOT || r == term || r == TK_ALT) {
5453    *top = node;
5454  }
5455  else {
5456    *top  = node_new_list(node, NULL);
5457    CHECK_NULL_RETURN_MEMERR(*top);
5458    headp = &(NCDR(*top));
5459    while (r != TK_EOT && r != term && r != TK_ALT) {
5460      r = parse_exp(&node, tok, term, src, end, env);
5461      CHECK_NULL_RETURN_MEMERR(node);
5462      if (r < 0) return r;
5463
5464      if (NTYPE(node) == NT_LIST) {
5465	*headp = node;
5466	while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
5467	headp = &(NCDR(node));
5468      }
5469      else {
5470	*headp = node_new_list(node, NULL);
5471	headp = &(NCDR(*headp));
5472      }
5473    }
5474  }
5475
5476  return r;
5477}
5478
5479/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
5480static int
5481parse_subexp(Node** top, OnigToken* tok, int term,
5482	     UChar** src, UChar* end, ScanEnv* env)
5483{
5484  int r;
5485  Node *node, **headp;
5486
5487  *top = NULL;
5488  r = parse_branch(&node, tok, term, src, end, env);
5489  if (r < 0) {
5490    onig_node_free(node);
5491    return r;
5492  }
5493
5494  if (r == term) {
5495    *top = node;
5496  }
5497  else if (r == TK_ALT) {
5498    *top  = onig_node_new_alt(node, NULL);
5499    CHECK_NULL_RETURN_MEMERR(*top);
5500    headp = &(NCDR(*top));
5501    while (r == TK_ALT) {
5502      r = fetch_token(tok, src, end, env);
5503      if (r < 0) return r;
5504      r = parse_branch(&node, tok, term, src, end, env);
5505      if (r < 0) return r;
5506
5507      *headp = onig_node_new_alt(node, NULL);
5508      headp = &(NCDR(*headp));
5509    }
5510
5511    if (tok->type != (enum TokenSyms )term)
5512      goto err;
5513  }
5514  else {
5515  err:
5516    if (term == TK_SUBEXP_CLOSE)
5517      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
5518    else
5519      return ONIGERR_PARSER_BUG;
5520  }
5521
5522  return r;
5523}
5524
5525static int
5526parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
5527{
5528  int r;
5529  OnigToken tok;
5530
5531  r = fetch_token(&tok, src, end, env);
5532  if (r < 0) return r;
5533  r = parse_subexp(top, &tok, TK_EOT, src, end, env);
5534  if (r < 0) return r;
5535  return 0;
5536}
5537
5538extern int
5539onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
5540		     regex_t* reg, ScanEnv* env)
5541{
5542  int r;
5543  UChar* p;
5544
5545#ifdef USE_NAMED_GROUP
5546  names_clear(reg);
5547#endif
5548
5549  scan_env_clear(env);
5550  env->option         = reg->options;
5551  env->case_fold_flag = reg->case_fold_flag;
5552  env->enc            = reg->enc;
5553  env->syntax         = reg->syntax;
5554  env->pattern        = (UChar* )pattern;
5555  env->pattern_end    = (UChar* )end;
5556  env->reg            = reg;
5557
5558  *root = NULL;
5559  p = (UChar* )pattern;
5560  r = parse_regexp(root, &p, (UChar* )end, env);
5561  reg->num_mem = env->num_mem;
5562  return r;
5563}
5564
5565extern void
5566onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
5567				UChar* arg, UChar* arg_end)
5568{
5569  env->error     = arg;
5570  env->error_end = arg_end;
5571}
5572