1/**********************************************************************
2  regcomp.c -  Oniguruma (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include "regparse.h"
33
34OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35
36extern OnigCaseFoldType
37onig_get_default_case_fold_flag(void)
38{
39  return OnigDefaultCaseFoldFlag;
40}
41
42extern int
43onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44{
45  OnigDefaultCaseFoldFlag = case_fold_flag;
46  return 0;
47}
48
49
50#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
52#endif
53
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57  int len = (int)(end - s);
58
59  if (len > 0) {
60    UChar* r = (UChar* )xmalloc(len + 1);
61    CHECK_NULL_RETURN(r);
62    xmemcpy(r, s, len);
63    r[len] = (UChar )0;
64    return r;
65  }
66  else return NULL;
67}
68
69static void
70swap_node(Node* a, Node* b)
71{
72  Node c;
73  CopyMem (&c, a, sizeof (Node));
74  CopyMem (a, b, sizeof (Node));
75  CopyMem (b, &c, sizeof (Node));
76
77  if (NTYPE(a) == NT_STR) {
78    StrNode* sn = NSTR(a);
79    if (sn->capa == 0) {
80      int len = (int)(sn->end - sn->s);
81      sn->s   = sn->buf;
82      sn->end = sn->s + len;
83    }
84  }
85
86  if (NTYPE(b) == NT_STR) {
87    StrNode* sn = NSTR(b);
88    if (sn->capa == 0) {
89      int len = (int)(sn->end - sn->s);
90      sn->s   = sn->buf;
91      sn->end = sn->s + len;
92    }
93  }
94}
95
96static OnigDistance
97distance_add(OnigDistance d1, OnigDistance d2)
98{
99  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
100    return ONIG_INFINITE_DISTANCE;
101  else {
102    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
103    else return ONIG_INFINITE_DISTANCE;
104  }
105}
106
107static OnigDistance
108distance_multiply(OnigDistance d, int m)
109{
110  if (m == 0) return 0;
111
112  if (d < ONIG_INFINITE_DISTANCE / m)
113    return d * m;
114  else
115    return ONIG_INFINITE_DISTANCE;
116}
117
118static int
119bitset_is_empty(BitSetRef bs)
120{
121  int i;
122  for (i = 0; i < (int )BITSET_SIZE; i++) {
123    if (bs[i] != 0) return 0;
124  }
125  return 1;
126}
127
128#ifdef ONIG_DEBUG
129static int
130bitset_on_num(BitSetRef bs)
131{
132  int i, n;
133
134  n = 0;
135  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
136    if (BITSET_AT(bs, i)) n++;
137  }
138  return n;
139}
140#endif
141
142extern int
143onig_bbuf_init(BBuf* buf, int size)
144{
145  if (size <= 0) {
146    size   = 0;
147    buf->p = NULL;
148  }
149  else {
150    buf->p = (UChar* )xmalloc(size);
151    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
152  }
153
154  buf->alloc = size;
155  buf->used  = 0;
156  return 0;
157}
158
159
160#ifdef USE_SUBEXP_CALL
161
162static int
163unset_addr_list_init(UnsetAddrList* uslist, int size)
164{
165  UnsetAddr* p;
166
167  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
168  CHECK_NULL_RETURN_MEMERR(p);
169  uslist->num   = 0;
170  uslist->alloc = size;
171  uslist->us    = p;
172  return 0;
173}
174
175static void
176unset_addr_list_end(UnsetAddrList* uslist)
177{
178  if (IS_NOT_NULL(uslist->us))
179    xfree(uslist->us);
180}
181
182static int
183unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
184{
185  UnsetAddr* p;
186  int size;
187
188  if (uslist->num >= uslist->alloc) {
189    size = uslist->alloc * 2;
190    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc);
191    CHECK_NULL_RETURN_MEMERR(p);
192    uslist->alloc = size;
193    uslist->us    = p;
194  }
195
196  uslist->us[uslist->num].offset = offset;
197  uslist->us[uslist->num].target = node;
198  uslist->num++;
199  return 0;
200}
201#endif /* USE_SUBEXP_CALL */
202
203
204static int
205add_opcode(regex_t* reg, int opcode)
206{
207  BBUF_ADD1(reg, ((unsigned char)opcode));
208  return 0;
209}
210
211#ifdef USE_COMBINATION_EXPLOSION_CHECK
212static int
213add_state_check_num(regex_t* reg, int num)
214{
215  StateCheckNumType n = (StateCheckNumType )num;
216
217  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
218  return 0;
219}
220#endif
221
222static int
223add_rel_addr(regex_t* reg, int addr)
224{
225  RelAddrType ra = (RelAddrType )addr;
226
227  BBUF_ADD(reg, &ra, SIZE_RELADDR);
228  return 0;
229}
230
231static int
232add_abs_addr(regex_t* reg, int addr)
233{
234  AbsAddrType ra = (AbsAddrType )addr;
235
236  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
237  return 0;
238}
239
240static int
241add_length(regex_t* reg, int len)
242{
243  LengthType l = (LengthType )len;
244
245  BBUF_ADD(reg, &l, SIZE_LENGTH);
246  return 0;
247}
248
249static int
250add_mem_num(regex_t* reg, int num)
251{
252  MemNumType n = (MemNumType )num;
253
254  BBUF_ADD(reg, &n, SIZE_MEMNUM);
255  return 0;
256}
257
258static int
259add_pointer(regex_t* reg, void* addr)
260{
261  PointerType ptr = (PointerType )addr;
262
263  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264  return 0;
265}
266
267static int
268add_option(regex_t* reg, OnigOptionType option)
269{
270  BBUF_ADD(reg, &option, SIZE_OPTION);
271  return 0;
272}
273
274static int
275add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
276{
277  int r;
278
279  r = add_opcode(reg, opcode);
280  if (r) return r;
281  r = add_rel_addr(reg, addr);
282  return r;
283}
284
285static int
286add_bytes(regex_t* reg, UChar* bytes, int len)
287{
288  BBUF_ADD(reg, bytes, len);
289  return 0;
290}
291
292static int
293add_bitset(regex_t* reg, BitSetRef bs)
294{
295  BBUF_ADD(reg, bs, SIZE_BITSET);
296  return 0;
297}
298
299static int
300add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
301{
302  int r;
303
304  r = add_opcode(reg, opcode);
305  if (r) return r;
306  r = add_option(reg, option);
307  return r;
308}
309
310static int compile_length_tree(Node* node, regex_t* reg);
311static int compile_tree(Node* node, regex_t* reg);
312
313
314#define IS_NEED_STR_LEN_OP_EXACT(op) \
315   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
316    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
317
318static int
319select_str_opcode(int mb_len, int str_len, int ignore_case)
320{
321  int op;
322
323  if (ignore_case) {
324    switch (str_len) {
325    case 1:  op = OP_EXACT1_IC; break;
326    default: op = OP_EXACTN_IC; break;
327    }
328  }
329  else {
330    switch (mb_len) {
331    case 1:
332      switch (str_len) {
333      case 1:  op = OP_EXACT1; break;
334      case 2:  op = OP_EXACT2; break;
335      case 3:  op = OP_EXACT3; break;
336      case 4:  op = OP_EXACT4; break;
337      case 5:  op = OP_EXACT5; break;
338      default: op = OP_EXACTN; break;
339      }
340      break;
341
342    case 2:
343      switch (str_len) {
344      case 1:  op = OP_EXACTMB2N1; break;
345      case 2:  op = OP_EXACTMB2N2; break;
346      case 3:  op = OP_EXACTMB2N3; break;
347      default: op = OP_EXACTMB2N;  break;
348      }
349      break;
350
351    case 3:
352      op = OP_EXACTMB3N;
353      break;
354
355    default:
356      op = OP_EXACTMBN;
357      break;
358    }
359  }
360  return op;
361}
362
363static int
364compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
365{
366  int r;
367  int saved_num_null_check = reg->num_null_check;
368
369  if (empty_info != 0) {
370    r = add_opcode(reg, OP_NULL_CHECK_START);
371    if (r) return r;
372    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
373    if (r) return r;
374    reg->num_null_check++;
375  }
376
377  r = compile_tree(node, reg);
378  if (r) return r;
379
380  if (empty_info != 0) {
381    if (empty_info == NQ_TARGET_IS_EMPTY)
382      r = add_opcode(reg, OP_NULL_CHECK_END);
383    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
384      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
385    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
386      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
387
388    if (r) return r;
389    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
390  }
391  return r;
392}
393
394#ifdef USE_SUBEXP_CALL
395static int
396compile_call(CallNode* node, regex_t* reg)
397{
398  int r;
399
400  r = add_opcode(reg, OP_CALL);
401  if (r) return r;
402  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
403                          node->target);
404  if (r) return r;
405  r = add_abs_addr(reg, 0 /*dummy addr.*/);
406  return r;
407}
408#endif
409
410static int
411compile_tree_n_times(Node* node, int n, regex_t* reg)
412{
413  int i, r;
414
415  for (i = 0; i < n; i++) {
416    r = compile_tree(node, reg);
417    if (r) return r;
418  }
419  return 0;
420}
421
422static int
423add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
424                          regex_t* reg ARG_UNUSED, int ignore_case)
425{
426  int len;
427  int op = select_str_opcode(mb_len, str_len, ignore_case);
428
429  len = SIZE_OPCODE;
430
431  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
432  if (IS_NEED_STR_LEN_OP_EXACT(op))
433    len += SIZE_LENGTH;
434
435  len += mb_len * str_len;
436  return len;
437}
438
439static int
440add_compile_string(UChar* s, int mb_len, int str_len,
441                   regex_t* reg, int ignore_case)
442{
443  int op = select_str_opcode(mb_len, str_len, ignore_case);
444  add_opcode(reg, op);
445
446  if (op == OP_EXACTMBN)
447    add_length(reg, mb_len);
448
449  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
450    if (op == OP_EXACTN_IC)
451      add_length(reg, mb_len * str_len);
452    else
453      add_length(reg, str_len);
454  }
455
456  add_bytes(reg, s, mb_len * str_len);
457  return 0;
458}
459
460
461static int
462compile_length_string_node(Node* node, regex_t* reg)
463{
464  int rlen, r, len, prev_len, slen, ambig;
465  OnigEncoding enc = reg->enc;
466  UChar *p, *prev;
467  StrNode* sn;
468
469  sn = NSTR(node);
470  if (sn->end <= sn->s)
471    return 0;
472
473  ambig = NSTRING_IS_AMBIG(node);
474
475  p = prev = sn->s;
476  prev_len = enclen(enc, p);
477  p += prev_len;
478  slen = 1;
479  rlen = 0;
480
481  for (; p < sn->end; ) {
482    len = enclen(enc, p);
483    if (len == prev_len) {
484      slen++;
485    }
486    else {
487      r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
488      rlen += r;
489      prev = p;
490      slen = 1;
491      prev_len = len;
492    }
493    p += len;
494  }
495  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
496  rlen += r;
497  return rlen;
498}
499
500static int
501compile_length_string_raw_node(StrNode* sn, regex_t* reg)
502{
503  if (sn->end <= sn->s)
504    return 0;
505
506  return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
507}
508
509static int
510compile_string_node(Node* node, regex_t* reg)
511{
512  int r, len, prev_len, slen, ambig;
513  OnigEncoding enc = reg->enc;
514  UChar *p, *prev, *end;
515  StrNode* sn;
516
517  sn = NSTR(node);
518  if (sn->end <= sn->s)
519    return 0;
520
521  end = sn->end;
522  ambig = NSTRING_IS_AMBIG(node);
523
524  p = prev = sn->s;
525  prev_len = enclen(enc, p);
526  p += prev_len;
527  slen = 1;
528
529  for (; p < end; ) {
530    len = enclen(enc, p);
531    if (len == prev_len) {
532      slen++;
533    }
534    else {
535      r = add_compile_string(prev, prev_len, slen, reg, ambig);
536      if (r) return r;
537
538      prev  = p;
539      slen  = 1;
540      prev_len = len;
541    }
542
543    p += len;
544  }
545  return add_compile_string(prev, prev_len, slen, reg, ambig);
546}
547
548static int
549compile_string_raw_node(StrNode* sn, regex_t* reg)
550{
551  if (sn->end <= sn->s)
552    return 0;
553
554  return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
555}
556
557static int
558add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
559{
560#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
561  add_length(reg, mbuf->used);
562  return add_bytes(reg, mbuf->p, mbuf->used);
563#else
564  int r, pad_size;
565  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
566
567  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
568  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
569  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
570
571  r = add_bytes(reg, mbuf->p, mbuf->used);
572
573  /* padding for return value from compile_length_cclass_node() to be fix. */
574  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
575  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
576  return r;
577#endif
578}
579
580static int
581compile_length_cclass_node(CClassNode* cc, regex_t* reg)
582{
583  int len;
584
585  if (IS_NCCLASS_SHARE(cc)) {
586    len = SIZE_OPCODE + SIZE_POINTER;
587    return len;
588  }
589
590  if (IS_NULL(cc->mbuf)) {
591    len = SIZE_OPCODE + SIZE_BITSET;
592  }
593  else {
594    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595      len = SIZE_OPCODE;
596    }
597    else {
598      len = SIZE_OPCODE + SIZE_BITSET;
599    }
600#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601    len += SIZE_LENGTH + cc->mbuf->used;
602#else
603    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604#endif
605  }
606
607  return len;
608}
609
610static int
611compile_cclass_node(CClassNode* cc, regex_t* reg)
612{
613  int r;
614
615  if (IS_NCCLASS_SHARE(cc)) {
616    add_opcode(reg, OP_CCLASS_NODE);
617    r = add_pointer(reg, cc);
618    return r;
619  }
620
621  if (IS_NULL(cc->mbuf)) {
622    if (IS_NCCLASS_NOT(cc))
623      add_opcode(reg, OP_CCLASS_NOT);
624    else
625      add_opcode(reg, OP_CCLASS);
626
627    r = add_bitset(reg, cc->bs);
628  }
629  else {
630    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
631      if (IS_NCCLASS_NOT(cc))
632        add_opcode(reg, OP_CCLASS_MB_NOT);
633      else
634        add_opcode(reg, OP_CCLASS_MB);
635
636      r = add_multi_byte_cclass(cc->mbuf, reg);
637    }
638    else {
639      if (IS_NCCLASS_NOT(cc))
640        add_opcode(reg, OP_CCLASS_MIX_NOT);
641      else
642        add_opcode(reg, OP_CCLASS_MIX);
643
644      r = add_bitset(reg, cc->bs);
645      if (r) return r;
646      r = add_multi_byte_cclass(cc->mbuf, reg);
647    }
648  }
649
650  return r;
651}
652
653static int
654entry_repeat_range(regex_t* reg, int id, int lower, int upper)
655{
656#define REPEAT_RANGE_ALLOC  4
657
658  OnigRepeatRange* p;
659
660  if (reg->repeat_range_alloc == 0) {
661    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
662    CHECK_NULL_RETURN_MEMERR(p);
663    reg->repeat_range = p;
664    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
665  }
666  else if (reg->repeat_range_alloc <= id) {
667    int n;
668    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
669    p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
670                                    sizeof(OnigRepeatRange) * n,
671                                    sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
672    CHECK_NULL_RETURN_MEMERR(p);
673    reg->repeat_range = p;
674    reg->repeat_range_alloc = n;
675  }
676  else {
677    p = reg->repeat_range;
678  }
679
680  p[id].lower = lower;
681  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
682  return 0;
683}
684
685static int
686compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
687                          regex_t* reg)
688{
689  int r;
690  int num_repeat = reg->num_repeat;
691
692  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
693  if (r) return r;
694  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
695  reg->num_repeat++;
696  if (r) return r;
697  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
698  if (r) return r;
699
700  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
701  if (r) return r;
702
703  r = compile_tree_empty_check(qn->target, reg, empty_info);
704  if (r) return r;
705
706  if (
707#ifdef USE_SUBEXP_CALL
708      reg->num_call > 0 ||
709#endif
710      IS_QUANTIFIER_IN_REPEAT(qn)) {
711    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
712  }
713  else {
714    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
715  }
716  if (r) return r;
717  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
718  return r;
719}
720
721static int
722is_anychar_star_quantifier(QtfrNode* qn)
723{
724  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
725      NTYPE(qn->target) == NT_CANY)
726    return 1;
727  else
728    return 0;
729}
730
731#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
732#define CKN_ON   (ckn > 0)
733
734#ifdef USE_COMBINATION_EXPLOSION_CHECK
735
736static int
737compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
738{
739  int len, mod_tlen, cklen;
740  int ckn;
741  int infinite = IS_REPEAT_INFINITE(qn->upper);
742  int empty_info = qn->target_empty_info;
743  int tlen = compile_length_tree(qn->target, reg);
744
745  if (tlen < 0) return tlen;
746
747  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
748
749  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
750
751  /* anychar repeat */
752  if (NTYPE(qn->target) == NT_CANY) {
753    if (qn->greedy && infinite) {
754      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
755        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
756      else
757        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
758    }
759  }
760
761  if (empty_info != 0)
762    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
763  else
764    mod_tlen = tlen;
765
766  if (infinite && qn->lower <= 1) {
767    if (qn->greedy) {
768      if (qn->lower == 1)
769	len = SIZE_OP_JUMP;
770      else
771	len = 0;
772
773      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
774    }
775    else {
776      if (qn->lower == 0)
777	len = SIZE_OP_JUMP;
778      else
779	len = 0;
780
781      len += mod_tlen + SIZE_OP_PUSH + cklen;
782    }
783  }
784  else if (qn->upper == 0) {
785    if (qn->is_refered != 0) /* /(?<n>..){0}/ */
786      len = SIZE_OP_JUMP + tlen;
787    else
788      len = 0;
789  }
790  else if (qn->upper == 1 && qn->greedy) {
791    if (qn->lower == 0) {
792      if (CKN_ON) {
793	len = SIZE_OP_STATE_CHECK_PUSH + tlen;
794      }
795      else {
796	len = SIZE_OP_PUSH + tlen;
797      }
798    }
799    else {
800      len = tlen;
801    }
802  }
803  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
804    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
805  }
806  else {
807    len = SIZE_OP_REPEAT_INC
808        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
809    if (CKN_ON)
810      len += SIZE_OP_STATE_CHECK;
811  }
812
813  return len;
814}
815
816static int
817compile_quantifier_node(QtfrNode* qn, regex_t* reg)
818{
819  int r, mod_tlen;
820  int ckn;
821  int infinite = IS_REPEAT_INFINITE(qn->upper);
822  int empty_info = qn->target_empty_info;
823  int tlen = compile_length_tree(qn->target, reg);
824
825  if (tlen < 0) return tlen;
826
827  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
828
829  if (is_anychar_star_quantifier(qn)) {
830    r = compile_tree_n_times(qn->target, qn->lower, reg);
831    if (r) return r;
832    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
833      if (IS_MULTILINE(reg->options))
834	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
835      else
836	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
837      if (r) return r;
838      if (CKN_ON) {
839	r = add_state_check_num(reg, ckn);
840	if (r) return r;
841      }
842
843      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
844    }
845    else {
846      if (IS_MULTILINE(reg->options)) {
847	r = add_opcode(reg, (CKN_ON ?
848			       OP_STATE_CHECK_ANYCHAR_ML_STAR
849			     : OP_ANYCHAR_ML_STAR));
850      }
851      else {
852	r = add_opcode(reg, (CKN_ON ?
853			       OP_STATE_CHECK_ANYCHAR_STAR
854			     : OP_ANYCHAR_STAR));
855      }
856      if (r) return r;
857      if (CKN_ON)
858	r = add_state_check_num(reg, ckn);
859
860      return r;
861    }
862  }
863
864  if (empty_info != 0)
865    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
866  else
867    mod_tlen = tlen;
868
869  if (infinite && qn->lower <= 1) {
870    if (qn->greedy) {
871      if (qn->lower == 1) {
872	r = add_opcode_rel_addr(reg, OP_JUMP,
873			(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
874	if (r) return r;
875      }
876
877      if (CKN_ON) {
878	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
879	if (r) return r;
880	r = add_state_check_num(reg, ckn);
881	if (r) return r;
882	r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
883      }
884      else {
885	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
886      }
887      if (r) return r;
888      r = compile_tree_empty_check(qn->target, reg, empty_info);
889      if (r) return r;
890      r = add_opcode_rel_addr(reg, OP_JUMP,
891	      -(mod_tlen + (int )SIZE_OP_JUMP
892		+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
893    }
894    else {
895      if (qn->lower == 0) {
896	r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
897	if (r) return r;
898      }
899      r = compile_tree_empty_check(qn->target, reg, empty_info);
900      if (r) return r;
901      if (CKN_ON) {
902	r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
903	if (r) return r;
904	r = add_state_check_num(reg, ckn);
905	if (r) return r;
906	r = add_rel_addr(reg,
907		 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
908      }
909      else
910	r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
911    }
912  }
913  else if (qn->upper == 0) {
914    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
915      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
916      if (r) return r;
917      r = compile_tree(qn->target, reg);
918    }
919    else
920      r = 0;
921  }
922  else if (qn->upper == 1 && qn->greedy) {
923    if (qn->lower == 0) {
924      if (CKN_ON) {
925	r = add_opcode(reg, OP_STATE_CHECK_PUSH);
926	if (r) return r;
927	r = add_state_check_num(reg, ckn);
928	if (r) return r;
929	r = add_rel_addr(reg, tlen);
930      }
931      else {
932	r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
933      }
934      if (r) return r;
935    }
936
937    r = compile_tree(qn->target, reg);
938  }
939  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
940    if (CKN_ON) {
941      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
942      if (r) return r;
943      r = add_state_check_num(reg, ckn);
944      if (r) return r;
945      r = add_rel_addr(reg, SIZE_OP_JUMP);
946    }
947    else {
948      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
949    }
950
951    if (r) return r;
952    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
953    if (r) return r;
954    r = compile_tree(qn->target, reg);
955  }
956  else {
957    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
958    if (CKN_ON) {
959      if (r) return r;
960      r = add_opcode(reg, OP_STATE_CHECK);
961      if (r) return r;
962      r = add_state_check_num(reg, ckn);
963    }
964  }
965  return r;
966}
967
968#else /* USE_COMBINATION_EXPLOSION_CHECK */
969
970static int
971compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
972{
973  int len, mod_tlen;
974  int infinite = IS_REPEAT_INFINITE(qn->upper);
975  int empty_info = qn->target_empty_info;
976  int tlen = compile_length_tree(qn->target, reg);
977
978  if (tlen < 0) return tlen;
979
980  /* anychar repeat */
981  if (NTYPE(qn->target) == NT_CANY) {
982    if (qn->greedy && infinite) {
983      if (IS_NOT_NULL(qn->next_head_exact))
984        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
985      else
986        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
987    }
988  }
989
990  if (empty_info != 0)
991    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
992  else
993    mod_tlen = tlen;
994
995  if (infinite &&
996      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
997    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
998      len = SIZE_OP_JUMP;
999    }
1000    else {
1001      len = tlen * qn->lower;
1002    }
1003
1004    if (qn->greedy) {
1005      if (IS_NOT_NULL(qn->head_exact))
1006	len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1007      else if (IS_NOT_NULL(qn->next_head_exact))
1008	len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1009      else
1010	len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1011    }
1012    else
1013      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1014  }
1015  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1016    len = SIZE_OP_JUMP + tlen;
1017  }
1018  else if (!infinite && qn->greedy &&
1019           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1020                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1021    len = tlen * qn->lower;
1022    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1023  }
1024  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1025    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1026  }
1027  else {
1028    len = SIZE_OP_REPEAT_INC
1029        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1030  }
1031
1032  return len;
1033}
1034
1035static int
1036compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1037{
1038  int i, r, mod_tlen;
1039  int infinite = IS_REPEAT_INFINITE(qn->upper);
1040  int empty_info = qn->target_empty_info;
1041  int tlen = compile_length_tree(qn->target, reg);
1042
1043  if (tlen < 0) return tlen;
1044
1045  if (is_anychar_star_quantifier(qn)) {
1046    r = compile_tree_n_times(qn->target, qn->lower, reg);
1047    if (r) return r;
1048    if (IS_NOT_NULL(qn->next_head_exact)) {
1049      if (IS_MULTILINE(reg->options))
1050	r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1051      else
1052	r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1053      if (r) return r;
1054      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1055    }
1056    else {
1057      if (IS_MULTILINE(reg->options))
1058	return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1059      else
1060	return add_opcode(reg, OP_ANYCHAR_STAR);
1061    }
1062  }
1063
1064  if (empty_info != 0)
1065    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1066  else
1067    mod_tlen = tlen;
1068
1069  if (infinite &&
1070      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1071    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1072      if (qn->greedy) {
1073	if (IS_NOT_NULL(qn->head_exact))
1074	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1075	else if (IS_NOT_NULL(qn->next_head_exact))
1076	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1077	else
1078	  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1079      }
1080      else {
1081	r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1082      }
1083      if (r) return r;
1084    }
1085    else {
1086      r = compile_tree_n_times(qn->target, qn->lower, reg);
1087      if (r) return r;
1088    }
1089
1090    if (qn->greedy) {
1091      if (IS_NOT_NULL(qn->head_exact)) {
1092	r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093			     mod_tlen + SIZE_OP_JUMP);
1094	if (r) return r;
1095	add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096	r = compile_tree_empty_check(qn->target, reg, empty_info);
1097	if (r) return r;
1098	r = add_opcode_rel_addr(reg, OP_JUMP,
1099	-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100      }
1101      else if (IS_NOT_NULL(qn->next_head_exact)) {
1102	r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1103				mod_tlen + SIZE_OP_JUMP);
1104	if (r) return r;
1105	add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1106	r = compile_tree_empty_check(qn->target, reg, empty_info);
1107	if (r) return r;
1108	r = add_opcode_rel_addr(reg, OP_JUMP,
1109          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1110      }
1111      else {
1112	r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1113	if (r) return r;
1114	r = compile_tree_empty_check(qn->target, reg, empty_info);
1115	if (r) return r;
1116	r = add_opcode_rel_addr(reg, OP_JUMP,
1117		     -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1118      }
1119    }
1120    else {
1121      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1122      if (r) return r;
1123      r = compile_tree_empty_check(qn->target, reg, empty_info);
1124      if (r) return r;
1125      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1126    }
1127  }
1128  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1129    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1130    if (r) return r;
1131    r = compile_tree(qn->target, reg);
1132  }
1133  else if (!infinite && qn->greedy &&
1134           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1135                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1136    int n = qn->upper - qn->lower;
1137
1138    r = compile_tree_n_times(qn->target, qn->lower, reg);
1139    if (r) return r;
1140
1141    for (i = 0; i < n; i++) {
1142      r = add_opcode_rel_addr(reg, OP_PUSH,
1143			   (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1144      if (r) return r;
1145      r = compile_tree(qn->target, reg);
1146      if (r) return r;
1147    }
1148  }
1149  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1150    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1151    if (r) return r;
1152    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1153    if (r) return r;
1154    r = compile_tree(qn->target, reg);
1155  }
1156  else {
1157    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1158  }
1159  return r;
1160}
1161#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1162
1163static int
1164compile_length_option_node(EncloseNode* node, regex_t* reg)
1165{
1166  int tlen;
1167  OnigOptionType prev = reg->options;
1168
1169  reg->options = node->option;
1170  tlen = compile_length_tree(node->target, reg);
1171  reg->options = prev;
1172
1173  if (tlen < 0) return tlen;
1174
1175  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1176    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1177           + tlen + SIZE_OP_SET_OPTION;
1178  }
1179  else
1180    return tlen;
1181}
1182
1183static int
1184compile_option_node(EncloseNode* node, regex_t* reg)
1185{
1186  int r;
1187  OnigOptionType prev = reg->options;
1188
1189  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1190    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1191    if (r) return r;
1192    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1193    if (r) return r;
1194    r = add_opcode(reg, OP_FAIL);
1195    if (r) return r;
1196  }
1197
1198  reg->options = node->option;
1199  r = compile_tree(node->target, reg);
1200  reg->options = prev;
1201
1202  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1203    if (r) return r;
1204    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1205  }
1206  return r;
1207}
1208
1209static int
1210compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1211{
1212  int len;
1213  int tlen;
1214  QtfrNode* qn;
1215
1216  qn = NULL;
1217
1218  if (node->type == ENCLOSE_OPTION)
1219    return compile_length_option_node(node, reg);
1220
1221  if (node->target) {
1222    tlen = compile_length_tree(node->target, reg);
1223    if (tlen < 0) return tlen;
1224  }
1225  else
1226    tlen = 0;
1227
1228  switch (node->type) {
1229  case ENCLOSE_MEMORY:
1230#ifdef USE_SUBEXP_CALL
1231    if (IS_ENCLOSE_CALLED(node)) {
1232      len = SIZE_OP_MEMORY_START_PUSH + tlen
1233	  + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1234      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1235	len += (IS_ENCLOSE_RECURSION(node)
1236		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1237      else
1238	len += (IS_ENCLOSE_RECURSION(node)
1239		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1240    }
1241    else
1242#endif
1243    {
1244      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1245	len = SIZE_OP_MEMORY_START_PUSH;
1246      else
1247	len = SIZE_OP_MEMORY_START;
1248
1249      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1250		     ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1251    }
1252    break;
1253
1254  case ENCLOSE_STOP_BACKTRACK:
1255    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1256      if (node->target == NULL) {
1257        CHECK_NULL_RETURN_MEMERR(node->target);
1258      }
1259      qn = NQTFR(node->target);
1260      tlen = compile_length_tree(qn->target, reg);
1261      if (tlen < 0) return tlen;
1262
1263      len = tlen * qn->lower
1264	  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1265    }
1266    else {
1267      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1268    }
1269    break;
1270
1271  default:
1272    return ONIGERR_TYPE_BUG;
1273    break;
1274  }
1275
1276  return len;
1277}
1278
1279static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1280
1281static int
1282compile_enclose_node(EncloseNode* node, regex_t* reg)
1283{
1284  int r, len;
1285
1286  if (node->type == ENCLOSE_OPTION)
1287    return compile_option_node(node, reg);
1288
1289  switch (node->type) {
1290  case ENCLOSE_MEMORY:
1291#ifdef USE_SUBEXP_CALL
1292    if (IS_ENCLOSE_CALLED(node)) {
1293      r = add_opcode(reg, OP_CALL);
1294      if (r) return r;
1295      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1296      node->state |= NST_ADDR_FIXED;
1297      r = add_abs_addr(reg, (int )node->call_addr);
1298      if (r) return r;
1299      len = compile_length_tree(node->target, reg);
1300      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1301      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1302	len += (IS_ENCLOSE_RECURSION(node)
1303		? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1304      else
1305	len += (IS_ENCLOSE_RECURSION(node)
1306		? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1307
1308      r = add_opcode_rel_addr(reg, OP_JUMP, len);
1309      if (r) return r;
1310    }
1311#endif
1312    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1313      r = add_opcode(reg, OP_MEMORY_START_PUSH);
1314    else
1315      r = add_opcode(reg, OP_MEMORY_START);
1316    if (r) return r;
1317    r = add_mem_num(reg, node->regnum);
1318    if (r) return r;
1319    r = compile_tree(node->target, reg);
1320    if (r) return r;
1321#ifdef USE_SUBEXP_CALL
1322    if (IS_ENCLOSE_CALLED(node)) {
1323      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1325			     ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1326      else
1327	r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1328			     ? OP_MEMORY_END_REC : OP_MEMORY_END));
1329
1330      if (r) return r;
1331      r = add_mem_num(reg, node->regnum);
1332      if (r) return r;
1333      r = add_opcode(reg, OP_RETURN);
1334    }
1335    else
1336#endif
1337    {
1338      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1339	r = add_opcode(reg, OP_MEMORY_END_PUSH);
1340      else
1341	r = add_opcode(reg, OP_MEMORY_END);
1342      if (r) return r;
1343      r = add_mem_num(reg, node->regnum);
1344    }
1345    break;
1346
1347  case ENCLOSE_STOP_BACKTRACK:
1348    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1349      QtfrNode* qn = NQTFR(node->target);
1350      r = compile_tree_n_times(qn->target, qn->lower, reg);
1351      if (r) return r;
1352
1353      len = compile_length_tree(qn->target, reg);
1354      if (len < 0) return len;
1355
1356      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1357      if (r) return r;
1358      r = compile_tree(qn->target, reg);
1359      if (r) return r;
1360      r = add_opcode(reg, OP_POP);
1361      if (r) return r;
1362      r = add_opcode_rel_addr(reg, OP_JUMP,
1363	 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1364    }
1365    else {
1366      r = add_opcode(reg, OP_PUSH_STOP_BT);
1367      if (r) return r;
1368      r = compile_tree(node->target, reg);
1369      if (r) return r;
1370      r = add_opcode(reg, OP_POP_STOP_BT);
1371    }
1372    break;
1373
1374  default:
1375    return ONIGERR_TYPE_BUG;
1376    break;
1377  }
1378
1379  return r;
1380}
1381
1382static int
1383compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1384{
1385  int len;
1386  int tlen = 0;
1387
1388  if (node->target) {
1389    tlen = compile_length_tree(node->target, reg);
1390    if (tlen < 0) return tlen;
1391  }
1392
1393  switch (node->type) {
1394  case ANCHOR_PREC_READ:
1395    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1396    break;
1397  case ANCHOR_PREC_READ_NOT:
1398    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1399    break;
1400  case ANCHOR_LOOK_BEHIND:
1401    len = SIZE_OP_LOOK_BEHIND + tlen;
1402    break;
1403  case ANCHOR_LOOK_BEHIND_NOT:
1404    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1405    break;
1406
1407  default:
1408    len = SIZE_OPCODE;
1409    break;
1410  }
1411
1412  return len;
1413}
1414
1415static int
1416compile_anchor_node(AnchorNode* node, regex_t* reg)
1417{
1418  int r, len;
1419
1420  switch (node->type) {
1421  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1422  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1423  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1424  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1425  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1426  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1427
1428  case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
1429  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1430#ifdef USE_WORD_BEGIN_END
1431  case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
1432  case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
1433#endif
1434
1435  case ANCHOR_PREC_READ:
1436    r = add_opcode(reg, OP_PUSH_POS);
1437    if (r) return r;
1438    r = compile_tree(node->target, reg);
1439    if (r) return r;
1440    r = add_opcode(reg, OP_POP_POS);
1441    break;
1442
1443  case ANCHOR_PREC_READ_NOT:
1444    len = compile_length_tree(node->target, reg);
1445    if (len < 0) return len;
1446    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1447    if (r) return r;
1448    r = compile_tree(node->target, reg);
1449    if (r) return r;
1450    r = add_opcode(reg, OP_FAIL_POS);
1451    break;
1452
1453  case ANCHOR_LOOK_BEHIND:
1454    {
1455      int n;
1456      r = add_opcode(reg, OP_LOOK_BEHIND);
1457      if (r) return r;
1458      if (node->char_len < 0) {
1459	r = get_char_length_tree(node->target, reg, &n);
1460	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1461      }
1462      else
1463	n = node->char_len;
1464      r = add_length(reg, n);
1465      if (r) return r;
1466      r = compile_tree(node->target, reg);
1467    }
1468    break;
1469
1470  case ANCHOR_LOOK_BEHIND_NOT:
1471    {
1472      int n;
1473      len = compile_length_tree(node->target, reg);
1474      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1475			   len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1476      if (r) return r;
1477      if (node->char_len < 0) {
1478	r = get_char_length_tree(node->target, reg, &n);
1479	if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1480      }
1481      else
1482	n = node->char_len;
1483      r = add_length(reg, n);
1484      if (r) return r;
1485      r = compile_tree(node->target, reg);
1486      if (r) return r;
1487      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1488    }
1489    break;
1490
1491  default:
1492    return ONIGERR_TYPE_BUG;
1493    break;
1494  }
1495
1496  return r;
1497}
1498
1499static int
1500compile_length_tree(Node* node, regex_t* reg)
1501{
1502  int len, type, r;
1503
1504  type = NTYPE(node);
1505  switch (type) {
1506  case NT_LIST:
1507    len = 0;
1508    do {
1509      r = compile_length_tree(NCAR(node), reg);
1510      if (r < 0) return r;
1511      len += r;
1512    } while (IS_NOT_NULL(node = NCDR(node)));
1513    r = len;
1514    break;
1515
1516  case NT_ALT:
1517    {
1518      int n;
1519
1520      n = r = 0;
1521      do {
1522	r += compile_length_tree(NCAR(node), reg);
1523	n++;
1524      } while (IS_NOT_NULL(node = NCDR(node)));
1525      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1526    }
1527    break;
1528
1529  case NT_STR:
1530    if (NSTRING_IS_RAW(node))
1531      r = compile_length_string_raw_node(NSTR(node), reg);
1532    else
1533      r = compile_length_string_node(node, reg);
1534    break;
1535
1536  case NT_CCLASS:
1537    r = compile_length_cclass_node(NCCLASS(node), reg);
1538    break;
1539
1540  case NT_CTYPE:
1541  case NT_CANY:
1542    r = SIZE_OPCODE;
1543    break;
1544
1545  case NT_BREF:
1546    {
1547      BRefNode* br = NBREF(node);
1548
1549#ifdef USE_BACKREF_WITH_LEVEL
1550      if (IS_BACKREF_NEST_LEVEL(br)) {
1551        r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1552            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1553      }
1554      else
1555#endif
1556      if (br->back_num == 1) {
1557	r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1558	     ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1559      }
1560      else {
1561	r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1562      }
1563    }
1564    break;
1565
1566#ifdef USE_SUBEXP_CALL
1567  case NT_CALL:
1568    r = SIZE_OP_CALL;
1569    break;
1570#endif
1571
1572  case NT_QTFR:
1573    r = compile_length_quantifier_node(NQTFR(node), reg);
1574    break;
1575
1576  case NT_ENCLOSE:
1577    r = compile_length_enclose_node(NENCLOSE(node), reg);
1578    break;
1579
1580  case NT_ANCHOR:
1581    r = compile_length_anchor_node(NANCHOR(node), reg);
1582    break;
1583
1584  default:
1585    return ONIGERR_TYPE_BUG;
1586    break;
1587  }
1588
1589  return r;
1590}
1591
1592static int
1593compile_tree(Node* node, regex_t* reg)
1594{
1595  int n, type, len, pos, r = 0;
1596
1597  type = NTYPE(node);
1598  switch (type) {
1599  case NT_LIST:
1600    do {
1601      r = compile_tree(NCAR(node), reg);
1602    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1603    break;
1604
1605  case NT_ALT:
1606    {
1607      Node* x = node;
1608      len = 0;
1609      do {
1610	len += compile_length_tree(NCAR(x), reg);
1611	if (NCDR(x) != NULL) {
1612	  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1613	}
1614      } while (IS_NOT_NULL(x = NCDR(x)));
1615      pos = reg->used + len;  /* goal position */
1616
1617      do {
1618	len = compile_length_tree(NCAR(node), reg);
1619	if (IS_NOT_NULL(NCDR(node))) {
1620	  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1621	  if (r) break;
1622	}
1623	r = compile_tree(NCAR(node), reg);
1624	if (r) break;
1625	if (IS_NOT_NULL(NCDR(node))) {
1626	  len = pos - (reg->used + SIZE_OP_JUMP);
1627	  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1628	  if (r) break;
1629	}
1630      } while (IS_NOT_NULL(node = NCDR(node)));
1631    }
1632    break;
1633
1634  case NT_STR:
1635    if (NSTRING_IS_RAW(node))
1636      r = compile_string_raw_node(NSTR(node), reg);
1637    else
1638      r = compile_string_node(node, reg);
1639    break;
1640
1641  case NT_CCLASS:
1642    r = compile_cclass_node(NCCLASS(node), reg);
1643    break;
1644
1645  case NT_CTYPE:
1646    {
1647      int op;
1648
1649      switch (NCTYPE(node)->ctype) {
1650      case ONIGENC_CTYPE_WORD:
1651	if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1652	else                         op = OP_WORD;
1653	break;
1654      default:
1655	return ONIGERR_TYPE_BUG;
1656	break;
1657      }
1658      r = add_opcode(reg, op);
1659    }
1660    break;
1661
1662  case NT_CANY:
1663    if (IS_MULTILINE(reg->options))
1664      r = add_opcode(reg, OP_ANYCHAR_ML);
1665    else
1666      r = add_opcode(reg, OP_ANYCHAR);
1667    break;
1668
1669  case NT_BREF:
1670    {
1671      BRefNode* br = NBREF(node);
1672
1673#ifdef USE_BACKREF_WITH_LEVEL
1674      if (IS_BACKREF_NEST_LEVEL(br)) {
1675	r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1676	if (r) return r;
1677	r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1678	if (r) return r;
1679	r = add_length(reg, br->nest_level);
1680	if (r) return r;
1681
1682	goto add_bacref_mems;
1683      }
1684      else
1685#endif
1686      if (br->back_num == 1) {
1687	n = br->back_static[0];
1688	if (IS_IGNORECASE(reg->options)) {
1689	  r = add_opcode(reg, OP_BACKREFN_IC);
1690	  if (r) return r;
1691	  r = add_mem_num(reg, n);
1692	}
1693	else {
1694	  switch (n) {
1695	  case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1696	  case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1697	  default:
1698	    r = add_opcode(reg, OP_BACKREFN);
1699	    if (r) return r;
1700	    r = add_mem_num(reg, n);
1701	    break;
1702	  }
1703	}
1704      }
1705      else {
1706	int i;
1707	int* p;
1708
1709        if (IS_IGNORECASE(reg->options)) {
1710          r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1711        }
1712        else {
1713          r = add_opcode(reg, OP_BACKREF_MULTI);
1714        }
1715	if (r) return r;
1716
1717#ifdef USE_BACKREF_WITH_LEVEL
1718      add_bacref_mems:
1719#endif
1720	r = add_length(reg, br->back_num);
1721	if (r) return r;
1722	p = BACKREFS_P(br);
1723	for (i = br->back_num - 1; i >= 0; i--) {
1724	  r = add_mem_num(reg, p[i]);
1725	  if (r) return r;
1726	}
1727      }
1728    }
1729    break;
1730
1731#ifdef USE_SUBEXP_CALL
1732  case NT_CALL:
1733    r = compile_call(NCALL(node), reg);
1734    break;
1735#endif
1736
1737  case NT_QTFR:
1738    r = compile_quantifier_node(NQTFR(node), reg);
1739    break;
1740
1741  case NT_ENCLOSE:
1742    r = compile_enclose_node(NENCLOSE(node), reg);
1743    break;
1744
1745  case NT_ANCHOR:
1746    r = compile_anchor_node(NANCHOR(node), reg);
1747    break;
1748
1749  default:
1750#ifdef ONIG_DEBUG
1751    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1752#endif
1753    break;
1754  }
1755
1756  return r;
1757}
1758
1759#ifdef USE_NAMED_GROUP
1760
1761static int
1762noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1763{
1764  int r = 0;
1765  Node* node = *plink;
1766
1767  switch (NTYPE(node)) {
1768  case NT_LIST:
1769  case NT_ALT:
1770    do {
1771      r = noname_disable_map(&(NCAR(node)), map, counter);
1772    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1773    break;
1774
1775  case NT_QTFR:
1776    {
1777      Node** ptarget = &(NQTFR(node)->target);
1778      Node*  old = *ptarget;
1779      r = noname_disable_map(ptarget, map, counter);
1780      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1781	onig_reduce_nested_quantifier(node, *ptarget);
1782      }
1783    }
1784    break;
1785
1786  case NT_ENCLOSE:
1787    {
1788      EncloseNode* en = NENCLOSE(node);
1789      if (en->type == ENCLOSE_MEMORY) {
1790	if (IS_ENCLOSE_NAMED_GROUP(en)) {
1791	  (*counter)++;
1792	  map[en->regnum].new_val = *counter;
1793	  en->regnum = *counter;
1794	  r = noname_disable_map(&(en->target), map, counter);
1795	}
1796	else {
1797	  *plink = en->target;
1798	  en->target = NULL_NODE;
1799	  onig_node_free(node);
1800	  r = noname_disable_map(plink, map, counter);
1801	}
1802      }
1803      else
1804	r = noname_disable_map(&(en->target), map, counter);
1805    }
1806    break;
1807
1808  default:
1809    break;
1810  }
1811
1812  return r;
1813}
1814
1815static int
1816renumber_node_backref(Node* node, GroupNumRemap* map)
1817{
1818  int i, pos, n, old_num;
1819  int *backs;
1820  BRefNode* bn = NBREF(node);
1821
1822  if (! IS_BACKREF_NAME_REF(bn))
1823    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1824
1825  old_num = bn->back_num;
1826  if (IS_NULL(bn->back_dynamic))
1827    backs = bn->back_static;
1828  else
1829    backs = bn->back_dynamic;
1830
1831  for (i = 0, pos = 0; i < old_num; i++) {
1832    n = map[backs[i]].new_val;
1833    if (n > 0) {
1834      backs[pos] = n;
1835      pos++;
1836    }
1837  }
1838
1839  bn->back_num = pos;
1840  return 0;
1841}
1842
1843static int
1844renumber_by_map(Node* node, GroupNumRemap* map)
1845{
1846  int r = 0;
1847
1848  switch (NTYPE(node)) {
1849  case NT_LIST:
1850  case NT_ALT:
1851    do {
1852      r = renumber_by_map(NCAR(node), map);
1853    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1854    break;
1855  case NT_QTFR:
1856    r = renumber_by_map(NQTFR(node)->target, map);
1857    break;
1858  case NT_ENCLOSE:
1859    r = renumber_by_map(NENCLOSE(node)->target, map);
1860    break;
1861
1862  case NT_BREF:
1863    r = renumber_node_backref(node, map);
1864    break;
1865
1866  default:
1867    break;
1868  }
1869
1870  return r;
1871}
1872
1873static int
1874numbered_ref_check(Node* node)
1875{
1876  int r = 0;
1877
1878  switch (NTYPE(node)) {
1879  case NT_LIST:
1880  case NT_ALT:
1881    do {
1882      r = numbered_ref_check(NCAR(node));
1883    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1884    break;
1885  case NT_QTFR:
1886    r = numbered_ref_check(NQTFR(node)->target);
1887    break;
1888  case NT_ENCLOSE:
1889    r = numbered_ref_check(NENCLOSE(node)->target);
1890    break;
1891
1892  case NT_BREF:
1893    if (! IS_BACKREF_NAME_REF(NBREF(node)))
1894      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1895    break;
1896
1897  default:
1898    break;
1899  }
1900
1901  return r;
1902}
1903
1904static int
1905disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1906{
1907  int r, i, pos, counter;
1908  int Result;
1909  BitStatusType loc;
1910  GroupNumRemap* map;
1911
1912  map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
1913  CHECK_NULL_RETURN_MEMERR(map);
1914  for (i = 1; i <= env->num_mem; i++) {
1915    map[i].new_val = 0;
1916  }
1917  counter = 0;
1918  r = noname_disable_map(root, map, &counter);
1919  if (r != 0) return r;
1920
1921  r = renumber_by_map(*root, map);
1922  if (r != 0) return r;
1923
1924  for (i = 1, pos = 1; i <= env->num_mem; i++) {
1925    if (map[i].new_val > 0) {
1926      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1927      pos++;
1928    }
1929  }
1930
1931  loc = env->capture_history;
1932  BIT_STATUS_CLEAR(env->capture_history);
1933  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1934    if (BIT_STATUS_AT(loc, i)) {
1935      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1936    }
1937  }
1938
1939  env->num_mem = env->num_named;
1940  reg->num_mem = env->num_named;
1941
1942  Result = onig_renumber_name_table(reg, map);
1943  xfree(map);
1944  return Result;
1945}
1946#endif /* USE_NAMED_GROUP */
1947
1948#ifdef USE_SUBEXP_CALL
1949static int
1950unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1951{
1952  int i, offset;
1953  EncloseNode* en;
1954  AbsAddrType addr;
1955
1956  for (i = 0; i < uslist->num; i++) {
1957    en = NENCLOSE(uslist->us[i].target);
1958    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1959    addr = en->call_addr;
1960    offset = uslist->us[i].offset;
1961
1962    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1963  }
1964  return 0;
1965}
1966#endif
1967
1968#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1969static int
1970quantifiers_memory_node_info(Node* node)
1971{
1972  int r = 0;
1973
1974  switch (NTYPE(node)) {
1975  case NT_LIST:
1976  case NT_ALT:
1977    {
1978      int v;
1979      do {
1980	v = quantifiers_memory_node_info(NCAR(node));
1981	if (v > r) r = v;
1982      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
1983    }
1984    break;
1985
1986#ifdef USE_SUBEXP_CALL
1987  case NT_CALL:
1988    if (IS_CALL_RECURSION(NCALL(node))) {
1989      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
1990    }
1991    else
1992      r = quantifiers_memory_node_info(NCALL(node)->target);
1993    break;
1994#endif
1995
1996  case NT_QTFR:
1997    {
1998      QtfrNode* qn = NQTFR(node);
1999      if (qn->upper != 0) {
2000	r = quantifiers_memory_node_info(qn->target);
2001      }
2002    }
2003    break;
2004
2005  case NT_ENCLOSE:
2006    {
2007      EncloseNode* en = NENCLOSE(node);
2008      switch (en->type) {
2009      case ENCLOSE_MEMORY:
2010	return NQ_TARGET_IS_EMPTY_MEM;
2011	break;
2012
2013      case ENCLOSE_OPTION:
2014      case ENCLOSE_STOP_BACKTRACK:
2015	r = quantifiers_memory_node_info(en->target);
2016	break;
2017      default:
2018	break;
2019      }
2020    }
2021    break;
2022
2023  case NT_BREF:
2024  case NT_STR:
2025  case NT_CTYPE:
2026  case NT_CCLASS:
2027  case NT_CANY:
2028  case NT_ANCHOR:
2029  default:
2030    break;
2031  }
2032
2033  return r;
2034}
2035#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2036
2037static int
2038get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2039{
2040  OnigDistance tmin;
2041  int r = 0;
2042
2043  *min = 0;
2044  switch (NTYPE(node)) {
2045  case NT_BREF:
2046    {
2047      int i;
2048      int* backs;
2049      Node** nodes = SCANENV_MEM_NODES(env);
2050      BRefNode* br = NBREF(node);
2051      if (br->state & NST_RECURSION) break;
2052
2053      backs = BACKREFS_P(br);
2054      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2055      r = get_min_match_length(nodes[backs[0]], min, env);
2056      if (r != 0) break;
2057      for (i = 1; i < br->back_num; i++) {
2058	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2059	r = get_min_match_length(nodes[backs[i]], &tmin, env);
2060	if (r != 0) break;
2061	if (*min > tmin) *min = tmin;
2062      }
2063    }
2064    break;
2065
2066#ifdef USE_SUBEXP_CALL
2067  case NT_CALL:
2068    if (IS_CALL_RECURSION(NCALL(node))) {
2069      EncloseNode* en = NENCLOSE(NCALL(node)->target);
2070      if (IS_ENCLOSE_MIN_FIXED(en))
2071	*min = en->min_len;
2072    }
2073    else
2074      r = get_min_match_length(NCALL(node)->target, min, env);
2075    break;
2076#endif
2077
2078  case NT_LIST:
2079    do {
2080      r = get_min_match_length(NCAR(node), &tmin, env);
2081      if (r == 0) *min += tmin;
2082    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2083    break;
2084
2085  case NT_ALT:
2086    {
2087      Node *x, *y;
2088      y = node;
2089      do {
2090	x = NCAR(y);
2091	r = get_min_match_length(x, &tmin, env);
2092	if (r != 0) break;
2093	if (y == node) *min = tmin;
2094	else if (*min > tmin) *min = tmin;
2095      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2096    }
2097    break;
2098
2099  case NT_STR:
2100    {
2101      StrNode* sn = NSTR(node);
2102      *min = (OnigDistance)(sn->end - sn->s);
2103    }
2104    break;
2105
2106  case NT_CTYPE:
2107    *min = 1;
2108    break;
2109
2110  case NT_CCLASS:
2111  case NT_CANY:
2112    *min = 1;
2113    break;
2114
2115  case NT_QTFR:
2116    {
2117      QtfrNode* qn = NQTFR(node);
2118
2119      if (qn->lower > 0) {
2120	r = get_min_match_length(qn->target, min, env);
2121	if (r == 0)
2122	  *min = distance_multiply(*min, qn->lower);
2123      }
2124    }
2125    break;
2126
2127  case NT_ENCLOSE:
2128    {
2129      EncloseNode* en = NENCLOSE(node);
2130      switch (en->type) {
2131      case ENCLOSE_MEMORY:
2132#ifdef USE_SUBEXP_CALL
2133	if (IS_ENCLOSE_MIN_FIXED(en))
2134	  *min = en->min_len;
2135	else {
2136	  r = get_min_match_length(en->target, min, env);
2137	  if (r == 0) {
2138	    en->min_len = *min;
2139	    SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2140	  }
2141	}
2142	break;
2143#endif
2144      case ENCLOSE_OPTION:
2145      case ENCLOSE_STOP_BACKTRACK:
2146	r = get_min_match_length(en->target, min, env);
2147	break;
2148      }
2149    }
2150    break;
2151
2152  case NT_ANCHOR:
2153  default:
2154    break;
2155  }
2156
2157  return r;
2158}
2159
2160static int
2161get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2162{
2163  OnigDistance tmax;
2164  int r = 0;
2165
2166  *max = 0;
2167  switch (NTYPE(node)) {
2168  case NT_LIST:
2169    do {
2170      r = get_max_match_length(NCAR(node), &tmax, env);
2171      if (r == 0)
2172	*max = distance_add(*max, tmax);
2173    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2174    break;
2175
2176  case NT_ALT:
2177    do {
2178      r = get_max_match_length(NCAR(node), &tmax, env);
2179      if (r == 0 && *max < tmax) *max = tmax;
2180    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2181    break;
2182
2183  case NT_STR:
2184    {
2185      StrNode* sn = NSTR(node);
2186      *max = (OnigDistance)(sn->end - sn->s);
2187    }
2188    break;
2189
2190  case NT_CTYPE:
2191    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2192    break;
2193
2194  case NT_CCLASS:
2195  case NT_CANY:
2196    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2197    break;
2198
2199  case NT_BREF:
2200    {
2201      int i;
2202      int* backs;
2203      Node** nodes = SCANENV_MEM_NODES(env);
2204      BRefNode* br = NBREF(node);
2205      if (br->state & NST_RECURSION) {
2206	*max = ONIG_INFINITE_DISTANCE;
2207	break;
2208      }
2209      backs = BACKREFS_P(br);
2210      for (i = 0; i < br->back_num; i++) {
2211	if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2212	r = get_max_match_length(nodes[backs[i]], &tmax, env);
2213	if (r != 0) break;
2214	if (*max < tmax) *max = tmax;
2215      }
2216    }
2217    break;
2218
2219#ifdef USE_SUBEXP_CALL
2220  case NT_CALL:
2221    if (! IS_CALL_RECURSION(NCALL(node)))
2222      r = get_max_match_length(NCALL(node)->target, max, env);
2223    else
2224      *max = ONIG_INFINITE_DISTANCE;
2225    break;
2226#endif
2227
2228  case NT_QTFR:
2229    {
2230      QtfrNode* qn = NQTFR(node);
2231
2232      if (qn->upper != 0) {
2233	r = get_max_match_length(qn->target, max, env);
2234	if (r == 0 && *max != 0) {
2235	  if (! IS_REPEAT_INFINITE(qn->upper))
2236	    *max = distance_multiply(*max, qn->upper);
2237	  else
2238	    *max = ONIG_INFINITE_DISTANCE;
2239	}
2240      }
2241    }
2242    break;
2243
2244  case NT_ENCLOSE:
2245    {
2246      EncloseNode* en = NENCLOSE(node);
2247      switch (en->type) {
2248      case ENCLOSE_MEMORY:
2249#ifdef USE_SUBEXP_CALL
2250	if (IS_ENCLOSE_MAX_FIXED(en))
2251	  *max = en->max_len;
2252	else {
2253	  r = get_max_match_length(en->target, max, env);
2254	  if (r == 0) {
2255	    en->max_len = *max;
2256	    SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2257	  }
2258	}
2259	break;
2260#endif
2261      case ENCLOSE_OPTION:
2262      case ENCLOSE_STOP_BACKTRACK:
2263	r = get_max_match_length(en->target, max, env);
2264	break;
2265      }
2266    }
2267    break;
2268
2269  case NT_ANCHOR:
2270  default:
2271    break;
2272  }
2273
2274  return r;
2275}
2276
2277#define GET_CHAR_LEN_VARLEN           -1
2278#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2279
2280/* fixed size pattern node only */
2281static int
2282get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2283{
2284  int tlen;
2285  int r = 0;
2286
2287  level++;
2288  *len = 0;
2289  switch (NTYPE(node)) {
2290  case NT_LIST:
2291    do {
2292      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2293      if (r == 0)
2294	*len = distance_add(*len, tlen);
2295    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2296    break;
2297
2298  case NT_ALT:
2299    {
2300      int tlen2;
2301      int varlen = 0;
2302
2303      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2304      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2305	r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2306	if (r == 0) {
2307	  if (tlen != tlen2)
2308	    varlen = 1;
2309	}
2310      }
2311      if (r == 0) {
2312	if (varlen != 0) {
2313	  if (level == 1)
2314	    r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2315	  else
2316	    r = GET_CHAR_LEN_VARLEN;
2317	}
2318	else
2319	  *len = tlen;
2320      }
2321    }
2322    break;
2323
2324  case NT_STR:
2325    {
2326      StrNode* sn = NSTR(node);
2327      UChar *s = sn->s;
2328      while (s < sn->end) {
2329	s += enclen(reg->enc, s);
2330	(*len)++;
2331      }
2332    }
2333    break;
2334
2335  case NT_QTFR:
2336    {
2337      QtfrNode* qn = NQTFR(node);
2338      if (qn->lower == qn->upper) {
2339	r = get_char_length_tree1(qn->target, reg, &tlen, level);
2340	if (r == 0)
2341	  *len = distance_multiply(tlen, qn->lower);
2342      }
2343      else
2344	r = GET_CHAR_LEN_VARLEN;
2345    }
2346    break;
2347
2348#ifdef USE_SUBEXP_CALL
2349  case NT_CALL:
2350    if (! IS_CALL_RECURSION(NCALL(node)))
2351      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2352    else
2353      r = GET_CHAR_LEN_VARLEN;
2354    break;
2355#endif
2356
2357  case NT_CTYPE:
2358    *len = 1;
2359    break;
2360
2361  case NT_CCLASS:
2362  case NT_CANY:
2363    *len = 1;
2364    break;
2365
2366  case NT_ENCLOSE:
2367    {
2368      EncloseNode* en = NENCLOSE(node);
2369      switch (en->type) {
2370      case ENCLOSE_MEMORY:
2371#ifdef USE_SUBEXP_CALL
2372	if (IS_ENCLOSE_CLEN_FIXED(en))
2373	  *len = en->char_len;
2374	else {
2375	  r = get_char_length_tree1(en->target, reg, len, level);
2376	  if (r == 0) {
2377	    en->char_len = *len;
2378	    SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2379	  }
2380	}
2381	break;
2382#endif
2383      case ENCLOSE_OPTION:
2384      case ENCLOSE_STOP_BACKTRACK:
2385	r = get_char_length_tree1(en->target, reg, len, level);
2386	break;
2387      default:
2388	break;
2389      }
2390    }
2391    break;
2392
2393  case NT_ANCHOR:
2394    break;
2395
2396  default:
2397    r = GET_CHAR_LEN_VARLEN;
2398    break;
2399  }
2400
2401  return r;
2402}
2403
2404static int
2405get_char_length_tree(Node* node, regex_t* reg, int* len)
2406{
2407  return get_char_length_tree1(node, reg, len, 0);
2408}
2409
2410/* x is not included y ==>  1 : 0 */
2411static int
2412is_not_included(Node* x, Node* y, regex_t* reg)
2413{
2414  int i, len;
2415  OnigCodePoint code;
2416  UChar *p;
2417  int ytype;
2418
2419 retry:
2420  ytype = NTYPE(y);
2421  switch (NTYPE(x)) {
2422  case NT_CTYPE:
2423    {
2424      switch (ytype) {
2425      case NT_CTYPE:
2426	if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2427	    NCTYPE(y)->not   != NCTYPE(x)->not)
2428	  return 1;
2429	else
2430	  return 0;
2431	break;
2432
2433      case NT_CCLASS:
2434      swap:
2435	{
2436	  Node* tmp;
2437	  tmp = x; x = y; y = tmp;
2438	  goto retry;
2439	}
2440	break;
2441
2442      case NT_STR:
2443	goto swap;
2444	break;
2445
2446      default:
2447	break;
2448      }
2449    }
2450    break;
2451
2452  case NT_CCLASS:
2453    {
2454      CClassNode* xc = NCCLASS(x);
2455      switch (ytype) {
2456      case NT_CTYPE:
2457	switch (NCTYPE(y)->ctype) {
2458	case ONIGENC_CTYPE_WORD:
2459	  if (NCTYPE(y)->not == 0) {
2460	    if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2461	      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2462		if (BITSET_AT(xc->bs, i)) {
2463		  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2464		}
2465	      }
2466	      return 1;
2467	    }
2468	    return 0;
2469	  }
2470	  else {
2471	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2472	      if (! IS_CODE_SB_WORD(reg->enc, i)) {
2473		if (!IS_NCCLASS_NOT(xc)) {
2474		  if (BITSET_AT(xc->bs, i))
2475		    return 0;
2476		}
2477		else {
2478		  if (! BITSET_AT(xc->bs, i))
2479		    return 0;
2480		}
2481	      }
2482	    }
2483	    return 1;
2484	  }
2485	  break;
2486
2487	default:
2488	  break;
2489	}
2490	break;
2491
2492      case NT_CCLASS:
2493	{
2494	  int v;
2495	  CClassNode* yc = NCCLASS(y);
2496
2497	  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2498	    v = BITSET_AT(xc->bs, i);
2499	    if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2500                (v == 0 && IS_NCCLASS_NOT(xc))) {
2501	      v = BITSET_AT(yc->bs, i);
2502	      if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2503                  (v == 0 && IS_NCCLASS_NOT(yc)))
2504		return 0;
2505	    }
2506	  }
2507	  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2508	      (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2509	    return 1;
2510	  return 0;
2511	}
2512	break;
2513
2514      case NT_STR:
2515	goto swap;
2516	break;
2517
2518      default:
2519	break;
2520      }
2521    }
2522    break;
2523
2524  case NT_STR:
2525    {
2526      StrNode* xs = NSTR(x);
2527      if (NSTRING_LEN(x) == 0)
2528	break;
2529
2530      //c = *(xs->s);
2531      switch (ytype) {
2532      case NT_CTYPE:
2533        switch (NCTYPE(y)->ctype) {
2534        case ONIGENC_CTYPE_WORD:
2535          if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2536            return NCTYPE(y)->not;
2537          else
2538            return !(NCTYPE(y)->not);
2539          break;
2540        default:
2541          break;
2542        }
2543        break;
2544
2545      case NT_CCLASS:
2546        {
2547          CClassNode* cc = NCCLASS(y);
2548
2549          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2550                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2551          return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2552        }
2553        break;
2554
2555      case NT_STR:
2556        {
2557          UChar *q;
2558          StrNode* ys = NSTR(y);
2559          len = NSTRING_LEN(x);
2560          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2561          if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2562            /* tiny version */
2563            return 0;
2564          }
2565          else {
2566            for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2567              if (*p != *q) return 1;
2568            }
2569          }
2570        }
2571        break;
2572
2573      default:
2574        break;
2575      }
2576    }
2577    break;
2578
2579  default:
2580    break;
2581  }
2582
2583  return 0;
2584}
2585
2586static Node*
2587get_head_value_node(Node* node, int exact, regex_t* reg)
2588{
2589  Node* n = NULL_NODE;
2590
2591  switch (NTYPE(node)) {
2592  case NT_BREF:
2593  case NT_ALT:
2594  case NT_CANY:
2595#ifdef USE_SUBEXP_CALL
2596  case NT_CALL:
2597#endif
2598    break;
2599
2600  case NT_CTYPE:
2601  case NT_CCLASS:
2602    if (exact == 0) {
2603      n = node;
2604    }
2605    break;
2606
2607  case NT_LIST:
2608    n = get_head_value_node(NCAR(node), exact, reg);
2609    break;
2610
2611  case NT_STR:
2612    {
2613      StrNode* sn = NSTR(node);
2614
2615      if (sn->end <= sn->s)
2616	break;
2617
2618      if (exact != 0 &&
2619	  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2620      }
2621      else {
2622	n = node;
2623      }
2624    }
2625    break;
2626
2627  case NT_QTFR:
2628    {
2629      QtfrNode* qn = NQTFR(node);
2630      if (qn->lower > 0) {
2631	if (IS_NOT_NULL(qn->head_exact))
2632	  n = qn->head_exact;
2633	else
2634	  n = get_head_value_node(qn->target, exact, reg);
2635      }
2636    }
2637    break;
2638
2639  case NT_ENCLOSE:
2640    {
2641      EncloseNode* en = NENCLOSE(node);
2642      switch (en->type) {
2643      case ENCLOSE_OPTION:
2644	{
2645	  OnigOptionType options = reg->options;
2646
2647	  reg->options = NENCLOSE(node)->option;
2648	  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2649	  reg->options = options;
2650	}
2651	break;
2652
2653      case ENCLOSE_MEMORY:
2654      case ENCLOSE_STOP_BACKTRACK:
2655	n = get_head_value_node(en->target, exact, reg);
2656	break;
2657      }
2658    }
2659    break;
2660
2661  case NT_ANCHOR:
2662    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2663      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2664    break;
2665
2666  default:
2667    break;
2668  }
2669
2670  return n;
2671}
2672
2673static int
2674check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2675{
2676  int type, r = 0;
2677
2678  type = NTYPE(node);
2679  if ((NTYPE2BIT(type) & type_mask) == 0)
2680    return 1;
2681
2682  switch (type) {
2683  case NT_LIST:
2684  case NT_ALT:
2685    do {
2686      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2687			  anchor_mask);
2688    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2689    break;
2690
2691  case NT_QTFR:
2692    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2693			anchor_mask);
2694    break;
2695
2696  case NT_ENCLOSE:
2697    {
2698      EncloseNode* en = NENCLOSE(node);
2699      if ((en->type & enclose_mask) == 0)
2700	return 1;
2701
2702      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2703    }
2704    break;
2705
2706  case NT_ANCHOR:
2707    type = NANCHOR(node)->type;
2708    if ((type & anchor_mask) == 0)
2709      return 1;
2710
2711    if (NANCHOR(node)->target)
2712      r = check_type_tree(NANCHOR(node)->target,
2713			  type_mask, enclose_mask, anchor_mask);
2714    break;
2715
2716  default:
2717    break;
2718  }
2719  return r;
2720}
2721
2722#ifdef USE_SUBEXP_CALL
2723
2724#define RECURSION_EXIST       1
2725#define RECURSION_INFINITE    2
2726
2727static int
2728subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2729{
2730  int type;
2731  int r = 0;
2732
2733  type = NTYPE(node);
2734  switch (type) {
2735  case NT_LIST:
2736    {
2737      Node *x;
2738      OnigDistance min;
2739      int ret;
2740
2741      x = node;
2742      do {
2743	ret = subexp_inf_recursive_check(NCAR(x), env, head);
2744	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2745	r |= ret;
2746	if (head) {
2747	  ret = get_min_match_length(NCAR(x), &min, env);
2748	  if (ret != 0) return ret;
2749	  if (min != 0) head = 0;
2750	}
2751      } while (IS_NOT_NULL(x = NCDR(x)));
2752    }
2753    break;
2754
2755  case NT_ALT:
2756    {
2757      int ret;
2758      r = RECURSION_EXIST;
2759      do {
2760	ret = subexp_inf_recursive_check(NCAR(node), env, head);
2761	if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2762	r &= ret;
2763      } while (IS_NOT_NULL(node = NCDR(node)));
2764    }
2765    break;
2766
2767  case NT_QTFR:
2768    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2769    if (r == RECURSION_EXIST) {
2770      if (NQTFR(node)->lower == 0) r = 0;
2771    }
2772    break;
2773
2774  case NT_ANCHOR:
2775    {
2776      AnchorNode* an = NANCHOR(node);
2777      switch (an->type) {
2778      case ANCHOR_PREC_READ:
2779      case ANCHOR_PREC_READ_NOT:
2780      case ANCHOR_LOOK_BEHIND:
2781      case ANCHOR_LOOK_BEHIND_NOT:
2782	r = subexp_inf_recursive_check(an->target, env, head);
2783	break;
2784      }
2785    }
2786    break;
2787
2788  case NT_CALL:
2789    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2790    break;
2791
2792  case NT_ENCLOSE:
2793    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2794      return 0;
2795    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2796      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2797    else {
2798      SET_ENCLOSE_STATUS(node, NST_MARK2);
2799      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2800      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2801    }
2802    break;
2803
2804  default:
2805    break;
2806  }
2807
2808  return r;
2809}
2810
2811static int
2812subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2813{
2814  int type;
2815  int r = 0;
2816
2817  type = NTYPE(node);
2818  switch (type) {
2819  case NT_LIST:
2820  case NT_ALT:
2821    do {
2822      r = subexp_inf_recursive_check_trav(NCAR(node), env);
2823    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2824    break;
2825
2826  case NT_QTFR:
2827    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2828    break;
2829
2830  case NT_ANCHOR:
2831    {
2832      AnchorNode* an = NANCHOR(node);
2833      switch (an->type) {
2834      case ANCHOR_PREC_READ:
2835      case ANCHOR_PREC_READ_NOT:
2836      case ANCHOR_LOOK_BEHIND:
2837      case ANCHOR_LOOK_BEHIND_NOT:
2838	r = subexp_inf_recursive_check_trav(an->target, env);
2839	break;
2840      }
2841    }
2842    break;
2843
2844  case NT_ENCLOSE:
2845    {
2846      EncloseNode* en = NENCLOSE(node);
2847
2848      if (IS_ENCLOSE_RECURSION(en)) {
2849	SET_ENCLOSE_STATUS(node, NST_MARK1);
2850	r = subexp_inf_recursive_check(en->target, env, 1);
2851	if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2852	CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2853      }
2854      r = subexp_inf_recursive_check_trav(en->target, env);
2855    }
2856
2857    break;
2858
2859  default:
2860    break;
2861  }
2862
2863  return r;
2864}
2865
2866static int
2867subexp_recursive_check(Node* node)
2868{
2869  int r = 0;
2870
2871  switch (NTYPE(node)) {
2872  case NT_LIST:
2873  case NT_ALT:
2874    do {
2875      r |= subexp_recursive_check(NCAR(node));
2876    } while (IS_NOT_NULL(node = NCDR(node)));
2877    break;
2878
2879  case NT_QTFR:
2880    r = subexp_recursive_check(NQTFR(node)->target);
2881    break;
2882
2883  case NT_ANCHOR:
2884    {
2885      AnchorNode* an = NANCHOR(node);
2886      switch (an->type) {
2887      case ANCHOR_PREC_READ:
2888      case ANCHOR_PREC_READ_NOT:
2889      case ANCHOR_LOOK_BEHIND:
2890      case ANCHOR_LOOK_BEHIND_NOT:
2891	r = subexp_recursive_check(an->target);
2892	break;
2893      }
2894    }
2895    break;
2896
2897  case NT_CALL:
2898    r = subexp_recursive_check(NCALL(node)->target);
2899    if (r != 0) SET_CALL_RECURSION(node);
2900    break;
2901
2902  case NT_ENCLOSE:
2903    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2904      return 0;
2905    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2906      return 1; /* recursion */
2907    else {
2908      SET_ENCLOSE_STATUS(node, NST_MARK2);
2909      r = subexp_recursive_check(NENCLOSE(node)->target);
2910      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2911    }
2912    break;
2913
2914  default:
2915    break;
2916  }
2917
2918  return r;
2919}
2920
2921
2922static int
2923subexp_recursive_check_trav(Node* node, ScanEnv* env)
2924{
2925#define FOUND_CALLED_NODE    1
2926
2927  int type;
2928  int r = 0;
2929
2930  type = NTYPE(node);
2931  switch (type) {
2932  case NT_LIST:
2933  case NT_ALT:
2934    {
2935      int ret;
2936      do {
2937	ret = subexp_recursive_check_trav(NCAR(node), env);
2938	if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2939	else if (ret < 0) return ret;
2940      } while (IS_NOT_NULL(node = NCDR(node)));
2941    }
2942    break;
2943
2944  case NT_QTFR:
2945    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2946    if (NQTFR(node)->upper == 0) {
2947      if (r == FOUND_CALLED_NODE)
2948	NQTFR(node)->is_refered = 1;
2949    }
2950    break;
2951
2952  case NT_ANCHOR:
2953    {
2954      AnchorNode* an = NANCHOR(node);
2955      switch (an->type) {
2956      case ANCHOR_PREC_READ:
2957      case ANCHOR_PREC_READ_NOT:
2958      case ANCHOR_LOOK_BEHIND:
2959      case ANCHOR_LOOK_BEHIND_NOT:
2960	r = subexp_recursive_check_trav(an->target, env);
2961	break;
2962      }
2963    }
2964    break;
2965
2966  case NT_ENCLOSE:
2967    {
2968      EncloseNode* en = NENCLOSE(node);
2969
2970      if (! IS_ENCLOSE_RECURSION(en)) {
2971	if (IS_ENCLOSE_CALLED(en)) {
2972	  SET_ENCLOSE_STATUS(node, NST_MARK1);
2973	  r = subexp_recursive_check(en->target);
2974	  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2975	  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2976	}
2977      }
2978      r = subexp_recursive_check_trav(en->target, env);
2979      if (IS_ENCLOSE_CALLED(en))
2980	r |= FOUND_CALLED_NODE;
2981    }
2982    break;
2983
2984  default:
2985    break;
2986  }
2987
2988  return r;
2989}
2990
2991static int
2992setup_subexp_call(Node* node, ScanEnv* env)
2993{
2994  int type;
2995  int r = 0;
2996
2997  type = NTYPE(node);
2998  switch (type) {
2999  case NT_LIST:
3000    do {
3001      r = setup_subexp_call(NCAR(node), env);
3002    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3003    break;
3004
3005  case NT_ALT:
3006    do {
3007      r = setup_subexp_call(NCAR(node), env);
3008    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3009    break;
3010
3011  case NT_QTFR:
3012    r = setup_subexp_call(NQTFR(node)->target, env);
3013    break;
3014  case NT_ENCLOSE:
3015    r = setup_subexp_call(NENCLOSE(node)->target, env);
3016    break;
3017
3018  case NT_CALL:
3019    {
3020      CallNode* cn = NCALL(node);
3021      Node** nodes = SCANENV_MEM_NODES(env);
3022
3023      if (cn->group_num != 0) {
3024	int gnum = cn->group_num;
3025
3026#ifdef USE_NAMED_GROUP
3027	if (env->num_named > 0 &&
3028	    IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3029	    !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3030	  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3031	}
3032#endif
3033	if (gnum > env->num_mem) {
3034	  onig_scan_env_set_error_string(env,
3035		 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3036	  return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3037	}
3038
3039#ifdef USE_NAMED_GROUP
3040      set_call_attr:
3041#endif
3042	cn->target = nodes[cn->group_num];
3043	if (IS_NULL(cn->target)) {
3044	  onig_scan_env_set_error_string(env,
3045		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3046	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3047	}
3048	SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3049	BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3050	cn->unset_addr_list = env->unset_addr_list;
3051      }
3052#ifdef USE_NAMED_GROUP
3053      else {
3054	int *refs;
3055
3056	int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3057					   &refs);
3058	if (n <= 0) {
3059	  onig_scan_env_set_error_string(env,
3060		 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3061	  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3062	}
3063	else if (n > 1) {
3064	  onig_scan_env_set_error_string(env,
3065	    ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3066	  return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3067	}
3068	else {
3069	  cn->group_num = refs[0];
3070	  goto set_call_attr;
3071	}
3072      }
3073#endif
3074    }
3075    break;
3076
3077  case NT_ANCHOR:
3078    {
3079      AnchorNode* an = NANCHOR(node);
3080
3081      switch (an->type) {
3082      case ANCHOR_PREC_READ:
3083      case ANCHOR_PREC_READ_NOT:
3084      case ANCHOR_LOOK_BEHIND:
3085      case ANCHOR_LOOK_BEHIND_NOT:
3086	r = setup_subexp_call(an->target, env);
3087	break;
3088      }
3089    }
3090    break;
3091
3092  default:
3093    break;
3094  }
3095
3096  return r;
3097}
3098#endif
3099
3100/* divide different length alternatives in look-behind.
3101  (?<=A|B) ==> (?<=A)|(?<=B)
3102  (?<!A|B) ==> (?<!A)(?<!B)
3103*/
3104static int
3105divide_look_behind_alternatives(Node* node)
3106{
3107  Node *head, *np, *insert_node;
3108  AnchorNode* an = NANCHOR(node);
3109  int anc_type = an->type;
3110
3111  head = an->target;
3112  np = NCAR(head);
3113  swap_node(node, head);
3114  NCAR(node) = head;
3115  NANCHOR(head)->target = np;
3116
3117  np = node;
3118  while ((np = NCDR(np)) != NULL_NODE) {
3119    insert_node = onig_node_new_anchor(anc_type);
3120    CHECK_NULL_RETURN_MEMERR(insert_node);
3121    NANCHOR(insert_node)->target = NCAR(np);
3122    NCAR(np) = insert_node;
3123  }
3124
3125  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3126    np = node;
3127    do {
3128      SET_NTYPE(np, NT_LIST);  /* alt -> list */
3129    } while ((np = NCDR(np)) != NULL_NODE);
3130  }
3131  return 0;
3132}
3133
3134static int
3135setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3136{
3137  int r, len;
3138  AnchorNode* an = NANCHOR(node);
3139
3140  r = get_char_length_tree(an->target, reg, &len);
3141  if (r == 0)
3142    an->char_len = len;
3143  else if (r == GET_CHAR_LEN_VARLEN)
3144    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3145  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3146    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3147      r = divide_look_behind_alternatives(node);
3148    else
3149      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3150  }
3151
3152  return r;
3153}
3154
3155static int
3156next_setup(Node* node, Node* next_node, regex_t* reg)
3157{
3158  int type;
3159
3160 retry:
3161  type = NTYPE(node);
3162  if (type == NT_QTFR) {
3163    QtfrNode* qn = NQTFR(node);
3164    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3165#ifdef USE_QTFR_PEEK_NEXT
3166      Node* n = get_head_value_node(next_node, 1, reg);
3167      /* '\0': for UTF-16BE etc... */
3168      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3169	qn->next_head_exact = n;
3170      }
3171#endif
3172      /* automatic posseivation a*b ==> (?>a*)b */
3173      if (qn->lower <= 1) {
3174	int ttype = NTYPE(qn->target);
3175	if (IS_NODE_TYPE_SIMPLE(ttype)) {
3176	  Node *x, *y;
3177	  x = get_head_value_node(qn->target, 0, reg);
3178	  if (IS_NOT_NULL(x)) {
3179	    y = get_head_value_node(next_node,  0, reg);
3180	    if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3181	      Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3182	      CHECK_NULL_RETURN_MEMERR(en);
3183	      SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3184	      swap_node(node, en);
3185	      NENCLOSE(node)->target = en;
3186	    }
3187	  }
3188	}
3189      }
3190    }
3191  }
3192  else if (type == NT_ENCLOSE) {
3193    EncloseNode* en = NENCLOSE(node);
3194    if (en->type == ENCLOSE_MEMORY) {
3195      node = en->target;
3196      goto retry;
3197    }
3198  }
3199  return 0;
3200}
3201
3202
3203static int
3204update_string_node_case_fold(regex_t* reg, Node *node)
3205{
3206  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3207  UChar *sbuf, *ebuf, *sp;
3208  int r, i, len, sbuf_size;
3209  StrNode* sn = NSTR(node);
3210
3211  end = sn->end;
3212  sbuf_size = (int)(end - sn->s) * 2;
3213  sbuf = (UChar* )xmalloc(sbuf_size);
3214  CHECK_NULL_RETURN_MEMERR(sbuf);
3215  ebuf = sbuf + sbuf_size;
3216
3217  sp = sbuf;
3218  p = sn->s;
3219  while (p < end) {
3220    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3221    for (i = 0; i < len; i++) {
3222      if (sp >= ebuf) {
3223        sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3224        CHECK_NULL_RETURN_MEMERR(sbuf);
3225        sp = sbuf + sbuf_size;
3226        sbuf_size *= 2;
3227        ebuf = sbuf + sbuf_size;
3228      }
3229
3230      *sp++ = buf[i];
3231    }
3232  }
3233
3234  r = onig_node_str_set(node, sbuf, sp);
3235  if (r != 0) {
3236    xfree(sbuf);
3237    return r;
3238  }
3239
3240  xfree(sbuf);
3241  return 0;
3242}
3243
3244static int
3245expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3246				 regex_t* reg)
3247{
3248  int r;
3249  Node *node;
3250
3251  node = onig_node_new_str(s, end);
3252  if (IS_NULL(node)) return ONIGERR_MEMORY;
3253
3254  r = update_string_node_case_fold(reg, node);
3255  if (r != 0) {
3256    onig_node_free(node);
3257    return r;
3258  }
3259
3260  NSTRING_SET_AMBIG(node);
3261  NSTRING_SET_DONT_GET_OPT_INFO(node);
3262  *rnode = node;
3263  return 0;
3264}
3265
3266static int
3267expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3268			    UChar *p, int slen, UChar *end,
3269			    regex_t* reg, Node **rnode)
3270{
3271  int r, i, j, len, varlen;
3272  Node *anode, *var_anode, *snode, *xnode, *an;
3273  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3274  xnode = NULL_NODE;
3275
3276  *rnode = var_anode = NULL_NODE;
3277
3278  varlen = 0;
3279  for (i = 0; i < item_num; i++) {
3280    if (items[i].byte_len != slen) {
3281      varlen = 1;
3282      break;
3283    }
3284  }
3285
3286  if (varlen != 0) {
3287    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3288    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3289
3290    xnode = onig_node_new_list(NULL, NULL);
3291    if (IS_NULL(xnode)) goto mem_err;
3292    NCAR(var_anode) = xnode;
3293
3294    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3295    if (IS_NULL(anode)) goto mem_err;
3296    NCAR(xnode) = anode;
3297  }
3298  else {
3299    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3300    if (IS_NULL(anode)) return ONIGERR_MEMORY;
3301  }
3302
3303  snode = onig_node_new_str(p, p + slen);
3304  if (IS_NULL(snode)) goto mem_err;
3305
3306  NCAR(anode) = snode;
3307
3308  for (i = 0; i < item_num; i++) {
3309    snode = onig_node_new_str(NULL, NULL);
3310    if (IS_NULL(snode)) goto mem_err;
3311
3312    for (j = 0; j < items[i].code_len; j++) {
3313      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3314      if (len < 0) {
3315	r = len;
3316	goto mem_err2;
3317      }
3318
3319      r = onig_node_str_cat(snode, buf, buf + len);
3320      if (r != 0) goto mem_err2;
3321    }
3322
3323    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3324    if (IS_NULL(an)) {
3325      goto mem_err2;
3326    }
3327
3328    if (items[i].byte_len != slen) {
3329      Node *rem = NULL_NODE;
3330      UChar *q = p + items[i].byte_len;
3331
3332      if (q < end) {
3333        r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3334        if (r != 0) {
3335          onig_node_free(an);
3336          goto mem_err2;
3337        }
3338
3339        xnode = onig_node_list_add(NULL_NODE, snode);
3340        if (IS_NULL(xnode)) {
3341          onig_node_free(an);
3342          onig_node_free(rem);
3343          goto mem_err2;
3344        }
3345        if (IS_NULL(onig_node_list_add(xnode, rem))) {
3346          onig_node_free(an);
3347          onig_node_free(xnode);
3348          onig_node_free(rem);
3349          goto mem_err;
3350        }
3351
3352        NCAR(an) = xnode;
3353      }
3354      else {
3355        NCAR(an) = snode;
3356      }
3357
3358      if (var_anode == NULL) {
3359        onig_node_free(an);
3360        onig_node_free(xnode);
3361        onig_node_free(rem);
3362        goto mem_err2;
3363      }
3364      NCDR(var_anode) = an;
3365      var_anode = an;
3366    }
3367    else {
3368      NCAR(an)     = snode;
3369      NCDR(anode) = an;
3370      anode = an;
3371    }
3372  }
3373
3374  return varlen;
3375
3376 mem_err2:
3377  onig_node_free(snode);
3378
3379 mem_err:
3380  onig_node_free(*rnode);
3381
3382  return ONIGERR_MEMORY;
3383}
3384
3385static int
3386expand_case_fold_string(Node* node, regex_t* reg)
3387{
3388#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3389
3390  int r, n, len, alt_num;
3391  UChar *start, *end, *p;
3392  Node *top_root, *root, *snode, *prev_node;
3393  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3394  StrNode* sn = NSTR(node);
3395
3396  if (NSTRING_IS_AMBIG(node)) return 0;
3397
3398  start = sn->s;
3399  end   = sn->end;
3400  if (start >= end) return 0;
3401
3402  r = 0;
3403  top_root = root = prev_node = snode = NULL_NODE;
3404  alt_num = 1;
3405  p = start;
3406  while (p < end) {
3407    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3408					   p, end, items);
3409    if (n < 0) {
3410      r = n;
3411      goto err;
3412    }
3413
3414    len = enclen(reg->enc, p);
3415
3416    if (n == 0) {
3417      if (IS_NULL(snode)) {
3418	if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3419	  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3420	  if (IS_NULL(root)) {
3421	    onig_node_free(prev_node);
3422	    goto mem_err;
3423	  }
3424	}
3425
3426	prev_node = snode = onig_node_new_str(NULL, NULL);
3427	if (IS_NULL(snode)) goto mem_err;
3428	if (IS_NOT_NULL(root)) {
3429	  if (IS_NULL(onig_node_list_add(root, snode))) {
3430	    onig_node_free(snode);
3431	    goto mem_err;
3432	  }
3433	}
3434      }
3435
3436      r = onig_node_str_cat(snode, p, p + len);
3437      if (r != 0) goto err;
3438    }
3439    else {
3440      alt_num *= (n + 1);
3441      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3442
3443      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3444	top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3445	if (IS_NULL(root)) {
3446	  onig_node_free(prev_node);
3447	  goto mem_err;
3448	}
3449      }
3450
3451      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3452      if (r < 0) goto mem_err;
3453      if (r == 1) {
3454	if (IS_NULL(root)) {
3455	  top_root = prev_node;
3456	}
3457	else {
3458	  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3459	    onig_node_free(prev_node);
3460	    goto mem_err;
3461	  }
3462	}
3463
3464	root = NCAR(prev_node);
3465      }
3466      else { /* r == 0 */
3467	if (IS_NOT_NULL(root)) {
3468	  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3469	    onig_node_free(prev_node);
3470	    goto mem_err;
3471	  }
3472	}
3473      }
3474
3475      snode = NULL_NODE;
3476    }
3477
3478    p += len;
3479  }
3480
3481  if (p < end) {
3482    Node *srem;
3483
3484    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3485    if (r != 0) goto mem_err;
3486
3487    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3488      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3489      if (IS_NULL(root)) {
3490	onig_node_free(srem);
3491	onig_node_free(prev_node);
3492	goto mem_err;
3493      }
3494    }
3495
3496    if (IS_NULL(root)) {
3497      prev_node = srem;
3498    }
3499    else {
3500      if (IS_NULL(onig_node_list_add(root, srem))) {
3501	onig_node_free(srem);
3502	goto mem_err;
3503      }
3504    }
3505  }
3506
3507  /* ending */
3508  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3509  swap_node(node, top_root);
3510  onig_node_free(top_root);
3511  return 0;
3512
3513 mem_err:
3514  r = ONIGERR_MEMORY;
3515
3516 err:
3517  onig_node_free(top_root);
3518  return r;
3519}
3520
3521
3522#ifdef USE_COMBINATION_EXPLOSION_CHECK
3523
3524#define CEC_THRES_NUM_BIG_REPEAT         512
3525#define CEC_INFINITE_NUM          0x7fffffff
3526
3527#define CEC_IN_INFINITE_REPEAT    (1<<0)
3528#define CEC_IN_FINITE_REPEAT      (1<<1)
3529#define CEC_CONT_BIG_REPEAT       (1<<2)
3530
3531static int
3532setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3533{
3534  int type;
3535  int r = state;
3536
3537  type = NTYPE(node);
3538  switch (type) {
3539  case NT_LIST:
3540    {
3541      Node* prev = NULL_NODE;
3542      do {
3543	r = setup_comb_exp_check(NCAR(node), r, env);
3544	prev = NCAR(node);
3545      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3546    }
3547    break;
3548
3549  case NT_ALT:
3550    {
3551      int ret;
3552      do {
3553	ret = setup_comb_exp_check(NCAR(node), state, env);
3554	r |= ret;
3555      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3556    }
3557    break;
3558
3559  case NT_QTFR:
3560    {
3561      int child_state = state;
3562      int add_state = 0;
3563      QtfrNode* qn = NQTFR(node);
3564      Node* target = qn->target;
3565      int var_num;
3566
3567      if (! IS_REPEAT_INFINITE(qn->upper)) {
3568	if (qn->upper > 1) {
3569	  /* {0,1}, {1,1} are allowed */
3570	  child_state |= CEC_IN_FINITE_REPEAT;
3571
3572	  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3573	  if (env->backrefed_mem == 0) {
3574	    if (NTYPE(qn->target) == NT_ENCLOSE) {
3575	      EncloseNode* en = NENCLOSE(qn->target);
3576	      if (en->type == ENCLOSE_MEMORY) {
3577		if (NTYPE(en->target) == NT_QTFR) {
3578		  QtfrNode* q = NQTFR(en->target);
3579		  if (IS_REPEAT_INFINITE(q->upper)
3580		      && q->greedy == qn->greedy) {
3581		    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3582		    if (qn->upper == 1)
3583		      child_state = state;
3584		  }
3585		}
3586	      }
3587	    }
3588	  }
3589	}
3590      }
3591
3592      if (state & CEC_IN_FINITE_REPEAT) {
3593	qn->comb_exp_check_num = -1;
3594      }
3595      else {
3596	if (IS_REPEAT_INFINITE(qn->upper)) {
3597	  var_num = CEC_INFINITE_NUM;
3598	  child_state |= CEC_IN_INFINITE_REPEAT;
3599	}
3600	else {
3601	  var_num = qn->upper - qn->lower;
3602	}
3603
3604	if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3605	  add_state |= CEC_CONT_BIG_REPEAT;
3606
3607	if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3608	    ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3609	     var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3610	  if (qn->comb_exp_check_num == 0) {
3611	    env->num_comb_exp_check++;
3612	    qn->comb_exp_check_num = env->num_comb_exp_check;
3613	    if (env->curr_max_regnum > env->comb_exp_max_regnum)
3614	      env->comb_exp_max_regnum = env->curr_max_regnum;
3615	  }
3616	}
3617      }
3618
3619      r = setup_comb_exp_check(target, child_state, env);
3620      r |= add_state;
3621    }
3622    break;
3623
3624  case NT_ENCLOSE:
3625    {
3626      EncloseNode* en = NENCLOSE(node);
3627
3628      switch (en->type) {
3629      case ENCLOSE_MEMORY:
3630	{
3631	  if (env->curr_max_regnum < en->regnum)
3632	    env->curr_max_regnum = en->regnum;
3633
3634	  r = setup_comb_exp_check(en->target, state, env);
3635	}
3636	break;
3637
3638      default:
3639	r = setup_comb_exp_check(en->target, state, env);
3640	break;
3641      }
3642    }
3643    break;
3644
3645#ifdef USE_SUBEXP_CALL
3646  case NT_CALL:
3647    if (IS_CALL_RECURSION(NCALL(node)))
3648      env->has_recursion = 1;
3649    else
3650      r = setup_comb_exp_check(NCALL(node)->target, state, env);
3651    break;
3652#endif
3653
3654  default:
3655    break;
3656  }
3657
3658  return r;
3659}
3660#endif
3661
3662#define IN_ALT        (1<<0)
3663#define IN_NOT        (1<<1)
3664#define IN_REPEAT     (1<<2)
3665#define IN_VAR_REPEAT (1<<3)
3666
3667/* setup_tree does the following work.
3668 1. check empty loop. (set qn->target_empty_info)
3669 2. expand ignore-case in char class.
3670 3. set memory status bit flags. (reg->mem_stats)
3671 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3672 5. find invalid patterns in look-behind.
3673 6. expand repeated string.
3674 */
3675static int
3676setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3677{
3678  int type;
3679  int r = 0;
3680
3681  type = NTYPE(node);
3682  switch (type) {
3683  case NT_LIST:
3684    {
3685      Node* prev = NULL_NODE;
3686      do {
3687	r = setup_tree(NCAR(node), reg, state, env);
3688	if (IS_NOT_NULL(prev) && r == 0) {
3689	  r = next_setup(prev, NCAR(node), reg);
3690	}
3691	prev = NCAR(node);
3692      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3693    }
3694    break;
3695
3696  case NT_ALT:
3697    do {
3698      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3699    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3700    break;
3701
3702  case NT_CCLASS:
3703    break;
3704
3705  case NT_STR:
3706    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3707      r = expand_case_fold_string(node, reg);
3708    }
3709    break;
3710
3711  case NT_CTYPE:
3712  case NT_CANY:
3713    break;
3714
3715#ifdef USE_SUBEXP_CALL
3716  case NT_CALL:
3717    break;
3718#endif
3719
3720  case NT_BREF:
3721    {
3722      int i;
3723      int* p;
3724      Node** nodes = SCANENV_MEM_NODES(env);
3725      BRefNode* br = NBREF(node);
3726      p = BACKREFS_P(br);
3727      for (i = 0; i < br->back_num; i++) {
3728	if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3729	BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3730	BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3731#ifdef USE_BACKREF_WITH_LEVEL
3732	if (IS_BACKREF_NEST_LEVEL(br)) {
3733	  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3734	}
3735#endif
3736	SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3737      }
3738    }
3739    break;
3740
3741  case NT_QTFR:
3742    {
3743      OnigDistance d;
3744      QtfrNode* qn = NQTFR(node);
3745      Node* target = qn->target;
3746
3747      if ((state & IN_REPEAT) != 0) {
3748        qn->state |= NST_IN_REPEAT;
3749      }
3750
3751      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3752	r = get_min_match_length(target, &d, env);
3753	if (r) break;
3754	if (d == 0) {
3755	  qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3756#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3757	  r = quantifiers_memory_node_info(target);
3758	  if (r < 0) break;
3759	  if (r > 0) {
3760	    qn->target_empty_info = r;
3761	  }
3762#endif
3763#if 0
3764	  r = get_max_match_length(target, &d, env);
3765	  if (r == 0 && d == 0) {
3766	    /*  ()* ==> ()?, ()+ ==> ()  */
3767	    qn->upper = 1;
3768	    if (qn->lower > 1) qn->lower = 1;
3769	    if (NTYPE(target) == NT_STR) {
3770	      qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3771	    }
3772	  }
3773#endif
3774	}
3775      }
3776
3777      state |= IN_REPEAT;
3778      if (qn->lower != qn->upper)
3779	state |= IN_VAR_REPEAT;
3780      r = setup_tree(target, reg, state, env);
3781      if (r) break;
3782
3783      /* expand string */
3784#define EXPAND_STRING_MAX_LENGTH  100
3785      if (NTYPE(target) == NT_STR) {
3786	if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3787	    qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3788	  int len = NSTRING_LEN(target);
3789	  StrNode* sn = NSTR(target);
3790
3791	  if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3792	    int i, n = qn->lower;
3793	    onig_node_conv_to_str_node(node, NSTR(target)->flag);
3794	    for (i = 0; i < n; i++) {
3795	      r = onig_node_str_cat(node, sn->s, sn->end);
3796	      if (r) break;
3797	    }
3798	    onig_node_free(target);
3799	    break; /* break case NT_QTFR: */
3800	  }
3801	}
3802      }
3803
3804#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3805      if (qn->greedy && (qn->target_empty_info != 0)) {
3806	if (NTYPE(target) == NT_QTFR) {
3807	  QtfrNode* tqn = NQTFR(target);
3808	  if (IS_NOT_NULL(tqn->head_exact)) {
3809	    qn->head_exact  = tqn->head_exact;
3810	    tqn->head_exact = NULL;
3811	  }
3812	}
3813	else {
3814	  qn->head_exact = get_head_value_node(qn->target, 1, reg);
3815	}
3816      }
3817#endif
3818    }
3819    break;
3820
3821  case NT_ENCLOSE:
3822    {
3823      EncloseNode* en = NENCLOSE(node);
3824
3825      switch (en->type) {
3826      case ENCLOSE_OPTION:
3827	{
3828	  OnigOptionType options = reg->options;
3829	  reg->options = NENCLOSE(node)->option;
3830	  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3831	  reg->options = options;
3832	}
3833	break;
3834
3835      case ENCLOSE_MEMORY:
3836	if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3837	  BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
3838	  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3839	}
3840        r = setup_tree(en->target, reg, state, env);
3841        break;
3842
3843      case ENCLOSE_STOP_BACKTRACK:
3844	{
3845	  Node* target = en->target;
3846	  r = setup_tree(target, reg, state, env);
3847	  if (NTYPE(target) == NT_QTFR) {
3848	    QtfrNode* tqn = NQTFR(target);
3849	    if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3850		tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
3851	      int qtype = NTYPE(tqn->target);
3852	      if (IS_NODE_TYPE_SIMPLE(qtype))
3853		SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
3854	    }
3855	  }
3856	}
3857	break;
3858      }
3859    }
3860    break;
3861
3862  case NT_ANCHOR:
3863    {
3864      AnchorNode* an = NANCHOR(node);
3865
3866      switch (an->type) {
3867      case ANCHOR_PREC_READ:
3868	r = setup_tree(an->target, reg, state, env);
3869	break;
3870      case ANCHOR_PREC_READ_NOT:
3871	r = setup_tree(an->target, reg, (state | IN_NOT), env);
3872	break;
3873
3874/* allowed node types in look-behind */
3875#define ALLOWED_TYPE_IN_LB  \
3876  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3877    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3878
3879#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
3880#define ALLOWED_ENCLOSE_IN_LB_NOT   0
3881
3882#define ALLOWED_ANCHOR_IN_LB \
3883( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3884#define ALLOWED_ANCHOR_IN_LB_NOT \
3885( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3886
3887      case ANCHOR_LOOK_BEHIND:
3888	{
3889	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3890			      ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
3891	  if (r < 0) return r;
3892	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3893	  r = setup_look_behind(node, reg, env);
3894	  if (r != 0) return r;
3895	  r = setup_tree(an->target, reg, state, env);
3896	}
3897	break;
3898
3899      case ANCHOR_LOOK_BEHIND_NOT:
3900	{
3901	  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3902		      ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
3903	  if (r < 0) return r;
3904	  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3905	  r = setup_look_behind(node, reg, env);
3906	  if (r != 0) return r;
3907	  r = setup_tree(an->target, reg, (state | IN_NOT), env);
3908	}
3909	break;
3910      }
3911    }
3912    break;
3913
3914  default:
3915    break;
3916  }
3917
3918  return r;
3919}
3920
3921/* set skip map for Boyer-Moor search */
3922static int
3923set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3924	    UChar skip[], int** int_skip)
3925{
3926  int i, len;
3927
3928  len = (int)(end - s);
3929  if (len < ONIG_CHAR_TABLE_SIZE) {
3930    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len;
3931
3932    for (i = 0; i < len - 1; i++)
3933      skip[s[i]] = (UChar)(len - 1 - i);
3934  }
3935  else {
3936    if (IS_NULL(*int_skip)) {
3937      *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3938      if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3939    }
3940    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
3941
3942    for (i = 0; i < len - 1; i++)
3943      (*int_skip)[s[i]] = len - 1 - i;
3944  }
3945  return 0;
3946}
3947
3948#define OPT_EXACT_MAXLEN   24
3949
3950typedef struct {
3951  OnigDistance min;  /* min byte length */
3952  OnigDistance max;  /* max byte length */
3953} MinMaxLen;
3954
3955typedef struct {
3956  MinMaxLen        mmd;
3957  OnigEncoding     enc;
3958  OnigOptionType   options;
3959  OnigCaseFoldType case_fold_flag;
3960  ScanEnv*         scan_env;
3961} OptEnv;
3962
3963typedef struct {
3964  int left_anchor;
3965  int right_anchor;
3966} OptAncInfo;
3967
3968typedef struct {
3969  MinMaxLen  mmd; /* info position */
3970  OptAncInfo anc;
3971
3972  int   reach_end;
3973  int   ignore_case;
3974  int   len;
3975  UChar s[OPT_EXACT_MAXLEN];
3976} OptExactInfo;
3977
3978typedef struct {
3979  MinMaxLen mmd; /* info position */
3980  OptAncInfo anc;
3981
3982  int   value;      /* weighted value */
3983  UChar map[ONIG_CHAR_TABLE_SIZE];
3984} OptMapInfo;
3985
3986typedef struct {
3987  MinMaxLen    len;
3988
3989  OptAncInfo   anc;
3990  OptExactInfo exb;    /* boundary */
3991  OptExactInfo exm;    /* middle */
3992  OptExactInfo expr;   /* prec read (?=...) */
3993
3994  OptMapInfo   map;   /* boundary */
3995} NodeOptInfo;
3996
3997
3998static int
3999map_position_value(OnigEncoding enc, int i)
4000{
4001  static const short int ByteValTable[] = {
4002     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4003     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4004    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4005     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4006     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4007     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4008     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4009     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4010  };
4011
4012  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4013    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4014      return 20;
4015    else
4016      return (int )ByteValTable[i];
4017  }
4018  else
4019    return 4;   /* Take it easy. */
4020}
4021
4022static int
4023distance_value(MinMaxLen* mm)
4024{
4025  /* 1000 / (min-max-dist + 1) */
4026  static const short int dist_vals[] = {
4027    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4028      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4029      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4030      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4031      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4032      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4033      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4034      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4035      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4036      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4037  };
4038
4039  int d;
4040
4041  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4042
4043  d = mm->max - mm->min;
4044  if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4045    /* return dist_vals[d] * 16 / (mm->min + 12); */
4046    return (int )dist_vals[d];
4047  else
4048    return 1;
4049}
4050
4051static int
4052comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4053{
4054  if (v2 <= 0) return -1;
4055  if (v1 <= 0) return  1;
4056
4057  v1 *= distance_value(d1);
4058  v2 *= distance_value(d2);
4059
4060  if (v2 > v1) return  1;
4061  if (v2 < v1) return -1;
4062
4063  if (d2->min < d1->min) return  1;
4064  if (d2->min > d1->min) return -1;
4065  return 0;
4066}
4067
4068static int
4069is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4070{
4071  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4072}
4073
4074
4075static void
4076set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4077{
4078  mml->min = min;
4079  mml->max = max;
4080}
4081
4082static void
4083clear_mml(MinMaxLen* mml)
4084{
4085  mml->min = mml->max = 0;
4086}
4087
4088static void
4089copy_mml(MinMaxLen* to, MinMaxLen* from)
4090{
4091  to->min = from->min;
4092  to->max = from->max;
4093}
4094
4095static void
4096add_mml(MinMaxLen* to, MinMaxLen* from)
4097{
4098  to->min = distance_add(to->min, from->min);
4099  to->max = distance_add(to->max, from->max);
4100}
4101
4102#if 0
4103static void
4104add_len_mml(MinMaxLen* to, OnigDistance len)
4105{
4106  to->min = distance_add(to->min, len);
4107  to->max = distance_add(to->max, len);
4108}
4109#endif
4110
4111static void
4112alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4113{
4114  if (to->min > from->min) to->min = from->min;
4115  if (to->max < from->max) to->max = from->max;
4116}
4117
4118static void
4119copy_opt_env(OptEnv* to, OptEnv* from)
4120{
4121  CopyMem (to, from, sizeof (OptEnv));
4122}
4123
4124static void
4125clear_opt_anc_info(OptAncInfo* anc)
4126{
4127  anc->left_anchor  = 0;
4128  anc->right_anchor = 0;
4129}
4130
4131static void
4132copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4133{
4134  CopyMem (to, from, sizeof (OptAncInfo));
4135}
4136
4137static void
4138concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4139		    OnigDistance left_len, OnigDistance right_len)
4140{
4141  clear_opt_anc_info(to);
4142
4143  to->left_anchor = left->left_anchor;
4144  if (left_len == 0) {
4145    to->left_anchor |= right->left_anchor;
4146  }
4147
4148  to->right_anchor = right->right_anchor;
4149  if (right_len == 0) {
4150    to->right_anchor |= left->right_anchor;
4151  }
4152}
4153
4154static int
4155is_left_anchor(int anc)
4156{
4157  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4158      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4159      anc == ANCHOR_PREC_READ_NOT)
4160    return 0;
4161
4162  return 1;
4163}
4164
4165static int
4166is_set_opt_anc_info(OptAncInfo* to, int anc)
4167{
4168  if ((to->left_anchor & anc) != 0) return 1;
4169
4170  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4171}
4172
4173static void
4174add_opt_anc_info(OptAncInfo* to, int anc)
4175{
4176  if (is_left_anchor(anc))
4177    to->left_anchor |= anc;
4178  else
4179    to->right_anchor |= anc;
4180}
4181
4182static void
4183remove_opt_anc_info(OptAncInfo* to, int anc)
4184{
4185  if (is_left_anchor(anc))
4186    to->left_anchor &= ~anc;
4187  else
4188    to->right_anchor &= ~anc;
4189}
4190
4191static void
4192alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4193{
4194  to->left_anchor  &= add->left_anchor;
4195  to->right_anchor &= add->right_anchor;
4196}
4197
4198static int
4199is_full_opt_exact_info(OptExactInfo* ex)
4200{
4201  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4202}
4203
4204static void
4205clear_opt_exact_info(OptExactInfo* ex)
4206{
4207  clear_mml(&ex->mmd);
4208  clear_opt_anc_info(&ex->anc);
4209  ex->reach_end   = 0;
4210  ex->ignore_case = 0;
4211  ex->len         = 0;
4212  ex->s[0]        = '\0';
4213}
4214
4215static void
4216copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4217{
4218  CopyMem (to, from, sizeof (OptExactInfo));
4219}
4220
4221static void
4222concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4223{
4224  int i, j, len;
4225  UChar *p, *end;
4226  OptAncInfo tanc;
4227
4228  if (! to->ignore_case && add->ignore_case) {
4229    if (to->len >= add->len) return ;  /* avoid */
4230
4231    to->ignore_case = 1;
4232  }
4233
4234  p = add->s;
4235  end = p + add->len;
4236  for (i = to->len; p < end; ) {
4237    len = enclen(enc, p);
4238    if (i + len > OPT_EXACT_MAXLEN) break;
4239    for (j = 0; j < len && p < end; j++)
4240      to->s[i++] = *p++;
4241  }
4242
4243  to->len = i;
4244  to->reach_end = (p == end ? add->reach_end : 0);
4245
4246  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4247  if (! to->reach_end) tanc.right_anchor = 0;
4248  copy_opt_anc_info(&to->anc, &tanc);
4249}
4250
4251static void
4252concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4253			  int raw ARG_UNUSED, OnigEncoding enc)
4254{
4255  int i, j, len;
4256  UChar *p;
4257
4258  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4259    len = enclen(enc, p);
4260    if (i + len > OPT_EXACT_MAXLEN) break;
4261    for (j = 0; j < len && p < end; j++)
4262      to->s[i++] = *p++;
4263  }
4264
4265  to->len = i;
4266}
4267
4268static void
4269alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4270{
4271  int i, j, len;
4272
4273  if (add->len == 0 || to->len == 0) {
4274    clear_opt_exact_info(to);
4275    return ;
4276  }
4277
4278  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4279    clear_opt_exact_info(to);
4280    return ;
4281  }
4282
4283  for (i = 0; i < to->len && i < add->len; ) {
4284    if (to->s[i] != add->s[i]) break;
4285    len = enclen(env->enc, to->s + i);
4286
4287    for (j = 1; j < len; j++) {
4288      if (to->s[i+j] != add->s[i+j]) break;
4289    }
4290    if (j < len) break;
4291    i += len;
4292  }
4293
4294  if (! add->reach_end || i < add->len || i < to->len) {
4295    to->reach_end = 0;
4296  }
4297  to->len = i;
4298  to->ignore_case |= add->ignore_case;
4299
4300  alt_merge_opt_anc_info(&to->anc, &add->anc);
4301  if (! to->reach_end) to->anc.right_anchor = 0;
4302}
4303
4304static void
4305select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4306{
4307  int v1, v2;
4308
4309  v1 = now->len;
4310  v2 = alt->len;
4311
4312  if (v2 == 0) {
4313    return ;
4314  }
4315  else if (v1 == 0) {
4316    copy_opt_exact_info(now, alt);
4317    return ;
4318  }
4319  else if (v1 <= 2 && v2 <= 2) {
4320    /* ByteValTable[x] is big value --> low price */
4321    v2 = map_position_value(enc, now->s[0]);
4322    v1 = map_position_value(enc, alt->s[0]);
4323
4324    if (now->len > 1) v1 += 5;
4325    if (alt->len > 1) v2 += 5;
4326  }
4327
4328  if (now->ignore_case == 0) v1 *= 2;
4329  if (alt->ignore_case == 0) v2 *= 2;
4330
4331  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4332    copy_opt_exact_info(now, alt);
4333}
4334
4335static void
4336clear_opt_map_info(OptMapInfo* map)
4337{
4338  static const OptMapInfo clean_info = {
4339    {0, 0}, {0, 0}, 0,
4340    {
4341      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4342      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4343      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4344      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4345      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4346      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4347      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4348      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4349      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4350      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4351      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4352      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4353      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4354      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4355      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4356      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4357    }
4358  };
4359
4360  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4361}
4362
4363static void
4364copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4365{
4366  CopyMem (to, from, sizeof (OptMapInfo));
4367}
4368
4369static void
4370add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4371{
4372  if (map->map[c] == 0) {
4373    map->map[c] = 1;
4374    map->value += map_position_value(enc, c);
4375  }
4376}
4377
4378static int
4379add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4380                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4381{
4382  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4383  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4384  int i, n;
4385
4386  add_char_opt_map_info(map, p[0], enc);
4387
4388  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4389  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4390  if (n < 0) return n;
4391
4392  for (i = 0; i < n; i++) {
4393    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4394    add_char_opt_map_info(map, buf[0], enc);
4395  }
4396
4397  return 0;
4398}
4399
4400static void
4401select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4402{
4403  static int z = 1<<15; /* 32768: something big value */
4404
4405  int v1, v2;
4406
4407  if (alt->value == 0) return ;
4408  if (now->value == 0) {
4409    copy_opt_map_info(now, alt);
4410    return ;
4411  }
4412
4413  v1 = z / now->value;
4414  v2 = z / alt->value;
4415  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4416    copy_opt_map_info(now, alt);
4417}
4418
4419static int
4420comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4421{
4422#define COMP_EM_BASE  20
4423  int ve, vm;
4424
4425  if (m->value <= 0) return -1;
4426
4427  ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4428  vm = COMP_EM_BASE * 5 * 2 / m->value;
4429  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4430}
4431
4432static void
4433alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4434{
4435  int i, val;
4436
4437  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4438  if (to->value == 0) return ;
4439  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4440    clear_opt_map_info(to);
4441    return ;
4442  }
4443
4444  alt_merge_mml(&to->mmd, &add->mmd);
4445
4446  val = 0;
4447  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4448    if (add->map[i])
4449      to->map[i] = 1;
4450
4451    if (to->map[i])
4452      val += map_position_value(enc, i);
4453  }
4454  to->value = val;
4455
4456  alt_merge_opt_anc_info(&to->anc, &add->anc);
4457}
4458
4459static void
4460set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4461{
4462  copy_mml(&(opt->exb.mmd),  mmd);
4463  copy_mml(&(opt->expr.mmd), mmd);
4464  copy_mml(&(opt->map.mmd),  mmd);
4465}
4466
4467static void
4468clear_node_opt_info(NodeOptInfo* opt)
4469{
4470  clear_mml(&opt->len);
4471  clear_opt_anc_info(&opt->anc);
4472  clear_opt_exact_info(&opt->exb);
4473  clear_opt_exact_info(&opt->exm);
4474  clear_opt_exact_info(&opt->expr);
4475  clear_opt_map_info(&opt->map);
4476}
4477
4478static void
4479copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4480{
4481  CopyMem (to, from, sizeof (NodeOptInfo));
4482}
4483
4484static void
4485concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4486{
4487  int exb_reach, exm_reach;
4488  OptAncInfo tanc;
4489
4490  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4491  copy_opt_anc_info(&to->anc, &tanc);
4492
4493  if (add->exb.len > 0 && to->len.max == 0) {
4494    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4495			to->len.max, add->len.max);
4496    copy_opt_anc_info(&add->exb.anc, &tanc);
4497  }
4498
4499  if (add->map.value > 0 && to->len.max == 0) {
4500    if (add->map.mmd.max == 0)
4501      add->map.anc.left_anchor |= to->anc.left_anchor;
4502  }
4503
4504  exb_reach = to->exb.reach_end;
4505  exm_reach = to->exm.reach_end;
4506
4507  if (add->len.max != 0)
4508    to->exb.reach_end = to->exm.reach_end = 0;
4509
4510  if (add->exb.len > 0) {
4511    if (exb_reach) {
4512      concat_opt_exact_info(&to->exb, &add->exb, enc);
4513      clear_opt_exact_info(&add->exb);
4514    }
4515    else if (exm_reach) {
4516      concat_opt_exact_info(&to->exm, &add->exb, enc);
4517      clear_opt_exact_info(&add->exb);
4518    }
4519  }
4520  select_opt_exact_info(enc, &to->exm, &add->exb);
4521  select_opt_exact_info(enc, &to->exm, &add->exm);
4522
4523  if (to->expr.len > 0) {
4524    if (add->len.max > 0) {
4525      if (to->expr.len > (int )add->len.max)
4526	to->expr.len = add->len.max;
4527
4528      if (to->expr.mmd.max == 0)
4529	select_opt_exact_info(enc, &to->exb, &to->expr);
4530      else
4531	select_opt_exact_info(enc, &to->exm, &to->expr);
4532    }
4533  }
4534  else if (add->expr.len > 0) {
4535    copy_opt_exact_info(&to->expr, &add->expr);
4536  }
4537
4538  select_opt_map_info(&to->map, &add->map);
4539
4540  add_mml(&to->len, &add->len);
4541}
4542
4543static void
4544alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4545{
4546  alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4547  alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4548  alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4549  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4550  alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4551
4552  alt_merge_mml(&to->len, &add->len);
4553}
4554
4555
4556#define MAX_NODE_OPT_INFO_REF_COUNT    5
4557
4558static int
4559optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4560{
4561  int type;
4562  int r = 0;
4563
4564  clear_node_opt_info(opt);
4565  set_bound_node_opt_info(opt, &env->mmd);
4566
4567  type = NTYPE(node);
4568  switch (type) {
4569  case NT_LIST:
4570    {
4571      OptEnv nenv;
4572      NodeOptInfo nopt;
4573      Node* nd = node;
4574
4575      copy_opt_env(&nenv, env);
4576      do {
4577	r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4578	if (r == 0) {
4579	  add_mml(&nenv.mmd, &nopt.len);
4580	  concat_left_node_opt_info(env->enc, opt, &nopt);
4581	}
4582      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4583    }
4584    break;
4585
4586  case NT_ALT:
4587    {
4588      NodeOptInfo nopt;
4589      Node* nd = node;
4590
4591      do {
4592	r = optimize_node_left(NCAR(nd), &nopt, env);
4593	if (r == 0) {
4594	  if (nd == node) copy_node_opt_info(opt, &nopt);
4595	  else            alt_merge_node_opt_info(opt, &nopt, env);
4596	}
4597      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4598    }
4599    break;
4600
4601  case NT_STR:
4602    {
4603      StrNode* sn = NSTR(node);
4604      int slen = (int)(sn->end - sn->s);
4605      int is_raw = NSTRING_IS_RAW(node);
4606
4607      if (! NSTRING_IS_AMBIG(node)) {
4608	concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4609				  NSTRING_IS_RAW(node), env->enc);
4610	if (slen > 0) {
4611	  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4612	}
4613        set_mml(&opt->len, slen, slen);
4614      }
4615      else {
4616        int max;
4617
4618	if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4619          int n = onigenc_strlen(env->enc, sn->s, sn->end);
4620          max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4621	}
4622	else {
4623	  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4624				    is_raw, env->enc);
4625	  opt->exb.ignore_case = 1;
4626
4627	  if (slen > 0) {
4628	    r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4629					  env->enc, env->case_fold_flag);
4630	    if (r != 0) break;
4631	  }
4632
4633	  max = slen;
4634	}
4635
4636        set_mml(&opt->len, slen, max);
4637      }
4638
4639      if (opt->exb.len == slen)
4640	opt->exb.reach_end = 1;
4641    }
4642    break;
4643
4644  case NT_CCLASS:
4645    {
4646      int i, z;
4647      CClassNode* cc = NCCLASS(node);
4648
4649      /* no need to check ignore case. (setted in setup_tree()) */
4650
4651      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4652        OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4653	OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4654
4655	set_mml(&opt->len, min, max);
4656      }
4657      else {
4658        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4659          z = BITSET_AT(cc->bs, i);
4660          if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4661            add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4662          }
4663        }
4664	set_mml(&opt->len, 1, 1);
4665      }
4666    }
4667    break;
4668
4669  case NT_CTYPE:
4670    {
4671      int i, min, max;
4672
4673      max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4674
4675      if (max == 1) {
4676        min = 1;
4677
4678	switch (NCTYPE(node)->ctype) {
4679	case ONIGENC_CTYPE_WORD:
4680	  if (NCTYPE(node)->not != 0) {
4681	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4682	      if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4683		add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4684	      }
4685	    }
4686	  }
4687	  else {
4688	    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4689	      if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4690		add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4691	      }
4692	    }
4693	  }
4694	  break;
4695	}
4696      }
4697      else {
4698        min = ONIGENC_MBC_MINLEN(env->enc);
4699      }
4700      set_mml(&opt->len, min, max);
4701    }
4702    break;
4703
4704  case NT_CANY:
4705    {
4706      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4707      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4708      set_mml(&opt->len, min, max);
4709    }
4710    break;
4711
4712  case NT_ANCHOR:
4713    switch (NANCHOR(node)->type) {
4714    case ANCHOR_BEGIN_BUF:
4715    case ANCHOR_BEGIN_POSITION:
4716    case ANCHOR_BEGIN_LINE:
4717    case ANCHOR_END_BUF:
4718    case ANCHOR_SEMI_END_BUF:
4719    case ANCHOR_END_LINE:
4720      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4721      break;
4722
4723    case ANCHOR_PREC_READ:
4724      {
4725	NodeOptInfo nopt;
4726
4727	r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4728	if (r == 0) {
4729	  if (nopt.exb.len > 0)
4730	    copy_opt_exact_info(&opt->expr, &nopt.exb);
4731	  else if (nopt.exm.len > 0)
4732	    copy_opt_exact_info(&opt->expr, &nopt.exm);
4733
4734	  opt->expr.reach_end = 0;
4735
4736	  if (nopt.map.value > 0)
4737	    copy_opt_map_info(&opt->map, &nopt.map);
4738	}
4739      }
4740      break;
4741
4742    case ANCHOR_PREC_READ_NOT:
4743    case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4744    case ANCHOR_LOOK_BEHIND_NOT:
4745      break;
4746    }
4747    break;
4748
4749  case NT_BREF:
4750    {
4751      int i;
4752      int* backs;
4753      OnigDistance min, max, tmin, tmax;
4754      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4755      BRefNode* br = NBREF(node);
4756
4757      if (br->state & NST_RECURSION) {
4758	set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4759	break;
4760      }
4761      backs = BACKREFS_P(br);
4762      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4763      if (r != 0) break;
4764      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4765      if (r != 0) break;
4766      for (i = 1; i < br->back_num; i++) {
4767	r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4768	if (r != 0) break;
4769	r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4770	if (r != 0) break;
4771	if (min > tmin) min = tmin;
4772	if (max < tmax) max = tmax;
4773      }
4774      if (r == 0) set_mml(&opt->len, min, max);
4775    }
4776    break;
4777
4778#ifdef USE_SUBEXP_CALL
4779  case NT_CALL:
4780    if (IS_CALL_RECURSION(NCALL(node)))
4781      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4782    else {
4783      OnigOptionType save = env->options;
4784      env->options = NENCLOSE(NCALL(node)->target)->option;
4785      r = optimize_node_left(NCALL(node)->target, opt, env);
4786      env->options = save;
4787    }
4788    break;
4789#endif
4790
4791  case NT_QTFR:
4792    {
4793      int i;
4794      OnigDistance min, max;
4795      NodeOptInfo nopt;
4796      QtfrNode* qn = NQTFR(node);
4797
4798      r = optimize_node_left(qn->target, &nopt, env);
4799      if (r) break;
4800
4801      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4802	if (env->mmd.max == 0 &&
4803	    NTYPE(qn->target) == NT_CANY && qn->greedy) {
4804	  if (IS_MULTILINE(env->options))
4805	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
4806	  else
4807	    add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
4808	}
4809      }
4810      else {
4811	if (qn->lower > 0) {
4812	  copy_node_opt_info(opt, &nopt);
4813	  if (nopt.exb.len > 0) {
4814	    if (nopt.exb.reach_end) {
4815	      for (i = 2; i <= qn->lower &&
4816		          ! is_full_opt_exact_info(&opt->exb); i++) {
4817		concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4818	      }
4819	      if (i < qn->lower) {
4820		opt->exb.reach_end = 0;
4821	      }
4822	    }
4823	  }
4824
4825	  if (qn->lower != qn->upper) {
4826	    opt->exb.reach_end = 0;
4827	    opt->exm.reach_end = 0;
4828	  }
4829	  if (qn->lower > 1)
4830	    opt->exm.reach_end = 0;
4831	}
4832      }
4833
4834      min = distance_multiply(nopt.len.min, qn->lower);
4835      if (IS_REPEAT_INFINITE(qn->upper))
4836	max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4837      else
4838	max = distance_multiply(nopt.len.max, qn->upper);
4839
4840      set_mml(&opt->len, min, max);
4841    }
4842    break;
4843
4844  case NT_ENCLOSE:
4845    {
4846      EncloseNode* en = NENCLOSE(node);
4847
4848      switch (en->type) {
4849      case ENCLOSE_OPTION:
4850	{
4851	  OnigOptionType save = env->options;
4852
4853	  env->options = en->option;
4854	  r = optimize_node_left(en->target, opt, env);
4855	  env->options = save;
4856	}
4857	break;
4858
4859      case ENCLOSE_MEMORY:
4860#ifdef USE_SUBEXP_CALL
4861	en->opt_count++;
4862	if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
4863	  OnigDistance min, max;
4864
4865	  min = 0;
4866	  max = ONIG_INFINITE_DISTANCE;
4867	  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4868	  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4869	  set_mml(&opt->len, min, max);
4870	}
4871	else
4872#endif
4873	{
4874	  r = optimize_node_left(en->target, opt, env);
4875
4876	  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
4877	    if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4878	      remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
4879	  }
4880	}
4881	break;
4882
4883      case ENCLOSE_STOP_BACKTRACK:
4884	r = optimize_node_left(en->target, opt, env);
4885	break;
4886      }
4887    }
4888    break;
4889
4890  default:
4891#ifdef ONIG_DEBUG
4892    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4893	    NTYPE(node));
4894#endif
4895    r = ONIGERR_TYPE_BUG;
4896    break;
4897  }
4898
4899  return r;
4900}
4901
4902static int
4903set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
4904{
4905  int r;
4906
4907  if (e->len == 0) return 0;
4908
4909  if (e->ignore_case) {
4910    reg->exact = (UChar* )xmalloc(e->len);
4911    CHECK_NULL_RETURN_MEMERR(reg->exact);
4912    xmemcpy(reg->exact, e->s, e->len);
4913    reg->exact_end = reg->exact + e->len;
4914    reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
4915  }
4916  else {
4917    int allow_reverse;
4918
4919    reg->exact = str_dup(e->s, e->s + e->len);
4920    CHECK_NULL_RETURN_MEMERR(reg->exact);
4921    reg->exact_end = reg->exact + e->len;
4922
4923    allow_reverse =
4924	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
4925
4926    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4927      r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4928	              reg->map, &(reg->int_map));
4929      if (r) return r;
4930
4931      reg->optimize = (allow_reverse != 0
4932		       ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
4933    }
4934    else {
4935      reg->optimize = ONIG_OPTIMIZE_EXACT;
4936    }
4937  }
4938
4939  reg->dmin = e->mmd.min;
4940  reg->dmax = e->mmd.max;
4941
4942  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4943    reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact);
4944  }
4945
4946  return 0;
4947}
4948
4949static void
4950set_optimize_map_info(regex_t* reg, OptMapInfo* m)
4951{
4952  int i;
4953
4954  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4955    reg->map[i] = m->map[i];
4956
4957  reg->optimize   = ONIG_OPTIMIZE_MAP;
4958  reg->dmin       = m->mmd.min;
4959  reg->dmax       = m->mmd.max;
4960
4961  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4962    reg->threshold_len = reg->dmin + 1;
4963  }
4964}
4965
4966static void
4967set_sub_anchor(regex_t* reg, OptAncInfo* anc)
4968{
4969  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
4970  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4971}
4972
4973#ifdef ONIG_DEBUG
4974static void print_optimize_info(FILE* f, regex_t* reg);
4975#endif
4976
4977static int
4978set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
4979{
4980
4981  int r;
4982  NodeOptInfo opt;
4983  OptEnv env;
4984
4985  env.enc            = reg->enc;
4986  env.options        = reg->options;
4987  env.case_fold_flag = reg->case_fold_flag;
4988  env.scan_env   = scan_env;
4989  clear_mml(&env.mmd);
4990
4991  r = optimize_node_left(node, &opt, &env);
4992  if (r) return r;
4993
4994  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
4995        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
4996
4997  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
4998
4999  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5000    reg->anchor_dmin = opt.len.min;
5001    reg->anchor_dmax = opt.len.max;
5002  }
5003
5004  if (opt.exb.len > 0 || opt.exm.len > 0) {
5005    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5006    if (opt.map.value > 0 &&
5007	comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5008      goto set_map;
5009    }
5010    else {
5011      r = set_optimize_exact_info(reg, &opt.exb);
5012      set_sub_anchor(reg, &opt.exb.anc);
5013    }
5014  }
5015  else if (opt.map.value > 0) {
5016  set_map:
5017    set_optimize_map_info(reg, &opt.map);
5018    set_sub_anchor(reg, &opt.map.anc);
5019  }
5020  else {
5021    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5022    if (opt.len.max == 0)
5023      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5024  }
5025
5026#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5027  print_optimize_info(stderr, reg);
5028#endif
5029  return r;
5030}
5031
5032static void
5033clear_optimize_info(regex_t* reg)
5034{
5035  reg->optimize      = ONIG_OPTIMIZE_NONE;
5036  reg->anchor        = 0;
5037  reg->anchor_dmin   = 0;
5038  reg->anchor_dmax   = 0;
5039  reg->sub_anchor    = 0;
5040  reg->exact_end     = (UChar* )NULL;
5041  reg->threshold_len = 0;
5042  if (IS_NOT_NULL(reg->exact)) {
5043    xfree(reg->exact);
5044    reg->exact = (UChar* )NULL;
5045  }
5046}
5047
5048#ifdef ONIG_DEBUG
5049
5050static void print_enc_string(FILE* fp, OnigEncoding enc,
5051			     const UChar *s, const UChar *end)
5052{
5053  fprintf(fp, "\nPATTERN: /");
5054
5055  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5056    const UChar *p;
5057    OnigCodePoint code;
5058
5059    p = s;
5060    while (p < end) {
5061      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5062      if (code >= 0x80) {
5063	fprintf(fp, " 0x%04x ", (int )code);
5064      }
5065      else {
5066	fputc((int )code, fp);
5067      }
5068
5069      p += enclen(enc, p);
5070    }
5071  }
5072  else {
5073    while (s < end) {
5074      fputc((int )*s, fp);
5075      s++;
5076    }
5077  }
5078
5079  fprintf(fp, "/\n");
5080}
5081
5082static void
5083print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5084{
5085  if (a == ONIG_INFINITE_DISTANCE)
5086    fputs("inf", f);
5087  else
5088    fprintf(f, "(%u)", a);
5089
5090  fputs("-", f);
5091
5092  if (b == ONIG_INFINITE_DISTANCE)
5093    fputs("inf", f);
5094  else
5095    fprintf(f, "(%u)", b);
5096}
5097
5098static void
5099print_anchor(FILE* f, int anchor)
5100{
5101  int q = 0;
5102
5103  fprintf(f, "[");
5104
5105  if (anchor & ANCHOR_BEGIN_BUF) {
5106    fprintf(f, "begin-buf");
5107    q = 1;
5108  }
5109  if (anchor & ANCHOR_BEGIN_LINE) {
5110    if (q) fprintf(f, ", ");
5111    q = 1;
5112    fprintf(f, "begin-line");
5113  }
5114  if (anchor & ANCHOR_BEGIN_POSITION) {
5115    if (q) fprintf(f, ", ");
5116    q = 1;
5117    fprintf(f, "begin-pos");
5118  }
5119  if (anchor & ANCHOR_END_BUF) {
5120    if (q) fprintf(f, ", ");
5121    q = 1;
5122    fprintf(f, "end-buf");
5123  }
5124  if (anchor & ANCHOR_SEMI_END_BUF) {
5125    if (q) fprintf(f, ", ");
5126    q = 1;
5127    fprintf(f, "semi-end-buf");
5128  }
5129  if (anchor & ANCHOR_END_LINE) {
5130    if (q) fprintf(f, ", ");
5131    q = 1;
5132    fprintf(f, "end-line");
5133  }
5134  if (anchor & ANCHOR_ANYCHAR_STAR) {
5135    if (q) fprintf(f, ", ");
5136    q = 1;
5137    fprintf(f, "anychar-star");
5138  }
5139  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5140    if (q) fprintf(f, ", ");
5141    fprintf(f, "anychar-star-pl");
5142  }
5143
5144  fprintf(f, "]");
5145}
5146
5147static void
5148print_optimize_info(FILE* f, regex_t* reg)
5149{
5150  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5151                              "EXACT_IC", "MAP" };
5152
5153  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5154  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5155  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5156    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5157  fprintf(f, "\n");
5158
5159  if (reg->optimize) {
5160    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5161    fprintf(f, "\n");
5162  }
5163  fprintf(f, "\n");
5164
5165  if (reg->exact) {
5166    UChar *p;
5167    fprintf(f, "exact: [");
5168    for (p = reg->exact; p < reg->exact_end; p++) {
5169      fputc(*p, f);
5170    }
5171    fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
5172  }
5173  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5174    int c, i, n = 0;
5175
5176    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5177      if (reg->map[i]) n++;
5178
5179    fprintf(f, "map: n=%d\n", n);
5180    if (n > 0) {
5181      c = 0;
5182      fputc('[', f);
5183      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5184	if (reg->map[i] != 0) {
5185          if (c > 0)  fputs(", ", f);
5186          c++;
5187          if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5188              ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5189            fputc(i, f);
5190          else
5191            fprintf(f, "%d", i);
5192        }
5193      }
5194      fprintf(f, "]\n");
5195    }
5196  }
5197}
5198#endif /* ONIG_DEBUG */
5199
5200
5201extern void
5202onig_free_body(regex_t* reg)
5203{
5204  if (IS_NOT_NULL(reg)) {
5205    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5206    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5207    if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
5208    if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5209    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5210    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5211
5212#ifdef USE_NAMED_GROUP
5213    onig_names_free(reg);
5214#endif
5215  }
5216}
5217
5218extern void
5219onig_free(regex_t* reg)
5220{
5221  if (IS_NOT_NULL(reg)) {
5222    onig_free_body(reg);
5223    xfree(reg);
5224  }
5225}
5226
5227#define REGEX_TRANSFER(to,from) do {\
5228  (to)->state = ONIG_STATE_MODIFY;\
5229  onig_free_body(to);\
5230  xmemcpy(to, from, sizeof(regex_t));\
5231  xfree(from);\
5232} while (0)
5233
5234extern void
5235onig_transfer(regex_t* to, regex_t* from)
5236{
5237  THREAD_ATOMIC_START;
5238  REGEX_TRANSFER(to, from);
5239  THREAD_ATOMIC_END;
5240}
5241
5242#define REGEX_CHAIN_HEAD(reg) do {\
5243  while (IS_NOT_NULL((reg)->chain)) {\
5244    (reg) = (reg)->chain;\
5245  }\
5246} while (0)
5247
5248extern void
5249onig_chain_link_add(regex_t* to, regex_t* add)
5250{
5251  THREAD_ATOMIC_START;
5252  REGEX_CHAIN_HEAD(to);
5253  to->chain = add;
5254  THREAD_ATOMIC_END;
5255}
5256
5257extern void
5258onig_chain_reduce(regex_t* reg)
5259{
5260  regex_t *head, *prev;
5261
5262  prev = reg;
5263  head = prev->chain;
5264  if (IS_NOT_NULL(head)) {
5265    reg->state = ONIG_STATE_MODIFY;
5266    while (IS_NOT_NULL(head->chain)) {
5267      prev = head;
5268      head = head->chain;
5269    }
5270    prev->chain = (regex_t* )NULL;
5271    REGEX_TRANSFER(reg, head);
5272  }
5273}
5274
5275#ifdef ONIG_DEBUG
5276static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5277#endif
5278#ifdef ONIG_DEBUG_PARSE_TREE
5279static void print_tree P_((FILE* f, Node* node));
5280#endif
5281
5282extern int
5283onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5284	      OnigErrorInfo* einfo)
5285{
5286#define COMPILE_INIT_SIZE  20
5287
5288  int r, init_size;
5289  Node*  root;
5290  ScanEnv  scan_env;
5291#ifdef USE_SUBEXP_CALL
5292  UnsetAddrList  uslist;
5293#endif
5294
5295  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5296
5297  reg->state = ONIG_STATE_COMPILING;
5298
5299#ifdef ONIG_DEBUG
5300  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5301#endif
5302
5303  if (reg->alloc == 0) {
5304    init_size = ((int)(pattern_end - pattern)) * 2;
5305    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5306    r = BBUF_INIT(reg, init_size);
5307    if (r != 0) goto end;
5308  }
5309  else
5310    reg->used = 0;
5311
5312  reg->num_mem            = 0;
5313  reg->num_repeat         = 0;
5314  reg->num_null_check     = 0;
5315  reg->repeat_range_alloc = 0;
5316  reg->repeat_range       = (OnigRepeatRange* )NULL;
5317#ifdef USE_COMBINATION_EXPLOSION_CHECK
5318  reg->num_comb_exp_check = 0;
5319#endif
5320
5321  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5322  if (r != 0 || root == NULL) goto err;
5323
5324#ifdef USE_NAMED_GROUP
5325  /* mixed use named group and no-named group */
5326  if (scan_env.num_named > 0 &&
5327      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5328      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5329    if (scan_env.num_named != scan_env.num_mem)
5330      r = disable_noname_group_capture(&root, reg, &scan_env);
5331    else
5332      r = numbered_ref_check(root);
5333
5334    if (r != 0) goto err;
5335  }
5336#endif
5337
5338#ifdef USE_SUBEXP_CALL
5339  if (scan_env.num_call > 0) {
5340    r = unset_addr_list_init(&uslist, scan_env.num_call);
5341    if (r != 0) goto err;
5342    scan_env.unset_addr_list = &uslist;
5343    r = setup_subexp_call(root, &scan_env);
5344    if (r != 0) goto err_unset;
5345    r = subexp_recursive_check_trav(root, &scan_env);
5346    if (r  < 0) goto err_unset;
5347    r = subexp_inf_recursive_check_trav(root, &scan_env);
5348    if (r != 0) goto err_unset;
5349
5350    reg->num_call = scan_env.num_call;
5351  }
5352  else
5353    reg->num_call = 0;
5354#endif
5355
5356  r = setup_tree(root, reg, 0, &scan_env);
5357  if (r != 0) goto err_unset;
5358
5359#ifdef ONIG_DEBUG_PARSE_TREE
5360  print_tree(stderr, root);
5361#endif
5362
5363  reg->capture_history  = scan_env.capture_history;
5364  reg->bt_mem_start     = scan_env.bt_mem_start;
5365  reg->bt_mem_start    |= reg->capture_history;
5366  if (IS_FIND_CONDITION(reg->options))
5367    BIT_STATUS_ON_ALL(reg->bt_mem_end);
5368  else {
5369    reg->bt_mem_end  = scan_env.bt_mem_end;
5370    reg->bt_mem_end |= reg->capture_history;
5371  }
5372
5373#ifdef USE_COMBINATION_EXPLOSION_CHECK
5374  if (scan_env.backrefed_mem == 0
5375#ifdef USE_SUBEXP_CALL
5376      || scan_env.num_call == 0
5377#endif
5378      ) {
5379    setup_comb_exp_check(root, 0, &scan_env);
5380#ifdef USE_SUBEXP_CALL
5381    if (scan_env.has_recursion != 0) {
5382      scan_env.num_comb_exp_check = 0;
5383    }
5384    else
5385#endif
5386    if (scan_env.comb_exp_max_regnum > 0) {
5387      int i;
5388      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5389	if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5390	  scan_env.num_comb_exp_check = 0;
5391	  break;
5392	}
5393      }
5394    }
5395  }
5396
5397  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5398#endif
5399
5400  clear_optimize_info(reg);
5401#ifndef ONIG_DONT_OPTIMIZE
5402  r = set_optimize_info_from_tree(root, reg, &scan_env);
5403  if (r != 0) goto err_unset;
5404#endif
5405
5406  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5407    xfree(scan_env.mem_nodes_dynamic);
5408    scan_env.mem_nodes_dynamic = (Node** )NULL;
5409  }
5410
5411  r = compile_tree(root, reg);
5412  if (r == 0) {
5413    r = add_opcode(reg, OP_END);
5414#ifdef USE_SUBEXP_CALL
5415    if (scan_env.num_call > 0) {
5416      r = unset_addr_list_fix(&uslist, reg);
5417      unset_addr_list_end(&uslist);
5418      if (r) goto err;
5419    }
5420#endif
5421
5422    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5423      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5424    else {
5425      if (reg->bt_mem_start != 0)
5426	reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5427      else
5428	reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5429    }
5430  }
5431#ifdef USE_SUBEXP_CALL
5432  else if (scan_env.num_call > 0) {
5433    unset_addr_list_end(&uslist);
5434  }
5435#endif
5436  onig_node_free(root);
5437
5438#ifdef ONIG_DEBUG_COMPILE
5439#ifdef USE_NAMED_GROUP
5440  onig_print_names(stderr, reg);
5441#endif
5442  print_compiled_byte_code_list(stderr, reg);
5443#endif
5444
5445 end:
5446  reg->state = ONIG_STATE_NORMAL;
5447  return r;
5448
5449 err_unset:
5450#ifdef USE_SUBEXP_CALL
5451  if (scan_env.num_call > 0) {
5452    unset_addr_list_end(&uslist);
5453  }
5454#endif
5455 err:
5456  if (IS_NOT_NULL(scan_env.error)) {
5457    if (IS_NOT_NULL(einfo)) {
5458      einfo->enc     = scan_env.enc;
5459      einfo->par     = scan_env.error;
5460      einfo->par_end = scan_env.error_end;
5461    }
5462  }
5463
5464  onig_node_free(root);
5465  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5466      xfree(scan_env.mem_nodes_dynamic);
5467  return r;
5468}
5469
5470#ifdef USE_RECOMPILE_API
5471extern int
5472onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5473	    OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5474	    OnigErrorInfo* einfo)
5475{
5476  int r;
5477  regex_t *new_reg;
5478
5479  r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5480  if (r) return r;
5481  if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5482    onig_transfer(reg, new_reg);
5483  }
5484  else {
5485    onig_chain_link_add(reg, new_reg);
5486  }
5487  return 0;
5488}
5489#endif
5490
5491static int onig_inited = 0;
5492
5493extern int
5494onig_reg_init(regex_t* reg, OnigOptionType option,
5495	      OnigCaseFoldType case_fold_flag,
5496	      OnigEncoding enc, OnigSyntaxType* syntax)
5497{
5498  if (! onig_inited)
5499    onig_init();
5500
5501  if (IS_NULL(reg))
5502    return ONIGERR_INVALID_ARGUMENT;
5503
5504  if (ONIGENC_IS_UNDEF(enc))
5505    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
5506
5507  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5508      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5509    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5510  }
5511
5512  (reg)->state = ONIG_STATE_MODIFY;
5513
5514  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5515    option |= syntax->options;
5516    option &= ~ONIG_OPTION_SINGLELINE;
5517  }
5518  else
5519    option |= syntax->options;
5520
5521  (reg)->enc              = enc;
5522  (reg)->options          = option;
5523  (reg)->syntax           = syntax;
5524  (reg)->optimize         = 0;
5525  (reg)->exact            = (UChar* )NULL;
5526  (reg)->int_map          = (int* )NULL;
5527  (reg)->int_map_backward = (int* )NULL;
5528  (reg)->chain            = (regex_t* )NULL;
5529
5530  (reg)->p                = (UChar* )NULL;
5531  (reg)->alloc            = 0;
5532  (reg)->used             = 0;
5533  (reg)->name_table       = (void* )NULL;
5534
5535  (reg)->case_fold_flag   = case_fold_flag;
5536  return 0;
5537}
5538
5539extern int
5540onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5541          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5542          OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5543{
5544  int r;
5545
5546  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5547  if (r) return r;
5548
5549  r = onig_compile(reg, pattern, pattern_end, einfo);
5550  return r;
5551}
5552
5553extern int
5554onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5555	  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5556	  OnigErrorInfo* einfo)
5557{
5558  int r;
5559
5560  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5561  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5562
5563  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5564  if (r) goto err;
5565
5566  r = onig_compile(*reg, pattern, pattern_end, einfo);
5567  if (r) {
5568  err:
5569    onig_free(*reg);
5570    *reg = NULL;
5571  }
5572  return r;
5573}
5574
5575
5576extern int
5577onig_init(void)
5578{
5579  if (onig_inited != 0)
5580    return 0;
5581
5582  THREAD_SYSTEM_INIT;
5583  THREAD_ATOMIC_START;
5584
5585  onig_inited = 1;
5586
5587  onigenc_init();
5588  /* onigenc_set_default_caseconv_table((UChar* )0); */
5589
5590#ifdef ONIG_DEBUG_STATISTICS
5591  onig_statistics_init();
5592#endif
5593
5594  THREAD_ATOMIC_END;
5595  return 0;
5596}
5597
5598
5599static OnigEndCallListItemType* EndCallTop;
5600
5601extern void onig_add_end_call(void (*func)(void))
5602{
5603  OnigEndCallListItemType* item;
5604
5605  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5606  if (item == 0) return ;
5607
5608  item->next = EndCallTop;
5609  item->func = func;
5610
5611  EndCallTop = item;
5612}
5613
5614static void
5615exec_end_call_list(void)
5616{
5617  OnigEndCallListItemType* prev;
5618  void (*func)(void);
5619
5620  while (EndCallTop != 0) {
5621    func = EndCallTop->func;
5622    (*func)();
5623
5624    prev = EndCallTop;
5625    EndCallTop = EndCallTop->next;
5626    xfree(prev);
5627  }
5628}
5629
5630extern int
5631onig_end(void)
5632{
5633  THREAD_ATOMIC_START;
5634
5635  exec_end_call_list();
5636
5637#ifdef ONIG_DEBUG_STATISTICS
5638  onig_print_statistics(stderr);
5639#endif
5640
5641#ifdef USE_SHARED_CCLASS_TABLE
5642  onig_free_shared_cclass_table();
5643#endif
5644
5645#ifdef USE_PARSE_TREE_NODE_RECYCLE
5646  onig_free_node_list();
5647#endif
5648
5649  onig_inited = 0;
5650
5651  THREAD_ATOMIC_END;
5652  THREAD_SYSTEM_END;
5653  return 0;
5654}
5655
5656extern int
5657onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5658{
5659  OnigCodePoint n, *data;
5660  OnigCodePoint low, high, x;
5661
5662  GET_CODE_POINT(n, p);
5663  data = (OnigCodePoint* )p;
5664  data++;
5665
5666  for (low = 0, high = n; low < high; ) {
5667    x = (low + high) >> 1;
5668    if (code > data[x * 2 + 1])
5669      low = x + 1;
5670    else
5671      high = x;
5672  }
5673
5674  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5675}
5676
5677extern int
5678onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5679{
5680  int found;
5681
5682  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5683    if (IS_NULL(cc->mbuf)) {
5684      found = 0;
5685    }
5686    else {
5687      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5688    }
5689  }
5690  else {
5691    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5692  }
5693
5694  if (IS_NCCLASS_NOT(cc))
5695    return !found;
5696  else
5697    return found;
5698}
5699
5700extern int
5701onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
5702{
5703  int len;
5704
5705  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5706    len = 2;
5707  }
5708  else {
5709    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5710  }
5711  return onig_is_code_in_cc_len(len, code, cc);
5712}
5713
5714
5715#ifdef ONIG_DEBUG
5716
5717/* arguments type */
5718#define ARG_SPECIAL     -1
5719#define ARG_NON          0
5720#define ARG_RELADDR      1
5721#define ARG_ABSADDR      2
5722#define ARG_LENGTH       3
5723#define ARG_MEMNUM       4
5724#define ARG_OPTION       5
5725#define ARG_STATE_CHECK  6
5726
5727OnigOpInfoType OnigOpInfo[] = {
5728  { OP_FINISH,            "finish",          ARG_NON },
5729  { OP_END,               "end",             ARG_NON },
5730  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
5731  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
5732  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
5733  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
5734  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
5735  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
5736  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
5737  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
5738  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
5739  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
5740  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
5741  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
5742  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
5743  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
5744  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
5745  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
5746  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
5747  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
5748  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
5749  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
5750  { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
5751  { OP_ANYCHAR,           "anychar",         ARG_NON },
5752  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
5753  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
5754  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
5755  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5756  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5757  { OP_WORD,                "word",            ARG_NON },
5758  { OP_NOT_WORD,            "not-word",        ARG_NON },
5759  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
5760  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
5761  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
5762  { OP_WORD_END,            "word-end",        ARG_NON },
5763  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
5764  { OP_END_BUF,             "end-buf",         ARG_NON },
5765  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
5766  { OP_END_LINE,            "end-line",        ARG_NON },
5767  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
5768  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
5769  { OP_BACKREF1,            "backref1",             ARG_NON },
5770  { OP_BACKREF2,            "backref2",             ARG_NON },
5771  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
5772  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
5773  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
5774  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
5775  { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
5776  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
5777  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
5778  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
5779  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
5780  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
5781  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
5782  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
5783  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
5784  { OP_FAIL,                "fail",                 ARG_NON },
5785  { OP_JUMP,                "jump",                 ARG_RELADDR },
5786  { OP_PUSH,                "push",                 ARG_RELADDR },
5787  { OP_POP,                 "pop",                  ARG_NON },
5788  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
5789  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
5790  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
5791  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
5792  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
5793  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
5794  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
5795  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
5796  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
5797  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
5798  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
5799  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
5800  { OP_PUSH_POS,             "push-pos",             ARG_NON },
5801  { OP_POP_POS,              "pop-pos",              ARG_NON },
5802  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
5803  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
5804  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
5805  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
5806  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
5807  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5808  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5809  { OP_CALL,                 "call",                 ARG_ABSADDR },
5810  { OP_RETURN,               "return",               ARG_NON },
5811  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
5812  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5813  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
5814  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
5815  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
5816    "state-check-anychar-ml*", ARG_STATE_CHECK },
5817  { -1, "", ARG_NON }
5818};
5819
5820static char*
5821op2name(int opcode)
5822{
5823  int i;
5824
5825  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5826    if (opcode == OnigOpInfo[i].opcode)
5827      return OnigOpInfo[i].name;
5828  }
5829  return "";
5830}
5831
5832static int
5833op2arg_type(int opcode)
5834{
5835  int i;
5836
5837  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5838    if (opcode == OnigOpInfo[i].opcode)
5839      return OnigOpInfo[i].arg_type;
5840  }
5841  return ARG_SPECIAL;
5842}
5843
5844static void
5845Indent(FILE* f, int indent)
5846{
5847  int i;
5848  for (i = 0; i < indent; i++) putc(' ', f);
5849}
5850
5851static void
5852p_string(FILE* f, int len, UChar* s)
5853{
5854  fputs(":", f);
5855  while (len-- > 0) { fputc(*s++, f); }
5856}
5857
5858static void
5859p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5860{
5861  int x = len * mb_len;
5862
5863  fprintf(f, ":%d:", len);
5864  while (x-- > 0) { fputc(*s++, f); }
5865}
5866
5867extern void
5868onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
5869                              OnigEncoding enc)
5870{
5871  int i, n, arg_type;
5872  RelAddrType addr;
5873  LengthType len;
5874  MemNumType mem;
5875  StateCheckNumType scn;
5876  OnigCodePoint code;
5877  UChar *q;
5878
5879  fprintf(f, "[%s", op2name(*bp));
5880  arg_type = op2arg_type(*bp);
5881  if (arg_type != ARG_SPECIAL) {
5882    bp++;
5883    switch (arg_type) {
5884    case ARG_NON:
5885      break;
5886    case ARG_RELADDR:
5887      GET_RELADDR_INC(addr, bp);
5888      fprintf(f, ":(%d)", addr);
5889      break;
5890    case ARG_ABSADDR:
5891      GET_ABSADDR_INC(addr, bp);
5892      fprintf(f, ":(%d)", addr);
5893      break;
5894    case ARG_LENGTH:
5895      GET_LENGTH_INC(len, bp);
5896      fprintf(f, ":%d", len);
5897      break;
5898    case ARG_MEMNUM:
5899      mem = *((MemNumType* )bp);
5900      bp += SIZE_MEMNUM;
5901      fprintf(f, ":%d", mem);
5902      break;
5903    case ARG_OPTION:
5904      {
5905	OnigOptionType option = *((OnigOptionType* )bp);
5906	bp += SIZE_OPTION;
5907	fprintf(f, ":%d", option);
5908      }
5909      break;
5910
5911    case ARG_STATE_CHECK:
5912      scn = *((StateCheckNumType* )bp);
5913      bp += SIZE_STATE_CHECK_NUM;
5914      fprintf(f, ":%d", scn);
5915      break;
5916    }
5917  }
5918  else {
5919    switch (*bp++) {
5920    case OP_EXACT1:
5921    case OP_ANYCHAR_STAR_PEEK_NEXT:
5922    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
5923      p_string(f, 1, bp++); break;
5924    case OP_EXACT2:
5925      p_string(f, 2, bp); bp += 2; break;
5926    case OP_EXACT3:
5927      p_string(f, 3, bp); bp += 3; break;
5928    case OP_EXACT4:
5929      p_string(f, 4, bp); bp += 4; break;
5930    case OP_EXACT5:
5931      p_string(f, 5, bp); bp += 5; break;
5932    case OP_EXACTN:
5933      GET_LENGTH_INC(len, bp);
5934      p_len_string(f, len, 1, bp);
5935      bp += len;
5936      break;
5937
5938    case OP_EXACTMB2N1:
5939      p_string(f, 2, bp); bp += 2; break;
5940    case OP_EXACTMB2N2:
5941      p_string(f, 4, bp); bp += 4; break;
5942    case OP_EXACTMB2N3:
5943      p_string(f, 6, bp); bp += 6; break;
5944    case OP_EXACTMB2N:
5945      GET_LENGTH_INC(len, bp);
5946      p_len_string(f, len, 2, bp);
5947      bp += len * 2;
5948      break;
5949    case OP_EXACTMB3N:
5950      GET_LENGTH_INC(len, bp);
5951      p_len_string(f, len, 3, bp);
5952      bp += len * 3;
5953      break;
5954    case OP_EXACTMBN:
5955      {
5956	int mb_len;
5957
5958	GET_LENGTH_INC(mb_len, bp);
5959	GET_LENGTH_INC(len, bp);
5960	fprintf(f, ":%d:%d:", mb_len, len);
5961	n = len * mb_len;
5962	while (n-- > 0) { fputc(*bp++, f); }
5963      }
5964      break;
5965
5966    case OP_EXACT1_IC:
5967      len = enclen(enc, bp);
5968      p_string(f, len, bp);
5969      bp += len;
5970      break;
5971    case OP_EXACTN_IC:
5972      GET_LENGTH_INC(len, bp);
5973      p_len_string(f, len, 1, bp);
5974      bp += len;
5975      break;
5976
5977    case OP_CCLASS:
5978      n = bitset_on_num((BitSetRef )bp);
5979      bp += SIZE_BITSET;
5980      fprintf(f, ":%d", n);
5981      break;
5982
5983    case OP_CCLASS_NOT:
5984      n = bitset_on_num((BitSetRef )bp);
5985      bp += SIZE_BITSET;
5986      fprintf(f, ":%d", n);
5987      break;
5988
5989    case OP_CCLASS_MB:
5990    case OP_CCLASS_MB_NOT:
5991      GET_LENGTH_INC(len, bp);
5992      q = bp;
5993#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5994      ALIGNMENT_RIGHT(q);
5995#endif
5996      GET_CODE_POINT(code, q);
5997      bp += len;
5998      fprintf(f, ":%d:%d", (int )code, len);
5999      break;
6000
6001    case OP_CCLASS_MIX:
6002    case OP_CCLASS_MIX_NOT:
6003      n = bitset_on_num((BitSetRef )bp);
6004      bp += SIZE_BITSET;
6005      GET_LENGTH_INC(len, bp);
6006      q = bp;
6007#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6008      ALIGNMENT_RIGHT(q);
6009#endif
6010      GET_CODE_POINT(code, q);
6011      bp += len;
6012      fprintf(f, ":%d:%d:%d", n, (int )code, len);
6013      break;
6014
6015    case OP_CCLASS_NODE:
6016      {
6017        CClassNode *cc;
6018
6019        GET_POINTER_INC(cc, bp);
6020        n = bitset_on_num(cc->bs);
6021        fprintf(f, ":%u:%d", (unsigned int )cc, n);
6022      }
6023      break;
6024
6025    case OP_BACKREFN_IC:
6026      mem = *((MemNumType* )bp);
6027      bp += SIZE_MEMNUM;
6028      fprintf(f, ":%d", mem);
6029      break;
6030
6031    case OP_BACKREF_MULTI_IC:
6032    case OP_BACKREF_MULTI:
6033      fputs(" ", f);
6034      GET_LENGTH_INC(len, bp);
6035      for (i = 0; i < len; i++) {
6036	GET_MEMNUM_INC(mem, bp);
6037	if (i > 0) fputs(", ", f);
6038	fprintf(f, "%d", mem);
6039      }
6040      break;
6041
6042    case OP_BACKREF_WITH_LEVEL:
6043      {
6044	OnigOptionType option;
6045	LengthType level;
6046
6047	GET_OPTION_INC(option, bp);
6048	fprintf(f, ":%d", option);
6049	GET_LENGTH_INC(level, bp);
6050	fprintf(f, ":%d", level);
6051
6052	fputs(" ", f);
6053	GET_LENGTH_INC(len, bp);
6054	for (i = 0; i < len; i++) {
6055	  GET_MEMNUM_INC(mem, bp);
6056	  if (i > 0) fputs(", ", f);
6057	  fprintf(f, "%d", mem);
6058	}
6059      }
6060      break;
6061
6062    case OP_REPEAT:
6063    case OP_REPEAT_NG:
6064      {
6065	mem = *((MemNumType* )bp);
6066	bp += SIZE_MEMNUM;
6067	addr = *((RelAddrType* )bp);
6068	bp += SIZE_RELADDR;
6069	fprintf(f, ":%d:%d", mem, addr);
6070      }
6071      break;
6072
6073    case OP_PUSH_OR_JUMP_EXACT1:
6074    case OP_PUSH_IF_PEEK_NEXT:
6075      addr = *((RelAddrType* )bp);
6076      bp += SIZE_RELADDR;
6077      fprintf(f, ":(%d)", addr);
6078      p_string(f, 1, bp);
6079      bp += 1;
6080      break;
6081
6082    case OP_LOOK_BEHIND:
6083      GET_LENGTH_INC(len, bp);
6084      fprintf(f, ":%d", len);
6085      break;
6086
6087    case OP_PUSH_LOOK_BEHIND_NOT:
6088      GET_RELADDR_INC(addr, bp);
6089      GET_LENGTH_INC(len, bp);
6090      fprintf(f, ":%d:(%d)", len, addr);
6091      break;
6092
6093    case OP_STATE_CHECK_PUSH:
6094    case OP_STATE_CHECK_PUSH_OR_JUMP:
6095      scn = *((StateCheckNumType* )bp);
6096      bp += SIZE_STATE_CHECK_NUM;
6097      addr = *((RelAddrType* )bp);
6098      bp += SIZE_RELADDR;
6099      fprintf(f, ":%d:(%d)", scn, addr);
6100      break;
6101
6102    default:
6103      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6104	      *--bp);
6105    }
6106  }
6107  fputs("]", f);
6108  if (nextp) *nextp = bp;
6109}
6110
6111static void
6112print_compiled_byte_code_list(FILE* f, regex_t* reg)
6113{
6114  int ncode;
6115  UChar* bp = reg->p;
6116  UChar* end = reg->p + reg->used;
6117
6118  fprintf(f, "code length: %d\n", reg->used);
6119
6120  ncode = 0;
6121  while (bp < end) {
6122    ncode++;
6123    if (bp > reg->p) {
6124      if (ncode % 5 == 0)
6125	fprintf(f, "\n");
6126      else
6127	fputs(" ", f);
6128    }
6129    onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
6130  }
6131
6132  fprintf(f, "\n");
6133}
6134
6135static void
6136print_indent_tree(FILE* f, Node* node, int indent)
6137{
6138  int i, type;
6139  int add = 3;
6140  UChar* p;
6141
6142  Indent(f, indent);
6143  if (IS_NULL(node)) {
6144    fprintf(f, "ERROR: null node!!!\n");
6145    exit (0);
6146  }
6147
6148  type = NTYPE(node);
6149  switch (type) {
6150  case NT_LIST:
6151  case NT_ALT:
6152    if (NTYPE(node) == NT_LIST)
6153      fprintf(f, "<list:%x>\n", (int )node);
6154    else
6155      fprintf(f, "<alt:%x>\n", (int )node);
6156
6157    print_indent_tree(f, NCAR(node), indent + add);
6158    while (IS_NOT_NULL(node = NCDR(node))) {
6159      if (NTYPE(node) != type) {
6160	fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6161	exit(0);
6162      }
6163      print_indent_tree(f, NCAR(node), indent + add);
6164    }
6165    break;
6166
6167  case NT_STR:
6168    fprintf(f, "<string%s:%x>",
6169	    (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
6170    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6171      if (*p >= 0x20 && *p < 0x7f)
6172	fputc(*p, f);
6173      else {
6174	fprintf(f, " 0x%02x", *p);
6175      }
6176    }
6177    break;
6178
6179  case NT_CCLASS:
6180    fprintf(f, "<cclass:%x>", (int )node);
6181    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6182    if (NCCLASS(node)->mbuf) {
6183      BBuf* bbuf = NCCLASS(node)->mbuf;
6184      for (i = 0; i < bbuf->used; i++) {
6185	if (i > 0) fprintf(f, ",");
6186	fprintf(f, "%0x", bbuf->p[i]);
6187      }
6188    }
6189    break;
6190
6191  case NT_CTYPE:
6192    fprintf(f, "<ctype:%x> ", (int )node);
6193    switch (NCTYPE(node)->ctype) {
6194    case ONIGENC_CTYPE_WORD:
6195      if (NCTYPE(node)->not != 0)
6196	fputs("not word",       f);
6197      else
6198	fputs("word",           f);
6199      break;
6200
6201    default:
6202      fprintf(f, "ERROR: undefined ctype.\n");
6203      exit(0);
6204    }
6205    break;
6206
6207  case NT_CANY:
6208    fprintf(f, "<anychar:%x>", (int )node);
6209    break;
6210
6211  case NT_ANCHOR:
6212    fprintf(f, "<anchor:%x> ", (int )node);
6213    switch (NANCHOR(node)->type) {
6214    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6215    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6216    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6217    case ANCHOR_END_LINE:       fputs("end line",       f); break;
6218    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6219    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6220
6221    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6222    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6223#ifdef USE_WORD_BEGIN_END
6224    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6225    case ANCHOR_WORD_END:        fputs("word end", f);       break;
6226#endif
6227    case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
6228    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
6229    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
6230    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
6231
6232    default:
6233      fprintf(f, "ERROR: undefined anchor type.\n");
6234      break;
6235    }
6236    break;
6237
6238  case NT_BREF:
6239    {
6240      int* p;
6241      BRefNode* br = NBREF(node);
6242      p = BACKREFS_P(br);
6243      fprintf(f, "<backref:%x>", (int )node);
6244      for (i = 0; i < br->back_num; i++) {
6245	if (i > 0) fputs(", ", f);
6246	fprintf(f, "%d", p[i]);
6247      }
6248    }
6249    break;
6250
6251#ifdef USE_SUBEXP_CALL
6252  case NT_CALL:
6253    {
6254      CallNode* cn = NCALL(node);
6255      fprintf(f, "<call:%x>", (int )node);
6256      p_string(f, cn->name_end - cn->name, cn->name);
6257    }
6258    break;
6259#endif
6260
6261  case NT_QTFR:
6262    fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
6263	    NQTFR(node)->lower, NQTFR(node)->upper,
6264	    (NQTFR(node)->greedy ? "" : "?"));
6265    print_indent_tree(f, NQTFR(node)->target, indent + add);
6266    break;
6267
6268  case NT_ENCLOSE:
6269    fprintf(f, "<enclose:%x> ", (int )node);
6270    switch (NENCLOSE(node)->type) {
6271    case ENCLOSE_OPTION:
6272      fprintf(f, "option:%d", NENCLOSE(node)->option);
6273      break;
6274    case ENCLOSE_MEMORY:
6275      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6276      break;
6277    case ENCLOSE_STOP_BACKTRACK:
6278      fprintf(f, "stop-bt");
6279      break;
6280
6281    default:
6282      break;
6283    }
6284    fprintf(f, "\n");
6285    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6286    break;
6287
6288  default:
6289    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6290    break;
6291  }
6292
6293  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6294      type != NT_ENCLOSE)
6295    fprintf(f, "\n");
6296  fflush(f);
6297}
6298#endif /* ONIG_DEBUG */
6299
6300#ifdef ONIG_DEBUG_PARSE_TREE
6301static void
6302print_tree(FILE* f, Node* node)
6303{
6304  print_indent_tree(f, node, 0);
6305}
6306#endif
6307