1/**********************************************************************
2  regenc.c -  Oniguruma (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include "regint.h"
33
34OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
35
36extern int
37onigenc_init(void)
38{
39  return 0;
40}
41
42extern OnigEncoding
43onigenc_get_default_encoding(void)
44{
45  return OnigEncDefaultCharEncoding;
46}
47
48extern int
49onigenc_set_default_encoding(OnigEncoding enc)
50{
51  OnigEncDefaultCharEncoding = enc;
52  return 0;
53}
54
55extern UChar*
56onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
57{
58  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
59  if (p < s) {
60    p += enclen(enc, p);
61  }
62  return p;
63}
64
65extern UChar*
66onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
67				   const UChar* start, const UChar* s, const UChar** prev)
68{
69  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
70
71  if (p < s) {
72    if (prev) *prev = (const UChar* )p;
73    p += enclen(enc, p);
74  }
75  else {
76    if (prev) *prev = (const UChar* )NULL; /* Sorry */
77  }
78  return p;
79}
80
81extern UChar*
82onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
83{
84  if (s <= start)
85    return (UChar* )NULL;
86
87  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
88}
89
90extern UChar*
91onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
92{
93  while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
94    if (s <= start)
95      return (UChar* )NULL;
96
97    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
98  }
99  return (UChar* )s;
100}
101
102extern UChar*
103onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
104{
105  UChar* q = (UChar* )p;
106  while (n-- > 0) {
107    q += ONIGENC_MBC_ENC_LEN(enc, q);
108  }
109  return (q <= end ? q : NULL);
110}
111
112extern int
113onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
114{
115  int n = 0;
116  UChar* q = (UChar* )p;
117
118  while (q < end) {
119    q += ONIGENC_MBC_ENC_LEN(enc, q);
120    n++;
121  }
122  return n;
123}
124
125extern int
126onigenc_strlen_null(OnigEncoding enc, const UChar* s)
127{
128  int n = 0;
129  UChar* p = (UChar* )s;
130
131  while (1) {
132    if (*p == '\0') {
133      UChar* q;
134      int len = ONIGENC_MBC_MINLEN(enc);
135
136      if (len == 1) return n;
137      q = p + 1;
138      while (len > 1) {
139        if (*q != '\0') break;
140        q++;
141        len--;
142      }
143      if (len == 1) return n;
144    }
145    p += ONIGENC_MBC_ENC_LEN(enc, p);
146    n++;
147  }
148}
149
150extern int
151onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
152{
153  UChar* start = (UChar* )s;
154  UChar* p = (UChar* )s;
155
156  while (1) {
157    if (*p == '\0') {
158      UChar* q;
159      int len = ONIGENC_MBC_MINLEN(enc);
160
161      if (len == 1) return (int )(p - start);
162      q = p + 1;
163      while (len > 1) {
164        if (*q != '\0') break;
165        q++;
166        len--;
167      }
168      if (len == 1) return (int )(p - start);
169    }
170    p += ONIGENC_MBC_ENC_LEN(enc, p);
171  }
172}
173
174const UChar OnigEncAsciiToLowerCaseTable[] = {
175  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
176  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
177  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
178  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
179  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
180  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
181  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
182  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
183  0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
184  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
185  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
186  0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
187  0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
188  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
189  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
190  0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
191  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
192  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
193  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
194  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
195  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
196  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
197  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
198  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
199  0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
200  0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
201  0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
202  0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
203  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
204  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
205  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
206  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
207};
208
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte-indexed upper-case map: 0x61..0x7a ('a'..'z') map to 0x41..0x5a,
 * every other byte maps to itself.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
  0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
#endif
245
/*
 * Byte-indexed ctype bit masks.
 * Only 0x00..0x7f carry explicit values; the remaining 128 entries
 * (0x80..0xff) are all zero and are left to implicit zero-initialisation
 * of the fixed-size array.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008
  /* 0x80..0xff: implicitly 0x0000 */
};
280
281const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
282  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
283  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
284  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
285  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
286  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
287  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
288  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
289  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
290  0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
291  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
292  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
293  0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
294  0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
295  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
296  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
297  0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
298  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
299  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
300  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
301  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
302  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
303  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
304  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
305  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
306  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
307  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
308  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0327,
309  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0337,
310  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
311  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
312  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
313  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
314};
315
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte-indexed upper-case map for ISO-8859-1:
 *   0x61..0x7a -> 0x41..0x5a
 *   0xe0..0xfe -> 0xc0..0xde, except 0xf7 which maps to itself
 * All other bytes (including 0xff) map to themselves.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
  0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7,
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff,
};
#endif
352
353extern void
354onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
355{
356  /* nothing */
357  /* obsoleted. */
358}
359
360extern UChar*
361onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
362{
363  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
364}
365
366const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
367  { 0x41, 0x61 },
368  { 0x42, 0x62 },
369  { 0x43, 0x63 },
370  { 0x44, 0x64 },
371  { 0x45, 0x65 },
372  { 0x46, 0x66 },
373  { 0x47, 0x67 },
374  { 0x48, 0x68 },
375  { 0x49, 0x69 },
376  { 0x4a, 0x6a },
377  { 0x4b, 0x6b },
378  { 0x4c, 0x6c },
379  { 0x4d, 0x6d },
380  { 0x4e, 0x6e },
381  { 0x4f, 0x6f },
382  { 0x50, 0x70 },
383  { 0x51, 0x71 },
384  { 0x52, 0x72 },
385  { 0x53, 0x73 },
386  { 0x54, 0x74 },
387  { 0x55, 0x75 },
388  { 0x56, 0x76 },
389  { 0x57, 0x77 },
390  { 0x58, 0x78 },
391  { 0x59, 0x79 },
392  { 0x5a, 0x7a }
393};
394
395extern int
396onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
397				  OnigApplyAllCaseFoldFunc f, void* arg)
398{
399  OnigCodePoint code;
400  int i, r;
401
402  for (i = 0;
403       i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
404       i++) {
405    code = OnigAsciiLowerMap[i].to;
406    r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
407    if (r != 0) return r;
408
409    code = OnigAsciiLowerMap[i].from;
410    r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
411    if (r != 0) return r;
412  }
413
414  return 0;
415}
416
417extern int
418onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
419	 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
420	 OnigCaseFoldCodeItem items[])
421{
422  if (0x41 <= *p && *p <= 0x5a) {
423    items[0].byte_len = 1;
424    items[0].code_len = 1;
425    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
426    return 1;
427  }
428  else if (0x61 <= *p && *p <= 0x7a) {
429    items[0].byte_len = 1;
430    items[0].code_len = 1;
431    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
432    return 1;
433  }
434  else
435    return 0;
436}
437
438static int
439ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
440		       OnigApplyAllCaseFoldFunc f, void* arg)
441{
442  static OnigCodePoint ss[] = { 0x73, 0x73 };
443
444  return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
445}
446
447extern int
448onigenc_apply_all_case_fold_with_map(int map_size,
449    const OnigPairCaseFoldCodes map[],
450    int ess_tsett_flag, OnigCaseFoldType flag,
451    OnigApplyAllCaseFoldFunc f, void* arg)
452{
453  OnigCodePoint code;
454  int i, r;
455
456  r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
457  if (r != 0) return r;
458
459  for (i = 0; i < map_size; i++) {
460    code = map[i].to;
461    r = (*f)(map[i].from, &code, 1, arg);
462    if (r != 0) return r;
463
464    code = map[i].from;
465    r = (*f)(map[i].to, &code, 1, arg);
466    if (r != 0) return r;
467  }
468
469  if (ess_tsett_flag != 0)
470    return ss_apply_all_case_fold(flag, f, arg);
471
472  return 0;
473}
474
475extern int
476onigenc_get_case_fold_codes_by_str_with_map(int map_size,
477    const OnigPairCaseFoldCodes map[],
478    int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
479    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
480{
481  if (0x41 <= *p && *p <= 0x5a) {
482    items[0].byte_len = 1;
483    items[0].code_len = 1;
484    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
485    if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
486	&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
487      /* SS */
488      items[1].byte_len = 2;
489      items[1].code_len = 1;
490      items[1].code[0] = (OnigCodePoint )0xdf;
491      return 2;
492    }
493    else
494      return 1;
495  }
496  else if (0x61 <= *p && *p <= 0x7a) {
497    items[0].byte_len = 1;
498    items[0].code_len = 1;
499    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
500    if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
501	&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
502      /* ss */
503      items[1].byte_len = 2;
504      items[1].code_len = 1;
505      items[1].code[0] = (OnigCodePoint )0xdf;
506      return 2;
507    }
508    else
509      return 1;
510  }
511  else if (*p == 0xdf && ess_tsett_flag != 0) {
512    items[0].byte_len = 1;
513    items[0].code_len = 2;
514    items[0].code[0] = (OnigCodePoint )'s';
515    items[0].code[1] = (OnigCodePoint )'s';
516
517    items[1].byte_len = 1;
518    items[1].code_len = 2;
519    items[1].code[0] = (OnigCodePoint )'S';
520    items[1].code[1] = (OnigCodePoint )'S';
521
522    items[2].byte_len = 1;
523    items[2].code_len = 2;
524    items[2].code[0] = (OnigCodePoint )'s';
525    items[2].code[1] = (OnigCodePoint )'S';
526
527    items[3].byte_len = 1;
528    items[3].code_len = 2;
529    items[3].code[0] = (OnigCodePoint )'S';
530    items[3].code[1] = (OnigCodePoint )'s';
531
532    return 4;
533  }
534  else {
535    int i;
536
537    for (i = 0; i < map_size; i++) {
538      if (*p == map[i].from) {
539	items[0].byte_len = 1;
540	items[0].code_len = 1;
541	items[0].code[0] = map[i].to;
542	return 1;
543      }
544      else if (*p == map[i].to) {
545	items[0].byte_len = 1;
546	items[0].code_len = 1;
547	items[0].code[0] = map[i].from;
548	return 1;
549      }
550    }
551  }
552
553  return 0;
554}
555
556
557extern int
558onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
559	 OnigCodePoint* sb_out ARG_UNUSED,
560	 const OnigCodePoint* ranges[] ARG_UNUSED)
561{
562  return ONIG_NO_SUPPORT_CONFIG;
563}
564
565extern int
566onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
567{
568  if (p < end) {
569    if (*p == 0x0a) return 1;
570  }
571  return 0;
572}
573
574/* for single byte encodings */
575extern int
576onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
577	    const UChar*end ARG_UNUSED, UChar* lower)
578{
579  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
580
581  (*p)++;
582  return 1; /* return byte length of converted char to lower */
583}
584
585#if 0
586extern int
587onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
588			       const UChar** pp, const UChar* end)
589{
590  const UChar* p = *pp;
591
592  (*pp)++;
593  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
594}
595#endif
596
597extern int
598onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
599{
600  return 1;
601}
602
603extern OnigCodePoint
604onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
605{
606  return (OnigCodePoint )(*p);
607}
608
609extern int
610onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
611{
612  return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
613}
614
615extern int
616onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
617{
618  *buf = (UChar )(code & 0xff);
619  return 1;
620}
621
622extern UChar*
623onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
624					  const UChar* s)
625{
626  return (UChar* )s;
627}
628
629extern int
630onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
631					     const UChar* end ARG_UNUSED)
632{
633  return TRUE;
634}
635
636extern int
637onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
638					      const UChar* end ARG_UNUSED)
639{
640  return FALSE;
641}
642
643extern OnigCodePoint
644onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
645{
646  int c, i, len;
647  OnigCodePoint n;
648
649  len = enclen(enc, p);
650  n = (OnigCodePoint )(*p++);
651  if (len == 1) return n;
652
653  for (i = 1; i < len; i++) {
654    if (p >= end) break;
655    c = *p++;
656    n <<= 8;  n += c;
657  }
658  return n;
659}
660
661extern int
662onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
663                          const UChar** pp, const UChar* end ARG_UNUSED,
664			  UChar* lower)
665{
666  int len;
667  const UChar *p = *pp;
668
669  if (ONIGENC_IS_MBC_ASCII(p)) {
670    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
671    (*pp)++;
672    return 1;
673  }
674  else {
675    int i;
676
677    len = enclen(enc, p);
678    for (i = 0; i < len; i++) {
679      *lower++ = *p++;
680    }
681    (*pp) += len;
682    return len; /* return byte length of converted to lower char */
683  }
684}
685
686#if 0
687extern int
688onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
689                             const UChar** pp, const UChar* end)
690{
691  const UChar* p = *pp;
692
693  if (ONIGENC_IS_MBC_ASCII(p)) {
694    (*pp)++;
695    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
696  }
697
698  (*pp) += enclen(enc, p);
699  return FALSE;
700}
701#endif
702
703extern int
704onigenc_mb2_code_to_mbclen(OnigCodePoint code)
705{
706  if ((code & 0xff00) != 0) return 2;
707  else return 1;
708}
709
710extern int
711onigenc_mb4_code_to_mbclen(OnigCodePoint code)
712{
713       if ((code & 0xff000000) != 0) return 4;
714  else if ((code & 0xff0000) != 0) return 3;
715  else if ((code & 0xff00) != 0) return 2;
716  else return 1;
717}
718
719extern int
720onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
721{
722  UChar *p = buf;
723
724  if ((code & 0xff00) != 0) {
725    *p++ = (UChar )((code >>  8) & 0xff);
726  }
727  *p++ = (UChar )(code & 0xff);
728
729#if 1
730  if (enclen(enc, buf) != (p - buf))
731    return ONIGERR_INVALID_CODE_POINT_VALUE;
732#endif
733  return (int)(p - buf);
734}
735
736extern int
737onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
738{
739  UChar *p = buf;
740
741  if ((code & 0xff000000) != 0) {
742    *p++ = (UChar )((code >> 24) & 0xff);
743  }
744  if ((code & 0xff0000) != 0 || p != buf) {
745    *p++ = (UChar )((code >> 16) & 0xff);
746  }
747  if ((code & 0xff00) != 0 || p != buf) {
748    *p++ = (UChar )((code >> 8) & 0xff);
749  }
750  *p++ = (UChar )(code & 0xff);
751
752#if 1
753  if (enclen(enc, buf) != (p - buf))
754    return ONIGERR_INVALID_CODE_POINT_VALUE;
755#endif
756  return (int)(p - buf);
757}
758
759extern int
760onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
761{
762  static PosixBracketEntryType PBS[] = {
763    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
764    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
765    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
766    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
767    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
768    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
769    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
770    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
771    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
772    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
773    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
774    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
775    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
776    { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
777    { (UChar* )NULL, -1, 0 }
778  };
779
780  PosixBracketEntryType *pb;
781  int len;
782
783  len = onigenc_strlen(enc, p, end);
784  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
785    if (len == pb->len &&
786        onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
787      return pb->ctype;
788  }
789
790  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
791}
792
793extern int
794onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
795			  unsigned int ctype)
796{
797  if (code < 128)
798    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
799  else {
800    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
801      return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
802    }
803  }
804
805  return FALSE;
806}
807
808extern int
809onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
810			  unsigned int ctype)
811{
812  if (code < 128)
813    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
814  else {
815    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
816      return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
817    }
818  }
819
820  return FALSE;
821}
822
823extern int
824onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
825                           const UChar* sascii /* ascii */, int n)
826{
827  int x, c;
828
829  while (n-- > 0) {
830    if (p >= end) return (int )(*sascii);
831
832    c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
833    x = *sascii - c;
834    if (x) return x;
835
836    sascii++;
837    p += enclen(enc, p);
838  }
839  return 0;
840}
841
842/* Property management */
843static int
844resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
845{
846  int size;
847  const OnigCodePoint **list = *plist;
848
849  size = sizeof(OnigCodePoint*) * new_size;
850  if (IS_NULL(list)) {
851    list = (const OnigCodePoint** )xmalloc(size);
852  }
853  else {
854    list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*));
855  }
856
857  if (IS_NULL(list)) return ONIGERR_MEMORY;
858
859  *plist = list;
860  *psize = new_size;
861
862  return 0;
863}
864
865extern int
866onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
867     hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
868     int *psize)
869{
870#define PROP_INIT_SIZE     16
871
872  int r;
873
874  if (*psize <= *pnum) {
875    int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
876    r = resize_property_list(new_size, plist, psize);
877    if (r != 0) return r;
878  }
879
880  (*plist)[*pnum] = prop;
881
882  if (ONIG_IS_NULL(*table)) {
883    *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
884    if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
885  }
886
887  *pnum = *pnum + 1;
888  onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE),
889			(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
890  return 0;
891}
892
893extern int
894onigenc_property_list_init(int (*f)(void))
895{
896  int r;
897
898  THREAD_ATOMIC_START;
899
900  r = f();
901
902  THREAD_ATOMIC_END;
903  return r;
904}
905