1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 2009-2015, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10package android.icu.dev.test.translit;
11
12import java.util.ArrayList;
13import java.util.List;
14import java.util.regex.Matcher;
15import java.util.regex.Pattern;
16
17import org.junit.Test;
18
19import android.icu.dev.test.TestFmwk;
20import android.icu.impl.UnicodeRegex;
21import android.icu.lang.UCharacter;
22import android.icu.lang.UProperty;
23import android.icu.lang.UProperty.NameChoice;
24import android.icu.text.Transliterator;
25import android.icu.text.UTF16;
26import android.icu.text.UnicodeSet;
27
28/**
29 * @author markdavis
30 */
31public class RegexUtilitiesTest extends TestFmwk {
32    /**
33     * Check basic construction.
34     */
35    @Test
36    public void TestConstruction() {
37        String[][] tests = {
38                {"a"},
39                {"a[a-z]b"},
40                {"[ba-z]", "[a-z]"},
41                {"q[ba-z]", "q[a-z]"},
42                {"[ba-z]q", "[a-z]q"},
43                {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},
44                {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},
45                {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},
46                {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},
47        };
48        for (int i = 0; i < tests.length; ++i) {
49            final String source = tests[i][0];
50            String expected = tests[i].length == 1 ? source : tests[i][1];
51            String actual = UnicodeRegex.fix(source);
52            assertEquals(source, expected, actual);
53        }
54    }
55
56    Transliterator hex = Transliterator.getInstance("hex");
57
58    /**
59     * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each
60     * character works.
61     */
62    @Test
63    public void TestCharacters() {
64        UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");
65        boolean skip = TestFmwk.getExhaustiveness() < 10;
66        for (int cp = 0; cp < 0x110000; ++cp) {
67            if (cp > 0xFF && skip && (cp % 37 != 0)) {
68                continue;
69            }
70            String cpString = UTF16.valueOf(cp);
71            String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;
72            String pattern = null;
73            final String rawPattern = "[" + s + s + "]";
74            try {
75                pattern = UnicodeRegex.fix(rawPattern);
76            } catch (Exception e) {
77                errln(e.getMessage());
78                continue;
79            }
80            final String expected = "[" + s + "]";
81            assertEquals("Doubled character works" + hex.transform(s), expected, pattern);
82
83            // verify that we can create a regex pattern and use as expected
84            String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);
85            checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);
86
87            // verify that the Pattern.compile works
88            checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);
89        }
90    }
91
92    /**
93     * Check all integer Unicode properties to make sure they work.
94     */
95    @Test
96    public void TestUnicodeProperties() {
97        final boolean skip = TestFmwk.getExhaustiveness() < 10;
98        UnicodeSet temp = new UnicodeSet();
99        for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) {
100            if (skip && (propNum % 5 != 0)) {
101                continue;
102            }
103            String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG);
104            final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum);
105            int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum);
106            if (skip) { // only test first if not exhaustive
107                intPropertyMaxValue = intPropertyMinValue;
108            }
109            for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) {
110                // hack for getting property value name
111                String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG);
112                if (valueName == null) {
113                    valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT);
114                    if (valueName == null) {
115                        valueName = Integer.toString(valueNum);
116                    }
117                }
118                temp.applyIntPropertyValue(propNum, valueNum);
119                if (temp.size() == 0) {
120                    continue;
121                }
122                final String prefix = "a";
123                final String suffix = "b";
124                String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;
125                temp.complement();
126                String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;
127
128                // posix style pattern
129                String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix;
130                String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix;
131                checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);
132                checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);
133
134                // perl style pattern
135                rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix;
136                rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix;
137                checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);
138                checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);
139            }
140        }
141    }
142
143    @Test
144    public void TestBnf() {
145        UnicodeRegex regex = new UnicodeRegex();
146        final String[][] tests = {
147                {
148                    "c = a wq;\n" +
149                    "a = xyz;\n" +
150                    "b = a a c;\n"
151                },
152                {
153                    "c = a b;\n" +
154                    "a = xyz;\n" +
155                    "b = a a c;\n",
156                    "Exception"
157                },
158                {
159                    "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +
160                    "scheme = reserved+;\n" +
161                    "host = // reserved+;\n" +
162                    "query = [\\=reserved]+;\n" +
163                    "fragment = reserved+;\n" +
164                    "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",
165                "http://\u03B1\u03B2\u03B3?huh=hi#there"},
166                {
167                    "langtagRegex.txt"
168                }
169        };
170        for (int i = 0; i < tests.length; ++i) {
171            String test = tests[i][0];
172            final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception");
173            try {
174                String result;
175                if (test.endsWith(".txt")) {
176                    java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);
177                    List lines;
178                    try {
179                        lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");
180                    } finally {
181                        is.close();
182                    }
183                    result = regex.compileBnf(lines);
184                } else {
185                    result = regex.compileBnf(test);
186                }
187                if (expectException) {
188                    errln("Expected exception for " + test);
189                    continue;
190                }
191                result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff
192                String resolved = regex.transform(result);
193                logln(resolved);
194                Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");
195                String checks = "";
196                for (int j = 1; j < tests[i].length; ++j) {
197                    String check = tests[i][j];
198                    if (!m.reset(check).matches()) {
199                        checks = checks + "Fails " + check + "\n";
200                    } else {
201                        for (int k = 1; k <= m.groupCount(); ++k) {
202                            checks += "(" + m.group(k) + ")";
203                        }
204                        checks += "\n";
205                    }
206                }
207                logln("Result: " + result + "\n" + checks + "\n" + test);
208            } catch (Exception e) {
209                if (!expectException) {
210                    errln(e.getClass().getName() + ": " + e.getMessage());
211                }
212                continue;
213            }
214        }
215    }
216
217    /**
218     * Utility for checking patterns
219     */
220    private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {
221        Matcher matcher = pat.matcher(shouldMatch);
222        assertTrue(matchTitle + " and " + shouldMatch, matcher.matches());
223        matcher.reset(shouldNotMatch);
224        assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches());
225    }
226}
227