1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/* 5 ******************************************************************************* 6 * Copyright (C) 2009-2015, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10package android.icu.dev.test.translit; 11 12import java.util.ArrayList; 13import java.util.List; 14import java.util.regex.Matcher; 15import java.util.regex.Pattern; 16 17import org.junit.Test; 18 19import android.icu.dev.test.TestFmwk; 20import android.icu.impl.UnicodeRegex; 21import android.icu.lang.UCharacter; 22import android.icu.lang.UProperty; 23import android.icu.lang.UProperty.NameChoice; 24import android.icu.text.Transliterator; 25import android.icu.text.UTF16; 26import android.icu.text.UnicodeSet; 27 28/** 29 * @author markdavis 30 */ 31public class RegexUtilitiesTest extends TestFmwk { 32 /** 33 * Check basic construction. 34 */ 35 @Test 36 public void TestConstruction() { 37 String[][] tests = { 38 {"a"}, 39 {"a[a-z]b"}, 40 {"[ba-z]", "[a-z]"}, 41 {"q[ba-z]", "q[a-z]"}, 42 {"[ba-z]q", "[a-z]q"}, 43 {"a\\p{joincontrol}b", "a[\u200C\u200D]b"}, 44 {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"}, 45 {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"}, 46 {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"}, 47 }; 48 for (int i = 0; i < tests.length; ++i) { 49 final String source = tests[i][0]; 50 String expected = tests[i].length == 1 ? source : tests[i][1]; 51 String actual = UnicodeRegex.fix(source); 52 assertEquals(source, expected, actual); 53 } 54 } 55 56 Transliterator hex = Transliterator.getInstance("hex"); 57 58 /** 59 * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each 60 * character works. 61 */ 62 @Test 63 public void TestCharacters() { 64 UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]"); 65 boolean skip = TestFmwk.getExhaustiveness() < 10; 66 for (int cp = 0; cp < 0x110000; ++cp) { 67 if (cp > 0xFF && skip && (cp % 37 != 0)) { 68 continue; 69 } 70 String cpString = UTF16.valueOf(cp); 71 String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString; 72 String pattern = null; 73 final String rawPattern = "[" + s + s + "]"; 74 try { 75 pattern = UnicodeRegex.fix(rawPattern); 76 } catch (Exception e) { 77 errln(e.getMessage()); 78 continue; 79 } 80 final String expected = "[" + s + "]"; 81 assertEquals("Doubled character works" + hex.transform(s), expected, pattern); 82 83 // verify that we can create a regex pattern and use as expected 84 String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000); 85 checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch); 86 87 // verify that the Pattern.compile works 88 checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch); 89 } 90 } 91 92 /** 93 * Check all integer Unicode properties to make sure they work. 94 */ 95 @Test 96 public void TestUnicodeProperties() { 97 final boolean skip = TestFmwk.getExhaustiveness() < 10; 98 UnicodeSet temp = new UnicodeSet(); 99 for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) { 100 if (skip && (propNum % 5 != 0)) { 101 continue; 102 } 103 String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG); 104 final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum); 105 int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum); 106 if (skip) { // only test first if not exhaustive 107 intPropertyMaxValue = intPropertyMinValue; 108 } 109 for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) { 110 // hack for getting property value name 111 String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG); 112 if (valueName == null) { 113 valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT); 114 if (valueName == null) { 115 valueName = Integer.toString(valueNum); 116 } 117 } 118 temp.applyIntPropertyValue(propNum, valueNum); 119 if (temp.size() == 0) { 120 continue; 121 } 122 final String prefix = "a"; 123 final String suffix = "b"; 124 String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix; 125 temp.complement(); 126 String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix; 127 128 // posix style pattern 129 String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix; 130 String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix; 131 checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch); 132 checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch); 133 134 // perl style pattern 135 rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix; 136 rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix; 137 checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch); 138 checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch); 139 } 140 } 141 } 142 143 @Test 144 public void TestBnf() { 145 UnicodeRegex regex = new UnicodeRegex(); 146 final String[][] tests = { 147 { 148 "c = a wq;\n" + 149 "a = xyz;\n" + 150 "b = a a c;\n" 151 }, 152 { 153 "c = a b;\n" + 154 "a = xyz;\n" + 155 "b = a a c;\n", 156 "Exception" 157 }, 158 { 159 "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" + 160 "scheme = reserved+;\n" + 161 "host = // reserved+;\n" + 162 "query = [\\=reserved]+;\n" + 163 "fragment = reserved+;\n" + 164 "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n", 165 "http://\u03B1\u03B2\u03B3?huh=hi#there"}, 166 { 167 "langtagRegex.txt" 168 } 169 }; 170 for (int i = 0; i < tests.length; ++i) { 171 String test = tests[i][0]; 172 final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception"); 173 try { 174 String result; 175 if (test.endsWith(".txt")) { 176 java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test); 177 List lines; 178 try { 179 lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8"); 180 } finally { 181 is.close(); 182 } 183 result = regex.compileBnf(lines); 184 } else { 185 result = regex.compileBnf(test); 186 } 187 if (expectException) { 188 errln("Expected exception for " + test); 189 continue; 190 } 191 result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff 192 String resolved = regex.transform(result); 193 logln(resolved); 194 Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher(""); 195 String checks = ""; 196 for (int j = 1; j < tests[i].length; ++j) { 197 String check = tests[i][j]; 198 if (!m.reset(check).matches()) { 199 checks = checks + "Fails " + check + "\n"; 200 } else { 201 for (int k = 1; k <= m.groupCount(); ++k) { 202 checks += "(" + m.group(k) + ")"; 203 } 204 checks += "\n"; 205 } 206 } 207 logln("Result: " + result + "\n" + checks + "\n" + test); 208 } catch (Exception e) { 209 if (!expectException) { 210 errln(e.getClass().getName() + ": " + e.getMessage()); 211 } 212 continue; 213 } 214 } 215 } 216 217 /** 218 * Utility for checking patterns 219 */ 220 private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) { 221 Matcher matcher = pat.matcher(shouldMatch); 222 assertTrue(matchTitle + " and " + shouldMatch, matcher.matches()); 223 matcher.reset(shouldNotMatch); 224 assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches()); 225 } 226} 227