1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4******************************************************************************* 5* Copyright (C) 2001-2013, International Business Machines 6* Corporation and others. All Rights Reserved. 7******************************************************************************* 8*/ 9 10package com.ibm.icu.dev.test.bidi; 11 12import java.util.Arrays; 13 14import com.ibm.icu.dev.test.TestFmwk; 15import com.ibm.icu.impl.Utility; 16import com.ibm.icu.lang.UCharacter; 17import com.ibm.icu.text.Bidi; 18import com.ibm.icu.text.BidiRun; 19import com.ibm.icu.util.VersionInfo; 20 21/** 22 * A base class for the Bidi test suite. 23 * 24 * @author Lina Kemmel, Matitiahu Allouche 25 */ 26 27public class BidiFmwk extends TestFmwk { 28 29 protected static final char[] charFromDirProp = { 30 /* L R EN ES ET AN CS B S WS ON */ 31 0x61, 0x5d0, 0x30, 0x2f, 0x25, 0x660, 0x2c, 0xa, 0x9, 0x20, 0x26, 32 /* LRE LRO AL RLE RLO PDF NSM BN */ 33 0x202a, 0x202d, 0x627, 0x202b, 0x202e, 0x202c, 0x308, 0x200c, 34 /* FSI LRI RLI PDI */ 35 0x2068, 0x2066, 0x2067, 0x2069 /* new in Unicode 6.3/ICU 52 */ 36 }; 37 38 static { 39 initCharFromDirProps(); 40 } 41 42 private static void initCharFromDirProps() { 43 final VersionInfo ucd401 = VersionInfo.getInstance(4, 0, 1, 0); 44 VersionInfo ucdVersion = VersionInfo.getInstance(0, 0, 0, 0); 45 46 /* lazy initialization */ 47 if (ucdVersion.getMajor() > 0) { 48 return; 49 50 } 51 ucdVersion = UCharacter.getUnicodeVersion(); 52 if (ucdVersion.compareTo(ucd401) >= 0) { 53 /* Unicode 4.0.1 changes bidi classes for +-/ */ 54 /* change ES character from / to + */ 55 charFromDirProp[TestData.ES] = 0x2b; 56 } 57 } 58 59 protected boolean assertEquals(String message, String expected, String actual, 60 String src, String mode, String option, 61 String level) { 62 if (expected == null || actual == null) { 63 return super.assertEquals(message, expected, actual); 64 } 65 if (expected.equals(actual)) { 66 return true; 67 } 68 errln(""); 69 errcontln(message); 70 if (src != null) { 71 errcontln("source : \"" + Utility.escape(src) + "\""); 72 } 73 errcontln("expected : \"" + Utility.escape(expected) + "\""); 74 errcontln("actual : \"" + Utility.escape(actual) + "\""); 75 if (mode != null) { 76 errcontln("reordering mode : " + mode); 77 } 78 if (option != null) { 79 errcontln("reordering option : " + option); 80 } 81 if (level != null) { 82 errcontln("paragraph level : " + level); 83 } 84 return false; 85 } 86 87 protected static String valueOf(int[] array) { 88 StringBuffer result = new StringBuffer(array.length * 4); 89 for (int i = 0; i < array.length; i++) { 90 result.append(' '); 91 result.append(array[i]); 92 } 93 return result.toString(); 94 } 95 96 private static final String[] modeDescriptions = { 97 "REORDER_DEFAULT", 98 "REORDER_NUMBERS_SPECIAL", 99 "REORDER_GROUP_NUMBERS_WITH_R", 100 "REORDER_RUNS_ONLY", 101 "REORDER_INVERSE_NUMBERS_AS_L", 102 "REORDER_INVERSE_LIKE_DIRECT", 103 "REORDER_INVERSE_FOR_NUMBERS_SPECIAL" 104 }; 105 106 protected static String modeToString(int mode) { 107 if (mode < Bidi.REORDER_DEFAULT || 108 mode > Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) { 109 return "INVALID"; 110 } 111 return modeDescriptions[mode]; 112 } 113 114 private static final short SETPARA_MASK = Bidi.OPTION_INSERT_MARKS | 115 Bidi.OPTION_REMOVE_CONTROLS | Bidi.OPTION_STREAMING; 116 117 private static final String[] setParaDescriptions = { 118 "OPTION_INSERT_MARKS", 119 "OPTION_REMOVE_CONTROLS", 120 "OPTION_STREAMING" 121 }; 122 123 protected static String spOptionsToString(int option) { 124 return optionToString(option, SETPARA_MASK, setParaDescriptions); 125 } 126 127 private static final int MAX_WRITE_REORDERED_OPTION = Bidi.OUTPUT_REVERSE; 128 private static final int REORDER_MASK = (MAX_WRITE_REORDERED_OPTION << 1) - 1; 129 130 private static final String[] writeReorderedDescriptions = { 131 "KEEP_BASE_COMBINING", // 1 132 "DO_MIRRORING", // 2 133 "INSERT_LRM_FOR_NUMERIC", // 4 134 "REMOVE_BIDI_CONTROLS", // 8 135 "OUTPUT_REVERSE" // 16 136 }; 137 138 public static String wrOptionsToString(int option) { 139 return optionToString(option, REORDER_MASK, writeReorderedDescriptions); 140 } 141 public static String optionToString(int option, int mask, 142 String[] descriptions) { 143 StringBuffer desc = new StringBuffer(50); 144 145 if ((option &= mask) == 0) { 146 return "0"; 147 } 148 desc.setLength(0); 149 150 for (int i = 0; option > 0; i++, option >>= 1) { 151 if ((option & 1) != 0) { 152 if (desc.length() > 0) { 153 desc.append(" | "); 154 } 155 desc.append(descriptions[i]); 156 } 157 } 158 return desc.toString(); 159 } 160 161 static final String columnString = 162 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; 163 static final char[] columns = columnString.toCharArray(); 164 private static final int TABLE_SIZE = 256; 165 private static boolean tablesInitialized = false; 166 private static char[] pseudoToUChar; 167 private static char[] UCharToPseudo; /* used for Unicode chars < 0x0100 */ 168 private static char[] UCharToPseud2; /* used for Unicode chars >=0x0100 */ 169 170 static void buildPseudoTables() 171 /* 172 The rules for pseudo-Bidi are as follows: 173 - [ == LRE 174 - ] == RLE 175 - { == LRO 176 - } == RLO 177 - ^ == PDF 178 - @ == LRM 179 - & == RLM 180 - A-F == Arabic Letters 0631-0636 181 - G-V == Hebrew letters 05d7-05ea 182 - W-Z == Unassigned RTL 08d0-08d3 183 - 0-5 == western digits 0030-0035 184 - 6-9 == Arabic-Indic digits 0666-0669 185 - ` == Combining Grave Accent 0300 (NSM) 186 - ~ == Delete 007f (BN) 187 - | == Paragraph Separator 2029 (B) 188 - _ == Info Separator 1 001f (S) 189 All other characters represent themselves as Latin-1, with the corresponding 190 Bidi properties. 191 */ 192 { 193 int i; 194 char uchar; 195 char c; 196 197 /* initialize all tables to unknown */ 198 pseudoToUChar = new char[TABLE_SIZE]; 199 UCharToPseudo = new char[TABLE_SIZE]; 200 UCharToPseud2 = new char[TABLE_SIZE]; 201 for (i = 0; i < TABLE_SIZE; i++) { 202 pseudoToUChar[i] = 0xFFFD; 203 UCharToPseudo[i] = '?'; 204 UCharToPseud2[i] = '?'; 205 } 206 /* initialize non letters or digits */ 207 pseudoToUChar[ 0 ] = 0x0000; UCharToPseudo[0x00] = 0 ; 208 pseudoToUChar[' '] = 0x0020; UCharToPseudo[0x20] = ' '; 209 pseudoToUChar['!'] = 0x0021; UCharToPseudo[0x21] = '!'; 210 pseudoToUChar['"'] = 0x0022; UCharToPseudo[0x22] = '"'; 211 pseudoToUChar['#'] = 0x0023; UCharToPseudo[0x23] = '#'; 212 pseudoToUChar['$'] = 0x0024; UCharToPseudo[0x24] = '$'; 213 pseudoToUChar['%'] = 0x0025; UCharToPseudo[0x25] = '%'; 214 pseudoToUChar['\'']= 0x0027; UCharToPseudo[0x27] = '\''; 215 pseudoToUChar['('] = 0x0028; UCharToPseudo[0x28] = '('; 216 pseudoToUChar[')'] = 0x0029; UCharToPseudo[0x29] = ')'; 217 pseudoToUChar['*'] = 0x002A; UCharToPseudo[0x2A] = '*'; 218 pseudoToUChar['+'] = 0x002B; UCharToPseudo[0x2B] = '+'; 219 pseudoToUChar[','] = 0x002C; UCharToPseudo[0x2C] = ','; 220 pseudoToUChar['-'] = 0x002D; UCharToPseudo[0x2D] = '-'; 221 pseudoToUChar['.'] = 0x002E; UCharToPseudo[0x2E] = '.'; 222 pseudoToUChar['/'] = 0x002F; UCharToPseudo[0x2F] = '/'; 223 pseudoToUChar[':'] = 0x003A; UCharToPseudo[0x3A] = ':'; 224 pseudoToUChar[';'] = 0x003B; UCharToPseudo[0x3B] = ';'; 225 pseudoToUChar['<'] = 0x003C; UCharToPseudo[0x3C] = '<'; 226 pseudoToUChar['='] = 0x003D; UCharToPseudo[0x3D] = '='; 227 pseudoToUChar['>'] = 0x003E; UCharToPseudo[0x3E] = '>'; 228 pseudoToUChar['?'] = 0x003F; UCharToPseudo[0x3F] = '?'; 229 pseudoToUChar['\\']= 0x005C; UCharToPseudo[0x5C] = '\\'; 230 /* initialize specially used characters */ 231 pseudoToUChar['`'] = 0x0300; UCharToPseud2[0x00] = '`'; /* NSM */ 232 pseudoToUChar['@'] = 0x200E; UCharToPseud2[0x0E] = '@'; /* LRM */ 233 pseudoToUChar['&'] = 0x200F; UCharToPseud2[0x0F] = '&'; /* RLM */ 234 pseudoToUChar['_'] = 0x001F; UCharToPseudo[0x1F] = '_'; /* S */ 235 pseudoToUChar['|'] = 0x2029; UCharToPseud2[0x29] = '|'; /* B */ 236 pseudoToUChar['['] = 0x202A; UCharToPseud2[0x2A] = '['; /* LRE */ 237 pseudoToUChar[']'] = 0x202B; UCharToPseud2[0x2B] = ']'; /* RLE */ 238 pseudoToUChar['^'] = 0x202C; UCharToPseud2[0x2C] = '^'; /* PDF */ 239 pseudoToUChar['{'] = 0x202D; UCharToPseud2[0x2D] = '{'; /* LRO */ 240 pseudoToUChar['}'] = 0x202E; UCharToPseud2[0x2E] = '}'; /* RLO */ 241 pseudoToUChar['~'] = 0x007F; UCharToPseudo[0x7F] = '~'; /* BN */ 242 /* initialize western digits */ 243 for (i = 0, uchar = 0x0030; i < 6; i++, uchar++) { 244 c = columns[i]; 245 pseudoToUChar[c] = uchar; 246 UCharToPseudo[uchar & 0x00ff] = c; 247 } 248 /* initialize Hindi digits */ 249 for (i = 6, uchar = 0x0666; i < 10; i++, uchar++) { 250 c = columns[i]; 251 pseudoToUChar[c] = uchar; 252 UCharToPseud2[uchar & 0x00ff] = c; 253 } 254 /* initialize Arabic letters */ 255 for (i = 10, uchar = 0x0631; i < 16; i++, uchar++) { 256 c = columns[i]; 257 pseudoToUChar[c] = uchar; 258 UCharToPseud2[uchar & 0x00ff] = c; 259 } 260 /* initialize Hebrew letters */ 261 for (i = 16, uchar = 0x05D7; i < 32; i++, uchar++) { 262 c = columns[i]; 263 pseudoToUChar[c] = uchar; 264 UCharToPseud2[uchar & 0x00ff] = c; 265 } 266 /* initialize Unassigned code points */ 267 for (i = 32, uchar = 0x08D0; i < 36; i++, uchar++) { 268 c = columns[i]; 269 pseudoToUChar[c] = uchar; 270 UCharToPseud2[uchar & 0x00ff] = c; 271 } 272 /* initialize Latin lower case letters */ 273 for (i = 36, uchar = 0x0061; i < 62; i++, uchar++) { 274 c = columns[i]; 275 pseudoToUChar[c] = uchar; 276 UCharToPseudo[uchar & 0x00ff] = c; 277 } 278 tablesInitialized = true; 279 } 280 281 /*----------------------------------------------------------------------*/ 282 283 static String pseudoToU16(String input) 284 /* This function converts a pseudo-Bidi string into a char string. 285 It returns the char string. 286 */ 287 { 288 int len = input.length(); 289 char[] output = new char[len]; 290 int i; 291 if (!tablesInitialized) { 292 buildPseudoTables(); 293 } 294 for (i = 0; i < len; i++) 295 output[i] = pseudoToUChar[input.charAt(i)]; 296 return new String(output); 297 } 298 299 /*----------------------------------------------------------------------*/ 300 301 static String u16ToPseudo(String input) 302 /* This function converts a char string into a pseudo-Bidi string. 303 It returns the pseudo-Bidi string. 304 */ 305 { 306 int len = input.length(); 307 char[] output = new char[len]; 308 int i; 309 char uchar; 310 if (!tablesInitialized) { 311 buildPseudoTables(); 312 } 313 for (i = 0; i < len; i++) 314 { 315 uchar = input.charAt(i); 316 output[i] = uchar < 0x0100 ? UCharToPseudo[uchar] : 317 UCharToPseud2[uchar & 0x00ff]; 318 } 319 return new String(output); 320 } 321 322 void errcont(String message) { 323 msg(message, ERR, false, false); 324 } 325 326 void errcontln(String message) { 327 msg(message, ERR, false, true); 328 } 329 330 void printCaseInfo(Bidi bidi, String src, String dst) 331 { 332 int length = bidi.getProcessedLength(); 333 byte[] levels = bidi.getLevels(); 334 char[] levelChars = new char[length]; 335 byte lev; 336 int runCount = bidi.countRuns(); 337 errcontln("========================================"); 338 errcontln("Processed length: " + length); 339 for (int i = 0; i < length; i++) { 340 lev = levels[i]; 341 if (lev < 0) { 342 levelChars[i] = '-'; 343 } else if (lev < columns.length) { 344 levelChars[i] = columns[lev]; 345 } else { 346 levelChars[i] = '+'; 347 } 348 } 349 errcontln("Levels: " + new String(levelChars)); 350 errcontln("Source: " + src); 351 errcontln("Result: " + dst); 352 errcontln("Direction: " + bidi.getDirection()); 353 errcontln("paraLevel: " + Byte.toString(bidi.getParaLevel())); 354 errcontln("reorderingMode: " + modeToString(bidi.getReorderingMode())); 355 errcontln("reorderingOptions: " + spOptionsToString(bidi.getReorderingOptions())); 356 errcont("Runs: " + runCount + " => logicalStart.length/level: "); 357 for (int i = 0; i < runCount; i++) { 358 BidiRun run; 359 run = bidi.getVisualRun(i); 360 errcont(" " + run.getStart() + "." + run.getLength() + "/" + 361 run.getEmbeddingLevel()); 362 } 363 errcont("\n"); 364 } 365 366 static final String mates1 = "<>()[]{}"; 367 static final String mates2 = "><)(][}{"; 368 static final char[] mates1Chars = mates1.toCharArray(); 369 static final char[] mates2Chars = mates2.toCharArray(); 370 371 boolean matchingPair(Bidi bidi, int i, char c1, char c2) 372 { 373 if (c1 == c2) { 374 return true; 375 } 376 /* For REORDER_RUNS_ONLY, it would not be correct to check levels[i], 377 so we use the appropriate run's level, which is good for all cases. 378 */ 379 if (bidi.getLogicalRun(i).getDirection() == 0) { 380 return false; 381 } 382 for (int k = 0; k < mates1Chars.length; k++) { 383 if ((c1 == mates1Chars[k]) && (c2 == mates2Chars[k])) { 384 return true; 385 } 386 } 387 return false; 388 } 389 390 boolean checkWhatYouCan(Bidi bidi, String src, String dst) 391 { 392 int i, idx, logLimit, visLimit; 393 boolean testOK, errMap, errDst; 394 char[] srcChars = src.toCharArray(); 395 char[] dstChars = dst.toCharArray(); 396 int[] visMap = bidi.getVisualMap(); 397 int[] logMap = bidi.getLogicalMap(); 398 399 testOK = true; 400 errMap = errDst = false; 401 logLimit = bidi.getProcessedLength(); 402 visLimit = bidi.getResultLength(); 403 if (visLimit > dstChars.length) { 404 visLimit = dstChars.length; 405 } 406 char[] accumSrc = new char[logLimit]; 407 char[] accumDst = new char[visLimit]; 408 Arrays.fill(accumSrc, '?'); 409 Arrays.fill(accumDst, '?'); 410 411 if (logMap.length != logLimit) { 412 errMap = true; 413 } 414 for (i = 0; i < logLimit; i++) { 415 idx = bidi.getVisualIndex(i); 416 if (idx != logMap[i]) { 417 errMap = true; 418 } 419 if (idx == Bidi.MAP_NOWHERE) { 420 continue; 421 } 422 if (idx >= visLimit) { 423 continue; 424 } 425 accumDst[idx] = srcChars[i]; 426 if (!matchingPair(bidi, i, srcChars[i], dstChars[idx])) { 427 errDst = true; 428 } 429 } 430 if (errMap) { 431 if (testOK) { 432 printCaseInfo(bidi, src, dst); 433 testOK = false; 434 } 435 errln("Mismatch between getLogicalMap() and getVisualIndex()"); 436 errcont("Map :" + valueOf(logMap)); 437 errcont("\n"); 438 errcont("Indexes:"); 439 for (i = 0; i < logLimit; i++) { 440 errcont(" " + bidi.getVisualIndex(i)); 441 } 442 errcont("\n"); 443 } 444 if (errDst) { 445 if (testOK) { 446 printCaseInfo(bidi, src, dst); 447 testOK = false; 448 } 449 errln("Source does not map to Result"); 450 errcontln("We got: " + new String(accumDst)); 451 } 452 453 errMap = errDst = false; 454 if (visMap.length != visLimit) { 455 errMap = true; 456 } 457 for (i = 0; i < visLimit; i++) { 458 idx = bidi.getLogicalIndex(i); 459 if (idx != visMap[i]) { 460 errMap = true; 461 } 462 if (idx == Bidi.MAP_NOWHERE) { 463 continue; 464 } 465 if (idx >= logLimit) { 466 continue; 467 } 468 accumSrc[idx] = dstChars[i]; 469 if (!matchingPair(bidi, idx, srcChars[idx], dstChars[i])) { 470 errDst = true; 471 } 472 } 473 if (errMap) { 474 if (testOK) { 475 printCaseInfo(bidi, src, dst); 476 testOK = false; 477 } 478 errln("Mismatch between getVisualMap() and getLogicalIndex()"); 479 errcont("Map :" + valueOf(visMap)); 480 errcont("\n"); 481 errcont("Indexes:"); 482 for (i = 0; i < visLimit; i++) { 483 errcont(" " + bidi.getLogicalIndex(i)); 484 } 485 errcont("\n"); 486 } 487 if (errDst) { 488 if (testOK) { 489 printCaseInfo(bidi, src, dst); 490 testOK = false; 491 } 492 errln("Result does not map to Source"); 493 errcontln("We got: " + new String(accumSrc)); 494 } 495 return testOK; 496 } 497 498} 499