1/* 2******************************************************************************* 3* 4* Copyright (C) 2009, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: bidiconf.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2009oct16 14* created by: Markus W. Scherer 15* 16* BiDi conformance test, using the Unicode BidiTest.txt file. 17*/ 18 19#include <stdio.h> 20#include <stdlib.h> 21#include <string.h> 22#include "unicode/utypes.h" 23#include "unicode/ubidi.h" 24#include "unicode/errorcode.h" 25#include "unicode/localpointer.h" 26#include "unicode/putil.h" 27#include "unicode/unistr.h" 28#include "intltest.h" 29#include "uparse.h" 30 31class BiDiConformanceTest : public IntlTest { 32public: 33 BiDiConformanceTest() : 34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), 35 errorCount(0) {} 36 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 38 39 void TestBidiTest(); 40private: 41 char *getUnidataPath(char path[]); 42 43 UBool parseLevels(const char *start); 44 UBool parseOrdering(const char *start); 45 UBool parseInputStringFromBiDiClasses(const char *&start); 46 47 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, 48 const char *paraLevelName); 49 UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName); 50 51 void printErrorLine(const char *paraLevelName); 52 53 char line[10000]; 54 UBiDiLevel levels[1000]; 55 uint32_t directionBits; 56 int32_t ordering[1000]; 57 int32_t lineNumber; 58 int32_t levelsCount; 59 int32_t orderingCount; 60 int32_t errorCount; 61 UnicodeString inputString; 62}; 63 64extern IntlTest *createBiDiConformanceTest() { 65 return new BiDiConformanceTest(); 66} 67 68void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) { 69 if(exec) { 70 logln("TestSuite BiDiConformanceTest: "); 71 } 72 switch (index) { 73 TESTCASE(0, TestBidiTest); 74 default: 75 name=""; 76 break; // needed to end the loop 77 } 78} 79 80// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp). 81char *BiDiConformanceTest::getUnidataPath(char path[]) { 82 IcuTestErrorCode errorCode(*this, "getUnidataPath"); 83 const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt") 84 85 // Look inside ICU_DATA first. 86 strcpy(path, pathToDataDirectory()); 87 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); 88 FILE *f=fopen(path, "r"); 89 if(f!=NULL) { 90 fclose(f); 91 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. 92 return path; 93 } 94 95 // As a fallback, try to guess where the source data was located 96 // at the time ICU was built, and look there. 97# ifdef U_TOPSRCDIR 98 strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 99# else 100 strcpy(path, loadTestData(errorCode)); 101 strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 102 U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 103 U_FILE_SEP_STRING "data"); 104# endif 105 strcat(path, U_FILE_SEP_STRING); 106 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); 107 f=fopen(path, "r"); 108 if(f!=NULL) { 109 fclose(f); 110 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. 111 return path; 112 } 113 return NULL; 114} 115 116U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); 117 118// TODO: Make "public" in uparse.h. 119#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') 120 121UBool BiDiConformanceTest::parseLevels(const char *start) { 122 directionBits=0; 123 levelsCount=0; 124 while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { 125 if(*start=='x') { 126 levels[levelsCount++]=UBIDI_DEFAULT_LTR; 127 ++start; 128 } else { 129 char *end; 130 uint32_t value=(uint32_t)strtoul(start, &end, 10); 131 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { 132 errln("@Levels: parse error at %s", start); 133 return FALSE; 134 } 135 levels[levelsCount++]=(UBiDiLevel)value; 136 directionBits|=(1<<(value&1)); 137 start=end; 138 } 139 } 140 return TRUE; 141} 142 143UBool BiDiConformanceTest::parseOrdering(const char *start) { 144 orderingCount=0; 145 while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { 146 char *end; 147 uint32_t value=(uint32_t)strtoul(start, &end, 10); 148 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) { 149 errln("@Reorder: parse error at %s", start); 150 return FALSE; 151 } 152 ordering[orderingCount++]=(int32_t)value; 153 start=end; 154 } 155 return TRUE; 156} 157 158static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ 159 0x6c, // 'l' for L 160 0x52, // 'R' for R 161 0x33, // '3' for EN 162 0x2d, // '-' for ES 163 0x25, // '%' for ET 164 0x39, // '9' for AN 165 0x2c, // ',' for CS 166 0x2f, // '/' for B 167 0x5f, // '_' for S 168 0x20, // ' ' for WS 169 0x3d, // '=' for ON 170 0x65, // 'e' for LRE 171 0x6f, // 'o' for LRO 172 0x41, // 'A' for AL 173 0x45, // 'E' for RLE 174 0x4f, // 'O' for RLO 175 0x2a, // '*' for PDF 176 0x60, // '`' for NSM 177 0x7c // '|' for BN 178}; 179 180U_CDECL_BEGIN 181 182static UCharDirection U_CALLCONV 183biDiConfUBiDiClassCallback(const void *context, UChar32 c) { 184 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { 185 if(c==charFromBiDiClass[i]) { 186 return (UCharDirection)i; 187 } 188 } 189 // Character not in our hardcoded table. 190 // Should not occur during testing. 191 return U_BIDI_CLASS_DEFAULT; 192} 193 194U_CDECL_END 195 196static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ 197 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0 198}; 199 200UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { 201 inputString.remove(); 202 /* 203 * Lengthy but fast BiDi class parser. 204 * A simple parser could terminate or extract the name string and use 205 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); 206 * but that makes this test take significantly more time. 207 */ 208 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { 209 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; 210 // Compare each character once until we have a match on 211 // a complete, short BiDi class name. 212 if(start[0]=='L') { 213 if(start[1]=='R') { 214 if(start[2]=='E') { 215 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; 216 } else if(start[2]=='O') { 217 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; 218 } 219 } else { 220 biDiClass=U_LEFT_TO_RIGHT; 221 } 222 } else if(start[0]=='R') { 223 if(start[1]=='L') { 224 if(start[2]=='E') { 225 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; 226 } else if(start[2]=='O') { 227 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; 228 } 229 } else { 230 biDiClass=U_RIGHT_TO_LEFT; 231 } 232 } else if(start[0]=='E') { 233 if(start[1]=='N') { 234 biDiClass=U_EUROPEAN_NUMBER; 235 } else if(start[1]=='S') { 236 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; 237 } else if(start[1]=='T') { 238 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; 239 } 240 } else if(start[0]=='A') { 241 if(start[1]=='L') { 242 biDiClass=U_RIGHT_TO_LEFT_ARABIC; 243 } else if(start[1]=='N') { 244 biDiClass=U_ARABIC_NUMBER; 245 } 246 } else if(start[0]=='C' && start[1]=='S') { 247 biDiClass=U_COMMON_NUMBER_SEPARATOR; 248 } else if(start[0]=='B') { 249 if(start[1]=='N') { 250 biDiClass=U_BOUNDARY_NEUTRAL; 251 } else { 252 biDiClass=U_BLOCK_SEPARATOR; 253 } 254 } else if(start[0]=='S') { 255 biDiClass=U_SEGMENT_SEPARATOR; 256 } else if(start[0]=='W' && start[1]=='S') { 257 biDiClass=U_WHITE_SPACE_NEUTRAL; 258 } else if(start[0]=='O' && start[1]=='N') { 259 biDiClass=U_OTHER_NEUTRAL; 260 } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') { 261 biDiClass=U_POP_DIRECTIONAL_FORMAT; 262 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { 263 biDiClass=U_DIR_NON_SPACING_MARK; 264 } 265 // Now we verify that the class name is terminated properly, 266 // and not just the start of a longer word. 267 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; 268 char c=start[biDiClassNameLength]; 269 if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) { 270 errln("BiDi class string not recognized at %s", start); 271 return FALSE; 272 } 273 inputString.append(charFromBiDiClass[biDiClass]); 274 start+=biDiClassNameLength; 275 } 276 return TRUE; 277} 278 279void BiDiConformanceTest::TestBidiTest() { 280 IcuTestErrorCode errorCode(*this, "TestBidiTest"); 281 const char *sourceTestDataPath=getSourceTestData(errorCode); 282 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " 283 "folder (getSourceTestData())")) { 284 return; 285 } 286 char bidiTestPath[400]; 287 strcpy(bidiTestPath, sourceTestDataPath); 288 strcat(bidiTestPath, "BidiTest.txt"); 289 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); 290 if(bidiTestFile.isNull()) { 291 errln("unable to open %s", bidiTestPath); 292 return; 293 } 294 LocalUBiDiPointer ubidi(ubidi_open()); 295 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, 296 NULL, NULL, errorCode); 297 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { 298 return; 299 } 300 lineNumber=0; 301 levelsCount=0; 302 orderingCount=0; 303 errorCount=0; 304 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { 305 ++lineNumber; 306 // Remove trailing comments and whitespace. 307 char *commentStart=strchr(line, '#'); 308 if(commentStart!=NULL) { 309 *commentStart=0; 310 } 311 u_rtrim(line); 312 const char *start=u_skipWhitespace(line); 313 if(*start==0) { 314 continue; // Skip empty and comment-only lines. 315 } 316 if(*start=='@') { 317 ++start; 318 if(0==strncmp(start, "Levels:", 7)) { 319 if(!parseLevels(start+7)) { 320 return; 321 } 322 } else if(0==strncmp(start, "Reorder:", 8)) { 323 if(!parseOrdering(start+8)) { 324 return; 325 } 326 } 327 // Skip unknown @Xyz: ... 328 } else { 329 if(!parseInputStringFromBiDiClasses(start)) { 330 return; 331 } 332 start=u_skipWhitespace(start); 333 if(*start!=';') { 334 errln("missing ; separator on input line %s", line); 335 return; 336 } 337 start=u_skipWhitespace(start+1); 338 char *end; 339 uint32_t bitset=(uint32_t)strtoul(start, &end, 10); 340 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { 341 errln("input bitset parse error at %s", start); 342 return; 343 } 344 // Loop over the bitset. 345 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1 }; 346 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL" }; 347 for(int i=0; i<=2; ++i) { 348 if(bitset&(1<<i)) { 349 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), 350 paraLevels[i], NULL, errorCode); 351 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); 352 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { 353 errln("Input line %d: %s", (int)lineNumber, line); 354 return; 355 } 356 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()), 357 paraLevelNames[i])) { 358 // continue outerLoop; does not exist in C++ 359 // so just break out of the inner loop. 360 break; 361 } 362 if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) { 363 // continue outerLoop; does not exist in C++ 364 // so just break out of the inner loop. 365 break; 366 } 367 } 368 } 369 } 370 } 371} 372 373static UChar printLevel(UBiDiLevel level) { 374 if(level<UBIDI_DEFAULT_LTR) { 375 return 0x30+level; 376 } else { 377 return 0x78; // 'x' 378 } 379} 380 381static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { 382 uint32_t actualDirectionBits=0; 383 for(int32_t i=0; i<actualCount; ++i) { 384 actualDirectionBits|=(1<<(actualLevels[i]&1)); 385 } 386 return actualDirectionBits; 387} 388 389UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, 390 const char *paraLevelName) { 391 UBool isOk=TRUE; 392 if(levelsCount!=actualCount) { 393 errln("Wrong number of level values; expected %d actual %d", 394 (int)levelsCount, (int)actualCount); 395 isOk=FALSE; 396 } else { 397 for(int32_t i=0; i<actualCount; ++i) { 398 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { 399 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { 400 // ICU used a shortcut: 401 // Since the text is unidirectional, it did not store the resolved 402 // levels but just returns all levels as the paragraph level 0 or 1. 403 // The reordering result is the same, so this is fine. 404 break; 405 } else { 406 errln("Wrong level value at index %d; expected %d actual %d", 407 (int)i, levels[i], actualLevels[i]); 408 isOk=FALSE; 409 break; 410 } 411 } 412 } 413 } 414 if(!isOk) { 415 printErrorLine(paraLevelName); 416 UnicodeString els("Expected levels: "); 417 int32_t i; 418 for(i=0; i<levelsCount; ++i) { 419 els.append((UChar)0x20).append(printLevel(levels[i])); 420 } 421 UnicodeString als("Actual levels: "); 422 for(i=0; i<actualCount; ++i) { 423 als.append((UChar)0x20).append(printLevel(actualLevels[i])); 424 } 425 errln(els); 426 errln(als); 427 } 428 return isOk; 429} 430 431// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); 432// does not work for custom BiDi class assignments 433// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. 434// Therefore we just skip the indexes for BiDi controls while comparing 435// with the expected ordering that has them omitted. 436UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) { 437 UBool isOk=TRUE; 438 IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()"); 439 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls 440 int32_t i, visualIndex; 441 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() 442 // and loop over each run's indexes, but that seems unnecessary for this test code. 443 for(i=visualIndex=0; i<resultLength; ++i) { 444 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); 445 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { 446 errln("Input line %d: %s", (int)lineNumber, line); 447 return FALSE; 448 } 449 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { 450 continue; // BiDi control, omitted from expected ordering. 451 } 452 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { 453 errln("Wrong ordering value at visual index %d; expected %d actual %d", 454 (int)visualIndex, ordering[visualIndex], logicalIndex); 455 isOk=FALSE; 456 break; 457 } 458 ++visualIndex; 459 } 460 // visualIndex is now the visual length minus the BiDi controls, 461 // which should match the length of the BidiTest.txt ordering. 462 if(isOk && orderingCount!=visualIndex) { 463 errln("Wrong number of ordering values; expected %d actual %d", 464 (int)orderingCount, (int)visualIndex); 465 isOk=FALSE; 466 } 467 if(!isOk) { 468 printErrorLine(paraLevelName); 469 UnicodeString eord("Expected ordering: "); 470 for(i=0; i<orderingCount; ++i) { 471 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); 472 } 473 UnicodeString aord("Actual ordering: "); 474 for(i=0; i<resultLength; ++i) { 475 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); 476 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { 477 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); 478 } 479 } 480 errln(eord); 481 errln(aord); 482 } 483 return isOk; 484} 485 486void BiDiConformanceTest::printErrorLine(const char *paraLevelName) { 487 ++errorCount; 488 errln("Input line %5d: %s", (int)lineNumber, line); 489 errln(UnicodeString("Input string: ")+inputString); 490 errln("Para level: %s", paraLevelName); 491} 492