1/*
2*******************************************************************************
3*
4*   Copyright (C) 2009-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  bidiconf.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2009oct16
14*   created by: Markus W. Scherer
15*
16*   BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
17*/
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include "unicode/utypes.h"
23#include "unicode/ubidi.h"
24#include "unicode/errorcode.h"
25#include "unicode/localpointer.h"
26#include "unicode/putil.h"
27#include "unicode/unistr.h"
28#include "intltest.h"
29#include "uparse.h"
30
31class BiDiConformanceTest : public IntlTest {
32public:
33    BiDiConformanceTest() :
34        directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
35        errorCount(0) {}
36
37    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38
39    void TestBidiTest();
40    void TestBidiCharacterTest();
41private:
42    char *getUnidataPath(char path[]);
43
44    UBool parseLevels(const char *&start);
45    UBool parseOrdering(const char *start);
46    UBool parseInputStringFromBiDiClasses(const char *&start);
47
48    UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
49    UBool checkOrdering(UBiDi *ubidi);
50
51    void printErrorLine();
52
53    char line[10000];
54    UBiDiLevel levels[1000];
55    uint32_t directionBits;
56    int32_t ordering[1000];
57    int32_t lineNumber;
58    int32_t levelsCount;
59    int32_t orderingCount;
60    int32_t errorCount;
61    UnicodeString inputString;
62    const char *paraLevelName;
63    char levelNameString[12];
64};
65
66extern IntlTest *createBiDiConformanceTest() {
67    return new BiDiConformanceTest();
68}
69
// Test dispatcher: registers TestBidiTest and TestBidiCharacterTest through
// the TESTCASE_AUTO macros (presumably supplied by intltest.h -- their
// expansion is not visible here). The par argument is not used.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        // Print the suite banner only when a test is actually executed.
        logln("TestSuite BiDiConformanceTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestBidiTest);
    TESTCASE_AUTO(TestBidiCharacterTest);
    TESTCASE_AUTO_END;
}
79
80// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
81char *BiDiConformanceTest::getUnidataPath(char path[]) {
82    IcuTestErrorCode errorCode(*this, "getUnidataPath");
83    const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
84
85    // Look inside ICU_DATA first.
86    strcpy(path, pathToDataDirectory());
87    strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
88    FILE *f=fopen(path, "r");
89    if(f!=NULL) {
90        fclose(f);
91        *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
92        return path;
93    }
94
95    // As a fallback, try to guess where the source data was located
96    // at the time ICU was built, and look there.
97#   ifdef U_TOPSRCDIR
98        strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
99#   else
100        strcpy(path, loadTestData(errorCode));
101        strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
102                     U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
103                     U_FILE_SEP_STRING "data");
104#   endif
105    strcat(path, U_FILE_SEP_STRING);
106    strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
107    f=fopen(path, "r");
108    if(f!=NULL) {
109        fclose(f);
110        *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
111        return path;
112    }
113    return NULL;
114}
115
// Defines the class LocalStdioFilePointer for FILE pointers, with fclose()
// as its close function. The tests below use it to hold the open data file
// (isNull()/getAlias()). NOTE(review): the macro presumably comes from
// unicode/localpointer.h and closes the file in the destructor -- confirm.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
117
118UBool BiDiConformanceTest::parseLevels(const char *&start) {
119    directionBits=0;
120    levelsCount=0;
121    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
122        if(*start=='x') {
123            levels[levelsCount++]=UBIDI_DEFAULT_LTR;
124            ++start;
125        } else {
126            char *end;
127            uint32_t value=(uint32_t)strtoul(start, &end, 10);
128            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
129                          || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
130                errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
131                printErrorLine();
132                return FALSE;
133            }
134            levels[levelsCount++]=(UBiDiLevel)value;
135            directionBits|=(1<<(value&1));
136            start=end;
137        }
138    }
139    return TRUE;
140}
141
142UBool BiDiConformanceTest::parseOrdering(const char *start) {
143    orderingCount=0;
144    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
145        char *end;
146        uint32_t value=(uint32_t)strtoul(start, &end, 10);
147        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
148            errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
149            printErrorLine();
150            return FALSE;
151        }
152        ordering[orderingCount++]=(int32_t)value;
153        start=end;
154    }
155    return TRUE;
156}
157
// Stand-in characters for the BiDi classes, indexed by UCharDirection value.
// parseInputStringFromBiDiClasses() builds its input string from these
// characters, and biDiConfUBiDiClassCallback() maps them back to their
// classes by searching this table.
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
    0x6c,   // 'l' for L
    0x52,   // 'R' for R
    0x33,   // '3' for EN
    0x2d,   // '-' for ES
    0x25,   // '%' for ET
    0x39,   // '9' for AN
    0x2c,   // ',' for CS
    0x2f,   // '/' for B
    0x5f,   // '_' for S
    0x20,   // ' ' for WS
    0x3d,   // '=' for ON
    0x65,   // 'e' for LRE
    0x6f,   // 'o' for LRO
    0x41,   // 'A' for AL
    0x45,   // 'E' for RLE
    0x4f,   // 'O' for RLO
    0x2a,   // '*' for PDF
    0x60,   // '`' for NSM
    0x7c,   // '|' for BN
    // new in Unicode 6.3/ICU 52
    0x53,   // 'S' for FSI
    0x69,   // 'i' for LRI
    0x49,   // 'I' for RLI
    0x2e    // '.' for PDI
};
184
185U_CDECL_BEGIN
186
187static UCharDirection U_CALLCONV
188biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
189    for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
190        if(c==charFromBiDiClass[i]) {
191            return (UCharDirection)i;
192        }
193    }
194    // Character not in our hardcoded table.
195    // Should not occur during testing.
196    return U_BIDI_CLASS_DEFAULT;
197}
198
199U_CDECL_END
200
// Lengths of the short BiDi class names ("L", "EN", "LRE", ...), indexed by
// UCharDirection value in the same order as charFromBiDiClass[].
// The extra last entry (0) is for U_CHAR_DIRECTION_COUNT, which
// parseInputStringFromBiDiClasses() uses for "no class recognized".
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
    1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
};
204
205UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
206    inputString.remove();
207    /*
208     * Lengthy but fast BiDi class parser.
209     * A simple parser could terminate or extract the name string and use
210     *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
211     * but that makes this test take significantly more time.
212     */
213    while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
214        UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
215        // Compare each character once until we have a match on
216        // a complete, short BiDi class name.
217        if(start[0]=='L') {
218            if(start[1]=='R') {
219                if(start[2]=='E') {
220                    biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
221                } else if(start[2]=='I') {
222                    biDiClass=U_LEFT_TO_RIGHT_ISOLATE;
223                } else if(start[2]=='O') {
224                    biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
225                }
226            } else {
227                biDiClass=U_LEFT_TO_RIGHT;
228            }
229        } else if(start[0]=='R') {
230            if(start[1]=='L') {
231                if(start[2]=='E') {
232                    biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
233                } else if(start[2]=='I') {
234                    biDiClass=U_RIGHT_TO_LEFT_ISOLATE;
235                } else if(start[2]=='O') {
236                    biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
237                }
238            } else {
239                biDiClass=U_RIGHT_TO_LEFT;
240            }
241        } else if(start[0]=='E') {
242            if(start[1]=='N') {
243                biDiClass=U_EUROPEAN_NUMBER;
244            } else if(start[1]=='S') {
245                biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
246            } else if(start[1]=='T') {
247                biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
248            }
249        } else if(start[0]=='A') {
250            if(start[1]=='L') {
251                biDiClass=U_RIGHT_TO_LEFT_ARABIC;
252            } else if(start[1]=='N') {
253                biDiClass=U_ARABIC_NUMBER;
254            }
255        } else if(start[0]=='C' && start[1]=='S') {
256            biDiClass=U_COMMON_NUMBER_SEPARATOR;
257        } else if(start[0]=='B') {
258            if(start[1]=='N') {
259                biDiClass=U_BOUNDARY_NEUTRAL;
260            } else {
261                biDiClass=U_BLOCK_SEPARATOR;
262            }
263        } else if(start[0]=='S') {
264            biDiClass=U_SEGMENT_SEPARATOR;
265        } else if(start[0]=='W' && start[1]=='S') {
266            biDiClass=U_WHITE_SPACE_NEUTRAL;
267        } else if(start[0]=='O' && start[1]=='N') {
268            biDiClass=U_OTHER_NEUTRAL;
269        } else if(start[0]=='P' && start[1]=='D') {
270            if(start[2]=='F') {
271                biDiClass=U_POP_DIRECTIONAL_FORMAT;
272            } else if(start[2]=='I') {
273                biDiClass=U_POP_DIRECTIONAL_ISOLATE;
274            }
275        } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
276            biDiClass=U_DIR_NON_SPACING_MARK;
277        } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') {
278            biDiClass=U_FIRST_STRONG_ISOLATE;
279        }
280        // Now we verify that the class name is terminated properly,
281        // and not just the start of a longer word.
282        int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
283        char c=start[biDiClassNameLength];
284        if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
285            inputString.append(charFromBiDiClass[biDiClass]);
286            start+=biDiClassNameLength;
287            continue;
288        }
289        errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
290        printErrorLine();
291        return FALSE;
292    }
293    return TRUE;
294}
295
296void BiDiConformanceTest::TestBidiTest() {
297    IcuTestErrorCode errorCode(*this, "TestBidiTest");
298    const char *sourceTestDataPath=getSourceTestData(errorCode);
299    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
300                                      "folder (getSourceTestData())")) {
301        return;
302    }
303    char bidiTestPath[400];
304    strcpy(bidiTestPath, sourceTestDataPath);
305    strcat(bidiTestPath, "BidiTest.txt");
306    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
307    if(bidiTestFile.isNull()) {
308        errln("unable to open %s", bidiTestPath);
309        return;
310    }
311    LocalUBiDiPointer ubidi(ubidi_open());
312    ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
313                           NULL, NULL, errorCode);
314    if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
315        return;
316    }
317    lineNumber=0;
318    levelsCount=0;
319    orderingCount=0;
320    errorCount=0;
321    while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
322        ++lineNumber;
323        // Remove trailing comments and whitespace.
324        char *commentStart=strchr(line, '#');
325        if(commentStart!=NULL) {
326            *commentStart=0;
327        }
328        u_rtrim(line);
329        const char *start=u_skipWhitespace(line);
330        if(*start==0) {
331            continue;  // Skip empty and comment-only lines.
332        }
333        if(*start=='@') {
334            ++start;
335            if(0==strncmp(start, "Levels:", 7)) {
336                start+=7;
337                if(!parseLevels(start)) {
338                    return;
339                }
340            } else if(0==strncmp(start, "Reorder:", 8)) {
341                if(!parseOrdering(start+8)) {
342                    return;
343                }
344            }
345            // Skip unknown @Xyz: ...
346        } else {
347            if(!parseInputStringFromBiDiClasses(start)) {
348                return;
349            }
350            start=u_skipWhitespace(start);
351            if(*start!=';') {
352                errln("missing ; separator on input line %s", line);
353                return;
354            }
355            start=u_skipWhitespace(start+1);
356            char *end;
357            uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
358            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
359                errln("input bitset parse error at %s", start);
360                return;
361            }
362            // Loop over the bitset.
363            static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
364            static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
365            for(int i=0; i<=3; ++i) {
366                if(bitset&(1<<i)) {
367                    ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
368                                  paraLevels[i], NULL, errorCode);
369                    const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
370                    if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
371                        errln("Input line %d: %s", (int)lineNumber, line);
372                        return;
373                    }
374                    paraLevelName=paraLevelNames[i];
375                    if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
376                        // continue outerLoop;  does not exist in C++
377                        // so just break out of the inner loop.
378                        break;
379                    }
380                    if(!checkOrdering(ubidi.getAlias())) {
381                        // continue outerLoop;  does not exist in C++
382                        // so just break out of the inner loop.
383                        break;
384                    }
385                }
386            }
387        }
388    }
389}
390
391/*
392*******************************************************************************
393*
394*   created on: 2013jul01
395*   created by: Matitiahu Allouche
396
397This function performs a conformance test for implementations of the
398Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
399Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
400
Each test case is represented in a single line which is read from a file
named BidiCharacterTest.txt.  Empty, blank and comment lines may also appear
in this file.
404
405The format of the test data is specified below.  Note that each test
406case constitutes a single line of text; reordering is applied within a
407single line and independently of a rendering engine, and rules L3 and L4
408are out of scope.
409
The number sign '#' is the comment character: everything is ignored from
the occurrence of '#' until the end of the line.
Empty lines and lines containing only spaces and/or comments are ignored.
413
414Lines which represent test cases consist of 4 or 5 fields separated by a
415semicolon.  Each field consists of tokens separated by whitespace (space
416or Tab).  Whitespace before and after semicolons is optional.
417
418Field 0: A sequence of hexadecimal code point values separated by space
419
420Field 1: A value representing the paragraph direction, as follows:
421    - 0 represents left-to-right
422    - 1 represents right-to-left
423    - 2 represents auto-LTR according to rules P2 and P3 of the algorithm
424    - 3 represents auto-RTL according to rules P2 and P3 of the algorithm
425    - a negative number whose absolute value is taken as paragraph level;
426      this may be useful to test cases where the embedding level approaches
427      or exceeds the maximum embedding level.
428
429Field 2: The resolved paragraph embedding level.  If the input (field 0)
430         includes more than one paragraph, this field represents the
431         resolved level of the first paragraph.
432
433Field 3: An ordered list of resulting levels for each token in field 0
434         (each token represents one source character).
435         The UBA does not assign levels to certain characters (e.g. LRO);
436         characters removed in rule X9 are indicated with an 'x'.
437
438Field 4: An ordered list of indices showing the resulting visual ordering
439         from left to right; characters with a resolved level of 'x' are
         skipped.  The numbers are zero-based.  Each index corresponds to
441         a character in the reordered (visual) string. It represents the
442         index of the source character in the input (field 0).
443         This field is optional.  When it is absent, the visual ordering
444         is not verified.
445
446Examples:
447
448# This is a comment line.
449L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
450L L ON R;0;0;0 0 0 1;0 1 2 3
451
452# Note: in the next line, 'B' represents a block separator, not the letter 'B'.
453LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
454# Note: in the next line, 'b' represents the letter 'b', not a block separator.
455a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
456
457a R R x ; 1 ; 1 ; 2 1 1 2
458L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
459
460*
461*******************************************************************************
462*/
463void BiDiConformanceTest::TestBidiCharacterTest() {
464    IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
465    const char *sourceTestDataPath=getSourceTestData(errorCode);
466    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
467                                      "folder (getSourceTestData())")) {
468        return;
469    }
470    char bidiTestPath[400];
471    strcpy(bidiTestPath, sourceTestDataPath);
472    strcat(bidiTestPath, "BidiCharacterTest.txt");
473    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
474    if(bidiTestFile.isNull()) {
475        errln("unable to open %s", bidiTestPath);
476        return;
477    }
478    LocalUBiDiPointer ubidi(ubidi_open());
479    lineNumber=0;
480    levelsCount=0;
481    orderingCount=0;
482    errorCount=0;
483    while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
484        ++lineNumber;
485        paraLevelName="N/A";
486        inputString="N/A";
487        // Remove trailing comments and whitespace.
488        char *commentStart=strchr(line, '#');
489        if(commentStart!=NULL) {
490            *commentStart=0;
491        }
492        u_rtrim(line);
493        const char *start=u_skipWhitespace(line);
494        if(*start==0) {
495            continue;  // Skip empty and comment-only lines.
496        }
497        // Parse the code point string in field 0.
498        UChar *buffer=inputString.getBuffer(200);
499        int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode);
500        if(errorCode.logIfFailureAndReset("Invalid string in field 0")) {
501            errln("Input line %d: %s", (int)lineNumber, line);
502            inputString.remove();
503            continue;
504        }
505        inputString.releaseBuffer(length);
506        start=strchr(start, ';');
507        if(start==NULL) {
508            errorCount++;
509            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
510            continue;
511        }
512        start=u_skipWhitespace(start+1);
513        char *end;
514        int32_t paraDirection=(int32_t)strtol(start, &end, 10);
515        UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
516        if(paraDirection==0) {
517            paraLevel=0;
518            paraLevelName="LTR";
519        }
520        else if(paraDirection==1) {
521            paraLevel=1;
522            paraLevelName="RTL";
523        }
524        else if(paraDirection==2) {
525            paraLevel=UBIDI_DEFAULT_LTR;
526            paraLevelName="Auto/LTR";
527        }
528        else if(paraDirection==3) {
529            paraLevel=UBIDI_DEFAULT_RTL;
530            paraLevelName="Auto/RTL";
531        }
532        else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
533            paraLevel=(UBiDiLevel)(-paraDirection);
534            sprintf(levelNameString, "%d", (int)paraLevel);
535            paraLevelName=levelNameString;
536        }
537        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
538                         paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
539            errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
540            printErrorLine();
541            continue;
542        }
543        start=u_skipWhitespace(end);
544        if(*start!=';') {
545            errorCount++;
546            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
547            continue;
548        }
549        start++;
550        uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
551        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
552           resolvedParaLevel>1) {
553            errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
554            printErrorLine();
555            continue;
556        }
557        start=u_skipWhitespace(end);
558        if(*start!=';') {
559            errorCount++;
560            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
561            return;
562        }
563        start++;
564        if(!parseLevels(start)) {
565            continue;
566        }
567        start=u_skipWhitespace(start);
568        if(*start==';') {
569            if(!parseOrdering(start+1)) {
570                continue;
571            }
572        }
573        else
574            orderingCount=-1;
575
576        ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
577                      paraLevel, NULL, errorCode);
578        const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
579        if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
580            errln("Input line %d: %s", (int)lineNumber, line);
581            continue;
582        }
583        UBiDiLevel actualLevel;
584        if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
585            printErrorLine();
586            errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
587                   (int)lineNumber, resolvedParaLevel, actualLevel);
588            continue;
589        }
590        if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
591            continue;
592        }
593        if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
594            continue;
595        }
596    }
597}
598
599static UChar printLevel(UBiDiLevel level) {
600    if(level<UBIDI_DEFAULT_LTR) {
601        return 0x30+level;
602    } else {
603        return 0x78;  // 'x'
604    }
605}
606
607static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
608    uint32_t actualDirectionBits=0;
609    for(int32_t i=0; i<actualCount; ++i) {
610        actualDirectionBits|=(1<<(actualLevels[i]&1));
611    }
612    return actualDirectionBits;
613}
614
615UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
616    UBool isOk=TRUE;
617    if(levelsCount!=actualCount) {
618        errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
619              (int)lineNumber, (int)levelsCount, (int)actualCount);
620        isOk=FALSE;
621    } else {
622        for(int32_t i=0; i<actualCount; ++i) {
623            if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
624                if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
625                    // ICU used a shortcut:
626                    // Since the text is unidirectional, it did not store the resolved
627                    // levels but just returns all levels as the paragraph level 0 or 1.
628                    // The reordering result is the same, so this is fine.
629                    break;
630                } else {
631                    errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
632                          (int)lineNumber, (int)i, levels[i], actualLevels[i]);
633                    isOk=FALSE;
634                    break;
635                }
636            }
637        }
638    }
639    if(!isOk) {
640        printErrorLine();
641        UnicodeString els("Expected levels:   ");
642        int32_t i;
643        for(i=0; i<levelsCount; ++i) {
644            els.append((UChar)0x20).append(printLevel(levels[i]));
645        }
646        UnicodeString als("Actual   levels:   ");
647        for(i=0; i<actualCount; ++i) {
648            als.append((UChar)0x20).append(printLevel(actualLevels[i]));
649        }
650        errln(els);
651        errln(als);
652    }
653    return isOk;
654}
655
656// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
657// does not work for custom BiDi class assignments
658// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
659// Therefore we just skip the indexes for BiDi controls while comparing
660// with the expected ordering that has them omitted.
661UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
662    UBool isOk=TRUE;
663    IcuTestErrorCode errorCode(*this, "checkOrdering()");
664    int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
665    int32_t i, visualIndex;
666    // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
667    // and loop over each run's indexes, but that seems unnecessary for this test code.
668    for(i=visualIndex=0; i<resultLength; ++i) {
669        int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
670        if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
671            errln("Input line %d: %s", (int)lineNumber, line);
672            return FALSE;
673        }
674        if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
675            continue;  // BiDi control, omitted from expected ordering.
676        }
677        if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
678            errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
679                  (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
680            isOk=FALSE;
681            break;
682        }
683        ++visualIndex;
684    }
685    // visualIndex is now the visual length minus the BiDi controls,
686    // which should match the length of the BidiTest.txt ordering.
687    if(isOk && orderingCount!=visualIndex) {
688        errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
689              (int)lineNumber, (int)orderingCount, (int)visualIndex);
690        isOk=FALSE;
691    }
692    if(!isOk) {
693        printErrorLine();
694        UnicodeString eord("Expected ordering: ");
695        for(i=0; i<orderingCount; ++i) {
696            eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
697        }
698        UnicodeString aord("Actual   ordering: ");
699        for(i=0; i<resultLength; ++i) {
700            int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
701            if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
702                aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
703            }
704        }
705        errln(eord);
706        errln(aord);
707    }
708    return isOk;
709}
710
711void BiDiConformanceTest::printErrorLine() {
712    ++errorCount;
713    errln("Input line %5d:   %s", (int)lineNumber, line);
714    errln(UnicodeString("Input string:       ")+inputString);
715    errln("Para level:         %s", paraLevelName);
716}
717