1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5*******************************************************************************
6* Copyright (C) 2013-2015, International Business Machines
7* Corporation and others.  All Rights Reserved.
8*******************************************************************************
9* CollationFastLatin.java, ported from collationfastlatin.h/.cpp
10*
11* C++ version created on: 2013aug09
12* created by: Markus W. Scherer
13*/
14
15package android.icu.impl.coll;
16
17import android.icu.lang.UScript;
18import android.icu.text.Collator;
19
20/**
21 * @hide Only a subset of ICU is exposed in Android
22 */
23public final class CollationFastLatin /* all static */ {
24    /**
25     * Fast Latin format version (one byte 1..FF).
26     * Must be incremented for any runtime-incompatible changes,
27     * in particular, for changes to any of the following constants.
28     *
29     * When the major version number of the main data format changes,
30     * we can reset this fast Latin version to 1.
31     */
32    public static final int VERSION = 2;
33
34    public static final int LATIN_MAX = 0x17f;
35    public static final int LATIN_LIMIT = LATIN_MAX + 1;
36
37    static final int LATIN_MAX_UTF8_LEAD = 0xc5;  // UTF-8 lead byte of LATIN_MAX
38
39    static final int PUNCT_START = 0x2000;
40    static final int PUNCT_LIMIT = 0x2040;
41
42    // excludes U+FFFE & U+FFFF
43    static final int NUM_FAST_CHARS = LATIN_LIMIT + (PUNCT_LIMIT - PUNCT_START);
44
45    // Note on the supported weight ranges:
46    // Analysis of UCA 6.3 and CLDR 23 non-search tailorings shows that
47    // the CEs for characters in the above ranges, excluding expansions with length >2,
48    // excluding contractions of >2 characters, and other restrictions
49    // (see the builder's getCEsFromCE32()),
50    // use at most about 150 primary weights,
51    // where about 94 primary weights are possibly-variable (space/punct/symbol/currency),
52    // at most 4 secondary before-common weights,
53    // at most 4 secondary after-common weights,
54    // at most 16 secondary high weights (in secondary CEs), and
55    // at most 4 tertiary after-common weights.
56    // The following ranges are designed to support slightly more weights than that.
57    // (en_US_POSIX is unusual: It creates about 64 variable + 116 Latin primaries.)
58
59    // Digits may use long primaries (preserving more short ones)
60    // or short primaries (faster) without changing this data structure.
61    // (If we supported numeric collation, then digits would have to have long primaries
62    // so that special handling does not affect the fast path.)
63
64    static final int SHORT_PRIMARY_MASK = 0xfc00;  // bits 15..10
65    static final int INDEX_MASK = 0x3ff;  // bits 9..0 for expansions & contractions
66    static final int SECONDARY_MASK = 0x3e0;  // bits 9..5
67    static final int CASE_MASK = 0x18;  // bits 4..3
68    static final int LONG_PRIMARY_MASK = 0xfff8;  // bits 15..3
69    static final int TERTIARY_MASK = 7;  // bits 2..0
70    static final int CASE_AND_TERTIARY_MASK = CASE_MASK | TERTIARY_MASK;
71
72    static final int TWO_SHORT_PRIMARIES_MASK =
73            (SHORT_PRIMARY_MASK << 16) | SHORT_PRIMARY_MASK;  // 0xfc00fc00
74    static final int TWO_LONG_PRIMARIES_MASK =
75            (LONG_PRIMARY_MASK << 16) | LONG_PRIMARY_MASK;  // 0xfff8fff8
76    static final int TWO_SECONDARIES_MASK =
77            (SECONDARY_MASK << 16) | SECONDARY_MASK;  // 0x3e003e0
78    static final int TWO_CASES_MASK =
79            (CASE_MASK << 16) | CASE_MASK;  // 0x180018
80    static final int TWO_TERTIARIES_MASK =
81            (TERTIARY_MASK << 16) | TERTIARY_MASK;  // 0x70007
82
83    /**
84     * Contraction with one fast Latin character.
85     * Use INDEX_MASK to find the start of the contraction list after the fixed table.
86     * The first entry contains the default mapping.
87     * Otherwise use CONTR_CHAR_MASK for the contraction character index
88     * (in ascending order).
89     * Use CONTR_LENGTH_SHIFT for the length of the entry
90     * (1=BAIL_OUT, 2=one CE, 3=two CEs).
91     *
92     * Also, U+0000 maps to a contraction entry, so that the fast path need not
93     * check for NUL termination.
94     * It usually maps to a contraction list with only the completely ignorable default value.
95     */
96    static final int CONTRACTION = 0x400;
97    /**
98     * An expansion encodes two CEs.
99     * Use INDEX_MASK to find the pair of CEs after the fixed table.
100     *
101     * The higher a mini CE value, the easier it is to process.
102     * For expansions and higher, no context needs to be considered.
103     */
104    static final int EXPANSION = 0x800;
105    /**
106     * Encodes one CE with a long/low mini primary (there are 128).
107     * All potentially-variable primaries must be in this range,
108     * to make the short-primary path as fast as possible.
109     */
110    static final int MIN_LONG = 0xc00;
111    static final int LONG_INC = 8;
112    static final int MAX_LONG = 0xff8;
113    /**
114     * Encodes one CE with a short/high primary (there are 60),
115     * plus a secondary CE if the secondary weight is high.
116     * Fast handling: At least all letter primaries should be in this range.
117     */
118    static final int MIN_SHORT = 0x1000;
119    static final int SHORT_INC = 0x400;
120    /** The highest primary weight is reserved for U+FFFF. */
121    static final int MAX_SHORT = SHORT_PRIMARY_MASK;
122
123    static final int MIN_SEC_BEFORE = 0;  // must add SEC_OFFSET
124    static final int SEC_INC = 0x20;
125    static final int MAX_SEC_BEFORE = MIN_SEC_BEFORE + 4 * SEC_INC;  // 5 before common
126    static final int COMMON_SEC = MAX_SEC_BEFORE + SEC_INC;
127    static final int MIN_SEC_AFTER = COMMON_SEC + SEC_INC;
128    static final int MAX_SEC_AFTER = MIN_SEC_AFTER + 5 * SEC_INC;  // 6 after common
129    static final int MIN_SEC_HIGH = MAX_SEC_AFTER + SEC_INC;  // 20 high secondaries
130    static final int MAX_SEC_HIGH = SECONDARY_MASK;
131
132    /**
133     * Lookup: Add this offset to secondary weights, except for completely ignorable CEs.
134     * Must be greater than any special value, e.g., MERGE_WEIGHT.
135     * The exact value is not relevant for the format version.
136     */
137    static final int SEC_OFFSET = SEC_INC;
138    static final int COMMON_SEC_PLUS_OFFSET = COMMON_SEC + SEC_OFFSET;
139
140    static final int TWO_SEC_OFFSETS =
141            (SEC_OFFSET << 16) | SEC_OFFSET;  // 0x200020
142    static final int TWO_COMMON_SEC_PLUS_OFFSET =
143            (COMMON_SEC_PLUS_OFFSET << 16) | COMMON_SEC_PLUS_OFFSET;
144
145    static final int LOWER_CASE = 8;  // case bits include this offset
146    static final int TWO_LOWER_CASES = (LOWER_CASE << 16) | LOWER_CASE;  // 0x80008
147
148    static final int COMMON_TER = 0;  // must add TER_OFFSET
149    static final int MAX_TER_AFTER = 7;  // 7 after common
150
151    /**
152     * Lookup: Add this offset to tertiary weights, except for completely ignorable CEs.
153     * Must be greater than any special value, e.g., MERGE_WEIGHT.
154     * Must be greater than case bits as well, so that with combined case+tertiary weights
155     * plus the offset the tertiary bits does not spill over into the case bits.
156     * The exact value is not relevant for the format version.
157     */
158    static final int TER_OFFSET = SEC_OFFSET;
159    static final int COMMON_TER_PLUS_OFFSET = COMMON_TER + TER_OFFSET;
160
161    static final int TWO_TER_OFFSETS = (TER_OFFSET << 16) | TER_OFFSET;
162    static final int TWO_COMMON_TER_PLUS_OFFSET =
163            (COMMON_TER_PLUS_OFFSET << 16) | COMMON_TER_PLUS_OFFSET;
164
165    static final int MERGE_WEIGHT = 3;
166    static final int EOS = 2;  // end of string
167    static final int BAIL_OUT = 1;
168
169    /**
170     * Contraction result first word bits 8..0 contain the
171     * second contraction character, as a char index 0..NUM_FAST_CHARS-1.
172     * Each contraction list is terminated with a word containing CONTR_CHAR_MASK.
173     */
174    static final int CONTR_CHAR_MASK = 0x1ff;
175    /**
176     * Contraction result first word bits 10..9 contain the result length:
177     * 1=bail out, 2=one mini CE, 3=two mini CEs
178     */
179    static final int CONTR_LENGTH_SHIFT = 9;
180
181    /**
182     * Comparison return value when the regular comparison must be used.
183     * The exact value is not relevant for the format version.
184     */
185    public static final int BAIL_OUT_RESULT = -2;
186
187    static int getCharIndex(char c) {
188        if(c <= LATIN_MAX) {
189            return c;
190        } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
191            return c - (PUNCT_START - LATIN_LIMIT);
192        } else {
193            // Not a fast Latin character.
194            // Note: U+FFFE & U+FFFF are forbidden in tailorings
195            // and thus do not occur in any contractions.
196            return -1;
197        }
198    }
199
200    /**
201     * Computes the options value for the compare functions
202     * and writes the precomputed primary weights.
203     * Returns -1 if the Latin fastpath is not supported for the data and settings.
204     * The capacity must be LATIN_LIMIT.
205     */
206    public static int getOptions(CollationData data, CollationSettings settings,
207            char[] primaries) {
208        char[] header = data.fastLatinTableHeader;
209        if(header == null) { return -1; }
210        assert((header[0] >> 8) == VERSION);
211        if(primaries.length != LATIN_LIMIT) {
212            assert false;
213            return -1;
214        }
215
216        int miniVarTop;
217        if((settings.options & CollationSettings.ALTERNATE_MASK) == 0) {
218            // No mini primaries are variable, set a variableTop just below the
219            // lowest long mini primary.
220            miniVarTop = MIN_LONG - 1;
221        } else {
222            int headerLength = header[0] & 0xff;
223            int i = 1 + settings.getMaxVariable();
224            if(i >= headerLength) {
225                return -1;  // variableTop >= digits, should not occur
226            }
227            miniVarTop = header[i];
228        }
229
230        boolean digitsAreReordered = false;
231        if(settings.hasReordering()) {
232            long prevStart = 0;
233            long beforeDigitStart = 0;
234            long digitStart = 0;
235            long afterDigitStart = 0;
236            for(int group = Collator.ReorderCodes.FIRST;
237                    group < Collator.ReorderCodes.FIRST + CollationData.MAX_NUM_SPECIAL_REORDER_CODES;
238                    ++group) {
239                long start = data.getFirstPrimaryForGroup(group);
240                start = settings.reorder(start);
241                if(group == Collator.ReorderCodes.DIGIT) {
242                    beforeDigitStart = prevStart;
243                    digitStart = start;
244                } else if(start != 0) {
245                    if(start < prevStart) {
246                        // The permutation affects the groups up to Latin.
247                        return -1;
248                    }
249                    // In the future, there might be a special group between digits & Latin.
250                    if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) {
251                        afterDigitStart = start;
252                    }
253                    prevStart = start;
254                }
255            }
256            long latinStart = data.getFirstPrimaryForGroup(UScript.LATIN);
257            latinStart = settings.reorder(latinStart);
258            if(latinStart < prevStart) {
259                return -1;
260            }
261            if(afterDigitStart == 0) {
262                afterDigitStart = latinStart;
263            }
264            if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) {
265                digitsAreReordered = true;
266            }
267        }
268
269        char[] table = data.fastLatinTable;  // skip the header
270        for(int c = 0; c < LATIN_LIMIT; ++c) {
271            int p = table[c];
272            if(p >= MIN_SHORT) {
273                p &= SHORT_PRIMARY_MASK;
274            } else if(p > miniVarTop) {
275                p &= LONG_PRIMARY_MASK;
276            } else {
277                p = 0;
278            }
279            primaries[c] = (char)p;
280        }
281        if(digitsAreReordered || (settings.options & CollationSettings.NUMERIC) != 0) {
282            // Bail out for digits.
283            for(int c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }
284        }
285
286        // Shift the miniVarTop above other options.
287        return (miniVarTop << 16) | settings.options;
288    }
289
290    public static int compareUTF16(char[] table, char[] primaries, int options,
291            CharSequence left, CharSequence right, int startIndex) {
292        // This is a modified copy of CollationCompare.compareUpToQuaternary(),
293        // optimized for common Latin text.
294        // Keep them in sync!
295
296        int variableTop = options >> 16;  // see getOptions()
297        options &= 0xffff;  // needed for CollationSettings.getStrength() to work
298
299        // Check for supported characters, fetch mini CEs, and compare primaries.
300        int leftIndex = startIndex, rightIndex = startIndex;
301        /**
302         * Single mini CE or a pair.
303         * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
304         * If there is only one, then it is in the lower bits, and the upper bits are 0.
305         */
306        int leftPair = 0, rightPair = 0;
307        for(;;) {
308            // We fetch CEs until we get a non-ignorable primary or reach the end.
309            while(leftPair == 0) {
310                if(leftIndex == left.length()) {
311                    leftPair = EOS;
312                    break;
313                }
314                int c = left.charAt(leftIndex++);
315                if(c <= LATIN_MAX) {
316                    leftPair = primaries[c];
317                    if(leftPair != 0) { break; }
318                    if(c <= 0x39 && c >= 0x30 && (options & CollationSettings.NUMERIC) != 0) {
319                        return BAIL_OUT_RESULT;
320                    }
321                    leftPair = table[c];
322                } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
323                    leftPair = table[c - PUNCT_START + LATIN_LIMIT];
324                } else {
325                    leftPair = lookup(table, c);
326                }
327                if(leftPair >= MIN_SHORT) {
328                    leftPair &= SHORT_PRIMARY_MASK;
329                    break;
330                } else if(leftPair > variableTop) {
331                    leftPair &= LONG_PRIMARY_MASK;
332                    break;
333                } else {
334                    long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
335                    if(pairAndInc < 0) {
336                        ++leftIndex;
337                        pairAndInc = ~pairAndInc;
338                    }
339                    leftPair = (int)pairAndInc;
340                    if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
341                    leftPair = getPrimaries(variableTop, leftPair);
342                }
343            }
344
345            while(rightPair == 0) {
346                if(rightIndex == right.length()) {
347                    rightPair = EOS;
348                    break;
349                }
350                int c = right.charAt(rightIndex++);
351                if(c <= LATIN_MAX) {
352                    rightPair = primaries[c];
353                    if(rightPair != 0) { break; }
354                    if(c <= 0x39 && c >= 0x30 && (options & CollationSettings.NUMERIC) != 0) {
355                        return BAIL_OUT_RESULT;
356                    }
357                    rightPair = table[c];
358                } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
359                    rightPair = table[c - PUNCT_START + LATIN_LIMIT];
360                } else {
361                    rightPair = lookup(table, c);
362                }
363                if(rightPair >= MIN_SHORT) {
364                    rightPair &= SHORT_PRIMARY_MASK;
365                    break;
366                } else if(rightPair > variableTop) {
367                    rightPair &= LONG_PRIMARY_MASK;
368                    break;
369                } else {
370                    long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
371                    if(pairAndInc < 0) {
372                        ++rightIndex;
373                        pairAndInc = ~pairAndInc;
374                    }
375                    rightPair = (int)pairAndInc;
376                    if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
377                    rightPair = getPrimaries(variableTop, rightPair);
378                }
379            }
380
381            if(leftPair == rightPair) {
382                if(leftPair == EOS) { break; }
383                leftPair = rightPair = 0;
384                continue;
385            }
386            int leftPrimary = leftPair & 0xffff;
387            int rightPrimary = rightPair & 0xffff;
388            if(leftPrimary != rightPrimary) {
389                // Return the primary difference.
390                return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
391            }
392            if(leftPair == EOS) { break; }
393            leftPair >>>= 16;
394            rightPair >>>= 16;
395        }
396        // In the following, we need to re-fetch each character because we did not buffer the CEs,
397        // but we know that the string is well-formed and
398        // only contains supported characters and mappings.
399
400        // We might skip the secondary level but continue with the case level
401        // which is turned on separately.
402        if(CollationSettings.getStrength(options) >= Collator.SECONDARY) {
403            leftIndex = rightIndex = startIndex;
404            leftPair = rightPair = 0;
405            for(;;) {
406                while(leftPair == 0) {
407                    if(leftIndex == left.length()) {
408                        leftPair = EOS;
409                        break;
410                    }
411                    int c = left.charAt(leftIndex++);
412                    if(c <= LATIN_MAX) {
413                        leftPair = table[c];
414                    } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
415                        leftPair = table[c - PUNCT_START + LATIN_LIMIT];
416                    } else {
417                        leftPair = lookup(table, c);
418                    }
419                    if(leftPair >= MIN_SHORT) {
420                        leftPair = getSecondariesFromOneShortCE(leftPair);
421                        break;
422                    } else if(leftPair > variableTop) {
423                        leftPair = COMMON_SEC_PLUS_OFFSET;
424                        break;
425                    } else {
426                        long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
427                        if(pairAndInc < 0) {
428                            ++leftIndex;
429                            pairAndInc = ~pairAndInc;
430                        }
431                        leftPair = getSecondaries(variableTop, (int)pairAndInc);
432                    }
433                }
434
435                while(rightPair == 0) {
436                    if(rightIndex == right.length()) {
437                        rightPair = EOS;
438                        break;
439                    }
440                    int c = right.charAt(rightIndex++);
441                    if(c <= LATIN_MAX) {
442                        rightPair = table[c];
443                    } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
444                        rightPair = table[c - PUNCT_START + LATIN_LIMIT];
445                    } else {
446                        rightPair = lookup(table, c);
447                    }
448                    if(rightPair >= MIN_SHORT) {
449                        rightPair = getSecondariesFromOneShortCE(rightPair);
450                        break;
451                    } else if(rightPair > variableTop) {
452                        rightPair = COMMON_SEC_PLUS_OFFSET;
453                        break;
454                    } else {
455                        long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
456                        if(pairAndInc < 0) {
457                            ++rightIndex;
458                            pairAndInc = ~pairAndInc;
459                        }
460                        rightPair = getSecondaries(variableTop, (int)pairAndInc);
461                    }
462                }
463
464                if(leftPair == rightPair) {
465                    if(leftPair == EOS) { break; }
466                    leftPair = rightPair = 0;
467                    continue;
468                }
469                int leftSecondary = leftPair & 0xffff;
470                int rightSecondary = rightPair & 0xffff;
471                if(leftSecondary != rightSecondary) {
472                    if((options & CollationSettings.BACKWARD_SECONDARY) != 0) {
473                        // Full support for backwards secondary requires backwards contraction matching
474                        // and moving backwards between merge separators.
475                        return BAIL_OUT_RESULT;
476                    }
477                    return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
478                }
479                if(leftPair == EOS) { break; }
480                leftPair >>>= 16;
481                rightPair >>>= 16;
482            }
483        }
484
485        if((options & CollationSettings.CASE_LEVEL) != 0) {
486            boolean strengthIsPrimary = CollationSettings.getStrength(options) == Collator.PRIMARY;
487            leftIndex = rightIndex = startIndex;
488            leftPair = rightPair = 0;
489            for(;;) {
490                while(leftPair == 0) {
491                    if(leftIndex == left.length()) {
492                        leftPair = EOS;
493                        break;
494                    }
495                    int c = left.charAt(leftIndex++);
496                    leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
497                    if(leftPair < MIN_LONG) {
498                        long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
499                        if(pairAndInc < 0) {
500                            ++leftIndex;
501                            pairAndInc = ~pairAndInc;
502                        }
503                        leftPair = (int)pairAndInc;
504                    }
505                    leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
506                }
507
508                while(rightPair == 0) {
509                    if(rightIndex == right.length()) {
510                        rightPair = EOS;
511                        break;
512                    }
513                    int c = right.charAt(rightIndex++);
514                    rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
515                    if(rightPair < MIN_LONG) {
516                        long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
517                        if(pairAndInc < 0) {
518                            ++rightIndex;
519                            pairAndInc = ~pairAndInc;
520                        }
521                        rightPair = (int)pairAndInc;
522                    }
523                    rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
524                }
525
526                if(leftPair == rightPair) {
527                    if(leftPair == EOS) { break; }
528                    leftPair = rightPair = 0;
529                    continue;
530                }
531                int leftCase = leftPair & 0xffff;
532                int rightCase = rightPair & 0xffff;
533                if(leftCase != rightCase) {
534                    if((options & CollationSettings.UPPER_FIRST) == 0) {
535                        return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER;
536                    } else {
537                        return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS;
538                    }
539                }
540                if(leftPair == EOS) { break; }
541                leftPair >>>= 16;
542                rightPair >>>= 16;
543            }
544        }
545        if(CollationSettings.getStrength(options) <= Collator.SECONDARY) { return Collation.EQUAL; }
546
547        // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
548        boolean withCaseBits = CollationSettings.isTertiaryWithCaseBits(options);
549
550        leftIndex = rightIndex = startIndex;
551        leftPair = rightPair = 0;
552        for(;;) {
553            while(leftPair == 0) {
554                if(leftIndex == left.length()) {
555                    leftPair = EOS;
556                    break;
557                }
558                int c = left.charAt(leftIndex++);
559                leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
560                if(leftPair < MIN_LONG) {
561                    long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
562                    if(pairAndInc < 0) {
563                        ++leftIndex;
564                        pairAndInc = ~pairAndInc;
565                    }
566                    leftPair = (int)pairAndInc;
567                }
568                leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
569            }
570
571            while(rightPair == 0) {
572                if(rightIndex == right.length()) {
573                    rightPair = EOS;
574                    break;
575                }
576                int c = right.charAt(rightIndex++);
577                rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
578                if(rightPair < MIN_LONG) {
579                    long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
580                    if(pairAndInc < 0) {
581                        ++rightIndex;
582                        pairAndInc = ~pairAndInc;
583                    }
584                    rightPair = (int)pairAndInc;
585                }
586                rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
587            }
588
589            if(leftPair == rightPair) {
590                if(leftPair == EOS) { break; }
591                leftPair = rightPair = 0;
592                continue;
593            }
594            int leftTertiary = leftPair & 0xffff;
595            int rightTertiary = rightPair & 0xffff;
596            if(leftTertiary != rightTertiary) {
597                if(CollationSettings.sortsTertiaryUpperCaseFirst(options)) {
598                    // Pass through EOS and MERGE_WEIGHT
599                    // and keep real tertiary weights larger than the MERGE_WEIGHT.
600                    // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
601                    if(leftTertiary > MERGE_WEIGHT) {
602                        leftTertiary ^= CASE_MASK;
603                    }
604                    if(rightTertiary > MERGE_WEIGHT) {
605                        rightTertiary ^= CASE_MASK;
606                    }
607                }
608                return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
609            }
610            if(leftPair == EOS) { break; }
611            leftPair >>>= 16;
612            rightPair >>>= 16;
613        }
614        if(CollationSettings.getStrength(options) <= Collator.TERTIARY) { return Collation.EQUAL; }
615
616        leftIndex = rightIndex = startIndex;
617        leftPair = rightPair = 0;
618        for(;;) {
619            while(leftPair == 0) {
620                if(leftIndex == left.length()) {
621                    leftPair = EOS;
622                    break;
623                }
624                int c = left.charAt(leftIndex++);
625                leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
626                if(leftPair < MIN_LONG) {
627                    long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
628                    if(pairAndInc < 0) {
629                        ++leftIndex;
630                        pairAndInc = ~pairAndInc;
631                    }
632                    leftPair = (int)pairAndInc;
633                }
634                leftPair = getQuaternaries(variableTop, leftPair);
635            }
636
637            while(rightPair == 0) {
638                if(rightIndex == right.length()) {
639                    rightPair = EOS;
640                    break;
641                }
642                int c = right.charAt(rightIndex++);
643                rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
644                if(rightPair < MIN_LONG) {
645                    long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
646                    if(pairAndInc < 0) {
647                        ++rightIndex;
648                        pairAndInc = ~pairAndInc;
649                    }
650                    rightPair = (int)pairAndInc;
651                }
652                rightPair = getQuaternaries(variableTop, rightPair);
653            }
654
655            if(leftPair == rightPair) {
656                if(leftPair == EOS) { break; }
657                leftPair = rightPair = 0;
658                continue;
659            }
660            int leftQuaternary = leftPair & 0xffff;
661            int rightQuaternary = rightPair & 0xffff;
662            if(leftQuaternary != rightQuaternary) {
663                return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
664            }
665            if(leftPair == EOS) { break; }
666            leftPair >>>= 16;
667            rightPair >>>= 16;
668        }
669        return Collation.EQUAL;
670    }
671
672    private static int lookup(char[] table, int c) {
673        assert(c > LATIN_MAX);
674        if(PUNCT_START <= c && c < PUNCT_LIMIT) {
675            return table[c - PUNCT_START + LATIN_LIMIT];
676        } else if(c == 0xfffe) {
677            return MERGE_WEIGHT;
678        } else if(c == 0xffff) {
679            return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;
680        } else {
681            return BAIL_OUT;
682        }
683    }
684
685    /**
686     * Java returns a negative result (use the '~' operator) if sIndex is to be incremented.
687     * C++ modifies sIndex.
688     */
689    private static long nextPair(char[] table, int c, int ce, CharSequence s16, int sIndex) {
690        if(ce >= MIN_LONG || ce < CONTRACTION) {
691            return ce;  // simple or special mini CE
692        } else if(ce >= EXPANSION) {
693            int index = NUM_FAST_CHARS + (ce & INDEX_MASK);
694            return ((long)table[index + 1] << 16) | table[index];
695        } else /* ce >= CONTRACTION */ {
696            // Contraction list: Default mapping followed by
697            // 0 or more single-character contraction suffix mappings.
698            int index = NUM_FAST_CHARS + (ce & INDEX_MASK);
699            boolean inc = false;  // true if the next char is consumed.
700            if(sIndex != s16.length()) {
701                // Read the next character.
702                int c2;
703                int nextIndex = sIndex;
704                c2 = s16.charAt(nextIndex++);
705                if(c2 > LATIN_MAX) {
706                    if(PUNCT_START <= c2 && c2 < PUNCT_LIMIT) {
707                        c2 = c2 - PUNCT_START + LATIN_LIMIT;  // 2000..203F -> 0180..01BF
708                    } else if(c2 == 0xfffe || c2 == 0xffff) {
709                        c2 = -1;  // U+FFFE & U+FFFF cannot occur in contractions.
710                    } else {
711                        return BAIL_OUT;
712                    }
713                }
714                // Look for the next character in the contraction suffix list,
715                // which is in ascending order of single suffix characters.
716                int i = index;
717                int head = table[i];  // first skip the default mapping
718                int x;
719                do {
720                    i += head >> CONTR_LENGTH_SHIFT;
721                    head = table[i];
722                    x = head & CONTR_CHAR_MASK;
723                } while(x < c2);
724                if(x == c2) {
725                    index = i;
726                    inc = true;
727                }
728            }
729            // Return the CE or CEs for the default or contraction mapping.
730            int length = table[index] >> CONTR_LENGTH_SHIFT;
731            if(length == 1) {
732                return BAIL_OUT;
733            }
734            ce = table[index + 1];
735            long result;
736            if(length == 2) {
737                result = ce;
738            } else {
739                result = ((long)table[index + 2] << 16) | ce;
740            }
741            return inc ? ~result : result;
742        }
743    }
744
745    private static int getPrimaries(int variableTop, int pair) {
746        int ce = pair & 0xffff;
747        if(ce >= MIN_SHORT) { return pair & TWO_SHORT_PRIMARIES_MASK; }
748        if(ce > variableTop) { return pair & TWO_LONG_PRIMARIES_MASK; }
749        if(ce >= MIN_LONG) { return 0; }  // variable
750        return pair;  // special mini CE
751    }
752
753    private static int getSecondariesFromOneShortCE(int ce) {
754        ce &= SECONDARY_MASK;
755        if(ce < MIN_SEC_HIGH) {
756            return ce + SEC_OFFSET;
757        } else {
758            return ((ce + SEC_OFFSET) << 16) | COMMON_SEC_PLUS_OFFSET;
759        }
760    }
761
762    private static int getSecondaries(int variableTop, int pair) {
763        if(pair <= 0xffff) {
764            // one mini CE
765            if(pair >= MIN_SHORT) {
766                pair = getSecondariesFromOneShortCE(pair);
767            } else if(pair > variableTop) {
768                pair = COMMON_SEC_PLUS_OFFSET;
769            } else if(pair >= MIN_LONG) {
770                pair = 0;  // variable
771            }
772            // else special mini CE
773        } else {
774            int ce = pair & 0xffff;
775            if(ce >= MIN_SHORT) {
776                pair = (pair & TWO_SECONDARIES_MASK) + TWO_SEC_OFFSETS;
777            } else if(ce > variableTop) {
778                pair = TWO_COMMON_SEC_PLUS_OFFSET;
779            } else {
780                assert(ce >= MIN_LONG);
781                pair = 0;  // variable
782            }
783        }
784        return pair;
785    }
786
787    private static int getCases(int variableTop, boolean strengthIsPrimary, int pair) {
788        // Primary+caseLevel: Ignore case level weights of primary ignorables.
789        // Otherwise: Ignore case level weights of secondary ignorables.
790        // For details see the comments in the CollationCompare class.
791        // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
792        if(pair <= 0xffff) {
793            // one mini CE
794            if(pair >= MIN_SHORT) {
795                // A high secondary weight means we really have two CEs,
796                // a primary CE and a secondary CE.
797                int ce = pair;
798                pair &= CASE_MASK;  // explicit weight of primary CE
799                if(!strengthIsPrimary && (ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
800                    pair |= LOWER_CASE << 16;  // implied weight of secondary CE
801                }
802            } else if(pair > variableTop) {
803                pair = LOWER_CASE;
804            } else if(pair >= MIN_LONG) {
805                pair = 0;  // variable
806            }
807            // else special mini CE
808        } else {
809            // two mini CEs, same primary groups, neither expands like above
810            int ce = pair & 0xffff;
811            if(ce >= MIN_SHORT) {
812                if(strengthIsPrimary && (pair & (SHORT_PRIMARY_MASK << 16)) == 0) {
813                    pair &= CASE_MASK;
814                } else {
815                    pair &= TWO_CASES_MASK;
816                }
817            } else if(ce > variableTop) {
818                pair = TWO_LOWER_CASES;
819            } else {
820                assert(ce >= MIN_LONG);
821                pair = 0;  // variable
822            }
823        }
824        return pair;
825    }
826
827    private static int getTertiaries(int variableTop, boolean withCaseBits, int pair) {
828        if(pair <= 0xffff) {
829            // one mini CE
830            if(pair >= MIN_SHORT) {
831                // A high secondary weight means we really have two CEs,
832                // a primary CE and a secondary CE.
833                int ce = pair;
834                if(withCaseBits) {
835                    pair = (pair & CASE_AND_TERTIARY_MASK) + TER_OFFSET;
836                    if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
837                        pair |= (LOWER_CASE | COMMON_TER_PLUS_OFFSET) << 16;
838                    }
839                } else {
840                    pair = (pair & TERTIARY_MASK) + TER_OFFSET;
841                    if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
842                        pair |= COMMON_TER_PLUS_OFFSET << 16;
843                    }
844                }
845            } else if(pair > variableTop) {
846                pair = (pair & TERTIARY_MASK) + TER_OFFSET;
847                if(withCaseBits) {
848                    pair |= LOWER_CASE;
849                }
850            } else if(pair >= MIN_LONG) {
851                pair = 0;  // variable
852            }
853            // else special mini CE
854        } else {
855            // two mini CEs, same primary groups, neither expands like above
856            int ce = pair & 0xffff;
857            if(ce >= MIN_SHORT) {
858                if(withCaseBits) {
859                    pair &= TWO_CASES_MASK | TWO_TERTIARIES_MASK;
860                } else {
861                    pair &= TWO_TERTIARIES_MASK;
862                }
863                pair += TWO_TER_OFFSETS;
864            } else if(ce > variableTop) {
865                pair = (pair & TWO_TERTIARIES_MASK) + TWO_TER_OFFSETS;
866                if(withCaseBits) {
867                    pair |= TWO_LOWER_CASES;
868                }
869            } else {
870                assert(ce >= MIN_LONG);
871                pair = 0;  // variable
872            }
873        }
874        return pair;
875    }
876
877    private static int getQuaternaries(int variableTop, int pair) {
878        // Return the primary weight of a variable CE,
879        // or the maximum primary weight for a non-variable, not-completely-ignorable CE.
880        if(pair <= 0xffff) {
881            // one mini CE
882            if(pair >= MIN_SHORT) {
883                // A high secondary weight means we really have two CEs,
884                // a primary CE and a secondary CE.
885                if((pair & SECONDARY_MASK) >= MIN_SEC_HIGH) {
886                    pair = TWO_SHORT_PRIMARIES_MASK;
887                } else {
888                    pair = SHORT_PRIMARY_MASK;
889                }
890            } else if(pair > variableTop) {
891                pair = SHORT_PRIMARY_MASK;
892            } else if(pair >= MIN_LONG) {
893                pair &= LONG_PRIMARY_MASK;  // variable
894            }
895            // else special mini CE
896        } else {
897            // two mini CEs, same primary groups, neither expands like above
898            int ce = pair & 0xffff;
899            if(ce > variableTop) {
900                pair = TWO_SHORT_PRIMARIES_MASK;
901            } else {
902                assert(ce >= MIN_LONG);
903                pair &= TWO_LONG_PRIMARIES_MASK;  // variable
904            }
905        }
906        return pair;
907    }
908
909    private CollationFastLatin() {}  // no constructor
910}
911