1/*
2 *******************************************************************************
3 * Copyright (C) 2012-2015, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * CollationKeys.java, ported from collationkeys.h/.cpp
7 *
8 * C++ version created on: 2012sep02
9 * created by: Markus W. Scherer
10 */
11
12package com.ibm.icu.impl.coll;
13
14import com.ibm.icu.text.Collator;
15
16public final class CollationKeys /* all methods are static */ {
17
18    // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink,
19    // which is not available in Java. We don't need a super class created for implementing
20    // collation features.
21    public static abstract class SortKeyByteSink {
22        protected byte[] buffer_;
23        // protected int capacity_; == buffer_.length
24        private int appended_ = 0;
25        // not used in Java -- private int ignore_ = 0;
26
27        public SortKeyByteSink(byte[] dest) {
28            buffer_ = dest;
29        }
30
31        /**
32         * Needed in Java for when we write to the buffer directly.
33         * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that.
34         * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it?
35         * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that??
36         */
37        public void setBufferAndAppended(byte[] dest, int app) {
38            buffer_ = dest;
39            appended_ = app;
40        }
41
42        /* not used in Java -- public void IgnoreBytes(int numIgnore) {
43            ignore_ = numIgnore;
44        } */
45
46        /**
47         * @param bytes
48         *            the array of byte
49         * @param n
50         *            the length of bytes to be appended
51         */
52        public void Append(byte[] bytes, int n) {
53            if (n <= 0 || bytes == null) {
54                return;
55            }
56
57            /* not used in Java -- if (ignore_ > 0) {
58                int ignoreRest = ignore_ - n;
59                if (ignoreRest >= 0) {
60                    ignore_ = ignoreRest;
61                    return;
62                } else {
63                    start = ignore_;
64                    n = -ignoreRest;
65                    ignore_ = 0;
66                }
67            } */
68
69            int length = appended_;
70            appended_ += n;
71
72            int available = buffer_.length - length;
73            if (n <= available) {
74                System.arraycopy(bytes, 0, buffer_, length, n);
75            } else {
76                AppendBeyondCapacity(bytes, 0, n, length);
77            }
78        }
79
80        public void Append(int b) {
81            /* not used in Java -- if (ignore_ > 0) {
82                --ignore_;
83            } else */ {
84                if (appended_ < buffer_.length || Resize(1, appended_)) {
85                    buffer_[appended_] = (byte) b;
86                }
87                ++appended_;
88            }
89        }
90
91        // Java porting note: This method is not used by collator implementation.
92        //
93        // virtual char *GetAppendBuffer(int min_capacity,
94        // int desired_capacity_hint,
95        // char *scratch, int scratch_capacity,
96        // int *result_capacity);
97
98        public int NumberOfBytesAppended() {
99            return appended_;
100        }
101
102        public int GetRemainingCapacity() {
103            return /* not used in Java -- ignore_ + */ buffer_.length - appended_;
104        }
105
106        public boolean Overflowed() {
107            return appended_ > buffer_.length;
108        }
109
110        /* not used in Java -- public boolean IsOk() {
111            return true;
112        } */
113
114        /**
115         * @param bytes
116         *            the array of byte
117         * @param start
118         *            the start index within the array to be appended
119         * @param n
120         *            the length of bytes to be appended
121         * @param length
122         *            the length of buffer required to store the entire data (i.e. already appended
123         *            bytes + bytes to be appended by this method)
124         */
125        protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length);
126
127        protected abstract boolean Resize(int appendCapacity, int length);
128    }
129
130    public static class LevelCallback {
131        /**
132         * @param level
133         *            The next level about to be written to the ByteSink.
134         * @return true if the level is to be written (the base class implementation always returns
135         *         true)
136         */
137        boolean needToWrite(int level) {
138            return true;
139        }
140    }
141    public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback();
142
143    private static final class SortKeyLevel {
144        private static final int INITIAL_CAPACITY = 40;
145
146        byte[] buffer = new byte[INITIAL_CAPACITY];
147        int len = 0;
148        // not used in Java -- private static final boolean ok = true;  // In C++ "ok" is reset when memory allocations fail.
149
150        SortKeyLevel() {
151        }
152
153        /* not used in Java -- boolean isOk() {
154            return ok;
155        } */
156
157        boolean isEmpty() {
158            return len == 0;
159        }
160
161        int length() {
162            return len;
163        }
164
165        // Java porting note: Java uses this instead of C++ operator [] overload
166        // uint8_t operator[](int index)
167        byte getAt(int index) {
168            return buffer[index];
169        }
170
171        byte[] data() {
172            return buffer;
173        }
174
175        void appendByte(int b) {
176            if (len < buffer.length || ensureCapacity(1)) {
177                buffer[len++] = (byte) b;
178            }
179        }
180
181        void appendWeight16(int w) {
182            assert ((w & 0xffff) != 0);
183            byte b0 = (byte) (w >>> 8);
184            byte b1 = (byte) w;
185            int appendLength = (b1 == 0) ? 1 : 2;
186            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
187                buffer[len++] = b0;
188                if (b1 != 0) {
189                    buffer[len++] = b1;
190                }
191            }
192        }
193
194        void appendWeight32(long w) {
195            assert (w != 0);
196            byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8),
197                    (byte) w };
198            int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
199            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
200                buffer[len++] = bytes[0];
201                if (bytes[1] != 0) {
202                    buffer[len++] = bytes[1];
203                    if (bytes[2] != 0) {
204                        buffer[len++] = bytes[2];
205                        if (bytes[3] != 0) {
206                            buffer[len++] = bytes[3];
207                        }
208                    }
209                }
210            }
211        }
212
213        void appendReverseWeight16(int w) {
214            assert ((w & 0xffff) != 0);
215            byte b0 = (byte) (w >>> 8);
216            byte b1 = (byte) w;
217            int appendLength = (b1 == 0) ? 1 : 2;
218            if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
219                if (b1 == 0) {
220                    buffer[len++] = b0;
221                } else {
222                    buffer[len] = b1;
223                    buffer[len + 1] = b0;
224                    len += 2;
225                }
226            }
227        }
228
229        // Appends all but the last byte to the sink. The last byte should be the 01 terminator.
230        void appendTo(SortKeyByteSink sink) {
231            assert (len > 0 && buffer[len - 1] == 1);
232            sink.Append(buffer, len - 1);
233        }
234
235        private boolean ensureCapacity(int appendCapacity) {
236            /* not used in Java -- if (!ok) {
237                return false;
238            } */
239            int newCapacity = 2 * buffer.length;
240            int altCapacity = len + 2 * appendCapacity;
241            if (newCapacity < altCapacity) {
242                newCapacity = altCapacity;
243            }
244            if (newCapacity < 200) {
245                newCapacity = 200;
246            }
247            byte[] newbuf = new byte[newCapacity];
248            System.arraycopy(buffer, 0, newbuf, 0, len);
249            buffer = newbuf;
250
251            return true;
252        }
253    }
254
255    private static SortKeyLevel getSortKeyLevel(int levels, int level) {
256        return (levels & level) != 0 ? new SortKeyLevel() : null;
257    }
258
259    private CollationKeys() {
260    } // no instantiation
261
262    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
263    private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE;
264    private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
265    static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader
266    private static final int SEC_COMMON_MAX_COUNT = 0x21;
267
268    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
269    private static final int CASE_LOWER_FIRST_COMMON_LOW = 1;
270    private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
271    private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13;
272    private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
273
274    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
275    private static final int CASE_UPPER_FIRST_COMMON_LOW = 3;
276    @SuppressWarnings("unused")
277    private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15;
278    private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
279
280    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
281    private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE;
282    private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
283    private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
284    private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61;
285
286    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
287    private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE;
288    private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
289    private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
290    private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
291
292    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
293    private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80;
294    private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
295    private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
296    private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
297
298    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
299    private static final int QUAT_COMMON_LOW = 0x1c;
300    private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
301    private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
302    private static final int QUAT_COMMON_MAX_COUNT = 0x71;
303    // Primary weights shifted to quaternary level must be encoded with
304    // a lead byte below the common-weight compression range.
305    private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
306
307    /**
308     * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that
309     * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding
310     * IDENTICAL_LEVEL which this function does not write.
311     */
312    private static final int levelMasks[] = new int[] {
313        2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
314        6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
315        0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
316        0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
317        0, 0, 0, 0,
318        0, 0, 0, 0,
319        0, 0, 0,
320        0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
321    };
322
323    /**
324     * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes
325     * the case level. Stops writing levels when callback.needToWrite(level) returns false.
326     * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE.
327     */
328    public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes,
329            CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback,
330            boolean preflight) {
331
332        int options = settings.options;
333        // Set of levels to process and write.
334        int levels = levelMasks[CollationSettings.getStrength(options)];
335        if ((options & CollationSettings.CASE_LEVEL) != 0) {
336            levels |= Collation.CASE_LEVEL_FLAG;
337        }
338        // Minus the levels below minLevel.
339        levels &= ~((1 << minLevel) - 1);
340        if (levels == 0) {
341            return;
342        }
343
344        long variableTop;
345        if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
346            variableTop = 0;
347        } else {
348            // +1 so that we can use "<" and primary ignorables test out early.
349            variableTop = settings.variableTop + 1;
350        }
351
352        int tertiaryMask = CollationSettings.getTertiaryMask(options);
353
354        byte[] p234 = new byte[3];
355        SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG);
356        SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG);
357        SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
358        SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);
359
360        long prevReorderedPrimary = 0;  // 0==no compression
361        int commonCases = 0;
362        int commonSecondaries = 0;
363        int commonTertiaries = 0;
364        int commonQuaternaries = 0;
365
366        int prevSecondary = 0;
367        int secSegmentStart = 0;
368
369        for (;;) {
370            // No need to keep all CEs in the buffer when we write a sort key.
371            iter.clearCEsIfNoneRemaining();
372            long ce = iter.nextCE();
373            long p = ce >>> 32;
374            if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) {
375                // Variable CE, shift it to quaternary level.
376                // Ignore all following primary ignorables, and shift further variable CEs.
377                if (commonQuaternaries != 0) {
378                    --commonQuaternaries;
379                    while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
380                        quaternaries.appendByte(QUAT_COMMON_MIDDLE);
381                        commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
382                    }
383                    // Shifted primary weights are lower than the common weight.
384                    quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
385                    commonQuaternaries = 0;
386                }
387                do {
388                    if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
389                        if (settings.hasReordering()) {
390                            p = settings.reorder(p);
391                        }
392                        if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
393                            // Prevent shifted primary lead bytes from
394                            // overlapping with the common compression range.
395                            quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
396                        }
397                        quaternaries.appendWeight32(p);
398                    }
399                    do {
400                        ce = iter.nextCE();
401                        p = ce >>> 32;
402                    } while (p == 0);
403                } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY);
404            }
405            // ce could be primary ignorable, or NO_CE, or the merge separator,
406            // or a regular primary CE, but it is not variable.
407            // If ce==NO_CE, then write nothing for the primary level but
408            // terminate compression on all levels and then exit the loop.
409            if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) {
410                // Test the un-reordered primary for compressibility.
411                boolean isCompressible = compressibleBytes[(int) p >>> 24];
412                if(settings.hasReordering()) {
413                    p = settings.reorder(p);
414                }
415                int p1 = (int) p >>> 24;
416                if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) {
417                    if (prevReorderedPrimary != 0) {
418                        if (p < prevReorderedPrimary) {
419                            // No primary compression terminator
420                            // at the end of the level or merged segment.
421                            if (p1 > Collation.MERGE_SEPARATOR_BYTE) {
422                                sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE);
423                            }
424                        } else {
425                            sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE);
426                        }
427                    }
428                    sink.Append(p1);
429                    if(isCompressible) {
430                        prevReorderedPrimary = p;
431                    } else {
432                        prevReorderedPrimary = 0;
433                    }
434                }
435                byte p2 = (byte) (p >>> 16);
436                if (p2 != 0) {
437                    p234[0] = p2;
438                    p234[1] = (byte) (p >>> 8);
439                    p234[2] = (byte) p;
440                    sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3);
441                }
442                // Optimization for internalNextSortKeyPart():
443                // When the primary level overflows we can stop because we need not
444                // calculate (preflight) the whole sort key length.
445                if (!preflight && sink.Overflowed()) {
446                    // not used in Java -- if (!sink.IsOk()) {
447                    // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
448                    // C implementation. IsOk() in Java always returns true, so this
449                    // is a dead code.
450                    return;
451                }
452            }
453
454            int lower32 = (int) ce;
455            if (lower32 == 0) {
456                continue;
457            } // completely ignorable, no secondary/case/tertiary/quaternary
458
459            if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
460                int s = lower32 >>> 16;  // 16 bits
461                if (s == 0) {
462                    // secondary ignorable
463                } else if (s == Collation.COMMON_WEIGHT16 &&
464                        ((options & CollationSettings.BACKWARD_SECONDARY) == 0 ||
465                            p != Collation.MERGE_SEPARATOR_PRIMARY)) {
466                    // s is a common secondary weight, and
467                    // backwards-secondary is off or the ce is not the merge separator.
468                    ++commonSecondaries;
469                } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
470                    if (commonSecondaries != 0) {
471                        --commonSecondaries;
472                        while (commonSecondaries >= SEC_COMMON_MAX_COUNT) {
473                            secondaries.appendByte(SEC_COMMON_MIDDLE);
474                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
475                        }
476                        int b;
477                        if (s < Collation.COMMON_WEIGHT16) {
478                            b = SEC_COMMON_LOW + commonSecondaries;
479                        } else {
480                            b = SEC_COMMON_HIGH - commonSecondaries;
481                        }
482                        secondaries.appendByte(b);
483                        commonSecondaries = 0;
484                    }
485                    secondaries.appendWeight16(s);
486                } else {
487                    if (commonSecondaries != 0) {
488                        --commonSecondaries;
489                        // Append reverse weights. The level will be re-reversed later.
490                        int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
491                        int b;
492                        if (prevSecondary < Collation.COMMON_WEIGHT16) {
493                            b = SEC_COMMON_LOW + remainder;
494                        } else {
495                            b = SEC_COMMON_HIGH - remainder;
496                        }
497                        secondaries.appendByte(b);
498                        commonSecondaries -= remainder;
499                        // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
500                        while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT
501                            secondaries.appendByte(SEC_COMMON_MIDDLE);
502                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
503                        }
504                        // commonSecondaries == 0
505                    }
506                    if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) {
507                        // The backwards secondary level compares secondary weights backwards
508                        // within segments separated by the merge separator (U+FFFE).
509                        byte[] secs = secondaries.data();
510                        int last = secondaries.length() - 1;
511                        while (secSegmentStart < last) {
512                            byte b = secs[secSegmentStart];
513                            secs[secSegmentStart++] = secs[last];
514                            secs[last--] = b;
515                        }
516                        secondaries.appendByte(p == Collation.NO_CE_PRIMARY ?
517                            Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE);
518                        prevSecondary = 0;
519                        secSegmentStart = secondaries.length();
520                    } else {
521                        secondaries.appendReverseWeight16(s);
522                        prevSecondary = s;
523                    }
524                }
525            }
526
527            if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
528                if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0
529                        : (lower32 >>> 16) == 0) {
530                    // Primary+caseLevel: Ignore case level weights of primary ignorables.
531                    // Otherwise: Ignore case level weights of secondary ignorables.
532                    // For details see the comments in the CollationCompare class.
533                } else {
534                    int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte
535                    assert ((c & 0xc0) != 0xc0);
536                    if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) {
537                        ++commonCases;
538                    } else {
539                        if ((options & CollationSettings.UPPER_FIRST) == 0) {
540                            // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14,
541                            // upper=15.
542                            // If there are only common (=lowest) weights in the whole level,
543                            // then we need not write anything.
544                            // Level length differences are handled already on the next-higher level.
545                            if (commonCases != 0 &&
546                                    (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
547                                --commonCases;
548                                while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
549                                    cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
550                                    commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
551                                }
552                                int b;
553                                if (c <= Collation.LEVEL_SEPARATOR_BYTE) {
554                                    b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
555                                } else {
556                                    b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
557                                }
558                                cases.appendByte(b << 4);
559                                commonCases = 0;
560                            }
561                            if (c > Collation.LEVEL_SEPARATOR_BYTE) {
562                                c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15
563                            }
564                        } else {
565                            // upperFirst: Compress common weights to nibbles 3..15, mixed=2,
566                            // upper=1.
567                            // The compressed common case weights only go up from the "low" value
568                            // because with upperFirst the common weight is the highest one.
569                            if (commonCases != 0) {
570                                --commonCases;
571                                while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
572                                    cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
573                                    commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
574                                }
575                                cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
576                                commonCases = 0;
577                            }
578                            if (c > Collation.LEVEL_SEPARATOR_BYTE) {
579                                c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1
580                            }
581                        }
582                        // c is a separator byte 01,
583                        // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
584                        cases.appendByte(c);
585                    }
586                }
587            }
588
589            if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
590                int t = lower32 & tertiaryMask;
591                assert ((lower32 & 0xc000) != 0xc000);
592                if (t == Collation.COMMON_WEIGHT16) {
593                    ++commonTertiaries;
594                } else if ((tertiaryMask & 0x8000) == 0) {
595                    // Tertiary weights without case bits.
596                    // Move lead bytes 06..3F to C6..FF for a large common-weight range.
597                    if (commonTertiaries != 0) {
598                        --commonTertiaries;
599                        while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
600                            tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
601                            commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
602                        }
603                        int b;
604                        if (t < Collation.COMMON_WEIGHT16) {
605                            b = TER_ONLY_COMMON_LOW + commonTertiaries;
606                        } else {
607                            b = TER_ONLY_COMMON_HIGH - commonTertiaries;
608                        }
609                        tertiaries.appendByte(b);
610                        commonTertiaries = 0;
611                    }
612                    if (t > Collation.COMMON_WEIGHT16) {
613                        t += 0xc000;
614                    }
615                    tertiaries.appendWeight16(t);
616                } else if ((options & CollationSettings.UPPER_FIRST) == 0) {
617                    // Tertiary weights with caseFirst=lowerFirst.
618                    // Move lead bytes 06..BF to 46..FF for the common-weight range.
619                    if (commonTertiaries != 0) {
620                        --commonTertiaries;
621                        while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
622                            tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
623                            commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
624                        }
625                        int b;
626                        if (t < Collation.COMMON_WEIGHT16) {
627                            b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
628                        } else {
629                            b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
630                        }
631                        tertiaries.appendByte(b);
632                        commonTertiaries = 0;
633                    }
634                    if (t > Collation.COMMON_WEIGHT16) {
635                        t += 0x4000;
636                    }
637                    tertiaries.appendWeight16(t);
638                } else {
639                    // Tertiary weights with caseFirst=upperFirst.
640                    // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
641                    // to keep tertiary CEs well-formed.
642                    // Their case+tertiary weights must be greater than those of
643                    // primary and secondary CEs.
644                    //
645                    // Separator         01 -> 01      (unchanged)
646                    // Lowercase     02..04 -> 82..84  (includes uncased)
647                    // Common weight     05 -> 85..C5  (common-weight compression range)
648                    // Lowercase     06..3F -> C6..FF
649                    // Mixed case    42..7F -> 42..7F
650                    // Uppercase     82..BF -> 02..3F
651                    // Tertiary CE   86..BF -> C6..FF
652                    if (t <= Collation.NO_CE_WEIGHT16) {
653                        // Keep separators unchanged.
654                    } else if ((lower32 >>> 16) != 0) {
655                        // Invert case bits of primary & secondary CEs.
656                        t ^= 0xc000;
657                        if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
658                            t -= 0x4000;
659                        }
660                    } else {
661                        // Keep uppercase bits of tertiary CEs.
662                        assert (0x8600 <= t && t <= 0xbfff);
663                        t += 0x4000;
664                    }
665                    if (commonTertiaries != 0) {
666                        --commonTertiaries;
667                        while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
668                            tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
669                            commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
670                        }
671                        int b;
672                        if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
673                            b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
674                        } else {
675                            b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
676                        }
677                        tertiaries.appendByte(b);
678                        commonTertiaries = 0;
679                    }
680                    tertiaries.appendWeight16(t);
681                }
682            }
683
684            if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
685                int q = lower32 & 0xffff;
686                if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) {
687                    ++commonQuaternaries;
688                } else if (q == Collation.NO_CE_WEIGHT16
689                        && (options & CollationSettings.ALTERNATE_MASK) == 0
690                        && quaternaries.isEmpty()) {
691                    // If alternate=non-ignorable and there are only common quaternary weights,
692                    // then we need not write anything.
693                    // The only weights greater than the merge separator and less than the common
694                    // weight
695                    // are shifted primary weights, which are not generated for
696                    // alternate=non-ignorable.
697                    // There are also exactly as many quaternary weights as tertiary weights,
698                    // so level length differences are handled already on tertiary level.
699                    // Any above-common quaternary weight will compare greater regardless.
700                    quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE);
701                } else {
702                    if (q == Collation.NO_CE_WEIGHT16) {
703                        q = Collation.LEVEL_SEPARATOR_BYTE;
704                    } else {
705                        q = 0xfc + ((q >>> 6) & 3);
706                    }
707                    if (commonQuaternaries != 0) {
708                        --commonQuaternaries;
709                        while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
710                            quaternaries.appendByte(QUAT_COMMON_MIDDLE);
711                            commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
712                        }
713                        int b;
714                        if (q < QUAT_COMMON_LOW) {
715                            b = QUAT_COMMON_LOW + commonQuaternaries;
716                        } else {
717                            b = QUAT_COMMON_HIGH - commonQuaternaries;
718                        }
719                        quaternaries.appendByte(b);
720                        commonQuaternaries = 0;
721                    }
722                    quaternaries.appendByte(q);
723                }
724            }
725
726            if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) {
727                break;
728            } // ce == NO_CE
729        }
730
731        // Append the beyond-primary levels.
732        // not used in Java -- boolean ok = true;
733        if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
734            if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) {
735                return;
736            }
737            // not used in Java -- ok &= secondaries.isOk();
738            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
739            secondaries.appendTo(sink);
740        }
741
742        if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
743            if (!callback.needToWrite(Collation.CASE_LEVEL)) {
744                return;
745            }
746            // not used in Java -- ok &= cases.isOk();
747            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
748            // Write pairs of nibbles as bytes, except separator bytes as themselves.
749            int length = cases.length() - 1; // Ignore the trailing NO_CE.
750            byte b = 0;
751            for (int i = 0; i < length; ++i) {
752                byte c = cases.getAt(i);
753                assert ((c & 0xf) == 0 && c != 0);
754                if (b == 0) {
755                    b = c;
756                } else {
757                    sink.Append(b | ((c >> 4) & 0xf));
758                    b = 0;
759                }
760            }
761            if (b != 0) {
762                sink.Append(b);
763            }
764        }
765
766        if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
767            if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) {
768                return;
769            }
770            // not used in Java -- ok &= tertiaries.isOk();
771            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
772            tertiaries.appendTo(sink);
773        }
774
775        if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
776            if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) {
777                return;
778            }
779            // not used in Java -- ok &= quaternaries.isOk();
780            sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
781            quaternaries.appendTo(sink);
782        }
783
784        // not used in Java -- if (!ok || !sink.IsOk()) {
785        // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
786        // C implementation. IsOk() in Java always returns true, so this
787        // is a dead code.
788    }
789}
790