1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8package com.ibm.icu.impl;
9
10import java.io.IOException;
11import java.nio.ByteBuffer;
12import java.util.Locale;
13import java.util.MissingResourceException;
14
15import com.ibm.icu.lang.UCharacter;
16import com.ibm.icu.lang.UCharacterCategory;
17import com.ibm.icu.text.UTF16;
18import com.ibm.icu.text.UnicodeSet;
19
/**
* Internal class to manage character names.
* Since the data for names is stored
* in an array of char, by default the indexes used in this class refer to
* a 2-byte count, unless otherwise stated. In cases where the index refers
* to a byte count, the index is halved and, depending on whether the index is
* even or odd, the MSB or LSB of the resulting char at the halved index is
* returned. For indexes into an array of int, the index is multiplied by 2, and
* the char at the multiplied index and its following char are returned as an
* int.
* <a href="../lang/UCharacter.html">UCharacter</a> acts as a public facade for this class.
* Note: 0 - 0x1F are control characters without names in Unicode 3.0.
* @author Syn Wee Quek
* @since nov0700
*/
35
36public final class UCharacterName
37{
38    // public data members ----------------------------------------------
39
40    /*
41     * public singleton instance
42     */
43    public static final UCharacterName INSTANCE;
44
45    static {
46        try {
47            INSTANCE = new UCharacterName();
48        } catch (IOException e) {
49            ///CLOVER:OFF
50            throw new MissingResourceException("Could not construct UCharacterName. Missing unames.icu","","");
51            ///CLOVER:ON
52        }
53    }
54
    /**
    * Number of lines (names) per group: 1 << 5 = 32.
    * The original note describes this as 1 << GROUP_SHIFT_; GROUP_SHIFT_ is
    * declared outside this part of the file (presumably 5 - confirm).
    */
    public static final int LINES_PER_GROUP_ = 1 << 5;
    /**
     * Number of groups. Within this class it is used as the exclusive upper
     * bound for group indexes (see getGroup and getGroupMSB). It starts at 0;
     * where it is assigned is not visible in this part of the file.
     */
    public int m_groupcount_ = 0;
64
65    // public methods ---------------------------------------------------
66
67    /**
68    * Retrieve the name of a Unicode code point.
69    * Depending on <code>choice</code>, the character name written into the
70    * buffer is the "modern" name or the name that was defined in Unicode
71    * version 1.0.
72    * The name contains only "invariant" characters
73    * like A-Z, 0-9, space, and '-'.
74    *
75    * @param ch the code point for which to get the name.
76    * @param choice Selector for which name to get.
77    * @return if code point is above 0x1fff, null is returned
78    */
79    public String getName(int ch, int choice)
80    {
81        if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
82            choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
83            return null;
84        }
85
86        String result = null;
87
88        result = getAlgName(ch, choice);
89
90        // getting normal character name
91        if (result == null || result.length() == 0) {
92            if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
93                result = getExtendedName(ch);
94            } else {
95                result = getGroupName(ch, choice);
96            }
97        }
98
99        return result;
100    }
101
102    /**
103    * Find a character by its name and return its code point value
104    * @param choice selector to indicate if argument name is a Unicode 1.0
105    *        or the most current version
106    * @param name the name to search for
107    * @return code point
108    */
109    public int getCharFromName(int choice, String name)
110    {
111        // checks for illegal arguments
112        if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
113            name == null || name.length() == 0) {
114            return -1;
115        }
116
117        // try extended names first
118        int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice);
119        if (result >= -1) {
120            return result;
121        }
122
123        String upperCaseName = name.toUpperCase(Locale.ENGLISH);
124        // try algorithmic names first, if fails then try group names
125        // int result = getAlgorithmChar(choice, uppercasename);
126
127        if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
128            choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
129        ) {
130            int count = 0;
131            if (m_algorithm_ != null) {
132                count = m_algorithm_.length;
133            }
134            for (count --; count >= 0; count --) {
135                result = m_algorithm_[count].getChar(upperCaseName);
136                if (result >= 0) {
137                    return result;
138                }
139            }
140        }
141
142        if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
143            result = getGroupChar(upperCaseName,
144                                  UCharacterNameChoice.UNICODE_CHAR_NAME);
145            if (result == -1) {
146                result = getGroupChar(upperCaseName,
147                                      UCharacterNameChoice.CHAR_NAME_ALIAS);
148            }
149        }
150        else {
151            result = getGroupChar(upperCaseName, choice);
152        }
153        return result;
154    }
155
156    // these are all UCharacterNameIterator use methods -------------------
157
158    /**
159    * Reads a block of compressed lengths of 32 strings and expands them into
160    * offsets and lengths for each string. Lengths are stored with a
161    * variable-width encoding in consecutive nibbles:
162    * If a nibble<0xc, then it is the length itself (0 = empty string).
163    * If a nibble>=0xc, then it forms a length value with the following
164    * nibble.
165    * The offsets and lengths arrays must be at least 33 (one more) long
166    * because there is no check here at the end if the last nibble is still
167    * used.
168    * @param index of group string object in array
169    * @param offsets array to store the value of the string offsets
170    * @param lengths array to store the value of the string length
171    * @return next index of the data string immediately after the lengths
172    *         in terms of byte address
173    */
174    public int getGroupLengths(int index, char offsets[], char lengths[])
175    {
176        char length = 0xffff;
177        byte b = 0,
178            n = 0;
179        int shift;
180        index = index * m_groupsize_; // byte count offsets of group strings
181        int stringoffset = UCharacterUtility.toInt(
182                                 m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
183                                 m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
184
185        offsets[0] = 0;
186
187        // all 32 lengths must be read to get the offset of the first group
188        // string
189        for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
190            b = m_groupstring_[stringoffset];
191            shift = 4;
192
193            while (shift >= 0) {
194                // getting nibble
195                n = (byte)((b >> shift) & 0x0F);
196                if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
197                    length = (char)((n - 12) << 4);
198                }
199                else {
200                    if (length != 0xffff) {
201                       lengths[i] = (char)((length | n) + 12);
202                    }
203                    else {
204                       lengths[i] = (char)n;
205                    }
206
207                    if (i < LINES_PER_GROUP_) {
208                       offsets[i + 1] = (char)(offsets[i] + lengths[i]);
209                    }
210
211                    length = 0xffff;
212                    i ++;
213                }
214
215                shift -= 4;
216            }
217        }
218        return stringoffset;
219    }
220
221    /**
222    * Gets the name of the argument group index.
223    * UnicodeData.txt uses ';' as a field separator, so no field can contain
224    * ';' as part of its contents. In unames.icu, it is marked as
225    * token[';'] == -1 only if the semicolon is used in the data file - which
226    * is iff we have Unicode 1.0 names or ISO comments or aliases.
227    * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases
228    * although we know that it will never be part of a name.
229    * Equivalent to ICU4C's expandName.
230    * @param index of the group name string in byte count
231    * @param length of the group name string
232    * @param choice of Unicode 1.0 name or the most current name
233    * @return name of the group
234    */
235    public String getGroupName(int index, int length, int choice)
236    {
237        if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
238            choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
239        ) {
240            if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
241                /*
242                 * skip the modern name if it is not requested _and_
243                 * if the semicolon byte value is a character, not a token number
244                 */
245                int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
246                do {
247                    int oldindex = index;
248                    index += UCharacterUtility.skipByteSubString(m_groupstring_,
249                                                       index, length, (byte)';');
250                    length -= (index - oldindex);
251                } while(--fieldIndex>0);
252            }
253            else {
254                // the semicolon byte is a token number, therefore only modern
255                // names are stored in unames.dat and there is no such
256                // requested alternate name here
257                length = 0;
258            }
259        }
260
261        synchronized (m_utilStringBuffer_) {
262            m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
263            byte b;
264            char token;
265            for (int i = 0; i < length;) {
266                b = m_groupstring_[index + i];
267                i ++;
268
269                if (b >= m_tokentable_.length) {
270                    if (b == ';') {
271                        break;
272                    }
273                    m_utilStringBuffer_.append(b); // implicit letter
274                }
275                else {
276                    token = m_tokentable_[b & 0x00ff];
277                    if (token == 0xFFFE) {
278                        // this is a lead byte for a double-byte token
279                        token = m_tokentable_[b << 8 |
280                                          (m_groupstring_[index + i] & 0x00ff)];
281                        i ++;
282                    }
283                    if (token == 0xFFFF) {
284                        if (b == ';') {
285                            // skip the semicolon if we are seeking extended
286                            // names and there was no 2.0 name but there
287                            // is a 1.0 name.
288                            if (m_utilStringBuffer_.length() == 0 && choice ==
289                                   UCharacterNameChoice.EXTENDED_CHAR_NAME) {
290                                continue;
291                            }
292                            break;
293                        }
294                        // explicit letter
295                        m_utilStringBuffer_.append((char)(b & 0x00ff));
296                    }
297                    else { // write token word
298                        UCharacterUtility.getNullTermByteSubString(
299                                m_utilStringBuffer_, m_tokenstring_, token);
300                    }
301                }
302            }
303
304            if (m_utilStringBuffer_.length() > 0) {
305                return m_utilStringBuffer_.toString();
306            }
307        }
308        return null;
309    }
310
311    /**
312    * Retrieves the extended name
313    */
314    public String getExtendedName(int ch)
315    {
316        String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
317        if (result == null) {
318            // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
319            result = getExtendedOr10Name(ch);
320        }
321        return result;
322    }
323
324    /**
325     * Gets the group index for the codepoint, or the group before it.
326     * @param codepoint The codepoint index.
327     * @return group index containing codepoint or the group before it.
328     */
329    public int getGroup(int codepoint)
330    {
331        int endGroup = m_groupcount_;
332        int msb      = getCodepointMSB(codepoint);
333        int result   = 0;
334        // binary search for the group of names that contains the one for
335        // code
336        // find the group that contains codepoint, or the highest before it
337        while (result < endGroup - 1) {
338            int gindex = (result + endGroup) >> 1;
339            if (msb < getGroupMSB(gindex)) {
340                endGroup = gindex;
341            }
342            else {
343                result = gindex;
344            }
345        }
346        return result;
347    }
348
349    /**
350     * Gets the extended and 1.0 name when the most current unicode names
351     * fail
352     * @param ch codepoint
353     * @return name of codepoint extended or 1.0
354     */
355    public String getExtendedOr10Name(int ch)
356    {
357        String result = null;
358        // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
359        if (result == null) {
360            int type = getType(ch);
361            // Return unknown if the table of names above is not up to
362            // date.
363            if (type >= TYPE_NAMES_.length) {
364                result = UNKNOWN_TYPE_NAME_;
365            }
366            else {
367                result = TYPE_NAMES_[type];
368            }
369            synchronized (m_utilStringBuffer_) {
370                m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
371                m_utilStringBuffer_.append('<');
372                m_utilStringBuffer_.append(result);
373                m_utilStringBuffer_.append('-');
374                String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
375                int zeros = 4 - chStr.length();
376                while (zeros > 0) {
377                    m_utilStringBuffer_.append('0');
378                    zeros --;
379                }
380                m_utilStringBuffer_.append(chStr);
381                m_utilStringBuffer_.append('>');
382                result = m_utilStringBuffer_.toString();
383            }
384        }
385        return result;
386    }
387
388    /**
389     * Gets the MSB from the group index
390     * @param gindex group index
391     * @return the MSB of the group if gindex is valid, -1 otherwise
392     */
393    public int getGroupMSB(int gindex)
394    {
395        if (gindex >= m_groupcount_) {
396            return -1;
397        }
398        return m_groupinfo_[gindex * m_groupsize_];
399    }
400
401    /**
402     * Gets the MSB of the codepoint
403     * @param codepoint The codepoint value.
404     * @return the MSB of the codepoint
405     */
406    public static int getCodepointMSB(int codepoint)
407    {
408        return codepoint >> GROUP_SHIFT_;
409    }
410
411    /**
412     * Gets the maximum codepoint + 1 of the group
413     * @param msb most significant byte of the group
414     * @return limit codepoint of the group
415     */
416    public static int getGroupLimit(int msb)
417    {
418        return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
419    }
420
421    /**
422     * Gets the minimum codepoint of the group
423     * @param msb most significant byte of the group
424     * @return minimum codepoint of the group
425     */
426    public static int getGroupMin(int msb)
427    {
428        return msb << GROUP_SHIFT_;
429    }
430
431    /**
432     * Gets the offset to a group
433     * @param codepoint The codepoint value.
434     * @return offset to a group
435     */
436    public static int getGroupOffset(int codepoint)
437    {
438        return codepoint & GROUP_MASK_;
439    }
440
441    /**
442     * Gets the minimum codepoint of a group
443     * @param codepoint The codepoint value.
444     * @return minimum codepoint in the group which codepoint belongs to
445     */
446    ///CLOVER:OFF
447    public static int getGroupMinFromCodepoint(int codepoint)
448    {
449        return codepoint & ~GROUP_MASK_;
450    }
451    ///CLOVER:ON
452
453    /**
454     * Get the Algorithm range length
455     * @return Algorithm range length
456     */
457    public int getAlgorithmLength()
458    {
459        return m_algorithm_.length;
460    }
461
462    /**
463     * Gets the start of the range
464     * @param index algorithm index
465     * @return algorithm range start
466     */
467    public int getAlgorithmStart(int index)
468    {
469        return m_algorithm_[index].m_rangestart_;
470    }
471
472    /**
473     * Gets the end of the range
474     * @param index algorithm index
475     * @return algorithm range end
476     */
477    public int getAlgorithmEnd(int index)
478    {
479        return m_algorithm_[index].m_rangeend_;
480    }
481
482    /**
483     * Gets the Algorithmic name of the codepoint
484     * @param index algorithmic range index
485     * @param codepoint The codepoint value.
486     * @return algorithmic name of codepoint
487     */
488    public String getAlgorithmName(int index, int codepoint)
489    {
490        String result = null;
491        synchronized (m_utilStringBuffer_) {
492            m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
493            m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
494            result = m_utilStringBuffer_.toString();
495        }
496        return result;
497    }
498
499    /**
500    * Gets the group name of the character
501    * @param ch character to get the group name
502    * @param choice name choice selector to choose a unicode 1.0 or newer name
503    */
504    public synchronized String getGroupName(int ch, int choice)
505    {
506        // gets the msb
507        int msb   = getCodepointMSB(ch);
508        int group = getGroup(ch);
509
510        // return this if it is an exact match
511        if (msb == m_groupinfo_[group * m_groupsize_]) {
512            int index = getGroupLengths(group, m_groupoffsets_,
513                                        m_grouplengths_);
514            int offset = ch & GROUP_MASK_;
515            return getGroupName(index + m_groupoffsets_[offset],
516                                m_grouplengths_[offset], choice);
517        }
518
519        return null;
520    }
521
522    // these are transliterator use methods ---------------------------------
523
524    /**
525     * Gets the maximum length of any codepoint name.
526     * Equivalent to uprv_getMaxCharNameLength.
527     * @return the maximum length of any codepoint name
528     */
529    public int getMaxCharNameLength()
530    {
531        if (initNameSetsLengths()) {
532            return m_maxNameLength_;
533        }
534        else {
535            return 0;
536        }
537    }
538
539    /**
540     * Gets the maximum length of any iso comments.
541     * Equivalent to uprv_getMaxISOCommentLength.
542     * @return the maximum length of any codepoint name
543     */
544    ///CLOVER:OFF
545    public int getMaxISOCommentLength()
546    {
547        if (initNameSetsLengths()) {
548            return m_maxISOCommentLength_;
549        }
550        else {
551            return 0;
552        }
553    }
554    ///CLOVER:ON
555
    /**
     * Fills set with characters that are used in Unicode character names,
     * by converting the internal name set (m_nameSet_) into the given
     * UnicodeSet.
     * Equivalent to uprv_getCharNameCharacters.
     * @param set USet to receive characters. Per the original documentation,
     *        existing contents are deleted (done by convert, which is not
     *        visible in this part of the file).
     */
    public void getCharNameCharacters(UnicodeSet set)
    {
        convert(m_nameSet_, set);
    }
565
    /**
     * Fills set with characters that are used in ISO comments, by
     * converting the internal ISO comment set (m_ISOCommentSet_) into the
     * given UnicodeSet.
     * Equivalent to uprv_getISOCommentCharacters.
     * @param set USet to receive characters. Per the original documentation,
     *        existing contents are deleted (done by convert, which is not
     *        visible in this part of the file).
     */
    ///CLOVER:OFF
    public void getISOCommentCharacters(UnicodeSet set)
    {
        convert(m_ISOCommentSet_, set);
    }
576    ///CLOVER:ON
577
578    // package private inner class --------------------------------------
579
580    /**
581    * Algorithmic name class
582    */
583    static final class AlgorithmName
584    {
585        // package private data members ----------------------------------
586
        /**
        * Constant type values of the different AlgorithmName kinds.
        * TYPE_0_: the name is the prefix followed by hex digits of the code
        * point. TYPE_1_: the name is the prefix followed by factorized
        * elements (see appendName).
        */
        static final int TYPE_0_ = 0;
        static final int TYPE_1_ = 1;
592
593        // package private constructors ----------------------------------
594
        /**
        * Constructor. Creates an empty object; the range, factors, prefix
        * and factor strings are supplied afterwards through setInfo,
        * setFactor, setPrefix and setFactorString.
        */
        AlgorithmName()
        {
        }
601
602        // package private methods ---------------------------------------
603
604        /**
605        * Sets the information for accessing the algorithmic names
606        * @param rangestart starting code point that lies within this name group
607        * @param rangeend end code point that lies within this name group
608        * @param type algorithm type. There's 2 kinds of algorithmic type. First
609        *        which uses code point as part of its name and the other uses
610        *        variant postfix strings
611        * @param variant algorithmic variant
612        * @return true if values are valid
613        */
614        boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
615        {
616            if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
617                && rangeend <= UCharacter.MAX_VALUE &&
618                (type == TYPE_0_ || type == TYPE_1_)) {
619                m_rangestart_ = rangestart;
620                m_rangeend_ = rangeend;
621                m_type_ = type;
622                m_variant_ = variant;
623                return true;
624            }
625            return false;
626        }
627
628        /**
629        * Sets the factor data
630        * @param factor Array of factor
631        * @return true if factors are valid
632        */
633        boolean setFactor(char factor[])
634        {
635            if (factor.length == m_variant_) {
636                m_factor_ = factor;
637                return true;
638            }
639            return false;
640        }
641
642        /**
643        * Sets the name prefix
644        * @param prefix
645        * @return true if prefix is set
646        */
647        boolean setPrefix(String prefix)
648        {
649            if (prefix != null && prefix.length() > 0) {
650                m_prefix_ = prefix;
651                return true;
652            }
653            return false;
654        }
655
        /**
        * Sets the variant factorized name data. The array is stored as is,
        * without copying or validation.
        * @param string variant factorized name data
        * @return always true
        */
        boolean setFactorString(byte string[])
        {
            // factor and variant string can be empty for things like
            // hangul code points
            m_factorstring_ = string;
            return true;
        }
668
669        /**
670        * Checks if code point lies in Algorithm object at index
671        * @param ch code point
672        */
673        boolean contains(int ch)
674        {
675            return m_rangestart_ <= ch && ch <= m_rangeend_;
676        }
677
678        /**
679        * Appends algorithm name of code point into StringBuffer.
680        * Note this method does not check for validity of code point in Algorithm,
681        * result is undefined if code point does not belong in Algorithm.
682        * @param ch code point
683        * @param str StringBuffer to append to
684        */
685        void appendName(int ch, StringBuffer str)
686        {
687            str.append(m_prefix_);
688            switch (m_type_)
689            {
690                case TYPE_0_:
691                    // prefix followed by hex digits indicating variants
692                str.append(Utility.hex(ch,m_variant_));
693                    break;
694                case TYPE_1_:
695                    // prefix followed by factorized-elements
696                    int offset = ch - m_rangestart_;
697                    int indexes[] = m_utilIntBuffer_;
698                    int factor;
699
700                    // write elements according to the factors
701                    // the factorized elements are determined by modulo
702                    // arithmetic
703                    synchronized (m_utilIntBuffer_) {
704                        for (int i = m_variant_ - 1; i > 0; i --)
705                        {
706                            factor = m_factor_[i] & 0x00FF;
707                            indexes[i] = offset % factor;
708                            offset /= factor;
709                        }
710
711                        // we don't need to calculate the last modulus because
712                        // start <= code <= end guarantees here that
713                        // code <= factors[0]
714                        indexes[0] = offset;
715
716                        // joining up the factorized strings
717                        str.append(getFactorString(indexes, m_variant_));
718                    }
719                    break;
720            }
721        }
722
723        /**
724        * Gets the character for the argument algorithmic name
725        * @return the algorithmic char or -1 otherwise.
726        */
727        int getChar(String name)
728        {
729            int prefixlen = m_prefix_.length();
730            if (name.length() < prefixlen ||
731                !m_prefix_.equals(name.substring(0, prefixlen))) {
732                return -1;
733            }
734
735            switch (m_type_)
736            {
737                case TYPE_0_ :
738                try
739                {
740                    int result = Integer.parseInt(name.substring(prefixlen),
741                                                  16);
742                    // does it fit into the range?
743                    if (m_rangestart_ <= result && result <= m_rangeend_) {
744                        return result;
745                    }
746                }
747                catch (NumberFormatException e)
748                {
749                    return -1;
750                }
751                break;
752                case TYPE_1_ :
753                    // repetitative suffix name comparison done here
754                    // offset is the character code - start
755                    for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
756                    {
757                        int offset = ch - m_rangestart_;
758                        int indexes[] = m_utilIntBuffer_;
759                        int factor;
760
761                        // write elements according to the factors
762                        // the factorized elements are determined by modulo
763                        // arithmetic
764                        synchronized (m_utilIntBuffer_) {
765                            for (int i = m_variant_ - 1; i > 0; i --)
766                            {
767                                factor = m_factor_[i] & 0x00FF;
768                                indexes[i] = offset % factor;
769                                offset /= factor;
770                            }
771
772                            // we don't need to calculate the last modulus
773                            // because start <= code <= end guarantees here that
774                            // code <= factors[0]
775                            indexes[0] = offset;
776
777                            // joining up the factorized strings
778                            if (compareFactorString(indexes, m_variant_, name,
779                                                    prefixlen)) {
780                                return ch;
781                            }
782                        }
783                    }
784            }
785
786            return -1;
787        }
788
789        /**
790         * Adds all chars in the set of algorithmic names into the set.
791         * Equivalent to part of calcAlgNameSetsLengths.
792         * @param set int set to add the chars of the algorithm names into
793         * @param maxlength maximum length to compare to
794         * @return the length that is either maxlength of the length of this
795         *         algorithm name if it is longer than maxlength
796         */
797        int add(int set[], int maxlength)
798        {
799            // prefix length
800            int length = UCharacterName.add(set, m_prefix_);
801            switch (m_type_) {
802                case TYPE_0_ : {
803                    // name = prefix + (range->variant times) hex-digits
804                    // prefix
805                    length += m_variant_;
806                    /* synwee to check
807                     * addString(set, (const char *)(range + 1))
808                                       + range->variant;*/
809                    break;
810                }
811                case TYPE_1_ : {
812                    // name = prefix factorized-elements
813                    // get the set and maximum factor suffix length for each
814                    // factor
815                    for (int i = m_variant_ - 1; i > 0; i --)
816                    {
817                        int maxfactorlength = 0;
818                        int count = 0;
819                        for (int factor = m_factor_[i]; factor > 0; -- factor) {
820                            synchronized (m_utilStringBuffer_) {
821                                m_utilStringBuffer_.delete(0,
822                                                m_utilStringBuffer_.length());
823                                count
824                                  = UCharacterUtility.getNullTermByteSubString(
825                                                m_utilStringBuffer_,
826                                                m_factorstring_, count);
827                                UCharacterName.add(set, m_utilStringBuffer_);
828                                if (m_utilStringBuffer_.length()
829                                                            > maxfactorlength)
830                                {
831                                    maxfactorlength
832                                                = m_utilStringBuffer_.length();
833                                }
834                            }
835                        }
836                        length += maxfactorlength;
837                    }
838                }
839            }
840            if (length > maxlength) {
841                return length;
842            }
843            return maxlength;
844        }
845
846        // private data members ------------------------------------------
847
        /**
        * Algorithmic data information.
        * m_rangestart_ / m_rangeend_: inclusive code point range (see contains).
        * m_type_: TYPE_0_ or TYPE_1_ (validated by setInfo).
        * m_variant_: for TYPE_0_ passed to Utility.hex as the second
        * argument; for TYPE_1_ the number of factors (see setFactor).
        * m_factor_: the factors, one per variant.
        * m_prefix_: name prefix shared by all names of the range.
        * m_factorstring_: null-terminated factor strings in byte form.
        */
        private int m_rangestart_;
        private int m_rangeend_;
        private byte m_type_;
        private byte m_variant_;
        private char m_factor_[];
        private String m_prefix_;
        private byte m_factorstring_[];
        /**
         * Utility StringBuffer, reused as scratch space and used as the
         * lock object by the methods that fill it.
         */
        private StringBuffer m_utilStringBuffer_ = new StringBuffer();
        /**
         * Utility int buffer holding one index per factor (256 entries);
         * also used as the lock object while it is being filled.
         */
        private int m_utilIntBuffer_[] = new int[256];
866
867        // private methods -----------------------------------------------
868
869        /**
870        * Gets the indexth string in each of the argument factor block
871        * @param index array with each index corresponding to each factor block
872        * @param length length of the array index
873        * @return the combined string of the array of indexth factor string in
874        *         factor block
875        */
876        private String getFactorString(int index[], int length)
877        {
878            int size = m_factor_.length;
879            if (index == null || length != size) {
880                return null;
881            }
882
883            synchronized (m_utilStringBuffer_) {
884                m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
885                int count = 0;
886                int factor;
887                size --;
888                for (int i = 0; i <= size; i ++) {
889                    factor = m_factor_[i];
890                    count = UCharacterUtility.skipNullTermByteSubString(
891                                             m_factorstring_, count, index[i]);
892                    count = UCharacterUtility.getNullTermByteSubString(
893                                          m_utilStringBuffer_, m_factorstring_,
894                                          count);
895                    if (i != size) {
896                        count = UCharacterUtility.skipNullTermByteSubString(
897                                                       m_factorstring_, count,
898                                                       factor - index[i] - 1);
899                    }
900                }
901                return m_utilStringBuffer_.toString();
902            }
903        }
904
905        /**
906        * Compares the indexth string in each of the argument factor block with
907        * the argument string
908        * @param index array with each index corresponding to each factor block
909        * @param length index array length
910        * @param str string to compare with
911        * @param offset of str to start comparison
912        * @return true if string matches
913        */
914        private boolean compareFactorString(int index[], int length, String str,
915                                            int offset)
916        {
917            int size = m_factor_.length;
918            if (index == null || length != size)
919                return false;
920
921            int count = 0;
922            int strcount = offset;
923            int factor;
924            size --;
925            for (int i = 0; i <= size; i ++)
926            {
927                factor = m_factor_[i];
928                count = UCharacterUtility.skipNullTermByteSubString(
929                                          m_factorstring_, count, index[i]);
930                strcount = UCharacterUtility.compareNullTermByteSubString(str,
931                                          m_factorstring_, strcount, count);
932                if (strcount < 0) {
933                    return false;
934                }
935
936                if (i != size) {
937                    count = UCharacterUtility.skipNullTermByteSubString(
938                                  m_factorstring_, count, factor - index[i]);
939                }
940            }
941            if (strcount != str.length()) {
942                return false;
943            }
944            return true;
945        }
946    }
947
948    // package private data members --------------------------------------
949
    /**
     * Size of the record of each group, in number of char; used as the
     * stride when indexing the group information array. Set through
     * setGroupCountSize.
     */
    int m_groupsize_ = 0;
954
955    // package private methods --------------------------------------------
956
957    /**
958    * Sets the token data
959    * @param token array of tokens
960    * @param tokenstring array of string values of the tokens
961    * @return false if there is a data error
962    */
963    boolean setToken(char token[], byte tokenstring[])
964    {
965        if (token != null && tokenstring != null && token.length > 0 &&
966            tokenstring.length > 0) {
967            m_tokentable_ = token;
968            m_tokenstring_ = tokenstring;
969            return true;
970        }
971        return false;
972    }
973
974    /**
975    * Set the algorithm name information array
976    * @param alg Algorithm information array
977    * @return true if the group string offset has been set correctly
978    */
979    boolean setAlgorithm(AlgorithmName alg[])
980    {
981        if (alg != null && alg.length != 0) {
982            m_algorithm_ = alg;
983            return true;
984        }
985        return false;
986    }
987
988    /**
989    * Sets the number of group and size of each group in number of char
990    * @param count number of groups
991    * @param size size of group in char
992    * @return true if group size is set correctly
993    */
994    boolean setGroupCountSize(int count, int size)
995    {
996        if (count <= 0 || size <= 0) {
997            return false;
998        }
999        m_groupcount_ = count;
1000        m_groupsize_ = size;
1001        return true;
1002    }
1003
1004    /**
1005    * Sets the group name data
1006    * @param group index information array
1007    * @param groupstring name information array
1008    * @return false if there is a data error
1009    */
1010    boolean setGroup(char group[], byte groupstring[])
1011    {
1012        if (group != null && groupstring != null && group.length > 0 &&
1013            groupstring.length > 0) {
1014            m_groupinfo_ = group;
1015            m_groupstring_ = groupstring;
1016            return true;
1017        }
1018        return false;
1019    }
1020
1021    // private data members ----------------------------------------------
1022
1023    /**
1024    * Data used in unames.icu
1025    */
1026    private char m_tokentable_[];
1027    private byte m_tokenstring_[];
1028    private char m_groupinfo_[];
1029    private byte m_groupstring_[];
1030    private AlgorithmName m_algorithm_[];
1031
1032    /**
1033    * Group use.  Note - access must be synchronized.
1034    */
1035    private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
1036    private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
1037
    /**
    * Name of the data file the character names are loaded from
    */
    private static final String FILE_NAME_ = "unames.icu";
    /**
    * Shift count between a group value and a code point; equal to the
    * exponent of LINES_PER_GROUP_ (1 << 5)
    */
    private static final int GROUP_SHIFT_ = 5;
    /**
    * Mask to retrieve the offset for a particular character within a group
    */
    private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;

    /**
    * Position of offsethigh in the record of a group in the group
    * information array
    */
    private static final int OFFSET_HIGH_OFFSET_ = 1;

    /**
    * Position of offsetlow in the record of a group in the group
    * information array
    */
    private static final int OFFSET_LOW_OFFSET_ = 2;
    /**
    * Double nibble indicator, any nibble > this number has to be combined
    * with its following nibble
    */
    private static final int SINGLE_NIBBLE_MAX_ = 11;
1065
1066    /*
1067     * Maximum length of character names (regular & 1.0).
1068     */
1069    //private static int MAX_NAME_LENGTH_ = 0;
1070    /*
1071     * Maximum length of ISO comments.
1072     */
1073    //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
1074
1075    /**
1076     * Set of chars used in character names (regular & 1.0).
1077     * Chars are platform-dependent (can be EBCDIC).
1078     */
1079    private int m_nameSet_[] = new int[8];
1080    /**
1081     * Set of chars used in ISO comments. (regular & 1.0).
1082     * Chars are platform-dependent (can be EBCDIC).
1083     */
1084    private int m_ISOCommentSet_[] = new int[8];
1085    /**
1086     * Utility StringBuffer
1087     */
1088    private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1089    /**
1090     * Utility int buffer
1091     */
1092    private int m_utilIntBuffer_[] = new int[2];
1093    /**
1094     * Maximum ISO comment length
1095     */
1096    private int m_maxISOCommentLength_;
1097    /**
1098     * Maximum name length
1099     */
1100    private int m_maxNameLength_;
1101    /**
1102     * Type names used for extended names
1103     */
1104    private static final String TYPE_NAMES_[] = {"unassigned",
1105                                                 "uppercase letter",
1106                                                 "lowercase letter",
1107                                                 "titlecase letter",
1108                                                 "modifier letter",
1109                                                 "other letter",
1110                                                 "non spacing mark",
1111                                                 "enclosing mark",
1112                                                 "combining spacing mark",
1113                                                 "decimal digit number",
1114                                                 "letter number",
1115                                                 "other number",
1116                                                 "space separator",
1117                                                 "line separator",
1118                                                 "paragraph separator",
1119                                                 "control",
1120                                                 "format",
1121                                                 "private use area",
1122                                                 "surrogate",
1123                                                 "dash punctuation",
1124                                                 "start punctuation",
1125                                                 "end punctuation",
1126                                                 "connector punctuation",
1127                                                 "other punctuation",
1128                                                 "math symbol",
1129                                                 "currency symbol",
1130                                                 "modifier symbol",
1131                                                 "other symbol",
1132                                                 "initial punctuation",
1133                                                 "final punctuation",
1134                                                 "noncharacter",
1135                                                 "lead surrogate",
1136                                                 "trail surrogate"};
1137    /**
1138     * Unknown type name
1139     */
1140    private static final String UNKNOWN_TYPE_NAME_ = "unknown";
1141    /**
1142     * Not a character type
1143     */
1144    private static final int NON_CHARACTER_
1145                                    = UCharacterCategory.CHAR_CATEGORY_COUNT;
1146    /**
1147    * Lead surrogate type
1148    */
1149    private static final int LEAD_SURROGATE_
1150                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
1151    /**
1152    * Trail surrogate type
1153    */
1154    private static final int TRAIL_SURROGATE_
1155                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
1156    /**
1157    * Extended category count
1158    */
1159    static final int EXTENDED_CATEGORY_
1160                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
1161
1162    // private constructor ------------------------------------------------
1163
1164    /**
1165    * <p>Protected constructor for use in UCharacter.</p>
1166    * @exception IOException thrown when data reading fails
1167    */
1168    private UCharacterName() throws IOException
1169    {
1170        ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
1171        UCharacterNameReader reader = new UCharacterNameReader(b);
1172        reader.read(this);
1173    }
1174
1175    // private methods ---------------------------------------------------
1176
1177    /**
1178    * Gets the algorithmic name for the argument character
1179    * @param ch character to determine name for
1180    * @param choice name choice
1181    * @return the algorithmic name or null if not found
1182    */
1183    private String getAlgName(int ch, int choice)
1184    {
1185        /* Only the normative character name can be algorithmic. */
1186        if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
1187            choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
1188        ) {
1189            // index in terms integer index
1190            synchronized (m_utilStringBuffer_) {
1191                m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
1192
1193                for (int index = m_algorithm_.length - 1; index >= 0; index --)
1194                {
1195                   if (m_algorithm_[index].contains(ch)) {
1196                      m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1197                      return m_utilStringBuffer_.toString();
1198                   }
1199                }
1200            }
1201        }
1202        return null;
1203    }
1204
1205    /**
1206    * Getting the character with the tokenized argument name
1207    * @param name of the character
1208    * @return character with the tokenized argument name or -1 if character
1209    *         is not found
1210    */
1211    private synchronized int getGroupChar(String name, int choice)
1212    {
1213        for (int i = 0; i < m_groupcount_; i ++) {
1214            // populating the data set of grouptable
1215
1216            int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
1217                                                  m_grouplengths_);
1218
1219            // shift out to function
1220            int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
1221                                      choice);
1222            if (result != -1) {
1223                return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
1224                         | result;
1225            }
1226        }
1227        return -1;
1228    }
1229
1230    /**
1231    * Compares and retrieve character if name is found within the argument
1232    * group
1233    * @param index index where the set of names reside in the group block
1234    * @param length list of lengths of the strings
1235    * @param name character name to search for
1236    * @param choice of either 1.0 or the most current unicode name
1237    * @return relative character in the group which matches name, otherwise if
1238    *         not found, -1 will be returned
1239    */
1240    private int getGroupChar(int index, char length[], String name,
1241                             int choice)
1242    {
1243        byte b = 0;
1244        char token;
1245        int len;
1246        int namelen = name.length();
1247        int nindex;
1248        int count;
1249
1250        for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
1251            nindex = 0;
1252            len = length[result];
1253
1254            if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
1255                choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
1256            ) {
1257                /*
1258                 * skip the modern name if it is not requested _and_
1259                 * if the semicolon byte value is a character, not a token number
1260                 */
1261                int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
1262                do {
1263                    int oldindex = index;
1264                    index += UCharacterUtility.skipByteSubString(m_groupstring_,
1265                                                         index, len, (byte)';');
1266                    len -= (index - oldindex);
1267                } while(--fieldIndex>0);
1268            }
1269
1270            // number of tokens is > the length of the name
1271            // write each letter directly, and write a token word per token
1272            for (count = 0; count < len && nindex != -1 && nindex < namelen;
1273                ) {
1274                b = m_groupstring_[index + count];
1275                count ++;
1276
1277                if (b >= m_tokentable_.length) {
1278                    if (name.charAt(nindex ++) != (b & 0xFF)) {
1279                        nindex = -1;
1280                    }
1281                }
1282                else {
1283                    token = m_tokentable_[b & 0xFF];
1284                    if (token == 0xFFFE) {
1285                        // this is a lead byte for a double-byte token
1286                        token = m_tokentable_[b << 8 |
1287                                   (m_groupstring_[index + count] & 0x00ff)];
1288                        count ++;
1289                    }
1290                    if (token == 0xFFFF) {
1291                        if (name.charAt(nindex ++) != (b & 0xFF)) {
1292                            nindex = -1;
1293                        }
1294                    }
1295                    else {
1296                        // compare token with name
1297                        nindex = UCharacterUtility.compareNullTermByteSubString(
1298                                        name, m_tokenstring_, nindex, token);
1299                    }
1300                }
1301            }
1302
1303            if (namelen == nindex &&
1304                (count == len || m_groupstring_[index + count] == ';')) {
1305                return result;
1306            }
1307
1308            index += len;
1309        }
1310        return -1;
1311    }
1312
1313    /**
1314    * Gets the character extended type
1315    * @param ch character to be tested
1316    * @return extended type it is associated with
1317    */
1318    private static int getType(int ch)
1319    {
1320        if (UCharacterUtility.isNonCharacter(ch)) {
1321            // not a character we return a invalid category count
1322            return NON_CHARACTER_;
1323        }
1324        int result = UCharacter.getType(ch);
1325        if (result == UCharacterCategory.SURROGATE) {
1326            if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
1327                result = LEAD_SURROGATE_;
1328            }
1329            else {
1330                result = TRAIL_SURROGATE_;
1331            }
1332        }
1333        return result;
1334    }
1335
1336    /**
1337    * Getting the character with extended name of the form <....>.
1338    * @param name of the character to be found
1339    * @param choice name choice
1340    * @return character associated with the name, -1 if such character is not
1341    *                   found and -2 if we should continue with the search.
1342    */
1343    private static int getExtendedChar(String name, int choice)
1344    {
1345        if (name.charAt(0) == '<') {
1346            if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
1347                int endIndex = name.length() - 1;
1348                if (name.charAt(endIndex) == '>') {
1349                    int startIndex = name.lastIndexOf('-');
1350                    if (startIndex >= 0) { // We've got a category.
1351                        startIndex ++;
1352                        int result = -1;
1353                        try {
1354                            result = Integer.parseInt(
1355                                        name.substring(startIndex, endIndex),
1356                                        16);
1357                        }
1358                        catch (NumberFormatException e) {
1359                            return -1;
1360                        }
1361                        // Now validate the category name. We could use a
1362                        // binary search, or a trie, if we really wanted to.
1363                        String type = name.substring(1, startIndex - 1);
1364                        int length = TYPE_NAMES_.length;
1365                        for (int i = 0; i < length; ++ i) {
1366                            if (type.compareTo(TYPE_NAMES_[i]) == 0) {
1367                                if (getType(result) == i) {
1368                                    return result;
1369                                }
1370                                break;
1371                            }
1372                        }
1373                    }
1374                }
1375            }
1376            return -1;
1377        }
1378        return -2;
1379    }
1380
1381    // sets of name characters, maximum name lengths -----------------------
1382
1383    /**
1384     * Adds a codepoint into a set of ints.
1385     * Equivalent to SET_ADD.
1386     * @param set set to add to
1387     * @param ch 16 bit char to add
1388     */
1389    private static void add(int set[], char ch)
1390    {
1391        set[ch >>> 5] |= 1 << (ch & 0x1f);
1392    }
1393
1394    /**
1395     * Checks if a codepoint is a part of a set of ints.
1396     * Equivalent to SET_CONTAINS.
1397     * @param set set to check in
1398     * @param ch 16 bit char to check
1399     * @return true if codepoint is part of the set, false otherwise
1400     */
1401    private static boolean contains(int set[], char ch)
1402    {
1403        return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
1404    }
1405
1406    /**
1407     * Adds all characters of the argument str and gets the length
1408     * Equivalent to calcStringSetLength.
1409     * @param set set to add all chars of str to
1410     * @param str string to add
1411     */
1412    private static int add(int set[], String str)
1413    {
1414        int result = str.length();
1415
1416        for (int i = result - 1; i >= 0; i --) {
1417            add(set, str.charAt(i));
1418        }
1419        return result;
1420    }
1421
1422    /**
1423     * Adds all characters of the argument str and gets the length
1424     * Equivalent to calcStringSetLength.
1425     * @param set set to add all chars of str to
1426     * @param str string to add
1427     */
1428    private static int add(int set[], StringBuffer str)
1429    {
1430        int result = str.length();
1431
1432        for (int i = result - 1; i >= 0; i --) {
1433            add(set, str.charAt(i));
1434        }
1435        return result;
1436    }
1437
1438    /**
1439     * Adds all algorithmic names into the name set.
1440     * Equivalent to part of calcAlgNameSetsLengths.
1441     * @param maxlength length to compare to
1442     * @return the maximum length of any possible algorithmic name if it is >
1443     *         maxlength, otherwise maxlength is returned.
1444     */
1445    private int addAlgorithmName(int maxlength)
1446    {
1447        int result = 0;
1448        for (int i = m_algorithm_.length - 1; i >= 0; i --) {
1449            result = m_algorithm_[i].add(m_nameSet_, maxlength);
1450            if (result > maxlength) {
1451                maxlength = result;
1452            }
1453        }
1454        return maxlength;
1455    }
1456
1457    /**
1458     * Adds all extended names into the name set.
1459     * Equivalent to part of calcExtNameSetsLengths.
1460     * @param maxlength length to compare to
1461     * @return the maxlength of any possible extended name.
1462     */
1463    private int addExtendedName(int maxlength)
1464    {
1465        for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
1466            // for each category, count the length of the category name
1467            // plus 9 =
1468            // 2 for <>
1469            // 1 for -
1470            // 6 for most hex digits per code point
1471            int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
1472            if (length > maxlength) {
1473                maxlength = length;
1474            }
1475        }
1476        return maxlength;
1477    }
1478
1479    /**
1480     * Adds names of a group to the argument set.
1481     * Equivalent to calcNameSetLength.
1482     * @param offset of the group name string in byte count
1483     * @param length of the group name string
1484     * @param tokenlength array to store the length of each token
1485     * @param set to add to
1486     * @return the length of the name string and the length of the group
1487     *         string parsed
1488     */
1489    private int[] addGroupName(int offset, int length, byte tokenlength[],
1490                               int set[])
1491    {
1492        int resultnlength = 0;
1493        int resultplength = 0;
1494        while (resultplength < length) {
1495            char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
1496            resultplength ++;
1497            if (b == ';') {
1498                break;
1499            }
1500
1501            if (b >= m_tokentable_.length) {
1502                add(set, b); // implicit letter
1503                resultnlength ++;
1504            }
1505            else {
1506                char token = m_tokentable_[b & 0x00ff];
1507                if (token == 0xFFFE) {
1508                    // this is a lead byte for a double-byte token
1509                    b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
1510                                         & 0x00ff));
1511                    token = m_tokentable_[b];
1512                    resultplength ++;
1513                }
1514                if (token == 0xFFFF) {
1515                    add(set, b);
1516                    resultnlength ++;
1517                }
1518                else {
1519                    // count token word
1520                    // use cached token length
1521                    byte tlength = tokenlength[b];
1522                    if (tlength == 0) {
1523                        synchronized (m_utilStringBuffer_) {
1524                            m_utilStringBuffer_.delete(0,
1525                                                 m_utilStringBuffer_.length());
1526                            UCharacterUtility.getNullTermByteSubString(
1527                                           m_utilStringBuffer_, m_tokenstring_,
1528                                           token);
1529                            tlength = (byte)add(set, m_utilStringBuffer_);
1530                        }
1531                        tokenlength[b] = tlength;
1532                    }
1533                    resultnlength += tlength;
1534                }
1535            }
1536        }
1537        m_utilIntBuffer_[0] = resultnlength;
1538        m_utilIntBuffer_[1] = resultplength;
1539        return m_utilIntBuffer_;
1540    }
1541
1542    /**
1543     * Adds names of all group to the argument set.
1544     * Sets the data member m_max*Length_.
1545     * Method called only once.
1546     * Equivalent to calcGroupNameSetsLength.
1547     * @param maxlength length to compare to
1548     */
1549    private void addGroupName(int maxlength)
1550    {
1551        int maxisolength = 0;
1552        char offsets[] = new char[LINES_PER_GROUP_ + 2];
1553        char lengths[] = new char[LINES_PER_GROUP_ + 2];
1554        byte tokenlengths[] = new byte[m_tokentable_.length];
1555
1556        // enumerate all groups
1557        // for (int i = m_groupcount_ - 1; i >= 0; i --) {
1558        for (int i = 0; i < m_groupcount_ ; i ++) {
1559            int offset = getGroupLengths(i, offsets, lengths);
1560            // enumerate all lines in each group
1561            // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
1562            //    linenumber --) {
1563            for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
1564                linenumber ++) {
1565                int lineoffset = offset + offsets[linenumber];
1566                int length = lengths[linenumber];
1567                if (length == 0) {
1568                    continue;
1569                }
1570
1571                // read regular name
1572                int parsed[] = addGroupName(lineoffset, length, tokenlengths,
1573                                            m_nameSet_);
1574                if (parsed[0] > maxlength) {
1575                    // 0 for name length
1576                    maxlength = parsed[0];
1577                }
1578                lineoffset += parsed[1];
1579                if (parsed[1] >= length) {
1580                    // 1 for parsed group string length
1581                    continue;
1582                }
1583                length -= parsed[1];
1584                // read Unicode 1.0 name
1585                parsed = addGroupName(lineoffset, length, tokenlengths,
1586                                      m_nameSet_);
1587                if (parsed[0] > maxlength) {
1588                    // 0 for name length
1589                    maxlength = parsed[0];
1590                }
1591                lineoffset += parsed[1];
1592                if (parsed[1] >= length) {
1593                    // 1 for parsed group string length
1594                    continue;
1595                }
1596                length -= parsed[1];
1597                // read ISO comment
1598                parsed = addGroupName(lineoffset, length, tokenlengths,
1599                                      m_ISOCommentSet_);
1600                if (parsed[1] > maxisolength) {
1601                    maxisolength = length;
1602                }
1603            }
1604        }
1605
1606        // set gMax... - name length last for threading
1607        m_maxISOCommentLength_ = maxisolength;
1608        m_maxNameLength_ = maxlength;
1609    }
1610
1611    /**
1612     * Sets up the name sets and the calculation of the maximum lengths.
1613     * Equivalent to calcNameSetsLengths.
1614     */
1615    private boolean initNameSetsLengths()
1616    {
1617        if (m_maxNameLength_ > 0) {
1618            return true;
1619        }
1620
1621        String extra = "0123456789ABCDEF<>-";
1622        // set hex digits, used in various names, and <>-, used in extended
1623        // names
1624        for (int i = extra.length() - 1; i >= 0; i --) {
1625            add(m_nameSet_, extra.charAt(i));
1626        }
1627
1628        // set sets and lengths from algorithmic names
1629        m_maxNameLength_ = addAlgorithmName(0);
1630        // set sets and lengths from extended names
1631        m_maxNameLength_ = addExtendedName(m_maxNameLength_);
1632        // set sets and lengths from group names, set global maximum values
1633        addGroupName(m_maxNameLength_);
1634        return true;
1635    }
1636
1637    /**
1638     * Converts the char set cset into a Unicode set uset.
1639     * Equivalent to charSetToUSet.
1640     * @param set Set of 256 bit flags corresponding to a set of chars.
1641     * @param uset USet to receive characters. Existing contents are deleted.
1642     */
1643    private void convert(int set[], UnicodeSet uset)
1644    {
1645        uset.clear();
1646        if (!initNameSetsLengths()) {
1647            return;
1648        }
1649
1650        // build a char string with all chars that are used in character names
1651        for (char c = 255; c > 0; c --) {
1652            if (contains(set, c)) {
1653                uset.add(c);
1654            }
1655        }
1656    }
1657}
1658