/**
*******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/
package com.ibm.icu.impl;

/**
 * Internal character utility class for simple data type conversion and
 * parsing of zero-terminated byte strings. Does not have an analog in the JDK.
 * All members are static; the class cannot be instantiated.
 * @author Syn Wee Quek
 * @since sep2900
 */

public final class UCharacterUtility
{
    // public methods -----------------------------------------------------

    /**
     * Determines if a code point is a Unicode noncharacter.
     * A value is reported as a noncharacter when its low 16 bits are 0xFFFE
     * or 0xFFFF, or when it lies in the range 0xFDD0..0xFDEF inclusive.
     * The argument is not checked against the valid code point range, so only
     * the low 16 bits take part in the first test.
     * @param ch code point
     * @return true if the code point is a noncharacter, false otherwise
     */
    public static boolean isNonCharacter(int ch)
    {
        // Masking with 0xFFFE ignores the lowest bit, so this matches both
        // the ..FFFE and ..FFFF suffixes in a single comparison.
        boolean hasNonCharacterSuffix = (ch & NON_CHARACTER_SUFFIX_MIN_3_0_)
                                            == NON_CHARACTER_SUFFIX_MIN_3_0_;
        if (hasNonCharacterSuffix) {
            return true;
        }

        return NON_CHARACTER_MIN_3_1_ <= ch && ch <= NON_CHARACTER_MAX_3_1_;
    }

    // package private methods ---------------------------------------------

    /**
     * Joins 2 chars to form an int.
     * @param msc most significant char, placed in the upper 16 bits
     * @param lsc least significant char, placed in the lower 16 bits
     * @return int form
     */
    static int toInt(char msc, char lsc)
    {
        // char promotes to a non-negative int, so no masking is required
        return (msc << 16) | lsc;
    }

    /**
     * Retrieves a zero-terminated substring from an array of bytes.
     * The substring is the run of non-zero bytes starting at index and ending
     * just before the next zero byte. If the byte at index is itself zero,
     * the substring is empty and nothing is appended.
     * @param str stringbuffer to append to; each byte is appended as one char
     *            holding the unsigned value (0x01..0xFF) of the byte
     * @param array byte array
     * @param index position in array at which the substring starts
     * @return the position in array immediately after the terminating zero
     *         byte
     * @throws ArrayIndexOutOfBoundsException if no zero byte is found at or
     *         after index
     */
    static int getNullTermByteSubString(StringBuffer str, byte[] array,
                                                  int index)
    {
        for (;;)
        {
            byte current = array[index];
            index ++;
            if (current == 0) {
                return index;
            }
            // mask so that bytes >= 0x80 do not sign-extend into the char
            str.append((char)(current & 0x00FF));
        }
    }

    /**
     * Compares a zero-terminated substring from an array of bytes against
     * the contents of a string.
     * The substring is the run of non-zero bytes starting at aindex and
     * ending just before the next zero byte; each byte is compared, as an
     * unsigned value, with the char at the corresponding position of str.
     * If the byte at aindex is itself zero, the substring is empty and
     * strindex is returned unchanged. str may continue beyond the matched
     * part.
     * @param str string to compare
     * @param array byte array
     * @param strindex index within str to start comparing
     * @param aindex position in array at which the substring starts
     * @return the position within str immediately after the matched part if
     *         the whole substring matches, otherwise -1
     * @throws ArrayIndexOutOfBoundsException if the comparison runs off the
     *         end of array before a zero byte or a mismatch is found
     */
    static int compareNullTermByteSubString(String str, byte[] array,
                                                      int strindex, int aindex)
    {
        final int length = str.length();

        for (;;)
        {
            byte current = array[aindex];
            aindex ++;
            if (current == 0) {
                // reached the terminator without a mismatch
                return strindex;
            }
            // if we have reached the end of the string and yet the array has
            // not reached the end of its substring, or the chars differ, abort
            if (strindex == length
                || str.charAt(strindex) != (char)(current & 0xFF)) {
                return -1;
            }
            strindex ++;
        }
    }

    /**
     * Skips zero-terminated substrings in an array of bytes.
     * Each substring is a (possibly empty) run of non-zero bytes followed by
     * a zero byte; a lone zero byte counts as one substring.
     * @param array byte array
     * @param index position in array at which the first substring starts
     * @param skipcount number of zero-terminated substrings to skip; if it
     *                  is not positive, index is returned unchanged
     * @return the position in array immediately after the terminating zero
     *         byte of the last skipped substring
     * @throws ArrayIndexOutOfBoundsException if array ends before skipcount
     *         zero bytes have been found
     */
    static int skipNullTermByteSubString(byte[] array, int index,
                                                   int skipcount)
    {
        for (int skipped = 0; skipped < skipcount; skipped ++)
        {
            // advance past every non-zero byte and the zero that ends the run
            while (array[index ++] != 0) {
                // empty: all the work happens in the loop condition
            }
        }
        return index;
    }

    /**
     * Skips bytes in an array up to and including the first byte equal to
     * skipend, examining at most length bytes.
     * @param array byte array to parse
     * @param index position in array at which to start
     * @param length the maximum number of bytes to examine
     * @param skipend value of the byte to skip to
     * @return the number of bytes skipped: the offset of the first skipend
     *         byte relative to index plus one, or length if no skipend byte
     *         occurs within the first length bytes (0 if length is not
     *         positive)
     */
    static int skipByteSubString(byte[] array, int index, int length,
                                           byte skipend)
    {
        int skipped = 0;

        while (skipped < length)
        {
            byte current = array[index + skipped];
            // the examined byte is always counted, including the terminator
            skipped ++;
            if (current == skipend) {
                break;
            }
        }

        return skipped;
    }

    // private data member --------------------------------------------------

    /**
     * Minimum suffix value that indicates if a character is non character.
     * Unicode 3.0 non characters
     */
    private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
    /**
     * New minimum non character in Unicode 3.1
     */
    private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
    /**
     * New maximum non character in Unicode 3.1
     */
    private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;

    // private constructor --------------------------------------------------

    ///CLOVER:OFF
    /**
     * private constructor to avoid initialisation
     */
    private UCharacterUtility()
    {
    }
    ///CLOVER:ON
}