17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 1996-2004, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Internal character utility class for simple data type conversion and String 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* parsing functions. Does not have an analog in the JDK. 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* @author Syn Wee Quek 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* @since sep2900 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class UCharacterUtility 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert{ 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // public methods ----------------------------------------------------- 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Determines if codepoint is a non character 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ch codepoint 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if codepoint is a non character false otherwise 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static boolean isNonCharacter(int ch) 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) == 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert NON_CHARACTER_SUFFIX_MIN_3_0_) { 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return true; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ch >= NON_CHARACTER_MIN_3_1_ && ch <= NON_CHARACTER_MAX_3_1_; 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // package private methods --------------------------------------------- 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * joining 2 chars to form an int 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param msc most significant char 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param lsc least significant char 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return int form 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static int toInt(char msc, char lsc) 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ((msc << 16) | lsc); 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Retrieves a null terminated substring from an array of bytes. 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Substring is a set of non-zero bytes starting from argument start to the 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * next zero byte. If the first byte is a zero, the next byte will be taken as 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the first byte. 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param str stringbuffer to store data in, data will be store with each 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * byte as a char 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param array byte array 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index to start substring in byte count 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the end position of the substring within the character array 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static int getNullTermByteSubString(StringBuffer str, byte[] array, 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int index) 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte b = 1; 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (b != 0) 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert b = array[index]; 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (b != 0) { 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.append((char)(b & 0x00FF)); 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index ++; 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return index; 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Compares a null terminated substring from an array of bytes. 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Substring is a set of non-zero bytes starting from argument start to the 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * next zero byte. if the first byte is a zero, the next byte will be taken as 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the first byte. 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param str string to compare 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param array byte array 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param strindex index within str to start comparing 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param aindex array index to start in byte count 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the end position of the substring within str if matches otherwise 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a -1 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static int compareNullTermByteSubString(String str, byte[] array, 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int strindex, int aindex) 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte b = 1; 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length = str.length(); 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (b != 0) 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert b = array[aindex]; 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert aindex ++; 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (b == 0) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if we have reached the end of the string and yet the array has not 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reached the end of their substring yet, abort 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (strindex == length 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || (str.charAt(strindex) != (char)(b & 0xFF))) { 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return -1; 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert strindex ++; 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return strindex; 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Skip null terminated substrings from an array of bytes. 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Substring is a set of non-zero bytes starting from argument start to the 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * next zero byte. If the first byte is a zero, the next byte will be taken as 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the first byte. 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param array byte array 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index to start substrings in byte count 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param skipcount number of null terminated substrings to skip 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the end position of the substrings within the character array 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static int skipNullTermByteSubString(byte[] array, int index, 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int skipcount) 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte b; 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < skipcount; i ++) 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert b = 1; 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (b != 0) 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert b = array[index]; 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index ++; 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return index; 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * skip substrings from an array of characters, where each character is a set 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of 2 bytes. substring is a set of non-zero bytes starting from argument 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * start to the byte of the argument value. skips up to a max number of 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param array byte array to parse 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index to start substrings in byte count 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param length the max number of bytes to skip 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param skipend value of byte to skip to 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the number of bytes skipped 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static int skipByteSubString(byte[] array, int index, int length, 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte skipend) 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int result; 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte b; 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (result = 0; result < length; result ++) 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert b = array[index + result]; 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (b == skipend) 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result ++; 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private data member -------------------------------------------------- 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Minimum suffix value that indicates if a character is non character. 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Unicode 3.0 non characters 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE; 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * New minimum non character in Unicode 3.1 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0; 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * New non character range in Unicode 3.1 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF; 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private constructor -------------------------------------------------- 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:OFF 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * private constructor to avoid initialisation 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private UCharacterUtility() 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:ON 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 195