12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 5f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller * Copyright (C) 1996-2015, International Business Machines Corporation and 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Arrays; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 172d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert* <p>Internal reader class for ICU data file uname.dat containing 182d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert* Unicode codepoint name data.</p> 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* <p>This class simply reads unames.icu, authenticates that it is a valid 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* ICU data file and split its contents up into blocks of data for use in 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* <a href=UCharacterName.html>com.ibm.icu.impl.UCharacterName</a>. 222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert* </p> 232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert* <p>unames.icu which is in big-endian format is jared together with this 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* package.</p> 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* @author Syn Wee Quek 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* @since release 2.1, February 1st 2002 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertfinal class UCharacterNameReader implements ICUBinary.Authenticate 302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert{ 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // public methods ---------------------------------------------------- 322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean isDataVersionAcceptable(byte version[]) 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return version[0] == 1; 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // protected constructor --------------------------------------------- 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Protected constructor.</p> 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param bytes ICU uprop.dat file buffer 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IOException throw if data file fails authentication 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected UCharacterNameReader(ByteBuffer bytes) throws IOException 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ICUBinary.readHeader(bytes, DATA_FORMAT_ID_, this); 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_byteBuffer_ = bytes; 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // protected methods ------------------------------------------------- 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Read and break up the stream of data passed in as arguments 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and fills up UCharacterName. 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If unsuccessful false will be returned. 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param data instance of datablock 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IOException thrown when there's a data error. 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected void read(UCharacterName data) throws IOException 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reading index 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_tokenstringindex_ = m_byteBuffer_.getInt(); 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_groupindex_ = m_byteBuffer_.getInt(); 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_groupstringindex_ = m_byteBuffer_.getInt(); 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_algnamesindex_ = m_byteBuffer_.getInt(); 682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reading tokens 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int count = m_byteBuffer_.getChar(); 71f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller char token[] = ICUBinary.getChars(m_byteBuffer_, count, 0); 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int size = m_groupindex_ - m_tokenstringindex_; 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte tokenstr[] = new byte[size]; 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_byteBuffer_.get(tokenstr); 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert data.setToken(token, tokenstr); 762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // reading the group information records 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count = m_byteBuffer_.getChar(); 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert data.setGroupCountSize(count, GROUP_INFO_SIZE_); 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count *= GROUP_INFO_SIZE_; 81f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller char group[] = ICUBinary.getChars(m_byteBuffer_, count, 0); 82f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert size = m_algnamesindex_ - m_groupstringindex_; 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte groupstring[] = new byte[size]; 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_byteBuffer_.get(groupstring); 862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert data.setGroup(group, groupstring); 882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count = m_byteBuffer_.getInt(); 902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert UCharacterName.AlgorithmName alg[] = 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert new UCharacterName.AlgorithmName[count]; 922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < count; i ++) 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacterName.AlgorithmName an = readAlg(); 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (an == null) { 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IOException("unames.icu read error: Algorithmic names creation error"); 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alg[i] = an; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert data.setAlgorithm(alg); 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1032d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Checking the file for the correct format.</p> 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param dataformatid 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param dataformatversion 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if the file format version is correct 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:OFF 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected boolean authenticate(byte dataformatid[], 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte dataformatversion[]) 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Arrays.equals( 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ICUBinary.getVersionByteArrayFromCompactInt(DATA_FORMAT_ID_), 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dataformatid) && 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert isDataVersionAcceptable(dataformatversion); 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ///CLOVER:ON 1202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private variables ------------------------------------------------- 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Byte buffer for names 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ByteBuffer m_byteBuffer_; 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Size of the group information block in number of char 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int GROUP_INFO_SIZE_ = 3; 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Index of the offset information 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int m_tokenstringindex_; 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int m_groupindex_; 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int m_groupstringindex_; 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int m_algnamesindex_; 1392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Size of an algorithmic name information group 1422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * start code point size + end code point size + type size + variant size + 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * size of data size 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int ALG_INFO_SIZE_ = 12; 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * File format id that this class understands. 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int DATA_FORMAT_ID_ = 0x756E616D; 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private methods --------------------------------------------------- 1532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Reads an individual record of AlgorithmNames 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return an instance of AlgorithNames if read is successful otherwise null 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IOException thrown when file read error occurs or data is corrupted 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private UCharacterName.AlgorithmName readAlg() throws IOException 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert UCharacterName.AlgorithmName result = 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert new UCharacterName.AlgorithmName(); 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rangestart = m_byteBuffer_.getInt(); 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rangeend = m_byteBuffer_.getInt(); 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte type = m_byteBuffer_.get(); 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte variant = m_byteBuffer_.get(); 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!result.setInfo(rangestart, rangeend, type, variant)) { 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return null; 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int size = m_byteBuffer_.getChar(); 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (type == UCharacterName.AlgorithmName.TYPE_1_) 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 174f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller char factor[] = ICUBinary.getChars(m_byteBuffer_, variant, 0); 175f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setFactor(factor); 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert size -= (variant << 1); 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder prefix = new StringBuilder(); 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = (char)(m_byteBuffer_.get() & 0x00FF); 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (c != 0) 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prefix.append(c); 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert c = (char)(m_byteBuffer_.get() & 0x00FF); 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setPrefix(prefix.toString()); 1892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert size -= (ALG_INFO_SIZE_ + prefix.length() + 1); 1912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (size > 0) 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte string[] = new byte[size]; 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_byteBuffer_.get(string); 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setFactorString(string); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 201