// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.util.Arrays;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
* <p>Internal reader class for the ICU data file unames.icu containing
* Unicode code point name data.</p>
* <p>This class simply reads unames.icu, authenticates that it is a valid
* ICU data file and splits its contents up into blocks of data for use in
* <a href="UCharacterName.html">com.ibm.icu.impl.UCharacterName</a>.
* </p>
* <p>unames.icu, which is in big-endian format, is packaged in the same jar
* as this package.</p>
* @author Syn Wee Quek
* @since release 2.1, February 1st 2002
*/
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertfinal class UCharacterNameReader implements ICUBinary.Authenticate
302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert{
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // public methods ----------------------------------------------------
322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isDataVersionAcceptable(byte version[])
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return version[0] == 1;
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // protected constructor ---------------------------------------------
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * <p>Protected constructor.</p>
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @param bytes ICU uprop.dat file buffer
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @exception IOException throw if data file fails authentication
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected UCharacterNameReader(ByteBuffer bytes) throws IOException
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ICUBinary.readHeader(bytes, DATA_FORMAT_ID_, this);
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_byteBuffer_ = bytes;
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // protected methods -------------------------------------------------
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Read and break up the stream of data passed in as arguments
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * and fills up UCharacterName.
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * If unsuccessful false will be returned.
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @param data instance of datablock
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @exception IOException thrown when there's a data error.
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void read(UCharacterName data) throws IOException
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // reading index
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_tokenstringindex_ = m_byteBuffer_.getInt();
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_groupindex_       = m_byteBuffer_.getInt();
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_groupstringindex_ = m_byteBuffer_.getInt();
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_algnamesindex_    = m_byteBuffer_.getInt();
682d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // reading tokens
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = m_byteBuffer_.getChar();
71f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller        char token[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int size = m_groupindex_ - m_tokenstringindex_;
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte tokenstr[] = new byte[size];
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_byteBuffer_.get(tokenstr);
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data.setToken(token, tokenstr);
762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // reading the group information records
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count = m_byteBuffer_.getChar();
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data.setGroupCountSize(count, GROUP_INFO_SIZE_);
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count *= GROUP_INFO_SIZE_;
81f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller        char group[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
82f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        size = m_algnamesindex_ - m_groupstringindex_;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte groupstring[] = new byte[size];
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        m_byteBuffer_.get(groupstring);
862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data.setGroup(group, groupstring);
882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count = m_byteBuffer_.getInt();
902d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        UCharacterName.AlgorithmName alg[] =
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 new UCharacterName.AlgorithmName[count];
922d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < count; i ++)
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UCharacterName.AlgorithmName an = readAlg();
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (an == null) {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IOException("unames.icu read error: Algorithmic names creation error");
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            alg[i] = an;
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data.setAlgorithm(alg);
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1032d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * <p>Checking the file for the correct format.</p>
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @param dataformatid
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @param dataformatversion
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @return true if the file format version is correct
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    ///CLOVER:OFF
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected boolean authenticate(byte dataformatid[],
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                   byte dataformatversion[])
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return Arrays.equals(
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ICUBinary.getVersionByteArrayFromCompactInt(DATA_FORMAT_ID_),
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                dataformatid) &&
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert               isDataVersionAcceptable(dataformatversion);
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    ///CLOVER:ON
1202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private variables -------------------------------------------------
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Byte buffer for names
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private ByteBuffer m_byteBuffer_;
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Size of the group information block in number of char
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int GROUP_INFO_SIZE_ = 3;
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Index of the offset information
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int m_tokenstringindex_;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int m_groupindex_;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int m_groupstringindex_;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int m_algnamesindex_;
1392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Size of an algorithmic name information group
1422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    * start code point size + end code point size + type size + variant size +
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * size of data size
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int ALG_INFO_SIZE_ = 12;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * File format id that this class understands.
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int DATA_FORMAT_ID_ = 0x756E616D;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private methods ---------------------------------------------------
1532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Reads an individual record of AlgorithmNames
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @return an instance of AlgorithNames if read is successful otherwise null
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @exception IOException thrown when file read error occurs or data is corrupted
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UCharacterName.AlgorithmName readAlg() throws IOException
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
1612d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        UCharacterName.AlgorithmName result =
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       new UCharacterName.AlgorithmName();
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int rangestart = m_byteBuffer_.getInt();
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int rangeend   = m_byteBuffer_.getInt();
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte type      = m_byteBuffer_.get();
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte variant   = m_byteBuffer_.get();
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (!result.setInfo(rangestart, rangeend, type, variant)) {
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return null;
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int size = m_byteBuffer_.getChar();
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (type == UCharacterName.AlgorithmName.TYPE_1_)
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
174f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller            char factor[] = ICUBinary.getChars(m_byteBuffer_, variant, 0);
175f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.setFactor(factor);
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            size -= (variant << 1);
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1792d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder prefix = new StringBuilder();
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char c = (char)(m_byteBuffer_.get() & 0x00FF);
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (c != 0)
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prefix.append(c);
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c = (char)(m_byteBuffer_.get() & 0x00FF);
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.setPrefix(prefix.toString());
1892d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        size -= (ALG_INFO_SIZE_ + prefix.length() + 1);
1912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (size > 0)
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            byte string[] = new byte[size];
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m_byteBuffer_.get(string);
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.setFactorString(string);
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
201