1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 **********************************************************************
5 * Copyright (c) 2002-2015, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 * Author: Alan Liu
9 * Created: November 5 2002
10 * Since: ICU 2.4
11 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
12 **********************************************************************
13 */
14
15package com.ibm.icu.impl;
16
17import java.io.IOException;
18import java.nio.ByteBuffer;
19import java.util.MissingResourceException;
20
21import com.ibm.icu.lang.UProperty;
22import com.ibm.icu.util.BytesTrie;
23
24/**
25 * Wrapper for the pnames.icu binary data file.  This data file is
26 * imported from icu4c.  It contains property and property value
27 * aliases from the UCD files PropertyAliases.txt and
28 * PropertyValueAliases.txt.  The file is built by the icu4c tool
29 * genpname.  It must be an ASCII big-endian file to be
30 * usable in icu4j.
31 *
32 * This class performs two functions.
33 *
34 * (1) It can import the flat binary data into usable objects.
35 *
36 * (2) It provides an API to access the tree of objects.
37 *
38 * Needless to say, this class is tightly coupled to the binary format
39 * of icu4c's pnames.icu file.
40 *
 * The pnames.icu file is read and parsed, and the data structures are
 * assembled, once when this class is initialized.  The constructor is
 * private; clients use the shared {@link #INSTANCE}.
44 *
45 * @author Alan Liu
46 * @since ICU 2.4
47 */
48public final class UPropertyAliases {
49    // Byte offsets from the start of the data, after the generic header.
50    private static final int IX_VALUE_MAPS_OFFSET=0;
51    private static final int IX_BYTE_TRIES_OFFSET=1;
52    private static final int IX_NAME_GROUPS_OFFSET=2;
53    private static final int IX_RESERVED3_OFFSET=3;
54    // private static final int IX_RESERVED4_OFFSET=4;
55    // private static final int IX_TOTAL_SIZE=5;
56
57    // Other values.
58    // private static final int IX_MAX_NAME_LENGTH=6;
59    // private static final int IX_RESERVED7=7;
60    // private static final int IX_COUNT=8;
61
62    //----------------------------------------------------------------
63    // Runtime data.  This is an unflattened representation of the
64    // data in pnames.icu.
65
66    private int[] valueMaps;
67    private byte[] bytesTries;
68    private String nameGroups;
69
70    private static final class IsAcceptable implements ICUBinary.Authenticate {
71        // @Override when we switch to Java 6
72        @Override
73        public boolean isDataVersionAcceptable(byte version[]) {
74            return version[0]==2;
75        }
76    }
77    private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
78    private static final int DATA_FORMAT=0x706E616D;  // "pnam"
79
80    private void load(ByteBuffer bytes) throws IOException {
81        //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
82        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
83        int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
84        if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
85            throw new IOException("pnames.icu: not enough indexes");
86        }
87        int[] inIndexes=new int[indexesLength];
88        inIndexes[0]=indexesLength*4;
89        for(int i=1; i<indexesLength; ++i) {
90            inIndexes[i]=bytes.getInt();
91        }
92
93        // Read the valueMaps.
94        int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
95        int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
96        int numInts=(nextOffset-offset)/4;
97        valueMaps=ICUBinary.getInts(bytes, numInts, 0);
98
99        // Read the bytesTries.
100        offset=nextOffset;
101        nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
102        int numBytes=nextOffset-offset;
103        bytesTries=new byte[numBytes];
104        bytes.get(bytesTries);
105
106        // Read the nameGroups and turn them from ASCII bytes into a Java String.
107        offset=nextOffset;
108        nextOffset=inIndexes[IX_RESERVED3_OFFSET];
109        numBytes=nextOffset-offset;
110        StringBuilder sb=new StringBuilder(numBytes);
111        for(int i=0; i<numBytes; ++i) {
112            sb.append((char)bytes.get());
113        }
114        nameGroups=sb.toString();
115    }
116
117    private UPropertyAliases() throws IOException {
118        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
119        load(bytes);
120    }
121
122    private int findProperty(int property) {
123        int i=1;  // valueMaps index, initially after numRanges
124        for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
125            // Read and skip the start and limit of this range.
126            int start=valueMaps[i];
127            int limit=valueMaps[i+1];
128            i+=2;
129            if(property<start) {
130                break;
131            }
132            if(property<limit) {
133                return i+(property-start)*2;
134            }
135            i+=(limit-start)*2;  // Skip all entries for this range.
136        }
137        return 0;
138    }
139
140    private int findPropertyValueNameGroup(int valueMapIndex, int value) {
141        if(valueMapIndex==0) {
142            return 0;  // The property does not have named values.
143        }
144        ++valueMapIndex;  // Skip the BytesTrie offset.
145        int numRanges=valueMaps[valueMapIndex++];
146        if(numRanges<0x10) {
147            // Ranges of values.
148            for(; numRanges>0; --numRanges) {
149                // Read and skip the start and limit of this range.
150                int start=valueMaps[valueMapIndex];
151                int limit=valueMaps[valueMapIndex+1];
152                valueMapIndex+=2;
153                if(value<start) {
154                    break;
155                }
156                if(value<limit) {
157                    return valueMaps[valueMapIndex+value-start];
158                }
159                valueMapIndex+=limit-start;  // Skip all entries for this range.
160            }
161        } else {
162            // List of values.
163            int valuesStart=valueMapIndex;
164            int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
165            do {
166                int v=valueMaps[valueMapIndex];
167                if(value<v) {
168                    break;
169                }
170                if(value==v) {
171                    return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
172                }
173            } while(++valueMapIndex<nameGroupOffsetsStart);
174        }
175        return 0;
176    }
177
178    private String getName(int nameGroupsIndex, int nameIndex) {
179        int numNames=nameGroups.charAt(nameGroupsIndex++);
180        if(nameIndex<0 || numNames<=nameIndex) {
181            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
182        }
183        // Skip nameIndex names.
184        for(; nameIndex>0; --nameIndex) {
185            while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
186        }
187        // Find the end of this name.
188        int nameStart=nameGroupsIndex;
189        while(0!=nameGroups.charAt(nameGroupsIndex)) {
190            ++nameGroupsIndex;
191        }
192        if(nameStart==nameGroupsIndex) {
193            return null;  // no name (Property[Value]Aliases.txt has "n/a")
194        }
195        return nameGroups.substring(nameStart, nameGroupsIndex);
196    }
197
198    private static int asciiToLowercase(int c) {
199        return 'A'<=c && c<='Z' ? c+0x20 : c;
200    }
201
202    private boolean containsName(BytesTrie trie, CharSequence name) {
203        BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
204        for(int i=0; i<name.length(); ++i) {
205            int c=name.charAt(i);
206            // Ignore delimiters '-', '_', and ASCII White_Space.
207            if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
208                continue;
209            }
210            if(!result.hasNext()) {
211                return false;
212            }
213            c=asciiToLowercase(c);
214            result=trie.next(c);
215        }
216        return result.hasValue();
217    }
218
219    //----------------------------------------------------------------
220    // Public API
221
222    public static final UPropertyAliases INSTANCE;
223
224    static {
225        try {
226            INSTANCE = new UPropertyAliases();
227        } catch(IOException e) {
228            ///CLOVER:OFF
229            MissingResourceException mre = new MissingResourceException(
230                    "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
231            mre.initCause(e);
232            throw mre;
233            ///CLOVER:ON
234        }
235    }
236
237    /**
238     * Returns a property name given a property enum.
239     * Multiple names may be available for each property;
240     * the nameChoice selects among them.
241     */
242    public String getPropertyName(int property, int nameChoice) {
243        int valueMapIndex=findProperty(property);
244        if(valueMapIndex==0) {
245            throw new IllegalArgumentException(
246                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
247        }
248        return getName(valueMaps[valueMapIndex], nameChoice);
249    }
250
251    /**
252     * Returns a value name given a property enum and a value enum.
253     * Multiple names may be available for each value;
254     * the nameChoice selects among them.
255     */
256    public String getPropertyValueName(int property, int value, int nameChoice) {
257        int valueMapIndex=findProperty(property);
258        if(valueMapIndex==0) {
259            throw new IllegalArgumentException(
260                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
261        }
262        int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
263        if(nameGroupOffset==0) {
264            throw new IllegalArgumentException(
265                    "Property "+property+" (0x"+Integer.toHexString(property)+
266                    ") does not have named values");
267        }
268        return getName(nameGroupOffset, nameChoice);
269    }
270
271    private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
272        BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
273        if(containsName(trie, alias)) {
274            return trie.getValue();
275        } else {
276            return UProperty.UNDEFINED;
277        }
278    }
279
280    /**
281     * Returns a property enum given one of its property names.
282     * If the property name is not known, this method returns
283     * UProperty.UNDEFINED.
284     */
285    public int getPropertyEnum(CharSequence alias) {
286        return getPropertyOrValueEnum(0, alias);
287    }
288
289    /**
290     * Returns a value enum given a property enum and one of its value names.
291     */
292    public int getPropertyValueEnum(int property, CharSequence alias) {
293        int valueMapIndex=findProperty(property);
294        if(valueMapIndex==0) {
295            throw new IllegalArgumentException(
296                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
297        }
298        valueMapIndex=valueMaps[valueMapIndex+1];
299        if(valueMapIndex==0) {
300            throw new IllegalArgumentException(
301                    "Property "+property+" (0x"+Integer.toHexString(property)+
302                    ") does not have named values");
303        }
304        // valueMapIndex is the start of the property's valueMap,
305        // where the first word is the BytesTrie offset.
306        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
307    }
308
309    /**
310     * Returns a value enum given a property enum and one of its value names. Does not throw.
311     * @return value enum, or UProperty.UNDEFINED if not defined for that property
312     */
313    public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
314        int valueMapIndex=findProperty(property);
315        if(valueMapIndex==0) {
316            return UProperty.UNDEFINED;
317        }
318        valueMapIndex=valueMaps[valueMapIndex+1];
319        if(valueMapIndex==0) {
320            return UProperty.UNDEFINED;
321        }
322        // valueMapIndex is the start of the property's valueMap,
323        // where the first word is the BytesTrie offset.
324        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
325    }
326
327    /**
328     * Compare two property names, returning <0, 0, or >0.  The
329     * comparison is that described as "loose" matching in the
330     * Property*Aliases.txt files.
331     */
332    public static int compare(String stra, String strb) {
333        // Note: This implementation is a literal copy of
334        // uprv_comparePropertyNames.  It can probably be improved.
335        int istra=0, istrb=0, rc;
336        int cstra=0, cstrb=0;
337        for (;;) {
338            /* Ignore delimiters '-', '_', and ASCII White_Space */
339            while (istra<stra.length()) {
340                cstra = stra.charAt(istra);
341                switch (cstra) {
342                case '-':  case '_':  case ' ':  case '\t':
343                case '\n': case 0xb/*\v*/: case '\f': case '\r':
344                    ++istra;
345                    continue;
346                }
347                break;
348            }
349
350            while (istrb<strb.length()) {
351                cstrb = strb.charAt(istrb);
352                switch (cstrb) {
353                case '-':  case '_':  case ' ':  case '\t':
354                case '\n': case 0xb/*\v*/: case '\f': case '\r':
355                    ++istrb;
356                    continue;
357                }
358                break;
359            }
360
361            /* If we reach the ends of both strings then they match */
362            boolean endstra = istra==stra.length();
363            boolean endstrb = istrb==strb.length();
364            if (endstra) {
365                if (endstrb) return 0;
366                cstra = 0;
367            } else if (endstrb) {
368                cstrb = 0;
369            }
370
371            rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
372            if (rc != 0) {
373                return rc;
374            }
375
376            ++istra;
377            ++istrb;
378        }
379    }
380}
381