UPropertyAliases.java revision f86f25d102340da66b9c7cb6b2d5ecdc0de43ecf
1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 **********************************************************************
6 * Copyright (c) 2002-2015, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 **********************************************************************
9 * Author: Alan Liu
10 * Created: November 5 2002
11 * Since: ICU 2.4
12 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
13 **********************************************************************
14 */
15
16package android.icu.impl;
17
18import java.io.IOException;
19import java.nio.ByteBuffer;
20import java.util.MissingResourceException;
21
22import android.icu.lang.UProperty;
23import android.icu.util.BytesTrie;
24
25/**
26 * Wrapper for the pnames.icu binary data file.  This data file is
27 * imported from icu4c.  It contains property and property value
28 * aliases from the UCD files PropertyAliases.txt and
29 * PropertyValueAliases.txt.  The file is built by the icu4c tool
30 * genpname.  It must be an ASCII big-endian file to be
31 * usable in icu4j.
32 *
33 * This class performs two functions.
34 *
35 * (1) It can import the flat binary data into usable objects.
36 *
37 * (2) It provides an API to access the tree of objects.
38 *
39 * Needless to say, this class is tightly coupled to the binary format
40 * of icu4c's pnames.icu file.
41 *
42 * Each time a UPropertyAliases is constructed, the pnames.icu file is
43 * read, parsed, and data structures assembled.  Clients should create one
44 * singleton instance and cache it.
45 *
46 * @author Alan Liu
47 * @hide Only a subset of ICU is exposed in Android
48 */
49public final class UPropertyAliases {
50    // Byte offsets from the start of the data, after the generic header.
51    private static final int IX_VALUE_MAPS_OFFSET=0;
52    private static final int IX_BYTE_TRIES_OFFSET=1;
53    private static final int IX_NAME_GROUPS_OFFSET=2;
54    private static final int IX_RESERVED3_OFFSET=3;
55    // private static final int IX_RESERVED4_OFFSET=4;
56    // private static final int IX_TOTAL_SIZE=5;
57
58    // Other values.
59    // private static final int IX_MAX_NAME_LENGTH=6;
60    // private static final int IX_RESERVED7=7;
61    // private static final int IX_COUNT=8;
62
63    //----------------------------------------------------------------
64    // Runtime data.  This is an unflattened representation of the
65    // data in pnames.icu.
66
67    private int[] valueMaps;
68    private byte[] bytesTries;
69    private String nameGroups;
70
71    private static final class IsAcceptable implements ICUBinary.Authenticate {
72        // @Override when we switch to Java 6
73        @Override
74        public boolean isDataVersionAcceptable(byte version[]) {
75            return version[0]==2;
76        }
77    }
78    private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
79    private static final int DATA_FORMAT=0x706E616D;  // "pnam"
80
81    private void load(ByteBuffer bytes) throws IOException {
82        //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
83        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
84        int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
85        if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
86            throw new IOException("pnames.icu: not enough indexes");
87        }
88        int[] inIndexes=new int[indexesLength];
89        inIndexes[0]=indexesLength*4;
90        for(int i=1; i<indexesLength; ++i) {
91            inIndexes[i]=bytes.getInt();
92        }
93
94        // Read the valueMaps.
95        int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
96        int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
97        int numInts=(nextOffset-offset)/4;
98        valueMaps=ICUBinary.getInts(bytes, numInts, 0);
99
100        // Read the bytesTries.
101        offset=nextOffset;
102        nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
103        int numBytes=nextOffset-offset;
104        bytesTries=new byte[numBytes];
105        bytes.get(bytesTries);
106
107        // Read the nameGroups and turn them from ASCII bytes into a Java String.
108        offset=nextOffset;
109        nextOffset=inIndexes[IX_RESERVED3_OFFSET];
110        numBytes=nextOffset-offset;
111        StringBuilder sb=new StringBuilder(numBytes);
112        for(int i=0; i<numBytes; ++i) {
113            sb.append((char)bytes.get());
114        }
115        nameGroups=sb.toString();
116    }
117
118    private UPropertyAliases() throws IOException {
119        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
120        load(bytes);
121    }
122
123    private int findProperty(int property) {
124        int i=1;  // valueMaps index, initially after numRanges
125        for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
126            // Read and skip the start and limit of this range.
127            int start=valueMaps[i];
128            int limit=valueMaps[i+1];
129            i+=2;
130            if(property<start) {
131                break;
132            }
133            if(property<limit) {
134                return i+(property-start)*2;
135            }
136            i+=(limit-start)*2;  // Skip all entries for this range.
137        }
138        return 0;
139    }
140
141    private int findPropertyValueNameGroup(int valueMapIndex, int value) {
142        if(valueMapIndex==0) {
143            return 0;  // The property does not have named values.
144        }
145        ++valueMapIndex;  // Skip the BytesTrie offset.
146        int numRanges=valueMaps[valueMapIndex++];
147        if(numRanges<0x10) {
148            // Ranges of values.
149            for(; numRanges>0; --numRanges) {
150                // Read and skip the start and limit of this range.
151                int start=valueMaps[valueMapIndex];
152                int limit=valueMaps[valueMapIndex+1];
153                valueMapIndex+=2;
154                if(value<start) {
155                    break;
156                }
157                if(value<limit) {
158                    return valueMaps[valueMapIndex+value-start];
159                }
160                valueMapIndex+=limit-start;  // Skip all entries for this range.
161            }
162        } else {
163            // List of values.
164            int valuesStart=valueMapIndex;
165            int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
166            do {
167                int v=valueMaps[valueMapIndex];
168                if(value<v) {
169                    break;
170                }
171                if(value==v) {
172                    return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
173                }
174            } while(++valueMapIndex<nameGroupOffsetsStart);
175        }
176        return 0;
177    }
178
179    private String getName(int nameGroupsIndex, int nameIndex) {
180        int numNames=nameGroups.charAt(nameGroupsIndex++);
181        if(nameIndex<0 || numNames<=nameIndex) {
182            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
183        }
184        // Skip nameIndex names.
185        for(; nameIndex>0; --nameIndex) {
186            while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
187        }
188        // Find the end of this name.
189        int nameStart=nameGroupsIndex;
190        while(0!=nameGroups.charAt(nameGroupsIndex)) {
191            ++nameGroupsIndex;
192        }
193        if(nameStart==nameGroupsIndex) {
194            return null;  // no name (Property[Value]Aliases.txt has "n/a")
195        }
196        return nameGroups.substring(nameStart, nameGroupsIndex);
197    }
198
199    private static int asciiToLowercase(int c) {
200        return 'A'<=c && c<='Z' ? c+0x20 : c;
201    }
202
203    private boolean containsName(BytesTrie trie, CharSequence name) {
204        BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
205        for(int i=0; i<name.length(); ++i) {
206            int c=name.charAt(i);
207            // Ignore delimiters '-', '_', and ASCII White_Space.
208            if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
209                continue;
210            }
211            if(!result.hasNext()) {
212                return false;
213            }
214            c=asciiToLowercase(c);
215            result=trie.next(c);
216        }
217        return result.hasValue();
218    }
219
220    //----------------------------------------------------------------
221    // Public API
222
223    public static final UPropertyAliases INSTANCE;
224
225    static {
226        try {
227            INSTANCE = new UPropertyAliases();
228        } catch(IOException e) {
229            ///CLOVER:OFF
230            MissingResourceException mre = new MissingResourceException(
231                    "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
232            mre.initCause(e);
233            throw mre;
234            ///CLOVER:ON
235        }
236    }
237
238    /**
239     * Returns a property name given a property enum.
240     * Multiple names may be available for each property;
241     * the nameChoice selects among them.
242     */
243    public String getPropertyName(int property, int nameChoice) {
244        int valueMapIndex=findProperty(property);
245        if(valueMapIndex==0) {
246            throw new IllegalArgumentException(
247                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
248        }
249        return getName(valueMaps[valueMapIndex], nameChoice);
250    }
251
252    /**
253     * Returns a value name given a property enum and a value enum.
254     * Multiple names may be available for each value;
255     * the nameChoice selects among them.
256     */
257    public String getPropertyValueName(int property, int value, int nameChoice) {
258        int valueMapIndex=findProperty(property);
259        if(valueMapIndex==0) {
260            throw new IllegalArgumentException(
261                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
262        }
263        int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
264        if(nameGroupOffset==0) {
265            throw new IllegalArgumentException(
266                    "Property "+property+" (0x"+Integer.toHexString(property)+
267                    ") does not have named values");
268        }
269        return getName(nameGroupOffset, nameChoice);
270    }
271
272    private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
273        BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
274        if(containsName(trie, alias)) {
275            return trie.getValue();
276        } else {
277            return UProperty.UNDEFINED;
278        }
279    }
280
281    /**
282     * Returns a property enum given one of its property names.
283     * If the property name is not known, this method returns
284     * UProperty.UNDEFINED.
285     */
286    public int getPropertyEnum(CharSequence alias) {
287        return getPropertyOrValueEnum(0, alias);
288    }
289
290    /**
291     * Returns a value enum given a property enum and one of its value names.
292     */
293    public int getPropertyValueEnum(int property, CharSequence alias) {
294        int valueMapIndex=findProperty(property);
295        if(valueMapIndex==0) {
296            throw new IllegalArgumentException(
297                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
298        }
299        valueMapIndex=valueMaps[valueMapIndex+1];
300        if(valueMapIndex==0) {
301            throw new IllegalArgumentException(
302                    "Property "+property+" (0x"+Integer.toHexString(property)+
303                    ") does not have named values");
304        }
305        // valueMapIndex is the start of the property's valueMap,
306        // where the first word is the BytesTrie offset.
307        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
308    }
309
310    /**
311     * Returns a value enum given a property enum and one of its value names. Does not throw.
312     * @return value enum, or UProperty.UNDEFINED if not defined for that property
313     */
314    public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
315        int valueMapIndex=findProperty(property);
316        if(valueMapIndex==0) {
317            return UProperty.UNDEFINED;
318        }
319        valueMapIndex=valueMaps[valueMapIndex+1];
320        if(valueMapIndex==0) {
321            return UProperty.UNDEFINED;
322        }
323        // valueMapIndex is the start of the property's valueMap,
324        // where the first word is the BytesTrie offset.
325        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
326    }
327
328    /**
329     * Compare two property names, returning <0, 0, or >0.  The
330     * comparison is that described as "loose" matching in the
331     * Property*Aliases.txt files.
332     */
333    public static int compare(String stra, String strb) {
334        // Note: This implementation is a literal copy of
335        // uprv_comparePropertyNames.  It can probably be improved.
336        int istra=0, istrb=0, rc;
337        int cstra=0, cstrb=0;
338        for (;;) {
339            /* Ignore delimiters '-', '_', and ASCII White_Space */
340            while (istra<stra.length()) {
341                cstra = stra.charAt(istra);
342                switch (cstra) {
343                case '-':  case '_':  case ' ':  case '\t':
344                case '\n': case 0xb/*\v*/: case '\f': case '\r':
345                    ++istra;
346                    continue;
347                }
348                break;
349            }
350
351            while (istrb<strb.length()) {
352                cstrb = strb.charAt(istrb);
353                switch (cstrb) {
354                case '-':  case '_':  case ' ':  case '\t':
355                case '\n': case 0xb/*\v*/: case '\f': case '\r':
356                    ++istrb;
357                    continue;
358                }
359                break;
360            }
361
362            /* If we reach the ends of both strings then they match */
363            boolean endstra = istra==stra.length();
364            boolean endstrb = istrb==strb.length();
365            if (endstra) {
366                if (endstrb) return 0;
367                cstra = 0;
368            } else if (endstrb) {
369                cstrb = 0;
370            }
371
372            rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
373            if (rc != 0) {
374                return rc;
375            }
376
377            ++istra;
378            ++istrb;
379        }
380    }
381}
382