1/*
2 **********************************************************************
3 * Copyright (c) 2002-2015, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: November 5 2002
8 * Since: ICU 2.4
9 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
10 **********************************************************************
11 */
12
13package com.ibm.icu.impl;
14
15import java.io.IOException;
16import java.nio.ByteBuffer;
17import java.util.MissingResourceException;
18
19import com.ibm.icu.lang.UProperty;
20import com.ibm.icu.util.BytesTrie;
21
22/**
23 * Wrapper for the pnames.icu binary data file.  This data file is
24 * imported from icu4c.  It contains property and property value
25 * aliases from the UCD files PropertyAliases.txt and
26 * PropertyValueAliases.txt.  The file is built by the icu4c tool
27 * genpname.  It must be an ASCII big-endian file to be
28 * usable in icu4j.
29 *
30 * This class performs two functions.
31 *
32 * (1) It can import the flat binary data into usable objects.
33 *
34 * (2) It provides an API to access the tree of objects.
35 *
36 * Needless to say, this class is tightly coupled to the binary format
37 * of icu4c's pnames.icu file.
38 *
 * The pnames.icu file is read and parsed, and the data structures are
 * assembled, once: when this class is initialized and the shared INSTANCE
 * is created.  The constructor is private, so clients use that instance.
42 *
43 * @author Alan Liu
44 * @since ICU 2.4
45 */
46public final class UPropertyAliases {
47    // Byte offsets from the start of the data, after the generic header.
48    private static final int IX_VALUE_MAPS_OFFSET=0;
49    private static final int IX_BYTE_TRIES_OFFSET=1;
50    private static final int IX_NAME_GROUPS_OFFSET=2;
51    private static final int IX_RESERVED3_OFFSET=3;
52    // private static final int IX_RESERVED4_OFFSET=4;
53    // private static final int IX_TOTAL_SIZE=5;
54
55    // Other values.
56    // private static final int IX_MAX_NAME_LENGTH=6;
57    // private static final int IX_RESERVED7=7;
58    // private static final int IX_COUNT=8;
59
60    //----------------------------------------------------------------
61    // Runtime data.  This is an unflattened representation of the
62    // data in pnames.icu.
63
64    private int[] valueMaps;
65    private byte[] bytesTries;
66    private String nameGroups;
67
68    private static final class IsAcceptable implements ICUBinary.Authenticate {
69        // @Override when we switch to Java 6
70        public boolean isDataVersionAcceptable(byte version[]) {
71            return version[0]==2;
72        }
73    }
74    private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
75    private static final int DATA_FORMAT=0x706E616D;  // "pnam"
76
77    private void load(ByteBuffer bytes) throws IOException {
78        //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
79        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
80        int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
81        if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
82            throw new IOException("pnames.icu: not enough indexes");
83        }
84        int[] inIndexes=new int[indexesLength];
85        inIndexes[0]=indexesLength*4;
86        for(int i=1; i<indexesLength; ++i) {
87            inIndexes[i]=bytes.getInt();
88        }
89
90        // Read the valueMaps.
91        int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
92        int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
93        int numInts=(nextOffset-offset)/4;
94        valueMaps=ICUBinary.getInts(bytes, numInts, 0);
95
96        // Read the bytesTries.
97        offset=nextOffset;
98        nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
99        int numBytes=nextOffset-offset;
100        bytesTries=new byte[numBytes];
101        bytes.get(bytesTries);
102
103        // Read the nameGroups and turn them from ASCII bytes into a Java String.
104        offset=nextOffset;
105        nextOffset=inIndexes[IX_RESERVED3_OFFSET];
106        numBytes=nextOffset-offset;
107        StringBuilder sb=new StringBuilder(numBytes);
108        for(int i=0; i<numBytes; ++i) {
109            sb.append((char)bytes.get());
110        }
111        nameGroups=sb.toString();
112    }
113
114    private UPropertyAliases() throws IOException {
115        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
116        load(bytes);
117    }
118
119    private int findProperty(int property) {
120        int i=1;  // valueMaps index, initially after numRanges
121        for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
122            // Read and skip the start and limit of this range.
123            int start=valueMaps[i];
124            int limit=valueMaps[i+1];
125            i+=2;
126            if(property<start) {
127                break;
128            }
129            if(property<limit) {
130                return i+(property-start)*2;
131            }
132            i+=(limit-start)*2;  // Skip all entries for this range.
133        }
134        return 0;
135    }
136
137    private int findPropertyValueNameGroup(int valueMapIndex, int value) {
138        if(valueMapIndex==0) {
139            return 0;  // The property does not have named values.
140        }
141        ++valueMapIndex;  // Skip the BytesTrie offset.
142        int numRanges=valueMaps[valueMapIndex++];
143        if(numRanges<0x10) {
144            // Ranges of values.
145            for(; numRanges>0; --numRanges) {
146                // Read and skip the start and limit of this range.
147                int start=valueMaps[valueMapIndex];
148                int limit=valueMaps[valueMapIndex+1];
149                valueMapIndex+=2;
150                if(value<start) {
151                    break;
152                }
153                if(value<limit) {
154                    return valueMaps[valueMapIndex+value-start];
155                }
156                valueMapIndex+=limit-start;  // Skip all entries for this range.
157            }
158        } else {
159            // List of values.
160            int valuesStart=valueMapIndex;
161            int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
162            do {
163                int v=valueMaps[valueMapIndex];
164                if(value<v) {
165                    break;
166                }
167                if(value==v) {
168                    return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
169                }
170            } while(++valueMapIndex<nameGroupOffsetsStart);
171        }
172        return 0;
173    }
174
175    private String getName(int nameGroupsIndex, int nameIndex) {
176        int numNames=nameGroups.charAt(nameGroupsIndex++);
177        if(nameIndex<0 || numNames<=nameIndex) {
178            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
179        }
180        // Skip nameIndex names.
181        for(; nameIndex>0; --nameIndex) {
182            while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
183        }
184        // Find the end of this name.
185        int nameStart=nameGroupsIndex;
186        while(0!=nameGroups.charAt(nameGroupsIndex)) {
187            ++nameGroupsIndex;
188        }
189        if(nameStart==nameGroupsIndex) {
190            return null;  // no name (Property[Value]Aliases.txt has "n/a")
191        }
192        return nameGroups.substring(nameStart, nameGroupsIndex);
193    }
194
195    private static int asciiToLowercase(int c) {
196        return 'A'<=c && c<='Z' ? c+0x20 : c;
197    }
198
199    private boolean containsName(BytesTrie trie, CharSequence name) {
200        BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
201        for(int i=0; i<name.length(); ++i) {
202            int c=name.charAt(i);
203            // Ignore delimiters '-', '_', and ASCII White_Space.
204            if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
205                continue;
206            }
207            if(!result.hasNext()) {
208                return false;
209            }
210            c=asciiToLowercase(c);
211            result=trie.next(c);
212        }
213        return result.hasValue();
214    }
215
216    //----------------------------------------------------------------
217    // Public API
218
219    public static final UPropertyAliases INSTANCE;
220
221    static {
222        try {
223            INSTANCE = new UPropertyAliases();
224        } catch(IOException e) {
225            ///CLOVER:OFF
226            MissingResourceException mre = new MissingResourceException(
227                    "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
228            mre.initCause(e);
229            throw mre;
230            ///CLOVER:ON
231        }
232    }
233
234    /**
235     * Returns a property name given a property enum.
236     * Multiple names may be available for each property;
237     * the nameChoice selects among them.
238     */
239    public String getPropertyName(int property, int nameChoice) {
240        int valueMapIndex=findProperty(property);
241        if(valueMapIndex==0) {
242            throw new IllegalArgumentException(
243                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
244        }
245        return getName(valueMaps[valueMapIndex], nameChoice);
246    }
247
248    /**
249     * Returns a value name given a property enum and a value enum.
250     * Multiple names may be available for each value;
251     * the nameChoice selects among them.
252     */
253    public String getPropertyValueName(int property, int value, int nameChoice) {
254        int valueMapIndex=findProperty(property);
255        if(valueMapIndex==0) {
256            throw new IllegalArgumentException(
257                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
258        }
259        int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
260        if(nameGroupOffset==0) {
261            throw new IllegalArgumentException(
262                    "Property "+property+" (0x"+Integer.toHexString(property)+
263                    ") does not have named values");
264        }
265        return getName(nameGroupOffset, nameChoice);
266    }
267
268    private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
269        BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
270        if(containsName(trie, alias)) {
271            return trie.getValue();
272        } else {
273            return UProperty.UNDEFINED;
274        }
275    }
276
277    /**
278     * Returns a property enum given one of its property names.
279     * If the property name is not known, this method returns
280     * UProperty.UNDEFINED.
281     */
282    public int getPropertyEnum(CharSequence alias) {
283        return getPropertyOrValueEnum(0, alias);
284    }
285
286    /**
287     * Returns a value enum given a property enum and one of its value names.
288     */
289    public int getPropertyValueEnum(int property, CharSequence alias) {
290        int valueMapIndex=findProperty(property);
291        if(valueMapIndex==0) {
292            throw new IllegalArgumentException(
293                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
294        }
295        valueMapIndex=valueMaps[valueMapIndex+1];
296        if(valueMapIndex==0) {
297            throw new IllegalArgumentException(
298                    "Property "+property+" (0x"+Integer.toHexString(property)+
299                    ") does not have named values");
300        }
301        // valueMapIndex is the start of the property's valueMap,
302        // where the first word is the BytesTrie offset.
303        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
304    }
305
306    /**
307     * Returns a value enum given a property enum and one of its value names. Does not throw.
308     * @return value enum, or UProperty.UNDEFINED if not defined for that property
309     */
310    public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
311        int valueMapIndex=findProperty(property);
312        if(valueMapIndex==0) {
313            return UProperty.UNDEFINED;
314        }
315        valueMapIndex=valueMaps[valueMapIndex+1];
316        if(valueMapIndex==0) {
317            return UProperty.UNDEFINED;
318        }
319        // valueMapIndex is the start of the property's valueMap,
320        // where the first word is the BytesTrie offset.
321        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
322    }
323
324    /**
325     * Compare two property names, returning <0, 0, or >0.  The
326     * comparison is that described as "loose" matching in the
327     * Property*Aliases.txt files.
328     */
329    public static int compare(String stra, String strb) {
330        // Note: This implementation is a literal copy of
331        // uprv_comparePropertyNames.  It can probably be improved.
332        int istra=0, istrb=0, rc;
333        int cstra=0, cstrb=0;
334        for (;;) {
335            /* Ignore delimiters '-', '_', and ASCII White_Space */
336            while (istra<stra.length()) {
337                cstra = stra.charAt(istra);
338                switch (cstra) {
339                case '-':  case '_':  case ' ':  case '\t':
340                case '\n': case 0xb/*\v*/: case '\f': case '\r':
341                    ++istra;
342                    continue;
343                }
344                break;
345            }
346
347            while (istrb<strb.length()) {
348                cstrb = strb.charAt(istrb);
349                switch (cstrb) {
350                case '-':  case '_':  case ' ':  case '\t':
351                case '\n': case 0xb/*\v*/: case '\f': case '\r':
352                    ++istrb;
353                    continue;
354                }
355                break;
356            }
357
358            /* If we reach the ends of both strings then they match */
359            boolean endstra = istra==stra.length();
360            boolean endstrb = istrb==strb.length();
361            if (endstra) {
362                if (endstrb) return 0;
363                cstra = 0;
364            } else if (endstrb) {
365                cstrb = 0;
366            }
367
368            rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
369            if (rc != 0) {
370                return rc;
371            }
372
373            ++istra;
374            ++istrb;
375        }
376    }
377}
378