/*
 **********************************************************************
 * Copyright (c) 2002-2015, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: November 5 2002
 * Since: ICU 2.4
 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
 **********************************************************************
 */

package com.ibm.icu.impl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;

import com.ibm.icu.lang.UProperty;
import com.ibm.icu.util.BytesTrie;

/**
 * Wrapper for the pnames.icu binary data file. This data file is
 * imported from icu4c. It contains property and property value
 * aliases from the UCD files PropertyAliases.txt and
 * PropertyValueAliases.txt. The file is built by the icu4c tool
 * genpname. It must be an ASCII big-endian file to be
 * usable in icu4j.
 *
 * This class performs two functions.
 *
 * (1) It can import the flat binary data into usable objects.
 *
 * (2) It provides an API to access the tree of objects.
 *
 * Needless to say, this class is tightly coupled to the binary format
 * of icu4c's pnames.icu file.
 *
 * Constructing a UPropertyAliases reads and parses the pnames.icu file.
 * The constructor is private; the class builds exactly one instance in
 * its static initializer and publishes it as {@link #INSTANCE}.
 *
 * @author Alan Liu
 * @since ICU 2.4
 */
public final class UPropertyAliases {
    // Positions in the indexes array at the start of the data (after the
    // generic header). Each of these index entries holds a byte offset.
    private static final int IX_VALUE_MAPS_OFFSET=0;
    private static final int IX_BYTE_TRIES_OFFSET=1;
    private static final int IX_NAME_GROUPS_OFFSET=2;
    private static final int IX_RESERVED3_OFFSET=3;
    // private static final int IX_RESERVED4_OFFSET=4;
    // private static final int IX_TOTAL_SIZE=5;

    // Other values.
    // private static final int IX_MAX_NAME_LENGTH=6;
    // private static final int IX_RESERVED7=7;
    // private static final int IX_COUNT=8;

    //----------------------------------------------------------------
    // Runtime data. This is an unflattened representation of the
    // data in pnames.icu.

    /**
     * Integer table. As used by the lookup methods below:
     * valueMaps[0] is the number of property ranges, followed by the
     * (start, limit) pairs and, per property, two ints: the name group
     * offset and the index of the property's value map (0 if none).
     */
    private int[] valueMaps;
    /** Serialized BytesTrie data; the trie at offset 0 maps property names to property enums. */
    private byte[] bytesTries;
    /**
     * Name groups, one char per input byte. A group starts with a char holding
     * the number of names, followed by that many U+0000-terminated names.
     */
    private String nameGroups;

    /** Accepts only data whose first version byte is 2 (formatVersion 2). */
    private static final class IsAcceptable implements ICUBinary.Authenticate {
        // @Override when we switch to Java 6
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0]==2;
        }
    }
    // Note: IS_ACCEPTABLE must be initialized before the static block that
    // creates INSTANCE, because load() uses it.
    private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
    private static final int DATA_FORMAT=0x706E616D;  // "pnam"

    /**
     * Parses the pnames.icu data and fills in valueMaps, bytesTries and nameGroups.
     *
     * @param bytes the file contents, positioned at the generic ICU header
     * @throws IOException if the header is rejected by ICUBinary.readHeader
     *         or the data has fewer than 8 indexes
     */
    private void load(ByteBuffer bytes) throws IOException {
        //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        // The first index is the byte offset of the valueMaps, which directly
        // follow the indexes; dividing by 4 therefore yields the number of indexes.
        int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
        if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
            throw new IOException("pnames.icu: not enough indexes");
        }
        int[] inIndexes=new int[indexesLength];
        inIndexes[0]=indexesLength*4;
        for(int i=1; i<indexesLength; ++i) {
            inIndexes[i]=bytes.getInt();
        }

        // Read the valueMaps.
        int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
        int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
        int numInts=(nextOffset-offset)/4;
        valueMaps=ICUBinary.getInts(bytes, numInts, 0);

        // Read the bytesTries.
        offset=nextOffset;
        nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
        int numBytes=nextOffset-offset;
        bytesTries=new byte[numBytes];
        bytes.get(bytesTries);

        // Read the nameGroups and turn them from ASCII bytes into a Java String.
        offset=nextOffset;
        nextOffset=inIndexes[IX_RESERVED3_OFFSET];
        numBytes=nextOffset-offset;
        StringBuilder sb=new StringBuilder(numBytes);
        for(int i=0; i<numBytes; ++i) {
            sb.append((char)bytes.get());
        }
        nameGroups=sb.toString();
    }

    /**
     * Loads and parses "pnames.icu".
     *
     * @throws IOException if load() fails
     */
    private UPropertyAliases() throws IOException {
        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
        load(bytes);
    }

    /**
     * Finds the valueMaps entry for a property.
     *
     * @param property the property enum
     * @return the valueMaps index of the property's two-int entry
     *         (name group offset, value map index), or 0 if the property
     *         is not covered by any range
     */
    private int findProperty(int property) {
        int i=1;  // valueMaps index, initially after numRanges
        for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
            // Read and skip the start and limit of this range.
            int start=valueMaps[i];
            int limit=valueMaps[i+1];
            i+=2;
            if(property<start) {
                // The loop stops at the first range starting above the property.
                break;
            }
            if(property<limit) {
                return i+(property-start)*2;  // two ints per property
            }
            i+=(limit-start)*2;  // Skip all entries for this range.
        }
        return 0;
    }

    /**
     * Finds the name group offset for a value of a property.
     *
     * @param valueMapIndex the valueMaps index of the property's value map
     *        (whose first int is the BytesTrie offset), or 0 if the property
     *        has no named values
     * @param value the property value enum
     * @return the name group offset for the value, or 0 if none was found
     */
    private int findPropertyValueNameGroup(int valueMapIndex, int value) {
        if(valueMapIndex==0) {
            return 0;  // The property does not have named values.
        }
        ++valueMapIndex;  // Skip the BytesTrie offset.
        int numRanges=valueMaps[valueMapIndex++];
        if(numRanges<0x10) {
            // Ranges of values.
            for(; numRanges>0; --numRanges) {
                // Read and skip the start and limit of this range.
                int start=valueMaps[valueMapIndex];
                int limit=valueMaps[valueMapIndex+1];
                valueMapIndex+=2;
                if(value<start) {
                    break;
                }
                if(value<limit) {
                    return valueMaps[valueMapIndex+value-start];  // one int per value
                }
                valueMapIndex+=limit-start;  // Skip all entries for this range.
            }
        } else {
            // List of values: (numRanges-0x10) values, followed by the same
            // number of name group offsets in the same order.
            int valuesStart=valueMapIndex;
            int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
            do {
                int v=valueMaps[valueMapIndex];
                if(value<v) {
                    break;
                }
                if(value==v) {
                    return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
                }
            } while(++valueMapIndex<nameGroupOffsetsStart);
        }
        return 0;
    }

    /**
     * Extracts one name from a name group.
     *
     * @param nameGroupsIndex index into nameGroups of the group's name count
     * @param nameIndex which of the group's names to return (0-based)
     * @return the name, or null if that name is empty
     * @throws IllegalIcuArgumentException if nameIndex is negative or not less
     *         than the group's name count
     */
    private String getName(int nameGroupsIndex, int nameIndex) {
        int numNames=nameGroups.charAt(nameGroupsIndex++);
        if(nameIndex<0 || numNames<=nameIndex) {
            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
        }
        // Skip nameIndex names.
        for(; nameIndex>0; --nameIndex) {
            while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
        }
        // Find the end of this name.
        int nameStart=nameGroupsIndex;
        while(0!=nameGroups.charAt(nameGroupsIndex)) {
            ++nameGroupsIndex;
        }
        if(nameStart==nameGroupsIndex) {
            return null;  // no name (Property[Value]Aliases.txt has "n/a")
        }
        return nameGroups.substring(nameStart, nameGroupsIndex);
    }

    /** Maps 'A'..'Z' to 'a'..'z' and returns every other value unchanged. */
    private static int asciiToLowercase(int c) {
        return 'A'<=c && c<='Z' ? c+0x20 : c;
    }

    /**
     * Feeds a name into a trie, ignoring delimiters and ASCII case.
     *
     * @param trie the trie to traverse; it is advanced by this call
     * @param name the name to look up
     * @return true if the last trie result reports a value (hasValue());
     *         false if the trie cannot continue before the name is consumed
     */
    private boolean containsName(BytesTrie trie, CharSequence name) {
        BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
        for(int i=0; i<name.length(); ++i) {
            int c=name.charAt(i);
            // Ignore delimiters '-', '_', and ASCII White_Space.
            if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
                continue;
            }
            if(!result.hasNext()) {
                return false;
            }
            c=asciiToLowercase(c);
            result=trie.next(c);
        }
        return result.hasValue();
    }

    //----------------------------------------------------------------
    // Public API

    /** The single instance, created when this class is initialized. */
    public static final UPropertyAliases INSTANCE;

    static {
        try {
            INSTANCE = new UPropertyAliases();
        } catch(IOException e) {
            ///CLOVER:OFF
            MissingResourceException mre = new MissingResourceException(
                    "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
            mre.initCause(e);
            throw mre;
            ///CLOVER:ON
        }
    }

    /**
     * Returns a property name given a property enum.
     * Multiple names may be available for each property;
     * the nameChoice selects among them.
     *
     * @param property the property enum
     * @param nameChoice the 0-based index of the desired name
     * @return the name, or null if the chosen name is empty
     * @throws IllegalArgumentException if the property enum is not found
     * @throws IllegalIcuArgumentException if nameChoice is out of range
     */
    public String getPropertyName(int property, int nameChoice) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
        }
        return getName(valueMaps[valueMapIndex], nameChoice);
    }

    /**
     * Returns a value name given a property enum and a value enum.
     * Multiple names may be available for each value;
     * the nameChoice selects among them.
     *
     * @param property the property enum
     * @param value the property value enum
     * @param nameChoice the 0-based index of the desired name
     * @return the name, or null if the chosen name is empty
     * @throws IllegalArgumentException if the property enum is not found,
     *         or if no name group is found for the value
     * @throws IllegalIcuArgumentException if nameChoice is out of range
     */
    public String getPropertyValueName(int property, int value, int nameChoice) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
        }
        int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
        if(nameGroupOffset==0) {
            throw new IllegalArgumentException(
                    "Property "+property+" (0x"+Integer.toHexString(property)+
                    ") does not have named values");
        }
        return getName(nameGroupOffset, nameChoice);
    }

    /**
     * Looks up an alias in the BytesTrie that starts at the given offset.
     *
     * @param bytesTrieOffset offset of the trie within bytesTries
     * @param alias the name to look up (delimiters and ASCII case are ignored)
     * @return the trie value for the alias, or UProperty.UNDEFINED if not found
     */
    private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
        BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
        if(containsName(trie, alias)) {
            return trie.getValue();
        } else {
            return UProperty.UNDEFINED;
        }
    }

    /**
     * Returns a property enum given one of its property names.
     * If the property name is not known, this method returns
     * UProperty.UNDEFINED.
     */
    public int getPropertyEnum(CharSequence alias) {
        return getPropertyOrValueEnum(0, alias);
    }

    /**
     * Returns a value enum given a property enum and one of its value names.
     *
     * @return the value enum, or UProperty.UNDEFINED if the alias is not found
     * @throws IllegalArgumentException if the property enum is not found
     *         or the property does not have named values
     */
    public int getPropertyValueEnum(int property, CharSequence alias) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
        }
        valueMapIndex=valueMaps[valueMapIndex+1];
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Property "+property+" (0x"+Integer.toHexString(property)+
                    ") does not have named values");
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Returns a value enum given a property enum and one of its value names. Does not throw.
     * @return value enum, or UProperty.UNDEFINED if not defined for that property
     */
    public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            return UProperty.UNDEFINED;
        }
        valueMapIndex=valueMaps[valueMapIndex+1];
        if(valueMapIndex==0) {
            return UProperty.UNDEFINED;
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Compare two property names, returning <0, 0, or >0. The
     * comparison is that described as "loose" matching in the
     * Property*Aliases.txt files: the delimiters '-', '_' and ASCII
     * white space are ignored, and ASCII letters are compared
     * case-insensitively.
     *
     * A string that ends sorts before any longer string that it loosely
     * matches so far, even if the next character of the longer string
     * is U+0000.
     */
    public static int compare(String stra, String strb) {
        // Note: This implementation started as a literal copy of
        // uprv_comparePropertyNames. It can probably be improved.
        int istra=0, istrb=0, rc;
        int cstra=0, cstrb=0;
        for (;;) {
            /* Ignore delimiters '-', '_', and ASCII White_Space */
            while (istra<stra.length()) {
                cstra = stra.charAt(istra);
                switch (cstra) {
                case '-':  case '_':  case ' ':  case '\t':
                case '\n': case 0xb/*\v*/: case '\f': case '\r':
                    ++istra;
                    continue;
                }
                break;
            }

            while (istrb<strb.length()) {
                cstrb = strb.charAt(istrb);
                switch (cstrb) {
                case '-':  case '_':  case ' ':  case '\t':
                case '\n': case 0xb/*\v*/: case '\f': case '\r':
                    ++istrb;
                    continue;
                }
                break;
            }

            /* If we reach the ends of both strings then they match */
            boolean endstra = istra==stra.length();
            boolean endstrb = istrb==strb.length();
            if (endstra) {
                if (endstrb) return 0;
                cstra = 0;
            } else if (endstrb) {
                cstrb = 0;
            }

            rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
            if (rc != 0) {
                return rc;
            }

            // Unlike a C string, a Java String may contain U+0000, which is
            // indistinguishable from the end-of-string stand-in 0 used above.
            // If exactly one string has ended here, the other one is longer:
            // stop now instead of advancing an index past the end of the
            // ended string (which would make the end tests above fail forever).
            if (endstra) {
                return -1;
            }
            if (endstrb) {
                return 1;
            }

            ++istra;
            ++istrb;
        }
    }
}