/* GENERATED SOURCE. DO NOT MODIFY. */
/*
 *******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package android.icu.text;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import android.icu.impl.CharTrie;
import android.icu.impl.ICUBinary;
import android.icu.impl.ICUBinary.Authenticate;
import android.icu.impl.Trie;

/**
 * <p>Internal class used for Rule Based Break Iterators.</p>
 * <p>This class provides access to the compiled break rule data, as
 * it is stored in a .brk file.</p>
 */
final class RBBIDataWrapper {
    //
    // These fields are the ready-to-use compiled rule data, as
    // read from the file. They are populated by get(ByteBuffer).
    //
    RBBIDataHeader fHeader;
    short[]        fFTable;      // forward state table
    short[]        fRTable;      // reverse state table
    short[]        fSFTable;     // safe forward table; stays null when its length in the header is 0
    short[]        fSRTable;     // safe reverse table; stays null when its length in the header is 0
    CharTrie       fTrie;
    String         fRuleSource;
    int[]          fStatusTable;

    // Byte order of the buffer the data was read from; needed to reassemble
    // 32-bit state table header fields from the 16-bit table arrays.
    private boolean isBigEndian;

    static final int DATA_FORMAT = 0x42726b20;     // "Brk "
    static final int FORMAT_VERSION = 0x03010000;  // 3.1

    /** Accepts data whose major format version equals that of FORMAT_VERSION. */
    private static final class IsAcceptable implements Authenticate {
        // @Override when we switch to Java 6
        public boolean isDataVersionAcceptable(byte[] version) {
            return version[0] == (FORMAT_VERSION >>> 24);
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();

    //
    // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
    // Used by the rule compiler when flattening the data.
    //
    static final int DH_SIZE           = 24;
    static final int DH_MAGIC          = 0;
    static final int DH_FORMATVERSION  = 1;
    static final int DH_LENGTH         = 2;
    static final int DH_CATCOUNT       = 3;
    static final int DH_FTABLE         = 4;
    static final int DH_FTABLELEN      = 5;
    static final int DH_RTABLE         = 6;
    static final int DH_RTABLELEN      = 7;
    static final int DH_SFTABLE        = 8;
    static final int DH_SFTABLELEN     = 9;
    static final int DH_SRTABLE        = 10;
    static final int DH_SRTABLELEN     = 11;
    static final int DH_TRIE           = 12;
    static final int DH_TRIELEN        = 13;
    static final int DH_RULESOURCE     = 14;
    static final int DH_RULESOURCELEN  = 15;
    static final int DH_STATUSTABLE    = 16;
    static final int DH_STATUSTABLELEN = 17;


    // Index offsets to the fields in a state table row.
    //    Corresponds to struct RBBIStateTableRow in the C version.
    //
    static final int ACCEPTING  = 0;
    static final int LOOKAHEAD  = 1;
    static final int TAGIDX     = 2;
    static final int RESERVED   = 3;
    static final int NEXTSTATES = 4;

    // Index offsets to header fields of a state table
    //     struct RBBIStateTable {...   in the C version.
    //
    static final int NUMSTATES = 0;
    static final int ROWLEN    = 2;
    static final int FLAGS     = 4;
    // private static final int RESERVED_2 = 6;
    private static final int ROW_DATA = 8;

    // Bit selectors for the "FLAGS" field of the state table header
    //     enum RBBIStateTableFlags in the C version.
    //
    static final int RBBI_LOOKAHEAD_HARD_BREAK = 1;
    static final int RBBI_BOF_REQUIRED         = 2;

    /**
     * Data Header.  A struct-like class with the fields from the RBBI data file header.
     */
    static final class RBBIDataHeader {
        int     fMagic;          //  == 0xb1a0
        int     fVersion;        //  == 1 (for ICU 3.2 and earlier).
        byte[]  fFormatVersion;  //  For ICU 3.4 and later.
        int     fLength;         //  Total length in bytes of this RBBI Data,
                                 //      including all sections, not just the header.
        int     fCatCount;       //  Number of character categories.

        //
        //  Offsets and sizes of each of the subsections within the RBBI data.
        //  All offsets are bytes from the start of the RBBIDataHeader.
        //  All sizes are in bytes.
        //
        int     fFTable;         //  forward state transition table.
        int     fFTableLen;
        int     fRTable;         //  Offset to the reverse state transition table.
        int     fRTableLen;
        int     fSFTable;        //  safe point forward transition table
        int     fSFTableLen;
        int     fSRTable;        //  safe point reverse transition table
        int     fSRTableLen;
        int     fTrie;           //  Offset to Trie data for character categories
        int     fTrieLen;
        int     fRuleSource;     //  Offset to the source for the break
        int     fRuleSourceLen;  //    rules.  Stored UChar *.
        int     fStatusTable;    //  Offset to the table of rule status values
        int     fStatusTableLen;

        public RBBIDataHeader() {
            fMagic = 0;
            fFormatVersion = new byte[4];
        }
    }


    /**
     * RBBI State Table Indexing Function.  Given a state number, return the
     * array index of the start of the state table row for that state.
     *
     * <p>Each row holds NEXTSTATES fixed fields (ACCEPTING, LOOKAHEAD, TAGIDX,
     * RESERVED) followed by one next-state entry per character category, and
     * the rows start at index ROW_DATA of the table array.
     *
     * @param state the state number
     * @return index into a state table array of the first field of that state's row
     */
    int getRowIndex(int state) {
        return ROW_DATA + state * (fHeader.fCatCount + NEXTSTATES);
    }

    /**
     * Folding function handed to the CharTrie constructor: when bit 15 of the
     * data value is set, the low 15 bits are returned as the folding offset;
     * otherwise the offset is 0.
     */
    static class TrieFoldingFunc implements Trie.DataManipulate {
        public int getFoldingOffset(int data) {
            if ((data & 0x8000) != 0) {
                return data & 0x7fff;
            } else {
                return 0;
            }
        }
    }
    static TrieFoldingFunc fTrieFoldingFunc = new TrieFoldingFunc();


    RBBIDataWrapper() {
    }

    /**
     * Get an RBBIDataWrapper from a ByteBuffer positioned at the start of a
     * pre-compiled set of RBBI rules.
     *
     * <p>The sections are read in this order: header, forward table, reverse
     * table, safe forward table, safe reverse table, trie, status table, rule
     * source. A local {@code pos} tracks the byte offset (relative to the start
     * of the RBBI header) that the buffer is expected to be at.
     *
     * @param bytes the buffer holding the binary break rule data
     * @return a wrapper populated with the tables, trie, status values and rule source
     * @throws IOException if the magic number or version is not recognized, or if
     *         the forward table, status table or rule source offsets are inconsistent
     */
    static RBBIDataWrapper get(ByteBuffer bytes) throws IOException {
        RBBIDataWrapper wrapper = new RBBIDataWrapper();

        // NOTE(review): readHeader presumably validates the ICU data header and may
        // adjust the buffer's byte order, which is why order() is sampled afterwards
        // -- confirm against ICUBinary.
        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        wrapper.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;

        // Read in the RBBI data header...
        RBBIDataHeader header = new RBBIDataHeader();
        wrapper.fHeader = header;
        header.fMagic = bytes.getInt();
        // Read the same 4 bytes as an int and as a byte array: The data format could be
        // the old fVersion=1 (TODO: probably not with a real ICU data header?)
        // or the new fFormatVersion=3.x.
        header.fVersion          = bytes.getInt(bytes.position());  // absolute get: position not advanced
        header.fFormatVersion[0] = bytes.get();
        header.fFormatVersion[1] = bytes.get();
        header.fFormatVersion[2] = bytes.get();
        header.fFormatVersion[3] = bytes.get();
        header.fLength           = bytes.getInt();
        header.fCatCount         = bytes.getInt();
        header.fFTable           = bytes.getInt();
        header.fFTableLen        = bytes.getInt();
        header.fRTable           = bytes.getInt();
        header.fRTableLen        = bytes.getInt();
        header.fSFTable          = bytes.getInt();
        header.fSFTableLen       = bytes.getInt();
        header.fSRTable          = bytes.getInt();
        header.fSRTableLen       = bytes.getInt();
        header.fTrie             = bytes.getInt();
        header.fTrieLen          = bytes.getInt();
        header.fRuleSource       = bytes.getInt();
        header.fRuleSourceLen    = bytes.getInt();
        header.fStatusTable      = bytes.getInt();
        header.fStatusTableLen   = bytes.getInt();
        ICUBinary.skipBytes(bytes, 6 * 4);    // uint32_t  fReserved[6];


        if (header.fMagic != 0xb1a0 ||
                !(header.fVersion == 1 ||            // ICU 3.2 and earlier
                  header.fFormatVersion[0] == 3)     // ICU 3.4
            ) {
            throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
        }

        // Current position in the buffer: the offset of the end of the header,
        // which has DH_SIZE (24) fields, all int32_t (4 bytes).
        int pos = DH_SIZE * 4;

        //
        // Read in the Forward state transition table as an array of shorts.
        //

        // Quick Sanity Check
        if (header.fFTable < pos || header.fFTable > header.fLength) {
            throw new IOException("Break iterator Rule data corrupt");
        }

        // Skip over any padding preceding this table
        ICUBinary.skipBytes(bytes, header.fFTable - pos);
        pos = header.fFTable;

        // Lengths are in bytes: len / 2 shorts, plus any odd trailing byte to skip.
        wrapper.fFTable = ICUBinary.getShorts(
                bytes, header.fFTableLen / 2, header.fFTableLen & 1);
        pos += header.fFTableLen;

        //
        // Read in the Reverse state table
        //

        // Skip over any padding in the file
        ICUBinary.skipBytes(bytes, header.fRTable - pos);
        pos = header.fRTable;

        // Create & fill the table itself.
        wrapper.fRTable = ICUBinary.getShorts(
                bytes, header.fRTableLen / 2, header.fRTableLen & 1);
        pos += header.fRTableLen;

        //
        // Read in the Safe Forward state table
        //
        if (header.fSFTableLen > 0) {
            // Skip over any padding in the file
            ICUBinary.skipBytes(bytes, header.fSFTable - pos);
            pos = header.fSFTable;

            // Create & fill the table itself.
            wrapper.fSFTable = ICUBinary.getShorts(
                    bytes, header.fSFTableLen / 2, header.fSFTableLen & 1);
            pos += header.fSFTableLen;
        }

        //
        // Read in the Safe Reverse state table
        //
        if (header.fSRTableLen > 0) {
            // Skip over any padding in the file
            ICUBinary.skipBytes(bytes, header.fSRTable - pos);
            pos = header.fSRTable;

            // Create & fill the table itself.
            wrapper.fSRTable = ICUBinary.getShorts(
                    bytes, header.fSRTableLen / 2, header.fSRTableLen & 1);
            pos += header.fSRTableLen;
        }

        //
        // Unserialize the Character categories TRIE
        //     Because we can't be absolutely certain where the Trie deserialize will
        //     leave the buffer, leave position unchanged.
        //     The seek to the start of the next item following the TRIE will get us
        //     back in sync.
        //
        ICUBinary.skipBytes(bytes, header.fTrie - pos);  // seek buffer from end of the previous
        pos = header.fTrie;                              //   section to the start of the trie

        bytes.mark();    // Remember the position of the start of the TRIE in the buffer.

        wrapper.fTrie = new CharTrie(bytes, fTrieFoldingFunc);  // Deserialize the TRIE, leaving buffer
                                                                //   at an unknown position, preceding the
                                                                //   padding between TRIE and following section.

        bytes.reset();   // Move buffer back to the marked position at
                         //   the start of the serialized TRIE.  Now our
                         //   "pos" variable and the buffer are in
                         //   agreement.

        //
        // Read the Rule Status Table
        //
        if (pos > header.fStatusTable) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        ICUBinary.skipBytes(bytes, header.fStatusTable - pos);
        pos = header.fStatusTable;
        wrapper.fStatusTable = ICUBinary.getInts(
                bytes, header.fStatusTableLen / 4, header.fStatusTableLen & 3);
        pos += header.fStatusTableLen;

        //
        // Put the break rule source into a String
        //
        if (pos > header.fRuleSource) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        ICUBinary.skipBytes(bytes, header.fRuleSource - pos);
        pos = header.fRuleSource;
        wrapper.fRuleSource = ICUBinary.getString(
                bytes, header.fRuleSourceLen / 2, header.fRuleSourceLen & 1);

        if (RuleBasedBreakIterator.fDebugEnv != null && RuleBasedBreakIterator.fDebugEnv.indexOf("data") >= 0) {
            wrapper.dump();
        }
        return wrapper;
    }

    ///CLOVER:OFF
    //  Getters for fields from the state table header
    //
    /**
     * Reassembles the 32-bit NUMSTATES header field from the two 16-bit
     * entries that hold it, honoring the byte order the data was read with.
     */
    private int getStateTableNumStates(short[] table) {
        if (isBigEndian) {
            return (table[NUMSTATES] << 16) | (table[NUMSTATES + 1] & 0xffff);
        } else {
            return (table[NUMSTATES + 1] << 16) | (table[NUMSTATES] & 0xffff);
        }
    }
    ///CLOVER:ON

    /**
     * Returns the flags of a state table (see RBBI_LOOKAHEAD_HARD_BREAK and
     * RBBI_BOF_REQUIRED). Only one of the two 16-bit entries of the FLAGS
     * field is read, selected by byte order.
     *
     * @param table a state table array
     * @return the flag bits
     */
    int getStateTableFlags(short[] table) {
        // This works for up to 15 flags bits.
        return table[isBigEndian ? FLAGS + 1 : FLAGS];
    }

    ///CLOVER:OFF
    /* Debug function to display the break iterator data. */
    void dump() {
        if (fFTable.length == 0) {
            // There is no table. Fail early for testing purposes.
            throw new NullPointerException();
        }
        System.out.println("RBBI Data Wrapper dump ...");
        System.out.println();
        System.out.println("Forward State Table");
        dumpTable(fFTable);
        System.out.println("Reverse State Table");
        dumpTable(fRTable);
        System.out.println("Forward Safe Points Table");
        dumpTable(fSFTable);
        System.out.println("Reverse Safe Points Table");
        dumpTable(fSRTable);

        dumpCharCategories();
        System.out.println("Source Rules: " + fRuleSource);

    }
    ///CLOVER:ON

    ///CLOVER:OFF
    /* Fixed width int-to-string conversion: decimal, right-aligned, space padded. */
    public static String intToString(int n, int width) {
        return padLeft(Integer.toString(n), width);
    }
    ///CLOVER:ON

    ///CLOVER:OFF
    /* Fixed width int-to-hex-string conversion: right-aligned, space padded. */
    public static String intToHexString(int n, int width) {
        return padLeft(Integer.toHexString(n), width);
    }
    ///CLOVER:ON

    ///CLOVER:OFF
    /*
     * Left-pads text with spaces up to width characters. Text that is already
     * at least that long (or a non-positive width) is returned unchanged.
     */
    private static String padLeft(String text, int width) {
        if (text.length() >= width) {
            return text;
        }
        StringBuilder dest = new StringBuilder(width);
        for (int i = text.length(); i < width; i++) {
            dest.append(' ');
        }
        return dest.append(text).toString();
    }
    ///CLOVER:ON

    ///CLOVER:OFF
    /** Dump a state table.  (A full set of RBBI rules has 4 state tables.)  */
    private void dumpTable(short[] table) {
        if (table == null) {
            System.out.println(" -- null -- ");
            return;
        }
        // The fixed prefix is 4 + 5 + 5 + 5 characters wide so that the category
        // column headings line up with the rows produced by dumpRow().
        StringBuilder header = new StringBuilder(" Row  Acc Look  Tag");
        for (int cat = 0; cat < fHeader.fCatCount; cat++) {
            header.append(intToString(cat, 5));
        }
        System.out.println(header.toString());
        for (int i = 0; i < header.length(); i++) {
            System.out.print("-");
        }
        System.out.println();
        int numStates = getStateTableNumStates(table);
        for (int state = 0; state < numStates; state++) {
            dumpRow(table, state);
        }
        System.out.println();
    }
    ///CLOVER:ON

    ///CLOVER:OFF
    // Placeholder for a zero ACCEPTING / LOOKAHEAD cell; same width as the
    // 5-character value it stands in for, so later columns stay aligned.
    private static final String BLANK_CELL = "     ";

    /**
     * Dump (for debug) a single row of an RBBI state table
     * @param table the state table array
     * @param state the number of the state whose row is printed
     */
    private void dumpRow(short[] table, int state) {
        StringBuilder dest = new StringBuilder(fHeader.fCatCount * 5 + 20);
        dest.append(intToString(state, 4));
        int row = getRowIndex(state);
        if (table[row + ACCEPTING] != 0) {
            dest.append(intToString(table[row + ACCEPTING], 5));
        } else {
            dest.append(BLANK_CELL);
        }
        if (table[row + LOOKAHEAD] != 0) {
            dest.append(intToString(table[row + LOOKAHEAD], 5));
        } else {
            dest.append(BLANK_CELL);
        }
        dest.append(intToString(table[row + TAGIDX], 5));

        for (int col = 0; col < fHeader.fCatCount; col++) {
            dest.append(intToString(table[row + NEXTSTATES + col], 5));
        }

        System.out.println(dest);
    }
    ///CLOVER:ON

    ///CLOVER:OFF
    /*
     * Debug function: walks every code point, groups consecutive code points that
     * share a category into ranges, and prints the ranges collected per category.
     */
    private void dumpCharCategories() {
        int n = fHeader.fCatCount;
        StringBuilder[] catStrings = new StringBuilder[n + 1];
        int[] lastNewline = new int[n + 1];   // per category: length threshold of the last line wrap
        for (int category = 0; category <= n; category++) {
            catStrings[category] = new StringBuilder();
        }
        int rangeStart = 0;
        int rangeEnd = 0;
        int lastCat = -1;                     // -1 until the first valid category is seen

        System.out.println("\nCharacter Categories");
        System.out.println("--------------------");
        for (int char32 = 0; char32 <= 0x10ffff; char32++) {
            int category = fTrie.getCodePointValue(char32);
            category &= ~0x4000;            // Mask off dictionary bit.
            if (category < 0 || category > n) {
                System.out.println("Error, bad category " + Integer.toHexString(category) +
                        " for char " + Integer.toHexString(char32));
                break;
            }
            if (category == lastCat) {
                rangeEnd = char32;
                continue;
            }
            if (lastCat >= 0) {
                // Flush the range that just ended, wrapping long lines.
                StringBuilder text = catStrings[lastCat];
                if (text.length() > lastNewline[lastCat] + 70) {
                    lastNewline[lastCat] = text.length() + 10;
                    text.append("\n ");
                }
                appendRange(text, rangeStart, rangeEnd);
            }
            lastCat = category;
            rangeStart = rangeEnd = char32;
        }
        // Flush the final range. lastCat is still -1 if the very first code point
        // already had a bad category, in which case there is nothing to flush.
        if (lastCat >= 0) {
            appendRange(catStrings[lastCat], rangeStart, rangeEnd);
        }

        for (int category = 0; category <= n; category++) {
            System.out.println(intToString(category, 5) + " " + catStrings[category]);
        }
        System.out.println();
    }

    /* Appends " start" or " start-end" (hex) to dest. */
    private static void appendRange(StringBuilder dest, int rangeStart, int rangeEnd) {
        dest.append(' ').append(Integer.toHexString(rangeStart));
        if (rangeEnd != rangeStart) {
            dest.append('-').append(Integer.toHexString(rangeEnd));
        }
    }
    ///CLOVER:ON

    /*static RBBIDataWrapper get(String name) throws IOException {
        String  fullName = "data/" + name;
        InputStream is = ICUData.getRequiredStream(fullName);
        return get(is);
    }

    public static void main(String[] args) {
        String s;
        if (args.length == 0) {
            s = "char";
        } else {
            s = args[0];
        }
        System.out.println("RBBIDataWrapper.main(" + s + ") ");

        String versionedName = ICUResourceBundle.ICU_BUNDLE+"/"+ s + ".brk";

        try {
            RBBIDataWrapper This = RBBIDataWrapper.get(versionedName);
            This.dump();
        }
       catch (Exception e) {
           System.out.println("Exception: " + e.toString());
       }

    }*/
}