1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/* 5 ******************************************************************************* 6 * Copyright (C) 1996-2015, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11package android.icu.text; 12 13import java.io.IOException; 14import java.nio.ByteBuffer; 15import java.nio.ByteOrder; 16 17import android.icu.impl.CharTrie; 18import android.icu.impl.ICUBinary; 19import android.icu.impl.ICUBinary.Authenticate; 20import android.icu.impl.Trie; 21 22/** 23* <p>Internal class used for Rule Based Break Iterators</p> 24* <p>This class provides access to the compiled break rule data, as 25* it is stored in a .brk file. 26*/ 27final class RBBIDataWrapper { 28 // 29 // These fields are the ready-to-use compiled rule data, as 30 // read from the file. 31 // 32 RBBIDataHeader fHeader; 33 short fFTable[]; 34 short fRTable[]; 35 short fSFTable[]; 36 short fSRTable[]; 37 CharTrie fTrie; 38 String fRuleSource; 39 int fStatusTable[]; 40 41 private boolean isBigEndian; 42 43 static final int DATA_FORMAT = 0x42726b20; // "Brk " 44 static final int FORMAT_VERSION = 0x03010000; // 3.1 45 46 private static final class IsAcceptable implements Authenticate { 47 // @Override when we switch to Java 6 48 @Override 49 public boolean isDataVersionAcceptable(byte version[]) { 50 return version[0] == (FORMAT_VERSION >>> 24); 51 } 52 } 53 private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); 54 55 // 56 // Indexes to fields in the ICU4C style binary form of the RBBI Data Header 57 // Used by the rule compiler when flattening the data. 58 // 59 final static int DH_SIZE = 24; 60 final static int DH_MAGIC = 0; 61 final static int DH_FORMATVERSION = 1; 62 final static int DH_LENGTH = 2; 63 final static int DH_CATCOUNT = 3; 64 final static int DH_FTABLE = 4; 65 final static int DH_FTABLELEN = 5; 66 final static int DH_RTABLE = 6; 67 final static int DH_RTABLELEN = 7; 68 final static int DH_SFTABLE = 8; 69 final static int DH_SFTABLELEN = 9; 70 final static int DH_SRTABLE = 10; 71 final static int DH_SRTABLELEN = 11; 72 final static int DH_TRIE = 12; 73 final static int DH_TRIELEN = 13; 74 final static int DH_RULESOURCE = 14; 75 final static int DH_RULESOURCELEN = 15; 76 final static int DH_STATUSTABLE = 16; 77 final static int DH_STATUSTABLELEN = 17; 78 79 80 // Index offsets to the fields in a state table row. 81 // Corresponds to struct RBBIStateTableRow in the C version. 82 // 83 final static int ACCEPTING = 0; 84 final static int LOOKAHEAD = 1; 85 final static int TAGIDX = 2; 86 final static int RESERVED = 3; 87 final static int NEXTSTATES = 4; 88 89 // Index offsets to header fields of a state table 90 // struct RBBIStateTable {... in the C version. 91 // 92 static final int NUMSTATES = 0; 93 static final int ROWLEN = 2; 94 static final int FLAGS = 4; 95 //ivate static final int RESERVED_2 = 6; 96 private static final int ROW_DATA = 8; 97 98 // Bit selectors for the "FLAGS" field of the state table header 99 // enum RBBIStateTableFlags in the C version. 100 // 101 final static int RBBI_LOOKAHEAD_HARD_BREAK = 1; 102 final static int RBBI_BOF_REQUIRED = 2; 103 104 /** 105 * Data Header. A struct-like class with the fields from the RBBI data file header. 106 */ 107 final static class RBBIDataHeader { 108 int fMagic; // == 0xbla0 109 int fVersion; // == 1 (for ICU 3.2 and earlier. 110 byte[] fFormatVersion; // For ICU 3.4 and later. 111 int fLength; // Total length in bytes of this RBBI Data, 112 // including all sections, not just the header. 113 int fCatCount; // Number of character categories. 114 115 // 116 // Offsets and sizes of each of the subsections within the RBBI data. 117 // All offsets are bytes from the start of the RBBIDataHeader. 118 // All sizes are in bytes. 119 // 120 int fFTable; // forward state transition table. 121 int fFTableLen; 122 int fRTable; // Offset to the reverse state transition table. 123 int fRTableLen; 124 int fSFTable; // safe point forward transition table 125 int fSFTableLen; 126 int fSRTable; // safe point reverse transition table 127 int fSRTableLen; 128 int fTrie; // Offset to Trie data for character categories 129 int fTrieLen; 130 int fRuleSource; // Offset to the source for for the break 131 int fRuleSourceLen; // rules. Stored UChar *. 132 int fStatusTable; // Offset to the table of rule status values 133 int fStatusTableLen; 134 135 public RBBIDataHeader() { 136 fMagic = 0; 137 fFormatVersion = new byte[4]; 138 } 139 } 140 141 142 /** 143 * RBBI State Table Indexing Function. Given a state number, return the 144 * array index of the start of the state table row for that state. 145 * 146 */ 147 int getRowIndex(int state){ 148 return ROW_DATA + state * (fHeader.fCatCount + 4); 149 } 150 151 static class TrieFoldingFunc implements Trie.DataManipulate { 152 @Override 153 public int getFoldingOffset(int data) { 154 if ((data & 0x8000) != 0) { 155 return data & 0x7fff; 156 } else { 157 return 0; 158 } 159 } 160 } 161 static TrieFoldingFunc fTrieFoldingFunc = new TrieFoldingFunc(); 162 163 164 RBBIDataWrapper() { 165 } 166 167 /* 168 * Get an RBBIDataWrapper from an InputStream onto a pre-compiled set 169 * of RBBI rules. 170 */ 171 static RBBIDataWrapper get(ByteBuffer bytes) throws IOException { 172 RBBIDataWrapper This = new RBBIDataWrapper(); 173 174 ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); 175 This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN; 176 177 // Read in the RBBI data header... 178 This.fHeader = new RBBIDataHeader(); 179 This.fHeader.fMagic = bytes.getInt(); 180 // Read the same 4 bytes as an int and as a byte array: The data format could be 181 // the old fVersion=1 (TODO: probably not with a real ICU data header?) 182 // or the new fFormatVersion=3.x. 183 This.fHeader.fVersion = bytes.getInt(bytes.position()); 184 This.fHeader.fFormatVersion[0] = bytes.get(); 185 This.fHeader.fFormatVersion[1] = bytes.get(); 186 This.fHeader.fFormatVersion[2] = bytes.get(); 187 This.fHeader.fFormatVersion[3] = bytes.get(); 188 This.fHeader.fLength = bytes.getInt(); 189 This.fHeader.fCatCount = bytes.getInt(); 190 This.fHeader.fFTable = bytes.getInt(); 191 This.fHeader.fFTableLen = bytes.getInt(); 192 This.fHeader.fRTable = bytes.getInt(); 193 This.fHeader.fRTableLen = bytes.getInt(); 194 This.fHeader.fSFTable = bytes.getInt(); 195 This.fHeader.fSFTableLen = bytes.getInt(); 196 This.fHeader.fSRTable = bytes.getInt(); 197 This.fHeader.fSRTableLen = bytes.getInt(); 198 This.fHeader.fTrie = bytes.getInt(); 199 This.fHeader.fTrieLen = bytes.getInt(); 200 This.fHeader.fRuleSource = bytes.getInt(); 201 This.fHeader.fRuleSourceLen = bytes.getInt(); 202 This.fHeader.fStatusTable = bytes.getInt(); 203 This.fHeader.fStatusTableLen = bytes.getInt(); 204 ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6]; 205 206 207 if (This.fHeader.fMagic != 0xb1a0 || 208 ! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier 209 This.fHeader.fFormatVersion[0] == 3) // ICU 3.4 210 ) { 211 throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version."); 212 } 213 214 // Current position in the buffer. 215 int pos = 24 * 4; // offset of end of header, which has 24 fields, all int32_t (4 bytes) 216 217 // 218 // Read in the Forward state transition table as an array of shorts. 219 // 220 221 // Quick Sanity Check 222 if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) { 223 throw new IOException("Break iterator Rule data corrupt"); 224 } 225 226 // Skip over any padding preceding this table 227 ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos); 228 pos = This.fHeader.fFTable; 229 230 This.fFTable = ICUBinary.getShorts( 231 bytes, This.fHeader.fFTableLen / 2, This.fHeader.fFTableLen & 1); 232 pos += This.fHeader.fFTableLen; 233 234 // 235 // Read in the Reverse state table 236 // 237 238 // Skip over any padding in the file 239 ICUBinary.skipBytes(bytes, This.fHeader.fRTable - pos); 240 pos = This.fHeader.fRTable; 241 242 // Create & fill the table itself. 243 This.fRTable = ICUBinary.getShorts( 244 bytes, This.fHeader.fRTableLen / 2, This.fHeader.fRTableLen & 1); 245 pos += This.fHeader.fRTableLen; 246 247 // 248 // Read in the Safe Forward state table 249 // 250 if (This.fHeader.fSFTableLen > 0) { 251 // Skip over any padding in the file 252 ICUBinary.skipBytes(bytes, This.fHeader.fSFTable - pos); 253 pos = This.fHeader.fSFTable; 254 255 // Create & fill the table itself. 256 This.fSFTable = ICUBinary.getShorts( 257 bytes, This.fHeader.fSFTableLen / 2, This.fHeader.fSFTableLen & 1); 258 pos += This.fHeader.fSFTableLen; 259 } 260 261 // 262 // Read in the Safe Reverse state table 263 // 264 if (This.fHeader.fSRTableLen > 0) { 265 // Skip over any padding in the file 266 ICUBinary.skipBytes(bytes, This.fHeader.fSRTable - pos); 267 pos = This.fHeader.fSRTable; 268 269 // Create & fill the table itself. 270 This.fSRTable = ICUBinary.getShorts( 271 bytes, This.fHeader.fSRTableLen / 2, This.fHeader.fSRTableLen & 1); 272 pos += This.fHeader.fSRTableLen; 273 } 274 275 // 276 // Unserialize the Character categories TRIE 277 // Because we can't be absolutely certain where the Trie deserialize will 278 // leave the buffer, leave position unchanged. 279 // The seek to the start of the next item following the TRIE will get us 280 // back in sync. 281 // 282 ICUBinary.skipBytes(bytes, This.fHeader.fTrie - pos); // seek buffer from end of 283 pos = This.fHeader.fTrie; // previous section to the start of the trie 284 285 bytes.mark(); // Mark position of start of TRIE in the input 286 // and tell Java to keep the mark valid so long 287 // as we don't go more than 100 bytes past the 288 // past the end of the TRIE. 289 290 This.fTrie = new CharTrie(bytes, fTrieFoldingFunc); // Deserialize the TRIE, leaving buffer 291 // at an unknown position, preceding the 292 // padding between TRIE and following section. 293 294 bytes.reset(); // Move buffer back to marked position at 295 // the start of the serialized TRIE. Now our 296 // "pos" variable and the buffer are in 297 // agreement. 298 299 // 300 // Read the Rule Status Table 301 // 302 if (pos > This.fHeader.fStatusTable) { 303 throw new IOException("Break iterator Rule data corrupt"); 304 } 305 ICUBinary.skipBytes(bytes, This.fHeader.fStatusTable - pos); 306 pos = This.fHeader.fStatusTable; 307 This.fStatusTable = ICUBinary.getInts( 308 bytes, This.fHeader.fStatusTableLen / 4, This.fHeader.fStatusTableLen & 3); 309 pos += This.fHeader.fStatusTableLen; 310 311 // 312 // Put the break rule source into a String 313 // 314 if (pos > This.fHeader.fRuleSource) { 315 throw new IOException("Break iterator Rule data corrupt"); 316 } 317 ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos); 318 pos = This.fHeader.fRuleSource; 319 This.fRuleSource = ICUBinary.getString( 320 bytes, This.fHeader.fRuleSourceLen / 2, This.fHeader.fRuleSourceLen & 1); 321 322 if (RuleBasedBreakIterator.fDebugEnv!=null && RuleBasedBreakIterator.fDebugEnv.indexOf("data")>=0) { 323 This.dump(System.out); 324 } 325 return This; 326 } 327 328 ///CLOVER:OFF 329 // Getters for fields from the state table header 330 // 331 private int getStateTableNumStates(short table[]) { 332 if (isBigEndian) { 333 return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff); 334 } else { 335 return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff); 336 } 337 } 338 ///CLOVER:ON 339 340 int getStateTableFlags(short table[]) { 341 // This works for up to 15 flags bits. 342 return table[isBigEndian ? FLAGS + 1 : FLAGS]; 343 } 344 345 ///CLOVER:OFF 346 /* Debug function to display the break iterator data. */ 347 void dump(java.io.PrintStream out) { 348 if (fFTable.length == 0) { 349 // There is no table. Fail early for testing purposes. 350 throw new NullPointerException(); 351 } 352 out.println("RBBI Data Wrapper dump ..."); 353 out.println(); 354 out.println("Forward State Table"); 355 dumpTable(out, fFTable); 356 out.println("Reverse State Table"); 357 dumpTable(out, fRTable); 358 out.println("Forward Safe Points Table"); 359 dumpTable(out, fSFTable); 360 out.println("Reverse Safe Points Table"); 361 dumpTable(out, fSRTable); 362 363 dumpCharCategories(out); 364 out.println("Source Rules: " + fRuleSource); 365 366 } 367 ///CLOVER:ON 368 369 ///CLOVER:OFF 370 /* Fixed width int-to-string conversion. */ 371 static public String intToString(int n, int width) { 372 StringBuilder dest = new StringBuilder(width); 373 dest.append(n); 374 while (dest.length() < width) { 375 dest.insert(0, ' '); 376 } 377 return dest.toString(); 378 } 379 ///CLOVER:ON 380 381 ///CLOVER:OFF 382 /* Fixed width int-to-string conversion. */ 383 static public String intToHexString(int n, int width) { 384 StringBuilder dest = new StringBuilder(width); 385 dest.append(Integer.toHexString(n)); 386 while (dest.length() < width) { 387 dest.insert(0, ' '); 388 } 389 return dest.toString(); 390 } 391 ///CLOVER:ON 392 393 ///CLOVER:OFF 394 /** Dump a state table. (A full set of RBBI rules has 4 state tables.) */ 395 private void dumpTable(java.io.PrintStream out, short table[]) { 396 if (table == null) { 397 out.println(" -- null -- "); 398 } else { 399 int n; 400 int state; 401 StringBuilder header = new StringBuilder(" Row Acc Look Tag"); 402 for (n=0; n<fHeader.fCatCount; n++) { 403 header.append(intToString(n, 5)); 404 } 405 out.println(header.toString()); 406 for (n=0; n<header.length(); n++) { 407 out.print("-"); 408 } 409 out.println(); 410 for (state=0; state< getStateTableNumStates(table); state++) { 411 dumpRow(out, table, state); 412 } 413 out.println(); 414 } 415 } 416 ///CLOVER:ON 417 418 ///CLOVER:OFF 419 /** 420 * Dump (for debug) a single row of an RBBI state table 421 * @param table 422 * @param state 423 */ 424 private void dumpRow(java.io.PrintStream out, short table[], int state) { 425 StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20); 426 dest.append(intToString(state, 4)); 427 int row = getRowIndex(state); 428 if (table[row+ACCEPTING] != 0) { 429 dest.append(intToString(table[row+ACCEPTING], 5)); 430 }else { 431 dest.append(" "); 432 } 433 if (table[row+LOOKAHEAD] != 0) { 434 dest.append(intToString(table[row+LOOKAHEAD], 5)); 435 }else { 436 dest.append(" "); 437 } 438 dest.append(intToString(table[row+TAGIDX], 5)); 439 440 for (int col=0; col<fHeader.fCatCount; col++) { 441 dest.append(intToString(table[row+NEXTSTATES+col], 5)); 442 } 443 444 out.println(dest); 445 } 446 ///CLOVER:ON 447 448 ///CLOVER:OFF 449 private void dumpCharCategories(java.io.PrintStream out) { 450 int n = fHeader.fCatCount; 451 String catStrings[] = new String[n+1]; 452 int rangeStart = 0; 453 int rangeEnd = 0; 454 int lastCat = -1; 455 int char32; 456 int category; 457 int lastNewline[] = new int[n+1]; 458 459 for (category = 0; category <= fHeader.fCatCount; category ++) { 460 catStrings[category] = ""; 461 } 462 out.println("\nCharacter Categories"); 463 out.println("--------------------"); 464 for (char32 = 0; char32<=0x10ffff; char32++) { 465 category = fTrie.getCodePointValue(char32); 466 category &= ~0x4000; // Mask off dictionary bit. 467 if (category < 0 || category > fHeader.fCatCount) { 468 out.println("Error, bad category " + Integer.toHexString(category) + 469 " for char " + Integer.toHexString(char32)); 470 break; 471 } 472 if (category == lastCat ) { 473 rangeEnd = char32; 474 } else { 475 if (lastCat >= 0) { 476 if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) { 477 lastNewline[lastCat] = catStrings[lastCat].length() + 10; 478 catStrings[lastCat] += "\n "; 479 } 480 481 catStrings[lastCat] += " " + Integer.toHexString(rangeStart); 482 if (rangeEnd != rangeStart) { 483 catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd); 484 } 485 } 486 lastCat = category; 487 rangeStart = rangeEnd = char32; 488 } 489 } 490 catStrings[lastCat] += " " + Integer.toHexString(rangeStart); 491 if (rangeEnd != rangeStart) { 492 catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd); 493 } 494 495 for (category = 0; category <= fHeader.fCatCount; category ++) { 496 out.println (intToString(category, 5) + " " + catStrings[category]); 497 } 498 out.println(); 499 } 500 ///CLOVER:ON 501 502 /*static RBBIDataWrapper get(String name) throws IOException { 503 String fullName = "data/" + name; 504 InputStream is = ICUData.getRequiredStream(fullName); 505 return get(is); 506 } 507 508 public static void main(String[] args) { 509 String s; 510 if (args.length == 0) { 511 s = "char"; 512 } else { 513 s = args[0]; 514 } 515 System.out.println("RBBIDataWrapper.main(" + s + ") "); 516 517 String versionedName = ICUResourceBundle.ICU_BUNDLE+"/"+ s + ".brk"; 518 519 try { 520 RBBIDataWrapper This = RBBIDataWrapper.get(versionedName); 521 This.dump(); 522 } 523 catch (Exception e) { 524 System.out.println("Exception: " + e.toString()); 525 } 526 527 }*/ 528} 529