1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3 *******************************************************************************
4 * Copyright (C) 1996-2015, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 *******************************************************************************
7 */
8
9package android.icu.text;
10
11import java.io.IOException;
12import java.nio.ByteBuffer;
13import java.nio.ByteOrder;
14
15import android.icu.impl.CharTrie;
16import android.icu.impl.ICUBinary;
17import android.icu.impl.ICUBinary.Authenticate;
18import android.icu.impl.Trie;
19
/**
* <p>Internal class used for Rule Based Break Iterators.</p>
* <p>This class provides access to the compiled break rule data, as
* it is stored in a .brk file.</p>
*/
25final class RBBIDataWrapper {
26    //
27    // These fields are the ready-to-use compiled rule data, as
28    //   read from the file.
29    //
30    RBBIDataHeader fHeader;
31    short          fFTable[];
32    short          fRTable[];
33    short          fSFTable[];
34    short          fSRTable[];
35    CharTrie       fTrie;
36    String         fRuleSource;
37    int            fStatusTable[];
38
39    private boolean isBigEndian;
40
41    static final int DATA_FORMAT = 0x42726b20;  // "Brk "
42    static final int FORMAT_VERSION = 0x03010000;  // 3.1
43
44    private static final class IsAcceptable implements Authenticate {
45        // @Override when we switch to Java 6
46        public boolean isDataVersionAcceptable(byte version[]) {
47            return version[0] == (FORMAT_VERSION >>> 24);
48        }
49    }
50    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
51
52    //
53    // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
54    //   Used by the rule compiler when flattening the data.
55    //
56    final static int    DH_SIZE           = 24;
57    final static int    DH_MAGIC          = 0;
58    final static int    DH_FORMATVERSION  = 1;
59    final static int    DH_LENGTH         = 2;
60    final static int    DH_CATCOUNT       = 3;
61    final static int    DH_FTABLE         = 4;
62    final static int    DH_FTABLELEN      = 5;
63    final static int    DH_RTABLE         = 6;
64    final static int    DH_RTABLELEN      = 7;
65    final static int    DH_SFTABLE        = 8;
66    final static int    DH_SFTABLELEN     = 9;
67    final static int    DH_SRTABLE        = 10;
68    final static int    DH_SRTABLELEN     = 11;
69    final static int    DH_TRIE           = 12;
70    final static int    DH_TRIELEN        = 13;
71    final static int    DH_RULESOURCE     = 14;
72    final static int    DH_RULESOURCELEN  = 15;
73    final static int    DH_STATUSTABLE    = 16;
74    final static int    DH_STATUSTABLELEN = 17;
75
76
77    // Index offsets to the fields in a state table row.
78    //    Corresponds to struct RBBIStateTableRow in the C version.
79    //
80    final static int      ACCEPTING  = 0;
81    final static int      LOOKAHEAD  = 1;
82    final static int      TAGIDX     = 2;
83    final static int      RESERVED   = 3;
84    final static int      NEXTSTATES = 4;
85
86    // Index offsets to header fields of a state table
87    //     struct RBBIStateTable {...   in the C version.
88    //
89            static final int NUMSTATES  = 0;
90            static final int ROWLEN     = 2;
91            static final int FLAGS      = 4;
92    //ivate static final int RESERVED_2 = 6;
93    private static final int ROW_DATA   = 8;
94
95    //  Bit selectors for the "FLAGS" field of the state table header
96    //     enum RBBIStateTableFlags in the C version.
97    //
98    final static int      RBBI_LOOKAHEAD_HARD_BREAK = 1;
99    final static int      RBBI_BOF_REQUIRED         = 2;
100
101    /**
102     * Data Header.  A struct-like class with the fields from the RBBI data file header.
103     */
104    final static class RBBIDataHeader {
105        int         fMagic;         //  == 0xbla0
106        int         fVersion;       //  == 1 (for ICU 3.2 and earlier.
107        byte[]      fFormatVersion; //  For ICU 3.4 and later.
108        int         fLength;        //  Total length in bytes of this RBBI Data,
109                                       //      including all sections, not just the header.
110        int         fCatCount;      //  Number of character categories.
111
112        //
113        //  Offsets and sizes of each of the subsections within the RBBI data.
114        //  All offsets are bytes from the start of the RBBIDataHeader.
115        //  All sizes are in bytes.
116        //
117        int         fFTable;         //  forward state transition table.
118        int         fFTableLen;
119        int         fRTable;         //  Offset to the reverse state transition table.
120        int         fRTableLen;
121        int         fSFTable;        //  safe point forward transition table
122        int         fSFTableLen;
123        int         fSRTable;        //  safe point reverse transition table
124        int         fSRTableLen;
125        int         fTrie;           //  Offset to Trie data for character categories
126        int         fTrieLen;
127        int         fRuleSource;     //  Offset to the source for for the break
128        int         fRuleSourceLen;  //    rules.  Stored UChar *.
129        int         fStatusTable;    // Offset to the table of rule status values
130        int         fStatusTableLen;
131
132        public RBBIDataHeader() {
133            fMagic = 0;
134            fFormatVersion = new byte[4];
135        }
136    }
137
138
139    /**
140     * RBBI State Table Indexing Function.  Given a state number, return the
141     * array index of the start of the state table row for that state.
142     *
143     */
144    int getRowIndex(int state){
145        return ROW_DATA + state * (fHeader.fCatCount + 4);
146    }
147
148    static class TrieFoldingFunc implements  Trie.DataManipulate {
149        public int getFoldingOffset(int data) {
150            if ((data & 0x8000) != 0) {
151                return data & 0x7fff;
152            } else {
153                return 0;
154            }
155        }
156    }
157    static TrieFoldingFunc  fTrieFoldingFunc = new TrieFoldingFunc();
158
159
    /**
     * Creates an empty wrapper.  No data is loaded here; the static
     * {@code get(ByteBuffer)} factory constructs an instance and then fills in its fields.
     */
    RBBIDataWrapper() {
    }
162
163    /*
164     *  Get an RBBIDataWrapper from an InputStream onto a pre-compiled set
165     *  of RBBI rules.
166     */
167    static RBBIDataWrapper get(ByteBuffer bytes) throws IOException {
168        RBBIDataWrapper This = new RBBIDataWrapper();
169
170        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
171        This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;
172
173        // Read in the RBBI data header...
174        This.fHeader = new  RBBIDataHeader();
175        This.fHeader.fMagic          = bytes.getInt();
176        // Read the same 4 bytes as an int and as a byte array: The data format could be
177        // the old fVersion=1 (TODO: probably not with a real ICU data header?)
178        // or the new fFormatVersion=3.x.
179        This.fHeader.fVersion        = bytes.getInt(bytes.position());
180        This.fHeader.fFormatVersion[0] = bytes.get();
181        This.fHeader.fFormatVersion[1] = bytes.get();
182        This.fHeader.fFormatVersion[2] = bytes.get();
183        This.fHeader.fFormatVersion[3] = bytes.get();
184        This.fHeader.fLength         = bytes.getInt();
185        This.fHeader.fCatCount       = bytes.getInt();
186        This.fHeader.fFTable         = bytes.getInt();
187        This.fHeader.fFTableLen      = bytes.getInt();
188        This.fHeader.fRTable         = bytes.getInt();
189        This.fHeader.fRTableLen      = bytes.getInt();
190        This.fHeader.fSFTable        = bytes.getInt();
191        This.fHeader.fSFTableLen     = bytes.getInt();
192        This.fHeader.fSRTable        = bytes.getInt();
193        This.fHeader.fSRTableLen     = bytes.getInt();
194        This.fHeader.fTrie           = bytes.getInt();
195        This.fHeader.fTrieLen        = bytes.getInt();
196        This.fHeader.fRuleSource     = bytes.getInt();
197        This.fHeader.fRuleSourceLen  = bytes.getInt();
198        This.fHeader.fStatusTable    = bytes.getInt();
199        This.fHeader.fStatusTableLen = bytes.getInt();
200        ICUBinary.skipBytes(bytes, 6 * 4);    // uint32_t  fReserved[6];
201
202
203        if (This.fHeader.fMagic != 0xb1a0 ||
204                ! (This.fHeader.fVersion == 1  ||         // ICU 3.2 and earlier
205                   This.fHeader.fFormatVersion[0] == 3)   // ICU 3.4
206            ) {
207            throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
208        }
209
210        // Current position in the buffer.
211        int pos = 24 * 4;     // offset of end of header, which has 24 fields, all int32_t (4 bytes)
212
213        //
214        // Read in the Forward state transition table as an array of shorts.
215        //
216
217        //   Quick Sanity Check
218        if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) {
219             throw new IOException("Break iterator Rule data corrupt");
220        }
221
222        //    Skip over any padding preceding this table
223        ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos);
224        pos = This.fHeader.fFTable;
225
226        This.fFTable = ICUBinary.getShorts(
227                bytes, This.fHeader.fFTableLen / 2, This.fHeader.fFTableLen & 1);
228        pos += This.fHeader.fFTableLen;
229
230        //
231        // Read in the Reverse state table
232        //
233
234        // Skip over any padding in the file
235        ICUBinary.skipBytes(bytes, This.fHeader.fRTable - pos);
236        pos = This.fHeader.fRTable;
237
238        // Create & fill the table itself.
239        This.fRTable = ICUBinary.getShorts(
240                bytes, This.fHeader.fRTableLen / 2, This.fHeader.fRTableLen & 1);
241        pos += This.fHeader.fRTableLen;
242
243        //
244        // Read in the Safe Forward state table
245        //
246        if (This.fHeader.fSFTableLen > 0) {
247            // Skip over any padding in the file
248            ICUBinary.skipBytes(bytes, This.fHeader.fSFTable - pos);
249            pos = This.fHeader.fSFTable;
250
251            // Create & fill the table itself.
252            This.fSFTable = ICUBinary.getShorts(
253                    bytes, This.fHeader.fSFTableLen / 2, This.fHeader.fSFTableLen & 1);
254            pos += This.fHeader.fSFTableLen;
255        }
256
257        //
258        // Read in the Safe Reverse state table
259        //
260        if (This.fHeader.fSRTableLen > 0) {
261            // Skip over any padding in the file
262            ICUBinary.skipBytes(bytes, This.fHeader.fSRTable - pos);
263            pos = This.fHeader.fSRTable;
264
265            // Create & fill the table itself.
266            This.fSRTable = ICUBinary.getShorts(
267                    bytes, This.fHeader.fSRTableLen / 2, This.fHeader.fSRTableLen & 1);
268            pos += This.fHeader.fSRTableLen;
269        }
270
271        //
272        // Unserialize the Character categories TRIE
273        //     Because we can't be absolutely certain where the Trie deserialize will
274        //     leave the buffer, leave position unchanged.
275        //     The seek to the start of the next item following the TRIE will get us
276        //     back in sync.
277        //
278        ICUBinary.skipBytes(bytes, This.fHeader.fTrie - pos);  // seek buffer from end of
279        pos = This.fHeader.fTrie;               // previous section to the start of the trie
280
281        bytes.mark();                           // Mark position of start of TRIE in the input
282                                                //  and tell Java to keep the mark valid so long
283                                                //  as we don't go more than 100 bytes past the
284                                                //  past the end of the TRIE.
285
286        This.fTrie = new CharTrie(bytes, fTrieFoldingFunc);  // Deserialize the TRIE, leaving buffer
287                                                //  at an unknown position, preceding the
288                                                //  padding between TRIE and following section.
289
290        bytes.reset();                          // Move buffer back to marked position at
291                                                //   the start of the serialized TRIE.  Now our
292                                                //   "pos" variable and the buffer are in
293                                                //   agreement.
294
295        //
296        // Read the Rule Status Table
297        //
298        if (pos > This.fHeader.fStatusTable) {
299            throw new IOException("Break iterator Rule data corrupt");
300        }
301        ICUBinary.skipBytes(bytes, This.fHeader.fStatusTable - pos);
302        pos = This.fHeader.fStatusTable;
303        This.fStatusTable = ICUBinary.getInts(
304                bytes, This.fHeader.fStatusTableLen / 4, This.fHeader.fStatusTableLen & 3);
305        pos += This.fHeader.fStatusTableLen;
306
307        //
308        // Put the break rule source into a String
309        //
310        if (pos > This.fHeader.fRuleSource) {
311            throw new IOException("Break iterator Rule data corrupt");
312        }
313        ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos);
314        pos = This.fHeader.fRuleSource;
315        This.fRuleSource = ICUBinary.getString(
316                bytes, This.fHeader.fRuleSourceLen / 2, This.fHeader.fRuleSourceLen & 1);
317
318        if (RuleBasedBreakIterator.fDebugEnv!=null && RuleBasedBreakIterator.fDebugEnv.indexOf("data")>=0) {
319            This.dump();
320        }
321        return This;
322    }
323
324    ///CLOVER:OFF
325    //  Getters for fields from the state table header
326    //
327    private int getStateTableNumStates(short table[]) {
328        if (isBigEndian) {
329            return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
330        } else {
331            return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
332        }
333    }
334    ///CLOVER:ON
335
336    int getStateTableFlags(short table[]) {
337        // This works for up to 15 flags bits.
338        return table[isBigEndian ? FLAGS + 1 : FLAGS];
339    }
340
341    ///CLOVER:OFF
342    /* Debug function to display the break iterator data. */
343    void dump() {
344        if (fFTable.length == 0) {
345            // There is no table. Fail early for testing purposes.
346            throw new NullPointerException();
347        }
348        System.out.println("RBBI Data Wrapper dump ...");
349        System.out.println();
350        System.out.println("Forward State Table");
351        dumpTable(fFTable);
352        System.out.println("Reverse State Table");
353        dumpTable(fRTable);
354        System.out.println("Forward Safe Points Table");
355        dumpTable(fSFTable);
356        System.out.println("Reverse Safe Points Table");
357        dumpTable(fSRTable);
358
359        dumpCharCategories();
360        System.out.println("Source Rules: " + fRuleSource);
361
362    }
363    ///CLOVER:ON
364
365    ///CLOVER:OFF
366    /* Fixed width int-to-string conversion. */
367    static public String intToString(int n, int width) {
368        StringBuilder  dest = new StringBuilder(width);
369        dest.append(n);
370        while (dest.length() < width) {
371           dest.insert(0, ' ');
372        }
373        return dest.toString();
374    }
375    ///CLOVER:ON
376
377    ///CLOVER:OFF
378    /* Fixed width int-to-string conversion. */
379    static public String intToHexString(int n, int width) {
380        StringBuilder  dest = new StringBuilder(width);
381        dest.append(Integer.toHexString(n));
382        while (dest.length() < width) {
383           dest.insert(0, ' ');
384        }
385        return dest.toString();
386    }
387    ///CLOVER:ON
388
389    ///CLOVER:OFF
390    /** Dump a state table.  (A full set of RBBI rules has 4 state tables.)  */
391    private void dumpTable(short table[]) {
392        if (table == null)   {
393            System.out.println("  -- null -- ");
394        } else {
395            int n;
396            int state;
397            StringBuilder header = new StringBuilder(" Row  Acc Look  Tag");
398            for (n=0; n<fHeader.fCatCount; n++) {
399                header.append(intToString(n, 5));
400            }
401            System.out.println(header.toString());
402            for (n=0; n<header.length(); n++) {
403                System.out.print("-");
404            }
405            System.out.println();
406            for (state=0; state< getStateTableNumStates(table); state++) {
407                dumpRow(table, state);
408            }
409            System.out.println();
410        }
411    }
412    ///CLOVER:ON
413
414    ///CLOVER:OFF
415    /**
416     * Dump (for debug) a single row of an RBBI state table
417     * @param table
418     * @param state
419     */
420    private void dumpRow(short table[], int   state) {
421        StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20);
422        dest.append(intToString(state, 4));
423        int row = getRowIndex(state);
424        if (table[row+ACCEPTING] != 0) {
425           dest.append(intToString(table[row+ACCEPTING], 5));
426        }else {
427            dest.append("     ");
428        }
429        if (table[row+LOOKAHEAD] != 0) {
430            dest.append(intToString(table[row+LOOKAHEAD], 5));
431        }else {
432            dest.append("     ");
433        }
434        dest.append(intToString(table[row+TAGIDX], 5));
435
436        for (int col=0; col<fHeader.fCatCount; col++) {
437            dest.append(intToString(table[row+NEXTSTATES+col], 5));
438        }
439
440        System.out.println(dest);
441    }
442    ///CLOVER:ON
443
444    ///CLOVER:OFF
445    private void dumpCharCategories() {
446        int n = fHeader.fCatCount;
447        String   catStrings[] = new  String[n+1];
448        int      rangeStart = 0;
449        int      rangeEnd = 0;
450        int      lastCat = -1;
451        int      char32;
452        int      category;
453        int      lastNewline[] = new int[n+1];
454
455        for (category = 0; category <= fHeader.fCatCount; category ++) {
456            catStrings[category] = "";
457        }
458        System.out.println("\nCharacter Categories");
459        System.out.println("--------------------");
460        for (char32 = 0; char32<=0x10ffff; char32++) {
461            category = fTrie.getCodePointValue(char32);
462            category &= ~0x4000;            // Mask off dictionary bit.
463            if (category < 0 || category > fHeader.fCatCount) {
464                System.out.println("Error, bad category " + Integer.toHexString(category) +
465                        " for char " + Integer.toHexString(char32));
466                break;
467            }
468            if (category == lastCat ) {
469                rangeEnd = char32;
470            } else {
471                if (lastCat >= 0) {
472                    if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
473                        lastNewline[lastCat] = catStrings[lastCat].length() + 10;
474                        catStrings[lastCat] += "\n       ";
475                    }
476
477                    catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
478                    if (rangeEnd != rangeStart) {
479                        catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
480                    }
481                }
482                lastCat = category;
483                rangeStart = rangeEnd = char32;
484            }
485        }
486        catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
487        if (rangeEnd != rangeStart) {
488            catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
489        }
490
491        for (category = 0; category <= fHeader.fCatCount; category ++) {
492            System.out.println (intToString(category, 5) + "  " + catStrings[category]);
493        }
494        System.out.println();
495    }
496    ///CLOVER:ON
497
498    /*static RBBIDataWrapper get(String name) throws IOException {
499        String  fullName = "data/" + name;
500        InputStream is = ICUData.getRequiredStream(fullName);
501        return get(is);
502    }
503
504    public static void main(String[] args) {
505        String s;
506        if (args.length == 0) {
507            s = "char";
508        } else {
509            s = args[0];
510        }
511        System.out.println("RBBIDataWrapper.main(" + s + ") ");
512
513        String versionedName = ICUResourceBundle.ICU_BUNDLE+"/"+ s + ".brk";
514
515        try {
516            RBBIDataWrapper This = RBBIDataWrapper.get(versionedName);
517            This.dump();
518        }
519       catch (Exception e) {
520           System.out.println("Exception: " + e.toString());
521       }
522
523    }*/
524}
525