1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 1996-2015, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10
11package android.icu.text;
12
13import java.io.IOException;
14import java.nio.ByteBuffer;
15import java.nio.ByteOrder;
16
17import android.icu.impl.CharTrie;
18import android.icu.impl.ICUBinary;
19import android.icu.impl.ICUBinary.Authenticate;
20import android.icu.impl.Trie;
21
/**
* <p>Internal class used for Rule Based Break Iterators.</p>
* <p>This class provides access to the compiled break rule data, as
* it is stored in a .brk file.</p>
*/
27final class RBBIDataWrapper {
28    //
29    // These fields are the ready-to-use compiled rule data, as
30    //   read from the file.
31    //
32    RBBIDataHeader fHeader;
33    short          fFTable[];
34    short          fRTable[];
35    short          fSFTable[];
36    short          fSRTable[];
37    CharTrie       fTrie;
38    String         fRuleSource;
39    int            fStatusTable[];
40
41    private boolean isBigEndian;
42
43    static final int DATA_FORMAT = 0x42726b20;  // "Brk "
44    static final int FORMAT_VERSION = 0x03010000;  // 3.1
45
46    private static final class IsAcceptable implements Authenticate {
47        // @Override when we switch to Java 6
48        @Override
49        public boolean isDataVersionAcceptable(byte version[]) {
50            return version[0] == (FORMAT_VERSION >>> 24);
51        }
52    }
53    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
54
55    //
56    // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
57    //   Used by the rule compiler when flattening the data.
58    //
59    final static int    DH_SIZE           = 24;
60    final static int    DH_MAGIC          = 0;
61    final static int    DH_FORMATVERSION  = 1;
62    final static int    DH_LENGTH         = 2;
63    final static int    DH_CATCOUNT       = 3;
64    final static int    DH_FTABLE         = 4;
65    final static int    DH_FTABLELEN      = 5;
66    final static int    DH_RTABLE         = 6;
67    final static int    DH_RTABLELEN      = 7;
68    final static int    DH_SFTABLE        = 8;
69    final static int    DH_SFTABLELEN     = 9;
70    final static int    DH_SRTABLE        = 10;
71    final static int    DH_SRTABLELEN     = 11;
72    final static int    DH_TRIE           = 12;
73    final static int    DH_TRIELEN        = 13;
74    final static int    DH_RULESOURCE     = 14;
75    final static int    DH_RULESOURCELEN  = 15;
76    final static int    DH_STATUSTABLE    = 16;
77    final static int    DH_STATUSTABLELEN = 17;
78
79
80    // Index offsets to the fields in a state table row.
81    //    Corresponds to struct RBBIStateTableRow in the C version.
82    //
83    final static int      ACCEPTING  = 0;
84    final static int      LOOKAHEAD  = 1;
85    final static int      TAGIDX     = 2;
86    final static int      RESERVED   = 3;
87    final static int      NEXTSTATES = 4;
88
89    // Index offsets to header fields of a state table
90    //     struct RBBIStateTable {...   in the C version.
91    //
92            static final int NUMSTATES  = 0;
93            static final int ROWLEN     = 2;
94            static final int FLAGS      = 4;
95    //ivate static final int RESERVED_2 = 6;
96    private static final int ROW_DATA   = 8;
97
98    //  Bit selectors for the "FLAGS" field of the state table header
99    //     enum RBBIStateTableFlags in the C version.
100    //
101    final static int      RBBI_LOOKAHEAD_HARD_BREAK = 1;
102    final static int      RBBI_BOF_REQUIRED         = 2;
103
104    /**
105     * Data Header.  A struct-like class with the fields from the RBBI data file header.
106     */
107    final static class RBBIDataHeader {
108        int         fMagic;         //  == 0xbla0
109        int         fVersion;       //  == 1 (for ICU 3.2 and earlier.
110        byte[]      fFormatVersion; //  For ICU 3.4 and later.
111        int         fLength;        //  Total length in bytes of this RBBI Data,
112                                       //      including all sections, not just the header.
113        int         fCatCount;      //  Number of character categories.
114
115        //
116        //  Offsets and sizes of each of the subsections within the RBBI data.
117        //  All offsets are bytes from the start of the RBBIDataHeader.
118        //  All sizes are in bytes.
119        //
120        int         fFTable;         //  forward state transition table.
121        int         fFTableLen;
122        int         fRTable;         //  Offset to the reverse state transition table.
123        int         fRTableLen;
124        int         fSFTable;        //  safe point forward transition table
125        int         fSFTableLen;
126        int         fSRTable;        //  safe point reverse transition table
127        int         fSRTableLen;
128        int         fTrie;           //  Offset to Trie data for character categories
129        int         fTrieLen;
130        int         fRuleSource;     //  Offset to the source for for the break
131        int         fRuleSourceLen;  //    rules.  Stored UChar *.
132        int         fStatusTable;    // Offset to the table of rule status values
133        int         fStatusTableLen;
134
135        public RBBIDataHeader() {
136            fMagic = 0;
137            fFormatVersion = new byte[4];
138        }
139    }
140
141
142    /**
143     * RBBI State Table Indexing Function.  Given a state number, return the
144     * array index of the start of the state table row for that state.
145     *
146     */
147    int getRowIndex(int state){
148        return ROW_DATA + state * (fHeader.fCatCount + 4);
149    }
150
151    static class TrieFoldingFunc implements  Trie.DataManipulate {
152        @Override
153        public int getFoldingOffset(int data) {
154            if ((data & 0x8000) != 0) {
155                return data & 0x7fff;
156            } else {
157                return 0;
158            }
159        }
160    }
161    static TrieFoldingFunc  fTrieFoldingFunc = new TrieFoldingFunc();
162
163
164    RBBIDataWrapper() {
165    }
166
167    /*
168     *  Get an RBBIDataWrapper from an InputStream onto a pre-compiled set
169     *  of RBBI rules.
170     */
171    static RBBIDataWrapper get(ByteBuffer bytes) throws IOException {
172        RBBIDataWrapper This = new RBBIDataWrapper();
173
174        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
175        This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;
176
177        // Read in the RBBI data header...
178        This.fHeader = new  RBBIDataHeader();
179        This.fHeader.fMagic          = bytes.getInt();
180        // Read the same 4 bytes as an int and as a byte array: The data format could be
181        // the old fVersion=1 (TODO: probably not with a real ICU data header?)
182        // or the new fFormatVersion=3.x.
183        This.fHeader.fVersion        = bytes.getInt(bytes.position());
184        This.fHeader.fFormatVersion[0] = bytes.get();
185        This.fHeader.fFormatVersion[1] = bytes.get();
186        This.fHeader.fFormatVersion[2] = bytes.get();
187        This.fHeader.fFormatVersion[3] = bytes.get();
188        This.fHeader.fLength         = bytes.getInt();
189        This.fHeader.fCatCount       = bytes.getInt();
190        This.fHeader.fFTable         = bytes.getInt();
191        This.fHeader.fFTableLen      = bytes.getInt();
192        This.fHeader.fRTable         = bytes.getInt();
193        This.fHeader.fRTableLen      = bytes.getInt();
194        This.fHeader.fSFTable        = bytes.getInt();
195        This.fHeader.fSFTableLen     = bytes.getInt();
196        This.fHeader.fSRTable        = bytes.getInt();
197        This.fHeader.fSRTableLen     = bytes.getInt();
198        This.fHeader.fTrie           = bytes.getInt();
199        This.fHeader.fTrieLen        = bytes.getInt();
200        This.fHeader.fRuleSource     = bytes.getInt();
201        This.fHeader.fRuleSourceLen  = bytes.getInt();
202        This.fHeader.fStatusTable    = bytes.getInt();
203        This.fHeader.fStatusTableLen = bytes.getInt();
204        ICUBinary.skipBytes(bytes, 6 * 4);    // uint32_t  fReserved[6];
205
206
207        if (This.fHeader.fMagic != 0xb1a0 ||
208                ! (This.fHeader.fVersion == 1  ||         // ICU 3.2 and earlier
209                   This.fHeader.fFormatVersion[0] == 3)   // ICU 3.4
210            ) {
211            throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
212        }
213
214        // Current position in the buffer.
215        int pos = 24 * 4;     // offset of end of header, which has 24 fields, all int32_t (4 bytes)
216
217        //
218        // Read in the Forward state transition table as an array of shorts.
219        //
220
221        //   Quick Sanity Check
222        if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) {
223             throw new IOException("Break iterator Rule data corrupt");
224        }
225
226        //    Skip over any padding preceding this table
227        ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos);
228        pos = This.fHeader.fFTable;
229
230        This.fFTable = ICUBinary.getShorts(
231                bytes, This.fHeader.fFTableLen / 2, This.fHeader.fFTableLen & 1);
232        pos += This.fHeader.fFTableLen;
233
234        //
235        // Read in the Reverse state table
236        //
237
238        // Skip over any padding in the file
239        ICUBinary.skipBytes(bytes, This.fHeader.fRTable - pos);
240        pos = This.fHeader.fRTable;
241
242        // Create & fill the table itself.
243        This.fRTable = ICUBinary.getShorts(
244                bytes, This.fHeader.fRTableLen / 2, This.fHeader.fRTableLen & 1);
245        pos += This.fHeader.fRTableLen;
246
247        //
248        // Read in the Safe Forward state table
249        //
250        if (This.fHeader.fSFTableLen > 0) {
251            // Skip over any padding in the file
252            ICUBinary.skipBytes(bytes, This.fHeader.fSFTable - pos);
253            pos = This.fHeader.fSFTable;
254
255            // Create & fill the table itself.
256            This.fSFTable = ICUBinary.getShorts(
257                    bytes, This.fHeader.fSFTableLen / 2, This.fHeader.fSFTableLen & 1);
258            pos += This.fHeader.fSFTableLen;
259        }
260
261        //
262        // Read in the Safe Reverse state table
263        //
264        if (This.fHeader.fSRTableLen > 0) {
265            // Skip over any padding in the file
266            ICUBinary.skipBytes(bytes, This.fHeader.fSRTable - pos);
267            pos = This.fHeader.fSRTable;
268
269            // Create & fill the table itself.
270            This.fSRTable = ICUBinary.getShorts(
271                    bytes, This.fHeader.fSRTableLen / 2, This.fHeader.fSRTableLen & 1);
272            pos += This.fHeader.fSRTableLen;
273        }
274
275        //
276        // Unserialize the Character categories TRIE
277        //     Because we can't be absolutely certain where the Trie deserialize will
278        //     leave the buffer, leave position unchanged.
279        //     The seek to the start of the next item following the TRIE will get us
280        //     back in sync.
281        //
282        ICUBinary.skipBytes(bytes, This.fHeader.fTrie - pos);  // seek buffer from end of
283        pos = This.fHeader.fTrie;               // previous section to the start of the trie
284
285        bytes.mark();                           // Mark position of start of TRIE in the input
286                                                //  and tell Java to keep the mark valid so long
287                                                //  as we don't go more than 100 bytes past the
288                                                //  past the end of the TRIE.
289
290        This.fTrie = new CharTrie(bytes, fTrieFoldingFunc);  // Deserialize the TRIE, leaving buffer
291                                                //  at an unknown position, preceding the
292                                                //  padding between TRIE and following section.
293
294        bytes.reset();                          // Move buffer back to marked position at
295                                                //   the start of the serialized TRIE.  Now our
296                                                //   "pos" variable and the buffer are in
297                                                //   agreement.
298
299        //
300        // Read the Rule Status Table
301        //
302        if (pos > This.fHeader.fStatusTable) {
303            throw new IOException("Break iterator Rule data corrupt");
304        }
305        ICUBinary.skipBytes(bytes, This.fHeader.fStatusTable - pos);
306        pos = This.fHeader.fStatusTable;
307        This.fStatusTable = ICUBinary.getInts(
308                bytes, This.fHeader.fStatusTableLen / 4, This.fHeader.fStatusTableLen & 3);
309        pos += This.fHeader.fStatusTableLen;
310
311        //
312        // Put the break rule source into a String
313        //
314        if (pos > This.fHeader.fRuleSource) {
315            throw new IOException("Break iterator Rule data corrupt");
316        }
317        ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos);
318        pos = This.fHeader.fRuleSource;
319        This.fRuleSource = ICUBinary.getString(
320                bytes, This.fHeader.fRuleSourceLen / 2, This.fHeader.fRuleSourceLen & 1);
321
322        if (RuleBasedBreakIterator.fDebugEnv!=null && RuleBasedBreakIterator.fDebugEnv.indexOf("data")>=0) {
323            This.dump(System.out);
324        }
325        return This;
326    }
327
328    ///CLOVER:OFF
329    //  Getters for fields from the state table header
330    //
331    private int getStateTableNumStates(short table[]) {
332        if (isBigEndian) {
333            return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
334        } else {
335            return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
336        }
337    }
338    ///CLOVER:ON
339
340    int getStateTableFlags(short table[]) {
341        // This works for up to 15 flags bits.
342        return table[isBigEndian ? FLAGS + 1 : FLAGS];
343    }
344
345    ///CLOVER:OFF
346    /* Debug function to display the break iterator data. */
347    void dump(java.io.PrintStream out) {
348        if (fFTable.length == 0) {
349            // There is no table. Fail early for testing purposes.
350            throw new NullPointerException();
351        }
352        out.println("RBBI Data Wrapper dump ...");
353        out.println();
354        out.println("Forward State Table");
355        dumpTable(out, fFTable);
356        out.println("Reverse State Table");
357        dumpTable(out, fRTable);
358        out.println("Forward Safe Points Table");
359        dumpTable(out, fSFTable);
360        out.println("Reverse Safe Points Table");
361        dumpTable(out, fSRTable);
362
363        dumpCharCategories(out);
364        out.println("Source Rules: " + fRuleSource);
365
366    }
367    ///CLOVER:ON
368
369    ///CLOVER:OFF
370    /* Fixed width int-to-string conversion. */
371    static public String intToString(int n, int width) {
372        StringBuilder  dest = new StringBuilder(width);
373        dest.append(n);
374        while (dest.length() < width) {
375           dest.insert(0, ' ');
376        }
377        return dest.toString();
378    }
379    ///CLOVER:ON
380
381    ///CLOVER:OFF
382    /* Fixed width int-to-string conversion. */
383    static public String intToHexString(int n, int width) {
384        StringBuilder  dest = new StringBuilder(width);
385        dest.append(Integer.toHexString(n));
386        while (dest.length() < width) {
387           dest.insert(0, ' ');
388        }
389        return dest.toString();
390    }
391    ///CLOVER:ON
392
393    ///CLOVER:OFF
394    /** Dump a state table.  (A full set of RBBI rules has 4 state tables.)  */
395    private void dumpTable(java.io.PrintStream out, short table[]) {
396        if (table == null)   {
397            out.println("  -- null -- ");
398        } else {
399            int n;
400            int state;
401            StringBuilder header = new StringBuilder(" Row  Acc Look  Tag");
402            for (n=0; n<fHeader.fCatCount; n++) {
403                header.append(intToString(n, 5));
404            }
405            out.println(header.toString());
406            for (n=0; n<header.length(); n++) {
407                out.print("-");
408            }
409            out.println();
410            for (state=0; state< getStateTableNumStates(table); state++) {
411                dumpRow(out, table, state);
412            }
413            out.println();
414        }
415    }
416    ///CLOVER:ON
417
418    ///CLOVER:OFF
419    /**
420     * Dump (for debug) a single row of an RBBI state table
421     * @param table
422     * @param state
423     */
424    private void dumpRow(java.io.PrintStream out, short table[], int   state) {
425        StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20);
426        dest.append(intToString(state, 4));
427        int row = getRowIndex(state);
428        if (table[row+ACCEPTING] != 0) {
429           dest.append(intToString(table[row+ACCEPTING], 5));
430        }else {
431            dest.append("     ");
432        }
433        if (table[row+LOOKAHEAD] != 0) {
434            dest.append(intToString(table[row+LOOKAHEAD], 5));
435        }else {
436            dest.append("     ");
437        }
438        dest.append(intToString(table[row+TAGIDX], 5));
439
440        for (int col=0; col<fHeader.fCatCount; col++) {
441            dest.append(intToString(table[row+NEXTSTATES+col], 5));
442        }
443
444        out.println(dest);
445    }
446    ///CLOVER:ON
447
448    ///CLOVER:OFF
449    private void dumpCharCategories(java.io.PrintStream out) {
450        int n = fHeader.fCatCount;
451        String   catStrings[] = new  String[n+1];
452        int      rangeStart = 0;
453        int      rangeEnd = 0;
454        int      lastCat = -1;
455        int      char32;
456        int      category;
457        int      lastNewline[] = new int[n+1];
458
459        for (category = 0; category <= fHeader.fCatCount; category ++) {
460            catStrings[category] = "";
461        }
462        out.println("\nCharacter Categories");
463        out.println("--------------------");
464        for (char32 = 0; char32<=0x10ffff; char32++) {
465            category = fTrie.getCodePointValue(char32);
466            category &= ~0x4000;            // Mask off dictionary bit.
467            if (category < 0 || category > fHeader.fCatCount) {
468                out.println("Error, bad category " + Integer.toHexString(category) +
469                        " for char " + Integer.toHexString(char32));
470                break;
471            }
472            if (category == lastCat ) {
473                rangeEnd = char32;
474            } else {
475                if (lastCat >= 0) {
476                    if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
477                        lastNewline[lastCat] = catStrings[lastCat].length() + 10;
478                        catStrings[lastCat] += "\n       ";
479                    }
480
481                    catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
482                    if (rangeEnd != rangeStart) {
483                        catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
484                    }
485                }
486                lastCat = category;
487                rangeStart = rangeEnd = char32;
488            }
489        }
490        catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
491        if (rangeEnd != rangeStart) {
492            catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
493        }
494
495        for (category = 0; category <= fHeader.fCatCount; category ++) {
496            out.println (intToString(category, 5) + "  " + catStrings[category]);
497        }
498        out.println();
499    }
500    ///CLOVER:ON
501
502    /*static RBBIDataWrapper get(String name) throws IOException {
503        String  fullName = "data/" + name;
504        InputStream is = ICUData.getRequiredStream(fullName);
505        return get(is);
506    }
507
508    public static void main(String[] args) {
509        String s;
510        if (args.length == 0) {
511            s = "char";
512        } else {
513            s = args[0];
514        }
515        System.out.println("RBBIDataWrapper.main(" + s + ") ");
516
517        String versionedName = ICUResourceBundle.ICU_BUNDLE+"/"+ s + ".brk";
518
519        try {
520            RBBIDataWrapper This = RBBIDataWrapper.get(versionedName);
521            This.dump();
522        }
523       catch (Exception e) {
524           System.out.println("Exception: " + e.toString());
525       }
526
527    }*/
528}
529