1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8package com.ibm.icu.text;
9
10import java.io.IOException;
11import java.nio.ByteBuffer;
12import java.text.CharacterIterator;
13
14import com.ibm.icu.impl.ICUBinary;
15
16/*
17 * This is a class used to load in the compact trie dictionary file
18 * used for dictionary based break iteration.
19 */
20class BreakCTDictionary {
21    private CompactTrieHeader fData;
22
23    static class CompactTrieHeader {
24        int size; // Size of data in bytes
25
26        int magic; // Magic number (including versions)
27
28        int nodeCount; // Number of entries in offsets[]
29
30        int root; // Node number of the root node
31
32        int []offset; // Offsets to nodes from start of data
33
34        CompactTrieHeader() {
35            size = 0;
36            magic = 0;
37            nodeCount = 0;
38            root = 0;
39            offset = null;
40        }
41    }
42
43    static final class CompactTrieNodeFlags {
44        static final int kVerticalNode = 0x1000; // This is a vertical node
45
46        static final int kParentEndsWord = 0x2000; // The node whose equal link
47                                                    // points to this ends a
48                                                    // word
49
50        static final int kReservedFlag1 = 0x4000;
51
52        static final int kReservedFlag2 = 0x8000;
53
54        static final int kCountMask = 0x0FFF; // The count portion of
55                                                // flagscount
56
57        static final int kFlagMask = 0xF000; // The flags portion of
58                                                // flagscount
59    }
60
61    // The two node types are distinguished by the kVerticalNode flag.
62    static class CompactTrieHorizontalNode {
63        char ch; // UChar
64
65        int equal; // Equal link node index
66
67        CompactTrieHorizontalNode(char newCh, int newEqual) {
68            ch = newCh;
69            equal = newEqual;
70        }
71    }
72
73    static class CompactTrieVerticalNode {
74        int equal; // Equal link node index
75
76        char chars[]; // Code units
77
78        CompactTrieVerticalNode() {
79            equal = 0;
80            chars = null;
81        }
82    }
83
84    private CompactTrieNodes getCompactTrieNode(int node) {
85        return nodes[node];
86    }
87
88    // private class to hold both node information
89    static class CompactTrieNodes {
90        short flagscount; // Count of sub-entries, plus flags
91
92        CompactTrieHorizontalNode[] hnode;
93
94        CompactTrieVerticalNode vnode;
95
96        CompactTrieNodes() {
97            flagscount = 0;
98            hnode = null;
99            vnode = null;
100        }
101    }
102
103    private CompactTrieNodes[] nodes;
104
105    // Constructor
106    public BreakCTDictionary(ByteBuffer bytes) throws IOException {
107        ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
108
109        // Get header information
110        fData = new CompactTrieHeader();
111        fData.size = bytes.getInt();
112        fData.magic = bytes.getInt();
113        fData.nodeCount = bytes.getShort();
114        fData.root = bytes.getShort();
115
116        loadBreakCTDictionary(bytes);
117    }
118
119    // Loads the compact trie dictionary file into the CompactTrieNodes
120    private void loadBreakCTDictionary(ByteBuffer bytes) throws IOException {
121        // skip over offset information
122        for (int i = 0; i < fData.nodeCount; i++) {
123            bytes.getInt();
124        }
125
126        // Create compact trie dictionary
127        nodes = new CompactTrieNodes[fData.nodeCount];
128        nodes[0] = new CompactTrieNodes();
129
130        // Load in compact trie dictionary
131        for (int j = 1; j < fData.nodeCount; j++) {
132            nodes[j] = new CompactTrieNodes();
133            nodes[j].flagscount = bytes.getShort();
134
135            int count = nodes[j].flagscount & CompactTrieNodeFlags.kCountMask;
136
137            if (count != 0) {
138                boolean isVerticalNode = (nodes[j].flagscount & CompactTrieNodeFlags.kVerticalNode) != 0;
139
140                // Vertical node
141                if (isVerticalNode) {
142                    nodes[j].vnode = new CompactTrieVerticalNode();
143                    nodes[j].vnode.equal = bytes.getShort();
144
145                    nodes[j].vnode.chars = new char[count];
146                    for (int l = 0; l < count; l++) {
147                        nodes[j].vnode.chars[l] = bytes.getChar();
148                    }
149                } else { // Horizontal node
150                    nodes[j].hnode = new CompactTrieHorizontalNode[count];
151                    for (int n = 0; n < count; n++) {
152                        nodes[j].hnode[n] = new CompactTrieHorizontalNode(
153                                bytes.getChar(), bytes.getShort());
154                    }
155                }
156            }
157        }
158    }
159
160    /**
161     * Find dictionary words that match the text.
162     *
163     * @param text A CharacterIterator representing the text. The iterator is
164     *            left after the longest prefix match in the dictionary.
165     * @param maxLength The maximum number of code units to match.
166     * @param lengths An array that is filled with the lengths of words that matched.
167     * @param count Filled with the number of elements output in lengths.
168     * @param limit The size of the lengths array; this limits the number of words output.
169     * @return The number of characters in text that were matched.
170     */
171    public int matches(CharacterIterator text, int maxLength, int lengths[],
172            int count[], int limit) {
173        // Current implementation works in UTF-16 space
174        CompactTrieNodes node = getCompactTrieNode(fData.root);
175        int mycount = 0;
176
177        char uc = text.current();
178        int i = 0;
179        boolean exitFlag = false;
180
181        while (node != null) {
182            // Check if the node we just exited ends a word
183            if (limit > 0
184                    && (node.flagscount & CompactTrieNodeFlags.kParentEndsWord) != 0) {
185                lengths[mycount++] = i;
186                --limit;
187            }
188            // Check that we haven't exceeded the maximum number of input
189            // characters.
190            // We have to do that here rather than in the while condition so
191            // that
192            // we can check for ending of a word above.
193            if (i >= maxLength) {
194                break;
195            }
196
197            int nodeCount = (node.flagscount & CompactTrieNodeFlags.kCountMask);
198            if (nodeCount == 0) {
199                // Special terminal node; return now
200                break;
201            }
202            if ((node.flagscount & CompactTrieNodeFlags.kVerticalNode) != 0) {
203                // Vertical node; check all the characters in it
204                CompactTrieVerticalNode vnode = node.vnode;
205                for (int j = 0; j < nodeCount && i < maxLength; j++) {
206                    if (uc != vnode.chars[j]) {
207                        // We hit a non equal character return;
208                        exitFlag = true;
209                        break;
210                    }
211                    text.next();
212                    uc = text.current();
213                    i++;
214                }
215                if (exitFlag) {
216                    break;
217                }
218                // To get here, we must have come through the whole list successfully;
219                // go on to the next node. Note that a word cannot end in the middle
220                // of a vertical node.
221                node = getCompactTrieNode(vnode.equal);
222            } else {
223                // Horizontal node; do binary search
224                CompactTrieHorizontalNode[] hnode = node.hnode;
225                int low = 0;
226                int high = nodeCount - 1;
227                int middle;
228                node = null; // If we don't find a match, we'll fall out of the loop
229                while (high >= low) {
230                    middle = (high + low) >>> 1;
231                    if (uc == hnode[middle].ch) {
232                        // We hit a match; get the next node and next character
233                        node = getCompactTrieNode(hnode[middle].equal);
234                        text.next();
235                        uc = text.current();
236                        i++;
237                        break;
238                    } else if (uc < hnode[middle].ch) {
239                        high = middle - 1;
240                    } else {
241                        low = middle + 1;
242                    }
243                }
244            }
245        }
246
247        count[0] = mycount;
248        return i;
249    }
250
251    // Use for reading the header portion of the file
252    private static final int DATA_FORMAT_ID = 0x54724463;
253}
254