trie_map.cpp revision c0c674cdc0721a374e140ad5ee1409c0498b3262
1/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/utils/trie_map.h"
18
19#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
20
21namespace latinime {
22
23const int TrieMap::INVALID_INDEX = -1;
24const int TrieMap::FIELD0_SIZE = 4;
25const int TrieMap::FIELD1_SIZE = 3;
26const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
27const uint32_t TrieMap::VALUE_FLAG = 0x400000;
28const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
29const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
30const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
31const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
32const uint32_t TrieMap::LABEL_MASK = 0x1F;
33const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
34const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
35const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
36const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
37const uint64_t TrieMap::MAX_VALUE =
38        (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
39const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
40
41TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
42    mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
43    writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
44}
45
46TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
47        : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
48
49void TrieMap::dump(const int from, const int to) const {
50    AKLOGI("BufSize: %d", mBuffer.getTailPosition());
51    for (int i = from; i < to; ++i) {
52        AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
53    }
54    int unusedRegionSize = 0;
55    for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
56        int index = readEmptyTableLink(i);
57        while (index != ROOT_BITMAP_ENTRY_INDEX) {
58            index = readField0(index);
59            unusedRegionSize += i;
60        }
61    }
62    AKLOGI("Unused Size: %d", unusedRegionSize);
63}
64
65int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) {
66    const Entry bitmapEntry = readEntry(bitmapEntryIndex);
67    const uint32_t unsignedKey = static_cast<uint32_t>(key);
68    const int terminalEntryIndex = getTerminalEntryIndex(
69            unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
70    if (terminalEntryIndex == INVALID_INDEX) {
71        // Not found.
72        return INVALID_INDEX;
73    }
74    const Entry terminalEntry = readEntry(terminalEntryIndex);
75    if (terminalEntry.hasTerminalLink()) {
76        return terminalEntry.getValueEntryIndex() + 1;
77    }
78    // Create a value entry and a bitmap entry.
79    const int valueEntryIndex = allocateTable(2 /* entryCount */);
80    if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
81        return INVALID_INDEX;
82    }
83    if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
84        return INVALID_INDEX;
85    }
86    if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) {
87        return INVALID_INDEX;
88    }
89    return valueEntryIndex + 1;
90}
91
92const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const {
93    const uint32_t unsignedKey = static_cast<uint32_t>(key);
94    return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
95            0 /* level */);
96}
97
98bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) {
99    if (value > MAX_VALUE) {
100        return false;
101    }
102    const uint32_t unsignedKey = static_cast<uint32_t>(key);
103    return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
104            readEntry(bitmapEntryIndex), 0 /* level */);
105}
106
107bool TrieMap::save(FILE *const file) const {
108    return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
109}
110
111/**
112 * Iterate next entry in a certain level.
113 *
114 * @param iterationState the iteration state that will be read and updated in this method.
115 * @param outKey the output key
116 * @return Result instance. mIsValid is false when all entries are iterated.
117 */
118const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState,
119        int *const outKey) const {
120    while (!iterationState->empty()) {
121        TableIterationState &state = iterationState->back();
122        if (state.mTableSize <= state.mCurrentIndex) {
123            // Move to parent.
124            iterationState->pop_back();
125        } else {
126            const int entryIndex = state.mTableIndex + state.mCurrentIndex;
127            state.mCurrentIndex += 1;
128            const Entry entry = readEntry(entryIndex);
129            if (entry.isBitmapEntry()) {
130                // Move to child.
131                iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex());
132            } else {
133                if (outKey) {
134                    *outKey = entry.getKey();
135                }
136                if (!entry.hasTerminalLink()) {
137                    return Result(entry.getValue(), true, INVALID_INDEX);
138                }
139                const int valueEntryIndex = entry.getValueEntryIndex();
140                const Entry valueEntry = readEntry(valueEntryIndex);
141                return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
142            }
143        }
144    }
145    // Visited all entries.
146    return Result(0, false, INVALID_INDEX);
147}
148
149/**
150 * Shuffle bits of the key in the fixed order.
151 *
152 * This method is used as a hash function. This returns different values for different inputs.
153 */
154uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
155    uint32_t shuffledKey = 0;
156    for (int i = 0; i < 4; ++i) {
157        const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
158        shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21))
159                & 0x11111111) << i;
160    }
161    return shuffledKey;
162}
163
164bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
165    if (value <= VALUE_MASK) {
166        // Write value into the terminal entry.
167        return writeField1(value | VALUE_FLAG, terminalEntryIndex);
168    }
169    // Create value entry and write value.
170    const int valueEntryIndex = allocateTable(2 /* entryCount */);
171    if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
172        return false;
173    }
174    if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
175        return false;
176    }
177    return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
178}
179
180bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
181        const int terminalEntryIndex) {
182    if (!terminalEntry.hasTerminalLink()) {
183        return writeValue(value, terminalEntryIndex);
184    }
185    const int valueEntryIndex = terminalEntry.getValueEntryIndex();
186    return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex);
187}
188
189bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
190    if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
191        return false;
192    }
193    return writeEmptyTableLink(tableIndex, entryCount);
194}
195
196/**
197 * Allocate table with entryCount-entries. Reuse freed table if possible.
198 */
199int TrieMap::allocateTable(const int entryCount) {
200    if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
201        const int tableIndex = readEmptyTableLink(entryCount);
202        if (tableIndex > 0) {
203            if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
204                return INVALID_INDEX;
205            }
206            // Reuse the table.
207            return tableIndex;
208        }
209    }
210    // Allocate memory space at tail position of the buffer.
211    const int mapIndex = getTailEntryIndex();
212    if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
213        return INVALID_INDEX;
214    }
215    return mapIndex;
216}
217
218int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
219        const Entry &bitmapEntry, const int level) const {
220    const int label = getLabel(hashedKey, level);
221    if (!exists(bitmapEntry.getBitmap(), label)) {
222        return INVALID_INDEX;
223    }
224    const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label);
225    const Entry entry = readEntry(entryIndex);
226    if (entry.isBitmapEntry()) {
227        // Move to the next level.
228        return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
229    }
230    if (entry.getKey() == key) {
231        // Terminal entry is found.
232        return entryIndex;
233    }
234    return INVALID_INDEX;
235}
236
237/**
238 * Get Result corresponding to the key.
239 *
240 * @param key the key.
241 * @param hashedKey the hashed key.
242 * @param bitmapEntryIndex the index of bitmap entry
243 * @param level current level
244 * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
245 * map.
246 */
247const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey,
248        const int bitmapEntryIndex, const int level) const {
249    const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey,
250            readEntry(bitmapEntryIndex), level);
251    if (terminalEntryIndex == INVALID_INDEX) {
252        // Not found.
253        return Result(0, false, INVALID_INDEX);
254    }
255    const Entry terminalEntry = readEntry(terminalEntryIndex);
256    if (!terminalEntry.hasTerminalLink()) {
257        return Result(terminalEntry.getValue(), true, INVALID_INDEX);
258    }
259    const int valueEntryIndex = terminalEntry.getValueEntryIndex();
260    const Entry valueEntry = readEntry(valueEntryIndex);
261    return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
262}
263
264/**
265 * Put key to value mapping to the map.
266 *
267 * @param key the key.
268 * @param value the value
269 * @param hashedKey the hashed key.
270 * @param bitmapEntryIndex the index of bitmap entry
271 * @param bitmapEntry the bitmap entry
272 * @param level current level
273 * @return whether the key-value has been correctly inserted to the map or not.
274 */
275bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
276        const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
277    const int label = getLabel(hashedKey, level);
278    const uint32_t bitmap = bitmapEntry.getBitmap();
279    const int mapIndex = bitmapEntry.getTableIndex();
280    if (!exists(bitmap, label)) {
281        // Current map doesn't contain the label.
282        return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label);
283    }
284    const int entryIndex = mapIndex + popCount(bitmap, label);
285    const Entry entry = readEntry(entryIndex);
286    if (entry.isBitmapEntry()) {
287        // Bitmap entry is found. Go to the next level.
288        return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
289    }
290    if (entry.getKey() == key) {
291        // Terminal entry for the key is found. Update the value.
292        return updateValue(entry, value, entryIndex);
293    }
294    // Conflict with the existing key.
295    return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level);
296}
297
298/**
299 * Resolve a conflict in the current level and add new entry.
300 *
301 * @param key the key
302 * @param value the value
303 * @param hashedKey the hashed key
304 * @param conflictedEntry the existing conflicted entry
305 * @param conflictedEntryIndex the index of existing conflicted entry
306 * @param level current level
307 * @return whether the key-value has been correctly inserted to the map or not.
308 */
309bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
310        const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
311        const int level) {
312    const int conflictedKeyNextLabel =
313            getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
314    const int nextLabel = getLabel(hashedKey, level + 1);
315    if (conflictedKeyNextLabel == nextLabel) {
316        // Conflicted again in the next level.
317        const int newTableIndex = allocateTable(1 /* entryCount */);
318        if (newTableIndex == INVALID_INDEX) {
319            return false;
320        }
321        if (!writeEntry(conflictedEntry, newTableIndex)) {
322            return false;
323        }
324        const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex);
325        if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
326            return false;
327        }
328        return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1);
329    }
330    // The conflict has been resolved. Create a table that contains 2 entries.
331    const int newTableIndex = allocateTable(2 /* entryCount */);
332    if (newTableIndex == INVALID_INDEX) {
333        return false;
334    }
335    if (nextLabel < conflictedKeyNextLabel) {
336        if (!writeTerminalEntry(key, value, newTableIndex)) {
337            return false;
338        }
339        if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
340            return false;
341        }
342    } else { // nextLabel > conflictedKeyNextLabel
343        if (!writeEntry(conflictedEntry, newTableIndex)) {
344            return false;
345        }
346        if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
347            return false;
348        }
349    }
350    const uint32_t updatedBitmap =
351            setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
352    return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
353}
354
355/**
356 * Add new entry to the existing table.
357 */
358bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
359        const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
360    // Current map doesn't contain the label.
361    const int entryCount = popCount(bitmap);
362    const int newTableIndex = allocateTable(entryCount + 1);
363    if (newTableIndex == INVALID_INDEX) {
364        return false;
365    }
366    const int newEntryIndexInTable = popCount(bitmap, label);
367    // Copy from existing table to the new table.
368    for (int i = 0; i < entryCount; ++i) {
369        if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) {
370            return false;
371        }
372    }
373    // Write new terminal entry.
374    if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
375        return false;
376    }
377    // Update bitmap.
378    if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) {
379        return false;
380    }
381    if (entryCount > 0) {
382        return freeTable(tableIndex, entryCount);
383    }
384    return true;
385}
386
387}  // namespace latinime
388