trie_map.cpp revision de5c3a2562bbddc0f3d95619a1b3b1318b9598fd
1/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/utils/trie_map.h"
18
19#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
20
21namespace latinime {
22
23const int TrieMap::INVALID_INDEX = -1;
24const int TrieMap::FIELD0_SIZE = 4;
25const int TrieMap::FIELD1_SIZE = 3;
26const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
27const uint32_t TrieMap::VALUE_FLAG = 0x400000;
28const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
29const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
30const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
31const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
32const uint32_t TrieMap::LABEL_MASK = 0x1F;
33const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
34const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
35const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
36const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
37const uint64_t TrieMap::MAX_VALUE =
38        (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
39const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
40
41TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
42    mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
43    writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
44}
45
46TrieMap::TrieMap(uint8_t *const buffer, const int bufferSize)
47        : mBuffer(buffer, bufferSize,
48                BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
49
50void TrieMap::dump(const int from, const int to) const {
51    AKLOGI("BufSize: %d", mBuffer.getTailPosition());
52    for (int i = from; i < to; ++i) {
53        AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
54    }
55    int unusedRegionSize = 0;
56    for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
57        int index = readEmptyTableLink(i);
58        while (index != ROOT_BITMAP_ENTRY_INDEX) {
59            index = readField0(index);
60            unusedRegionSize += i;
61        }
62    }
63    AKLOGI("Unused Size: %d", unusedRegionSize);
64}
65
66int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) {
67    const Entry bitmapEntry = readEntry(bitmapEntryIndex);
68    const uint32_t unsignedKey = static_cast<uint32_t>(key);
69    const int terminalEntryIndex = getTerminalEntryIndex(
70            unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
71    if (terminalEntryIndex == INVALID_INDEX) {
72        // Not found.
73        return INVALID_INDEX;
74    }
75    const Entry terminalEntry = readEntry(terminalEntryIndex);
76    if (terminalEntry.hasTerminalLink()) {
77        return terminalEntry.getValueEntryIndex() + 1;
78    }
79    // Create a value entry and a bitmap entry.
80    const int valueEntryIndex = allocateTable(2 /* entryCount */);
81    if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
82        return INVALID_INDEX;
83    }
84    if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
85        return INVALID_INDEX;
86    }
87    if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) {
88        return INVALID_INDEX;
89    }
90    return valueEntryIndex + 1;
91}
92
93const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const {
94    const uint32_t unsignedKey = static_cast<uint32_t>(key);
95    return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
96            0 /* level */);
97}
98
99bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) {
100    if (value > MAX_VALUE) {
101        return false;
102    }
103    const uint32_t unsignedKey = static_cast<uint32_t>(key);
104    return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
105            readEntry(bitmapEntryIndex), 0 /* level */);
106}
107
108bool TrieMap::save(FILE *const file) const {
109    return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
110}
111
112/**
113 * Iterate next entry in a certain level.
114 *
115 * @param iterationState the iteration state that will be read and updated in this method.
116 * @param outKey the output key
117 * @return Result instance. mIsValid is false when all entries are iterated.
118 */
119const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState,
120        int *const outKey) const {
121    while (!iterationState->empty()) {
122        TableIterationState &state = iterationState->back();
123        if (state.mTableSize <= state.mCurrentIndex) {
124            // Move to parent.
125            iterationState->pop_back();
126        } else {
127            const int entryIndex = state.mTableIndex + state.mCurrentIndex;
128            state.mCurrentIndex += 1;
129            const Entry entry = readEntry(entryIndex);
130            if (entry.isBitmapEntry()) {
131                // Move to child.
132                iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex());
133            } else {
134                if (outKey) {
135                    *outKey = entry.getKey();
136                }
137                if (!entry.hasTerminalLink()) {
138                    return Result(entry.getValue(), true, INVALID_INDEX);
139                }
140                const int valueEntryIndex = entry.getValueEntryIndex();
141                const Entry valueEntry = readEntry(valueEntryIndex);
142                return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
143            }
144        }
145    }
146    // Visited all entries.
147    return Result(0, false, INVALID_INDEX);
148}
149
150/**
151 * Shuffle bits of the key in the fixed order.
152 *
153 * This method is used as a hash function. This returns different values for different inputs.
154 */
155uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
156    uint32_t shuffledKey = 0;
157    for (int i = 0; i < 4; ++i) {
158        const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
159        shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21))
160                & 0x11111111) << i;
161    }
162    return shuffledKey;
163}
164
165bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
166    if (value <= VALUE_MASK) {
167        // Write value into the terminal entry.
168        return writeField1(value | VALUE_FLAG, terminalEntryIndex);
169    }
170    // Create value entry and write value.
171    const int valueEntryIndex = allocateTable(2 /* entryCount */);
172    if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
173        return false;
174    }
175    if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
176        return false;
177    }
178    return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
179}
180
181bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
182        const int terminalEntryIndex) {
183    if (!terminalEntry.hasTerminalLink()) {
184        return writeValue(value, terminalEntryIndex);
185    }
186    const int valueEntryIndex = terminalEntry.getValueEntryIndex();
187    return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex);
188}
189
190bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
191    if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
192        return false;
193    }
194    return writeEmptyTableLink(tableIndex, entryCount);
195}
196
197/**
198 * Allocate table with entryCount-entries. Reuse freed table if possible.
199 */
200int TrieMap::allocateTable(const int entryCount) {
201    if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
202        const int tableIndex = readEmptyTableLink(entryCount);
203        if (tableIndex > 0) {
204            if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
205                return INVALID_INDEX;
206            }
207            // Reuse the table.
208            return tableIndex;
209        }
210    }
211    // Allocate memory space at tail position of the buffer.
212    const int mapIndex = getTailEntryIndex();
213    if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
214        return INVALID_INDEX;
215    }
216    return mapIndex;
217}
218
219int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
220        const Entry &bitmapEntry, const int level) const {
221    const int label = getLabel(hashedKey, level);
222    if (!exists(bitmapEntry.getBitmap(), label)) {
223        return INVALID_INDEX;
224    }
225    const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label);
226    const Entry entry = readEntry(entryIndex);
227    if (entry.isBitmapEntry()) {
228        // Move to the next level.
229        return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
230    }
231    if (entry.getKey() == key) {
232        // Terminal entry is found.
233        return entryIndex;
234    }
235    return INVALID_INDEX;
236}
237
238/**
239 * Get Result corresponding to the key.
240 *
241 * @param key the key.
242 * @param hashedKey the hashed key.
243 * @param bitmapEntryIndex the index of bitmap entry
244 * @param level current level
245 * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
246 * map.
247 */
248const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey,
249        const int bitmapEntryIndex, const int level) const {
250    const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey,
251            readEntry(bitmapEntryIndex), level);
252    if (terminalEntryIndex == INVALID_INDEX) {
253        // Not found.
254        return Result(0, false, INVALID_INDEX);
255    }
256    const Entry terminalEntry = readEntry(terminalEntryIndex);
257    if (!terminalEntry.hasTerminalLink()) {
258        return Result(terminalEntry.getValue(), true, INVALID_INDEX);
259    }
260    const int valueEntryIndex = terminalEntry.getValueEntryIndex();
261    const Entry valueEntry = readEntry(valueEntryIndex);
262    return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
263}
264
265/**
266 * Put key to value mapping to the map.
267 *
268 * @param key the key.
269 * @param value the value
270 * @param hashedKey the hashed key.
271 * @param bitmapEntryIndex the index of bitmap entry
272 * @param bitmapEntry the bitmap entry
273 * @param level current level
274 * @return whether the key-value has been correctly inserted to the map or not.
275 */
276bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
277        const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
278    const int label = getLabel(hashedKey, level);
279    const uint32_t bitmap = bitmapEntry.getBitmap();
280    const int mapIndex = bitmapEntry.getTableIndex();
281    if (!exists(bitmap, label)) {
282        // Current map doesn't contain the label.
283        return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label);
284    }
285    const int entryIndex = mapIndex + popCount(bitmap, label);
286    const Entry entry = readEntry(entryIndex);
287    if (entry.isBitmapEntry()) {
288        // Bitmap entry is found. Go to the next level.
289        return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
290    }
291    if (entry.getKey() == key) {
292        // Terminal entry for the key is found. Update the value.
293        return updateValue(entry, value, entryIndex);
294    }
295    // Conflict with the existing key.
296    return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level);
297}
298
299/**
300 * Resolve a conflict in the current level and add new entry.
301 *
302 * @param key the key
303 * @param value the value
304 * @param hashedKey the hashed key
305 * @param conflictedEntry the existing conflicted entry
306 * @param conflictedEntryIndex the index of existing conflicted entry
307 * @param level current level
308 * @return whether the key-value has been correctly inserted to the map or not.
309 */
310bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
311        const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
312        const int level) {
313    const int conflictedKeyNextLabel =
314            getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
315    const int nextLabel = getLabel(hashedKey, level + 1);
316    if (conflictedKeyNextLabel == nextLabel) {
317        // Conflicted again in the next level.
318        const int newTableIndex = allocateTable(1 /* entryCount */);
319        if (newTableIndex == INVALID_INDEX) {
320            return false;
321        }
322        if (!writeEntry(conflictedEntry, newTableIndex)) {
323            return false;
324        }
325        const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex);
326        if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
327            return false;
328        }
329        return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1);
330    }
331    // The conflict has been resolved. Create a table that contains 2 entries.
332    const int newTableIndex = allocateTable(2 /* entryCount */);
333    if (newTableIndex == INVALID_INDEX) {
334        return false;
335    }
336    if (nextLabel < conflictedKeyNextLabel) {
337        if (!writeTerminalEntry(key, value, newTableIndex)) {
338            return false;
339        }
340        if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
341            return false;
342        }
343    } else { // nextLabel > conflictedKeyNextLabel
344        if (!writeEntry(conflictedEntry, newTableIndex)) {
345            return false;
346        }
347        if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
348            return false;
349        }
350    }
351    const uint32_t updatedBitmap =
352            setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
353    return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
354}
355
356/**
357 * Add new entry to the existing table.
358 */
359bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
360        const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
361    // Current map doesn't contain the label.
362    const int entryCount = popCount(bitmap);
363    const int newTableIndex = allocateTable(entryCount + 1);
364    if (newTableIndex == INVALID_INDEX) {
365        return false;
366    }
367    const int newEntryIndexInTable = popCount(bitmap, label);
368    // Copy from existing table to the new table.
369    for (int i = 0; i < entryCount; ++i) {
370        if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) {
371            return false;
372        }
373    }
374    // Write new terminal entry.
375    if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
376        return false;
377    }
378    // Update bitmap.
379    if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) {
380        return false;
381    }
382    if (entryCount > 0) {
383        return freeTable(tableIndex, entryCount);
384    }
385    return true;
386}
387
388}  // namespace latinime
389