trie_map.h revision de5c3a2562bbddc0f3d95619a1b3b1318b9598fd
1/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_TRIE_MAP_H
18#define LATINIME_TRIE_MAP_H
19
20#include <climits>
21#include <cstdint>
22#include <cstdio>
23#include <vector>
24
25#include "defines.h"
26#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
27
28namespace latinime {
29
30/**
31 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
32 * key is int and value is uint64_t.
33 * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
34 * This doesn't support root map resizing.
35 */
36class TrieMap {
37 public:
38    struct Result {
39        const uint64_t mValue;
40        const bool mIsValid;
41        const int mNextLevelBitmapEntryIndex;
42
43        Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
44                : mValue(value), mIsValid(isValid),
45                  mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
46    };
47
48    /**
49     * Struct to record iteration state in a table.
50     */
51    struct TableIterationState {
52        int mTableSize;
53        int mTableIndex;
54        int mCurrentIndex;
55
56        TableIterationState(const int tableSize, const int tableIndex)
57                : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
58    };
59
60    class TrieMapRange;
61    class TrieMapIterator {
62     public:
63        class IterationResult {
64         public:
65            IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
66                    const int nextLeveBitmapEntryIndex)
67                    : mTrieMap(trieMap), mKey(key), mValue(value),
68                      mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
69
70            const TrieMapRange getEntriesInNextLevel() const {
71                return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
72            }
73
74            bool hasNextLevelMap() const {
75                return mNextLevelBitmapEntryIndex != INVALID_INDEX;
76            }
77
78            AK_FORCE_INLINE int key() const {
79                return mKey;
80            }
81
82            AK_FORCE_INLINE uint64_t value() const {
83                return mValue;
84            }
85
86         private:
87            const TrieMap *const mTrieMap;
88            const int mKey;
89            const uint64_t mValue;
90            const int mNextLevelBitmapEntryIndex;
91        };
92
93        TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
94                : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
95                  mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
96            if (!trieMap) {
97                return;
98            }
99            const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
100            mStateStack.emplace_back(
101                    mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
102            this->operator++();
103        }
104
105        const IterationResult operator*() const {
106            return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
107        }
108
109        bool operator!=(const TrieMapIterator &other) const {
110            // Caveat: This works only for for loops.
111            return mIsValid || other.mIsValid;
112        }
113
114        const TrieMapIterator &operator++() {
115            const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
116            mValue = result.mValue;
117            mIsValid = result.mIsValid;
118            mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
119            return *this;
120        }
121
122     private:
123        DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
124        DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
125
126        const TrieMap *const mTrieMap;
127        std::vector<TrieMap::TableIterationState> mStateStack;
128        const int mBaseBitmapEntryIndex;
129        int mKey;
130        uint64_t mValue;
131        bool mIsValid;
132        int mNextLevelBitmapEntryIndex;
133    };
134
135    /**
136     * Class to support iterating entries in TrieMap by range base for loops.
137     */
138    class TrieMapRange {
139     public:
140        TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
141                : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
142
143        TrieMapIterator begin() const {
144            return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
145        }
146
147        const TrieMapIterator end() const {
148            return TrieMapIterator(nullptr, INVALID_INDEX);
149        }
150
151     private:
152        DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
153        DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
154
155        const TrieMap *const mTrieMap;
156        const int mBaseBitmapEntryIndex;
157    };
158
159    static const int INVALID_INDEX;
160    static const uint64_t MAX_VALUE;
161
162    TrieMap();
163    // Construct TrieMap using existing data in the memory region written by save().
164    TrieMap(uint8_t *const buffer, const int bufferSize);
165    void dump(const int from = 0, const int to = 0) const;
166
167    bool isNearSizeLimit() const {
168        return mBuffer.isNearSizeLimit();
169    }
170
171    // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
172    int getNextLevelBitmapEntryIndex(const int key) {
173        return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
174    }
175
176    int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
177
178    const Result getRoot(const int key) const {
179        return get(key, ROOT_BITMAP_ENTRY_INDEX);
180    }
181
182    const Result get(const int key, const int bitmapEntryIndex) const;
183
184    bool putRoot(const int key, const uint64_t value) {
185        return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
186    }
187
188    bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
189
190    const TrieMapRange getEntriesInRootLevel() const {
191        return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
192    }
193
194    const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
195        return TrieMapRange(this, bitmapEntryIndex);
196    }
197
198    bool save(FILE *const file) const;
199
200 private:
201    DISALLOW_COPY_AND_ASSIGN(TrieMap);
202
203    /**
204     * Struct represents an entry.
205     *
206     * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
207     * FIELD_1.
208     * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
209     *   FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
210     * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
211     * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
212     * for the key.
213     *   FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
214     * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
215     * lower order bytes are stored in FIELD_1.
216     *   FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
217     */
218    struct Entry {
219        const uint32_t mData0;
220        const uint32_t mData1;
221
222        Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
223
224        AK_FORCE_INLINE bool isBitmapEntry() const {
225            return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
226        }
227
228        AK_FORCE_INLINE bool hasTerminalLink() const {
229            return (mData1 & TERMINAL_LINK_FLAG) != 0;
230        }
231
232        // For terminal entry.
233        AK_FORCE_INLINE uint32_t getKey() const {
234            return mData0;
235        }
236
237        // For terminal entry.
238        AK_FORCE_INLINE uint32_t getValue() const {
239            return mData1 & VALUE_MASK;
240        }
241
242        // For terminal entry.
243        AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
244            return mData1 & TERMINAL_LINK_MASK;
245        }
246
247        // For bitmap entry.
248        AK_FORCE_INLINE uint32_t getBitmap() const {
249            return mData0;
250        }
251
252        // For bitmap entry.
253        AK_FORCE_INLINE int getTableIndex() const {
254            return static_cast<int>(mData1);
255        }
256
257        // For value entry.
258        AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
259            return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
260        }
261    };
262
263    BufferWithExtendableBuffer mBuffer;
264
265    static const int FIELD0_SIZE;
266    static const int FIELD1_SIZE;
267    static const int ENTRY_SIZE;
268    static const uint32_t VALUE_FLAG;
269    static const uint32_t VALUE_MASK;
270    static const uint32_t TERMINAL_LINK_FLAG;
271    static const uint32_t TERMINAL_LINK_MASK;
272    static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
273    static const uint32_t LABEL_MASK;
274    static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
275    static const int ROOT_BITMAP_ENTRY_INDEX;
276    static const int ROOT_BITMAP_ENTRY_POS;
277    static const Entry EMPTY_BITMAP_ENTRY;
278    static const int MAX_BUFFER_SIZE;
279
280    uint32_t getBitShuffledKey(const uint32_t key) const;
281    bool writeValue(const uint64_t value, const int terminalEntryIndex);
282    bool updateValue(const Entry &terminalEntry, const uint64_t value,
283            const int terminalEntryIndex);
284    bool freeTable(const int tableIndex, const int entryCount);
285    int allocateTable(const int entryCount);
286    int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
287            const Entry &bitmapEntry, const int level) const;
288    const Result getInternal(const uint32_t key, const uint32_t hashedKey,
289            const int bitmapEntryIndex, const int level) const;
290    bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
291            const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
292    bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
293            const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
294            const int level);
295    bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
296            const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
297            const int label);
298    const Result iterateNext(std::vector<TableIterationState> *const iterationState,
299            int *const outKey) const;
300
301    AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
302        return Entry(readField0(entryIndex), readField1(entryIndex));
303    }
304
305    // Returns whether an entry for the index is existing by testing if the index-th bit in the
306    // bitmap is set or not.
307    AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
308        return (bitmap & (1 << index)) != 0;
309    }
310
311    // Set index-th bit in the bitmap.
312    AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
313        return bitmap | (1 << index);
314    }
315
316    // Count set bits before index in the bitmap.
317    AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
318        return popCount(bitmap & ((1 << index) - 1));
319    }
320
321    // Count set bits in the bitmap.
322    AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
323        return __builtin_popcount(bitmap);
324        // int v = bitmap - ((bitmap >> 1) & 0x55555555);
325        // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
326        // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
327    }
328
329    AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
330        return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
331    }
332
333    AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
334        return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
335    }
336
337    AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
338        return mBuffer.readUint(FIELD1_SIZE,
339                ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
340    }
341
342    AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
343        return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
344    }
345
346    AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
347        return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
348    }
349
350    AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
351        return mBuffer.writeUint(data, FIELD0_SIZE,
352                ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
353    }
354
355    AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
356        return mBuffer.writeUint(data, FIELD1_SIZE,
357                ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
358    }
359
360    AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
361        return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
362    }
363
364    AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
365            const int entryIndex) {
366        return writeField0(key, entryIndex) && writeValue(value, entryIndex);
367    }
368
369    AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
370        return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
371    }
372
373    AK_FORCE_INLINE int getTailEntryIndex() const {
374        return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
375    }
376};
377
378} // namespace latinime
379#endif /* LATINIME_TRIE_MAP_H */
380