trie_map.h revision c0c674cdc0721a374e140ad5ee1409c0498b3262
1/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_TRIE_MAP_H
18#define LATINIME_TRIE_MAP_H
19
20#include <climits>
21#include <cstdint>
22#include <cstdio>
23#include <vector>
24
25#include "defines.h"
26#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
27#include "utils/byte_array_view.h"
28
29namespace latinime {
30
31/**
32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
33 * key is int and value is uint64_t.
34 * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
35 * This doesn't support root map resizing.
36 */
37class TrieMap {
38 public:
39    struct Result {
40        const uint64_t mValue;
41        const bool mIsValid;
42        const int mNextLevelBitmapEntryIndex;
43
44        Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
45                : mValue(value), mIsValid(isValid),
46                  mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
47    };
48
49    /**
50     * Struct to record iteration state in a table.
51     */
52    struct TableIterationState {
53        int mTableSize;
54        int mTableIndex;
55        int mCurrentIndex;
56
57        TableIterationState(const int tableSize, const int tableIndex)
58                : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
59    };
60
61    class TrieMapRange;
62    class TrieMapIterator {
63     public:
64        class IterationResult {
65         public:
66            IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
67                    const int nextLeveBitmapEntryIndex)
68                    : mTrieMap(trieMap), mKey(key), mValue(value),
69                      mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
70
71            const TrieMapRange getEntriesInNextLevel() const {
72                return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
73            }
74
75            bool hasNextLevelMap() const {
76                return mNextLevelBitmapEntryIndex != INVALID_INDEX;
77            }
78
79            AK_FORCE_INLINE int key() const {
80                return mKey;
81            }
82
83            AK_FORCE_INLINE uint64_t value() const {
84                return mValue;
85            }
86
87         private:
88            const TrieMap *const mTrieMap;
89            const int mKey;
90            const uint64_t mValue;
91            const int mNextLevelBitmapEntryIndex;
92        };
93
94        TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
95                : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
96                  mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
97            if (!trieMap) {
98                return;
99            }
100            const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
101            mStateStack.emplace_back(
102                    mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
103            this->operator++();
104        }
105
106        const IterationResult operator*() const {
107            return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
108        }
109
110        bool operator!=(const TrieMapIterator &other) const {
111            // Caveat: This works only for for loops.
112            return mIsValid || other.mIsValid;
113        }
114
115        const TrieMapIterator &operator++() {
116            const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
117            mValue = result.mValue;
118            mIsValid = result.mIsValid;
119            mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
120            return *this;
121        }
122
123     private:
124        DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
125        DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
126
127        const TrieMap *const mTrieMap;
128        std::vector<TrieMap::TableIterationState> mStateStack;
129        const int mBaseBitmapEntryIndex;
130        int mKey;
131        uint64_t mValue;
132        bool mIsValid;
133        int mNextLevelBitmapEntryIndex;
134    };
135
136    /**
137     * Class to support iterating entries in TrieMap by range base for loops.
138     */
139    class TrieMapRange {
140     public:
141        TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
142                : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
143
144        TrieMapIterator begin() const {
145            return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
146        }
147
148        const TrieMapIterator end() const {
149            return TrieMapIterator(nullptr, INVALID_INDEX);
150        }
151
152     private:
153        DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
154        DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
155
156        const TrieMap *const mTrieMap;
157        const int mBaseBitmapEntryIndex;
158    };
159
160    static const int INVALID_INDEX;
161    static const uint64_t MAX_VALUE;
162
163    TrieMap();
164    // Construct TrieMap using existing data in the memory region written by save().
165    TrieMap(const ReadWriteByteArrayView buffer);
166    void dump(const int from = 0, const int to = 0) const;
167
168    bool isNearSizeLimit() const {
169        return mBuffer.isNearSizeLimit();
170    }
171
172    // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
173    int getNextLevelBitmapEntryIndex(const int key) {
174        return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
175    }
176
177    int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
178
179    const Result getRoot(const int key) const {
180        return get(key, ROOT_BITMAP_ENTRY_INDEX);
181    }
182
183    const Result get(const int key, const int bitmapEntryIndex) const;
184
185    bool putRoot(const int key, const uint64_t value) {
186        return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
187    }
188
189    bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
190
191    const TrieMapRange getEntriesInRootLevel() const {
192        return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
193    }
194
195    const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
196        return TrieMapRange(this, bitmapEntryIndex);
197    }
198
199    bool save(FILE *const file) const;
200
201 private:
202    DISALLOW_COPY_AND_ASSIGN(TrieMap);
203
204    /**
205     * Struct represents an entry.
206     *
207     * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
208     * FIELD_1.
209     * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
210     *   FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
211     * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
212     * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
213     * for the key.
214     *   FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
215     * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
216     * lower order bytes are stored in FIELD_1.
217     *   FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
218     */
219    struct Entry {
220        const uint32_t mData0;
221        const uint32_t mData1;
222
223        Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
224
225        AK_FORCE_INLINE bool isBitmapEntry() const {
226            return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
227        }
228
229        AK_FORCE_INLINE bool hasTerminalLink() const {
230            return (mData1 & TERMINAL_LINK_FLAG) != 0;
231        }
232
233        // For terminal entry.
234        AK_FORCE_INLINE uint32_t getKey() const {
235            return mData0;
236        }
237
238        // For terminal entry.
239        AK_FORCE_INLINE uint32_t getValue() const {
240            return mData1 & VALUE_MASK;
241        }
242
243        // For terminal entry.
244        AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
245            return mData1 & TERMINAL_LINK_MASK;
246        }
247
248        // For bitmap entry.
249        AK_FORCE_INLINE uint32_t getBitmap() const {
250            return mData0;
251        }
252
253        // For bitmap entry.
254        AK_FORCE_INLINE int getTableIndex() const {
255            return static_cast<int>(mData1);
256        }
257
258        // For value entry.
259        AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
260            return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
261        }
262    };
263
264    BufferWithExtendableBuffer mBuffer;
265
266    static const int FIELD0_SIZE;
267    static const int FIELD1_SIZE;
268    static const int ENTRY_SIZE;
269    static const uint32_t VALUE_FLAG;
270    static const uint32_t VALUE_MASK;
271    static const uint32_t TERMINAL_LINK_FLAG;
272    static const uint32_t TERMINAL_LINK_MASK;
273    static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
274    static const uint32_t LABEL_MASK;
275    static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
276    static const int ROOT_BITMAP_ENTRY_INDEX;
277    static const int ROOT_BITMAP_ENTRY_POS;
278    static const Entry EMPTY_BITMAP_ENTRY;
279    static const int MAX_BUFFER_SIZE;
280
281    uint32_t getBitShuffledKey(const uint32_t key) const;
282    bool writeValue(const uint64_t value, const int terminalEntryIndex);
283    bool updateValue(const Entry &terminalEntry, const uint64_t value,
284            const int terminalEntryIndex);
285    bool freeTable(const int tableIndex, const int entryCount);
286    int allocateTable(const int entryCount);
287    int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
288            const Entry &bitmapEntry, const int level) const;
289    const Result getInternal(const uint32_t key, const uint32_t hashedKey,
290            const int bitmapEntryIndex, const int level) const;
291    bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
292            const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
293    bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
294            const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
295            const int level);
296    bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
297            const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
298            const int label);
299    const Result iterateNext(std::vector<TableIterationState> *const iterationState,
300            int *const outKey) const;
301
302    AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
303        return Entry(readField0(entryIndex), readField1(entryIndex));
304    }
305
306    // Returns whether an entry for the index is existing by testing if the index-th bit in the
307    // bitmap is set or not.
308    AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
309        return (bitmap & (1 << index)) != 0;
310    }
311
312    // Set index-th bit in the bitmap.
313    AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
314        return bitmap | (1 << index);
315    }
316
317    // Count set bits before index in the bitmap.
318    AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
319        return popCount(bitmap & ((1 << index) - 1));
320    }
321
322    // Count set bits in the bitmap.
323    AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
324        return __builtin_popcount(bitmap);
325        // int v = bitmap - ((bitmap >> 1) & 0x55555555);
326        // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
327        // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
328    }
329
330    AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
331        return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
332    }
333
334    AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
335        return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
336    }
337
338    AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
339        return mBuffer.readUint(FIELD1_SIZE,
340                ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
341    }
342
343    AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
344        return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
345    }
346
347    AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
348        return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
349    }
350
351    AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
352        return mBuffer.writeUint(data, FIELD0_SIZE,
353                ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
354    }
355
356    AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
357        return mBuffer.writeUint(data, FIELD1_SIZE,
358                ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
359    }
360
361    AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
362        return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
363    }
364
365    AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
366            const int entryIndex) {
367        return writeField0(key, entryIndex) && writeValue(value, entryIndex);
368    }
369
370    AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
371        return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
372    }
373
374    AK_FORCE_INLINE int getTailEntryIndex() const {
375        return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
376    }
377};
378
379} // namespace latinime
380#endif /* LATINIME_TRIE_MAP_H */
381