trie_map.h revision 60ae3e0be5374478183362005f7c48809924ef01
1/* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_TRIE_MAP_H 18#define LATINIME_TRIE_MAP_H 19 20#include <climits> 21#include <cstdint> 22#include <cstdio> 23#include <vector> 24 25#include "defines.h" 26#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 27 28namespace latinime { 29 30/** 31 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. 32 * key is int and value is uint64_t. 33 * This supports multiple level map. Terminal entries can have a bitmap for the next level map. 34 * This doesn't support root map resizing. 35 */ 36class TrieMap { 37 public: 38 struct Result { 39 const uint64_t mValue; 40 const bool mIsValid; 41 const int mNextLevelBitmapEntryIndex; 42 43 Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex) 44 : mValue(value), mIsValid(isValid), 45 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} 46 }; 47 48 /** 49 * Struct to record iteration state in a table. 50 */ 51 struct TableIterationState { 52 int mTableSize; 53 int mTableIndex; 54 int mCurrentIndex; 55 56 TableIterationState(const int tableSize, const int tableIndex) 57 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {} 58 }; 59 60 class TrieMapRange; 61 class TrieMapIterator { 62 public: 63 class IterationResult { 64 public: 65 IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value, 66 const int nextLeveBitmapEntryIndex) 67 : mTrieMap(trieMap), mKey(key), mValue(value), 68 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} 69 70 const TrieMapRange getEntriesInNextLevel() const { 71 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); 72 } 73 74 bool hasNextLevelMap() const { 75 return mNextLevelBitmapEntryIndex != INVALID_INDEX; 76 } 77 78 AK_FORCE_INLINE int key() const { 79 return mKey; 80 } 81 82 AK_FORCE_INLINE uint64_t value() const { 83 return mValue; 84 } 85 86 private: 87 const TrieMap *const mTrieMap; 88 const int mKey; 89 const uint64_t mValue; 90 const int mNextLevelBitmapEntryIndex; 91 }; 92 93 TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex) 94 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex), 95 mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) { 96 if (!trieMap) { 97 return; 98 } 99 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex); 100 mStateStack.emplace_back( 101 mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex()); 102 this->operator++(); 103 } 104 105 const IterationResult operator*() const { 106 return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex); 107 } 108 109 bool operator!=(const TrieMapIterator &other) const { 110 // Caveat: This works only for for loops. 111 return mIsValid || other.mIsValid; 112 } 113 114 const TrieMapIterator &operator++() { 115 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); 116 mValue = result.mValue; 117 mIsValid = result.mIsValid; 118 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; 119 return *this; 120 } 121 122 private: 123 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); 124 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); 125 126 const TrieMap *const mTrieMap; 127 std::vector<TrieMap::TableIterationState> mStateStack; 128 const int mBaseBitmapEntryIndex; 129 int mKey; 130 uint64_t mValue; 131 bool mIsValid; 132 int mNextLevelBitmapEntryIndex; 133 }; 134 135 /** 136 * Class to support iterating entries in TrieMap by range base for loops. 137 */ 138 class TrieMapRange { 139 public: 140 TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex) 141 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {}; 142 143 TrieMapIterator begin() const { 144 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); 145 } 146 147 const TrieMapIterator end() const { 148 return TrieMapIterator(nullptr, INVALID_INDEX); 149 } 150 151 private: 152 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); 153 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); 154 155 const TrieMap *const mTrieMap; 156 const int mBaseBitmapEntryIndex; 157 }; 158 159 static const int INVALID_INDEX; 160 static const uint64_t MAX_VALUE; 161 162 TrieMap(); 163 void dump(const int from = 0, const int to = 0) const; 164 165 bool isNearSizeLimit() const { 166 return mBuffer.isNearSizeLimit(); 167 } 168 169 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. 170 int getNextLevelBitmapEntryIndex(const int key) { 171 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); 172 } 173 174 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); 175 176 const Result getRoot(const int key) const { 177 return get(key, ROOT_BITMAP_ENTRY_INDEX); 178 } 179 180 const Result get(const int key, const int bitmapEntryIndex) const; 181 182 bool putRoot(const int key, const uint64_t value) { 183 return put(key, value, ROOT_BITMAP_ENTRY_INDEX); 184 } 185 186 bool put(const int key, const uint64_t value, const int bitmapEntryIndex); 187 188 const TrieMapRange getEntriesInRootLevel() const { 189 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); 190 } 191 192 const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const { 193 return TrieMapRange(this, bitmapEntryIndex); 194 } 195 196 bool save(FILE *const file) const; 197 198 private: 199 DISALLOW_COPY_AND_ASSIGN(TrieMap); 200 201 /** 202 * Struct represents an entry. 203 * 204 * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and 205 * FIELD_1. 206 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. 207 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) 208 * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal 209 * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map 210 * for the key. 211 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) 212 * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and 213 * lower order bytes are stored in FIELD_1. 214 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) 215 */ 216 struct Entry { 217 const uint32_t mData0; 218 const uint32_t mData1; 219 220 Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {} 221 222 AK_FORCE_INLINE bool isBitmapEntry() const { 223 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; 224 } 225 226 AK_FORCE_INLINE bool hasTerminalLink() const { 227 return (mData1 & TERMINAL_LINK_FLAG) != 0; 228 } 229 230 // For terminal entry. 231 AK_FORCE_INLINE uint32_t getKey() const { 232 return mData0; 233 } 234 235 // For terminal entry. 236 AK_FORCE_INLINE uint32_t getValue() const { 237 return mData1 & VALUE_MASK; 238 } 239 240 // For terminal entry. 241 AK_FORCE_INLINE uint32_t getValueEntryIndex() const { 242 return mData1 & TERMINAL_LINK_MASK; 243 } 244 245 // For bitmap entry. 246 AK_FORCE_INLINE uint32_t getBitmap() const { 247 return mData0; 248 } 249 250 // For bitmap entry. 251 AK_FORCE_INLINE int getTableIndex() const { 252 return static_cast<int>(mData1); 253 } 254 255 // For value entry. 256 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { 257 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1); 258 } 259 }; 260 261 BufferWithExtendableBuffer mBuffer; 262 263 static const int FIELD0_SIZE; 264 static const int FIELD1_SIZE; 265 static const int ENTRY_SIZE; 266 static const uint32_t VALUE_FLAG; 267 static const uint32_t VALUE_MASK; 268 static const uint32_t TERMINAL_LINK_FLAG; 269 static const uint32_t TERMINAL_LINK_MASK; 270 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; 271 static const uint32_t LABEL_MASK; 272 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; 273 static const int ROOT_BITMAP_ENTRY_INDEX; 274 static const int ROOT_BITMAP_ENTRY_POS; 275 static const Entry EMPTY_BITMAP_ENTRY; 276 static const int MAX_BUFFER_SIZE; 277 278 uint32_t getBitShuffledKey(const uint32_t key) const; 279 bool writeValue(const uint64_t value, const int terminalEntryIndex); 280 bool updateValue(const Entry &terminalEntry, const uint64_t value, 281 const int terminalEntryIndex); 282 bool freeTable(const int tableIndex, const int entryCount); 283 int allocateTable(const int entryCount); 284 int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, 285 const Entry &bitmapEntry, const int level) const; 286 const Result getInternal(const uint32_t key, const uint32_t hashedKey, 287 const int bitmapEntryIndex, const int level) const; 288 bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, 289 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level); 290 bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, 291 const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, 292 const int level); 293 bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, 294 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, 295 const int label); 296 const Result iterateNext(std::vector<TableIterationState> *const iterationState, 297 int *const outKey) const; 298 299 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { 300 return Entry(readField0(entryIndex), readField1(entryIndex)); 301 } 302 303 // Returns whether an entry for the index is existing by testing if the index-th bit in the 304 // bitmap is set or not. 305 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { 306 return (bitmap & (1 << index)) != 0; 307 } 308 309 // Set index-th bit in the bitmap. 310 AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const { 311 return bitmap | (1 << index); 312 } 313 314 // Count set bits before index in the bitmap. 315 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { 316 return popCount(bitmap & ((1 << index) - 1)); 317 } 318 319 // Count set bits in the bitmap. 320 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { 321 return __builtin_popcount(bitmap); 322 // int v = bitmap - ((bitmap >> 1) & 0x55555555); 323 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); 324 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; 325 } 326 327 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const { 328 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; 329 } 330 331 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { 332 return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 333 } 334 335 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { 336 return mBuffer.readUint(FIELD1_SIZE, 337 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 338 } 339 340 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { 341 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 342 } 343 344 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) { 345 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 346 } 347 348 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { 349 return mBuffer.writeUint(data, FIELD0_SIZE, 350 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 351 } 352 353 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { 354 return mBuffer.writeUint(data, FIELD1_SIZE, 355 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 356 } 357 358 AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { 359 return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); 360 } 361 362 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value, 363 const int entryIndex) { 364 return writeField0(key, entryIndex) && writeValue(value, entryIndex); 365 } 366 367 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) { 368 return writeEntry(readEntry(originalEntryIndex), newEntryIndex); 369 } 370 371 AK_FORCE_INLINE int getTailEntryIndex() const { 372 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; 373 } 374}; 375 376} // namespace latinime 377#endif /* LATINIME_TRIE_MAP_H */ 378