trie_map.h revision c0c674cdc0721a374e140ad5ee1409c0498b3262
1/* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_TRIE_MAP_H 18#define LATINIME_TRIE_MAP_H 19 20#include <climits> 21#include <cstdint> 22#include <cstdio> 23#include <vector> 24 25#include "defines.h" 26#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 27#include "utils/byte_array_view.h" 28 29namespace latinime { 30 31/** 32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. 33 * key is int and value is uint64_t. 34 * This supports multiple level map. Terminal entries can have a bitmap for the next level map. 35 * This doesn't support root map resizing. 36 */ 37class TrieMap { 38 public: 39 struct Result { 40 const uint64_t mValue; 41 const bool mIsValid; 42 const int mNextLevelBitmapEntryIndex; 43 44 Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex) 45 : mValue(value), mIsValid(isValid), 46 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} 47 }; 48 49 /** 50 * Struct to record iteration state in a table. 51 */ 52 struct TableIterationState { 53 int mTableSize; 54 int mTableIndex; 55 int mCurrentIndex; 56 57 TableIterationState(const int tableSize, const int tableIndex) 58 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {} 59 }; 60 61 class TrieMapRange; 62 class TrieMapIterator { 63 public: 64 class IterationResult { 65 public: 66 IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value, 67 const int nextLeveBitmapEntryIndex) 68 : mTrieMap(trieMap), mKey(key), mValue(value), 69 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} 70 71 const TrieMapRange getEntriesInNextLevel() const { 72 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); 73 } 74 75 bool hasNextLevelMap() const { 76 return mNextLevelBitmapEntryIndex != INVALID_INDEX; 77 } 78 79 AK_FORCE_INLINE int key() const { 80 return mKey; 81 } 82 83 AK_FORCE_INLINE uint64_t value() const { 84 return mValue; 85 } 86 87 private: 88 const TrieMap *const mTrieMap; 89 const int mKey; 90 const uint64_t mValue; 91 const int mNextLevelBitmapEntryIndex; 92 }; 93 94 TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex) 95 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex), 96 mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) { 97 if (!trieMap) { 98 return; 99 } 100 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex); 101 mStateStack.emplace_back( 102 mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex()); 103 this->operator++(); 104 } 105 106 const IterationResult operator*() const { 107 return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex); 108 } 109 110 bool operator!=(const TrieMapIterator &other) const { 111 // Caveat: This works only for for loops. 112 return mIsValid || other.mIsValid; 113 } 114 115 const TrieMapIterator &operator++() { 116 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); 117 mValue = result.mValue; 118 mIsValid = result.mIsValid; 119 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; 120 return *this; 121 } 122 123 private: 124 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); 125 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); 126 127 const TrieMap *const mTrieMap; 128 std::vector<TrieMap::TableIterationState> mStateStack; 129 const int mBaseBitmapEntryIndex; 130 int mKey; 131 uint64_t mValue; 132 bool mIsValid; 133 int mNextLevelBitmapEntryIndex; 134 }; 135 136 /** 137 * Class to support iterating entries in TrieMap by range base for loops. 138 */ 139 class TrieMapRange { 140 public: 141 TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex) 142 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {}; 143 144 TrieMapIterator begin() const { 145 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); 146 } 147 148 const TrieMapIterator end() const { 149 return TrieMapIterator(nullptr, INVALID_INDEX); 150 } 151 152 private: 153 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); 154 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); 155 156 const TrieMap *const mTrieMap; 157 const int mBaseBitmapEntryIndex; 158 }; 159 160 static const int INVALID_INDEX; 161 static const uint64_t MAX_VALUE; 162 163 TrieMap(); 164 // Construct TrieMap using existing data in the memory region written by save(). 165 TrieMap(const ReadWriteByteArrayView buffer); 166 void dump(const int from = 0, const int to = 0) const; 167 168 bool isNearSizeLimit() const { 169 return mBuffer.isNearSizeLimit(); 170 } 171 172 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. 173 int getNextLevelBitmapEntryIndex(const int key) { 174 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); 175 } 176 177 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); 178 179 const Result getRoot(const int key) const { 180 return get(key, ROOT_BITMAP_ENTRY_INDEX); 181 } 182 183 const Result get(const int key, const int bitmapEntryIndex) const; 184 185 bool putRoot(const int key, const uint64_t value) { 186 return put(key, value, ROOT_BITMAP_ENTRY_INDEX); 187 } 188 189 bool put(const int key, const uint64_t value, const int bitmapEntryIndex); 190 191 const TrieMapRange getEntriesInRootLevel() const { 192 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); 193 } 194 195 const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const { 196 return TrieMapRange(this, bitmapEntryIndex); 197 } 198 199 bool save(FILE *const file) const; 200 201 private: 202 DISALLOW_COPY_AND_ASSIGN(TrieMap); 203 204 /** 205 * Struct represents an entry. 206 * 207 * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and 208 * FIELD_1. 209 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. 210 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) 211 * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal 212 * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map 213 * for the key. 214 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) 215 * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and 216 * lower order bytes are stored in FIELD_1. 217 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) 218 */ 219 struct Entry { 220 const uint32_t mData0; 221 const uint32_t mData1; 222 223 Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {} 224 225 AK_FORCE_INLINE bool isBitmapEntry() const { 226 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; 227 } 228 229 AK_FORCE_INLINE bool hasTerminalLink() const { 230 return (mData1 & TERMINAL_LINK_FLAG) != 0; 231 } 232 233 // For terminal entry. 234 AK_FORCE_INLINE uint32_t getKey() const { 235 return mData0; 236 } 237 238 // For terminal entry. 239 AK_FORCE_INLINE uint32_t getValue() const { 240 return mData1 & VALUE_MASK; 241 } 242 243 // For terminal entry. 244 AK_FORCE_INLINE uint32_t getValueEntryIndex() const { 245 return mData1 & TERMINAL_LINK_MASK; 246 } 247 248 // For bitmap entry. 249 AK_FORCE_INLINE uint32_t getBitmap() const { 250 return mData0; 251 } 252 253 // For bitmap entry. 254 AK_FORCE_INLINE int getTableIndex() const { 255 return static_cast<int>(mData1); 256 } 257 258 // For value entry. 259 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { 260 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1); 261 } 262 }; 263 264 BufferWithExtendableBuffer mBuffer; 265 266 static const int FIELD0_SIZE; 267 static const int FIELD1_SIZE; 268 static const int ENTRY_SIZE; 269 static const uint32_t VALUE_FLAG; 270 static const uint32_t VALUE_MASK; 271 static const uint32_t TERMINAL_LINK_FLAG; 272 static const uint32_t TERMINAL_LINK_MASK; 273 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; 274 static const uint32_t LABEL_MASK; 275 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; 276 static const int ROOT_BITMAP_ENTRY_INDEX; 277 static const int ROOT_BITMAP_ENTRY_POS; 278 static const Entry EMPTY_BITMAP_ENTRY; 279 static const int MAX_BUFFER_SIZE; 280 281 uint32_t getBitShuffledKey(const uint32_t key) const; 282 bool writeValue(const uint64_t value, const int terminalEntryIndex); 283 bool updateValue(const Entry &terminalEntry, const uint64_t value, 284 const int terminalEntryIndex); 285 bool freeTable(const int tableIndex, const int entryCount); 286 int allocateTable(const int entryCount); 287 int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, 288 const Entry &bitmapEntry, const int level) const; 289 const Result getInternal(const uint32_t key, const uint32_t hashedKey, 290 const int bitmapEntryIndex, const int level) const; 291 bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, 292 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level); 293 bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, 294 const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, 295 const int level); 296 bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, 297 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, 298 const int label); 299 const Result iterateNext(std::vector<TableIterationState> *const iterationState, 300 int *const outKey) const; 301 302 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { 303 return Entry(readField0(entryIndex), readField1(entryIndex)); 304 } 305 306 // Returns whether an entry for the index is existing by testing if the index-th bit in the 307 // bitmap is set or not. 308 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { 309 return (bitmap & (1 << index)) != 0; 310 } 311 312 // Set index-th bit in the bitmap. 313 AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const { 314 return bitmap | (1 << index); 315 } 316 317 // Count set bits before index in the bitmap. 318 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { 319 return popCount(bitmap & ((1 << index) - 1)); 320 } 321 322 // Count set bits in the bitmap. 323 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { 324 return __builtin_popcount(bitmap); 325 // int v = bitmap - ((bitmap >> 1) & 0x55555555); 326 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); 327 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; 328 } 329 330 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const { 331 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; 332 } 333 334 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { 335 return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 336 } 337 338 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { 339 return mBuffer.readUint(FIELD1_SIZE, 340 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 341 } 342 343 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { 344 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 345 } 346 347 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) { 348 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 349 } 350 351 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { 352 return mBuffer.writeUint(data, FIELD0_SIZE, 353 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 354 } 355 356 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { 357 return mBuffer.writeUint(data, FIELD1_SIZE, 358 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 359 } 360 361 AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { 362 return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); 363 } 364 365 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value, 366 const int entryIndex) { 367 return writeField0(key, entryIndex) && writeValue(value, entryIndex); 368 } 369 370 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) { 371 return writeEntry(readEntry(originalEntryIndex), newEntryIndex); 372 } 373 374 AK_FORCE_INLINE int getTailEntryIndex() const { 375 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; 376 } 377}; 378 379} // namespace latinime 380#endif /* LATINIME_TRIE_MAP_H */ 381