trie_map.cpp revision de5c3a2562bbddc0f3d95619a1b3b1318b9598fd
1/* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/policyimpl/dictionary/utils/trie_map.h" 18 19#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" 20 21namespace latinime { 22 23const int TrieMap::INVALID_INDEX = -1; 24const int TrieMap::FIELD0_SIZE = 4; 25const int TrieMap::FIELD1_SIZE = 3; 26const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE; 27const uint32_t TrieMap::VALUE_FLAG = 0x400000; 28const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF; 29const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000; 30const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF; 31const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5; 32const uint32_t TrieMap::LABEL_MASK = 0x1F; 33const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL; 34const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0; 35const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE; 36const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0); 37const uint64_t TrieMap::MAX_VALUE = 38 (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1; 39const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE; 40 41TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) { 42 mBuffer.extend(ROOT_BITMAP_ENTRY_POS); 43 writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX); 44} 45 46TrieMap::TrieMap(uint8_t *const buffer, const int bufferSize) 47 : mBuffer(buffer, bufferSize, 48 BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} 49 50void TrieMap::dump(const int from, const int to) const { 51 AKLOGI("BufSize: %d", mBuffer.getTailPosition()); 52 for (int i = from; i < to; ++i) { 53 AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i)); 54 } 55 int unusedRegionSize = 0; 56 for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) { 57 int index = readEmptyTableLink(i); 58 while (index != ROOT_BITMAP_ENTRY_INDEX) { 59 index = readField0(index); 60 unusedRegionSize += i; 61 } 62 } 63 AKLOGI("Unused Size: %d", unusedRegionSize); 64} 65 66int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) { 67 const Entry bitmapEntry = readEntry(bitmapEntryIndex); 68 const uint32_t unsignedKey = static_cast<uint32_t>(key); 69 const int terminalEntryIndex = getTerminalEntryIndex( 70 unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */); 71 if (terminalEntryIndex == INVALID_INDEX) { 72 // Not found. 73 return INVALID_INDEX; 74 } 75 const Entry terminalEntry = readEntry(terminalEntryIndex); 76 if (terminalEntry.hasTerminalLink()) { 77 return terminalEntry.getValueEntryIndex() + 1; 78 } 79 // Create a value entry and a bitmap entry. 80 const int valueEntryIndex = allocateTable(2 /* entryCount */); 81 if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) { 82 return INVALID_INDEX; 83 } 84 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) { 85 return INVALID_INDEX; 86 } 87 if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) { 88 return INVALID_INDEX; 89 } 90 return valueEntryIndex + 1; 91} 92 93const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const { 94 const uint32_t unsignedKey = static_cast<uint32_t>(key); 95 return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex, 96 0 /* level */); 97} 98 99bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) { 100 if (value > MAX_VALUE) { 101 return false; 102 } 103 const uint32_t unsignedKey = static_cast<uint32_t>(key); 104 return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex, 105 readEntry(bitmapEntryIndex), 0 /* level */); 106} 107 108bool TrieMap::save(FILE *const file) const { 109 return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer); 110} 111 112/** 113 * Iterate next entry in a certain level. 114 * 115 * @param iterationState the iteration state that will be read and updated in this method. 116 * @param outKey the output key 117 * @return Result instance. mIsValid is false when all entries are iterated. 118 */ 119const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState, 120 int *const outKey) const { 121 while (!iterationState->empty()) { 122 TableIterationState &state = iterationState->back(); 123 if (state.mTableSize <= state.mCurrentIndex) { 124 // Move to parent. 125 iterationState->pop_back(); 126 } else { 127 const int entryIndex = state.mTableIndex + state.mCurrentIndex; 128 state.mCurrentIndex += 1; 129 const Entry entry = readEntry(entryIndex); 130 if (entry.isBitmapEntry()) { 131 // Move to child. 132 iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex()); 133 } else { 134 if (outKey) { 135 *outKey = entry.getKey(); 136 } 137 if (!entry.hasTerminalLink()) { 138 return Result(entry.getValue(), true, INVALID_INDEX); 139 } 140 const int valueEntryIndex = entry.getValueEntryIndex(); 141 const Entry valueEntry = readEntry(valueEntryIndex); 142 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1); 143 } 144 } 145 } 146 // Visited all entries. 147 return Result(0, false, INVALID_INDEX); 148} 149 150/** 151 * Shuffle bits of the key in the fixed order. 152 * 153 * This method is used as a hash function. This returns different values for different inputs. 154 */ 155uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const { 156 uint32_t shuffledKey = 0; 157 for (int i = 0; i < 4; ++i) { 158 const uint32_t keyPiece = (key >> (i * 8)) & 0xFF; 159 shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21)) 160 & 0x11111111) << i; 161 } 162 return shuffledKey; 163} 164 165bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) { 166 if (value <= VALUE_MASK) { 167 // Write value into the terminal entry. 168 return writeField1(value | VALUE_FLAG, terminalEntryIndex); 169 } 170 // Create value entry and write value. 171 const int valueEntryIndex = allocateTable(2 /* entryCount */); 172 if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) { 173 return false; 174 } 175 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) { 176 return false; 177 } 178 return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex); 179} 180 181bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value, 182 const int terminalEntryIndex) { 183 if (!terminalEntry.hasTerminalLink()) { 184 return writeValue(value, terminalEntryIndex); 185 } 186 const int valueEntryIndex = terminalEntry.getValueEntryIndex(); 187 return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex); 188} 189 190bool TrieMap::freeTable(const int tableIndex, const int entryCount) { 191 if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) { 192 return false; 193 } 194 return writeEmptyTableLink(tableIndex, entryCount); 195} 196 197/** 198 * Allocate table with entryCount-entries. Reuse freed table if possible. 199 */ 200int TrieMap::allocateTable(const int entryCount) { 201 if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) { 202 const int tableIndex = readEmptyTableLink(entryCount); 203 if (tableIndex > 0) { 204 if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) { 205 return INVALID_INDEX; 206 } 207 // Reuse the table. 208 return tableIndex; 209 } 210 } 211 // Allocate memory space at tail position of the buffer. 212 const int mapIndex = getTailEntryIndex(); 213 if (!mBuffer.extend(entryCount * ENTRY_SIZE)) { 214 return INVALID_INDEX; 215 } 216 return mapIndex; 217} 218 219int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, 220 const Entry &bitmapEntry, const int level) const { 221 const int label = getLabel(hashedKey, level); 222 if (!exists(bitmapEntry.getBitmap(), label)) { 223 return INVALID_INDEX; 224 } 225 const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label); 226 const Entry entry = readEntry(entryIndex); 227 if (entry.isBitmapEntry()) { 228 // Move to the next level. 229 return getTerminalEntryIndex(key, hashedKey, entry, level + 1); 230 } 231 if (entry.getKey() == key) { 232 // Terminal entry is found. 233 return entryIndex; 234 } 235 return INVALID_INDEX; 236} 237 238/** 239 * Get Result corresponding to the key. 240 * 241 * @param key the key. 242 * @param hashedKey the hashed key. 243 * @param bitmapEntryIndex the index of bitmap entry 244 * @param level current level 245 * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the 246 * map. 247 */ 248const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey, 249 const int bitmapEntryIndex, const int level) const { 250 const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey, 251 readEntry(bitmapEntryIndex), level); 252 if (terminalEntryIndex == INVALID_INDEX) { 253 // Not found. 254 return Result(0, false, INVALID_INDEX); 255 } 256 const Entry terminalEntry = readEntry(terminalEntryIndex); 257 if (!terminalEntry.hasTerminalLink()) { 258 return Result(terminalEntry.getValue(), true, INVALID_INDEX); 259 } 260 const int valueEntryIndex = terminalEntry.getValueEntryIndex(); 261 const Entry valueEntry = readEntry(valueEntryIndex); 262 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1); 263} 264 265/** 266 * Put key to value mapping to the map. 267 * 268 * @param key the key. 269 * @param value the value 270 * @param hashedKey the hashed key. 271 * @param bitmapEntryIndex the index of bitmap entry 272 * @param bitmapEntry the bitmap entry 273 * @param level current level 274 * @return whether the key-value has been correctly inserted to the map or not. 275 */ 276bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, 277 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) { 278 const int label = getLabel(hashedKey, level); 279 const uint32_t bitmap = bitmapEntry.getBitmap(); 280 const int mapIndex = bitmapEntry.getTableIndex(); 281 if (!exists(bitmap, label)) { 282 // Current map doesn't contain the label. 283 return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label); 284 } 285 const int entryIndex = mapIndex + popCount(bitmap, label); 286 const Entry entry = readEntry(entryIndex); 287 if (entry.isBitmapEntry()) { 288 // Bitmap entry is found. Go to the next level. 289 return putInternal(key, value, hashedKey, entryIndex, entry, level + 1); 290 } 291 if (entry.getKey() == key) { 292 // Terminal entry for the key is found. Update the value. 293 return updateValue(entry, value, entryIndex); 294 } 295 // Conflict with the existing key. 296 return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level); 297} 298 299/** 300 * Resolve a conflict in the current level and add new entry. 301 * 302 * @param key the key 303 * @param value the value 304 * @param hashedKey the hashed key 305 * @param conflictedEntry the existing conflicted entry 306 * @param conflictedEntryIndex the index of existing conflicted entry 307 * @param level current level 308 * @return whether the key-value has been correctly inserted to the map or not. 309 */ 310bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, 311 const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, 312 const int level) { 313 const int conflictedKeyNextLabel = 314 getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1); 315 const int nextLabel = getLabel(hashedKey, level + 1); 316 if (conflictedKeyNextLabel == nextLabel) { 317 // Conflicted again in the next level. 318 const int newTableIndex = allocateTable(1 /* entryCount */); 319 if (newTableIndex == INVALID_INDEX) { 320 return false; 321 } 322 if (!writeEntry(conflictedEntry, newTableIndex)) { 323 return false; 324 } 325 const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex); 326 if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) { 327 return false; 328 } 329 return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1); 330 } 331 // The conflict has been resolved. Create a table that contains 2 entries. 332 const int newTableIndex = allocateTable(2 /* entryCount */); 333 if (newTableIndex == INVALID_INDEX) { 334 return false; 335 } 336 if (nextLabel < conflictedKeyNextLabel) { 337 if (!writeTerminalEntry(key, value, newTableIndex)) { 338 return false; 339 } 340 if (!writeEntry(conflictedEntry, newTableIndex + 1)) { 341 return false; 342 } 343 } else { // nextLabel > conflictedKeyNextLabel 344 if (!writeEntry(conflictedEntry, newTableIndex)) { 345 return false; 346 } 347 if (!writeTerminalEntry(key, value, newTableIndex + 1)) { 348 return false; 349 } 350 } 351 const uint32_t updatedBitmap = 352 setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel); 353 return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex); 354} 355 356/** 357 * Add new entry to the existing table. 358 */ 359bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, 360 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) { 361 // Current map doesn't contain the label. 362 const int entryCount = popCount(bitmap); 363 const int newTableIndex = allocateTable(entryCount + 1); 364 if (newTableIndex == INVALID_INDEX) { 365 return false; 366 } 367 const int newEntryIndexInTable = popCount(bitmap, label); 368 // Copy from existing table to the new table. 369 for (int i = 0; i < entryCount; ++i) { 370 if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) { 371 return false; 372 } 373 } 374 // Write new terminal entry. 375 if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) { 376 return false; 377 } 378 // Update bitmap. 379 if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) { 380 return false; 381 } 382 if (entryCount > 0) { 383 return freeTable(tableIndex, entryCount); 384 } 385 return true; 386} 387 388} // namespace latinime 389