suggest.cpp revision 5b03213db13c670e37b15b8c813c91ebb232ead9
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/core/suggest.h" 18 19#include "suggest/core/dicnode/dic_node.h" 20#include "suggest/core/dicnode/dic_node_priority_queue.h" 21#include "suggest/core/dicnode/dic_node_vector.h" 22#include "suggest/core/dictionary/binary_dictionary_info.h" 23#include "suggest/core/dictionary/dictionary.h" 24#include "suggest/core/dictionary/digraph_utils.h" 25#include "suggest/core/dictionary/shortcut_utils.h" 26#include "suggest/core/dictionary/terminal_attributes.h" 27#include "suggest/core/layout/proximity_info.h" 28#include "suggest/core/policy/scoring.h" 29#include "suggest/core/policy/traversal.h" 30#include "suggest/core/policy/weighting.h" 31#include "suggest/core/session/dic_traverse_session.h" 32 33namespace latinime { 34 35// Initialization of class constants. 36const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; 37const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; 38const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; 39const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1; 40 41/** 42 * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates 43 * whether to prematurely commit the suggested words up to the given point for sentence-level 44 * suggestion. 45 * 46 * Note: Currently does not support concurrent calls across threads. Continuous suggestion is 47 * automatically activated for sequential calls that share the same starting input. 48 * TODO: Stop detecting continuous suggestion. Start using traverseSession instead. 49 */ 50int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, 51 int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, 52 int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices, 53 int *outputTypes) const { 54 PROF_OPEN; 55 PROF_START(0); 56 const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); 57 DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); 58 tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, 59 pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount()); 60 // TODO: Add the way to evaluate cache 61 62 initializeSearch(tSession, commitPoint); 63 PROF_END(0); 64 PROF_START(1); 65 66 // keep expanding search dicNodes until all have terminated. 67 while (tSession->getDicTraverseCache()->activeSize() > 0) { 68 expandCurrentDicNodes(tSession); 69 tSession->getDicTraverseCache()->advanceActiveDicNodes(); 70 tSession->getDicTraverseCache()->advanceInputIndex(inputSize); 71 } 72 PROF_END(1); 73 PROF_START(2); 74 const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes); 75 PROF_END(2); 76 PROF_CLOSE; 77 return size; 78} 79 80/** 81 * Initializes the search at the root of the lexicon trie. Note that when possible the search will 82 * continue suggestion from where it left off during the last call. 83 */ 84void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const { 85 if (!traverseSession->getProximityInfoState(0)->isUsed()) { 86 return; 87 } 88 89 // Never auto partial commit for now. 90 commitPoint = 0; 91 92 if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE 93 && traverseSession->isContinuousSuggestionPossible()) { 94 if (commitPoint == 0) { 95 // Continue suggestion 96 traverseSession->getDicTraverseCache()->continueSearch(); 97 } else { 98 // Continue suggestion after partial commit. 99 DicNode *topDicNode = 100 traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); 101 traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); 102 traverseSession->getDicTraverseCache()->continueSearch(); 103 traverseSession->setPartiallyCommited(); 104 } 105 } else { 106 // Restart recognition at the root. 107 traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(), MAX_RESULTS); 108 // Create a new dic node here 109 DicNode rootNode; 110 DicNodeUtils::initAsRoot(traverseSession->getBinaryDictionaryInfo(), 111 traverseSession->getPrevWordPos(), &rootNode); 112 traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); 113 } 114} 115 116/** 117 * Outputs the final list of suggestions (i.e., terminal nodes). 118 */ 119int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, 120 int *outputCodePoints, int *spaceIndices, int *outputTypes) const { 121#if DEBUG_EVALUATE_MOST_PROBABLE_STRING 122 const int terminalSize = 0; 123#else 124 const int terminalSize = min(MAX_RESULTS, 125 static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize())); 126#endif 127 DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array 128 129 for (int index = terminalSize - 1; index >= 0; --index) { 130 traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]); 131 } 132 133 const float languageWeight = SCORING->getAdjustedLanguageWeight( 134 traverseSession, terminals, terminalSize); 135 136 int outputWordIndex = 0; 137 // Insert most probable word at index == 0 as long as there is one terminal at least 138 const bool hasMostProbableString = 139 SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, 140 &outputCodePoints[0], &outputTypes[0], &frequencies[0]); 141 if (hasMostProbableString) { 142 ++outputWordIndex; 143 } 144 145 // Initial value of the loop index for terminal nodes (words) 146 int doubleLetterTerminalIndex = -1; 147 DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER; 148 SCORING->searchWordWithDoubleLetter(terminals, terminalSize, 149 &doubleLetterTerminalIndex, &doubleLetterLevel); 150 151 int maxScore = S_INT_MIN; 152 int bestExactMatchedNodeTerminalIndex = -1; 153 int bestExactMatchedNodeOutputWordIndex = -1; 154 // Force autocorrection for obvious long multi-word suggestions when the top suggestion is 155 // a long multiple words suggestion. 156 // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. 157 // traverseSession->isPartiallyCommited() always returns false because we never auto partial 158 // commit for now. 159 const bool forceCommitMultiWords = (terminalSize > 0) ? 160 TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop() 161 && (traverseSession->isPartiallyCommited() 162 || (traverseSession->getInputSize() 163 >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT 164 && terminals[0].hasMultipleWords())) : false; 165 // Output suggestion results here 166 for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; 167 ++terminalIndex) { 168 DicNode *terminalDicNode = &terminals[terminalIndex]; 169 if (DEBUG_GEO_FULL) { 170 terminalDicNode->dump("OUT:"); 171 } 172 const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost( 173 terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); 174 const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) 175 + doubleLetterCost; 176 const bool isPossiblyOffensiveWord = terminalDicNode->getProbability() <= 0; 177 const bool isExactMatch = terminalDicNode->isExactMatch(); 178 const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); 179 // Heuristic: We exclude freq=0 first-char-uppercase words from exact match. 180 // (e.g. "AMD" and "and") 181 const bool isSafeExactMatch = isExactMatch 182 && !(isPossiblyOffensiveWord && isFirstCharUppercase); 183 const int outputTypeFlags = 184 (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) 185 | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); 186 187 // Entries that are blacklisted or do not represent a word should not be output. 188 const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); 189 190 // Increase output score of top typing suggestion to ensure autocorrection. 191 // TODO: Better integration with java side autocorrection logic. 192 const int finalScore = SCORING->calculateFinalScore( 193 compoundDistance, traverseSession->getInputSize(), 194 (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) 195 || (isValidWord && SCORING->doesAutoCorrectValidWord())); 196 maxScore = max(maxScore, finalScore); 197 198 // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. 199 // Index for top typing suggestion should be 0. 200 if (isValidWord && outputWordIndex == 0) { 201 terminalDicNode->outputSpacePositionsResult(spaceIndices); 202 } 203 204 // Don't output invalid words. However, we still need to submit their shortcuts if any. 205 if (isValidWord) { 206 outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; 207 frequencies[outputWordIndex] = finalScore; 208 if (isSafeExactMatch) { 209 // Demote exact matches that are not the highest probable node among all exact 210 // matches. 211 const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0 212 || terminals[bestExactMatchedNodeTerminalIndex].getProbability() 213 < terminalDicNode->getProbability(); 214 const int outputWordIndexToBeDemoted = isBestTerminal ? 215 bestExactMatchedNodeOutputWordIndex : outputWordIndex; 216 if (outputWordIndexToBeDemoted >= 0) { 217 frequencies[outputWordIndexToBeDemoted] -= 218 FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; 219 } 220 if (isBestTerminal) { 221 // Updates the best exact matched node index. 222 bestExactMatchedNodeTerminalIndex = terminalIndex; 223 // Updates the best exact matched output word index. 224 bestExactMatchedNodeOutputWordIndex = outputWordIndex; 225 } 226 } 227 // Populate the outputChars array with the suggested word. 228 const int startIndex = outputWordIndex * MAX_WORD_LENGTH; 229 terminalDicNode->outputResult(&outputCodePoints[startIndex]); 230 ++outputWordIndex; 231 } 232 233 if (!terminalDicNode->hasMultipleWords()) { 234 const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), 235 terminalDicNode->getAttributesPos()); 236 // Shortcut is not supported for multiple words suggestions. 237 // TODO: Check shortcuts during traversal for multiple words suggestions. 238 const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); 239 outputWordIndex = ShortcutUtils::outputShortcuts(&terminalAttributes, outputWordIndex, 240 finalScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); 241 242 } 243 DicNode::managedDelete(terminalDicNode); 244 } 245 246 if (hasMostProbableString) { 247 SCORING->safetyNetForMostProbableString(terminalSize, maxScore, 248 &outputCodePoints[0], &frequencies[0]); 249 } 250 return outputWordIndex; 251} 252 253/** 254 * Expands the dicNodes in the current search priority queue by advancing to the possible child 255 * nodes based on the next touch point(s) (or no touch points for lookahead) 256 */ 257void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { 258 const int inputSize = traverseSession->getInputSize(); 259 DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize()); 260 DicNode correctionDicNode; 261 262 // TODO: Find more efficient caching 263 const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession); 264 if (shouldDepthLevelCache) { 265 traverseSession->getDicTraverseCache()->updateLastCachedInputIndex(); 266 } 267 if (DEBUG_CACHE) { 268 AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d", 269 shouldDepthLevelCache, inputSize); 270 } 271 while (traverseSession->getDicTraverseCache()->activeSize() > 0) { 272 DicNode dicNode; 273 traverseSession->getDicTraverseCache()->popActive(&dicNode); 274 if (dicNode.isTotalInputSizeExceedingLimit()) { 275 return; 276 } 277 childDicNodes.clear(); 278 const int point0Index = dicNode.getInputIndex(0); 279 const bool canDoLookAheadCorrection = 280 TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode); 281 const bool isLookAheadCorrection = canDoLookAheadCorrection 282 && traverseSession->getDicTraverseCache()-> 283 isLookAheadCorrectionInputIndex(static_cast<int>(point0Index)); 284 const bool isCompletion = dicNode.isCompletion(inputSize); 285 286 const bool shouldNodeLevelCache = 287 TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode); 288 if (shouldDepthLevelCache || shouldNodeLevelCache) { 289 if (DEBUG_CACHE) { 290 dicNode.dump("PUSH_CACHE"); 291 } 292 traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode); 293 dicNode.setCached(); 294 } 295 296 if (dicNode.isInDigraph()) { 297 // Finish digraph handling if the node is in the middle of a digraph expansion. 298 processDicNodeAsDigraph(traverseSession, &dicNode); 299 } else if (isLookAheadCorrection) { 300 // The algorithm maintains a small set of "deferred" nodes that have not consumed the 301 // latest touch point yet. These are needed to apply look-ahead correction operations 302 // that require special handling of the latest touch point. For example, with insertions 303 // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all. 304 processDicNodeAsTransposition(traverseSession, &dicNode); 305 processDicNodeAsInsertion(traverseSession, &dicNode); 306 } else { // !isLookAheadCorrection 307 // Only consider typing error corrections if the normalized compound distance is 308 // below a spatial distance threshold. 309 // NOTE: the threshold may need to be updated if scoring model changes. 310 // TODO: Remove. Do not prune node here. 311 const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode); 312 // Process for handling space substitution (e.g., hevis => he is) 313 if (allowsErrorCorrections 314 && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) { 315 createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */); 316 } 317 318 DicNodeUtils::getAllChildDicNodes( 319 &dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); 320 321 const int childDicNodesSize = childDicNodes.getSizeAndLock(); 322 for (int i = 0; i < childDicNodesSize; ++i) { 323 DicNode *const childDicNode = childDicNodes[i]; 324 if (isCompletion) { 325 // Handle forward lookahead when the lexicon letter exceeds the input size. 326 processDicNodeAsMatch(traverseSession, childDicNode); 327 continue; 328 } 329 if (DigraphUtils::hasDigraphForCodePoint( 330 traverseSession->getBinaryDictionaryInfo()->getHeader(), 331 childDicNode->getNodeCodePoint())) { 332 correctionDicNode.initByCopy(childDicNode); 333 correctionDicNode.advanceDigraphIndex(); 334 processDicNodeAsDigraph(traverseSession, &correctionDicNode); 335 } 336 if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode, 337 allowsErrorCorrections)) { 338 // TODO: (Gesture) Change weight between omission and substitution errors 339 // TODO: (Gesture) Terminal node should not be handled as omission 340 correctionDicNode.initByCopy(childDicNode); 341 processDicNodeAsOmission(traverseSession, &correctionDicNode); 342 } 343 const ProximityType proximityType = TRAVERSAL->getProximityType( 344 traverseSession, &dicNode, childDicNode); 345 switch (proximityType) { 346 // TODO: Consider the difference of proximityType here 347 case MATCH_CHAR: 348 case PROXIMITY_CHAR: 349 processDicNodeAsMatch(traverseSession, childDicNode); 350 break; 351 case ADDITIONAL_PROXIMITY_CHAR: 352 if (allowsErrorCorrections) { 353 processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode, 354 childDicNode); 355 } 356 break; 357 case SUBSTITUTION_CHAR: 358 if (allowsErrorCorrections) { 359 processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode); 360 } 361 break; 362 case UNRELATED_CHAR: 363 // Just drop this node and do nothing. 364 break; 365 default: 366 // Just drop this node and do nothing. 367 break; 368 } 369 } 370 371 // Push the node for look-ahead correction 372 if (allowsErrorCorrections && canDoLookAheadCorrection) { 373 traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); 374 } 375 } 376 } 377} 378 379void Suggest::processTerminalDicNode( 380 DicTraverseSession *traverseSession, DicNode *dicNode) const { 381 if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 382 return; 383 } 384 if (!dicNode->isTerminalWordNode()) { 385 return; 386 } 387 if (TRAVERSAL->needsToTraverseAllUserInput() 388 && dicNode->getInputIndex(0) < traverseSession->getInputSize()) { 389 return; 390 } 391 392 if (dicNode->shouldBeFilterdBySafetyNetForBigram()) { 393 return; 394 } 395 // Create a non-cached node here. 396 DicNode terminalDicNode; 397 DicNodeUtils::initByCopy(dicNode, &terminalDicNode); 398 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0, 399 &terminalDicNode, traverseSession->getMultiBigramMap()); 400 traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode); 401} 402 403/** 404 * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word 405 * (by the space omission error correction) search path if input dicNode is on a terminal node. 406 */ 407void Suggest::processExpandedDicNode( 408 DicTraverseSession *traverseSession, DicNode *dicNode) const { 409 processTerminalDicNode(traverseSession, dicNode); 410 if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 411 if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) { 412 createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */); 413 } 414 const int allowsLookAhead = !(dicNode->hasMultipleWords() 415 && dicNode->isCompletion(traverseSession->getInputSize())); 416 if (dicNode->hasChildren() && allowsLookAhead) { 417 traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode); 418 } 419 } 420 DicNode::managedDelete(dicNode); 421} 422 423void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession, 424 DicNode *childDicNode) const { 425 weightChildNode(traverseSession, childDicNode); 426 processExpandedDicNode(traverseSession, childDicNode); 427} 428 429void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, 430 DicNode *dicNode, DicNode *childDicNode) const { 431 // Note: Most types of corrections don't need to look up the bigram information since they do 432 // not treat the node as a terminal. There is no need to pass the bigram map in these cases. 433 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY, 434 traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */); 435 weightChildNode(traverseSession, childDicNode); 436 processExpandedDicNode(traverseSession, childDicNode); 437} 438 439void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, 440 DicNode *dicNode, DicNode *childDicNode) const { 441 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession, 442 dicNode, childDicNode, 0 /* multiBigramMap */); 443 weightChildNode(traverseSession, childDicNode); 444 processExpandedDicNode(traverseSession, childDicNode); 445} 446 447// Process the node codepoint as a digraph. This means that composite glyphs like the German 448// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with 449// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". 450void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, 451 DicNode *childDicNode) const { 452 weightChildNode(traverseSession, childDicNode); 453 childDicNode->advanceDigraphIndex(); 454 processExpandedDicNode(traverseSession, childDicNode); 455} 456 457/** 458 * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider 459 * matches for all possible next letters. Note that just skipping the current letter without any 460 * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check 461 * the possible *next* letters after the omission to better limit search to plausible omissions. 462 * Note that apostrophes are handled as omissions. 463 */ 464void Suggest::processDicNodeAsOmission( 465 DicTraverseSession *traverseSession, DicNode *dicNode) const { 466 DicNodeVector childDicNodes; 467 DicNodeUtils::getAllChildDicNodes( 468 dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); 469 470 const int size = childDicNodes.getSizeAndLock(); 471 for (int i = 0; i < size; i++) { 472 DicNode *const childDicNode = childDicNodes[i]; 473 // Treat this word as omission 474 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession, 475 dicNode, childDicNode, 0 /* multiBigramMap */); 476 weightChildNode(traverseSession, childDicNode); 477 478 if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) { 479 continue; 480 } 481 processExpandedDicNode(traverseSession, childDicNode); 482 } 483} 484 485/** 486 * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and 487 * consider matches for the next touch point. 488 */ 489void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, 490 DicNode *dicNode) const { 491 const int16_t pointIndex = dicNode->getInputIndex(0); 492 DicNodeVector childDicNodes; 493 DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), 494 traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes); 495 const int size = childDicNodes.getSizeAndLock(); 496 for (int i = 0; i < size; i++) { 497 DicNode *const childDicNode = childDicNodes[i]; 498 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession, 499 dicNode, childDicNode, 0 /* multiBigramMap */); 500 processExpandedDicNode(traverseSession, childDicNode); 501 } 502} 503 504/** 505 * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points. 506 */ 507void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, 508 DicNode *dicNode) const { 509 const int16_t pointIndex = dicNode->getInputIndex(0); 510 DicNodeVector childDicNodes1; 511 DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), 512 traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1); 513 const int childSize1 = childDicNodes1.getSizeAndLock(); 514 for (int i = 0; i < childSize1; i++) { 515 if (childDicNodes1[i]->hasChildren()) { 516 DicNodeVector childDicNodes2; 517 DicNodeUtils::getProximityChildDicNodes( 518 childDicNodes1[i], traverseSession->getBinaryDictionaryInfo(), 519 traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2); 520 const int childSize2 = childDicNodes2.getSizeAndLock(); 521 for (int j = 0; j < childSize2; j++) { 522 DicNode *const childDicNode2 = childDicNodes2[j]; 523 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION, 524 traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */); 525 processExpandedDicNode(traverseSession, childDicNode2); 526 } 527 } 528 DicNode::managedDelete(childDicNodes1[i]); 529 } 530} 531 532/** 533 * Weight child node by aligning it to the key 534 */ 535void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { 536 const int inputSize = traverseSession->getInputSize(); 537 if (dicNode->isCompletion(inputSize)) { 538 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession, 539 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */); 540 } else { // completion 541 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession, 542 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */); 543 } 544} 545 546/** 547 * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also 548 * incorporates the unigram / bigram score for the ending word into the new dicNode. 549 */ 550void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, 551 const bool spaceSubstitution) const { 552 if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) { 553 return; 554 } 555 556 // Create a non-cached node here. 557 DicNode newDicNode; 558 DicNodeUtils::initAsRootWithPreviousWord( 559 traverseSession->getBinaryDictionaryInfo(), dicNode, &newDicNode); 560 const CorrectionType correctionType = spaceSubstitution ? 561 CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION; 562 Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, 563 &newDicNode, traverseSession->getMultiBigramMap()); 564 if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 565 // newDicNode is worth continuing to traverse. 566 // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune 567 // here because here is not the right place to do pruning. Pruning should take place only 568 // in DicNodePriorityQueue. 569 traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode); 570 } 571} 572} // namespace latinime 573