195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/* 295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Copyright (C) 2012 The Android Open Source Project 395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * 495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Licensed under the Apache License, Version 2.0 (the "License"); 595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * you may not use this file except in compliance with the License. 695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * You may obtain a copy of the License at 795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * 895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * http://www.apache.org/licenses/LICENSE-2.0 995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * 1095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Unless required by applicable law or agreed to in writing, software 1195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * distributed under the License is distributed on an "AS IS" BASIS, 1295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * See the License for the specific language governing permissions and 1495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * limitations under the License. 1595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 1695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 17b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/suggest.h" 18b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa 19b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dicnode/dic_node.h" 20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dicnode/dic_node_priority_queue.h" 21b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dicnode/dic_node_vector.h" 226abdafc67165977b47d7fa7ae176ebe9b3b007efKeisuke Kuroyanagi#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" 23a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/dictionary.h" 24a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/digraph_utils.h" 25b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dictionary/shortcut_utils.h" 2629432f843a8cd6ffb2be286104964592e80d77c9Ken Wakasa#include "suggest/core/layout/proximity_info.h" 27d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 28b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/policy/scoring.h" 29b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/policy/traversal.h" 30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/policy/weighting.h" 31b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/session/dic_traverse_session.h" 3295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 3395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokanamespace latinime { 3495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 3595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka// Initialization of class constants. 3695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaconst int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; 3795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaconst int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; 3895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaconst float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; 3995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 4095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 4195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates 4295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * whether to prematurely commit the suggested words up to the given point for sentence-level 4395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * suggestion. 4495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * 4595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Note: Currently does not support concurrent calls across threads. Continuous suggestion is 4695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * automatically activated for sequential calls that share the same starting input. 4795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * TODO: Stop detecting continuous suggestion. Start using traverseSession instead. 4895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 4995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaint Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, 5095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, 5195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices, 52bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard int *outputTypes, int *outputAutoCommitFirstWordConfidence) const { 5395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_OPEN; 5495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_START(0); 5595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); 5695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); 5795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, 5895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount()); 5995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: Add the way to evaluate cache 6095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 6195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka initializeSearch(tSession, commitPoint); 6295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_END(0); 6395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_START(1); 6495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 6595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // keep expanding search dicNodes until all have terminated. 6695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka while (tSession->getDicTraverseCache()->activeSize() > 0) { 6795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka expandCurrentDicNodes(tSession); 6895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka tSession->getDicTraverseCache()->advanceActiveDicNodes(); 6995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka tSession->getDicTraverseCache()->advanceInputIndex(inputSize); 7095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 7195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_END(1); 7295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_START(2); 73bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes, 74bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard outputAutoCommitFirstWordConfidence); 7595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_END(2); 7695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka PROF_CLOSE; 7795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return size; 7895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 7995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 8095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 8195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Initializes the search at the root of the lexicon trie. Note that when possible the search will 8295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * continue suggestion from where it left off during the last call. 8395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 8495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const { 8595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (!traverseSession->getProximityInfoState(0)->isUsed()) { 8695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return; 8795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 88a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi 89a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi // Never auto partial commit for now. 90a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi commitPoint = 0; 9195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 9295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE 9395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka && traverseSession->isContinuousSuggestionPossible()) { 9495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (commitPoint == 0) { 9595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Continue suggestion 9695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->continueSearch(); 9795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } else { 9895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Continue suggestion after partial commit. 9995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *topDicNode = 10095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); 10195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); 10295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->continueSearch(); 10395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->setPartiallyCommited(); 10495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 10595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } else { 10695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Restart recognition at the root. 1074447b14b785652e36adca329f5cddf986bfd14faKeisuke Kuroynagi traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()), 1084447b14b785652e36adca329f5cddf986bfd14faKeisuke Kuroynagi MAX_RESULTS); 10995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Create a new dic node here 11095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode rootNode; 111d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), 1120ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi traverseSession->getPrevWordPos(), &rootNode); 11395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); 11495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 11595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 11695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 11795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 11895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Outputs the final list of suggestions (i.e., terminal nodes). 11995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 12095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaint Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, 121bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes, 122bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard int *outputAutoCommitFirstWordConfidence) const { 1238b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka#if DEBUG_EVALUATE_MOST_PROBABLE_STRING 1248b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka const int terminalSize = 0; 1258b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka#else 12695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int terminalSize = min(MAX_RESULTS, 12795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize())); 1288b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka#endif 12995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array 13095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 13195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int index = terminalSize - 1; index >= 0; --index) { 13295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]); 13395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 13495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 13595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const float languageWeight = SCORING->getAdjustedLanguageWeight( 13695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession, terminals, terminalSize); 13795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 13895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka int outputWordIndex = 0; 13995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Insert most probable word at index == 0 as long as there is one terminal at least 14095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool hasMostProbableString = 14195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight, 14295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka &outputCodePoints[0], &outputTypes[0], &frequencies[0]); 14395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (hasMostProbableString) { 1444e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; 14595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka ++outputWordIndex; 14695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 14795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 14895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Initial value of the loop index for terminal nodes (words) 14995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka int doubleLetterTerminalIndex = -1; 15095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER; 15195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka SCORING->searchWordWithDoubleLetter(terminals, terminalSize, 15295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka &doubleLetterTerminalIndex, &doubleLetterLevel); 15395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 15495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka int maxScore = S_INT_MIN; 155a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi // Force autocorrection for obvious long multi-word suggestions when the top suggestion is 156a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi // a long multiple words suggestion. 157a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. 158a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi // traverseSession->isPartiallyCommited() always returns false because we never auto partial 159a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi // commit for now. 160a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi const bool forceCommitMultiWords = (terminalSize > 0) ? 161a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop() 162a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi && (traverseSession->isPartiallyCommited() 163a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi || (traverseSession->getInputSize() 164a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT 165a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi && terminals[0].hasMultipleWords())) : false; 1664e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi // TODO: have partial commit work even with multiple pointers. 1674e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi const bool outputSecondWordFirstLetterInputIndex = 1684e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); 169459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard if (terminalSize > 0) { 170459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // If we have no suggestions, don't write this 171459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard outputAutoCommitFirstWordConfidence[0] = 172459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard computeFirstWordConfidence(&terminals[0]); 173459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard } 174bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard 17595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Output suggestion results here 17695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; 17795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka ++terminalIndex) { 17895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *terminalDicNode = &terminals[terminalIndex]; 17995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (DEBUG_GEO_FULL) { 18095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka terminalDicNode->dump("OUT:"); 18195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 18295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost( 18395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel); 18495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) 18595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka + doubleLetterCost; 18665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi const bool isPossiblyOffensiveWord = 18765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi traverseSession->getDictionaryStructurePolicy()->getProbability( 18865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0; 18999e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard const bool isExactMatch = terminalDicNode->isExactMatch(); 1905a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase(); 1915a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka // Heuristic: We exclude freq=0 first-char-uppercase words from exact match. 1925a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka // (e.g. "AMD" and "and") 1935a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka const bool isSafeExactMatch = isExactMatch 1945a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka && !(isPossiblyOffensiveWord && isFirstCharUppercase); 19599e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard const int outputTypeFlags = 1965a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) 1975a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); 19899e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard 19999e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard // Entries that are blacklisted or do not represent a word should not be output. 2005b03213db13c670e37b15b8c813c91ebb232ead9Keisuke Kuroynagi const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); 20195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 20295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Increase output score of top typing suggestion to ensure autocorrection. 20395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: Better integration with java side autocorrection logic. 20495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int finalScore = SCORING->calculateFinalScore( 20595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka compoundDistance, traverseSession->getInputSize(), 20634047d8905fbd2cbe4c99618aab105556ebee9edKeisuke Kuroynagi terminalDicNode->isExactMatch() 20734047d8905fbd2cbe4c99618aab105556ebee9edKeisuke Kuroynagi || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) 20834047d8905fbd2cbe4c99618aab105556ebee9edKeisuke Kuroynagi || (isValidWord && SCORING->doesAutoCorrectValidWord())); 2094e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi if (maxScore < finalScore && isValidWord) { 2104e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi maxScore = finalScore; 21195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 21295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 21399e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard // Don't output invalid words. However, we still need to submit their shortcuts if any. 21495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (isValidWord) { 21599e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; 21695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka frequencies[outputWordIndex] = finalScore; 2174e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi if (outputSecondWordFirstLetterInputIndex) { 2184e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputIndicesToPartialCommit[outputWordIndex] = 2194e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi terminalDicNode->getSecondWordFirstInputIndex( 2204e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi traverseSession->getProximityInfoState(0)); 2214e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi } else { 2224e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX; 2234e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi } 22495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Populate the outputChars array with the suggested word. 22595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int startIndex = outputWordIndex * MAX_WORD_LENGTH; 22695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka terminalDicNode->outputResult(&outputCodePoints[startIndex]); 22795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka ++outputWordIndex; 22895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 22995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 2309a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi if (!terminalDicNode->hasMultipleWords()) { 2316abdafc67165977b47d7fa7ae176ebe9b3b007efKeisuke Kuroyanagi BinaryDictionaryShortcutIterator shortcutIt( 232d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), 233d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi traverseSession->getDictionaryStructurePolicy() 23477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); 2359a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi // Shortcut is not supported for multiple words suggestions. 2369a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi // TODO: Check shortcuts during traversal for multiple words suggestions. 2379a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); 2384e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt, 2394e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputWordIndex, finalScore, outputCodePoints, frequencies, outputTypes, 2404e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi sameAsTyped); 2414e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex( 2424e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi traverseSession->getProximityInfoState(0)); 2434e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) { 2444e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi if (outputSecondWordFirstLetterInputIndex) { 2454e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputIndicesToPartialCommit[i] = secondWordFirstInputIndex; 2464e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi } else { 2474e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputIndicesToPartialCommit[i] = NOT_AN_INDEX; 2484e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi } 2494e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi } 2504e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi outputWordIndex = updatedOutputWordIndex; 2519a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi } 25295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode::managedDelete(terminalDicNode); 25395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 25495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 25595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (hasMostProbableString) { 25695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka SCORING->safetyNetForMostProbableString(terminalSize, maxScore, 25795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka &outputCodePoints[0], &frequencies[0]); 25895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 25995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return outputWordIndex; 26095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 26195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 262459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalardint Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const { 263459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Get the number of spaces in the first suggestion 264459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const int spaceCount = terminalDicNode->getTotalNodeSpaceCount(); 265459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Get the number of characters in the first suggestion 266459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const int length = terminalDicNode->getTotalNodeCodePointCount(); 267459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Get the distance for the first word of the suggestion 268459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord(); 269459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard 270459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000. 271459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or 272459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means 273459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // we are very confident. 274459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Expected space count is 1 ~ 5 275459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard static const int MIN_EXPECTED_SPACE_COUNT = 1; 276459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard static const int MAX_EXPECTED_SPACE_COUNT = 5; 277459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Expected length is about 4 ~ 30 278459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard static const int MIN_EXPECTED_LENGTH = 4; 279459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard static const int MAX_EXPECTED_LENGTH = 30; 280459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0 281459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard static const float MIN_EXPECTED_DISTANCE = 0.0; 282459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard static const float MAX_EXPECTED_DISTANCE = 2.0; 283459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // This is not strict: it's where most stuff will be falling, but it's still fine if it's 284459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // outside these values. We want to output a value that reflects all of these. Each factor 285459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // contributes a bit. 286459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard 287459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // We need at least a space. 288459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE; 289459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard 290459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0 291459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the 292459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // weight of the distance. Clamp to avoid overflows. 293459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE 294459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance; 295459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT 296459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard * (MAX_EXPECTED_DISTANCE - clampedDistance) 297459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE); 298459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no 299459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the 300459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp. 301459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT 302459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH); 303459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no 304459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the 305459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard // weight of the space count. 306459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT 307459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard * (spaceCount - MIN_EXPECTED_SPACE_COUNT) 308459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT); 309459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard 310459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard return distanceContribution + lengthContribution + spaceContribution; 311bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard} 312bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard 31395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 31495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Expands the dicNodes in the current search priority queue by advancing to the possible child 31595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * nodes based on the next touch point(s) (or no touch points for lookahead) 31695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 31795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { 31895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int inputSize = traverseSession->getInputSize(); 31995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize()); 320fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang DicNode correctionDicNode; 32195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 32295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: Find more efficient caching 32395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession); 32495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (shouldDepthLevelCache) { 32595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->updateLastCachedInputIndex(); 32695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 32795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (DEBUG_CACHE) { 32895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d", 32995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka shouldDepthLevelCache, inputSize); 33095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 33195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka while (traverseSession->getDicTraverseCache()->activeSize() > 0) { 33295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode dicNode; 33395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->popActive(&dicNode); 33495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (dicNode.isTotalInputSizeExceedingLimit()) { 33595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return; 33695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 33795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka childDicNodes.clear(); 33895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int point0Index = dicNode.getInputIndex(0); 33995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool canDoLookAheadCorrection = 34095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode); 34195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool isLookAheadCorrection = canDoLookAheadCorrection 34295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka && traverseSession->getDicTraverseCache()-> 34395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka isLookAheadCorrectionInputIndex(static_cast<int>(point0Index)); 34495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool isCompletion = dicNode.isCompletion(inputSize); 34595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 34695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool shouldNodeLevelCache = 34795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode); 34895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (shouldDepthLevelCache || shouldNodeLevelCache) { 34995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (DEBUG_CACHE) { 35095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka dicNode.dump("PUSH_CACHE"); 35195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 35295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode); 35395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka dicNode.setCached(); 35495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 35595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 356fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang if (dicNode.isInDigraph()) { 357fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang // Finish digraph handling if the node is in the middle of a digraph expansion. 358fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang processDicNodeAsDigraph(traverseSession, &dicNode); 359fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang } else if (isLookAheadCorrection) { 36095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // The algorithm maintains a small set of "deferred" nodes that have not consumed the 36195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // latest touch point yet. These are needed to apply look-ahead correction operations 36295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // that require special handling of the latest touch point. For example, with insertions 36395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all. 364252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka processDicNodeAsTransposition(traverseSession, &dicNode); 365252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka processDicNodeAsInsertion(traverseSession, &dicNode); 36695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } else { // !isLookAheadCorrection 36795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Only consider typing error corrections if the normalized compound distance is 36895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // below a spatial distance threshold. 36995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // NOTE: the threshold may need to be updated if scoring model changes. 37095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: Remove. Do not prune node here. 37195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode); 37295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Process for handling space substitution (e.g., hevis => he is) 37395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (allowsErrorCorrections 37495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) { 37595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */); 37695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 37795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 37895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeUtils::getAllChildDicNodes( 379d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); 38095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 38195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int childDicNodesSize = childDicNodes.getSizeAndLock(); 38295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int i = 0; i < childDicNodesSize; ++i) { 38395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *const childDicNode = childDicNodes[i]; 38495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (isCompletion) { 38595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Handle forward lookahead when the lexicon letter exceeds the input size. 38695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processDicNodeAsMatch(traverseSession, childDicNode); 38795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka continue; 38895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 389bd0d1afdb28a28e2ddac1409208c59ba64350399Keisuke Kuroynagi if (DigraphUtils::hasDigraphForCodePoint( 39076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi traverseSession->getDictionaryStructurePolicy() 39176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi ->getHeaderStructurePolicy(), 392fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang childDicNode->getNodeCodePoint())) { 393fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang correctionDicNode.initByCopy(childDicNode); 394fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang correctionDicNode.advanceDigraphIndex(); 395fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang processDicNodeAsDigraph(traverseSession, &correctionDicNode); 396fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang } 397fd02ec10f0a0374096e88fa30a0e126d6ff11c72Tom Ouyang if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode, 398fd02ec10f0a0374096e88fa30a0e126d6ff11c72Tom Ouyang allowsErrorCorrections)) { 39995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: (Gesture) Change weight between omission and substitution errors 40095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: (Gesture) Terminal node should not be handled as omission 401fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang correctionDicNode.initByCopy(childDicNode); 402fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang processDicNodeAsOmission(traverseSession, &correctionDicNode); 40395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 40495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const ProximityType proximityType = TRAVERSAL->getProximityType( 40595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession, &dicNode, childDicNode); 40695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka switch (proximityType) { 40795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // TODO: Consider the difference of proximityType here 40895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka case MATCH_CHAR: 40995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka case PROXIMITY_CHAR: 41095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processDicNodeAsMatch(traverseSession, childDicNode); 41195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka break; 41295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka case ADDITIONAL_PROXIMITY_CHAR: 41395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (allowsErrorCorrections) { 41495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode, 41595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka childDicNode); 41695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 41795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka break; 41895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka case SUBSTITUTION_CHAR: 41995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (allowsErrorCorrections) { 42095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode); 42195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 42295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka break; 42395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka case UNRELATED_CHAR: 42495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Just drop this node and do nothing. 42595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka break; 42695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka default: 42795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Just drop this node and do nothing. 42895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka break; 42995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 43095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 43195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 43295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Push the node for look-ahead correction 43395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (allowsErrorCorrections && canDoLookAheadCorrection) { 43495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); 43595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 43695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 43795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 43895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 43995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 44095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processTerminalDicNode( 44195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicTraverseSession *traverseSession, DicNode *dicNode) const { 44295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 44395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return; 44495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 44595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (!dicNode->isTerminalWordNode()) { 44695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return; 44795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 4483e954347e3a7b381d7e94feb002e158f3bc69a32Jean Chalard if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { 44995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return; 45095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 45195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Create a non-cached node here. 45295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode terminalDicNode; 45395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeUtils::initByCopy(dicNode, &terminalDicNode); 45475322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka if (TRAVERSAL->needsToTraverseAllUserInput() 45575322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka && dicNode->getInputIndex(0) < traverseSession->getInputSize()) { 45675322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0, 45775322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka &terminalDicNode, traverseSession->getMultiBigramMap()); 45875322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka } 45995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0, 4609559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang &terminalDicNode, traverseSession->getMultiBigramMap()); 46195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode); 46295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 46395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 46495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 46595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word 46695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * (by the space omission error correction) search path if input dicNode is on a terminal node. 46795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 46895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processExpandedDicNode( 46995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicTraverseSession *traverseSession, DicNode *dicNode) const { 47095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processTerminalDicNode(traverseSession, dicNode); 47195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 47295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) { 47395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */); 47495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 47595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int allowsLookAhead = !(dicNode->hasMultipleWords() 47695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka && dicNode->isCompletion(traverseSession->getInputSize())); 47795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (dicNode->hasChildren() && allowsLookAhead) { 47895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode); 47995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 48095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 48195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode::managedDelete(dicNode); 48295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 48395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 48495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession, 48595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *childDicNode) const { 48695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka weightChildNode(traverseSession, childDicNode); 48795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processExpandedDicNode(traverseSession, childDicNode); 48895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 48995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 49095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession, 49195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *dicNode, DicNode *childDicNode) const { 4929559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang // Note: Most types of corrections don't need to look up the bigram information since they do 4939559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang // not treat the node as a terminal. There is no need to pass the bigram map in these cases. 49495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY, 4959559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */); 49695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka weightChildNode(traverseSession, childDicNode); 49795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processExpandedDicNode(traverseSession, childDicNode); 49895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 49995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 50095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, 50195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *dicNode, DicNode *childDicNode) const { 50295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession, 5039559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang dicNode, childDicNode, 0 /* multiBigramMap */); 50495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka weightChildNode(traverseSession, childDicNode); 50595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processExpandedDicNode(traverseSession, childDicNode); 50695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 50795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 508fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang// Process the node codepoint as a digraph. This means that composite glyphs like the German 509fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with 510fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". 511fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyangvoid Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, 512fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang DicNode *childDicNode) const { 513fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang weightChildNode(traverseSession, childDicNode); 514fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang childDicNode->advanceDigraphIndex(); 515fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang processExpandedDicNode(traverseSession, childDicNode); 516fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang} 517fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang 51895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 51995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider 52095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * matches for all possible next letters. Note that just skipping the current letter without any 52195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check 52295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * the possible *next* letters after the omission to better limit search to plausible omissions. 52395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Note that apostrophes are handled as omissions. 52495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 52595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsOmission( 52695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicTraverseSession *traverseSession, DicNode *dicNode) const { 52795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeVector childDicNodes; 5280ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi DicNodeUtils::getAllChildDicNodes( 529d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); 53095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 53195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int size = childDicNodes.getSizeAndLock(); 53295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int i = 0; i < size; i++) { 53395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *const childDicNode = childDicNodes[i]; 53460a169f0c34bf0da89c420d99bfff5c2556f3fbfKeisuke Kuroynagi // Treat this word as omission 53560a169f0c34bf0da89c420d99bfff5c2556f3fbfKeisuke Kuroynagi Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession, 5369559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang dicNode, childDicNode, 0 /* multiBigramMap */); 53795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka weightChildNode(traverseSession, childDicNode); 53895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) { 53995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka continue; 54095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 54195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processExpandedDicNode(traverseSession, childDicNode); 54295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 54395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 54495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 54595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 54695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and 54795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * consider matches for the next touch point. 54895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 54995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, 55095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *dicNode) const { 55195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int16_t pointIndex = dicNode->getInputIndex(0); 55295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeVector childDicNodes; 5537fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(), 5547fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi &childDicNodes); 55595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int size = childDicNodes.getSizeAndLock(); 55695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int i = 0; i < size; i++) { 5577fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1) 5587fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi != childDicNodes[i]->getNodeCodePoint()) { 5597fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi continue; 5607fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi } 56195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *const childDicNode = childDicNodes[i]; 56295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession, 5639559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang dicNode, childDicNode, 0 /* multiBigramMap */); 56495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processExpandedDicNode(traverseSession, childDicNode); 56595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 56695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 56795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 56895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 56995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points. 57095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 57195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, 57295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *dicNode) const { 57395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int16_t pointIndex = dicNode->getInputIndex(0); 57495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeVector childDicNodes1; 5757fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(), 5767fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi &childDicNodes1); 57795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int childSize1 = childDicNodes1.getSizeAndLock(); 57895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int i = 0; i < childSize1; i++) { 5797fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi const ProximityType matchedId1 = traverseSession->getProximityInfoState(0) 5807fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(), 5817fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi true /* checkProximityChars */); 5827fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) { 5837fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi continue; 5847fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi } 58595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (childDicNodes1[i]->hasChildren()) { 58695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNodeVector childDicNodes2; 5877fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi DicNodeUtils::getAllChildDicNodes(childDicNodes1[i], 5887fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi traverseSession->getDictionaryStructurePolicy(), &childDicNodes2); 58995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int childSize2 = childDicNodes2.getSizeAndLock(); 59095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka for (int j = 0; j < childSize2; j++) { 59195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode *const childDicNode2 = childDicNodes2[j]; 5927fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi const ProximityType matchedId2 = traverseSession->getProximityInfoState(0) 5937fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(), 5947fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi true /* checkProximityChars */); 5957fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) { 5967fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi continue; 5977fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi } 59895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION, 5999559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */); 60095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka processExpandedDicNode(traverseSession, childDicNode2); 60195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 60295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 60395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode::managedDelete(childDicNodes1[i]); 60495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 60595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 60695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 60795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 60895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Weight child node by aligning it to the key 60995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 61095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { 61195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const int inputSize = traverseSession->getInputSize(); 61295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (dicNode->isCompletion(inputSize)) { 61395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession, 6149559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */); 61595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } else { // completion 61695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession, 6179559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */); 61895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 61995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 62095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 62195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/** 62295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also 62395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * incorporates the unigram / bigram score for the ending word into the new dicNode. 62495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */ 62595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode, 62695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka const bool spaceSubstitution) const { 62795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) { 62895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka return; 62995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka } 63095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka 63195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka // Create a non-cached node here. 63295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka DicNode newDicNode; 6330ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi DicNodeUtils::initAsRootWithPreviousWord( 634d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); 635252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka const CorrectionType correctionType = spaceSubstitution ? 636da06e385f5f006bc891113847fbdf508376f7f34Jean Chalard CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION; 637252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, 6389559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang &newDicNode, traverseSession->getMultiBigramMap()); 6392d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { 6402d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi // newDicNode is worth continuing to traverse. 6412d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune 6422d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi // here because here is not the right place to do pruning. Pruning should take place only 6432d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi // in DicNodePriorityQueue. 6442d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode); 6452d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi } 64695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} 64795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} // namespace latinime 648