195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/*
295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Copyright (C) 2012 The Android Open Source Project
395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka *
495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Licensed under the Apache License, Version 2.0 (the "License");
595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * you may not use this file except in compliance with the License.
695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * You may obtain a copy of the License at
795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka *
895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka *      http://www.apache.org/licenses/LICENSE-2.0
995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka *
1095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Unless required by applicable law or agreed to in writing, software
1195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * distributed under the License is distributed on an "AS IS" BASIS,
1295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * See the License for the specific language governing permissions and
1495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * limitations under the License.
1595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
1695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
17b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/suggest.h"
18b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa
19b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dicnode/dic_node.h"
20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dicnode/dic_node_priority_queue.h"
21b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dicnode/dic_node_vector.h"
226abdafc67165977b47d7fa7ae176ebe9b3b007efKeisuke Kuroyanagi#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
23a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/dictionary.h"
24a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/digraph_utils.h"
25b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/dictionary/shortcut_utils.h"
2629432f843a8cd6ffb2be286104964592e80d77c9Ken Wakasa#include "suggest/core/layout/proximity_info.h"
27d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
28b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/policy/scoring.h"
29b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/policy/traversal.h"
30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/policy/weighting.h"
31b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/session/dic_traverse_session.h"
3295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
3395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokanamespace latinime {
3495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
3595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka// Initialization of class constants.
3695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaconst int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
3795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaconst int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
3895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaconst float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
3995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
4095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
4195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
4295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * whether to prematurely commit the suggested words up to the given point for sentence-level
4395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * suggestion.
4495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka *
4595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
4695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * automatically activated for sequential calls that share the same starting input.
4795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * TODO: Stop detecting continuous suggestion. Start using traverseSession instead.
4895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
4995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaint Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
5095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints,
5195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        int inputSize, int commitPoint, int *outWords, int *frequencies, int *outputIndices,
52bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard        int *outputTypes, int *outputAutoCommitFirstWordConfidence) const {
5395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_OPEN;
5495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_START(0);
5595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
5695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession);
5795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
5895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
5995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // TODO: Add the way to evaluate cache
6095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
6195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    initializeSearch(tSession, commitPoint);
6295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_END(0);
6395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_START(1);
6495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
6595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // keep expanding search dicNodes until all have terminated.
6695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    while (tSession->getDicTraverseCache()->activeSize() > 0) {
6795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        expandCurrentDicNodes(tSession);
6895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        tSession->getDicTraverseCache()->advanceActiveDicNodes();
6995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
7095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
7195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_END(1);
7295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_START(2);
73bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard    const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes,
74bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard            outputAutoCommitFirstWordConfidence);
7595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_END(2);
7695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    PROF_CLOSE;
7795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    return size;
7895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
7995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
8095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
8195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Initializes the search at the root of the lexicon trie. Note that when possible the search will
8295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * continue suggestion from where it left off during the last call.
8395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
8495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const {
8595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (!traverseSession->getProximityInfoState(0)->isUsed()) {
8695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        return;
8795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
88a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi
89a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    // Never auto partial commit for now.
90a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    commitPoint = 0;
9195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
9295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
9395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            && traverseSession->isContinuousSuggestionPossible()) {
9495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (commitPoint == 0) {
9595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // Continue suggestion
9695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession->getDicTraverseCache()->continueSearch();
9795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        } else {
9895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // Continue suggestion after partial commit.
9995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            DicNode *topDicNode =
10095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
10195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
10295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession->getDicTraverseCache()->continueSearch();
10395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession->setPartiallyCommited();
10495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
10595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    } else {
10695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        // Restart recognition at the root.
1074447b14b785652e36adca329f5cddf986bfd14faKeisuke Kuroynagi        traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession->getInputSize()),
1084447b14b785652e36adca329f5cddf986bfd14faKeisuke Kuroynagi                MAX_RESULTS);
10995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        // Create a new dic node here
11095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode rootNode;
111d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi        DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
1120ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi                traverseSession->getPrevWordPos(), &rootNode);
11395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
11495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
11595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
11695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
11795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
11895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Outputs the final list of suggestions (i.e., terminal nodes).
11995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
12095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokaint Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
121bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard        int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
122bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard        int *outputAutoCommitFirstWordConfidence) const {
1238b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
1248b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka    const int terminalSize = 0;
1258b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka#else
12695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int terminalSize = min(MAX_RESULTS,
12795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
1288b3009ef4875e37cffbc5ccee532e4e77a34fd36Satoshi Kataoka#endif
12995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
13095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
13195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    for (int index = terminalSize - 1; index >= 0; --index) {
13295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
13395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
13495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
13595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const float languageWeight = SCORING->getAdjustedLanguageWeight(
13695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession, terminals, terminalSize);
13795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
13895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    int outputWordIndex = 0;
13995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // Insert most probable word at index == 0 as long as there is one terminal at least
14095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const bool hasMostProbableString =
14195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
14295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
14395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (hasMostProbableString) {
1444e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi        outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
14595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        ++outputWordIndex;
14695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
14795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
14895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // Initial value of the loop index for terminal nodes (words)
14995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    int doubleLetterTerminalIndex = -1;
15095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
15195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
15295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            &doubleLetterTerminalIndex, &doubleLetterLevel);
15395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
15495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    int maxScore = S_INT_MIN;
155a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
156a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    // a long multiple words suggestion.
157a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
158a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    // traverseSession->isPartiallyCommited() always returns false because we never auto partial
159a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    // commit for now.
160a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi    const bool forceCommitMultiWords = (terminalSize > 0) ?
161a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi            TRAVERSAL->autoCorrectsToMultiWordSuggestionIfTop()
162a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi                    && (traverseSession->isPartiallyCommited()
163a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi                            || (traverseSession->getInputSize()
164a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi                                    >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
165a829188f54c0fd2e26192d98d9e56e033d8f91aaKeisuke Kuroynagi                                            && terminals[0].hasMultipleWords())) : false;
1664e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi    // TODO: have partial commit work even with multiple pointers.
1674e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi    const bool outputSecondWordFirstLetterInputIndex =
1684e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
169459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    if (terminalSize > 0) {
170459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard        // If we have no suggestions, don't write this
171459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard        outputAutoCommitFirstWordConfidence[0] =
172459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard                computeFirstWordConfidence(&terminals[0]);
173459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    }
174bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard
17595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // Output suggestion results here
17695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
17795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            ++terminalIndex) {
17895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *terminalDicNode = &terminals[terminalIndex];
17995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (DEBUG_GEO_FULL) {
18095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            terminalDicNode->dump("OUT:");
18195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
18295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
18395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
18495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
18595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                + doubleLetterCost;
18665d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi        const bool isPossiblyOffensiveWord =
18765d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi                traverseSession->getDictionaryStructurePolicy()->getProbability(
18865d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi                        terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
18999e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard        const bool isExactMatch = terminalDicNode->isExactMatch();
1905a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka        const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
1915a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka        // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
1925a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka        // (e.g. "AMD" and "and")
1935a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka        const bool isSafeExactMatch = isExactMatch
1945a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka                && !(isPossiblyOffensiveWord && isFirstCharUppercase);
19599e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard        const int outputTypeFlags =
1965a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka                (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
1975a346afab96186dc09aeed390b2cc70f8ec439d0Satoshi Kataoka                | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
19899e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard
19999e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard        // Entries that are blacklisted or do not represent a word should not be output.
2005b03213db13c670e37b15b8c813c91ebb232ead9Keisuke Kuroynagi        const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
20195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
20295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        // Increase output score of top typing suggestion to ensure autocorrection.
20395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        // TODO: Better integration with java side autocorrection logic.
20495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const int finalScore = SCORING->calculateFinalScore(
20595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                compoundDistance, traverseSession->getInputSize(),
20634047d8905fbd2cbe4c99618aab105556ebee9edKeisuke Kuroynagi                terminalDicNode->isExactMatch()
20734047d8905fbd2cbe4c99618aab105556ebee9edKeisuke Kuroynagi                        || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
20834047d8905fbd2cbe4c99618aab105556ebee9edKeisuke Kuroynagi                                || (isValidWord && SCORING->doesAutoCorrectValidWord()));
2094e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi        if (maxScore < finalScore && isValidWord) {
2104e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            maxScore = finalScore;
21195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
21295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
21399e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard        // Don't output invalid words. However, we still need to submit their shortcuts if any.
21495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (isValidWord) {
21599e998286d71cf698d0a809a29b15d1a231ebbb1Jean Chalard            outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
21695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            frequencies[outputWordIndex] = finalScore;
2174e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            if (outputSecondWordFirstLetterInputIndex) {
2184e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                outputIndicesToPartialCommit[outputWordIndex] =
2194e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                        terminalDicNode->getSecondWordFirstInputIndex(
2204e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                                traverseSession->getProximityInfoState(0));
2214e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            } else {
2224e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
2234e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            }
22495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // Populate the outputChars array with the suggested word.
22595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
22695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            terminalDicNode->outputResult(&outputCodePoints[startIndex]);
22795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            ++outputWordIndex;
22895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
22995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
2309a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi        if (!terminalDicNode->hasMultipleWords()) {
2316abdafc67165977b47d7fa7ae176ebe9b3b007efKeisuke Kuroyanagi            BinaryDictionaryShortcutIterator shortcutIt(
232d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi                    traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
233d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi                    traverseSession->getDictionaryStructurePolicy()
23477ef75cbe6722d1eb45115c1ad82f963444d71cdKeisuke Kuroyanagi                            ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
2359a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi            // Shortcut is not supported for multiple words suggestions.
2369a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi            // TODO: Check shortcuts during traversal for multiple words suggestions.
2379a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi            const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
2384e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
2394e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                    outputWordIndex,  finalScore, outputCodePoints, frequencies, outputTypes,
2404e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                    sameAsTyped);
2414e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
2424e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                    traverseSession->getProximityInfoState(0));
2434e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
2444e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                if (outputSecondWordFirstLetterInputIndex) {
2454e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                    outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
2464e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                } else {
2474e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                    outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
2484e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi                }
2494e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            }
2504e17b42d0fd104ec2cd3a79db2473863228ecb62Keisuke Kuroyanagi            outputWordIndex = updatedOutputWordIndex;
2519a4f7a3761684ee2122485c7ae111f6287d105d6Keisuke Kuroynagi        }
25295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode::managedDelete(terminalDicNode);
25395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
25495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
25595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (hasMostProbableString) {
25695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        SCORING->safetyNetForMostProbableString(terminalSize, maxScore,
25795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                &outputCodePoints[0], &frequencies[0]);
25895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
25995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    return outputWordIndex;
26095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
26195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
262459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalardint Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
263459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Get the number of spaces in the first suggestion
264459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
265459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Get the number of characters in the first suggestion
266459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const int length = terminalDicNode->getTotalNodeCodePointCount();
267459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Get the distance for the first word of the suggestion
268459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
269459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard
270459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
271459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
272459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
273459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // we are very confident.
274459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Expected space count is 1 ~ 5
275459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    static const int MIN_EXPECTED_SPACE_COUNT = 1;
276459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    static const int MAX_EXPECTED_SPACE_COUNT = 5;
277459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Expected length is about 4 ~ 30
278459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    static const int MIN_EXPECTED_LENGTH = 4;
279459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    static const int MAX_EXPECTED_LENGTH = 30;
280459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
281459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    static const float MIN_EXPECTED_DISTANCE = 0.0;
282459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    static const float MAX_EXPECTED_DISTANCE = 2.0;
283459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // This is not strict: it's where most stuff will be falling, but it's still fine if it's
284459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // outside these values. We want to output a value that reflects all of these. Each factor
285459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // contributes a bit.
286459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard
287459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // We need at least a space.
288459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
289459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard
290459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
291459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
292459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // weight of the distance. Clamp to avoid overflows.
293459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
294459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard            : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
295459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
296459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard            * (MAX_EXPECTED_DISTANCE - clampedDistance)
297459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
298459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
299459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
300459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
301459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
302459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard            * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
303459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
304459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
305459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    // weight of the space count.
306459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
307459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard            * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
308459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
309459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard
310459cd6f8ef3eaa561e47dd996ce537770ea8b37aJean Chalard    return distanceContribution + lengthContribution + spaceContribution;
311bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard}
312bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42Jean Chalard
31395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
31495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Expands the dicNodes in the current search priority queue by advancing to the possible child
31595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * nodes based on the next touch point(s) (or no touch points for lookahead)
31695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
31795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
31895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int inputSize = traverseSession->getInputSize();
31995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
320fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang    DicNode correctionDicNode;
32195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
32295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // TODO: Find more efficient caching
32395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverseSession);
32495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (shouldDepthLevelCache) {
32595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
32695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
32795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (DEBUG_CACHE) {
32895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
32995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                shouldDepthLevelCache, inputSize);
33095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
33195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
33295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode dicNode;
33395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        traverseSession->getDicTraverseCache()->popActive(&dicNode);
33495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (dicNode.isTotalInputSizeExceedingLimit()) {
33595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            return;
33695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
33795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        childDicNodes.clear();
33895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const int point0Index = dicNode.getInputIndex(0);
33995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const bool canDoLookAheadCorrection =
34095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
34195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const bool isLookAheadCorrection = canDoLookAheadCorrection
34295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                && traverseSession->getDicTraverseCache()->
34395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        isLookAheadCorrectionInputIndex(static_cast<int>(point0Index));
34495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const bool isCompletion = dicNode.isCompletion(inputSize);
34595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
34695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const bool shouldNodeLevelCache =
34795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
34895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (shouldDepthLevelCache || shouldNodeLevelCache) {
34995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            if (DEBUG_CACHE) {
35095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                dicNode.dump("PUSH_CACHE");
35195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            }
35295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
35395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            dicNode.setCached();
35495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
35595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
356fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang        if (dicNode.isInDigraph()) {
357fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang            // Finish digraph handling if the node is in the middle of a digraph expansion.
358fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang            processDicNodeAsDigraph(traverseSession, &dicNode);
359fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang        } else if (isLookAheadCorrection) {
36095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // The algorithm maintains a small set of "deferred" nodes that have not consumed the
36195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // latest touch point yet. These are needed to apply look-ahead correction operations
36295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // that require special handling of the latest touch point. For example, with insertions
36395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // (e.g., "thiis" -> "this") the latest touch point should not be consumed at all.
364252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka            processDicNodeAsTransposition(traverseSession, &dicNode);
365252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka            processDicNodeAsInsertion(traverseSession, &dicNode);
36695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        } else { // !isLookAheadCorrection
36795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // Only consider typing error corrections if the normalized compound distance is
36895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // below a spatial distance threshold.
36995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // NOTE: the threshold may need to be updated if scoring model changes.
37095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // TODO: Remove. Do not prune node here.
37195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrections(&dicNode);
37295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // Process for handling space substitution (e.g., hevis => he is)
37395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            if (allowsErrorCorrections
37495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, &dicNode)) {
37595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                createNextWordDicNode(traverseSession, &dicNode, true /* spaceSubstitution */);
37695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            }
37795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
37895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            DicNodeUtils::getAllChildDicNodes(
379d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi                    &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
38095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
38195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            const int childDicNodesSize = childDicNodes.getSizeAndLock();
38295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            for (int i = 0; i < childDicNodesSize; ++i) {
38395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                DicNode *const childDicNode = childDicNodes[i];
38495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                if (isCompletion) {
38595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    // Handle forward lookahead when the lexicon letter exceeds the input size.
38695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    processDicNodeAsMatch(traverseSession, childDicNode);
38795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    continue;
38895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                }
389bd0d1afdb28a28e2ddac1409208c59ba64350399Keisuke Kuroynagi                if (DigraphUtils::hasDigraphForCodePoint(
39076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi                        traverseSession->getDictionaryStructurePolicy()
39176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi                                ->getHeaderStructurePolicy(),
392fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                        childDicNode->getNodeCodePoint())) {
393fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                    correctionDicNode.initByCopy(childDicNode);
394fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                    correctionDicNode.advanceDigraphIndex();
395fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                    processDicNodeAsDigraph(traverseSession, &correctionDicNode);
396fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                }
397fd02ec10f0a0374096e88fa30a0e126d6ff11c72Tom Ouyang                if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNode,
398fd02ec10f0a0374096e88fa30a0e126d6ff11c72Tom Ouyang                        allowsErrorCorrections)) {
39995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    // TODO: (Gesture) Change weight between omission and substitution errors
40095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    // TODO: (Gesture) Terminal node should not be handled as omission
401fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                    correctionDicNode.initByCopy(childDicNode);
402fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang                    processDicNodeAsOmission(traverseSession, &correctionDicNode);
40395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                }
40495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                const ProximityType proximityType = TRAVERSAL->getProximityType(
40595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        traverseSession, &dicNode, childDicNode);
40695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                switch (proximityType) {
40795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    // TODO: Consider the difference of proximityType here
40895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    case MATCH_CHAR:
40995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    case PROXIMITY_CHAR:
41095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        processDicNodeAsMatch(traverseSession, childDicNode);
41195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        break;
41295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    case ADDITIONAL_PROXIMITY_CHAR:
41395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        if (allowsErrorCorrections) {
41495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                            processDicNodeAsAdditionalProximityChar(traverseSession, &dicNode,
41595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                                    childDicNode);
41695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        }
41795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        break;
41895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    case SUBSTITUTION_CHAR:
41995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        if (allowsErrorCorrections) {
42095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                            processDicNodeAsSubstitution(traverseSession, &dicNode, childDicNode);
42195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        }
42295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        break;
42395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    case UNRELATED_CHAR:
42495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        // Just drop this node and do nothing.
42595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        break;
42695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                    default:
42795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        // Just drop this node and do nothing.
42895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                        break;
42995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                }
43095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            }
43195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
43295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            // Push the node for look-ahead correction
43395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            if (allowsErrorCorrections && canDoLookAheadCorrection) {
43495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
43595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            }
43695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
43795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
43895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
43995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
44095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processTerminalDicNode(
44195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicTraverseSession *traverseSession, DicNode *dicNode) const {
44295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
44395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        return;
44495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
44595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (!dicNode->isTerminalWordNode()) {
44695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        return;
44795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
4483e954347e3a7b381d7e94feb002e158f3bc69a32Jean Chalard    if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
44995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        return;
45095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
45195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // Create a non-cached node here.
45295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNode terminalDicNode;
45395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNodeUtils::initByCopy(dicNode, &terminalDicNode);
45475322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka    if (TRAVERSAL->needsToTraverseAllUserInput()
45575322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka            && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
45675322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
45775322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka                &terminalDicNode, traverseSession->getMultiBigramMap());
45875322cecb9fe02a0914b65c859cfbc2f4e1f70d9Satoshi Kataoka    }
45995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSession, 0,
4609559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang            &terminalDicNode, traverseSession->getMultiBigramMap());
46195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
46295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
46395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
46495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
46595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
46695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * (by the space omission error correction) search path if input dicNode is on a terminal node.
46795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
46895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processExpandedDicNode(
46995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicTraverseSession *traverseSession, DicNode *dicNode) const {
47095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    processTerminalDicNode(traverseSession, dicNode);
47195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
47295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
47395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            createNextWordDicNode(traverseSession, dicNode, false /* spaceSubstitution */);
47495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
47595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const int allowsLookAhead = !(dicNode->hasMultipleWords()
47695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                && dicNode->isCompletion(traverseSession->getInputSize()));
47795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (dicNode->hasChildren() && allowsLookAhead) {
47895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
47995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
48095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
48195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNode::managedDelete(dicNode);
48295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
48395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
48495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
48595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *childDicNode) const {
48695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    weightChildNode(traverseSession, childDicNode);
48795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    processExpandedDicNode(traverseSession, childDicNode);
48895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
48995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
49095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traverseSession,
49195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *dicNode, DicNode *childDicNode) const {
4929559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang    // Note: Most types of corrections don't need to look up the bigram information since they do
4939559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang    // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
49495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
4959559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang            traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
49695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    weightChildNode(traverseSession, childDicNode);
49795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    processExpandedDicNode(traverseSession, childDicNode);
49895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
49995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
50095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
50195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *dicNode, DicNode *childDicNode) const {
50295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseSession,
5039559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang            dicNode, childDicNode, 0 /* multiBigramMap */);
50495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    weightChildNode(traverseSession, childDicNode);
50595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    processExpandedDicNode(traverseSession, childDicNode);
50695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
50795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
508fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang// Process the node codepoint as a digraph. This means that composite glyphs like the German
509fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
510fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
511fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyangvoid Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
512fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang        DicNode *childDicNode) const {
513fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang    weightChildNode(traverseSession, childDicNode);
514fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang    childDicNode->advanceDigraphIndex();
515fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang    processExpandedDicNode(traverseSession, childDicNode);
516fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang}
517fd9f3d97aee97e9d2e5b9016ec61e120c1265b6aTom Ouyang
51895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
51995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
52095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * matches for all possible next letters. Note that just skipping the current letter without any
52195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
52295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * the possible *next* letters after the omission to better limit search to plausible omissions.
52395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Note that apostrophes are handled as omissions.
52495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
52595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsOmission(
52695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicTraverseSession *traverseSession, DicNode *dicNode) const {
52795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNodeVector childDicNodes;
5280ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi    DicNodeUtils::getAllChildDicNodes(
529d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi            dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes);
53095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
53195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int size = childDicNodes.getSizeAndLock();
53295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    for (int i = 0; i < size; i++) {
53395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *const childDicNode = childDicNodes[i];
53460a169f0c34bf0da89c420d99bfff5c2556f3fbfKeisuke Kuroynagi        // Treat this word as omission
53560a169f0c34bf0da89c420d99bfff5c2556f3fbfKeisuke Kuroynagi        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseSession,
5369559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang                dicNode, childDicNode, 0 /* multiBigramMap */);
53795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        weightChildNode(traverseSession, childDicNode);
53895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, childDicNode)) {
53995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            continue;
54095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
54195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        processExpandedDicNode(traverseSession, childDicNode);
54295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
54395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
54495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
54595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
54695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the current touch point and
54795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * consider matches for the next touch point.
54895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
54995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
55095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *dicNode) const {
55195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int16_t pointIndex = dicNode->getInputIndex(0);
55295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNodeVector childDicNodes;
5537fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
5547fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            &childDicNodes);
55595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int size = childDicNodes.getSizeAndLock();
55695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    for (int i = 0; i < size; i++) {
5577fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex + 1)
5587fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                != childDicNodes[i]->getNodeCodePoint()) {
5597fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            continue;
5607fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        }
56195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *const childDicNode = childDicNodes[i];
56295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverseSession,
5639559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang                dicNode, childDicNode, 0 /* multiBigramMap */);
56495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        processExpandedDicNode(traverseSession, childDicNode);
56595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
56695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
56795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
56895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
56995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the next two touch points.
57095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
57195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
57295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode *dicNode) const {
57395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int16_t pointIndex = dicNode->getInputIndex(0);
57495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNodeVector childDicNodes1;
5757fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi    DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStructurePolicy(),
5767fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            &childDicNodes1);
57795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int childSize1 = childDicNodes1.getSizeAndLock();
57895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    for (int i = 0; i < childSize1; i++) {
5797fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        const ProximityType matchedId1 = traverseSession->getProximityInfoState(0)
5807fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCodePoint(),
5817fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                        true /* checkProximityChars */);
5827fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
5837fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            continue;
5847fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi        }
58595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        if (childDicNodes1[i]->hasChildren()) {
58695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            DicNodeVector childDicNodes2;
5877fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi            DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
5887fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                    traverseSession->getDictionaryStructurePolicy(), &childDicNodes2);
58995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            const int childSize2 = childDicNodes2.getSizeAndLock();
59095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            for (int j = 0; j < childSize2; j++) {
59195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                DicNode *const childDicNode2 = childDicNodes2[j];
5927fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                const ProximityType matchedId2 = traverseSession->getProximityInfoState(0)
5937fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                        ->getProximityType(pointIndex, childDicNode2->getNodeCodePoint(),
5947fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                                true /* checkProximityChars */);
5957fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
5967fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                    continue;
5977fd9667d76cdc6febe32545865648ea90dc28904Keisuke Kuroyanagi                }
59895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITION,
5999559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang                        traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */);
60095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka                processExpandedDicNode(traverseSession, childDicNode2);
60195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka            }
60295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        }
60395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        DicNode::managedDelete(childDicNodes1[i]);
60495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
60595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
60695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
60795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
60895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Weight child node by aligning it to the key
60995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
61095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
61195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    const int inputSize = traverseSession->getInputSize();
61295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (dicNode->isCompletion(inputSize)) {
61395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, traverseSession,
6149559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang                0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
61595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    } else { // completion
61695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSession,
6179559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang                0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
61895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
61995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
62095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
62195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka/**
62295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * Creates a new dicNode that represents a space insertion at the end of the input dicNode. Also
62395fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka * incorporates the unigram / bigram score for the ending word into the new dicNode.
62495fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka */
62595fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataokavoid Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
62695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        const bool spaceSubstitution) const {
62795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
62895fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka        return;
62995fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    }
63095fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka
63195fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    // Create a non-cached node here.
63295fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka    DicNode newDicNode;
6330ecfb9424754341d7ee41557fc1f913cb6ca79c2Keisuke Kuroyanagi    DicNodeUtils::initAsRootWithPreviousWord(
634d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi            traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
635252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka    const CorrectionType correctionType = spaceSubstitution ?
636da06e385f5f006bc891113847fbdf508376f7f34Jean Chalard            CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
637252412d7eb4573f91588b06b0fe49ef9f0ac38acSatoshi Kataoka    Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
6389559dd2e30de288a9ff7069bfc59f8500b949a88Tom Ouyang            &newDicNode, traverseSession->getMultiBigramMap());
6392d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi    if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
6402d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi        // newDicNode is worth continuing to traverse.
6412d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi        // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune
6422d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi        // here because here is not the right place to do pruning. Pruning should take place only
6432d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi        // in DicNodePriorityQueue.
6442d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi        traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
6452d3f2daf12643e57f15fc98c7fd61329513ca0cfKeisuke Kuroynagi    }
64695fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka}
64795fe8267955ba5bfcc3cf38383f0d13026287082Satoshi Kataoka} // namespace latinime
648