12df3060883c7535029c7dfbbb4f7b05935d796aesatok/*
22df3060883c7535029c7dfbbb4f7b05935d796aesatok * Copyright (C) 2011 The Android Open Source Project
32df3060883c7535029c7dfbbb4f7b05935d796aesatok *
42df3060883c7535029c7dfbbb4f7b05935d796aesatok * Licensed under the Apache License, Version 2.0 (the "License");
52df3060883c7535029c7dfbbb4f7b05935d796aesatok * you may not use this file except in compliance with the License.
62df3060883c7535029c7dfbbb4f7b05935d796aesatok * You may obtain a copy of the License at
72df3060883c7535029c7dfbbb4f7b05935d796aesatok *
82df3060883c7535029c7dfbbb4f7b05935d796aesatok *      http://www.apache.org/licenses/LICENSE-2.0
92df3060883c7535029c7dfbbb4f7b05935d796aesatok *
102df3060883c7535029c7dfbbb4f7b05935d796aesatok * Unless required by applicable law or agreed to in writing, software
112df3060883c7535029c7dfbbb4f7b05935d796aesatok * distributed under the License is distributed on an "AS IS" BASIS,
122df3060883c7535029c7dfbbb4f7b05935d796aesatok * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
132df3060883c7535029c7dfbbb4f7b05935d796aesatok * See the License for the specific language governing permissions and
142df3060883c7535029c7dfbbb4f7b05935d796aesatok * limitations under the License.
152df3060883c7535029c7dfbbb4f7b05935d796aesatok */
162df3060883c7535029c7dfbbb4f7b05935d796aesatok
17f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cassert>
18f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cctype>
19f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cmath>
20f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cstring>
212df3060883c7535029c7dfbbb4f7b05935d796aesatok
22cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok#define LOG_TAG "LatinIME: correction.cpp"
232df3060883c7535029c7dfbbb4f7b05935d796aesatok
246e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka#include "char_utils.h"
25cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok#include "correction.h"
2609baa36f7d1298e54a291b0d486cf366a3c3257cTadashi G. Takaoka#include "defines.h"
273e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka#include "proximity_info_state.h"
282df3060883c7535029c7dfbbb4f7b05935d796aesatok
292df3060883c7535029c7dfbbb4f7b05935d796aesatoknamespace latinime {
302df3060883c7535029c7dfbbb4f7b05935d796aesatok
3174fb957e49e7d9ff5af47f35d062aa7c7f97a8fcKen Wakasaclass ProximityInfo;
3274fb957e49e7d9ff5af47f35d062aa7c7f97a8fcKen Wakasa
3304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima/////////////////////////////
3404d873701550323116cf8737494fb8d7e839c351Yusuke Nojima// edit distance funcitons //
3504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima/////////////////////////////
3604d873701550323116cf8737494fb8d7e839c351Yusuke Nojima
3704d873701550323116cf8737494fb8d7e839c351Yusuke Nojimainline static void initEditDistance(int *editDistanceTable) {
3804d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) {
3904d873701550323116cf8737494fb8d7e839c351Yusuke Nojima        editDistanceTable[i] = i;
4004d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    }
4104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima}
4204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima
4354af64ae921baa764d64c11c7f4f8edd6352d405satokinline static void dumpEditDistance10ForDebug(int *editDistanceTable,
4454af64ae921baa764d64c11c7f4f8edd6352d405satok        const int editDistanceTableWidth, const int outputLength) {
451a6da631ab7c6ed895964978be8f455b41e019bbsatok    if (DEBUG_DICT) {
469fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok        AKLOGI("EditDistanceTable");
471a6da631ab7c6ed895964978be8f455b41e019bbsatok        for (int i = 0; i <= 10; ++i) {
481a6da631ab7c6ed895964978be8f455b41e019bbsatok            int c[11];
491a6da631ab7c6ed895964978be8f455b41e019bbsatok            for (int j = 0; j <= 10; ++j) {
5054af64ae921baa764d64c11c7f4f8edd6352d405satok                if (j < editDistanceTableWidth + 1 && i < outputLength + 1) {
5154af64ae921baa764d64c11c7f4f8edd6352d405satok                    c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j];
521a6da631ab7c6ed895964978be8f455b41e019bbsatok                } else {
531a6da631ab7c6ed895964978be8f455b41e019bbsatok                    c[j] = -1;
541a6da631ab7c6ed895964978be8f455b41e019bbsatok                }
551a6da631ab7c6ed895964978be8f455b41e019bbsatok            }
569fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok            AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]",
571a6da631ab7c6ed895964978be8f455b41e019bbsatok                    c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]);
587914e907b5d31ec4b2034a94e393129833770531Ken Wakasa            (void)c; // To suppress compiler warning
591a6da631ab7c6ed895964978be8f455b41e019bbsatok        }
601a6da631ab7c6ed895964978be8f455b41e019bbsatok    }
611a6da631ab7c6ed895964978be8f455b41e019bbsatok}
621a6da631ab7c6ed895964978be8f455b41e019bbsatok
6304d873701550323116cf8737494fb8d7e839c351Yusuke Nojimainline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
64687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        const int inputSize, const unsigned short *output, const int outputLength) {
651a6da631ab7c6ed895964978be8f455b41e019bbsatok    // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
66687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
67687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
68687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize].
69687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    int *const current = editDistanceTable + outputLength * (inputSize + 1);
70687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
7104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    const int *const prevprev =
72687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
7304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    current[0] = outputLength;
746e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka    const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
756e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka    const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
76687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    for (int i = 1; i <= inputSize; ++i) {
776e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka        const uint32_t ci = toBaseLowerCase(input[i - 1]);
7804d873701550323116cf8737494fb8d7e839c351Yusuke Nojima        const uint16_t cost = (ci == co) ? 0 : 1;
7904d873701550323116cf8737494fb8d7e839c351Yusuke Nojima        current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
806e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka        if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
8104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima            current[i] = min(current[i], prevprev[i - 2] + 1);
8204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima        }
8304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    }
8404d873701550323116cf8737494fb8d7e839c351Yusuke Nojima}
8504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima
8654af64ae921baa764d64c11c7f4f8edd6352d405satokinline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
87687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        const int outputLength, const int inputSize) {
8829dc80614bc529ca2c0b96e1a731ebb7a5433090satok    if (DEBUG_EDIT_DISTANCE) {
89687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength);
901a6da631ab7c6ed895964978be8f455b41e019bbsatok    }
91687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize];
9204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima}
9304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima
948876b75ca1c218949539dcc2fb6c88a19da9e3f8satok//////////////////////
958876b75ca1c218949539dcc2fb6c88a19da9e3f8satok// inline functions //
968876b75ca1c218949539dcc2fb6c88a19da9e3f8satok//////////////////////
9777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasastatic const char SINGLE_QUOTE = '\'';
988876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
9977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasainline bool Correction::isSingleQuote(const unsigned short c) {
1004a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka    const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
10177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
1028876b75ca1c218949539dcc2fb6c88a19da9e3f8satok}
1038876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
104cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok////////////////
105cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok// Correction //
106cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok////////////////
1078876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
1086cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataokavoid Correction::resetCorrection() {
1096cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataoka    mTotalTraverseCount = 0;
1106cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataoka}
1116cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataoka
112687a244703a02323ebd64433cbaead5def499861Satoshi Kataokavoid Correction::initCorrection(const ProximityInfo *pi, const int inputSize,
1138876b75ca1c218949539dcc2fb6c88a19da9e3f8satok        const int maxDepth) {
1142df3060883c7535029c7dfbbb4f7b05935d796aesatok    mProximityInfo = pi;
115687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    mInputSize = inputSize;
1168876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    mMaxDepth = maxDepth;
117687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2;
1181a6da631ab7c6ed895964978be8f455b41e019bbsatok    // TODO: This is not supposed to be required.  Check what's going wrong with
1191a6da631ab7c6ed895964978be8f455b41e019bbsatok    // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL]
1201a6da631ab7c6ed895964978be8f455b41e019bbsatok    initEditDistance(mEditDistanceTable);
121612c6e49c03dc49320a0bf141f51e45a8b969d43satok}
122612c6e49c03dc49320a0bf141f51e45a8b969d43satok
123208268d149c4d139cdf14923650a58ccc0a9d9b6satokvoid Correction::initCorrectionState(
124208268d149c4d139cdf14923650a58ccc0a9d9b6satok        const int rootPos, const int childCount, const bool traverseAll) {
125635f68e8222901d607a5ca6fab95985bc496d72asatok    latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll);
126f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    // TODO: remove
1279db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[0].mTransposedPos = mTransposedPos;
1289db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[0].mExcessivePos = mExcessivePos;
129f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    mCorrectionStates[0].mSkipPos = mSkipPos;
130208268d149c4d139cdf14923650a58ccc0a9d9b6satok}
131208268d149c4d139cdf14923650a58ccc0a9d9b6satok
132cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::setCorrectionParams(const int skipPos, const int excessivePos,
13340a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok        const int transposedPos, const int spaceProximityPos, const int missingSpacePos,
1344d355989bd972ba792ba546a55c67e5b6fc2527asatok        const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) {
135f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    // TODO: remove
1369db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mTransposedPos = transposedPos;
1379db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mExcessivePos = excessivePos;
1382df3060883c7535029c7dfbbb4f7b05935d796aesatok    mSkipPos = skipPos;
139f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    // TODO: remove
1409db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[0].mTransposedPos = transposedPos;
1419db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[0].mExcessivePos = excessivePos;
142f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    mCorrectionStates[0].mSkipPos = skipPos;
1439db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
144612c6e49c03dc49320a0bf141f51e45a8b969d43satok    mSpaceProximityPos = spaceProximityPos;
145612c6e49c03dc49320a0bf141f51e45a8b969d43satok    mMissingSpacePos = missingSpacePos;
14640a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok    mUseFullEditDistance = useFullEditDistance;
147d03317c4be21ee65c19d00c7b83a7042042b8627satok    mDoAutoCompletion = doAutoCompletion;
1484d355989bd972ba792ba546a55c67e5b6fc2527asatok    mMaxErrors = maxErrors;
1492df3060883c7535029c7dfbbb4f7b05935d796aesatok}
1502df3060883c7535029c7dfbbb4f7b05935d796aesatok
151cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::checkState() {
1522df3060883c7535029c7dfbbb4f7b05935d796aesatok    if (DEBUG_DICT) {
1532df3060883c7535029c7dfbbb4f7b05935d796aesatok        int inputCount = 0;
1542df3060883c7535029c7dfbbb4f7b05935d796aesatok        if (mSkipPos >= 0) ++inputCount;
1552df3060883c7535029c7dfbbb4f7b05935d796aesatok        if (mExcessivePos >= 0) ++inputCount;
1562df3060883c7535029c7dfbbb4f7b05935d796aesatok        if (mTransposedPos >= 0) ++inputCount;
1572df3060883c7535029c7dfbbb4f7b05935d796aesatok    }
1582df3060883c7535029c7dfbbb4f7b05935d796aesatok}
1592df3060883c7535029c7dfbbb4f7b05935d796aesatok
160b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalardbool Correction::sameAsTyped() {
161b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalard    return mProximityInfoState.sameAsTyped(mWord, mOutputIndex);
162b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalard}
163b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalard
164a85f4929cd027246045ec3e806857b84e64fe762satokint Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
165a85f4929cd027246045ec3e806857b84e64fe762satok        const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
166a85f4929cd027246045ec3e806857b84e64fe762satok    return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
167a85f4929cd027246045ec3e806857b84e64fe762satok            wordCount, this, isSpaceProximity, word);
168612c6e49c03dc49320a0bf141f51e45a8b969d43satok}
169612c6e49c03dc49320a0bf141f51e45a8b969d43satok
170171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
171687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
17254af64ae921baa764d64c11c7f4f8edd6352d405satok}
17354af64ae921baa764d64c11c7f4f8edd6352d405satok
174171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
175687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        int *wordLength, const int inputSize) {
176687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
17754af64ae921baa764d64c11c7f4f8edd6352d405satok}
17854af64ae921baa764d64c11c7f4f8edd6352d405satok
179171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
180687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        int *wordLength, const int inputSize) {
181985312e88f11e3ce61f35191df59c6bdf9e80e79satok    const int outputIndex = mTerminalOutputIndex;
182985312e88f11e3ce61f35191df59c6bdf9e80e79satok    const int inputIndex = mTerminalInputIndex;
1838876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    *wordLength = outputIndex + 1;
1848876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    *word = mWord;
185171d1809ffc724de4fb793f481d592644e3d141eJean Chalard    int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
186687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
187171d1809ffc724de4fb793f481d592644e3d141eJean Chalard    return finalProbability;
1880f6c8e8aeb18b949fa9586dd9de091027b17e107satok}
1890f6c8e8aeb18b949fa9586dd9de091027b17e107satok
190208268d149c4d139cdf14923650a58ccc0a9d9b6satokbool Correction::initProcessState(const int outputIndex) {
191208268d149c4d139cdf14923650a58ccc0a9d9b6satok    if (mCorrectionStates[outputIndex].mChildCount <= 0) {
192208268d149c4d139cdf14923650a58ccc0a9d9b6satok        return false;
193208268d149c4d139cdf14923650a58ccc0a9d9b6satok    }
1944e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok    mOutputIndex = outputIndex;
195208268d149c4d139cdf14923650a58ccc0a9d9b6satok    --(mCorrectionStates[outputIndex].mChildCount);
196208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
197635f68e8222901d607a5ca6fab95985bc496d72asatok    mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;
1989db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
199e4ba822cc6959490868fd8868ffad1c4e9b23992Yusuke Nojima    mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount;
200466ed22fc6f90c47bc1571b51fda2712ade664f6satok    mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
2019db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
2029db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount;
203635f68e8222901d607a5ca6fab95985bc496d72asatok    mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount;
2049db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded;
2059db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
2069db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos;
2079db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos;
208f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    mSkipPos = mCorrectionStates[outputIndex].mSkipPos;
2099db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
210635f68e8222901d607a5ca6fab95985bc496d72asatok    mMatching = false;
2119db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mProximityMatching = false;
2121b9fa942b4b62a818e45655dc5097c7eed7a5465satok    mAdditionalProximityMatching = false;
2139db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mTransposing = false;
2149db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mExceeding = false;
2159db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mSkipping = false;
2169db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
217208268d149c4d139cdf14923650a58ccc0a9d9b6satok    return true;
218208268d149c4d139cdf14923650a58ccc0a9d9b6satok}
219208268d149c4d139cdf14923650a58ccc0a9d9b6satok
220208268d149c4d139cdf14923650a58ccc0a9d9b6satokint Correction::goDownTree(
221208268d149c4d139cdf14923650a58ccc0a9d9b6satok        const int parentIndex, const int childCount, const int firstChildPos) {
222208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mParentIndex = parentIndex;
223208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mChildCount = childCount;
224208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos;
225208268d149c4d139cdf14923650a58ccc0a9d9b6satok    return mOutputIndex;
2260f6c8e8aeb18b949fa9586dd9de091027b17e107satok}
2270f6c8e8aeb18b949fa9586dd9de091027b17e107satok
2284e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok// TODO: remove
229fee0ac60b1cd0a4760ca8f310ff8a86b925d833bKen Wakasaint Correction::getInputIndex() const {
2304e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok    return mInputIndex;
2310f6c8e8aeb18b949fa9586dd9de091027b17e107satok}
2320f6c8e8aeb18b949fa9586dd9de091027b17e107satok
233cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::incrementInputIndex() {
2344e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok    ++mInputIndex;
2354e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok}
2364e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok
237cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::incrementOutputIndex() {
2384e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok    ++mOutputIndex;
239208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
240208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
241208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
242208268d149c4d139cdf14923650a58ccc0a9d9b6satok    mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
243635f68e8222901d607a5ca6fab95985bc496d72asatok    mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
2449db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
245e4ba822cc6959490868fd8868ffad1c4e9b23992Yusuke Nojima    mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
246466ed22fc6f90c47bc1571b51fda2712ade664f6satok    mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
2479db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
2489db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
249635f68e8222901d607a5ca6fab95985bc496d72asatok    mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
2509db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
251f3948c1eacee57a9ba4b689ada992cd460596d9fsatok    mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
2529db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
2539db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
2549db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
2559db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
2569db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
257635f68e8222901d607a5ca6fab95985bc496d72asatok    mCorrectionStates[mOutputIndex].mMatching = mMatching;
2580cedd2bcc3efcec30ea542ceb8d9161afa764a62satok    mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
2591b9fa942b4b62a818e45655dc5097c7eed7a5465satok    mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
2609db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
2619db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
2629db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
263612c6e49c03dc49320a0bf141f51e45a8b969d43satok}
264612c6e49c03dc49320a0bf141f51e45a8b969d43satok
265635f68e8222901d607a5ca6fab95985bc496d72asatokvoid Correction::startToTraverseAllNodes() {
266635f68e8222901d607a5ca6fab95985bc496d72asatok    mNeedsToTraverseAllNodes = true;
2678876b75ca1c218949539dcc2fb6c88a19da9e3f8satok}
2688876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
269cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokbool Correction::needsToPrune() const {
27004d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    // TODO: use edit distance here
271d03317c4be21ee65c19d00c7b83a7042042b8627satok    return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
272d03317c4be21ee65c19d00c7b83a7042042b8627satok            // Allow one char longer word for missing character
273687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            || (!mDoAutoCompletion && (mOutputIndex > mInputSize));
2748876b75ca1c218949539dcc2fb6c88a19da9e3f8satok}
2758876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
27604d873701550323116cf8737494fb8d7e839c351Yusuke Nojimavoid Correction::addCharToCurrentWord(const int32_t c) {
27704d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    mWord[mOutputIndex] = c;
2784a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka    const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
279687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
28004d873701550323116cf8737494fb8d7e839c351Yusuke Nojima            mWord, mOutputIndex + 1);
28104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima}
28204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima
283cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokCorrection::CorrectionType Correction::processSkipChar(
28410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
28504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    addCharToCurrentWord(c);
2866ad15fcd158de5bec18f6529b961a55e7db9007fsatok    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
2876ad15fcd158de5bec18f6529b961a55e7db9007fsatok    mTerminalOutputIndex = mOutputIndex;
2886ad15fcd158de5bec18f6529b961a55e7db9007fsatok    if (mNeedsToTraverseAllNodes && isTerminal) {
289985312e88f11e3ce61f35191df59c6bdf9e80e79satok        incrementOutputIndex();
290985312e88f11e3ce61f35191df59c6bdf9e80e79satok        return TRAVERSE_ALL_ON_TERMINAL;
291985312e88f11e3ce61f35191df59c6bdf9e80e79satok    } else {
292985312e88f11e3ce61f35191df59c6bdf9e80e79satok        incrementOutputIndex();
293985312e88f11e3ce61f35191df59c6bdf9e80e79satok        return TRAVERSE_ALL_NOT_ON_TERMINAL;
294985312e88f11e3ce61f35191df59c6bdf9e80e79satok    }
295985312e88f11e3ce61f35191df59c6bdf9e80e79satok}
296985312e88f11e3ce61f35191df59c6bdf9e80e79satok
2976ad15fcd158de5bec18f6529b961a55e7db9007fsatokCorrection::CorrectionType Correction::processUnrelatedCorrectionType() {
2986ad15fcd158de5bec18f6529b961a55e7db9007fsatok    // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
2996ad15fcd158de5bec18f6529b961a55e7db9007fsatok    mTerminalInputIndex = mInputIndex;
3006ad15fcd158de5bec18f6529b961a55e7db9007fsatok    mTerminalOutputIndex = mOutputIndex;
3016ad15fcd158de5bec18f6529b961a55e7db9007fsatok    return UNRELATED;
3026ad15fcd158de5bec18f6529b961a55e7db9007fsatok}
3036ad15fcd158de5bec18f6529b961a55e7db9007fsatok
3043e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataokainline bool isEquivalentChar(ProximityType type) {
3053e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka    return type == EQUIVALENT_CHAR;
306258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima}
307258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima
3083e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataokainline bool isProximityCharOrEquivalentChar(ProximityType type) {
3093e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka    return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
3101b9fa942b4b62a818e45655dc5097c7eed7a5465satok}
3111b9fa942b4b62a818e45655dc5097c7eed7a5465satok
312cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokCorrection::CorrectionType Correction::processCharAndCalcState(
3138876b75ca1c218949539dcc2fb6c88a19da9e3f8satok        const int32_t c, const bool isTerminal) {
3147adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
3154d355989bd972ba792ba546a55c67e5b6fc2527asatok    if (correctionCount > mMaxErrors) {
3166ad15fcd158de5bec18f6529b961a55e7db9007fsatok        return processUnrelatedCorrectionType();
3174d355989bd972ba792ba546a55c67e5b6fc2527asatok    }
3184d355989bd972ba792ba546a55c67e5b6fc2527asatok
3197adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    // TODO: Change the limit if we'll allow two or more corrections
3207adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    const bool noCorrectionsHappenedSoFar = correctionCount == 0;
3217adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    const bool canTryCorrection = noCorrectionsHappenedSoFar;
322a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima    int proximityIndex = 0;
323a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima    mDistances[mOutputIndex] = NOT_A_DISTANCE;
324b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok
3254d355989bd972ba792ba546a55c67e5b6fc2527asatok    // Skip checking this node
32677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    if (mNeedsToTraverseAllNodes || isSingleQuote(c)) {
32710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        bool incremented = false;
328687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        if (mLastCharExceeded && mInputIndex == mInputSize - 1) {
32910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // TODO: Do not check the proximity if EditDistance exceeds the threshold
3304a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka            const ProximityType matchId = mProximityInfoState.getMatchedProximityId(
3314a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                    mInputIndex, c, true, &proximityIndex);
332258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima            if (isEquivalentChar(matchId)) {
33310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                mLastCharExceeded = false;
33410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                --mExcessiveCount;
335a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima                mDistances[mOutputIndex] =
3364a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
3373e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka            } else if (matchId == NEAR_PROXIMITY_CHAR) {
33810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                mLastCharExceeded = false;
33910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                --mExcessiveCount;
34010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                ++mProximityCount;
3414a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(
3424a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mInputIndex, proximityIndex);
34310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            }
34477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa            if (!isSingleQuote(c)) {
3456804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok                incrementInputIndex();
3466804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok                incremented = true;
3476804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok            }
3486d78302155d8a6437ab6541d93ddb42bf21e0a61satok        }
34910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        return processSkipChar(c, isTerminal, incremented);
350b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    }
3519db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
3524d355989bd972ba792ba546a55c67e5b6fc2527asatok    // Check possible corrections.
3539db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    if (mExcessivePos >= 0) {
3549db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok        if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) {
355b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            mExcessivePos = mOutputIndex;
3569db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok        }
357687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        if (mExcessivePos < mInputSize - 1) {
3587adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            mExceeding = mExcessivePos == mInputIndex && canTryCorrection;
3599db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok        }
3608876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    }
3618876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
362985312e88f11e3ce61f35191df59c6bdf9e80e79satok    if (mSkipPos >= 0) {
363f3948c1eacee57a9ba4b689ada992cd460596d9fsatok        if (mSkippedCount == 0 && mSkipPos < mOutputIndex) {
364f3948c1eacee57a9ba4b689ada992cd460596d9fsatok            if (DEBUG_DICT) {
365f2789819bd005b5b0581e8439601b5501306327dKen Wakasa                // TODO: Enable this assertion.
366f2789819bd005b5b0581e8439601b5501306327dKen Wakasa                //assert(mSkipPos == mOutputIndex - 1);
367f3948c1eacee57a9ba4b689ada992cd460596d9fsatok            }
368b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            mSkipPos = mOutputIndex;
369f3948c1eacee57a9ba4b689ada992cd460596d9fsatok        }
3707adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok        mSkipping = mSkipPos == mOutputIndex && canTryCorrection;
3719db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    }
3729db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
3739db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    if (mTransposedPos >= 0) {
3749db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok        if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) {
375b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            mTransposedPos = mOutputIndex;
376b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        }
377687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        if (mTransposedPos < mInputSize - 1) {
3787adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            mTransposing = mInputIndex == mTransposedPos && canTryCorrection;
3799db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok        }
380985312e88f11e3ce61f35191df59c6bdf9e80e79satok    }
381985312e88f11e3ce61f35191df59c6bdf9e80e79satok
382b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    bool secondTransposing = false;
383b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    if (mTransposedCount % 2 == 1) {
3844a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka        if (isEquivalentChar(mProximityInfoState.getMatchedProximityId(
3854a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                mInputIndex - 1, c, false))) {
386b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            ++mTransposedCount;
387b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            secondTransposing = true;
388b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        } else if (mCorrectionStates[mOutputIndex].mExceeding) {
389b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            --mTransposedCount;
390b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            ++mExcessiveCount;
39110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            --mExcessivePos;
392b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            incrementInputIndex();
393b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        } else {
394b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            --mTransposedCount;
395e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_CORRECTION
396687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                    && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
397e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
398e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
39910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                DUMP_WORD(mWord, mOutputIndex);
4009fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok                AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
40110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                        mTransposedCount, mExcessiveCount, c);
40210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            }
4036ad15fcd158de5bec18f6529b961a55e7db9007fsatok            return processUnrelatedCorrectionType();
4048876b75ca1c218949539dcc2fb6c88a19da9e3f8satok        }
405b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    }
4068876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
4077adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    // TODO: Change the limit if we'll allow two or more proximity chars with corrections
4084d355989bd972ba792ba546a55c67e5b6fc2527asatok    // Work around: When the mMaxErrors is 1, we only allow just one error
4094d355989bd972ba792ba546a55c67e5b6fc2527asatok    // including proximity correction.
4104d355989bd972ba792ba546a55c67e5b6fc2527asatok    const bool checkProximityChars = (mMaxErrors > 1)
4114d355989bd972ba792ba546a55c67e5b6fc2527asatok            ? (noCorrectionsHappenedSoFar || mProximityCount == 0)
4124d355989bd972ba792ba546a55c67e5b6fc2527asatok            : (noCorrectionsHappenedSoFar && mProximityCount == 0);
4134d355989bd972ba792ba546a55c67e5b6fc2527asatok
4143e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka    ProximityType matchedProximityCharId = secondTransposing
4153e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka            ? EQUIVALENT_CHAR
4164a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka            : mProximityInfoState.getMatchedProximityId(
417a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima                    mInputIndex, c, checkProximityChars, &proximityIndex);
418b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok
4193e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka    if (UNRELATED_CHAR == matchedProximityCharId
4203e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka            || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
42157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        if (canTryCorrection && mOutputIndex > 0
42257834c20a5f2e4c944e09eb4fcddb440bbd46e20satok                && mCorrectionStates[mOutputIndex].mProximityMatching
42310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                && mCorrectionStates[mOutputIndex].mExceeding
4244a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
42557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok                        mInputIndex, mWord[mOutputIndex - 1], false))) {
426e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_CORRECTION
427687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                    && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
428e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
429e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
4309fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok                AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]);
43157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            }
4327adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // Conversion p->e
43357834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            // Example:
43457834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            // wearth ->    earth
43557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            // px     -> (E)mmmmm
43610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            ++mExcessiveCount;
43710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            --mProximityCount;
43857834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            mExcessivePos = mOutputIndex - 1;
43957834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            ++mInputIndex;
44057834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            // Here, we are doing something equivalent to matchedProximityCharId,
44157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            // but we already know that "excessive char correction" just happened
44257834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            // so that we just need to check "mProximityCount == 0".
4434a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka            matchedProximityCharId = mProximityInfoState.getMatchedProximityId(
444a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima                    mInputIndex, c, mProximityCount == 0, &proximityIndex);
44557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        }
44657834c20a5f2e4c944e09eb4fcddb440bbd46e20satok    }
44757834c20a5f2e4c944e09eb4fcddb440bbd46e20satok
4483e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka    if (UNRELATED_CHAR == matchedProximityCharId
4493e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka            || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
4503e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka        if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
4511b9fa942b4b62a818e45655dc5097c7eed7a5465satok            mAdditionalProximityMatching = true;
4521b9fa942b4b62a818e45655dc5097c7eed7a5465satok        }
45357834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        // TODO: Optimize
45457834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        // As the current char turned out to be an unrelated char,
45557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
45657834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        // here refers to the previous state.
457687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0
45810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                && !mCorrectionStates[mOutputIndex].mTransposing
45910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                && mCorrectionStates[mOutputIndex - 1].mTransposing
4604a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                && isEquivalentChar(mProximityInfoState.getMatchedProximityId(
461258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima                        mInputIndex, mWord[mOutputIndex - 1], false))
462258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima                && isEquivalentChar(
4634a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
4647adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // Conversion t->e
46510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // Example:
46610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // occaisional -> occa   sional
46710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // mmmmttx     -> mmmm(E)mmmmmm
46810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            mTransposedCount -= 2;
46910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            ++mExcessiveCount;
47010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            ++mInputIndex;
4717adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok        } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0
47210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                && !mCorrectionStates[mOutputIndex].mTransposing
47310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                && mCorrectionStates[mOutputIndex - 1].mTransposing
474258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima                && isEquivalentChar(
4754a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
4767adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // Conversion t->s
47710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // Example:
47810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // chcolate -> chocolate
47910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            // mmttx    -> mmsmmmmmm
48010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            mTransposedCount -= 2;
48110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            ++mSkippedCount;
48210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            --mInputIndex;
4837adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok        } else if (canTryCorrection && mInputIndex > 0
4847adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok                && mCorrectionStates[mOutputIndex].mProximityMatching
4857adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok                && mCorrectionStates[mOutputIndex].mSkipping
486258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima                && isEquivalentChar(
4874a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) {
4887adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // Conversion p->s
4897adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
4907adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // proximity chars of "s", but it should rather be handled as a skipped char.
4917adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            ++mSkippedCount;
4927adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            --mProximityCount;
4937adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            return processSkipChar(c, isTerminal, false);
494687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        } else if (mInputIndex - 1 < mInputSize
4951b9fa942b4b62a818e45655dc5097c7eed7a5465satok                && mSkippedCount > 0
4961b9fa942b4b62a818e45655dc5097c7eed7a5465satok                && mCorrectionStates[mOutputIndex].mSkipping
4971b9fa942b4b62a818e45655dc5097c7eed7a5465satok                && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
4981b9fa942b4b62a818e45655dc5097c7eed7a5465satok                && isProximityCharOrEquivalentChar(
4994a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
5001b9fa942b4b62a818e45655dc5097c7eed7a5465satok            // Conversion s->a
5011b9fa942b4b62a818e45655dc5097c7eed7a5465satok            incrementInputIndex();
5021b9fa942b4b62a818e45655dc5097c7eed7a5465satok            --mSkippedCount;
5031b9fa942b4b62a818e45655dc5097c7eed7a5465satok            mProximityMatching = true;
5041b9fa942b4b62a818e45655dc5097c7eed7a5465satok            ++mProximityCount;
5051b9fa942b4b62a818e45655dc5097c7eed7a5465satok            mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
506687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize
507258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima                && isEquivalentChar(
5084a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                        mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) {
5097adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // 1.2. Excessive or transpose correction
510b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            if (mTransposing) {
511b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok                ++mTransposedCount;
512985312e88f11e3ce61f35191df59c6bdf9e80e79satok            } else {
513b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok                ++mExcessiveCount;
514b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok                incrementInputIndex();
515985312e88f11e3ce61f35191df59c6bdf9e80e79satok            }
516e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_CORRECTION
517687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                    && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
518e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
519e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
520e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                DUMP_WORD(mWord, mOutputIndex);
521e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                if (mTransposing) {
522e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
523e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            mTransposedCount, mExcessiveCount, c);
524e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                } else {
525e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
526e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            mTransposedCount, mExcessiveCount, c);
527e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                }
528e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            }
5297adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok        } else if (mSkipping) {
5307adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok            // 3. Skip correction
531b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok            ++mSkippedCount;
532e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_CORRECTION
533687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                    && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
534e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
535e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
536e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
537e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                        mTransposedCount, mExcessiveCount, c);
538e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            }
53910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            return processSkipChar(c, isTerminal, false);
5403e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka        } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
541e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            // As a last resort, use additional proximity characters
542e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            mProximityMatching = true;
543e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            ++mProximityCount;
544e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
545e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_CORRECTION
546687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                    && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
547e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
548e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
549e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
550e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                        mTransposedCount, mExcessiveCount, c);
551e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            }
552b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        } else {
553e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_CORRECTION
554687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                    && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
555e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
556e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                            || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
55710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                DUMP_WORD(mWord, mOutputIndex);
5589fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok                AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
55910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                        mTransposedCount, mExcessiveCount, c);
56010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            }
5616ad15fcd158de5bec18f6529b961a55e7db9007fsatok            return processUnrelatedCorrectionType();
5628876b75ca1c218949539dcc2fb6c88a19da9e3f8satok        }
563258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima    } else if (secondTransposing) {
564687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        // If inputIndex is greater than mInputSize, that means there is no
565b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        // proximity chars. So, we don't need to check proximity.
566b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        mMatching = true;
567258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima    } else if (isEquivalentChar(matchedProximityCharId)) {
568258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima        mMatching = true;
569e4ba822cc6959490868fd8868ffad1c4e9b23992Yusuke Nojima        ++mEquivalentCharCount;
5704a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka        mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0);
5713e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka    } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
572b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        mProximityMatching = true;
573258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima        ++mProximityCount;
574a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima        mDistances[mOutputIndex] =
5754a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex);
576e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok        if (DEBUG_CORRECTION
577687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
578e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0
579e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                        || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
580e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
581e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                    mTransposedCount, mExcessiveCount, c);
582e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok        }
583b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    }
5848876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
58504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima    addCharToCurrentWord(c);
5868876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
5877adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    // 4. Last char excessive correction
5887adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok    mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0
589687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            && mProximityCount == 0 && (mInputIndex == mInputSize - 2);
590687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded;
591b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    if (mLastCharExceeded) {
592b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        ++mExcessiveCount;
593b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    }
5948876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
595b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    // Start traversing all nodes after the index exceeds the user typed length
596b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    if (isSameAsUserTypedLength) {
597b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        startToTraverseAllNodes();
5988876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    }
5998876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
60010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar =
601687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            mExceeding && mInputIndex == mInputSize - 2;
60210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok
603b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    // Finally, we are ready to go to the next character, the next "virtual node".
604b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    // We should advance the input index.
605b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    // We do this in this branch of the 'if traverseAllNodes' because we are still matching
606b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    // characters to input; the other branch is not matching them but searching for
607b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    // completions, this is why it does not have to do it.
608b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    incrementInputIndex();
6098876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    // Also, the next char is one "virtual node" depth more than this char.
6108876b75ca1c218949539dcc2fb6c88a19da9e3f8satok    incrementOutputIndex();
6118876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
61210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar
61310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            || isSameAsUserTypedLength) && isTerminal) {
614b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        mTerminalInputIndex = mInputIndex - 1;
615b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        mTerminalOutputIndex = mOutputIndex - 1;
616e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok        if (DEBUG_CORRECTION
617687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize)
618e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok                && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) {
61957834c20a5f2e4c944e09eb4fcddb440bbd46e20satok            DUMP_WORD(mWord, mOutputIndex);
6209fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok            AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
62157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok                    mTransposedCount, mExcessiveCount, c);
62257834c20a5f2e4c944e09eb4fcddb440bbd46e20satok        }
623b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        return ON_TERMINAL;
624b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    } else {
6256ad15fcd158de5bec18f6529b961a55e7db9007fsatok        mTerminalInputIndex = mInputIndex - 1;
6266ad15fcd158de5bec18f6529b961a55e7db9007fsatok        mTerminalOutputIndex = mOutputIndex - 1;
627b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok        return NOT_ON_TERMINAL;
628b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok    }
6298876b75ca1c218949539dcc2fb6c88a19da9e3f8satok}
6308876b75ca1c218949539dcc2fb6c88a19da9e3f8satok
6310bbb917d12358e0264796e75dea888f244761b64Ken Wakasainline static int getQuoteCount(const unsigned short *word, const int length) {
632bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    int quoteCount = 0;
633bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    for (int i = 0; i < length; ++i) {
6341cd7ca991961937c1a84572a6cafa3eaf5181be4Keisuke Kuroyanagi        if (word[i] == SINGLE_QUOTE) {
635bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok            ++quoteCount;
636bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        }
637bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    }
638bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    return quoteCount;
639bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok}
640bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
641eb050fc2dc97a7e6ddcaf254c110dc16279dfd0dsatokinline static bool isUpperCase(unsigned short c) {
6424c5daa8a5574628204be602578794035ab8686f0Ken Wakasa    return isAsciiUpper(toBaseChar(c));
643eb050fc2dc97a7e6ddcaf254c110dc16279dfd0dsatok}
644eb050fc2dc97a7e6ddcaf254c110dc16279dfd0dsatok
645612c6e49c03dc49320a0bf141f51e45a8b969d43satok//////////////////////
646612c6e49c03dc49320a0bf141f51e45a8b969d43satok// RankingAlgorithm //
647612c6e49c03dc49320a0bf141f51e45a8b969d43satok//////////////////////
648612c6e49c03dc49320a0bf141f51e45a8b969d43satok
649635f68e8222901d607a5ca6fab95985bc496d72asatok/* static */
650171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex,
6510bbb917d12358e0264796e75dea888f244761b64Ken Wakasa        const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction,
652687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        const int inputSize) {
653cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok    const int excessivePos = correction->getExcessivePos();
654cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
655cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok    const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER;
6564a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka    const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState;
6579db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    const int skippedCount = correction->mSkippedCount;
65810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    const int transposedCount = correction->mTransposedCount / 2;
65910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
6600cedd2bcc3efcec30ea542ceb8d9161afa764a62satok    const int proximityMatchedCount = correction->mProximityCount;
6619db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    const bool lastCharExceeded = correction->mLastCharExceeded;
66240a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok    const bool useFullEditDistance = correction->mUseFullEditDistance;
66340a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok    const int outputLength = outputIndex + 1;
664687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    if (skippedCount >= inputSize || inputSize == 0) {
665bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        return -1;
666bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    }
667bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
66810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    // TODO: find more robust way
669687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2)
670687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            : (inputSize == inputIndex + 1);
671466ed22fc6f90c47bc1571b51fda2712ade664f6satok
672466ed22fc6f90c47bc1571b51fda2712ade664f6satok    // TODO: use mExcessiveCount
673687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
674466ed22fc6f90c47bc1571b51fda2712ade664f6satok
6750bbb917d12358e0264796e75dea888f244761b64Ken Wakasa    const unsigned short *word = correction->mWord;
6769db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    const bool skipped = skippedCount > 0;
6770cedd2bcc3efcec30ea542ceb8d9161afa764a62satok
67854af64ae921baa764d64c11c7f4f8edd6352d405satok    const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
679687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize));
680612c6e49c03dc49320a0bf141f51e45a8b969d43satok
681bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // TODO: Calculate edit distance for transposed and excessive
682bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    int ed = 0;
6831a6da631ab7c6ed895964978be8f455b41e019bbsatok    if (DEBUG_DICT_FULL) {
684687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength);
6851a6da631ab7c6ed895964978be8f455b41e019bbsatok    }
68610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    int adjustedProximityMatchedCount = proximityMatchedCount;
68710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok
68810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    int finalFreq = freq;
6899db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok
6901b9fa942b4b62a818e45655dc5097c7eed7a5465satok    if (DEBUG_CORRECTION_FREQ
691687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
6921b9fa942b4b62a818e45655dc5097c7eed7a5465satok        AKLOGI("FinalFreq0: %d", finalFreq);
6931b9fa942b4b62a818e45655dc5097c7eed7a5465satok    }
6949db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok    // TODO: Optimize this.
695a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok    if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
696687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength,
697687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                inputSize) - transposedCount;
698a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok
69910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        const int matchWeight = powerIntCapped(typedLetterMultiplier,
700687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                max(inputSize, outputLength) - ed);
70110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        multiplyIntCapped(matchWeight, &finalFreq);
70210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok
70310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        // TODO: Demote further if there are two or more excessive chars with longer user input?
704687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        if (inputSize > outputLength) {
70510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq);
7060cedd2bcc3efcec30ea542ceb8d9161afa764a62satok        }
70710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok
708bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        ed = max(0, ed - quoteDiffCount);
709687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)),
7101b9fa942b4b62a818e45655dc5097c7eed7a5465satok                proximityMatchedCount);
7116804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok        if (transposedCount <= 0) {
712687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) {
713a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok                // Promote a word with just one skipped or excessive char
714a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok                if (sameLength) {
7151b9fa942b4b62a818e45655dc5097c7eed7a5465satok                    multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
7161b9fa942b4b62a818e45655dc5097c7eed7a5465satok                            + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength,
7171b9fa942b4b62a818e45655dc5097c7eed7a5465satok                            &finalFreq);
718a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok                } else {
719a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok                    multiplyIntCapped(typedLetterMultiplier, &finalFreq);
720a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok                }
721a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok            } else if (ed == 0) {
72210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok                multiplyIntCapped(typedLetterMultiplier, &finalFreq);
723a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok                sameLength = true;
72410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            }
72510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        }
726bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    } else {
72710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
72810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        multiplyIntCapped(matchWeight, &finalFreq);
7290cedd2bcc3efcec30ea542ceb8d9161afa764a62satok    }
7300cedd2bcc3efcec30ea542ceb8d9161afa764a62satok
7314a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka    if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) {
73210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok        multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq);
73310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    }
734bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
735bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    ///////////////////////////////////////////////
736bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // Promotion and Demotion for each correction
7370cedd2bcc3efcec30ea542ceb8d9161afa764a62satok
738bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // Demotion for a word with missing character
739466ed22fc6f90c47bc1571b51fda2712ade664f6satok    if (skipped) {
740bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
741687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
742687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka                / (10 * inputSize
743bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok                        - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10);
744bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        if (DEBUG_DICT_FULL) {
7459fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok            AKLOGI("Demotion rate for missing character is %d.", demotionRate);
746612c6e49c03dc49320a0bf141f51e45a8b969d43satok        }
747bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        multiplyRate(demotionRate, &finalFreq);
748612c6e49c03dc49320a0bf141f51e45a8b969d43satok    }
749bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
750bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // Demotion for a word with transposed character
75110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    if (transposedCount > 0) multiplyRate(
752612c6e49c03dc49320a0bf141f51e45a8b969d43satok            WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
753bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
754bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // Demotion for a word with excessive character
75510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    if (excessiveCount > 0) {
756612c6e49c03dc49320a0bf141f51e45a8b969d43satok        multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
7574a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka        if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) {
758e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok            if (DEBUG_DICT_FULL) {
7599fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok                AKLOGI("Double excessive demotion");
76010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            }
761612c6e49c03dc49320a0bf141f51e45a8b969d43satok            // If an excessive character is not adjacent to the left char or the right char,
762612c6e49c03dc49320a0bf141f51e45a8b969d43satok            // we will demote this word.
763612c6e49c03dc49320a0bf141f51e45a8b969d43satok            multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
764612c6e49c03dc49320a0bf141f51e45a8b969d43satok        }
765612c6e49c03dc49320a0bf141f51e45a8b969d43satok    }
766bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
767441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka    int additionalProximityCount = 0;
768441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka    // Demote additional proximity characters
769441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka    for (int i = 0; i < outputLength; ++i) {
770441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka        const int squaredDistance = correction->mDistances[i];
771441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka        if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
772441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka            ++additionalProximityCount;
773441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka        }
774441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka    }
775441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka
776e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok    const bool performTouchPositionCorrection =
7774a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka            CALIBRATE_SCORE_BY_TOUCH_COORDINATES
7784a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka                    && proximityInfoState->touchPositionCorrectionEnabled()
779441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka                    && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0
780441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka                    && additionalProximityCount == 0;
781441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka
782a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima    // Score calibration by touch coordinates is being done only for pure-fat finger typing error
783a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima    // cases.
784a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima    // TODO: Remove this constraint.
78504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok    if (performTouchPositionCorrection) {
78604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok        for (int i = 0; i < outputLength; ++i) {
78704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            const int squaredDistance = correction->mDistances[i];
78804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            if (i < adjustedProximityMatchedCount) {
78904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                multiplyIntCapped(typedLetterMultiplier, &finalFreq);
79004fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            }
79104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            if (squaredDistance >= 0) {
79204fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                // Promote or demote the score according to the distance from the sweet spot
79304fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
79404fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                static const float B = 1.0f;
79504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                static const float C = 0.5f;
79604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                static const float MIN = 0.3f;
79704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
79804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                static const float R2 = HALF_SCORE_SQUARED_RADIUS;
79977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa                const float x = static_cast<float>(squaredDistance)
8003e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka                        / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
80104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                const float factor = max((x < R1)
802448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka                        ? (A * (R1 - x) + B * x) / R1
803448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka                        : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN);
804448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka                // factor is a piecewise linear function like:
80504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                // A -_                  .
80604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                //     ^-_               .
80704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                // B      \              .
80804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                //         \_            .
80904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                // C         ------------.
81004fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                //                       .
81104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                // 0   R1 R2             .
812448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka                multiplyRate((int)(factor * 100.0f), &finalFreq);
81304fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
81404fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
81504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            }
816e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok        }
81704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok    } else {
81804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok        // Promotion for a word with proximity characters
81904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok        for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
82004fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            // A word with proximity corrections
82104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            if (DEBUG_DICT_FULL) {
82204fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                AKLOGI("Found a proximity correction.");
82304fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            }
82404fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            multiplyIntCapped(typedLetterMultiplier, &finalFreq);
82504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            if (i < additionalProximityCount) {
82604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
82704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            } else {
82804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok                multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
82904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok            }
830612c6e49c03dc49320a0bf141f51e45a8b969d43satok        }
8319ee8c9c45c960dae6fbf0f35e4c84c9c1c85fc3fYusuke Nojima    }
8329ee8c9c45c960dae6fbf0f35e4c84c9c1c85fc3fYusuke Nojima
8331b9fa942b4b62a818e45655dc5097c7eed7a5465satok    // If the user types too many(three or more) proximity characters with additional proximity
8341b9fa942b4b62a818e45655dc5097c7eed7a5465satok    // character,do not treat as the same length word.
8351b9fa942b4b62a818e45655dc5097c7eed7a5465satok    if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3
8361b9fa942b4b62a818e45655dc5097c7eed7a5465satok            || transposedCount > 0 || skipped || excessiveCount > 0)) {
8371b9fa942b4b62a818e45655dc5097c7eed7a5465satok        sameLength = false;
8381b9fa942b4b62a818e45655dc5097c7eed7a5465satok    }
8391b9fa942b4b62a818e45655dc5097c7eed7a5465satok
84010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    const int errorCount = adjustedProximityMatchedCount > 0
84110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            ? adjustedProximityMatchedCount
84210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok            : (proximityMatchedCount + transposedCount);
843bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    multiplyRate(
844687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq);
845bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
846bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // Promotion for an exactly matched word
84710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    if (ed == 0) {
848bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        // Full exact match
84954af64ae921baa764d64c11c7f4f8edd6352d405satok        if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
8501b9fa942b4b62a818e45655dc5097c7eed7a5465satok                && quoteDiffCount == 0 && additionalProximityCount == 0) {
851bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok            finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
852bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        }
853612c6e49c03dc49320a0bf141f51e45a8b969d43satok    }
854635f68e8222901d607a5ca6fab95985bc496d72asatok
855bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // Promote a word with no correction
8561b9fa942b4b62a818e45655dc5097c7eed7a5465satok    if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0
8571b9fa942b4b62a818e45655dc5097c7eed7a5465satok            && additionalProximityCount == 0) {
858bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
859bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    }
860bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
861bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // TODO: Check excessive count and transposed count
862bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    // TODO: Remove this if possible
863635f68e8222901d607a5ca6fab95985bc496d72asatok    /*
864bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         If the last character of the user input word is the same as the next character
865bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         of the output word, and also all of characters of the user input are matched
866bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         to the output word, we'll promote that word a bit because
867bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         that word can be considered the combination of skipped and matched characters.
868bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         This means that the 'sm' pattern wins over the 'ma' pattern.
869bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         e.g.)
870bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         shel -> shell [mmmma] or [mmmsm]
871bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         hel -> hello [mmmaa] or [mmsma]
872bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         m ... matching
873bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         s ... skipping
874bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok         a ... traversing all
8757adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok         t ... transposing
8767adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok         e ... exceeding
8777adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok         p ... proximity matching
878635f68e8222901d607a5ca6fab95985bc496d72asatok     */
879687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    if (matchCount == inputSize && matchCount >= 2 && !skipped
880635f68e8222901d607a5ca6fab95985bc496d72asatok            && word[matchCount] == word[matchCount - 1]) {
881635f68e8222901d607a5ca6fab95985bc496d72asatok        multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
882635f68e8222901d607a5ca6fab95985bc496d72asatok    }
883635f68e8222901d607a5ca6fab95985bc496d72asatok
88410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    // TODO: Do not use sameLength?
885bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    if (sameLength) {
886bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok        multiplyIntCapped(fullWordMultiplier, &finalFreq);
887bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    }
888bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
889687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka    if (useFullEditDistance && outputLength > inputSize + 1) {
890687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        const int diff = outputLength - inputSize - 1;
89140a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok        const int divider = diff < 31 ? 1 << diff : S_INT_MAX;
89240a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok        finalFreq = divider > finalFreq ? 1 : finalFreq / divider;
89340a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok    }
89440a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok
895bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    if (DEBUG_DICT_FULL) {
89654af64ae921baa764d64c11c7f4f8edd6352d405satok        AKLOGI("calc: %d, %d", outputLength, sameLength);
897bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok    }
898bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok
899e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok    if (DEBUG_CORRECTION_FREQ
900687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka            && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) {
901687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka        DUMP_WORD(correction->getPrimaryInputWord(), inputSize);
90254af64ae921baa764d64c11c7f4f8edd6352d405satok        DUMP_WORD(correction->mWord, outputLength);
9031b9fa942b4b62a818e45655dc5097c7eed7a5465satok        AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
9041b9fa942b4b62a818e45655dc5097c7eed7a5465satok                skippedCount, transposedCount, excessiveCount, additionalProximityCount,
9051b9fa942b4b62a818e45655dc5097c7eed7a5465satok                outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq);
90610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok    }
90710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok
908612c6e49c03dc49320a0bf141f51e45a8b969d43satok    return finalFreq;
909612c6e49c03dc49320a0bf141f51e45a8b969d43satok}
910612c6e49c03dc49320a0bf141f51e45a8b969d43satok
911635f68e8222901d607a5ca6fab95985bc496d72asatok/* static */
912a85f4929cd027246045ec3e806857b84e64fe762satokint Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
913a85f4929cd027246045ec3e806857b84e64fe762satok        const int *freqArray, const int *wordLengthArray, const int wordCount,
9140bbb917d12358e0264796e75dea888f244761b64Ken Wakasa        const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
91529dc80614bc529ca2c0b96e1a731ebb7a5433090satok    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
91629dc80614bc529ca2c0b96e1a731ebb7a5433090satok
91729dc80614bc529ca2c0b96e1a731ebb7a5433090satok    bool firstCapitalizedWordDemotion = false;
91829dc80614bc529ca2c0b96e1a731ebb7a5433090satok    bool secondCapitalizedWordDemotion = false;
919a85f4929cd027246045ec3e806857b84e64fe762satok
920a85f4929cd027246045ec3e806857b84e64fe762satok    {
921a85f4929cd027246045ec3e806857b84e64fe762satok        // TODO: Handle multiple capitalized word demotion properly
922a85f4929cd027246045ec3e806857b84e64fe762satok        const int firstWordLength = wordLengthArray[0];
923a85f4929cd027246045ec3e806857b84e64fe762satok        const int secondWordLength = wordLengthArray[1];
924a85f4929cd027246045ec3e806857b84e64fe762satok        if (firstWordLength >= 2) {
925a85f4929cd027246045ec3e806857b84e64fe762satok            firstCapitalizedWordDemotion = isUpperCase(word[0]);
926a85f4929cd027246045ec3e806857b84e64fe762satok        }
927a85f4929cd027246045ec3e806857b84e64fe762satok
928a85f4929cd027246045ec3e806857b84e64fe762satok        if (secondWordLength >= 2) {
929a85f4929cd027246045ec3e806857b84e64fe762satok            // FIXME: word[firstWordLength + 1] is incorrect.
930a85f4929cd027246045ec3e806857b84e64fe762satok            secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]);
931a85f4929cd027246045ec3e806857b84e64fe762satok        }
93229dc80614bc529ca2c0b96e1a731ebb7a5433090satok    }
93329dc80614bc529ca2c0b96e1a731ebb7a5433090satok
934a85f4929cd027246045ec3e806857b84e64fe762satok
93529dc80614bc529ca2c0b96e1a731ebb7a5433090satok    const bool capitalizedWordDemotion =
93629dc80614bc529ca2c0b96e1a731ebb7a5433090satok            firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
93729dc80614bc529ca2c0b96e1a731ebb7a5433090satok
938a85f4929cd027246045ec3e806857b84e64fe762satok    int totalLength = 0;
939a85f4929cd027246045ec3e806857b84e64fe762satok    int totalFreq = 0;
940f2789819bd005b5b0581e8439601b5501306327dKen Wakasa    for (int i = 0; i < wordCount; ++i) {
941a85f4929cd027246045ec3e806857b84e64fe762satok        const int wordLength = wordLengthArray[i];
942a85f4929cd027246045ec3e806857b84e64fe762satok        if (wordLength <= 0) {
943a85f4929cd027246045ec3e806857b84e64fe762satok            return 0;
944a85f4929cd027246045ec3e806857b84e64fe762satok        }
945a85f4929cd027246045ec3e806857b84e64fe762satok        totalLength += wordLength;
946a85f4929cd027246045ec3e806857b84e64fe762satok        const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1);
947a85f4929cd027246045ec3e806857b84e64fe762satok        int tempFirstFreq = freqArray[i];
948a85f4929cd027246045ec3e806857b84e64fe762satok        multiplyRate(demotionRate, &tempFirstFreq);
949a85f4929cd027246045ec3e806857b84e64fe762satok        totalFreq += tempFirstFreq;
95029dc80614bc529ca2c0b96e1a731ebb7a5433090satok    }
95129dc80614bc529ca2c0b96e1a731ebb7a5433090satok
952a85f4929cd027246045ec3e806857b84e64fe762satok    if (totalLength <= 0 || totalFreq <= 0) {
953a85f4929cd027246045ec3e806857b84e64fe762satok        return 0;
954a85f4929cd027246045ec3e806857b84e64fe762satok    }
95529dc80614bc529ca2c0b96e1a731ebb7a5433090satok
956a85f4929cd027246045ec3e806857b84e64fe762satok    // TODO: Currently totalFreq is adjusted to two word metrix.
95729dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // Promote pairFreq with multiplying by 2, because the word length is the same as the typed
95829dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // length.
959a85f4929cd027246045ec3e806857b84e64fe762satok    totalFreq = totalFreq * 2 / wordCount;
960a85f4929cd027246045ec3e806857b84e64fe762satok    if (wordCount > 2) {
961a85f4929cd027246045ec3e806857b84e64fe762satok        // Safety net for 3+ words -- Caveats: many heuristics and workarounds here.
962a85f4929cd027246045ec3e806857b84e64fe762satok        int oneLengthCounter = 0;
963a85f4929cd027246045ec3e806857b84e64fe762satok        int twoLengthCounter = 0;
964a85f4929cd027246045ec3e806857b84e64fe762satok        for (int i = 0; i < wordCount; ++i) {
965a85f4929cd027246045ec3e806857b84e64fe762satok            const int wordLength = wordLengthArray[i];
966a85f4929cd027246045ec3e806857b84e64fe762satok            // TODO: Use bigram instead of this safety net
967a85f4929cd027246045ec3e806857b84e64fe762satok            if (i < wordCount - 1) {
968a85f4929cd027246045ec3e806857b84e64fe762satok                const int nextWordLength = wordLengthArray[i + 1];
969a85f4929cd027246045ec3e806857b84e64fe762satok                if (wordLength == 1 && nextWordLength == 2) {
970a85f4929cd027246045ec3e806857b84e64fe762satok                    // Safety net to filter 1 length and 2 length sequential words
971a85f4929cd027246045ec3e806857b84e64fe762satok                    return 0;
972a85f4929cd027246045ec3e806857b84e64fe762satok                }
973a85f4929cd027246045ec3e806857b84e64fe762satok            }
974a85f4929cd027246045ec3e806857b84e64fe762satok            const int freq = freqArray[i];
975a85f4929cd027246045ec3e806857b84e64fe762satok            // Demote too short weak words
976a0ac31fcaa01c21592a6e7af243c14dada65cf3esatok            if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) {
977a85f4929cd027246045ec3e806857b84e64fe762satok                multiplyRate(100 * freq / MAX_FREQ, &totalFreq);
978a85f4929cd027246045ec3e806857b84e64fe762satok            }
979a85f4929cd027246045ec3e806857b84e64fe762satok            if (wordLength == 1) {
980a85f4929cd027246045ec3e806857b84e64fe762satok                ++oneLengthCounter;
981a85f4929cd027246045ec3e806857b84e64fe762satok            } else if (wordLength == 2) {
982a85f4929cd027246045ec3e806857b84e64fe762satok                ++twoLengthCounter;
983a85f4929cd027246045ec3e806857b84e64fe762satok            }
984a85f4929cd027246045ec3e806857b84e64fe762satok            if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) {
985a85f4929cd027246045ec3e806857b84e64fe762satok                // Safety net to filter too many short words
986a85f4929cd027246045ec3e806857b84e64fe762satok                return 0;
987a85f4929cd027246045ec3e806857b84e64fe762satok            }
988a85f4929cd027246045ec3e806857b84e64fe762satok        }
989a85f4929cd027246045ec3e806857b84e64fe762satok        multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq);
990a85f4929cd027246045ec3e806857b84e64fe762satok    }
99129dc80614bc529ca2c0b96e1a731ebb7a5433090satok
99229dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // This is a workaround to try offsetting the not-enough-demotion which will be done in
99329dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // calcNormalizedScore in Utils.java.
99429dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // In calcNormalizedScore the score will be demoted by (1 - 1 / length)
99529dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by
99629dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length))
99729dc80614bc529ca2c0b96e1a731ebb7a5433090satok    const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength);
99829dc80614bc529ca2c0b96e1a731ebb7a5433090satok    multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq);
99929dc80614bc529ca2c0b96e1a731ebb7a5433090satok
100029dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // At this moment, totalFreq is calculated by the following formula:
100129dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1)))
100229dc80614bc529ca2c0b96e1a731ebb7a5433090satok    //        * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1))
100329dc80614bc529ca2c0b96e1a731ebb7a5433090satok
100429dc80614bc529ca2c0b96e1a731ebb7a5433090satok    multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq);
100529dc80614bc529ca2c0b96e1a731ebb7a5433090satok
100629dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // This is another workaround to offset the demotion which will be done in
100729dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // calcNormalizedScore in Utils.java.
100829dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote
100929dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // the same amount because we already have adjusted the synthetic freq of this "missing or
101029dc80614bc529ca2c0b96e1a731ebb7a5433090satok    // mistyped space" suggestion candidate above in this method.
101129dc80614bc529ca2c0b96e1a731ebb7a5433090satok    const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength);
101229dc80614bc529ca2c0b96e1a731ebb7a5433090satok    multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq);
101329dc80614bc529ca2c0b96e1a731ebb7a5433090satok
101429dc80614bc529ca2c0b96e1a731ebb7a5433090satok    if (isSpaceProximity) {
101529dc80614bc529ca2c0b96e1a731ebb7a5433090satok        // A word pair with one space proximity correction
101629dc80614bc529ca2c0b96e1a731ebb7a5433090satok        if (DEBUG_DICT) {
101729dc80614bc529ca2c0b96e1a731ebb7a5433090satok            AKLOGI("Found a word pair with space proximity correction.");
101829dc80614bc529ca2c0b96e1a731ebb7a5433090satok        }
101929dc80614bc529ca2c0b96e1a731ebb7a5433090satok        multiplyIntCapped(typedLetterMultiplier, &totalFreq);
102029dc80614bc529ca2c0b96e1a731ebb7a5433090satok        multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq);
102129dc80614bc529ca2c0b96e1a731ebb7a5433090satok    }
102229dc80614bc529ca2c0b96e1a731ebb7a5433090satok
10238330b488e9534afe8f5775ad9416c904d01bb665satok    if (isSpaceProximity) {
10248330b488e9534afe8f5775ad9416c904d01bb665satok        multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq);
10258330b488e9534afe8f5775ad9416c904d01bb665satok    } else {
10268330b488e9534afe8f5775ad9416c904d01bb665satok        multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq);
10278330b488e9534afe8f5775ad9416c904d01bb665satok    }
102829dc80614bc529ca2c0b96e1a731ebb7a5433090satok
102929dc80614bc529ca2c0b96e1a731ebb7a5433090satok    if (capitalizedWordDemotion) {
103029dc80614bc529ca2c0b96e1a731ebb7a5433090satok        multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
103129dc80614bc529ca2c0b96e1a731ebb7a5433090satok    }
103229dc80614bc529ca2c0b96e1a731ebb7a5433090satok
10331f6b52e76c59700984fe2b7d7b436d81da997e93satok    if (DEBUG_CORRECTION_FREQ) {
1034a85f4929cd027246045ec3e806857b84e64fe762satok        AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1],
1035a85f4929cd027246045ec3e806857b84e64fe762satok                wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq);
1036a85f4929cd027246045ec3e806857b84e64fe762satok        DUMP_WORD(word, wordLengthArray[0]);
10371f6b52e76c59700984fe2b7d7b436d81da997e93satok    }
10381f6b52e76c59700984fe2b7d7b436d81da997e93satok
103929dc80614bc529ca2c0b96e1a731ebb7a5433090satok    return totalFreq;
104029dc80614bc529ca2c0b96e1a731ebb7a5433090satok}
104129dc80614bc529ca2c0b96e1a731ebb7a5433090satok
1042be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok/* Damerau-Levenshtein distance */
1043be0cf72253f15bff6abdeaa79f60a56f06ab7b86satokinline static int editDistanceInternal(
10440bbb917d12358e0264796e75dea888f244761b64Ken Wakasa        int *editDistanceTable, const unsigned short *before,
10450bbb917d12358e0264796e75dea888f244761b64Ken Wakasa        const int beforeLength, const unsigned short *after, const int afterLength) {
10464d355989bd972ba792ba546a55c67e5b6fc2527asatok    // dp[li][lo] dp[a][b] = dp[ a * lo + b]
10470bbb917d12358e0264796e75dea888f244761b64Ken Wakasa    int *dp = editDistanceTable;
1048be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    const int li = beforeLength + 1;
1049be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    const int lo = afterLength + 1;
10504d355989bd972ba792ba546a55c67e5b6fc2527asatok    for (int i = 0; i < li; ++i) {
10514d355989bd972ba792ba546a55c67e5b6fc2527asatok        dp[lo * i] = i;
10524d355989bd972ba792ba546a55c67e5b6fc2527asatok    }
10534d355989bd972ba792ba546a55c67e5b6fc2527asatok    for (int i = 0; i < lo; ++i) {
10544d355989bd972ba792ba546a55c67e5b6fc2527asatok        dp[i] = i;
10554d355989bd972ba792ba546a55c67e5b6fc2527asatok    }
10564d355989bd972ba792ba546a55c67e5b6fc2527asatok
10574d355989bd972ba792ba546a55c67e5b6fc2527asatok    for (int i = 0; i < li - 1; ++i) {
10584d355989bd972ba792ba546a55c67e5b6fc2527asatok        for (int j = 0; j < lo - 1; ++j) {
1059be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok            const uint32_t ci = toBaseLowerCase(before[i]);
1060be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok            const uint32_t co = toBaseLowerCase(after[j]);
10614d355989bd972ba792ba546a55c67e5b6fc2527asatok            const uint16_t cost = (ci == co) ? 0 : 1;
10624d355989bd972ba792ba546a55c67e5b6fc2527asatok            dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
10634d355989bd972ba792ba546a55c67e5b6fc2527asatok                    min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
1064be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok            if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
1065be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok                    && co == toBaseLowerCase(before[i - 1])) {
10664d355989bd972ba792ba546a55c67e5b6fc2527asatok                dp[(i + 1) * lo + (j + 1)] = min(
10674d355989bd972ba792ba546a55c67e5b6fc2527asatok                        dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
10684d355989bd972ba792ba546a55c67e5b6fc2527asatok            }
10694d355989bd972ba792ba546a55c67e5b6fc2527asatok        }
10704d355989bd972ba792ba546a55c67e5b6fc2527asatok    }
10714d355989bd972ba792ba546a55c67e5b6fc2527asatok
10724d355989bd972ba792ba546a55c67e5b6fc2527asatok    if (DEBUG_EDIT_DISTANCE) {
10739fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok        AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength);
10744d355989bd972ba792ba546a55c67e5b6fc2527asatok        for (int i = 0; i < li; ++i) {
10754d355989bd972ba792ba546a55c67e5b6fc2527asatok            for (int j = 0; j < lo; ++j) {
10769fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok                AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
10774d355989bd972ba792ba546a55c67e5b6fc2527asatok            }
10784d355989bd972ba792ba546a55c67e5b6fc2527asatok        }
10794d355989bd972ba792ba546a55c67e5b6fc2527asatok    }
10804d355989bd972ba792ba546a55c67e5b6fc2527asatok    return dp[li * lo - 1];
10814d355989bd972ba792ba546a55c67e5b6fc2527asatok}
1082be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
10830bbb917d12358e0264796e75dea888f244761b64Ken Wakasaint Correction::RankingAlgorithm::editDistance(const unsigned short *before,
10840bbb917d12358e0264796e75dea888f244761b64Ken Wakasa        const int beforeLength, const unsigned short *after, const int afterLength) {
1085be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    int table[(beforeLength + 1) * (afterLength + 1)];
1086be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    return editDistanceInternal(table, before, beforeLength, after, afterLength);
1087be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok}
1088be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
1089be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
1090be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// In dictionary.cpp, getSuggestion() method,
1091be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// suggestion scores are computed using the below formula.
1092be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// original score
1093bcec82de66f52655593dc233346f11468f5077a0Ken Wakasa//  := powf(mTypedLetterMultiplier (this is defined 2),
1094be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//         (the number of matched characters between typed word and suggested word))
1095be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//     * (individual word's score which defined in the unigram dictionary,
1096be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//         and this score is defined in range [0, 255].)
1097be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// Then, the following processing is applied.
1098be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//     - If the dictionary word is matched up to the point of the user entry
1099be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//       (full match up to min(before.length(), after.length())
1100be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//       => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
1101be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//     - If the word is a true full match except for differences in accents or
1102be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//       capitalization, then treat it as if the score was 255.
1103be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//     - If before.length() == after.length()
1104be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok//       => multiply by mFullWordMultiplier (this is defined 2))
1105bcec82de66f52655593dc233346f11468f5077a0Ken Wakasa// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
1106be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// For historical reasons we ignore the 1.2 modifier (because the measure for a good
1107be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// autocorrection threshold was done at a time when it didn't exist). This doesn't change
1108be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// the result.
1109bcec82de66f52655593dc233346f11468f5077a0Ken Wakasa// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2.
1110be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
1111be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok/* static */
11120bbb917d12358e0264796e75dea888f244761b64Ken Wakasafloat Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
11130bbb917d12358e0264796e75dea888f244761b64Ken Wakasa        const int beforeLength, const unsigned short *after, const int afterLength,
1114be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok        const int score) {
1115be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    if (0 == beforeLength || 0 == afterLength) {
1116be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok        return 0;
1117be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    }
1118be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    const int distance = editDistance(before, beforeLength, after, afterLength);
1119be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    int spaceCount = 0;
1120be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    for (int i = 0; i < afterLength; ++i) {
1121be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok        if (after[i] == CODE_SPACE) {
1122be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok            ++spaceCount;
1123be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok        }
1124be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    }
1125be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
1126be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    if (spaceCount == afterLength) {
1127be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok        return 0;
1128be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    }
1129be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
1130f2789819bd005b5b0581e8439601b5501306327dKen Wakasa    const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX)
1131f2789819bd005b5b0581e8439601b5501306327dKen Wakasa            : static_cast<float>(MAX_INITIAL_SCORE)
1132f2789819bd005b5b0581e8439601b5501306327dKen Wakasa                    * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER),
1133f2789819bd005b5b0581e8439601b5501306327dKen Wakasa                            static_cast<float>(min(beforeLength, afterLength - spaceCount)))
1134f2789819bd005b5b0581e8439601b5501306327dKen Wakasa                    * static_cast<float>(FULL_WORD_MULTIPLIER);
1135be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok
1136be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    // add a weight based on edit distance.
1137be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    // distance <= max(afterLength, beforeLength) == afterLength,
1138be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok    // so, 0 <= distance / afterLength <= 1
113977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength);
1140f2789819bd005b5b0581e8439601b5501306327dKen Wakasa    return (static_cast<float>(score) / maxScore) * weight;
1141be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok}
11422df3060883c7535029c7dfbbb4f7b05935d796aesatok} // namespace latinime
1143