12df3060883c7535029c7dfbbb4f7b05935d796aesatok/* 22df3060883c7535029c7dfbbb4f7b05935d796aesatok * Copyright (C) 2011 The Android Open Source Project 32df3060883c7535029c7dfbbb4f7b05935d796aesatok * 42df3060883c7535029c7dfbbb4f7b05935d796aesatok * Licensed under the Apache License, Version 2.0 (the "License"); 52df3060883c7535029c7dfbbb4f7b05935d796aesatok * you may not use this file except in compliance with the License. 62df3060883c7535029c7dfbbb4f7b05935d796aesatok * You may obtain a copy of the License at 72df3060883c7535029c7dfbbb4f7b05935d796aesatok * 82df3060883c7535029c7dfbbb4f7b05935d796aesatok * http://www.apache.org/licenses/LICENSE-2.0 92df3060883c7535029c7dfbbb4f7b05935d796aesatok * 102df3060883c7535029c7dfbbb4f7b05935d796aesatok * Unless required by applicable law or agreed to in writing, software 112df3060883c7535029c7dfbbb4f7b05935d796aesatok * distributed under the License is distributed on an "AS IS" BASIS, 122df3060883c7535029c7dfbbb4f7b05935d796aesatok * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132df3060883c7535029c7dfbbb4f7b05935d796aesatok * See the License for the specific language governing permissions and 142df3060883c7535029c7dfbbb4f7b05935d796aesatok * limitations under the License. 152df3060883c7535029c7dfbbb4f7b05935d796aesatok */ 162df3060883c7535029c7dfbbb4f7b05935d796aesatok 17f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cassert> 18f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cctype> 19f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cmath> 20f1008c550168e50f930ea1e043000b395ce0f129Ken Wakasa#include <cstring> 212df3060883c7535029c7dfbbb4f7b05935d796aesatok 22cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok#define LOG_TAG "LatinIME: correction.cpp" 232df3060883c7535029c7dfbbb4f7b05935d796aesatok 246e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka#include "char_utils.h" 25cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok#include "correction.h" 2609baa36f7d1298e54a291b0d486cf366a3c3257cTadashi G. Takaoka#include "defines.h" 273e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka#include "proximity_info_state.h" 282df3060883c7535029c7dfbbb4f7b05935d796aesatok 292df3060883c7535029c7dfbbb4f7b05935d796aesatoknamespace latinime { 302df3060883c7535029c7dfbbb4f7b05935d796aesatok 3174fb957e49e7d9ff5af47f35d062aa7c7f97a8fcKen Wakasaclass ProximityInfo; 3274fb957e49e7d9ff5af47f35d062aa7c7f97a8fcKen Wakasa 3304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima///////////////////////////// 3404d873701550323116cf8737494fb8d7e839c351Yusuke Nojima// edit distance funcitons // 3504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima///////////////////////////// 3604d873701550323116cf8737494fb8d7e839c351Yusuke Nojima 3704d873701550323116cf8737494fb8d7e839c351Yusuke Nojimainline static void initEditDistance(int *editDistanceTable) { 3804d873701550323116cf8737494fb8d7e839c351Yusuke Nojima for (int i = 0; i <= MAX_WORD_LENGTH_INTERNAL; ++i) { 3904d873701550323116cf8737494fb8d7e839c351Yusuke Nojima editDistanceTable[i] = i; 4004d873701550323116cf8737494fb8d7e839c351Yusuke Nojima } 4104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima} 4204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima 4354af64ae921baa764d64c11c7f4f8edd6352d405satokinline static void dumpEditDistance10ForDebug(int *editDistanceTable, 4454af64ae921baa764d64c11c7f4f8edd6352d405satok const int editDistanceTableWidth, const int outputLength) { 451a6da631ab7c6ed895964978be8f455b41e019bbsatok if (DEBUG_DICT) { 469fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("EditDistanceTable"); 471a6da631ab7c6ed895964978be8f455b41e019bbsatok for (int i = 0; i <= 10; ++i) { 481a6da631ab7c6ed895964978be8f455b41e019bbsatok int c[11]; 491a6da631ab7c6ed895964978be8f455b41e019bbsatok for (int j = 0; j <= 10; ++j) { 5054af64ae921baa764d64c11c7f4f8edd6352d405satok if (j < editDistanceTableWidth + 1 && i < outputLength + 1) { 5154af64ae921baa764d64c11c7f4f8edd6352d405satok c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j]; 521a6da631ab7c6ed895964978be8f455b41e019bbsatok } else { 531a6da631ab7c6ed895964978be8f455b41e019bbsatok c[j] = -1; 541a6da631ab7c6ed895964978be8f455b41e019bbsatok } 551a6da631ab7c6ed895964978be8f455b41e019bbsatok } 569fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", 571a6da631ab7c6ed895964978be8f455b41e019bbsatok c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); 587914e907b5d31ec4b2034a94e393129833770531Ken Wakasa (void)c; // To suppress compiler warning 591a6da631ab7c6ed895964978be8f455b41e019bbsatok } 601a6da631ab7c6ed895964978be8f455b41e019bbsatok } 611a6da631ab7c6ed895964978be8f455b41e019bbsatok} 621a6da631ab7c6ed895964978be8f455b41e019bbsatok 6304d873701550323116cf8737494fb8d7e839c351Yusuke Nojimainline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, 64687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const int inputSize, const unsigned short *output, const int outputLength) { 651a6da631ab7c6ed895964978be8f455b41e019bbsatok // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. 66687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. 67687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, 68687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize]. 69687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka int *const current = editDistanceTable + outputLength * (inputSize + 1); 70687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1); 7104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima const int *const prevprev = 72687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; 7304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima current[0] = outputLength; 746e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka const uint32_t co = toBaseLowerCase(output[outputLength - 1]); 756e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; 76687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka for (int i = 1; i <= inputSize; ++i) { 776e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka const uint32_t ci = toBaseLowerCase(input[i - 1]); 7804d873701550323116cf8737494fb8d7e839c351Yusuke Nojima const uint16_t cost = (ci == co) ? 0 : 1; 7904d873701550323116cf8737494fb8d7e839c351Yusuke Nojima current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); 806e3cb27cffa525d555b289111678f6fa0495447eTadashi G. Takaoka if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { 8104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima current[i] = min(current[i], prevprev[i - 2] + 1); 8204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima } 8304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima } 8404d873701550323116cf8737494fb8d7e839c351Yusuke Nojima} 8504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima 8654af64ae921baa764d64c11c7f4f8edd6352d405satokinline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, 87687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const int outputLength, const int inputSize) { 8829dc80614bc529ca2c0b96e1a731ebb7a5433090satok if (DEBUG_EDIT_DISTANCE) { 89687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka AKLOGI("getCurrentEditDistance %d, %d", inputSize, outputLength); 901a6da631ab7c6ed895964978be8f455b41e019bbsatok } 91687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize]; 9204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima} 9304d873701550323116cf8737494fb8d7e839c351Yusuke Nojima 948876b75ca1c218949539dcc2fb6c88a19da9e3f8satok////////////////////// 958876b75ca1c218949539dcc2fb6c88a19da9e3f8satok// inline functions // 968876b75ca1c218949539dcc2fb6c88a19da9e3f8satok////////////////////// 9777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasastatic const char SINGLE_QUOTE = '\''; 988876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 9977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasainline bool Correction::isSingleQuote(const unsigned short c) { 1004a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); 10177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE); 1028876b75ca1c218949539dcc2fb6c88a19da9e3f8satok} 1038876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 104cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok//////////////// 105cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok// Correction // 106cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok//////////////// 1078876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 1086cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataokavoid Correction::resetCorrection() { 1096cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataoka mTotalTraverseCount = 0; 1106cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataoka} 1116cbe204fce109fab652da15f4c8ea1ae35fca3e7Satoshi Kataoka 112687a244703a02323ebd64433cbaead5def499861Satoshi Kataokavoid Correction::initCorrection(const ProximityInfo *pi, const int inputSize, 1138876b75ca1c218949539dcc2fb6c88a19da9e3f8satok const int maxDepth) { 1142df3060883c7535029c7dfbbb4f7b05935d796aesatok mProximityInfo = pi; 115687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka mInputSize = inputSize; 1168876b75ca1c218949539dcc2fb6c88a19da9e3f8satok mMaxDepth = maxDepth; 117687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka mMaxEditDistance = mInputSize < 5 ? 2 : mInputSize / 2; 1181a6da631ab7c6ed895964978be8f455b41e019bbsatok // TODO: This is not supposed to be required. Check what's going wrong with 1191a6da631ab7c6ed895964978be8f455b41e019bbsatok // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] 1201a6da631ab7c6ed895964978be8f455b41e019bbsatok initEditDistance(mEditDistanceTable); 121612c6e49c03dc49320a0bf141f51e45a8b969d43satok} 122612c6e49c03dc49320a0bf141f51e45a8b969d43satok 123208268d149c4d139cdf14923650a58ccc0a9d9b6satokvoid Correction::initCorrectionState( 124208268d149c4d139cdf14923650a58ccc0a9d9b6satok const int rootPos, const int childCount, const bool traverseAll) { 125635f68e8222901d607a5ca6fab95985bc496d72asatok latinime::initCorrectionState(mCorrectionStates, rootPos, childCount, traverseAll); 126f3948c1eacee57a9ba4b689ada992cd460596d9fsatok // TODO: remove 1279db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[0].mTransposedPos = mTransposedPos; 1289db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[0].mExcessivePos = mExcessivePos; 129f3948c1eacee57a9ba4b689ada992cd460596d9fsatok mCorrectionStates[0].mSkipPos = mSkipPos; 130208268d149c4d139cdf14923650a58ccc0a9d9b6satok} 131208268d149c4d139cdf14923650a58ccc0a9d9b6satok 132cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::setCorrectionParams(const int skipPos, const int excessivePos, 13340a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok const int transposedPos, const int spaceProximityPos, const int missingSpacePos, 1344d355989bd972ba792ba546a55c67e5b6fc2527asatok const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors) { 135f3948c1eacee57a9ba4b689ada992cd460596d9fsatok // TODO: remove 1369db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mTransposedPos = transposedPos; 1379db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mExcessivePos = excessivePos; 1382df3060883c7535029c7dfbbb4f7b05935d796aesatok mSkipPos = skipPos; 139f3948c1eacee57a9ba4b689ada992cd460596d9fsatok // TODO: remove 1409db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[0].mTransposedPos = transposedPos; 1419db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[0].mExcessivePos = excessivePos; 142f3948c1eacee57a9ba4b689ada992cd460596d9fsatok mCorrectionStates[0].mSkipPos = skipPos; 1439db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 144612c6e49c03dc49320a0bf141f51e45a8b969d43satok mSpaceProximityPos = spaceProximityPos; 145612c6e49c03dc49320a0bf141f51e45a8b969d43satok mMissingSpacePos = missingSpacePos; 14640a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok mUseFullEditDistance = useFullEditDistance; 147d03317c4be21ee65c19d00c7b83a7042042b8627satok mDoAutoCompletion = doAutoCompletion; 1484d355989bd972ba792ba546a55c67e5b6fc2527asatok mMaxErrors = maxErrors; 1492df3060883c7535029c7dfbbb4f7b05935d796aesatok} 1502df3060883c7535029c7dfbbb4f7b05935d796aesatok 151cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::checkState() { 1522df3060883c7535029c7dfbbb4f7b05935d796aesatok if (DEBUG_DICT) { 1532df3060883c7535029c7dfbbb4f7b05935d796aesatok int inputCount = 0; 1542df3060883c7535029c7dfbbb4f7b05935d796aesatok if (mSkipPos >= 0) ++inputCount; 1552df3060883c7535029c7dfbbb4f7b05935d796aesatok if (mExcessivePos >= 0) ++inputCount; 1562df3060883c7535029c7dfbbb4f7b05935d796aesatok if (mTransposedPos >= 0) ++inputCount; 1572df3060883c7535029c7dfbbb4f7b05935d796aesatok } 1582df3060883c7535029c7dfbbb4f7b05935d796aesatok} 1592df3060883c7535029c7dfbbb4f7b05935d796aesatok 160b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalardbool Correction::sameAsTyped() { 161b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalard return mProximityInfoState.sameAsTyped(mWord, mOutputIndex); 162b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalard} 163b14fc88e482e53ba6852c8d5da5d9826c68d041fJean Chalard 164a85f4929cd027246045ec3e806857b84e64fe762satokint Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, 165a85f4929cd027246045ec3e806857b84e64fe762satok const int wordCount, const bool isSpaceProximity, const unsigned short *word) { 166a85f4929cd027246045ec3e806857b84e64fe762satok return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, 167a85f4929cd027246045ec3e806857b84e64fe762satok wordCount, this, isSpaceProximity, word); 168612c6e49c03dc49320a0bf141f51e45a8b969d43satok} 169612c6e49c03dc49320a0bf141f51e45a8b969d43satok 170171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { 171687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka return getFinalProbabilityInternal(probability, word, wordLength, mInputSize); 17254af64ae921baa764d64c11c7f4f8edd6352d405satok} 17354af64ae921baa764d64c11c7f4f8edd6352d405satok 174171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, 175687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka int *wordLength, const int inputSize) { 176687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka return getFinalProbabilityInternal(probability, word, wordLength, inputSize); 17754af64ae921baa764d64c11c7f4f8edd6352d405satok} 17854af64ae921baa764d64c11c7f4f8edd6352d405satok 179171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, 180687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka int *wordLength, const int inputSize) { 181985312e88f11e3ce61f35191df59c6bdf9e80e79satok const int outputIndex = mTerminalOutputIndex; 182985312e88f11e3ce61f35191df59c6bdf9e80e79satok const int inputIndex = mTerminalInputIndex; 1838876b75ca1c218949539dcc2fb6c88a19da9e3f8satok *wordLength = outputIndex + 1; 1848876b75ca1c218949539dcc2fb6c88a19da9e3f8satok *word = mWord; 185171d1809ffc724de4fb793f481d592644e3d141eJean Chalard int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( 186687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize); 187171d1809ffc724de4fb793f481d592644e3d141eJean Chalard return finalProbability; 1880f6c8e8aeb18b949fa9586dd9de091027b17e107satok} 1890f6c8e8aeb18b949fa9586dd9de091027b17e107satok 190208268d149c4d139cdf14923650a58ccc0a9d9b6satokbool Correction::initProcessState(const int outputIndex) { 191208268d149c4d139cdf14923650a58ccc0a9d9b6satok if (mCorrectionStates[outputIndex].mChildCount <= 0) { 192208268d149c4d139cdf14923650a58ccc0a9d9b6satok return false; 193208268d149c4d139cdf14923650a58ccc0a9d9b6satok } 1944e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok mOutputIndex = outputIndex; 195208268d149c4d139cdf14923650a58ccc0a9d9b6satok --(mCorrectionStates[outputIndex].mChildCount); 196208268d149c4d139cdf14923650a58ccc0a9d9b6satok mInputIndex = mCorrectionStates[outputIndex].mInputIndex; 197635f68e8222901d607a5ca6fab95985bc496d72asatok mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes; 1989db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 199e4ba822cc6959490868fd8868ffad1c4e9b23992Yusuke Nojima mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount; 200466ed22fc6f90c47bc1571b51fda2712ade664f6satok mProximityCount = mCorrectionStates[outputIndex].mProximityCount; 2019db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount; 2029db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mExcessiveCount = mCorrectionStates[outputIndex].mExcessiveCount; 203635f68e8222901d607a5ca6fab95985bc496d72asatok mSkippedCount = mCorrectionStates[outputIndex].mSkippedCount; 2049db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mLastCharExceeded = mCorrectionStates[outputIndex].mLastCharExceeded; 2059db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 2069db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mTransposedPos = mCorrectionStates[outputIndex].mTransposedPos; 2079db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mExcessivePos = mCorrectionStates[outputIndex].mExcessivePos; 208f3948c1eacee57a9ba4b689ada992cd460596d9fsatok mSkipPos = mCorrectionStates[outputIndex].mSkipPos; 2099db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 210635f68e8222901d607a5ca6fab95985bc496d72asatok mMatching = false; 2119db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mProximityMatching = false; 2121b9fa942b4b62a818e45655dc5097c7eed7a5465satok mAdditionalProximityMatching = false; 2139db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mTransposing = false; 2149db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mExceeding = false; 2159db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mSkipping = false; 2169db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 217208268d149c4d139cdf14923650a58ccc0a9d9b6satok return true; 218208268d149c4d139cdf14923650a58ccc0a9d9b6satok} 219208268d149c4d139cdf14923650a58ccc0a9d9b6satok 220208268d149c4d139cdf14923650a58ccc0a9d9b6satokint Correction::goDownTree( 221208268d149c4d139cdf14923650a58ccc0a9d9b6satok const int parentIndex, const int childCount, const int firstChildPos) { 222208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mParentIndex = parentIndex; 223208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mChildCount = childCount; 224208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mSiblingPos = firstChildPos; 225208268d149c4d139cdf14923650a58ccc0a9d9b6satok return mOutputIndex; 2260f6c8e8aeb18b949fa9586dd9de091027b17e107satok} 2270f6c8e8aeb18b949fa9586dd9de091027b17e107satok 2284e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok// TODO: remove 229fee0ac60b1cd0a4760ca8f310ff8a86b925d833bKen Wakasaint Correction::getInputIndex() const { 2304e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok return mInputIndex; 2310f6c8e8aeb18b949fa9586dd9de091027b17e107satok} 2320f6c8e8aeb18b949fa9586dd9de091027b17e107satok 233cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::incrementInputIndex() { 2344e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok ++mInputIndex; 2354e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok} 2364e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok 237cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokvoid Correction::incrementOutputIndex() { 2384e4e74e6b659a069ca6e778f0ae7f9c7fa4343b7satok ++mOutputIndex; 239208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; 240208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; 241208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; 242208268d149c4d139cdf14923650a58ccc0a9d9b6satok mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; 243635f68e8222901d607a5ca6fab95985bc496d72asatok mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; 2449db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 245e4ba822cc6959490868fd8868ffad1c4e9b23992Yusuke Nojima mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount; 246466ed22fc6f90c47bc1571b51fda2712ade664f6satok mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount; 2479db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount; 2489db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount; 249635f68e8222901d607a5ca6fab95985bc496d72asatok mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; 2509db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 251f3948c1eacee57a9ba4b689ada992cd460596d9fsatok mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; 2529db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos; 2539db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos; 2549db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 2559db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded; 2569db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 257635f68e8222901d607a5ca6fab95985bc496d72asatok mCorrectionStates[mOutputIndex].mMatching = mMatching; 2580cedd2bcc3efcec30ea542ceb8d9161afa764a62satok mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; 2591b9fa942b4b62a818e45655dc5097c7eed7a5465satok mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; 2609db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mTransposing = mTransposing; 2619db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mExceeding = mExceeding; 2629db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok mCorrectionStates[mOutputIndex].mSkipping = mSkipping; 263612c6e49c03dc49320a0bf141f51e45a8b969d43satok} 264612c6e49c03dc49320a0bf141f51e45a8b969d43satok 265635f68e8222901d607a5ca6fab95985bc496d72asatokvoid Correction::startToTraverseAllNodes() { 266635f68e8222901d607a5ca6fab95985bc496d72asatok mNeedsToTraverseAllNodes = true; 2678876b75ca1c218949539dcc2fb6c88a19da9e3f8satok} 2688876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 269cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokbool Correction::needsToPrune() const { 27004d873701550323116cf8737494fb8d7e839c351Yusuke Nojima // TODO: use edit distance here 271d03317c4be21ee65c19d00c7b83a7042042b8627satok return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance 272d03317c4be21ee65c19d00c7b83a7042042b8627satok // Allow one char longer word for missing character 273687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka || (!mDoAutoCompletion && (mOutputIndex > mInputSize)); 2748876b75ca1c218949539dcc2fb6c88a19da9e3f8satok} 2758876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 27604d873701550323116cf8737494fb8d7e839c351Yusuke Nojimavoid Correction::addCharToCurrentWord(const int32_t c) { 27704d873701550323116cf8737494fb8d7e839c351Yusuke Nojima mWord[mOutputIndex] = c; 2784a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); 279687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, 28004d873701550323116cf8737494fb8d7e839c351Yusuke Nojima mWord, mOutputIndex + 1); 28104d873701550323116cf8737494fb8d7e839c351Yusuke Nojima} 28204d873701550323116cf8737494fb8d7e839c351Yusuke Nojima 283cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokCorrection::CorrectionType Correction::processSkipChar( 28410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { 28504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima addCharToCurrentWord(c); 2866ad15fcd158de5bec18f6529b961a55e7db9007fsatok mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); 2876ad15fcd158de5bec18f6529b961a55e7db9007fsatok mTerminalOutputIndex = mOutputIndex; 2886ad15fcd158de5bec18f6529b961a55e7db9007fsatok if (mNeedsToTraverseAllNodes && isTerminal) { 289985312e88f11e3ce61f35191df59c6bdf9e80e79satok incrementOutputIndex(); 290985312e88f11e3ce61f35191df59c6bdf9e80e79satok return TRAVERSE_ALL_ON_TERMINAL; 291985312e88f11e3ce61f35191df59c6bdf9e80e79satok } else { 292985312e88f11e3ce61f35191df59c6bdf9e80e79satok incrementOutputIndex(); 293985312e88f11e3ce61f35191df59c6bdf9e80e79satok return TRAVERSE_ALL_NOT_ON_TERMINAL; 294985312e88f11e3ce61f35191df59c6bdf9e80e79satok } 295985312e88f11e3ce61f35191df59c6bdf9e80e79satok} 296985312e88f11e3ce61f35191df59c6bdf9e80e79satok 2976ad15fcd158de5bec18f6529b961a55e7db9007fsatokCorrection::CorrectionType Correction::processUnrelatedCorrectionType() { 2986ad15fcd158de5bec18f6529b961a55e7db9007fsatok // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType 2996ad15fcd158de5bec18f6529b961a55e7db9007fsatok mTerminalInputIndex = mInputIndex; 3006ad15fcd158de5bec18f6529b961a55e7db9007fsatok mTerminalOutputIndex = mOutputIndex; 3016ad15fcd158de5bec18f6529b961a55e7db9007fsatok return UNRELATED; 3026ad15fcd158de5bec18f6529b961a55e7db9007fsatok} 3036ad15fcd158de5bec18f6529b961a55e7db9007fsatok 3043e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataokainline bool isEquivalentChar(ProximityType type) { 3053e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka return type == EQUIVALENT_CHAR; 306258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima} 307258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima 3083e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataokainline bool isProximityCharOrEquivalentChar(ProximityType type) { 3093e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; 3101b9fa942b4b62a818e45655dc5097c7eed7a5465satok} 3111b9fa942b4b62a818e45655dc5097c7eed7a5465satok 312cfca3c6317143ce68770cab02eb7d7a5dc8765c9satokCorrection::CorrectionType Correction::processCharAndCalcState( 3138876b75ca1c218949539dcc2fb6c88a19da9e3f8satok const int32_t c, const bool isTerminal) { 3147adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); 3154d355989bd972ba792ba546a55c67e5b6fc2527asatok if (correctionCount > mMaxErrors) { 3166ad15fcd158de5bec18f6529b961a55e7db9007fsatok return processUnrelatedCorrectionType(); 3174d355989bd972ba792ba546a55c67e5b6fc2527asatok } 3184d355989bd972ba792ba546a55c67e5b6fc2527asatok 3197adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // TODO: Change the limit if we'll allow two or more corrections 3207adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok const bool noCorrectionsHappenedSoFar = correctionCount == 0; 3217adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok const bool canTryCorrection = noCorrectionsHappenedSoFar; 322a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima int proximityIndex = 0; 323a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima mDistances[mOutputIndex] = NOT_A_DISTANCE; 324b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok 3254d355989bd972ba792ba546a55c67e5b6fc2527asatok // Skip checking this node 32677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (mNeedsToTraverseAllNodes || isSingleQuote(c)) { 32710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok bool incremented = false; 328687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (mLastCharExceeded && mInputIndex == mInputSize - 1) { 32910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // TODO: Do not check the proximity if EditDistance exceeds the threshold 3304a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka const ProximityType matchId = mProximityInfoState.getMatchedProximityId( 3314a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mInputIndex, c, true, &proximityIndex); 332258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima if (isEquivalentChar(matchId)) { 33310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok mLastCharExceeded = false; 33410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok --mExcessiveCount; 335a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima mDistances[mOutputIndex] = 3364a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); 3373e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka } else if (matchId == NEAR_PROXIMITY_CHAR) { 33810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok mLastCharExceeded = false; 33910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok --mExcessiveCount; 34010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok ++mProximityCount; 3414a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance( 3424a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mInputIndex, proximityIndex); 34310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 34477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (!isSingleQuote(c)) { 3456804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok incrementInputIndex(); 3466804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok incremented = true; 3476804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok } 3486d78302155d8a6437ab6541d93ddb42bf21e0a61satok } 34910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok return processSkipChar(c, isTerminal, incremented); 350b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } 3519db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 3524d355989bd972ba792ba546a55c67e5b6fc2527asatok // Check possible corrections. 3539db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok if (mExcessivePos >= 0) { 3549db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok if (mExcessiveCount == 0 && mExcessivePos < mOutputIndex) { 355b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mExcessivePos = mOutputIndex; 3569db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok } 357687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (mExcessivePos < mInputSize - 1) { 3587adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok mExceeding = mExcessivePos == mInputIndex && canTryCorrection; 3599db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok } 3608876b75ca1c218949539dcc2fb6c88a19da9e3f8satok } 3618876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 362985312e88f11e3ce61f35191df59c6bdf9e80e79satok if (mSkipPos >= 0) { 363f3948c1eacee57a9ba4b689ada992cd460596d9fsatok if (mSkippedCount == 0 && mSkipPos < mOutputIndex) { 364f3948c1eacee57a9ba4b689ada992cd460596d9fsatok if (DEBUG_DICT) { 365f2789819bd005b5b0581e8439601b5501306327dKen Wakasa // TODO: Enable this assertion. 366f2789819bd005b5b0581e8439601b5501306327dKen Wakasa //assert(mSkipPos == mOutputIndex - 1); 367f3948c1eacee57a9ba4b689ada992cd460596d9fsatok } 368b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mSkipPos = mOutputIndex; 369f3948c1eacee57a9ba4b689ada992cd460596d9fsatok } 3707adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok mSkipping = mSkipPos == mOutputIndex && canTryCorrection; 3719db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok } 3729db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 3739db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok if (mTransposedPos >= 0) { 3749db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok if (mTransposedCount == 0 && mTransposedPos < mOutputIndex) { 375b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mTransposedPos = mOutputIndex; 376b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } 377687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (mTransposedPos < mInputSize - 1) { 3787adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok mTransposing = mInputIndex == mTransposedPos && canTryCorrection; 3799db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok } 380985312e88f11e3ce61f35191df59c6bdf9e80e79satok } 381985312e88f11e3ce61f35191df59c6bdf9e80e79satok 382b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok bool secondTransposing = false; 383b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok if (mTransposedCount % 2 == 1) { 3844a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka if (isEquivalentChar(mProximityInfoState.getMatchedProximityId( 3854a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mInputIndex - 1, c, false))) { 386b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok ++mTransposedCount; 387b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok secondTransposing = true; 388b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } else if (mCorrectionStates[mOutputIndex].mExceeding) { 389b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok --mTransposedCount; 390b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok ++mExcessiveCount; 39110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok --mExcessivePos; 392b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok incrementInputIndex(); 393b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } else { 394b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok --mTransposedCount; 395e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 396687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 397e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 398e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 39910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok DUMP_WORD(mWord, mOutputIndex); 4009fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 40110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok mTransposedCount, mExcessiveCount, c); 40210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 4036ad15fcd158de5bec18f6529b961a55e7db9007fsatok return processUnrelatedCorrectionType(); 4048876b75ca1c218949539dcc2fb6c88a19da9e3f8satok } 405b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } 4068876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 4077adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // TODO: Change the limit if we'll allow two or more proximity chars with corrections 4084d355989bd972ba792ba546a55c67e5b6fc2527asatok // Work around: When the mMaxErrors is 1, we only allow just one error 4094d355989bd972ba792ba546a55c67e5b6fc2527asatok // including proximity correction. 4104d355989bd972ba792ba546a55c67e5b6fc2527asatok const bool checkProximityChars = (mMaxErrors > 1) 4114d355989bd972ba792ba546a55c67e5b6fc2527asatok ? (noCorrectionsHappenedSoFar || mProximityCount == 0) 4124d355989bd972ba792ba546a55c67e5b6fc2527asatok : (noCorrectionsHappenedSoFar && mProximityCount == 0); 4134d355989bd972ba792ba546a55c67e5b6fc2527asatok 4143e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka ProximityType matchedProximityCharId = secondTransposing 4153e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka ? EQUIVALENT_CHAR 4164a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka : mProximityInfoState.getMatchedProximityId( 417a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima mInputIndex, c, checkProximityChars, &proximityIndex); 418b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok 4193e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka if (UNRELATED_CHAR == matchedProximityCharId 4203e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { 42157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok if (canTryCorrection && mOutputIndex > 0 42257834c20a5f2e4c944e09eb4fcddb440bbd46e20satok && mCorrectionStates[mOutputIndex].mProximityMatching 42310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok && mCorrectionStates[mOutputIndex].mExceeding 4244a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka && isEquivalentChar(mProximityInfoState.getMatchedProximityId( 42557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok mInputIndex, mWord[mOutputIndex - 1], false))) { 426e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 427687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 428e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 429e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 4309fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("CONVERSION p->e %c", mWord[mOutputIndex - 1]); 43157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok } 4327adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // Conversion p->e 43357834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // Example: 43457834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // wearth -> earth 43557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // px -> (E)mmmmm 43610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok ++mExcessiveCount; 43710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok --mProximityCount; 43857834c20a5f2e4c944e09eb4fcddb440bbd46e20satok mExcessivePos = mOutputIndex - 1; 43957834c20a5f2e4c944e09eb4fcddb440bbd46e20satok ++mInputIndex; 44057834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // Here, we are doing something equivalent to matchedProximityCharId, 44157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // but we already know that "excessive char correction" just happened 44257834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // so that we just need to check "mProximityCount == 0". 4434a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka matchedProximityCharId = mProximityInfoState.getMatchedProximityId( 444a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima mInputIndex, c, mProximityCount == 0, &proximityIndex); 44557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok } 44657834c20a5f2e4c944e09eb4fcddb440bbd46e20satok } 44757834c20a5f2e4c944e09eb4fcddb440bbd46e20satok 4483e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka if (UNRELATED_CHAR == matchedProximityCharId 4493e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka || ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { 4503e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { 4511b9fa942b4b62a818e45655dc5097c7eed7a5465satok mAdditionalProximityMatching = true; 4521b9fa942b4b62a818e45655dc5097c7eed7a5465satok } 45357834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // TODO: Optimize 45457834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // As the current char turned out to be an unrelated char, 45557834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] 45657834c20a5f2e4c944e09eb4fcddb440bbd46e20satok // here refers to the previous state. 457687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (mInputIndex < mInputSize - 1 && mOutputIndex > 0 && mTransposedCount > 0 45810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok && !mCorrectionStates[mOutputIndex].mTransposing 45910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok && mCorrectionStates[mOutputIndex - 1].mTransposing 4604a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka && isEquivalentChar(mProximityInfoState.getMatchedProximityId( 461258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima mInputIndex, mWord[mOutputIndex - 1], false)) 462258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima && isEquivalentChar( 4634a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { 4647adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // Conversion t->e 46510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // Example: 46610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // occaisional -> occa sional 46710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // mmmmttx -> mmmm(E)mmmmmm 46810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok mTransposedCount -= 2; 46910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok ++mExcessiveCount; 47010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok ++mInputIndex; 4717adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok } else if (mOutputIndex > 0 && mInputIndex > 0 && mTransposedCount > 0 47210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok && !mCorrectionStates[mOutputIndex].mTransposing 47310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok && mCorrectionStates[mOutputIndex - 1].mTransposing 474258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima && isEquivalentChar( 4754a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { 4767adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // Conversion t->s 47710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // Example: 47810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // chcolate -> chocolate 47910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // mmttx -> mmsmmmmmm 48010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok mTransposedCount -= 2; 48110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok ++mSkippedCount; 48210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok --mInputIndex; 4837adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok } else if (canTryCorrection && mInputIndex > 0 4847adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok && mCorrectionStates[mOutputIndex].mProximityMatching 4857adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok && mCorrectionStates[mOutputIndex].mSkipping 486258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima && isEquivalentChar( 4874a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getMatchedProximityId(mInputIndex - 1, c, false))) { 4887adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // Conversion p->s 4897adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // Note: This logic tries saving cases like contrst --> contrast -- "a" is one of 4907adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // proximity chars of "s", but it should rather be handled as a skipped char. 4917adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok ++mSkippedCount; 4927adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok --mProximityCount; 4937adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok return processSkipChar(c, isTerminal, false); 494687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka } else if (mInputIndex - 1 < mInputSize 4951b9fa942b4b62a818e45655dc5097c7eed7a5465satok && mSkippedCount > 0 4961b9fa942b4b62a818e45655dc5097c7eed7a5465satok && mCorrectionStates[mOutputIndex].mSkipping 4971b9fa942b4b62a818e45655dc5097c7eed7a5465satok && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching 4981b9fa942b4b62a818e45655dc5097c7eed7a5465satok && isProximityCharOrEquivalentChar( 4994a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { 5001b9fa942b4b62a818e45655dc5097c7eed7a5465satok // Conversion s->a 5011b9fa942b4b62a818e45655dc5097c7eed7a5465satok incrementInputIndex(); 5021b9fa942b4b62a818e45655dc5097c7eed7a5465satok --mSkippedCount; 5031b9fa942b4b62a818e45655dc5097c7eed7a5465satok mProximityMatching = true; 5041b9fa942b4b62a818e45655dc5097c7eed7a5465satok ++mProximityCount; 5051b9fa942b4b62a818e45655dc5097c7eed7a5465satok mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; 506687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputSize 507258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima && isEquivalentChar( 5084a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getMatchedProximityId(mInputIndex + 1, c, false))) { 5097adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // 1.2. Excessive or transpose correction 510b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok if (mTransposing) { 511b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok ++mTransposedCount; 512985312e88f11e3ce61f35191df59c6bdf9e80e79satok } else { 513b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok ++mExcessiveCount; 514b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok incrementInputIndex(); 515985312e88f11e3ce61f35191df59c6bdf9e80e79satok } 516e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 517687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 518e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 519e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 520e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok DUMP_WORD(mWord, mOutputIndex); 521e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (mTransposing) { 522e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok AKLOGI("TRANSPOSE: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 523e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mTransposedCount, mExcessiveCount, c); 524e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } else { 525e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok AKLOGI("EXCEED: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 526e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mTransposedCount, mExcessiveCount, c); 527e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } 528e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } 5297adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok } else if (mSkipping) { 5307adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // 3. Skip correction 531b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok ++mSkippedCount; 532e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 533687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 534e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 535e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 536e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok AKLOGI("SKIP: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 537e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mTransposedCount, mExcessiveCount, c); 538e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } 53910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok return processSkipChar(c, isTerminal, false); 5403e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka } else if (ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { 541e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok // As a last resort, use additional proximity characters 542e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mProximityMatching = true; 543e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok ++mProximityCount; 544e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; 545e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 546687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 547e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 548e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 549e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok AKLOGI("ADDITIONALPROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 550e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mTransposedCount, mExcessiveCount, c); 551e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } 552b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } else { 553e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 554687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 555e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 556e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 55710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok DUMP_WORD(mWord, mOutputIndex); 5589fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 55910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok mTransposedCount, mExcessiveCount, c); 56010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 5616ad15fcd158de5bec18f6529b961a55e7db9007fsatok return processUnrelatedCorrectionType(); 5628876b75ca1c218949539dcc2fb6c88a19da9e3f8satok } 563258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima } else if (secondTransposing) { 564687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka // If inputIndex is greater than mInputSize, that means there is no 565b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // proximity chars. So, we don't need to check proximity. 566b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mMatching = true; 567258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima } else if (isEquivalentChar(matchedProximityCharId)) { 568258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima mMatching = true; 569e4ba822cc6959490868fd8868ffad1c4e9b23992Yusuke Nojima ++mEquivalentCharCount; 5704a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mDistances[mOutputIndex] = mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, 0); 5713e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka } else if (NEAR_PROXIMITY_CHAR == matchedProximityCharId) { 572b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mProximityMatching = true; 573258bfe66e0fcfc89b59534a9cc7f50ff07d5f78dYusuke Nojima ++mProximityCount; 574a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima mDistances[mOutputIndex] = 5754a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka mProximityInfoState.getNormalizedSquaredDistance(mInputIndex, proximityIndex); 576e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 577687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 578e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 579e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 580e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok AKLOGI("PROX: %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 581e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok mTransposedCount, mExcessiveCount, c); 582e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } 583b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } 5848876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 58504d873701550323116cf8737494fb8d7e839c351Yusuke Nojima addCharToCurrentWord(c); 5868876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 5877adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok // 4. Last char excessive correction 5887adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok mLastCharExceeded = mExcessiveCount == 0 && mSkippedCount == 0 && mTransposedCount == 0 589687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && mProximityCount == 0 && (mInputIndex == mInputSize - 2); 590687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const bool isSameAsUserTypedLength = (mInputSize == mInputIndex + 1) || mLastCharExceeded; 591b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok if (mLastCharExceeded) { 592b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok ++mExcessiveCount; 593b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } 5948876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 595b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // Start traversing all nodes after the index exceeds the user typed length 596b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok if (isSameAsUserTypedLength) { 597b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok startToTraverseAllNodes(); 5988876b75ca1c218949539dcc2fb6c88a19da9e3f8satok } 5998876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 60010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const bool needsToTryOnTerminalForTheLastPossibleExcessiveChar = 601687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka mExceeding && mInputIndex == mInputSize - 2; 60210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok 603b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // Finally, we are ready to go to the next character, the next "virtual node". 604b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // We should advance the input index. 605b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // We do this in this branch of the 'if traverseAllNodes' because we are still matching 606b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // characters to input; the other branch is not matching them but searching for 607b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok // completions, this is why it does not have to do it. 608b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok incrementInputIndex(); 6098876b75ca1c218949539dcc2fb6c88a19da9e3f8satok // Also, the next char is one "virtual node" depth more than this char. 6108876b75ca1c218949539dcc2fb6c88a19da9e3f8satok incrementOutputIndex(); 6118876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 61210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok if ((needsToTryOnTerminalForTheLastPossibleExcessiveChar 61310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok || isSameAsUserTypedLength) && isTerminal) { 614b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mTerminalInputIndex = mInputIndex - 1; 615b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok mTerminalOutputIndex = mOutputIndex - 1; 616e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION 617687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == mInputSize) 618e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok && (MIN_OUTPUT_INDEX_FOR_DEBUG <= 0 || MIN_OUTPUT_INDEX_FOR_DEBUG < mOutputIndex)) { 61957834c20a5f2e4c944e09eb4fcddb440bbd46e20satok DUMP_WORD(mWord, mOutputIndex); 6209fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("ONTERMINAL(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, 62157834c20a5f2e4c944e09eb4fcddb440bbd46e20satok mTransposedCount, mExcessiveCount, c); 62257834c20a5f2e4c944e09eb4fcddb440bbd46e20satok } 623b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok return ON_TERMINAL; 624b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } else { 6256ad15fcd158de5bec18f6529b961a55e7db9007fsatok mTerminalInputIndex = mInputIndex - 1; 6266ad15fcd158de5bec18f6529b961a55e7db9007fsatok mTerminalOutputIndex = mOutputIndex - 1; 627b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok return NOT_ON_TERMINAL; 628b9d09e73e05f6f9cdab264b358f6d5306c279ccfsatok } 6298876b75ca1c218949539dcc2fb6c88a19da9e3f8satok} 6308876b75ca1c218949539dcc2fb6c88a19da9e3f8satok 6310bbb917d12358e0264796e75dea888f244761b64Ken Wakasainline static int getQuoteCount(const unsigned short *word, const int length) { 632bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok int quoteCount = 0; 633bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok for (int i = 0; i < length; ++i) { 6341cd7ca991961937c1a84572a6cafa3eaf5181be4Keisuke Kuroyanagi if (word[i] == SINGLE_QUOTE) { 635bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok ++quoteCount; 636bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 637bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 638bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok return quoteCount; 639bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok} 640bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 641eb050fc2dc97a7e6ddcaf254c110dc16279dfd0dsatokinline static bool isUpperCase(unsigned short c) { 6424c5daa8a5574628204be602578794035ab8686f0Ken Wakasa return isAsciiUpper(toBaseChar(c)); 643eb050fc2dc97a7e6ddcaf254c110dc16279dfd0dsatok} 644eb050fc2dc97a7e6ddcaf254c110dc16279dfd0dsatok 645612c6e49c03dc49320a0bf141f51e45a8b969d43satok////////////////////// 646612c6e49c03dc49320a0bf141f51e45a8b969d43satok// RankingAlgorithm // 647612c6e49c03dc49320a0bf141f51e45a8b969d43satok////////////////////// 648612c6e49c03dc49320a0bf141f51e45a8b969d43satok 649635f68e8222901d607a5ca6fab95985bc496d72asatok/* static */ 650171d1809ffc724de4fb793f481d592644e3d141eJean Chalardint Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex, 6510bbb917d12358e0264796e75dea888f244761b64Ken Wakasa const int outputIndex, const int freq, int *editDistanceTable, const Correction *correction, 652687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const int inputSize) { 653cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok const int excessivePos = correction->getExcessivePos(); 654cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; 655cfca3c6317143ce68770cab02eb7d7a5dc8765c9satok const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; 6564a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka const ProximityInfoState *proximityInfoState = &correction->mProximityInfoState; 6579db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok const int skippedCount = correction->mSkippedCount; 65810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const int transposedCount = correction->mTransposedCount / 2; 65910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2; 6600cedd2bcc3efcec30ea542ceb8d9161afa764a62satok const int proximityMatchedCount = correction->mProximityCount; 6619db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok const bool lastCharExceeded = correction->mLastCharExceeded; 66240a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok const bool useFullEditDistance = correction->mUseFullEditDistance; 66340a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok const int outputLength = outputIndex + 1; 664687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (skippedCount >= inputSize || inputSize == 0) { 665bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok return -1; 666bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 667bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 66810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // TODO: find more robust way 669687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka bool sameLength = lastCharExceeded ? (inputSize == inputIndex + 2) 670687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka : (inputSize == inputIndex + 1); 671466ed22fc6f90c47bc1571b51fda2712ade664f6satok 672466ed22fc6f90c47bc1571b51fda2712ade664f6satok // TODO: use mExcessiveCount 673687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const int matchCount = inputSize - correction->mProximityCount - excessiveCount; 674466ed22fc6f90c47bc1571b51fda2712ade664f6satok 6750bbb917d12358e0264796e75dea888f244761b64Ken Wakasa const unsigned short *word = correction->mWord; 6769db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok const bool skipped = skippedCount > 0; 6770cedd2bcc3efcec30ea542ceb8d9161afa764a62satok 67854af64ae921baa764d64c11c7f4f8edd6352d405satok const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) 679687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka - getQuoteCount(proximityInfoState->getPrimaryInputWord(), inputSize)); 680612c6e49c03dc49320a0bf141f51e45a8b969d43satok 681bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // TODO: Calculate edit distance for transposed and excessive 682bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok int ed = 0; 6831a6da631ab7c6ed895964978be8f455b41e019bbsatok if (DEBUG_DICT_FULL) { 684687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka dumpEditDistance10ForDebug(editDistanceTable, correction->mInputSize, outputLength); 6851a6da631ab7c6ed895964978be8f455b41e019bbsatok } 68610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok int adjustedProximityMatchedCount = proximityMatchedCount; 68710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok 68810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok int finalFreq = freq; 6899db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok 6901b9fa942b4b62a818e45655dc5097c7eed7a5465satok if (DEBUG_CORRECTION_FREQ 691687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) { 6921b9fa942b4b62a818e45655dc5097c7eed7a5465satok AKLOGI("FinalFreq0: %d", finalFreq); 6931b9fa942b4b62a818e45655dc5097c7eed7a5465satok } 6949db2097f7bbfce0b4679d80cf8a4f6127616f1aesatok // TODO: Optimize this. 695a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { 696687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka ed = getCurrentEditDistance(editDistanceTable, correction->mInputSize, outputLength, 697687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka inputSize) - transposedCount; 698a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok 69910266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const int matchWeight = powerIntCapped(typedLetterMultiplier, 700687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka max(inputSize, outputLength) - ed); 70110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok multiplyIntCapped(matchWeight, &finalFreq); 70210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok 70310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // TODO: Demote further if there are two or more excessive chars with longer user input? 704687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (inputSize > outputLength) { 70510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); 7060cedd2bcc3efcec30ea542ceb8d9161afa764a62satok } 70710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok 708bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok ed = max(0, ed - quoteDiffCount); 709687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputSize)), 7101b9fa942b4b62a818e45655dc5097c7eed7a5465satok proximityMatchedCount); 7116804b8e0fd12b8d57f99f4364cb89fdabe9f4f8bsatok if (transposedCount <= 0) { 712687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (ed == 1 && (inputSize == outputLength - 1 || inputSize == outputLength + 1)) { 713a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok // Promote a word with just one skipped or excessive char 714a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok if (sameLength) { 7151b9fa942b4b62a818e45655dc5097c7eed7a5465satok multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 7161b9fa942b4b62a818e45655dc5097c7eed7a5465satok + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength, 7171b9fa942b4b62a818e45655dc5097c7eed7a5465satok &finalFreq); 718a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok } else { 719a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok multiplyIntCapped(typedLetterMultiplier, &finalFreq); 720a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok } 721a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok } else if (ed == 0) { 72210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok multiplyIntCapped(typedLetterMultiplier, &finalFreq); 723a161a4afd6145d7ed6ba7f67106f5fb5c9887903satok sameLength = true; 72410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 72510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 726bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } else { 72710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); 72810266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok multiplyIntCapped(matchWeight, &finalFreq); 7290cedd2bcc3efcec30ea542ceb8d9161afa764a62satok } 7300cedd2bcc3efcec30ea542ceb8d9161afa764a62satok 7314a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka if (proximityInfoState->getMatchedProximityId(0, word[0], true) == UNRELATED_CHAR) { 73210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok multiplyRate(FIRST_CHAR_DIFFERENT_DEMOTION_RATE, &finalFreq); 73310266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 734bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 735bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok /////////////////////////////////////////////// 736bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Promotion and Demotion for each correction 7370cedd2bcc3efcec30ea542ceb8d9161afa764a62satok 738bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Demotion for a word with missing character 739466ed22fc6f90c47bc1571b51fda2712ade664f6satok if (skipped) { 740bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 741687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka * (10 * inputSize - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) 742687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka / (10 * inputSize 743bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10); 744bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok if (DEBUG_DICT_FULL) { 7459fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("Demotion rate for missing character is %d.", demotionRate); 746612c6e49c03dc49320a0bf141f51e45a8b969d43satok } 747bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok multiplyRate(demotionRate, &finalFreq); 748612c6e49c03dc49320a0bf141f51e45a8b969d43satok } 749bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 750bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Demotion for a word with transposed character 75110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok if (transposedCount > 0) multiplyRate( 752612c6e49c03dc49320a0bf141f51e45a8b969d43satok WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); 753bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 754bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Demotion for a word with excessive character 75510266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok if (excessiveCount > 0) { 756612c6e49c03dc49320a0bf141f51e45a8b969d43satok multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); 7574a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka if (!lastCharExceeded && !proximityInfoState->existsAdjacentProximityChars(excessivePos)) { 758e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_DICT_FULL) { 7599fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("Double excessive demotion"); 76010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 761612c6e49c03dc49320a0bf141f51e45a8b969d43satok // If an excessive character is not adjacent to the left char or the right char, 762612c6e49c03dc49320a0bf141f51e45a8b969d43satok // we will demote this word. 763612c6e49c03dc49320a0bf141f51e45a8b969d43satok multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); 764612c6e49c03dc49320a0bf141f51e45a8b969d43satok } 765612c6e49c03dc49320a0bf141f51e45a8b969d43satok } 766bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 767441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka int additionalProximityCount = 0; 768441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka // Demote additional proximity characters 769441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka for (int i = 0; i < outputLength; ++i) { 770441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka const int squaredDistance = correction->mDistances[i]; 771441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { 772441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka ++additionalProximityCount; 773441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka } 774441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka } 775441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka 776e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok const bool performTouchPositionCorrection = 7774a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka CALIBRATE_SCORE_BY_TOUCH_COORDINATES 7784a3db7057f77dc85311fb1f94934b5a004ab613eSatoshi Kataoka && proximityInfoState->touchPositionCorrectionEnabled() 779441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0 780441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka && additionalProximityCount == 0; 781441b3e5a906d12207ee4522849053679506207e5Satoshi Kataoka 782a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima // Score calibration by touch coordinates is being done only for pure-fat finger typing error 783a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima // cases. 784a4c1f1c1fde5e9492523842dd95a4c9f17f40c3aYusuke Nojima // TODO: Remove this constraint. 78504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok if (performTouchPositionCorrection) { 78604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok for (int i = 0; i < outputLength; ++i) { 78704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok const int squaredDistance = correction->mDistances[i]; 78804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok if (i < adjustedProximityMatchedCount) { 78904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok multiplyIntCapped(typedLetterMultiplier, &finalFreq); 79004fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } 79104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok if (squaredDistance >= 0) { 79204fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // Promote or demote the score according to the distance from the sweet spot 79304fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f; 79404fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok static const float B = 1.0f; 79504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok static const float C = 0.5f; 79604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok static const float MIN = 0.3f; 79704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS; 79804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok static const float R2 = HALF_SCORE_SQUARED_RADIUS; 79977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa const float x = static_cast<float>(squaredDistance) 8003e8c58f68d53e6cc9dbf59201c7bdfb8ad04a1cdSatoshi Kataoka / ProximityInfoState::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR; 80104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok const float factor = max((x < R1) 802448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka ? (A * (R1 - x) + B * x) / R1 803448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka : (B * (R2 - x) + C * (x - R1)) / (R2 - R1), MIN); 804448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka // factor is a piecewise linear function like: 80504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // A -_ . 80604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // ^-_ . 80704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // B \ . 80804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // \_ . 80904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // C ------------. 81004fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // . 81104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // 0 R1 R2 . 812448e732272bb3e55d649d2d5dd6a0acb9efdaec3Satoshi Kataoka multiplyRate((int)(factor * 100.0f), &finalFreq); 81304fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { 81404fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); 81504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } 816e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok } 81704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } else { 81804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // Promotion for a word with proximity characters 81904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok for (int i = 0; i < adjustedProximityMatchedCount; ++i) { 82004fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok // A word with proximity corrections 82104fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok if (DEBUG_DICT_FULL) { 82204fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok AKLOGI("Found a proximity correction."); 82304fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } 82404fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok multiplyIntCapped(typedLetterMultiplier, &finalFreq); 82504fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok if (i < additionalProximityCount) { 82604fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); 82704fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } else { 82804fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); 82904fd04d6ffe40fc45b4f51640a3f12c3ea88d8acsatok } 830612c6e49c03dc49320a0bf141f51e45a8b969d43satok } 8319ee8c9c45c960dae6fbf0f35e4c84c9c1c85fc3fYusuke Nojima } 8329ee8c9c45c960dae6fbf0f35e4c84c9c1c85fc3fYusuke Nojima 8331b9fa942b4b62a818e45655dc5097c7eed7a5465satok // If the user types too many(three or more) proximity characters with additional proximity 8341b9fa942b4b62a818e45655dc5097c7eed7a5465satok // character,do not treat as the same length word. 8351b9fa942b4b62a818e45655dc5097c7eed7a5465satok if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3 8361b9fa942b4b62a818e45655dc5097c7eed7a5465satok || transposedCount > 0 || skipped || excessiveCount > 0)) { 8371b9fa942b4b62a818e45655dc5097c7eed7a5465satok sameLength = false; 8381b9fa942b4b62a818e45655dc5097c7eed7a5465satok } 8391b9fa942b4b62a818e45655dc5097c7eed7a5465satok 84010266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok const int errorCount = adjustedProximityMatchedCount > 0 84110266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok ? adjustedProximityMatchedCount 84210266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok : (proximityMatchedCount + transposedCount); 843bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok multiplyRate( 844687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka 100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputSize, &finalFreq); 845bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 846bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Promotion for an exactly matched word 84710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok if (ed == 0) { 848bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Full exact match 84954af64ae921baa764d64c11c7f4f8edd6352d405satok if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 8501b9fa942b4b62a818e45655dc5097c7eed7a5465satok && quoteDiffCount == 0 && additionalProximityCount == 0) { 851bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); 852bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 853612c6e49c03dc49320a0bf141f51e45a8b969d43satok } 854635f68e8222901d607a5ca6fab95985bc496d72asatok 855bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // Promote a word with no correction 8561b9fa942b4b62a818e45655dc5097c7eed7a5465satok if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0 8571b9fa942b4b62a818e45655dc5097c7eed7a5465satok && additionalProximityCount == 0) { 858bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); 859bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 860bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 861bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // TODO: Check excessive count and transposed count 862bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok // TODO: Remove this if possible 863635f68e8222901d607a5ca6fab95985bc496d72asatok /* 864bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok If the last character of the user input word is the same as the next character 865bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok of the output word, and also all of characters of the user input are matched 866bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok to the output word, we'll promote that word a bit because 867bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok that word can be considered the combination of skipped and matched characters. 868bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok This means that the 'sm' pattern wins over the 'ma' pattern. 869bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok e.g.) 870bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok shel -> shell [mmmma] or [mmmsm] 871bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok hel -> hello [mmmaa] or [mmsma] 872bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok m ... matching 873bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok s ... skipping 874bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok a ... traversing all 8757adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok t ... transposing 8767adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok e ... exceeding 8777adf2cdbbc25060614875a9f176a0fc1b9c42b2esatok p ... proximity matching 878635f68e8222901d607a5ca6fab95985bc496d72asatok */ 879687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (matchCount == inputSize && matchCount >= 2 && !skipped 880635f68e8222901d607a5ca6fab95985bc496d72asatok && word[matchCount] == word[matchCount - 1]) { 881635f68e8222901d607a5ca6fab95985bc496d72asatok multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq); 882635f68e8222901d607a5ca6fab95985bc496d72asatok } 883635f68e8222901d607a5ca6fab95985bc496d72asatok 88410266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok // TODO: Do not use sameLength? 885bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok if (sameLength) { 886bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok multiplyIntCapped(fullWordMultiplier, &finalFreq); 887bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 888bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 889687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka if (useFullEditDistance && outputLength > inputSize + 1) { 890687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka const int diff = outputLength - inputSize - 1; 89140a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok const int divider = diff < 31 ? 1 << diff : S_INT_MAX; 89240a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok finalFreq = divider > finalFreq ? 1 : finalFreq / divider; 89340a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok } 89440a5f6fa4df529bf21813d54fc20ffe5b3cbe436satok 895bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok if (DEBUG_DICT_FULL) { 89654af64ae921baa764d64c11c7f4f8edd6352d405satok AKLOGI("calc: %d, %d", outputLength, sameLength); 897bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok } 898bcac0e9e23853891a5a45fd19b6f8917ffc705f7satok 899e05b3f4b3a57dcf99ade35bfbc1e1cdc3c3e476csatok if (DEBUG_CORRECTION_FREQ 900687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputSize)) { 901687a244703a02323ebd64433cbaead5def499861Satoshi Kataoka DUMP_WORD(correction->getPrimaryInputWord(), inputSize); 90254af64ae921baa764d64c11c7f4f8edd6352d405satok DUMP_WORD(correction->mWord, outputLength); 9031b9fa942b4b62a818e45655dc5097c7eed7a5465satok AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, 9041b9fa942b4b62a818e45655dc5097c7eed7a5465satok skippedCount, transposedCount, excessiveCount, additionalProximityCount, 9051b9fa942b4b62a818e45655dc5097c7eed7a5465satok outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq); 90610266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok } 90710266c09ec83db497c8f22dd9dc4cb45c1cf36e9satok 908612c6e49c03dc49320a0bf141f51e45a8b969d43satok return finalFreq; 909612c6e49c03dc49320a0bf141f51e45a8b969d43satok} 910612c6e49c03dc49320a0bf141f51e45a8b969d43satok 911635f68e8222901d607a5ca6fab95985bc496d72asatok/* static */ 912a85f4929cd027246045ec3e806857b84e64fe762satokint Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( 913a85f4929cd027246045ec3e806857b84e64fe762satok const int *freqArray, const int *wordLengthArray, const int wordCount, 9140bbb917d12358e0264796e75dea888f244761b64Ken Wakasa const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { 91529dc80614bc529ca2c0b96e1a731ebb7a5433090satok const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; 91629dc80614bc529ca2c0b96e1a731ebb7a5433090satok 91729dc80614bc529ca2c0b96e1a731ebb7a5433090satok bool firstCapitalizedWordDemotion = false; 91829dc80614bc529ca2c0b96e1a731ebb7a5433090satok bool secondCapitalizedWordDemotion = false; 919a85f4929cd027246045ec3e806857b84e64fe762satok 920a85f4929cd027246045ec3e806857b84e64fe762satok { 921a85f4929cd027246045ec3e806857b84e64fe762satok // TODO: Handle multiple capitalized word demotion properly 922a85f4929cd027246045ec3e806857b84e64fe762satok const int firstWordLength = wordLengthArray[0]; 923a85f4929cd027246045ec3e806857b84e64fe762satok const int secondWordLength = wordLengthArray[1]; 924a85f4929cd027246045ec3e806857b84e64fe762satok if (firstWordLength >= 2) { 925a85f4929cd027246045ec3e806857b84e64fe762satok firstCapitalizedWordDemotion = isUpperCase(word[0]); 926a85f4929cd027246045ec3e806857b84e64fe762satok } 927a85f4929cd027246045ec3e806857b84e64fe762satok 928a85f4929cd027246045ec3e806857b84e64fe762satok if (secondWordLength >= 2) { 929a85f4929cd027246045ec3e806857b84e64fe762satok // FIXME: word[firstWordLength + 1] is incorrect. 930a85f4929cd027246045ec3e806857b84e64fe762satok secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); 931a85f4929cd027246045ec3e806857b84e64fe762satok } 93229dc80614bc529ca2c0b96e1a731ebb7a5433090satok } 93329dc80614bc529ca2c0b96e1a731ebb7a5433090satok 934a85f4929cd027246045ec3e806857b84e64fe762satok 93529dc80614bc529ca2c0b96e1a731ebb7a5433090satok const bool capitalizedWordDemotion = 93629dc80614bc529ca2c0b96e1a731ebb7a5433090satok firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; 93729dc80614bc529ca2c0b96e1a731ebb7a5433090satok 938a85f4929cd027246045ec3e806857b84e64fe762satok int totalLength = 0; 939a85f4929cd027246045ec3e806857b84e64fe762satok int totalFreq = 0; 940f2789819bd005b5b0581e8439601b5501306327dKen Wakasa for (int i = 0; i < wordCount; ++i) { 941a85f4929cd027246045ec3e806857b84e64fe762satok const int wordLength = wordLengthArray[i]; 942a85f4929cd027246045ec3e806857b84e64fe762satok if (wordLength <= 0) { 943a85f4929cd027246045ec3e806857b84e64fe762satok return 0; 944a85f4929cd027246045ec3e806857b84e64fe762satok } 945a85f4929cd027246045ec3e806857b84e64fe762satok totalLength += wordLength; 946a85f4929cd027246045ec3e806857b84e64fe762satok const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1); 947a85f4929cd027246045ec3e806857b84e64fe762satok int tempFirstFreq = freqArray[i]; 948a85f4929cd027246045ec3e806857b84e64fe762satok multiplyRate(demotionRate, &tempFirstFreq); 949a85f4929cd027246045ec3e806857b84e64fe762satok totalFreq += tempFirstFreq; 95029dc80614bc529ca2c0b96e1a731ebb7a5433090satok } 95129dc80614bc529ca2c0b96e1a731ebb7a5433090satok 952a85f4929cd027246045ec3e806857b84e64fe762satok if (totalLength <= 0 || totalFreq <= 0) { 953a85f4929cd027246045ec3e806857b84e64fe762satok return 0; 954a85f4929cd027246045ec3e806857b84e64fe762satok } 95529dc80614bc529ca2c0b96e1a731ebb7a5433090satok 956a85f4929cd027246045ec3e806857b84e64fe762satok // TODO: Currently totalFreq is adjusted to two word metrix. 95729dc80614bc529ca2c0b96e1a731ebb7a5433090satok // Promote pairFreq with multiplying by 2, because the word length is the same as the typed 95829dc80614bc529ca2c0b96e1a731ebb7a5433090satok // length. 959a85f4929cd027246045ec3e806857b84e64fe762satok totalFreq = totalFreq * 2 / wordCount; 960a85f4929cd027246045ec3e806857b84e64fe762satok if (wordCount > 2) { 961a85f4929cd027246045ec3e806857b84e64fe762satok // Safety net for 3+ words -- Caveats: many heuristics and workarounds here. 962a85f4929cd027246045ec3e806857b84e64fe762satok int oneLengthCounter = 0; 963a85f4929cd027246045ec3e806857b84e64fe762satok int twoLengthCounter = 0; 964a85f4929cd027246045ec3e806857b84e64fe762satok for (int i = 0; i < wordCount; ++i) { 965a85f4929cd027246045ec3e806857b84e64fe762satok const int wordLength = wordLengthArray[i]; 966a85f4929cd027246045ec3e806857b84e64fe762satok // TODO: Use bigram instead of this safety net 967a85f4929cd027246045ec3e806857b84e64fe762satok if (i < wordCount - 1) { 968a85f4929cd027246045ec3e806857b84e64fe762satok const int nextWordLength = wordLengthArray[i + 1]; 969a85f4929cd027246045ec3e806857b84e64fe762satok if (wordLength == 1 && nextWordLength == 2) { 970a85f4929cd027246045ec3e806857b84e64fe762satok // Safety net to filter 1 length and 2 length sequential words 971a85f4929cd027246045ec3e806857b84e64fe762satok return 0; 972a85f4929cd027246045ec3e806857b84e64fe762satok } 973a85f4929cd027246045ec3e806857b84e64fe762satok } 974a85f4929cd027246045ec3e806857b84e64fe762satok const int freq = freqArray[i]; 975a85f4929cd027246045ec3e806857b84e64fe762satok // Demote too short weak words 976a0ac31fcaa01c21592a6e7af243c14dada65cf3esatok if (wordLength <= 4 && freq <= SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ) { 977a85f4929cd027246045ec3e806857b84e64fe762satok multiplyRate(100 * freq / MAX_FREQ, &totalFreq); 978a85f4929cd027246045ec3e806857b84e64fe762satok } 979a85f4929cd027246045ec3e806857b84e64fe762satok if (wordLength == 1) { 980a85f4929cd027246045ec3e806857b84e64fe762satok ++oneLengthCounter; 981a85f4929cd027246045ec3e806857b84e64fe762satok } else if (wordLength == 2) { 982a85f4929cd027246045ec3e806857b84e64fe762satok ++twoLengthCounter; 983a85f4929cd027246045ec3e806857b84e64fe762satok } 984a85f4929cd027246045ec3e806857b84e64fe762satok if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) { 985a85f4929cd027246045ec3e806857b84e64fe762satok // Safety net to filter too many short words 986a85f4929cd027246045ec3e806857b84e64fe762satok return 0; 987a85f4929cd027246045ec3e806857b84e64fe762satok } 988a85f4929cd027246045ec3e806857b84e64fe762satok } 989a85f4929cd027246045ec3e806857b84e64fe762satok multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq); 990a85f4929cd027246045ec3e806857b84e64fe762satok } 99129dc80614bc529ca2c0b96e1a731ebb7a5433090satok 99229dc80614bc529ca2c0b96e1a731ebb7a5433090satok // This is a workaround to try offsetting the not-enough-demotion which will be done in 99329dc80614bc529ca2c0b96e1a731ebb7a5433090satok // calcNormalizedScore in Utils.java. 99429dc80614bc529ca2c0b96e1a731ebb7a5433090satok // In calcNormalizedScore the score will be demoted by (1 - 1 / length) 99529dc80614bc529ca2c0b96e1a731ebb7a5433090satok // but we demoted only (1 - 1 / (length + 1)) so we will additionally adjust freq by 99629dc80614bc529ca2c0b96e1a731ebb7a5433090satok // (1 - 1 / length) / (1 - 1 / (length + 1)) = (1 - 1 / (length * length)) 99729dc80614bc529ca2c0b96e1a731ebb7a5433090satok const int normalizedScoreNotEnoughDemotionAdjustment = 100 - 100 / (totalLength * totalLength); 99829dc80614bc529ca2c0b96e1a731ebb7a5433090satok multiplyRate(normalizedScoreNotEnoughDemotionAdjustment, &totalFreq); 99929dc80614bc529ca2c0b96e1a731ebb7a5433090satok 100029dc80614bc529ca2c0b96e1a731ebb7a5433090satok // At this moment, totalFreq is calculated by the following formula: 100129dc80614bc529ca2c0b96e1a731ebb7a5433090satok // (firstFreq * (1 - 1 / (firstWordLength + 1)) + secondFreq * (1 - 1 / (secondWordLength + 1))) 100229dc80614bc529ca2c0b96e1a731ebb7a5433090satok // * (1 - 1 / totalLength) / (1 - 1 / (totalLength + 1)) 100329dc80614bc529ca2c0b96e1a731ebb7a5433090satok 100429dc80614bc529ca2c0b96e1a731ebb7a5433090satok multiplyIntCapped(powerIntCapped(typedLetterMultiplier, totalLength), &totalFreq); 100529dc80614bc529ca2c0b96e1a731ebb7a5433090satok 100629dc80614bc529ca2c0b96e1a731ebb7a5433090satok // This is another workaround to offset the demotion which will be done in 100729dc80614bc529ca2c0b96e1a731ebb7a5433090satok // calcNormalizedScore in Utils.java. 100829dc80614bc529ca2c0b96e1a731ebb7a5433090satok // In calcNormalizedScore the score will be demoted by (1 - 1 / length) so we have to promote 100929dc80614bc529ca2c0b96e1a731ebb7a5433090satok // the same amount because we already have adjusted the synthetic freq of this "missing or 101029dc80614bc529ca2c0b96e1a731ebb7a5433090satok // mistyped space" suggestion candidate above in this method. 101129dc80614bc529ca2c0b96e1a731ebb7a5433090satok const int normalizedScoreDemotionRateOffset = (100 + 100 / totalLength); 101229dc80614bc529ca2c0b96e1a731ebb7a5433090satok multiplyRate(normalizedScoreDemotionRateOffset, &totalFreq); 101329dc80614bc529ca2c0b96e1a731ebb7a5433090satok 101429dc80614bc529ca2c0b96e1a731ebb7a5433090satok if (isSpaceProximity) { 101529dc80614bc529ca2c0b96e1a731ebb7a5433090satok // A word pair with one space proximity correction 101629dc80614bc529ca2c0b96e1a731ebb7a5433090satok if (DEBUG_DICT) { 101729dc80614bc529ca2c0b96e1a731ebb7a5433090satok AKLOGI("Found a word pair with space proximity correction."); 101829dc80614bc529ca2c0b96e1a731ebb7a5433090satok } 101929dc80614bc529ca2c0b96e1a731ebb7a5433090satok multiplyIntCapped(typedLetterMultiplier, &totalFreq); 102029dc80614bc529ca2c0b96e1a731ebb7a5433090satok multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &totalFreq); 102129dc80614bc529ca2c0b96e1a731ebb7a5433090satok } 102229dc80614bc529ca2c0b96e1a731ebb7a5433090satok 10238330b488e9534afe8f5775ad9416c904d01bb665satok if (isSpaceProximity) { 10248330b488e9534afe8f5775ad9416c904d01bb665satok multiplyRate(WORDS_WITH_MISTYPED_SPACE_DEMOTION_RATE, &totalFreq); 10258330b488e9534afe8f5775ad9416c904d01bb665satok } else { 10268330b488e9534afe8f5775ad9416c904d01bb665satok multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); 10278330b488e9534afe8f5775ad9416c904d01bb665satok } 102829dc80614bc529ca2c0b96e1a731ebb7a5433090satok 102929dc80614bc529ca2c0b96e1a731ebb7a5433090satok if (capitalizedWordDemotion) { 103029dc80614bc529ca2c0b96e1a731ebb7a5433090satok multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); 103129dc80614bc529ca2c0b96e1a731ebb7a5433090satok } 103229dc80614bc529ca2c0b96e1a731ebb7a5433090satok 10331f6b52e76c59700984fe2b7d7b436d81da997e93satok if (DEBUG_CORRECTION_FREQ) { 1034a85f4929cd027246045ec3e806857b84e64fe762satok AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1], 1035a85f4929cd027246045ec3e806857b84e64fe762satok wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq); 1036a85f4929cd027246045ec3e806857b84e64fe762satok DUMP_WORD(word, wordLengthArray[0]); 10371f6b52e76c59700984fe2b7d7b436d81da997e93satok } 10381f6b52e76c59700984fe2b7d7b436d81da997e93satok 103929dc80614bc529ca2c0b96e1a731ebb7a5433090satok return totalFreq; 104029dc80614bc529ca2c0b96e1a731ebb7a5433090satok} 104129dc80614bc529ca2c0b96e1a731ebb7a5433090satok 1042be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok/* Damerau-Levenshtein distance */ 1043be0cf72253f15bff6abdeaa79f60a56f06ab7b86satokinline static int editDistanceInternal( 10440bbb917d12358e0264796e75dea888f244761b64Ken Wakasa int *editDistanceTable, const unsigned short *before, 10450bbb917d12358e0264796e75dea888f244761b64Ken Wakasa const int beforeLength, const unsigned short *after, const int afterLength) { 10464d355989bd972ba792ba546a55c67e5b6fc2527asatok // dp[li][lo] dp[a][b] = dp[ a * lo + b] 10470bbb917d12358e0264796e75dea888f244761b64Ken Wakasa int *dp = editDistanceTable; 1048be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok const int li = beforeLength + 1; 1049be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok const int lo = afterLength + 1; 10504d355989bd972ba792ba546a55c67e5b6fc2527asatok for (int i = 0; i < li; ++i) { 10514d355989bd972ba792ba546a55c67e5b6fc2527asatok dp[lo * i] = i; 10524d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10534d355989bd972ba792ba546a55c67e5b6fc2527asatok for (int i = 0; i < lo; ++i) { 10544d355989bd972ba792ba546a55c67e5b6fc2527asatok dp[i] = i; 10554d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10564d355989bd972ba792ba546a55c67e5b6fc2527asatok 10574d355989bd972ba792ba546a55c67e5b6fc2527asatok for (int i = 0; i < li - 1; ++i) { 10584d355989bd972ba792ba546a55c67e5b6fc2527asatok for (int j = 0; j < lo - 1; ++j) { 1059be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok const uint32_t ci = toBaseLowerCase(before[i]); 1060be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok const uint32_t co = toBaseLowerCase(after[j]); 10614d355989bd972ba792ba546a55c67e5b6fc2527asatok const uint16_t cost = (ci == co) ? 0 : 1; 10624d355989bd972ba792ba546a55c67e5b6fc2527asatok dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, 10634d355989bd972ba792ba546a55c67e5b6fc2527asatok min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); 1064be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) 1065be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok && co == toBaseLowerCase(before[i - 1])) { 10664d355989bd972ba792ba546a55c67e5b6fc2527asatok dp[(i + 1) * lo + (j + 1)] = min( 10674d355989bd972ba792ba546a55c67e5b6fc2527asatok dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost); 10684d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10694d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10704d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10714d355989bd972ba792ba546a55c67e5b6fc2527asatok 10724d355989bd972ba792ba546a55c67e5b6fc2527asatok if (DEBUG_EDIT_DISTANCE) { 10739fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("IN = %d, OUT = %d", beforeLength, afterLength); 10744d355989bd972ba792ba546a55c67e5b6fc2527asatok for (int i = 0; i < li; ++i) { 10754d355989bd972ba792ba546a55c67e5b6fc2527asatok for (int j = 0; j < lo; ++j) { 10769fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]); 10774d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10784d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10794d355989bd972ba792ba546a55c67e5b6fc2527asatok } 10804d355989bd972ba792ba546a55c67e5b6fc2527asatok return dp[li * lo - 1]; 10814d355989bd972ba792ba546a55c67e5b6fc2527asatok} 1082be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 10830bbb917d12358e0264796e75dea888f244761b64Ken Wakasaint Correction::RankingAlgorithm::editDistance(const unsigned short *before, 10840bbb917d12358e0264796e75dea888f244761b64Ken Wakasa const int beforeLength, const unsigned short *after, const int afterLength) { 1085be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok int table[(beforeLength + 1) * (afterLength + 1)]; 1086be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok return editDistanceInternal(table, before, beforeLength, after, afterLength); 1087be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok} 1088be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 1089be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 1090be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// In dictionary.cpp, getSuggestion() method, 1091be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// suggestion scores are computed using the below formula. 1092be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// original score 1093bcec82de66f52655593dc233346f11468f5077a0Ken Wakasa// := powf(mTypedLetterMultiplier (this is defined 2), 1094be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// (the number of matched characters between typed word and suggested word)) 1095be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// * (individual word's score which defined in the unigram dictionary, 1096be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// and this score is defined in range [0, 255].) 1097be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// Then, the following processing is applied. 1098be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// - If the dictionary word is matched up to the point of the user entry 1099be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// (full match up to min(before.length(), after.length()) 1100be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) 1101be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// - If the word is a true full match except for differences in accents or 1102be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// capitalization, then treat it as if the score was 255. 1103be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// - If before.length() == after.length() 1104be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// => multiply by mFullWordMultiplier (this is defined 2)) 1105bcec82de66f52655593dc233346f11468f5077a0Ken Wakasa// So, maximum original score is powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2 1106be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// For historical reasons we ignore the 1.2 modifier (because the measure for a good 1107be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// autocorrection threshold was done at a time when it didn't exist). This doesn't change 1108be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok// the result. 1109bcec82de66f52655593dc233346f11468f5077a0Ken Wakasa// So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. 1110be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 1111be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok/* static */ 11120bbb917d12358e0264796e75dea888f244761b64Ken Wakasafloat Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, 11130bbb917d12358e0264796e75dea888f244761b64Ken Wakasa const int beforeLength, const unsigned short *after, const int afterLength, 1114be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok const int score) { 1115be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok if (0 == beforeLength || 0 == afterLength) { 1116be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok return 0; 1117be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok } 1118be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok const int distance = editDistance(before, beforeLength, after, afterLength); 1119be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok int spaceCount = 0; 1120be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok for (int i = 0; i < afterLength; ++i) { 1121be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok if (after[i] == CODE_SPACE) { 1122be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok ++spaceCount; 1123be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok } 1124be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok } 1125be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 1126be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok if (spaceCount == afterLength) { 1127be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok return 0; 1128be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok } 1129be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 1130f2789819bd005b5b0581e8439601b5501306327dKen Wakasa const float maxScore = score >= S_INT_MAX ? static_cast<float>(S_INT_MAX) 1131f2789819bd005b5b0581e8439601b5501306327dKen Wakasa : static_cast<float>(MAX_INITIAL_SCORE) 1132f2789819bd005b5b0581e8439601b5501306327dKen Wakasa * powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), 1133f2789819bd005b5b0581e8439601b5501306327dKen Wakasa static_cast<float>(min(beforeLength, afterLength - spaceCount))) 1134f2789819bd005b5b0581e8439601b5501306327dKen Wakasa * static_cast<float>(FULL_WORD_MULTIPLIER); 1135be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok 1136be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok // add a weight based on edit distance. 1137be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok // distance <= max(afterLength, beforeLength) == afterLength, 1138be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok // so, 0 <= distance / afterLength <= 1 113977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa const float weight = 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); 1140f2789819bd005b5b0581e8439601b5501306327dKen Wakasa return (static_cast<float>(score) / maxScore) * weight; 1141be0cf72253f15bff6abdeaa79f60a56f06ab7b86satok} 11422df3060883c7535029c7dfbbb4f7b05935d796aesatok} // namespace latinime 1143