dictionary.cpp revision 77e8e81ad95cfc1eb8f8407fc872674b8d08bbe9
1923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project/* 20bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Copyright (C) 2009, The Android Open Source Project 30bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 40bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Licensed under the Apache License, Version 2.0 (the "License"); 50bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * you may not use this file except in compliance with the License. 60bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * You may obtain a copy of the License at 70bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 80bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * http://www.apache.org/licenses/LICENSE-2.0 90bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 100bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Unless required by applicable law or agreed to in writing, software 110bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * distributed under the License is distributed on an "AS IS" BASIS, 120bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 130bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * See the License for the specific language governing permissions and 140bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * limitations under the License. 150bbb917d12358e0264796e75dea888f244761b64Ken Wakasa */ 16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp" 18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 1977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h> 2077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 2177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include "bigram_dictionary.h" 2246a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard#include "binary_format.h" 233b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h" 24923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project#include "dictionary.h" 25deb0987274d8f351cb4ff2af205ef2fd8cb93c23Satoshi Kataoka#include "gesture_decoder_wrapper.h" 2677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include "unigram_dictionary.h" 27d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok 28923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime { 29923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 308fbd55229243cb66c03d5ea1f79dfb39f596590dsatok// TODO: Change the type of all keyCodes to uint32_t 31e90b333017c68e888a5e3d351f07ea29036457d0Ken WakasaDictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, 32e90b333017c68e888a5e3d351f07ea29036457d0Ken Wakasa int typedLetterMultiplier, int fullWordMultiplier, 33b7d7c5a369cef80d4319de8e433501ab25b49615Jean Chalard int maxWordLength, int maxWords, int maxPredictions) 34e90b333017c68e888a5e3d351f07ea29036457d0Ken Wakasa : mDict((unsigned char*) dict), mDictSize(dictSize), 355b0761e6a94227d6ef788f589fb6edcd44ed791fJean Chalard mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust) { 36662fe69ba2b8513a1a6640adde917db9a13e98afsatok if (DEBUG_DICT) { 37662fe69ba2b8513a1a6640adde917db9a13e98afsatok if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) { 389fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("Max word length (%d) is greater than %d", 39662fe69ba2b8513a1a6640adde917db9a13e98afsatok maxWordLength, MAX_WORD_LENGTH_INTERNAL); 409fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF)); 41662fe69ba2b8513a1a6640adde917db9a13e98afsatok } 42715514d7dd5716c77781b97d4f6ac8eace75e8b3satok } 4346a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard const unsigned int headerSize = BinaryFormat::getHeaderSize(mDict); 44cd274b146961c95abf1436caa8ec482650432b6eJean Chalard const unsigned int options = BinaryFormat::getFlags(mDict); 4546a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard mUnigramDictionary = new UnigramDictionary(mDict + headerSize, typedLetterMultiplier, 46cd274b146961c95abf1436caa8ec482650432b6eJean Chalard fullWordMultiplier, maxWordLength, maxWords, options); 47b7d7c5a369cef80d4319de8e433501ab25b49615Jean Chalard mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength, maxPredictions); 48deb0987274d8f351cb4ff2af205ef2fd8cb93c23Satoshi Kataoka mGestureDecoder = new GestureDecoderWrapper(maxWordLength, maxWords); 49efb63246c2e5df29d62416d48f62e2b57b14de7cSatoshi Kataoka mGestureDecoder->setDict(mUnigramDictionary, mBigramDictionary, 50efb63246c2e5df29d62416d48f62e2b57b14de7cSatoshi Kataoka mDict + headerSize /* dict root */, 0 /* root pos */); 51923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 52923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 53662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() { 5430088259480130e5bac5c2028e2c7c3e6d4c51a2satok delete mUnigramDictionary; 5530088259480130e5bac5c2028e2c7c3e6d4c51a2satok delete mBigramDictionary; 568658e552f350167bb8f5af7b9e991775f2cc6a6dKen Wakasa delete mGestureDecoder; 57923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 58e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 5977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, 6077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, 6177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int prevWordLength, int commitPoint, bool isGesture, 6277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa bool useFullEditDistance, unsigned short *outWords, 6377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int *frequencies, int *spaceIndices, int *outputTypes) { 6477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int result = 0; 6577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (isGesture) { 6677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa mGestureDecoder->setPrevWord(prevWordChars, prevWordLength); 6777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa result = mGestureDecoder->getSuggestions(proximityInfo, xcoordinates, ycoordinates, 6877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa times, pointerIds, codes, codesSize, commitPoint, 6977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa outWords, frequencies, spaceIndices, outputTypes); 7077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return result; 7177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } else { 7277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa std::map<int, int> bigramMap; 7377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; 7477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars, 7577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa prevWordLength, &bigramMap, bigramFilter); 7677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, 7777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa ycoordinates, codes, codesSize, &bigramMap, bigramFilter, 7877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa useFullEditDistance, outWords, frequencies, outputTypes); 7977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return result; 8077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } 8177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 8277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 8377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasaint Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize, 8477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa unsigned short *outWords, int *frequencies, int *outputTypes) const { 8577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (length <= 0) return 0; 8677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, 8777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa outputTypes); 8877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 8977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 90b1ed1d47acb366706df0ccc77711df92eff4f51esatokint Dictionary::getFrequency(const int32_t *word, int length) const { 912f854e170c9fde47cae804145f90d164cdb5ceb8Satoshi Kataoka return mUnigramDictionary->getFrequency(word, length); 92e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok} 93e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 944d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyangbool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, 95b1ed1d47acb366706df0ccc77711df92eff4f51esatok int length2) const { 964d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang return mBigramDictionary->isValidBigram(word1, length1, word2, length2); 974d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang} 98923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime 99