dictionary.cpp revision 77e8e81ad95cfc1eb8f8407fc872674b8d08bbe9
/*
 * Copyright (C) 2009, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp"
18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
1977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h>
2077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
2177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include "bigram_dictionary.h"
2246a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard#include "binary_format.h"
233b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h"
24923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project#include "dictionary.h"
25deb0987274d8f351cb4ff2af205ef2fd8cb93c23Satoshi Kataoka#include "gesture_decoder_wrapper.h"
2677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include "unigram_dictionary.h"
27d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok
28923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime {
29923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
308fbd55229243cb66c03d5ea1f79dfb39f596590dsatok// TODO: Change the type of all keyCodes to uint32_t
31e90b333017c68e888a5e3d351f07ea29036457d0Ken WakasaDictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
32e90b333017c68e888a5e3d351f07ea29036457d0Ken Wakasa        int typedLetterMultiplier, int fullWordMultiplier,
33b7d7c5a369cef80d4319de8e433501ab25b49615Jean Chalard        int maxWordLength, int maxWords, int maxPredictions)
34e90b333017c68e888a5e3d351f07ea29036457d0Ken Wakasa    : mDict((unsigned char*) dict), mDictSize(dictSize),
355b0761e6a94227d6ef788f589fb6edcd44ed791fJean Chalard      mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust) {
36662fe69ba2b8513a1a6640adde917db9a13e98afsatok    if (DEBUG_DICT) {
37662fe69ba2b8513a1a6640adde917db9a13e98afsatok        if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
389fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok            AKLOGI("Max word length (%d) is greater than %d",
39662fe69ba2b8513a1a6640adde917db9a13e98afsatok                    maxWordLength, MAX_WORD_LENGTH_INTERNAL);
409fb6f47a6a11f62d134d4d6259181ac987fc1ad3satok            AKLOGI("IN NATIVE SUGGEST Version: %d", (mDict[0] & 0xFF));
41662fe69ba2b8513a1a6640adde917db9a13e98afsatok        }
42715514d7dd5716c77781b97d4f6ac8eace75e8b3satok    }
4346a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard    const unsigned int headerSize = BinaryFormat::getHeaderSize(mDict);
44cd274b146961c95abf1436caa8ec482650432b6eJean Chalard    const unsigned int options = BinaryFormat::getFlags(mDict);
4546a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard    mUnigramDictionary = new UnigramDictionary(mDict + headerSize, typedLetterMultiplier,
46cd274b146961c95abf1436caa8ec482650432b6eJean Chalard            fullWordMultiplier, maxWordLength, maxWords, options);
47b7d7c5a369cef80d4319de8e433501ab25b49615Jean Chalard    mBigramDictionary = new BigramDictionary(mDict + headerSize, maxWordLength, maxPredictions);
48deb0987274d8f351cb4ff2af205ef2fd8cb93c23Satoshi Kataoka    mGestureDecoder = new GestureDecoderWrapper(maxWordLength, maxWords);
49efb63246c2e5df29d62416d48f62e2b57b14de7cSatoshi Kataoka    mGestureDecoder->setDict(mUnigramDictionary, mBigramDictionary,
50efb63246c2e5df29d62416d48f62e2b57b14de7cSatoshi Kataoka            mDict + headerSize /* dict root */, 0 /* root pos */);
51923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project}
52923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
53662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() {
5430088259480130e5bac5c2028e2c7c3e6d4c51a2satok    delete mUnigramDictionary;
5530088259480130e5bac5c2028e2c7c3e6d4c51a2satok    delete mBigramDictionary;
568658e552f350167bb8f5af7b9e991775f2cc6a6dKen Wakasa    delete mGestureDecoder;
57923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project}
58e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
5977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
6077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars,
6177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        int prevWordLength, int commitPoint, bool isGesture,
6277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        bool useFullEditDistance, unsigned short *outWords,
6377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        int *frequencies, int *spaceIndices, int *outputTypes) {
6477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    int result = 0;
6577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    if (isGesture) {
6677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        mGestureDecoder->setPrevWord(prevWordChars, prevWordLength);
6777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        result = mGestureDecoder->getSuggestions(proximityInfo, xcoordinates, ycoordinates,
6877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa                times, pointerIds, codes, codesSize, commitPoint,
6977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa                outWords, frequencies, spaceIndices, outputTypes);
7077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        return result;
7177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    } else {
7277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        std::map<int, int> bigramMap;
7377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
7477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
7577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa                prevWordLength, &bigramMap, bigramFilter);
7677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates,
7777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa                ycoordinates, codes, codesSize, &bigramMap, bigramFilter,
7877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa                useFullEditDistance, outWords, frequencies, outputTypes);
7977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        return result;
8077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    }
8177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa}
8277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
8377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasaint Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
8477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        unsigned short *outWords, int *frequencies, int *outputTypes) const {
8577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    if (length <= 0) return 0;
8677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
8777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa            outputTypes);
8877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa}
8977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
90b1ed1d47acb366706df0ccc77711df92eff4f51esatokint Dictionary::getFrequency(const int32_t *word, int length) const {
912f854e170c9fde47cae804145f90d164cdb5ceb8Satoshi Kataoka    return mUnigramDictionary->getFrequency(word, length);
92e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok}
93e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
944d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyangbool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
95b1ed1d47acb366706df0ccc77711df92eff4f51esatok        int length2) const {
964d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang    return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
974d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang}
98923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime
99