dictionary.cpp revision b68e73448104714e8f12f89a1e00fb10b5fd14c4
1923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project/* 20bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Copyright (C) 2009, The Android Open Source Project 30bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 40bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Licensed under the Apache License, Version 2.0 (the "License"); 50bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * you may not use this file except in compliance with the License. 60bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * You may obtain a copy of the License at 70bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 80bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * http://www.apache.org/licenses/LICENSE-2.0 90bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * 100bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * Unless required by applicable law or agreed to in writing, software 110bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * distributed under the License is distributed on an "AS IS" BASIS, 120bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 130bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * See the License for the specific language governing permissions and 140bbb917d12358e0264796e75dea888f244761b64Ken Wakasa * limitations under the License. 150bbb917d12358e0264796e75dea888f244761b64Ken Wakasa */ 16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp" 18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 19b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "dictionary.h" 20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa 21609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang#include <map> // TODO: remove 2277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h> 2377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 2477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include "bigram_dictionary.h" 2546a1eec4d86f4b47434275065d3170728255f2c8Jean Chalard#include "binary_format.h" 263b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h" 27e9f3e182e4b3217282831fd8805958270b4dbba3Satoshi Kataoka#include "dic_traverse_wrapper.h" 28b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/core/suggest.h" 29b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" 30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" 3177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include "unigram_dictionary.h" 32d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok 33923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime { 34923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 355db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken WakasaDictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust) 36162c211b44c1546b2e9be36e0cec50de497217a9Ken Wakasa : mDict(static_cast<unsigned char *>(dict)), 37162c211b44c1546b2e9be36e0cec50de497217a9Ken Wakasa mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)), 38162c211b44c1546b2e9be36e0cec50de497217a9Ken Wakasa mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), 395db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa mUnigramDictionary(new UnigramDictionary(mOffsetDict, BinaryFormat::getFlags(mDict))), 405db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa mBigramDictionary(new BigramDictionary(mOffsetDict)), 41e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), 42e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { 43923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 44923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project 45662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() { 4630088259480130e5bac5c2028e2c7c3e6d4c51a2satok delete mUnigramDictionary; 4730088259480130e5bac5c2028e2c7c3e6d4c51a2satok delete mBigramDictionary; 48ffd08e37881e9e7f403d04c1a1a8aaba409d36b5Ken Wakasa delete mGestureSuggest; 49609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang delete mTypingSuggest; 50923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} 51e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 529127811493b9288a0ca385e75db68520b197dea2Satoshi Kataokaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, 535db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, 545db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint, bool isGesture, 551e61493c50082264caaef862df02b1ccc84dc396Ken Wakasa bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, 561e61493c50082264caaef862df02b1ccc84dc396Ken Wakasa int *outputTypes) const { 5777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa int result = 0; 5877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (isGesture) { 59e9f3e182e4b3217282831fd8805958270b4dbba3Satoshi Kataoka DicTraverseWrapper::initDicTraverseSession( 605db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa traverseSession, this, prevWordCodePoints, prevWordLength); 615db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 625db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords, 635db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa frequencies, spaceIndices, outputTypes); 64586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka if (DEBUG_DICT) { 655db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa DUMP_RESULT(outWords, frequencies); 66586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka } 6777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa return result; 6877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } else { 69609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang if (USE_SUGGEST_INTERFACE_FOR_TYPING) { 70609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang DicTraverseWrapper::initDicTraverseSession( 71609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang traverseSession, this, prevWordCodePoints, prevWordLength); 72609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 73609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, 74609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang outWords, frequencies, spaceIndices, outputTypes); 75609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang if (DEBUG_DICT) { 76609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang DUMP_RESULT(outWords, frequencies); 77609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang } 78609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang return result; 79609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang } else { 80609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang std::map<int, int> bigramMap; 81609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE]; 82609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang mBigramDictionary->fillBigramAddressToProbabilityMapAndFilter(prevWordCodePoints, 83609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang prevWordLength, &bigramMap, bigramFilter); 84609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang result = mUnigramDictionary->getSuggestions(proximityInfo, xcoordinates, ycoordinates, 85609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang inputCodePoints, inputSize, &bigramMap, bigramFilter, useFullEditDistance, 86609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang outWords, frequencies, outputTypes); 87609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang return result; 88609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang } 8977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa } 9077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 9177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 925db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasaint Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, 931e61493c50082264caaef862df02b1ccc84dc396Ken Wakasa int *outWords, int *frequencies, int *outputTypes) const { 9477e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa if (length <= 0) return 0; 955db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa return mBigramDictionary->getBigrams(word, length, inputCodePoints, inputSize, outWords, 965db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa frequencies, outputTypes); 9777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa} 9877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa 99e0e67373735918c78eaeaf24f127e1d28816aa29Satoshi Kataokaint Dictionary::getProbability(const int *word, int length) const { 100e0e67373735918c78eaeaf24f127e1d28816aa29Satoshi Kataoka return mUnigramDictionary->getProbability(word, length); 101e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok} 102e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok 103aa5a3e84ad330f55edda3087a9498c5ee16b9cbaKen Wakasabool Dictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const { 1044d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang return mBigramDictionary->isValidBigram(word1, length1, word2, length2); 1054d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang} 106923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime 107