dictionary.cpp revision c18510049a3422c88ed3ab3bbc64944c94a611fd
/*
 * Copyright (C) 2009, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp"
18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
19a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/dictionary.h"
20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa
2177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h>
2277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
233b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h"
24d18112046af044d1a13d5399087e14a0d230cfedSatoshi Kataoka#include "suggest/core/dictionary/bigram_dictionary.h"
2576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_header_structure_policy.h"
260624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
277c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka#include "suggest/core/session/dic_traverse_session.h"
28f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest.h"
29f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest_options.h"
30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
31b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
3276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "utils/log_utils.h"
33d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok
34923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime {
35923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
365ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagiconst int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
375ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi
380624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke KuroyanagiDictionary::Dictionary(JNIEnv *env,
390624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
400624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
41d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi          mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
42e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
43e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
4476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    logDictionaryInfo(env);
45923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project}
46923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
47662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() {
4830088259480130e5bac5c2028e2c7c3e6d4c51a2satok    delete mBigramDictionary;
49ffd08e37881e9e7f403d04c1a1a8aaba409d36b5Ken Wakasa    delete mGestureSuggest;
50609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang    delete mTypingSuggest;
51d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    delete mDictionaryStructureWithBufferPolicy;
52923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project}
53e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
547c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataokaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
555db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa        int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
56338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi        int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
57338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi        const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
58338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi        int *spaceIndices, int *outputTypes) const {
5977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    int result = 0;
60338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi    if (suggestOptions->isGesture()) {
617c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka        DicTraverseSession::initSessionInstance(
62338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi                traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
635db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa        result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
645db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa                ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
655db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa                frequencies, spaceIndices, outputTypes);
66586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka        if (DEBUG_DICT) {
675db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa            DUMP_RESULT(outWords, frequencies);
68586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka        }
6977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        return result;
7077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    } else {
716c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        DicTraverseSession::initSessionInstance(
726c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka                traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
736c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
746c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka                ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
756c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka                outWords, frequencies, spaceIndices, outputTypes);
766c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        if (DEBUG_DICT) {
776c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka            DUMP_RESULT(outWords, frequencies);
78609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang        }
796c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        return result;
8077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    }
8177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa}
8277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
832a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagiint Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
842a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi        int *outputTypes) const {
8577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    if (length <= 0) return 0;
862a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi    return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
8777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa}
8877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
89e0e67373735918c78eaeaf24f127e1d28816aa29Satoshi Kataokaint Dictionary::getProbability(const int *word, int length) const {
90d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
916c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka            false /* forceLowerCaseSearch */);
92cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi    if (NOT_A_DICT_POS == pos) {
936c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        return NOT_A_PROBABILITY;
946c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka    }
9565d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
96e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok}
97e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
984d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagiint Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
994d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi        int length1) const {
1004d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi    return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
1015bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi}
1025bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
1035bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
10466facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
1055bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi}
1065bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
1075bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
1085bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi        const int length1, const int probability) {
10966facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
11066facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi            probability);
1115bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi}
1125bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
1135bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::removeBigramWords(const int *const word0, const int length0,
1145bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi        const int *const word1, const int length1) {
11566facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
1164d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang}
117ede2333640accecc066de328cb4f93e03e4bc5d7Tom Ouyang
118d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flush(const char *const filePath) {
119d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->flush(filePath);
120d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi}
121d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
122d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flushWithGC(const char *const filePath) {
123d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
124d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi}
125d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
126c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagibool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
127c18510049a3422c88ed3ab3bbc64944c94a611fdKeisuke Kuroyanagi    return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
128d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi}
129d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
13031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagivoid Dictionary::getProperty(const char *const query, char *const outResult,
13131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        const int maxResultLength) const {
13231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi    return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
13331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi}
13431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi
13576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagivoid Dictionary::logDictionaryInfo(JNIEnv *const env) const {
1365ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
1375ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
1385ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
13976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    const DictionaryHeaderStructurePolicy *const headerPolicy =
14076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            getDictionaryStructurePolicy()->getHeaderStructurePolicy();
14176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
1425ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi            HEADER_ATTRIBUTE_BUFFER_SIZE);
14376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
1445ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi            HEADER_ATTRIBUTE_BUFFER_SIZE);
1455ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
1465ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi            HEADER_ATTRIBUTE_BUFFER_SIZE);
1475ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi
1485ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
1495ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
1505ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
1515ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
1525ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi            dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
1535ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
1545ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi            versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
1555ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi    intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
1565ed30a7660048ef4bf78077e77554c97786eae2bKeisuke Kuroyanagi            dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
15776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi
15876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    LogUtils::logToJava(env,
1590624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi            "Dictionary info: dictionary = %s ; version = %s ; date = %s",
1600624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi            dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
16176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi}
16276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi
163923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime
164