dictionary.cpp revision d0246277fde27e9c40a270e206f1d106811e847f
/*
 * Copyright (C) 2009, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
17e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok#define LOG_TAG "LatinIME: dictionary.cpp"
18e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
19a65c267b1f1207e54c6f821148c600e3899b7f9cKen Wakasa#include "suggest/core/dictionary/dictionary.h"
20b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa
2177e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa#include <stdint.h>
2277e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
233b088a2f365a9ce06f58243c83cb961ea2920b7eKen Wakasa#include "defines.h"
24d18112046af044d1a13d5399087e14a0d230cfedSatoshi Kataoka#include "suggest/core/dictionary/bigram_dictionary.h"
2576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_header_structure_policy.h"
260624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
277c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka#include "suggest/core/session/dic_traverse_session.h"
28f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest.h"
29f497e1e5a0be0a6268bd5f27e6f61308c69c7b63Ken Wakasa#include "suggest/core/suggest_options.h"
30b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
31b68e73448104714e8f12f89a1e00fb10b5fd14c4Ken Wakasa#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
3276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi#include "utils/log_utils.h"
33d4952c8fe9e71c2b313a68077e03d64c8b5196dbsatok
34923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Projectnamespace latinime {
35923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
360624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke KuroyanagiDictionary::Dictionary(JNIEnv *env,
370624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
380624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi        : mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
39d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi          mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)),
40e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
41e67ede12ec44e998a157e08487f8d6ff8f467880Satoshi Kataoka          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
4276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    logDictionaryInfo(env);
43923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project}
44923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project
45662fe69ba2b8513a1a6640adde917db9a13e98afsatokDictionary::~Dictionary() {
4630088259480130e5bac5c2028e2c7c3e6d4c51a2satok    delete mBigramDictionary;
47ffd08e37881e9e7f403d04c1a1a8aaba409d36b5Ken Wakasa    delete mGestureSuggest;
48609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang    delete mTypingSuggest;
49d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    delete mDictionaryStructureWithBufferPolicy;
50923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project}
51e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
527c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataokaint Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
535db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa        int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
54338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi        int inputSize, int *prevWordCodePoints, int prevWordLength, int commitPoint,
55338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi        const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
56338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi        int *spaceIndices, int *outputTypes) const {
5777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    int result = 0;
58338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi    if (suggestOptions->isGesture()) {
597c92b421ee18054e43903d6794b4039357dd944aSatoshi Kataoka        DicTraverseSession::initSessionInstance(
60338ef65077f43d6a35665a5c8eb6a44928332547Keisuke Kuroyanagi                traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
615db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa        result = mGestureSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
625db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa                ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint, outWords,
635db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa                frequencies, spaceIndices, outputTypes);
64586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka        if (DEBUG_DICT) {
655db594abbad2d9e8d2cf1aa6e417aa50ffc5dfc1Ken Wakasa            DUMP_RESULT(outWords, frequencies);
66586b0ca1a27c68c29583c496a453e811451d9359Satoshi Kataoka        }
6777e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa        return result;
6877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    } else {
696c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        DicTraverseSession::initSessionInstance(
706c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka                traverseSession, this, prevWordCodePoints, prevWordLength, suggestOptions);
716c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        result = mTypingSuggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
726c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka                ycoordinates, times, pointerIds, inputCodePoints, inputSize, commitPoint,
736c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka                outWords, frequencies, spaceIndices, outputTypes);
746c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        if (DEBUG_DICT) {
756c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka            DUMP_RESULT(outWords, frequencies);
76609a871da6cdeb3c58892b307c621998842c82bfTom Ouyang        }
776c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        return result;
7877e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    }
7977e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa}
8077e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
812a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagiint Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
822a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi        int *outputTypes) const {
8377e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa    if (length <= 0) return 0;
842a2aac2568e3f2da3efc8aeaa392696471d63417Keisuke Kuroynagi    return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
8577e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa}
8677e8e81ad95cfc1eb8f8407fc872674b8d08bbe9Ken Wakasa
87e0e67373735918c78eaeaf24f127e1d28816aa29Satoshi Kataokaint Dictionary::getProbability(const int *word, int length) const {
88d81654cd61bd10f7cb56bfa4c89b34e9cfb18598Keisuke Kuroyanagi    int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
896c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka            false /* forceLowerCaseSearch */);
90cb816e5e16f086d98c8d05a0a5805c1cdfaf1c02Keisuke Kuroyanagi    if (NOT_A_DICT_POS == pos) {
916c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka        return NOT_A_PROBABILITY;
926c4ad527dc8c90a7b67aa2454a81016881e02c96Satoshi Kataoka    }
9365d19946bebd1cc6299e2789cc0fc097d1898e80Keisuke Kuroyanagi    return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
94e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok}
95e808e436cbd6f1aeadb5d61f354d03c3c50872a7satok
965bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagibool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
975bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi    return mBigramDictionary->isValidBigram(word0, length0, word1, length1);
985bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi}
995bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
1005bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
10166facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, probability);
1025bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi}
1035bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
1045bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
1055bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi        const int length1, const int probability) {
10666facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
10766facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi            probability);
1085bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi}
1095bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi
1105bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagivoid Dictionary::removeBigramWords(const int *const word0, const int length0,
1115bf1be71629607e7206e6203489cf742d2f8ed79Keisuke Kuroynagi        const int *const word1, const int length1) {
11266facd37ddf8fc23ed2508a114c446147aaca724Keisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
1134d289d39aeae21064f63d958974816ceee3e9fdeTom Ouyang}
114ede2333640accecc066de328cb4f93e03e4bc5d7Tom Ouyang
115d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flush(const char *const filePath) {
116d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->flush(filePath);
117d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi}
118d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
119d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagivoid Dictionary::flushWithGC(const char *const filePath) {
120d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
121d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi}
122d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
123d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagibool Dictionary::needsToRunGC() {
124d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi    return mDictionaryStructureWithBufferPolicy->needsToRunGC();
125d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi}
126d0246277fde27e9c40a270e206f1d106811e847fKeisuke Kuroyanagi
12776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagivoid Dictionary::logDictionaryInfo(JNIEnv *const env) const {
12876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    const int BUFFER_SIZE = 16;
12976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    int dictionaryIdCodePointBuffer[BUFFER_SIZE];
13076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    int versionStringCodePointBuffer[BUFFER_SIZE];
13176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    int dateStringCodePointBuffer[BUFFER_SIZE];
13276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    const DictionaryHeaderStructurePolicy *const headerPolicy =
13376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            getDictionaryStructurePolicy()->getHeaderStructurePolicy();
13476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
13576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            BUFFER_SIZE);
13676e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
13776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            BUFFER_SIZE);
13876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE);
13976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi
14076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    char dictionaryIdCharBuffer[BUFFER_SIZE];
14176e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    char versionStringCharBuffer[BUFFER_SIZE];
14276e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    char dateStringCharBuffer[BUFFER_SIZE];
14376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
14476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            dictionaryIdCharBuffer, BUFFER_SIZE);
14576e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
14676e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            versionStringCharBuffer, BUFFER_SIZE);
14776e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
14876e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi            dateStringCharBuffer, BUFFER_SIZE);
14976e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi
15076e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi    LogUtils::logToJava(env,
1510624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi            "Dictionary info: dictionary = %s ; version = %s ; date = %s",
1520624cc6cf3971aa3c189185208571a5f3d0c459cKeisuke Kuroyanagi            dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
15376e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi}
15476e579c7caf2ef04f440be21c27377fe0b4150ffKeisuke Kuroyanagi
155923bf41f853a544fd0d71fbf7dc90359ec35981The Android Open Source Project} // namespace latinime
156