dictionary_utils.cpp revision c32356c2291a6de8adf21698e6467f30b5f2e31c
1d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/*
2d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project
3d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *
4d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License");
5d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * you may not use this file except in compliance with the License.
6d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * You may obtain a copy of the License at
7d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *
8d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *     http://www.apache.org/licenses/LICENSE-2.0
9d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *
10d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software
11d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS,
12d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * See the License for the specific language governing permissions and
14d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * limitations under the License.
15d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi */
16d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
17d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary_utils.h"
18d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
19d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node.h"
20d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_priority_queue.h"
21d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_vector.h"
22d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary.h"
23d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/digraph_utils.h"
24d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/session/prev_words_info.h"
25d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
26d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
27d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanaginamespace latinime {
28d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
29d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
30d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
31d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const int *const codePoints, const int codePointCount) {
32d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    std::vector<DicNode> current;
33d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    std::vector<DicNode> next;
34d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
35d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    // No prev words information.
36d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    PrevWordsInfo emptyPrevWordsInfo;
3789a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
3889a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    emptyPrevWordsInfo.getPrevWordIds(dictionaryStructurePolicy, prevWordIds,
3989a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi            false /* tryLowerCaseSearch */);
40d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    current.emplace_back();
4189a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
42d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    for (int i = 0; i < codePointCount; ++i) {
43d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        // The base-lower input is used to ignore case errors and accent errors.
44d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]);
45d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        for (const DicNode &dicNode : current) {
46d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == codePoint) {
47d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                next.emplace_back(dicNode);
48d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                next.back().advanceDigraphIndex();
49d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                continue;
50d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            }
51d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            processChildDicNodes(dictionaryStructurePolicy, codePoint, &dicNode, &next);
52d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
53d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        current.clear();
54d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        current.swap(next);
55d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    }
56d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
57c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi    int maxProbability = NOT_A_PROBABILITY;
58d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    for (const DicNode &dicNode : current) {
59d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (!dicNode.isTerminalDicNode()) {
60d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            continue;
61d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
62c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi        const WordAttributes wordAttributes =
63c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi                dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(),
64c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi                        dicNode.getWordId(), nullptr /* multiBigramMap */);
65d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
66c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi        maxProbability = std::max(maxProbability, wordAttributes.getProbability());
67d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    }
68c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi    return maxProbability;
69d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi}
70d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
71d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ void DictionaryUtils::processChildDicNodes(
72d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
73d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const int inputCodePoint, const DicNode *const parentDicNode,
74d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        std::vector<DicNode> *const outDicNodes) {
75d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    DicNodeVector childDicNodes;
76d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
77d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
78d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        DicNode *const childDicNode = childDicNodes[childIndex];
79d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
80d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (inputCodePoint == codePoint) {
81d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            outDicNodes->emplace_back(*childDicNode);
82d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
83d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (childDicNode->canBeIntentionalOmission()) {
84d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
85d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                    outDicNodes);
86d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
87d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (DigraphUtils::hasDigraphForCodePoint(
88d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                dictionaryStructurePolicy->getHeaderStructurePolicy(),
89d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                childDicNode->getNodeCodePoint())) {
90d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            childDicNode->advanceDigraphIndex();
91d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            if (childDicNode->getNodeCodePoint() == codePoint) {
92d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                childDicNode->advanceDigraphIndex();
93d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                outDicNodes->emplace_back(*childDicNode);
94d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            }
95d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
96d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    }
97d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi}
98d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
99d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} // namespace latinime
100