1d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/*
2d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Copyright (C) 2014, The Android Open Source Project
3d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *
4d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License");
5d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * you may not use this file except in compliance with the License.
6d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * You may obtain a copy of the License at
7d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *
8d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *     http://www.apache.org/licenses/LICENSE-2.0
9d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi *
10d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software
11d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS,
12d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * See the License for the specific language governing permissions and
14d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi * limitations under the License.
15d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi */
16d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
17d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary_utils.h"
18d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
1988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
2088bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/property/ngram_context.h"
21d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node.h"
22d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_priority_queue.h"
23d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dicnode/dic_node_vector.h"
24d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/dictionary.h"
25d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi#include "suggest/core/dictionary/digraph_utils.h"
26537f6eea8a8d56fe532913a37f4dbff4b3d490afKeisuke Kuroyanagi#include "utils/int_array_view.h"
27d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
28d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanaginamespace latinime {
29d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
30d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
31d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
32fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi        const CodePointArrayView codePoints) {
33d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    std::vector<DicNode> current;
34d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    std::vector<DicNode> next;
35d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
3672e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    // No ngram context.
3772e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    NgramContext emptyNgramContext;
38c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi    WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
3972e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
40c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi            dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
41d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    current.emplace_back();
42c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi    DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
43fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi    for (const int codePoint : codePoints) {
44d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        // The base-lower input is used to ignore case errors and accent errors.
45fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi        const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint);
46d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        for (const DicNode &dicNode : current) {
47fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi            if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) {
48d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                next.emplace_back(dicNode);
49d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                next.back().advanceDigraphIndex();
50d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                continue;
51d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            }
52fc7d0540fee2ac09336b562af7a421e96790cb7fKeisuke Kuroyanagi            processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode, &next);
53d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
54d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        current.clear();
55d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        current.swap(next);
56d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    }
57d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
58c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi    int maxProbability = NOT_A_PROBABILITY;
59d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    for (const DicNode &dicNode : current) {
60d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (!dicNode.isTerminalDicNode()) {
61d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            continue;
62d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
63c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi        const WordAttributes wordAttributes =
64c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi                dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(),
65c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi                        dicNode.getWordId(), nullptr /* multiBigramMap */);
66d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
67c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi        maxProbability = std::max(maxProbability, wordAttributes.getProbability());
68d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    }
69c32356c2291a6de8adf21698e6467f30b5f2e31cKeisuke Kuroyanagi    return maxProbability;
70d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi}
71d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
72d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi/* static */ void DictionaryUtils::processChildDicNodes(
73d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
74d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const int inputCodePoint, const DicNode *const parentDicNode,
75d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        std::vector<DicNode> *const outDicNodes) {
76d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    DicNodeVector childDicNodes;
77d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
78d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
79d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        DicNode *const childDicNode = childDicNodes[childIndex];
80d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
81d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (inputCodePoint == codePoint) {
82d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            outDicNodes->emplace_back(*childDicNode);
83d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
84d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (childDicNode->canBeIntentionalOmission()) {
85d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
86d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                    outDicNodes);
87d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
88d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        if (DigraphUtils::hasDigraphForCodePoint(
89d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                dictionaryStructurePolicy->getHeaderStructurePolicy(),
90d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                childDicNode->getNodeCodePoint())) {
91d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            childDicNode->advanceDigraphIndex();
92d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            if (childDicNode->getNodeCodePoint() == codePoint) {
93d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                childDicNode->advanceDigraphIndex();
94d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi                outDicNodes->emplace_back(*childDicNode);
95d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi            }
96d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi        }
97d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi    }
98d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi}
99d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi
100d9b8602f4862c2c876e1499aad7ca7d77ea66595Keisuke Kuroyanagi} // namespace latinime
101