dictionary_utils.cpp revision 72e2383d11cf09735b378dcedd20c9fc43da1f12
1/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/core/dictionary/dictionary_utils.h"
18
19#include "suggest/core/dicnode/dic_node.h"
20#include "suggest/core/dicnode/dic_node_priority_queue.h"
21#include "suggest/core/dicnode/dic_node_vector.h"
22#include "suggest/core/dictionary/dictionary.h"
23#include "suggest/core/dictionary/digraph_utils.h"
24#include "suggest/core/session/prev_words_info.h"
25#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
26#include "utils/int_array_view.h"
27
28namespace latinime {
29
30/* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
31        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
32        const CodePointArrayView codePoints) {
33    std::vector<DicNode> current;
34    std::vector<DicNode> next;
35
36    // No ngram context.
37    NgramContext emptyNgramContext;
38    WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
39    const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
40            dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
41    current.emplace_back();
42    DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
43    for (const int codePoint : codePoints) {
44        // The base-lower input is used to ignore case errors and accent errors.
45        const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint);
46        for (const DicNode &dicNode : current) {
47            if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) {
48                next.emplace_back(dicNode);
49                next.back().advanceDigraphIndex();
50                continue;
51            }
52            processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode, &next);
53        }
54        current.clear();
55        current.swap(next);
56    }
57
58    int maxProbability = NOT_A_PROBABILITY;
59    for (const DicNode &dicNode : current) {
60        if (!dicNode.isTerminalDicNode()) {
61            continue;
62        }
63        const WordAttributes wordAttributes =
64                dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(),
65                        dicNode.getWordId(), nullptr /* multiBigramMap */);
66        // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
67        maxProbability = std::max(maxProbability, wordAttributes.getProbability());
68    }
69    return maxProbability;
70}
71
72/* static */ void DictionaryUtils::processChildDicNodes(
73        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
74        const int inputCodePoint, const DicNode *const parentDicNode,
75        std::vector<DicNode> *const outDicNodes) {
76    DicNodeVector childDicNodes;
77    DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
78    for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
79        DicNode *const childDicNode = childDicNodes[childIndex];
80        const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
81        if (inputCodePoint == codePoint) {
82            outDicNodes->emplace_back(*childDicNode);
83        }
84        if (childDicNode->canBeIntentionalOmission()) {
85            processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
86                    outDicNodes);
87        }
88        if (DigraphUtils::hasDigraphForCodePoint(
89                dictionaryStructurePolicy->getHeaderStructurePolicy(),
90                childDicNode->getNodeCodePoint())) {
91            childDicNode->advanceDigraphIndex();
92            if (childDicNode->getNodeCodePoint() == codePoint) {
93                childDicNode->advanceDigraphIndex();
94                outDicNodes->emplace_back(*childDicNode);
95            }
96        }
97    }
98}
99
100} // namespace latinime
101