dic_node_utils.h revision 5fa33a701d4b8405254d3369c714702481a90e6b
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_DIC_NODE_UTILS_H
18#define LATINIME_DIC_NODE_UTILS_H
19
20#include <stdint.h>
21#include <vector>
22
23#include "defines.h"
24#include "hash_map_compat.h"
25
26namespace latinime {
27
28class DicNode;
29class DicNodeVector;
30class ProximityInfo;
31class ProximityInfoState;
32
// Static-only collection of helper functions that operate on DicNode / DicNodeVector objects.
// Every member function declared in this class is static. Only isProximityChar() has its body
// in this header; all other functions are declared here and defined elsewhere.
33class DicNodeUtils {
34 public:
    // Takes two int buffers (src0 with length0, src1 with length1) and an output buffer dest;
    // returns an int.
    // NOTE(review): presumably writes src0 followed by src1 into dest and returns the combined
    // length -- confirm against the definition.
35    static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
36            const int16_t length1, int *dest);
    // Node initialization helpers. Each one writes into the DicNode passed as the last argument.
    // NOTE(review): the exact state they set up is not visible in this header -- see the
    // definitions.
37    static void initAsRoot(const int rootPos, const uint8_t *const dicRoot,
38            const int prevWordNodePos, DicNode *newRootNode);
39    static void initAsRootWithPreviousWord(const int rootPos, const uint8_t *const dicRoot,
40            DicNode *prevWordLastNode, DicNode *newRootNode);
41    static void initByCopy(DicNode *srcNode, DicNode *destNode);
    // Output goes through childDicNodes.
    // NOTE(review): presumably collects every child of dicNode found in the dictionary at
    // dicRoot -- confirm.
42    static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
43            DicNodeVector *childDicNodes);
    // Returns a float for the given node; bigramCacheMap maps int keys to int16_t values.
    // NOTE(review): by its name the map is a lookup cache and the result is a cost-like value
    // derived from bigram probability -- confirm against the definition.
44    static float getBigramNodeImprobability(const uint8_t *const dicRoot,
45            const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap);
    // Predicate on a single code point, given proximity info and a code point filter list.
    // NOTE(review): how pInfo and codePointsFilter decide the result (and whether either may be
    // null) is not visible here.
46    static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo,
47            const std::vector<int> *const codePointsFilter);
48    // TODO: Move to private
    // Output goes through childDicNodes.
    // NOTE(review): presumably the children of dicNode that match the input at pointIndex in
    // pInfoState, restricted to exact matches when exactOnly is true -- confirm.
49    static void getProximityChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot,
50            const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly,
51            DicNodeVector *childDicNodes);
52
53    // TODO: Move to proximity info
    // Returns true when type is MATCH_CHAR, PROXIMITY_CHAR or ADDITIONAL_PROXIMITY_CHAR, and false
    // for every other ProximityType value. ProximityType and its enumerators are not declared in
    // this header.
54    static bool isProximityChar(ProximityType type) {
55        return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR;
56    }
57
58 private:
    // Macro defined outside this header.
    // NOTE(review): by its name it is meant to prevent this static-only class from being
    // implicitly constructed -- confirm in the macro definition.
59    DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
60    // Max cache size for the space omission error correction bigram lookup
61    static const int MAX_BIGRAM_MAP_SIZE = 20000;
62    // Max number of bigrams to look up
63    static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
64
    // Bigram lookup helpers taking the same (dicRoot, node, bigramCacheMap) arguments as
    // getBigramNodeImprobability(), returning int and int16_t respectively.
    // NOTE(review): units and encoding of the returned values are not visible here.
65    static int getBigramNodeProbability(const uint8_t *const dicRoot, const DicNode *const node,
66            hash_map_compat<int, int16_t> *bigramCacheMap);
67    static int16_t getBigramNodeEncodedDiffProbability(const uint8_t *const dicRoot,
68            const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap);
    // Child-node creation helpers; each receives a DicNodeVector *childDicNodes as its output
    // argument.
    // NOTE(review): what "passing" and "leaving" mean, and what the int returned by
    // createAndGetLeavingChildNode() represents, must be checked in the definitions.
69    static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState,
70            const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes);
71    static void createAndGetAllLeavingChildNodes(DicNode *dicNode, const uint8_t *const dicRoot,
72            const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
73            const std::vector<int> *const codePointsFilter,
74            const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
75    static int createAndGetLeavingChildNode(DicNode *dicNode, int pos, const uint8_t *const dicRoot,
76            const int terminalDepth, const ProximityInfoState *pInfoState, const int pointIndex,
77            const bool exactOnly, const std::vector<int> *const codePointsFilter,
78            const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
    // Position-based bigram lookup returning int16_t; pos is taken by non-const value, nextPos is
    // const.
    // NOTE(review): presumably pos and nextPos are positions inside the dictionary at dicRoot --
    // confirm.
79    static int16_t getBigramProbability(const uint8_t *const dicRoot, int pos, const int nextPos,
80            hash_map_compat<int, int16_t> *bigramCacheMap);
81
82    // TODO: Move to proximity info
    // Predicate on nodeCodePoint for the input point at pointIndex in pInfoState.
    // NOTE(review): the matching rule and the effect of exactOnly are not visible in this header.
83    static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex,
84            const bool exactOnly, const int nodeCodePoint);
85};
86} // namespace latinime
87#endif // LATINIME_DIC_NODE_UTILS_H
88