// dic_node_utils.h revision 5fa33a701d4b8405254d3369c714702481a90e6b
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_DIC_NODE_UTILS_H 18#define LATINIME_DIC_NODE_UTILS_H 19 20#include <stdint.h> 21#include <vector> 22 23#include "defines.h" 24#include "hash_map_compat.h" 25 26namespace latinime { 27 28class DicNode; 29class DicNodeVector; 30class ProximityInfo; 31class ProximityInfoState; 32 33class DicNodeUtils { 34 public: 35 static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, 36 const int16_t length1, int *dest); 37 static void initAsRoot(const int rootPos, const uint8_t *const dicRoot, 38 const int prevWordNodePos, DicNode *newRootNode); 39 static void initAsRootWithPreviousWord(const int rootPos, const uint8_t *const dicRoot, 40 DicNode *prevWordLastNode, DicNode *newRootNode); 41 static void initByCopy(DicNode *srcNode, DicNode *destNode); 42 static void getAllChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot, 43 DicNodeVector *childDicNodes); 44 static float getBigramNodeImprobability(const uint8_t *const dicRoot, 45 const DicNode *const node, hash_map_compat<int, int16_t> *const bigramCacheMap); 46 static bool isDicNodeFilteredOut(const int nodeCodePoint, const ProximityInfo *const pInfo, 47 const std::vector<int> *const codePointsFilter); 48 // TODO: Move to private 49 static void getProximityChildDicNodes(DicNode *dicNode, const uint8_t *const dicRoot, 50 const ProximityInfoState 
*pInfoState, const int pointIndex, bool exactOnly, 51 DicNodeVector *childDicNodes); 52 53 // TODO: Move to proximity info 54 static bool isProximityChar(ProximityType type) { 55 return type == MATCH_CHAR || type == PROXIMITY_CHAR || type == ADDITIONAL_PROXIMITY_CHAR; 56 } 57 58 private: 59 DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); 60 // Max cache size for the space omission error correction bigram lookup 61 static const int MAX_BIGRAM_MAP_SIZE = 20000; 62 // Max number of bigrams to look up 63 static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500; 64 65 static int getBigramNodeProbability(const uint8_t *const dicRoot, const DicNode *const node, 66 hash_map_compat<int, int16_t> *bigramCacheMap); 67 static int16_t getBigramNodeEncodedDiffProbability(const uint8_t *const dicRoot, 68 const DicNode *const node, hash_map_compat<int, int16_t> *bigramCacheMap); 69 static void createAndGetPassingChildNode(DicNode *dicNode, const ProximityInfoState *pInfoState, 70 const int pointIndex, const bool exactOnly, DicNodeVector *childDicNodes); 71 static void createAndGetAllLeavingChildNodes(DicNode *dicNode, const uint8_t *const dicRoot, 72 const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, 73 const std::vector<int> *const codePointsFilter, 74 const ProximityInfo *const pInfo, DicNodeVector *childDicNodes); 75 static int createAndGetLeavingChildNode(DicNode *dicNode, int pos, const uint8_t *const dicRoot, 76 const int terminalDepth, const ProximityInfoState *pInfoState, const int pointIndex, 77 const bool exactOnly, const std::vector<int> *const codePointsFilter, 78 const ProximityInfo *const pInfo, DicNodeVector *childDicNodes); 79 static int16_t getBigramProbability(const uint8_t *const dicRoot, int pos, const int nextPos, 80 hash_map_compat<int, int16_t> *bigramCacheMap); 81 82 // TODO: Move to proximity info 83 static bool isMatchedNodeCodePoint(const ProximityInfoState *pInfoState, const int pointIndex, 84 const bool exactOnly, const 
int nodeCodePoint); 85}; 86} // namespace latinime 87#endif // LATINIME_DIC_NODE_UTILS_H 88