1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H 18#define LATINIME_PATRICIA_TRIE_READING_UTILS_H 19 20#include <cstdint> 21 22#include "defines.h" 23 24namespace latinime { 25 26class DictionaryShortcutsStructurePolicy; 27class DictionaryBigramsStructurePolicy; 28 29class PatriciaTrieReadingUtils { 30 public: 31 typedef uint8_t NodeFlags; 32 33 static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos); 34 35 static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos); 36 37 static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos); 38 39 // Returns the number of read characters. 40 static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, 41 const int maxLength, int *const outBuffer, int *const pos); 42 43 // Returns the number of skipped characters. 44 static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, 45 const int maxLength, int *const pos); 46 47 static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos); 48 49 static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, 50 const NodeFlags flags, int *const pos); 51 52 /** 53 * Node Flags 54 */ 55 static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) { 56 return (flags & FLAG_IS_BLACKLISTED) != 0; 57 } 58 59 static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) { 60 return (flags & FLAG_IS_NOT_A_WORD) != 0; 61 } 62 63 static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) { 64 return (flags & FLAG_IS_TERMINAL) != 0; 65 } 66 67 static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) { 68 return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0; 69 } 70 71 static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) { 72 return (flags & FLAG_HAS_BIGRAMS) != 0; 73 } 74 75 static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) { 76 return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0; 77 } 78 79 static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { 80 return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); 81 } 82 83 static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted, 84 const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, 85 const bool hasBigrams, const bool hasMultipleChars, 86 const int childrenPositionFieldSize) { 87 NodeFlags nodeFlags = 0; 88 nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags; 89 nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags; 90 nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags; 91 nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags; 92 nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags; 93 nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags; 94 if (childrenPositionFieldSize == 1) { 95 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; 96 } else if (childrenPositionFieldSize == 2) { 97 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; 98 } else if (childrenPositionFieldSize == 3) { 99 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; 100 } else { 101 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; 102 } 103 return nodeFlags; 104 } 105 106 static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, 107 const DictionaryShortcutsStructurePolicy *const shortcutPolicy, 108 const DictionaryBigramsStructurePolicy *const bigramPolicy, 109 NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint, 110 int *const outProbability, int *const outChildrenPos, int *const outShortcutPos, 111 int *const outBigramPos, int *const outSiblingPos); 112 113 private: 114 DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); 115 116 static const NodeFlags MASK_CHILDREN_POSITION_TYPE; 117 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; 118 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; 119 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; 120 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; 121 122 static const NodeFlags FLAG_HAS_MULTIPLE_CHARS; 123 static const NodeFlags FLAG_IS_TERMINAL; 124 static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS; 125 static const NodeFlags FLAG_HAS_BIGRAMS; 126 static const NodeFlags FLAG_IS_NOT_A_WORD; 127 static const NodeFlags FLAG_IS_BLACKLISTED; 128}; 129} // namespace latinime 130#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */ 131