1647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi/* 2647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project 3647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * 4647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License"); 5647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * you may not use this file except in compliance with the License. 6647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * You may obtain a copy of the License at 7647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * 8647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * http://www.apache.org/licenses/LICENSE-2.0 9647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * 10647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software 11647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS, 12647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * See the License for the specific language governing permissions and 14647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * limitations under the License. 15647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi */ 16647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 17647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H 18647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#define LATINIME_PATRICIA_TRIE_READING_UTILS_H 19647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 20cafab169cdb21244c82b99c09983c98066113d87Ken Wakasa#include <cstdint> 21647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 22647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#include "defines.h" 23647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 24647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynaginamespace latinime { 25647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 261e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagiclass DictionaryShortcutsStructurePolicy; 271e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagiclass DictionaryBigramsStructurePolicy; 281e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi 29647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagiclass PatriciaTrieReadingUtils { 30647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi public: 31647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi typedef uint8_t NodeFlags; 32647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 339ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos); 34647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 359ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos); 36647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 379ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos); 38647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 39647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi // Returns the number of read characters. 409ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, 419ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi const int maxLength, int *const outBuffer, int *const pos); 42647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 43647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi // Returns the number of skipped characters. 449ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, 459ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi const int maxLength, int *const pos); 46647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 479ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos); 48647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 49647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, 50647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi const NodeFlags flags, int *const pos); 51647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 52647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi /** 53647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Node Flags 54647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi */ 55647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) { 56647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return (flags & FLAG_IS_BLACKLISTED) != 0; 57647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 58647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 59647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) { 60647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return (flags & FLAG_IS_NOT_A_WORD) != 0; 61647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 62647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 63647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) { 64647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return (flags & FLAG_IS_TERMINAL) != 0; 65647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 66647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 67647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) { 68647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0; 69647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 70647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 71647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) { 72647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return (flags & FLAG_HAS_BIGRAMS) != 0; 73647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 74647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 75647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) { 76647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0; 77647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 78647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 79647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { 8027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); 81647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi } 82647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 83e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted, 84e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, 85e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi const bool hasBigrams, const bool hasMultipleChars, 86e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi const int childrenPositionFieldSize) { 87e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi NodeFlags nodeFlags = 0; 88e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags; 89e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags; 90e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags; 91e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags; 92e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags; 93e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags; 94e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi if (childrenPositionFieldSize == 1) { 95e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; 96e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi } else if (childrenPositionFieldSize == 2) { 97e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; 98e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi } else if (childrenPositionFieldSize == 3) { 99e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; 100e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi } else { 101e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; 102e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi } 103e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi return nodeFlags; 104e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi } 105e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi 1061e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, 1071e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi const DictionaryShortcutsStructurePolicy *const shortcutPolicy, 1081e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi const DictionaryBigramsStructurePolicy *const bigramPolicy, 1091e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint, 1101e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int *const outProbability, int *const outChildrenPos, int *const outShortcutPos, 1111e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi int *const outBigramPos, int *const outSiblingPos); 1121e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi 113647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi private: 114647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); 115647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 11627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi static const NodeFlags MASK_CHILDREN_POSITION_TYPE; 11727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; 11827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; 11927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; 12027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; 121647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi 122647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static const NodeFlags FLAG_HAS_MULTIPLE_CHARS; 123647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static const NodeFlags FLAG_IS_TERMINAL; 124647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS; 125647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static const NodeFlags FLAG_HAS_BIGRAMS; 126647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static const NodeFlags FLAG_IS_NOT_A_WORD; 127647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi static const NodeFlags FLAG_IS_BLACKLISTED; 128647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi}; 129647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi} // namespace latinime 130647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */ 131