1647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi/*
2647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Copyright (C) 2013, The Android Open Source Project
3647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi *
4647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Licensed under the Apache License, Version 2.0 (the "License");
5647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * you may not use this file except in compliance with the License.
6647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * You may obtain a copy of the License at
7647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi *
8647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi *     http://www.apache.org/licenses/LICENSE-2.0
9647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi *
10647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * Unless required by applicable law or agreed to in writing, software
11647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * distributed under the License is distributed on an "AS IS" BASIS,
12647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * See the License for the specific language governing permissions and
14647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi * limitations under the License.
15647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi */
16647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
17647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
18647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
19647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
20cafab169cdb21244c82b99c09983c98066113d87Ken Wakasa#include <cstdint>
21647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
22647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#include "defines.h"
23647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
24647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynaginamespace latinime {
25647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
261e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagiclass DictionaryShortcutsStructurePolicy;
271e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagiclass DictionaryBigramsStructurePolicy;
281e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi
29647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagiclass PatriciaTrieReadingUtils {
30647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi public:
31647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    typedef uint8_t NodeFlags;
32647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
339ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi    static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
34647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
359ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi    static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
36647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
379ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi    static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
38647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
39647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    // Returns the number of read characters.
409ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi    static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
419ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi            const int maxLength, int *const outBuffer, int *const pos);
42647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
43647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    // Returns the number of skipped characters.
449ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi    static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
459ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi            const int maxLength, int *const pos);
46647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
479ea9c61c99b2fc8ff9a5bbd02c0ad81a828b930cKeisuke Kuroyanagi    static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
48647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
49647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
50647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi            const NodeFlags flags, int *const pos);
51647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
52647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    /**
53647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi     * Node Flags
54647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi     */
55647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
56647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return (flags & FLAG_IS_BLACKLISTED) != 0;
57647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
58647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
59647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
60647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return (flags & FLAG_IS_NOT_A_WORD) != 0;
61647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
62647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
63647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
64647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return (flags & FLAG_IS_TERMINAL) != 0;
65647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
66647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
67647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
68647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
69647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
70647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
71647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
72647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return (flags & FLAG_HAS_BIGRAMS) != 0;
73647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
74647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
75647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
76647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi        return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
77647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
78647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
79647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
8027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi        return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
81647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    }
82647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
83e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi    static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
84e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
85e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            const bool hasBigrams, const bool hasMultipleChars,
86e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            const int childrenPositionFieldSize) {
87e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        NodeFlags nodeFlags = 0;
88e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags;
89e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
90e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
91e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
92e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
93e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
94e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        if (childrenPositionFieldSize == 1) {
95e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
96e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        } else if (childrenPositionFieldSize == 2) {
97e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
98e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        } else if (childrenPositionFieldSize == 3) {
99e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
100e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        } else {
101e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
102e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        }
103e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi        return nodeFlags;
104e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi    }
105e4dcebe0c32760281376da52f543db62ece8b7b4Keisuke Kuroyanagi
1061e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi    static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
1071e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
1081e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi            const DictionaryBigramsStructurePolicy *const bigramPolicy,
1091e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi            NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
1101e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi            int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
1111e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi            int *const outBigramPos, int *const outSiblingPos);
1121e2752924d921a9a2a26bf4e72e6db8d4e21982cKeisuke Kuroyanagi
113647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi private:
114647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
115647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
11627b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
11727b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
11827b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
11927b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
12027b12933cd4e6dcb7363f0f33f3da8d7481bf7caKeisuke Kuroyanagi    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
121647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi
122647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
123647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static const NodeFlags FLAG_IS_TERMINAL;
124647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
125647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static const NodeFlags FLAG_HAS_BIGRAMS;
126647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static const NodeFlags FLAG_IS_NOT_A_WORD;
127647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi    static const NodeFlags FLAG_IS_BLACKLISTED;
128647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi};
129647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi} // namespace latinime
130647c00070712067fc5ae415f9106be5ca4e17464Keisuke Kuroynagi#endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */
131