1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
18#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
19
20#include <cstdint>
21
22#include "defines.h"
23
24namespace latinime {
25
26class DictionaryShortcutsStructurePolicy;
27class DictionaryBigramsStructurePolicy;
28
29class PatriciaTrieReadingUtils {
30 public:
31    typedef uint8_t NodeFlags;
32
33    static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
34
35    static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
36
37    static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
38
39    // Returns the number of read characters.
40    static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
41            const int maxLength, int *const outBuffer, int *const pos);
42
43    // Returns the number of skipped characters.
44    static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
45            const int maxLength, int *const pos);
46
47    static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
48
49    static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
50            const NodeFlags flags, int *const pos);
51
52    /**
53     * Node Flags
54     */
55    static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
56        return (flags & FLAG_IS_BLACKLISTED) != 0;
57    }
58
59    static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
60        return (flags & FLAG_IS_NOT_A_WORD) != 0;
61    }
62
63    static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
64        return (flags & FLAG_IS_TERMINAL) != 0;
65    }
66
67    static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
68        return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
69    }
70
71    static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
72        return (flags & FLAG_HAS_BIGRAMS) != 0;
73    }
74
75    static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
76        return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
77    }
78
79    static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
80        return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
81    }
82
83    static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
84            const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
85            const bool hasBigrams, const bool hasMultipleChars,
86            const int childrenPositionFieldSize) {
87        NodeFlags nodeFlags = 0;
88        nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags;
89        nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
90        nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
91        nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
92        nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
93        nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
94        if (childrenPositionFieldSize == 1) {
95            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
96        } else if (childrenPositionFieldSize == 2) {
97            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
98        } else if (childrenPositionFieldSize == 3) {
99            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
100        } else {
101            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
102        }
103        return nodeFlags;
104    }
105
106    static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
107            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
108            const DictionaryBigramsStructurePolicy *const bigramPolicy,
109            NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
110            int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
111            int *const outBigramPos, int *const outSiblingPos);
112
113 private:
114    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
115
116    static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
117    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
118    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
119    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
120    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
121
122    static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
123    static const NodeFlags FLAG_IS_TERMINAL;
124    static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
125    static const NodeFlags FLAG_HAS_BIGRAMS;
126    static const NodeFlags FLAG_IS_NOT_A_WORD;
127    static const NodeFlags FLAG_IS_BLACKLISTED;
128};
129} // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
131