1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
18#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
19
20#include <cstdint>
21
22#include "defines.h"
23
24namespace latinime {
25
26class DictionaryShortcutsStructurePolicy;
27class DictionaryBigramsStructurePolicy;
28
29class PatriciaTrieReadingUtils {
30 public:
31    typedef uint8_t NodeFlags;
32
33    static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
34
35    static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
36
37    static int getCodePointAndAdvancePosition(const uint8_t *const buffer,
38            const int *const codePointTable, int *const pos);
39
40    // Returns the number of read characters.
41    static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
42            const int maxLength, const int *const codePointTable, int *const outBuffer,
43            int *const pos);
44
45    // Returns the number of skipped characters.
46    static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
47            const int maxLength, const int *const codePointTable, int *const pos);
48
49    static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
50
51    static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
52            const NodeFlags flags, int *const pos);
53
54    /**
55     * Node Flags
56     */
57    static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) {
58        return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0;
59    }
60
61    static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
62        return (flags & FLAG_IS_NOT_A_WORD) != 0;
63    }
64
65    static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
66        return (flags & FLAG_IS_TERMINAL) != 0;
67    }
68
69    static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
70        return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
71    }
72
73    static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
74        return (flags & FLAG_HAS_BIGRAMS) != 0;
75    }
76
77    static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
78        return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
79    }
80
81    static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
82        return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
83    }
84
85    static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive,
86            const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
87            const bool hasBigrams, const bool hasMultipleChars,
88            const int childrenPositionFieldSize) {
89        NodeFlags nodeFlags = 0;
90        nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags;
91        nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
92        nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
93        nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
94        nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
95        nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
96        if (childrenPositionFieldSize == 1) {
97            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
98        } else if (childrenPositionFieldSize == 2) {
99            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
100        } else if (childrenPositionFieldSize == 3) {
101            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
102        } else {
103            nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
104        }
105        return nodeFlags;
106    }
107
108    static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
109            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
110            const DictionaryBigramsStructurePolicy *const bigramPolicy,
111            const int *const codePointTable, NodeFlags *const outFlags,
112            int *const outCodePointCount, int *const outCodePoint, int *const outProbability,
113            int *const outChildrenPos, int *const outShortcutPos, int *const outBigramPos,
114            int *const outSiblingPos);
115
116 private:
117    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
118
119    static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
120    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
121    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
122    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
123    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
124
125    static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
126    static const NodeFlags FLAG_IS_TERMINAL;
127    static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
128    static const NodeFlags FLAG_HAS_BIGRAMS;
129    static const NodeFlags FLAG_IS_NOT_A_WORD;
130    static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE;
131};
132} // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
134