1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" 18 19#include "defines.h" 20#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" 21 22namespace latinime { 23 24typedef PatriciaTrieReadingUtils PtReadingUtils; 25 26const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0; 27const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00; 28const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40; 29const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80; 30const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0; 31 32// Flag for single/multiple char group 33const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; 34// Flag for terminal PtNodes 35const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; 36// Flag for shortcut targets presence 37const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; 38// Flag for bigram presence 39const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04; 40// Flag for non-words (typically, shortcut only entries) 41const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; 42// Flag for blacklist 43const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; 44 45/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition( 46 const uint8_t *const buffer, int *const pos) { 47 const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 48 if (firstByte < 0x80) { 49 return firstByte; 50 } else { 51 return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( 52 buffer, pos); 53 } 54} 55 56/* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition( 57 const uint8_t *const buffer, int *const pos) { 58 return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 59} 60 61/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer, 62 int *const pos) { 63 return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); 64} 65 66// Returns the number of read characters. 67/* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer, 68 const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { 69 int length = 0; 70 if (hasMultipleChars(flags)) { 71 length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, 72 pos); 73 } else { 74 const int codePoint = getCodePointAndAdvancePosition(buffer, pos); 75 if (codePoint == NOT_A_CODE_POINT) { 76 // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is 77 // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR 78 // when the PtNode has a single code point. 79 length = 0; 80 AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", 81 *pos - 1, codePoint, buffer[*pos - 1]); 82 ASSERT(false); 83 } else if (maxLength > 0) { 84 outBuffer[0] = codePoint; 85 length = 1; 86 } 87 } 88 return length; 89} 90 91// Returns the number of skipped characters. 92/* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags, 93 const int maxLength, int *const pos) { 94 if (hasMultipleChars(flags)) { 95 return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); 96 } else { 97 if (maxLength > 0) { 98 getCodePointAndAdvancePosition(buffer, pos); 99 return 1; 100 } else { 101 return 0; 102 } 103 } 104} 105 106/* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer, 107 int *const pos) { 108 return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 109} 110 111/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition( 112 const uint8_t *const buffer, const NodeFlags flags, int *const pos) { 113 const int base = *pos; 114 int offset = 0; 115 switch (MASK_CHILDREN_POSITION_TYPE & flags) { 116 case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE: 117 offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 118 break; 119 case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES: 120 offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); 121 break; 122 case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES: 123 offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); 124 break; 125 default: 126 // If we come here, it means we asked for the children of a word with 127 // no children. 128 return NOT_A_DICT_POS; 129 } 130 return base + offset; 131} 132 133} // namespace latinime 134