header_read_write_utils.cpp revision 88bc312ad34321fb3e81be2dc939a889d065f4a7
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "dictionary/header/header_read_write_utils.h"

#include <cctype>
#include <cstdio>
#include <limits>
#include <memory>
#include <vector>

#include "defines.h"
#include "dictionary/utils/buffer_with_extendable_buffer.h"
#include "dictionary/utils/byte_array_utils.h"
27
28namespace latinime {
29
// Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
// As such, this is the maximum number of characters that will be needed to represent a
// non-negative int as a string, including the terminator (a leading '-' sign is not counted);
// this is used as the size of a string buffer large enough to hold any such value, e.g. in the
// code that reads the header of the binary dictionary where a {key,value} string pair scheme is
// used.
35const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
36
37const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
38const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;
39
40const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
41const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
42const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
43const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
44const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";
45
46const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
47
48typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
49
50/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
51    // See the format of the header in the comment in
52    // BinaryDictionaryFormatUtils::detectFormatVersion()
53    return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
54            + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
55}
56
57/* static */ HeaderReadWriteUtils::DictionaryFlags
58        HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
59    return ByteArrayUtils::readUint16(dictBuf,
60            HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
61}
62
63/* static */ HeaderReadWriteUtils::DictionaryFlags
64        HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
65                const AttributeMap *const attributeMap) {
66    return NO_FLAGS;
67}
68
69/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
70        AttributeMap *const headerAttributes) {
71    const int headerSize = getHeaderSize(dictBuf);
72    int pos = getHeaderOptionsPosition();
73    if (pos == NOT_A_DICT_POS) {
74        // The header doesn't have header options.
75        return;
76    }
77    int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
78    std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
79    while (pos < headerSize) {
80        // The values in the header don't use the code point table for their encoding.
81        const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
82                MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
83        std::vector<int> key;
84        key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
85        const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
86                MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
87        std::vector<int> value;
88        value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
89        headerAttributes->insert(AttributeMap::value_type(key, value));
90    }
91}
92
93/* static */ const int *HeaderReadWriteUtils::readCodePointTable(
94        AttributeMap *const headerAttributes) {
95    AttributeMap::key_type keyVector;
96    insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector);
97    AttributeMap::const_iterator it = headerAttributes->find(keyVector);
98    if (it == headerAttributes->end()) {
99        return nullptr;
100    }
101    return it->second.data();
102}
103
104/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
105        BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
106        int *const writingPos) {
107    if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE,
108            writingPos)) {
109        return false;
110    }
111    switch (version) {
112        case FormatUtils::VERSION_2:
113        case FormatUtils::VERSION_201:
114        case FormatUtils::VERSION_202:
115            // None of the static dictionaries (v2x) support writing
116            return false;
117        case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
118        case FormatUtils::VERSION_402:
119        case FormatUtils::VERSION_403:
120            return buffer->writeUintAndAdvancePosition(version /* data */,
121                    HEADER_DICTIONARY_VERSION_SIZE, writingPos);
122        default:
123            return false;
124    }
125}
126
127/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
128        BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
129        int *const writingPos) {
130    return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
131}
132
133/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize(
134        BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
135    return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
136}
137
138/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
139        BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
140        int *const writingPos) {
141    for (AttributeMap::const_iterator it = headerAttributes->begin();
142            it != headerAttributes->end(); ++it) {
143        if (it->first.empty() || it->second.empty()) {
144            continue;
145        }
146        // Write a key.
147        if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(),
148                true /* writesTerminator */, writingPos)) {
149            return false;
150        }
151        // Write a value.
152        if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(),
153                true /* writesTerminator */, writingPos)) {
154            return false;
155        }
156    }
157    return true;
158}
159
160/* static */ void HeaderReadWriteUtils::setCodePointVectorAttribute(
161        AttributeMap *const headerAttributes, const char *const key,
162        const std::vector<int> &value) {
163    AttributeMap::key_type keyVector;
164    insertCharactersIntoVector(key, &keyVector);
165    (*headerAttributes)[keyVector] = value;
166}
167
168/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
169        const char *const key, const bool value) {
170    setIntAttribute(headerAttributes, key, value ? 1 : 0);
171}
172
173/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
174        const char *const key, const int value) {
175    AttributeMap::key_type keyVector;
176    insertCharactersIntoVector(key, &keyVector);
177    setIntAttributeInner(headerAttributes, &keyVector, value);
178}
179
180/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
181        const AttributeMap::key_type *const key, const int value) {
182    AttributeMap::mapped_type valueVector;
183    char charBuf[LARGEST_INT_DIGIT_COUNT];
184    snprintf(charBuf, sizeof(charBuf), "%d", value);
185    insertCharactersIntoVector(charBuf, &valueVector);
186    (*headerAttributes)[*key] = valueVector;
187}
188
189/* static */ const std::vector<int> HeaderReadWriteUtils::readCodePointVectorAttributeValue(
190        const AttributeMap *const headerAttributes, const char *const key) {
191    AttributeMap::key_type keyVector;
192    insertCharactersIntoVector(key, &keyVector);
193    AttributeMap::const_iterator it = headerAttributes->find(keyVector);
194    if (it == headerAttributes->end()) {
195        return std::vector<int>();
196    } else {
197        return it->second;
198    }
199}
200
201/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
202        const AttributeMap *const headerAttributes, const char *const key,
203        const bool defaultValue) {
204    const int intDefaultValue = defaultValue ? 1 : 0;
205    const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
206    return intValue != 0;
207}
208
209/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
210        const AttributeMap *const headerAttributes, const char *const key,
211        const int defaultValue) {
212    AttributeMap::key_type keyVector;
213    insertCharactersIntoVector(key, &keyVector);
214    return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue);
215}
216
217/* static */ int HeaderReadWriteUtils::readIntAttributeValueInner(
218        const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
219        const int defaultValue) {
220    AttributeMap::const_iterator it = headerAttributes->find(*key);
221    if (it != headerAttributes->end()) {
222        int value = 0;
223        bool isNegative = false;
224        for (size_t i = 0; i < it->second.size(); ++i) {
225            if (i == 0 && it->second.at(i) == '-') {
226                isNegative = true;
227            } else {
228                if (!isdigit(it->second.at(i))) {
229                    // If not a number.
230                    return defaultValue;
231                }
232                value *= 10;
233                value += it->second.at(i) - '0';
234            }
235        }
236        return isNegative ? -value : value;
237    }
238    return defaultValue;
239}
240
241/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
242        std::vector<int> *const vector) {
243    for (int i = 0; characters[i]; ++i) {
244        vector->push_back(characters[i]);
245    }
246}
247
248} // namespace latinime
249