ver4_dict_buffers.cpp revision d3d7c31ca996d4d7eecbb1dafc549363bef6448e
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
18
#include <cerrno>
#include <cstring>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
25
26#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
27#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
28#include "suggest/policyimpl/dictionary/utils/file_utils.h"
29
30namespace latinime {
31
32/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
33        const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer,
34        const FormatUtils::FORMAT_VERSION formatVersion) {
35    if (!headerBuffer) {
36        ASSERT(false);
37        AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
38        return Ver4DictBuffersPtr(nullptr);
39    }
40    // TODO: take only dictDirPath, and open both header and trie files in the constructor below
41    const bool isUpdatable = headerBuffer->isUpdatable();
42    MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath,
43            Ver4DictConstants::BODY_FILE_EXTENSION, isUpdatable);
44    if (!bodyBuffer) {
45        return Ver4DictBuffersPtr(nullptr);
46    }
47    std::vector<uint8_t *> buffers;
48    std::vector<int> bufferSizes;
49    uint8_t *const buffer = bodyBuffer->getBuffer();
50    int position = 0;
51    while (position < bodyBuffer->getBufferSize()) {
52        const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(buffer, &position);
53        buffers.push_back(buffer + position);
54        bufferSizes.push_back(bufferSize);
55        position += bufferSize;
56        if (bufferSize < 0 || position < 0 || position > bodyBuffer->getBufferSize()) {
57            AKLOGE("The dict body file is corrupted.");
58            return Ver4DictBuffersPtr(nullptr);
59        }
60    }
61    if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) {
62        AKLOGE("The dict body file is corrupted.");
63        return Ver4DictBuffersPtr(nullptr);
64    }
65    return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
66            formatVersion, buffers, bufferSizes));
67}
68
69bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
70        const BufferWithExtendableBuffer *const headerBuffer) const {
71    // Create temporary directory.
72    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
73            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
74    char tmpDirPath[tmpDirPathBufSize];
75    FileUtils::getFilePathWithSuffix(dictDirPath,
76            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
77            tmpDirPath);
78    if (FileUtils::existsDir(tmpDirPath)) {
79        if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
80            AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
81            ASSERT(false);
82            return false;
83        }
84    }
85    umask(S_IWGRP | S_IWOTH);
86    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
87        AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
88        return false;
89    }
90    // Get dictionary base path.
91    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
92    char dictName[dictNameBufSize];
93    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
94    const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
95    char dictPath[dictPathBufSize];
96    FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
97
98    // Write header file.
99    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
100            Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
101        AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
102                Ver4DictConstants::HEADER_FILE_EXTENSION);
103        return false;
104    }
105
106    // Write body file.
107    const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath,
108            Ver4DictConstants::BODY_FILE_EXTENSION);
109    char bodyFilePath[bodyFilePathBufSize];
110    FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION,
111            bodyFilePathBufSize, bodyFilePath);
112
113    const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
114    if (fd == -1) {
115        AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno);
116        ASSERT(false);
117        return false;
118    }
119    FILE *const file = fdopen(fd, "wb");
120    if (!file) {
121        AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno);
122        ASSERT(false);
123        return false;
124    }
125
126    if (!flushDictBuffers(file)) {
127        fclose(file);
128        return false;
129    }
130    fclose(file);
131    // Remove existing dictionary.
132    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
133        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
134        ASSERT(false);
135        return false;
136    }
137    // Rename temporary directory.
138    if (rename(tmpDirPath, dictDirPath) != 0) {
139        AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
140        ASSERT(false);
141        return false;
142    }
143    return true;
144}
145
146bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
147    // Write trie.
148    if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
149        AKLOGE("Trie cannot be written.");
150        return false;
151    }
152    // Write terminal position lookup table.
153    if (!mTerminalPositionLookupTable.flushToFile(file)) {
154        AKLOGE("Terminal position lookup table cannot be written.");
155        return false;
156    }
157    // Write probability dict content.
158    if (!mProbabilityDictContent.flushToFile(file)) {
159        AKLOGE("Probability dict content cannot be written.");
160        return false;
161    }
162    // Write bigram dict content.
163    if (!mBigramDictContent.flushToFile(file)) {
164        AKLOGE("Bigram dict content cannot be written.");
165        return false;
166    }
167    // Write shortcut dict content.
168    if (!mShortcutDictContent.flushToFile(file)) {
169        AKLOGE("Shortcut dict content cannot be written.");
170        return false;
171    }
172    return true;
173}
174
175Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
176        MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
177        const FormatUtils::FORMAT_VERSION formatVersion,
178        const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
179        : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
180          mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
181          mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
182                  mHeaderPolicy.getSize(),
183                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
184          mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
185                  contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX],
186                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
187          mTerminalPositionLookupTable(
188                  contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
189                  contentBufferSizes[
190                          Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
191          mProbabilityDictContent(
192                  contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
193                  contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
194                  mHeaderPolicy.hasHistoricalInfoOfWords()),
195          mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
196                  &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
197                  mHeaderPolicy.hasHistoricalInfoOfWords()),
198          mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
199                  &contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
200          mIsUpdatable(mDictBuffer->isUpdatable()) {}
201
202Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
203        : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
204          mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
205          mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
206          mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
207          mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
208          mIsUpdatable(true) {}
209
210} // namespace latinime
211