ver4_dict_buffers.cpp revision c0c674cdc0721a374e140ad5ee1409c0498b3262
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
18
#include <cerrno>
#include <cstring>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
25
26#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
27#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
28#include "suggest/policyimpl/dictionary/utils/file_utils.h"
29#include "utils/byte_array_view.h"
30
31namespace latinime {
32
33/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
34        const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer,
35        const FormatUtils::FORMAT_VERSION formatVersion) {
36    if (!headerBuffer) {
37        ASSERT(false);
38        AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
39        return Ver4DictBuffersPtr(nullptr);
40    }
41    // TODO: take only dictDirPath, and open both header and trie files in the constructor below
42    const bool isUpdatable = headerBuffer->isUpdatable();
43    MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath,
44            Ver4DictConstants::BODY_FILE_EXTENSION, isUpdatable);
45    if (!bodyBuffer) {
46        return Ver4DictBuffersPtr(nullptr);
47    }
48    std::vector<uint8_t *> buffers;
49    std::vector<int> bufferSizes;
50    const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
51    int position = 0;
52    while (position < static_cast<int>(buffer.size())) {
53        const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
54                buffer.data(), &position);
55        const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
56        buffers.push_back(subBuffer.data());
57        bufferSizes.push_back(subBuffer.size());
58        position += bufferSize;
59        if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
60            AKLOGE("The dict body file is corrupted.");
61            return Ver4DictBuffersPtr(nullptr);
62        }
63    }
64    if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) {
65        AKLOGE("The dict body file is corrupted.");
66        return Ver4DictBuffersPtr(nullptr);
67    }
68    return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
69            formatVersion, buffers, bufferSizes));
70}
71
72bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
73        const BufferWithExtendableBuffer *const headerBuffer) const {
74    // Create temporary directory.
75    const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
76            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
77    char tmpDirPath[tmpDirPathBufSize];
78    FileUtils::getFilePathWithSuffix(dictDirPath,
79            DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
80            tmpDirPath);
81    if (FileUtils::existsDir(tmpDirPath)) {
82        if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
83            AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
84            ASSERT(false);
85            return false;
86        }
87    }
88    umask(S_IWGRP | S_IWOTH);
89    if (mkdir(tmpDirPath, S_IRWXU) == -1) {
90        AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
91        return false;
92    }
93    // Get dictionary base path.
94    const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
95    char dictName[dictNameBufSize];
96    FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
97    const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
98    char dictPath[dictPathBufSize];
99    FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
100
101    // Write header file.
102    if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
103            Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
104        AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
105                Ver4DictConstants::HEADER_FILE_EXTENSION);
106        return false;
107    }
108
109    // Write body file.
110    const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath,
111            Ver4DictConstants::BODY_FILE_EXTENSION);
112    char bodyFilePath[bodyFilePathBufSize];
113    FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION,
114            bodyFilePathBufSize, bodyFilePath);
115
116    const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
117    if (fd == -1) {
118        AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno);
119        ASSERT(false);
120        return false;
121    }
122    FILE *const file = fdopen(fd, "wb");
123    if (!file) {
124        AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno);
125        ASSERT(false);
126        return false;
127    }
128
129    if (!flushDictBuffers(file)) {
130        fclose(file);
131        return false;
132    }
133    fclose(file);
134    // Remove existing dictionary.
135    if (!FileUtils::removeDirAndFiles(dictDirPath)) {
136        AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
137        ASSERT(false);
138        return false;
139    }
140    // Rename temporary directory.
141    if (rename(tmpDirPath, dictDirPath) != 0) {
142        AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
143        ASSERT(false);
144        return false;
145    }
146    return true;
147}
148
149bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
150    // Write trie.
151    if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
152        AKLOGE("Trie cannot be written.");
153        return false;
154    }
155    // Write terminal position lookup table.
156    if (!mTerminalPositionLookupTable.flushToFile(file)) {
157        AKLOGE("Terminal position lookup table cannot be written.");
158        return false;
159    }
160    // Write probability dict content.
161    if (!mProbabilityDictContent.flushToFile(file)) {
162        AKLOGE("Probability dict content cannot be written.");
163        return false;
164    }
165    // Write bigram dict content.
166    if (!mBigramDictContent.flushToFile(file)) {
167        AKLOGE("Bigram dict content cannot be written.");
168        return false;
169    }
170    // Write shortcut dict content.
171    if (!mShortcutDictContent.flushToFile(file)) {
172        AKLOGE("Shortcut dict content cannot be written.");
173        return false;
174    }
175    return true;
176}
177
178Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
179        MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
180        const FormatUtils::FORMAT_VERSION formatVersion,
181        const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
182        : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
183          mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
184          mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
185                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
186          mExpandableTrieBuffer(
187                  ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
188                          contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
189                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
190          mTerminalPositionLookupTable(
191                  contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
192                  contentBufferSizes[
193                          Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
194          mProbabilityDictContent(
195                  contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
196                  contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
197                  mHeaderPolicy.hasHistoricalInfoOfWords()),
198          mLanguageModelDictContent(mHeaderPolicy.hasHistoricalInfoOfWords()),
199          mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
200                  &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
201                  mHeaderPolicy.hasHistoricalInfoOfWords()),
202          mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
203                  &contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
204          mIsUpdatable(mDictBuffer->isUpdatable()) {}
205
206Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
207        : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
208          mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
209          mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
210          mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
211          mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
212          mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
213          mIsUpdatable(true) {}
214
215} // namespace latinime
216