ver4_dict_buffers.cpp revision c0c674cdc0721a374e140ad5ee1409c0498b3262
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" 18 19#include <cerrno> 20#include <cstring> 21#include <fcntl.h> 22#include <sys/stat.h> 23#include <sys/types.h> 24#include <vector> 25 26#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" 27#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" 28#include "suggest/policyimpl/dictionary/utils/file_utils.h" 29#include "utils/byte_array_view.h" 30 31namespace latinime { 32 33/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers( 34 const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer, 35 const FormatUtils::FORMAT_VERSION formatVersion) { 36 if (!headerBuffer) { 37 ASSERT(false); 38 AKLOGE("The header buffer must be valid to open ver4 dict buffers."); 39 return Ver4DictBuffersPtr(nullptr); 40 } 41 // TODO: take only dictDirPath, and open both header and trie files in the constructor below 42 const bool isUpdatable = headerBuffer->isUpdatable(); 43 MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath, 44 Ver4DictConstants::BODY_FILE_EXTENSION, isUpdatable); 45 if (!bodyBuffer) { 46 return Ver4DictBuffersPtr(nullptr); 47 } 48 std::vector<uint8_t *> buffers; 49 std::vector<int> bufferSizes; 50 const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView(); 51 int position = 0; 52 while (position < static_cast<int>(buffer.size())) { 53 const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition( 54 buffer.data(), &position); 55 const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize); 56 buffers.push_back(subBuffer.data()); 57 bufferSizes.push_back(subBuffer.size()); 58 position += bufferSize; 59 if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) { 60 AKLOGE("The dict body file is corrupted."); 61 return Ver4DictBuffersPtr(nullptr); 62 } 63 } 64 if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) { 65 AKLOGE("The dict body file is corrupted."); 66 return Ver4DictBuffersPtr(nullptr); 67 } 68 return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer), 69 formatVersion, buffers, bufferSizes)); 70} 71 72bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, 73 const BufferWithExtendableBuffer *const headerBuffer) const { 74 // Create temporary directory. 75 const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath, 76 DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); 77 char tmpDirPath[tmpDirPathBufSize]; 78 FileUtils::getFilePathWithSuffix(dictDirPath, 79 DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize, 80 tmpDirPath); 81 if (FileUtils::existsDir(tmpDirPath)) { 82 if (!FileUtils::removeDirAndFiles(tmpDirPath)) { 83 AKLOGE("Existing directory %s cannot be removed.", tmpDirPath); 84 ASSERT(false); 85 return false; 86 } 87 } 88 umask(S_IWGRP | S_IWOTH); 89 if (mkdir(tmpDirPath, S_IRWXU) == -1) { 90 AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); 91 return false; 92 } 93 // Get dictionary base path. 94 const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */; 95 char dictName[dictNameBufSize]; 96 FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName); 97 const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName); 98 char dictPath[dictPathBufSize]; 99 FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath); 100 101 // Write header file. 102 if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, 103 Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) { 104 AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath, 105 Ver4DictConstants::HEADER_FILE_EXTENSION); 106 return false; 107 } 108 109 // Write body file. 110 const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath, 111 Ver4DictConstants::BODY_FILE_EXTENSION); 112 char bodyFilePath[bodyFilePathBufSize]; 113 FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION, 114 bodyFilePathBufSize, bodyFilePath); 115 116 const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); 117 if (fd == -1) { 118 AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno); 119 ASSERT(false); 120 return false; 121 } 122 FILE *const file = fdopen(fd, "wb"); 123 if (!file) { 124 AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno); 125 ASSERT(false); 126 return false; 127 } 128 129 if (!flushDictBuffers(file)) { 130 fclose(file); 131 return false; 132 } 133 fclose(file); 134 // Remove existing dictionary. 135 if (!FileUtils::removeDirAndFiles(dictDirPath)) { 136 AKLOGE("Existing directory %s cannot be removed.", dictDirPath); 137 ASSERT(false); 138 return false; 139 } 140 // Rename temporary directory. 141 if (rename(tmpDirPath, dictDirPath) != 0) { 142 AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath); 143 ASSERT(false); 144 return false; 145 } 146 return true; 147} 148 149bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const { 150 // Write trie. 151 if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) { 152 AKLOGE("Trie cannot be written."); 153 return false; 154 } 155 // Write terminal position lookup table. 156 if (!mTerminalPositionLookupTable.flushToFile(file)) { 157 AKLOGE("Terminal position lookup table cannot be written."); 158 return false; 159 } 160 // Write probability dict content. 161 if (!mProbabilityDictContent.flushToFile(file)) { 162 AKLOGE("Probability dict content cannot be written."); 163 return false; 164 } 165 // Write bigram dict content. 166 if (!mBigramDictContent.flushToFile(file)) { 167 AKLOGE("Bigram dict content cannot be written."); 168 return false; 169 } 170 // Write shortcut dict content. 171 if (!mShortcutDictContent.flushToFile(file)) { 172 AKLOGE("Shortcut dict content cannot be written."); 173 return false; 174 } 175 return true; 176} 177 178Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer, 179 MmappedBuffer::MmappedBufferPtr &&bodyBuffer, 180 const FormatUtils::FORMAT_VERSION formatVersion, 181 const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes) 182 : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)), 183 mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion), 184 mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(), 185 BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), 186 mExpandableTrieBuffer( 187 ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX], 188 contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]), 189 BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), 190 mTerminalPositionLookupTable( 191 contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX], 192 contentBufferSizes[ 193 Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]), 194 mProbabilityDictContent( 195 contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX], 196 contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX], 197 mHeaderPolicy.hasHistoricalInfoOfWords()), 198 mLanguageModelDictContent(mHeaderPolicy.hasHistoricalInfoOfWords()), 199 mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX], 200 &contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX], 201 mHeaderPolicy.hasHistoricalInfoOfWords()), 202 mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX], 203 &contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]), 204 mIsUpdatable(mDictBuffer->isUpdatable()) {} 205 206Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize) 207 : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy), 208 mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), 209 mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(), 210 mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()), 211 mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()), 212 mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), 213 mIsUpdatable(true) {} 214 215} // namespace latinime 216