1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15#include "tensorflow/core/util/memmapped_file_system.h"
16
17#include "tensorflow/core/lib/core/errors.h"
18#include "tensorflow/core/platform/protobuf.h"
19#include "tensorflow/core/util/memmapped_file_system.pb.h"
20
21namespace tensorflow {
22
23namespace {
24
25uint64 DecodeUint64LittleEndian(const uint8* buffer) {
26  uint64 result = 0;
27  for (int i = 0; i < static_cast<int>(sizeof(uint64)); ++i) {
28    result |= static_cast<uint64>(buffer[i]) << (8 * i);
29  }
30  return result;
31}
32
33}  // namespace
34
35namespace {
36
37class ReadOnlyMemoryRegionFromMemmapped : public ReadOnlyMemoryRegion {
38 public:
39  ReadOnlyMemoryRegionFromMemmapped(const void* data, uint64 length)
40      : data_(data), length_(length) {}
41  ~ReadOnlyMemoryRegionFromMemmapped() override = default;
42  const void* data() override { return data_; }
43  uint64 length() override { return length_; }
44
45 private:
46  const void* const data_;
47  const uint64 length_;
48  // intentionally copyable
49};
50
51class RandomAccessFileFromMemmapped : public RandomAccessFile {
52 public:
53  RandomAccessFileFromMemmapped(const void* data, uint64 length)
54      : data_(data), length_(length) {}
55
56  ~RandomAccessFileFromMemmapped() override = default;
57
58  Status Read(uint64 offset, size_t to_read, StringPiece* result,
59              char* scratch) const override {
60    if (offset >= length_) {
61      *result = StringPiece(scratch, 0);
62      return Status(error::OUT_OF_RANGE, "Read after file end");
63    }
64    const uint64 region_left =
65        std::min(length_ - offset, static_cast<uint64>(to_read));
66    *result =
67        StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left);
68    return (region_left == to_read)
69               ? Status::OK()
70               : Status(error::OUT_OF_RANGE, "Read less bytes than requested");
71  }
72
73 private:
74  const void* const data_;
75  const uint64 length_;
76  // intentionally copyable
77};
78
79}  // namespace
80
81MemmappedFileSystem::MemmappedFileSystem() {}
82
83Status MemmappedFileSystem::FileExists(const string& fname) {
84  if (!mapped_memory_) {
85    return errors::FailedPrecondition("MemmappedEnv is not initialized");
86  }
87  const auto dir_element = directory_.find(fname);
88  if (dir_element != directory_.end()) {
89    return Status::OK();
90  }
91  return errors::NotFound(fname, " not found");
92}
93
94Status MemmappedFileSystem::NewRandomAccessFile(
95    const string& filename, std::unique_ptr<RandomAccessFile>* result) {
96  if (!mapped_memory_) {
97    return errors::FailedPrecondition("MemmappedEnv is not initialized");
98  }
99  const auto dir_element = directory_.find(filename);
100  if (dir_element == directory_.end()) {
101    return errors::NotFound("Region ", filename, " is not found");
102  }
103  result->reset(new RandomAccessFileFromMemmapped(
104      GetMemoryWithOffset(dir_element->second.offset),
105      dir_element->second.length));
106  return Status::OK();
107}
108
109Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile(
110    const string& filename, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
111  if (!mapped_memory_) {
112    return errors::FailedPrecondition("MemmappedEnv is not initialized");
113  }
114  const auto dir_element = directory_.find(filename);
115  if (dir_element == directory_.end()) {
116    return errors::NotFound("Region ", filename, " is not found");
117  }
118  result->reset(new ReadOnlyMemoryRegionFromMemmapped(
119      GetMemoryWithOffset(dir_element->second.offset),
120      dir_element->second.length));
121  return Status::OK();
122}
123
124Status MemmappedFileSystem::GetFileSize(const string& filename, uint64* size) {
125  if (!mapped_memory_) {
126    return errors::FailedPrecondition("MemmappedEnv is not initialized");
127  }
128  const auto dir_element = directory_.find(filename);
129  if (dir_element == directory_.end()) {
130    return errors::NotFound("Region ", filename, " is not found");
131  }
132  *size = dir_element->second.length;
133  return Status::OK();
134}
135
136Status MemmappedFileSystem::Stat(const string& fname, FileStatistics* stat) {
137  uint64 size;
138  auto status = GetFileSize(fname, &size);
139  if (status.ok()) {
140    stat->length = size;
141  }
142  return status;
143}
144
145Status MemmappedFileSystem::NewWritableFile(const string& filename,
146                                            std::unique_ptr<WritableFile>* wf) {
147  return errors::Unimplemented("memmapped format doesn't support writing");
148}
149
150Status MemmappedFileSystem::NewAppendableFile(
151    const string& filename, std::unique_ptr<WritableFile>* result) {
152  return errors::Unimplemented("memmapped format doesn't support writing");
153}
154
155Status MemmappedFileSystem::GetChildren(const string& filename,
156                                        std::vector<string>* strings) {
157  return errors::Unimplemented("memmapped format doesn't support GetChildren");
158}
159
160Status MemmappedFileSystem::DeleteFile(const string& filename) {
161  return errors::Unimplemented("memmapped format doesn't support DeleteFile");
162}
163
164Status MemmappedFileSystem::CreateDir(const string& dirname) {
165  return errors::Unimplemented("memmapped format doesn't support CreateDir");
166}
167
168Status MemmappedFileSystem::DeleteDir(const string& dirname) {
169  return errors::Unimplemented("memmapped format doesn't support DeleteDir");
170}
171
172Status MemmappedFileSystem::RenameFile(const string& filename_from,
173                                       const string& filename_to) {
174  return errors::Unimplemented("memmapped format doesn't support RenameFile");
175}
176
177const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const {
178  return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset;
179}
180
// Namespace-scope definitions of the two static constexpr members of
// MemmappedFileSystem. No initializer appears here; presumably the values are
// given with the in-class declarations in the header (not visible in this
// file). When COMPILER_MSVC is defined the members are spelled as pointers,
// otherwise as arrays.
#if defined(COMPILER_MSVC)
constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix;
constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef;
#else
constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[];
constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
#endif
188
189Status MemmappedFileSystem::InitializeFromFile(Env* env,
190                                               const string& filename) {
191  TF_RETURN_IF_ERROR(
192      env->NewReadOnlyMemoryRegionFromFile(filename, &mapped_memory_));
193  directory_.clear();
194  if (mapped_memory_->length() <= sizeof(uint64)) {
195    return errors::DataLoss("Corrupted memmapped model file: ", filename,
196                            " Invalid package size");
197  }
198  const auto memory_start =
199      reinterpret_cast<const uint8*>(mapped_memory_->data());
200  const uint64 directory_offset = DecodeUint64LittleEndian(
201      memory_start + mapped_memory_->length() - sizeof(uint64));
202  if (directory_offset > mapped_memory_->length() - sizeof(uint64)) {
203    return errors::DataLoss("Corrupted memmapped model file: ", filename,
204                            " Invalid directory offset");
205  }
206  MemmappedFileSystemDirectory proto_directory;
207  if (!ParseProtoUnlimited(
208          &proto_directory, memory_start + directory_offset,
209          mapped_memory_->length() - directory_offset - sizeof(uint64))) {
210    return errors::DataLoss("Corrupted memmapped model file: ", filename,
211                            " Can't parse its internal directory");
212  }
213
214  // Iterating in reverse order to get lengths of elements;
215  uint64 prev_element_offset = directory_offset;
216  for (auto element_iter = proto_directory.element().rbegin();
217       element_iter != proto_directory.element().rend(); ++element_iter) {
218    // Check that the element offset is in the right range.
219    if (element_iter->offset() >= prev_element_offset) {
220      return errors::DataLoss("Corrupted memmapped model file: ", filename,
221                              " Invalid offset of internal component");
222    }
223    if (!directory_
224             .insert(std::make_pair(
225                 element_iter->name(),
226                 FileRegion(element_iter->offset(),
227                            prev_element_offset - element_iter->offset())))
228             .second) {
229      return errors::DataLoss("Corrupted memmapped model file: ", filename,
230                              " Duplicate name of internal component ",
231                              element_iter->name());
232    }
233    prev_element_offset = element_iter->offset();
234  }
235  return Status::OK();
236}
237
238bool MemmappedFileSystem::IsMemmappedPackageFilename(const string& filename) {
239  return StringPiece(filename).starts_with(kMemmappedPackagePrefix);
240}
241
242namespace {
// True for ASCII letters, ASCII digits, '_' and '.'; false for anything else.
bool IsValidRegionChar(char c) {
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  const bool is_digit = '0' <= c && c <= '9';
  const bool is_punct = c == '_' || c == '.';
  return is_upper || is_lower || is_digit || is_punct;
}
247}  // namespace
248
249bool MemmappedFileSystem::IsWellFormedMemmappedPackageFilename(
250    const string& filename) {
251  if (!IsMemmappedPackageFilename(filename)) {
252    return false;
253  }
254  for (char c :
255       filename.substr(strlen(kMemmappedPackagePrefix),
256                       filename.length() - strlen(kMemmappedPackagePrefix))) {
257    if (!IsValidRegionChar(c)) {
258      return false;
259    }
260  }
261  return true;
262}
263
// Forwards `env` to the EnvWrapper base; no memmapped package is attached
// until InitializeFromFile() succeeds.
MemmappedEnv::MemmappedEnv(Env* env) : EnvWrapper(env) {}
265
266Status MemmappedEnv::GetFileSystemForFile(const string& fname,
267                                          FileSystem** result) {
268  if (MemmappedFileSystem::IsMemmappedPackageFilename(fname)) {
269    if (!memmapped_file_system_) {
270      return errors::FailedPrecondition(
271          "MemmappedEnv is not initialized from a file.");
272    }
273    *result = memmapped_file_system_.get();
274    return Status::OK();
275  }
276  return EnvWrapper::GetFileSystemForFile(fname, result);
277}
278
279Status MemmappedEnv::GetRegisteredFileSystemSchemes(
280    std::vector<string>* schemes) {
281  const auto status = EnvWrapper::GetRegisteredFileSystemSchemes(schemes);
282  if (status.ok()) {
283    schemes->emplace_back(MemmappedFileSystem::kMemmappedPackagePrefix);
284  }
285  return status;
286}
287
288Status MemmappedEnv::InitializeFromFile(const string& package_filename) {
289  std::unique_ptr<MemmappedFileSystem> file_system_ptr(new MemmappedFileSystem);
290  const auto status =
291      file_system_ptr->InitializeFromFile(target(), package_filename);
292  if (status.ok()) {
293    memmapped_file_system_ = std::move(file_system_ptr);
294  }
295  return status;
296}
297
298}  // namespace tensorflow
299