file_system.h revision c9ed9bf846c6c8e8566082ce4ac201a529c23355
1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
17#define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
18
19#include <stdint.h>
20#include <functional>
21#include <string>
22#include <unordered_map>
23#include <vector>
24#include "tensorflow/core/lib/core/errors.h"
25#include "tensorflow/core/lib/core/status.h"
26#include "tensorflow/core/lib/core/stringpiece.h"
27#include "tensorflow/core/platform/file_statistics.h"
28#include "tensorflow/core/platform/macros.h"
29#include "tensorflow/core/platform/platform.h"
30#include "tensorflow/core/platform/protobuf.h"
31#include "tensorflow/core/platform/types.h"
32
33#ifdef PLATFORM_WINDOWS
34#undef DeleteFile
35#endif
36
37namespace tensorflow {
38
39class RandomAccessFile;
40class ReadOnlyMemoryRegion;
41class WritableFile;
42
43/// A generic interface for accessing a file system.  Implementations
44/// of custom filesystem adapters must implement this interface,
45/// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes.
46class FileSystem {
47 public:
48  /// \brief Creates a brand new random access read-only file with the
49  /// specified name.
50  ///
51  /// On success, stores a pointer to the new file in
52  /// *result and returns OK.  On failure stores NULL in *result and
53  /// returns non-OK.  If the file does not exist, returns a non-OK
54  /// status.
55  ///
56  /// The returned file may be concurrently accessed by multiple threads.
57  ///
58  /// The ownership of the returned RandomAccessFile is passed to the caller
59  /// and the object should be deleted when is not used.
60  virtual Status NewRandomAccessFile(
61      const string& fname, std::unique_ptr<RandomAccessFile>* result) = 0;
62
63  /// \brief Creates an object that writes to a new file with the specified
64  /// name.
65  ///
66  /// Deletes any existing file with the same name and creates a
67  /// new file.  On success, stores a pointer to the new file in
68  /// *result and returns OK.  On failure stores NULL in *result and
69  /// returns non-OK.
70  ///
71  /// The returned file will only be accessed by one thread at a time.
72  ///
73  /// The ownership of the returned WritableFile is passed to the caller
74  /// and the object should be deleted when is not used.
75  virtual Status NewWritableFile(const string& fname,
76                                 std::unique_ptr<WritableFile>* result) = 0;
77
78  /// \brief Creates an object that either appends to an existing file, or
79  /// writes to a new file (if the file does not exist to begin with).
80  ///
81  /// On success, stores a pointer to the new file in *result and
82  /// returns OK.  On failure stores NULL in *result and returns
83  /// non-OK.
84  ///
85  /// The returned file will only be accessed by one thread at a time.
86  ///
87  /// The ownership of the returned WritableFile is passed to the caller
88  /// and the object should be deleted when is not used.
89  virtual Status NewAppendableFile(const string& fname,
90                                   std::unique_ptr<WritableFile>* result) = 0;
91
92  /// \brief Creates a readonly region of memory with the file context.
93  ///
94  /// On success, it returns a pointer to read-only memory region
95  /// from the content of file fname. The ownership of the region is passed to
96  /// the caller. On failure stores nullptr in *result and returns non-OK.
97  ///
98  /// The returned memory region can be accessed from many threads in parallel.
99  ///
100  /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller
101  /// and the object should be deleted when is not used.
102  virtual Status NewReadOnlyMemoryRegionFromFile(
103      const string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) = 0;
104
105  /// Returns OK if the named path exists and NOT_FOUND otherwise.
106  virtual Status FileExists(const string& fname) = 0;
107
108  /// Returns true if all the listed files exist, false otherwise.
109  /// if status is not null, populate the vector with a detailed status
110  /// for each file.
111  virtual bool FilesExist(const std::vector<string>& files,
112                          std::vector<Status>* status);
113
114  /// \brief Returns the immediate children in the given directory.
115  ///
116  /// The returned paths are relative to 'dir'.
117  virtual Status GetChildren(const string& dir,
118                             std::vector<string>* result) = 0;
119
120  /// \brief Given a pattern, stores in *results the set of paths that matches
121  /// that pattern. *results is cleared.
122  ///
123  /// pattern must match all of a name, not just a substring.
124  ///
125  /// pattern: { term }
126  /// term:
127  ///   '*': matches any sequence of non-'/' characters
128  ///   '?': matches a single non-'/' character
129  ///   '[' [ '^' ] { match-list } ']':
130  ///        matches any single character (not) on the list
131  ///   c: matches character c (c != '*', '?', '\\', '[')
132  ///   '\\' c: matches character c
133  /// character-range:
134  ///   c: matches character c (c != '\\', '-', ']')
135  ///   '\\' c: matches character c
136  ///   lo '-' hi: matches character c for lo <= c <= hi
137  ///
138  /// Typical return codes:
139  ///  * OK - no errors
140  ///  * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not
141  ///                    implemented
142  /// The default implementation uses a combination of GetChildren, MatchPath
143  /// and IsDirectory.
144  virtual Status GetMatchingPaths(const string& pattern,
145                                  std::vector<string>* results);
146
147  /// \brief Obtains statistics for the given path.
148  virtual Status Stat(const string& fname, FileStatistics* stat) = 0;
149
150  /// \brief Deletes the named file.
151  virtual Status DeleteFile(const string& fname) = 0;
152
153  /// \brief Creates the specified directory.
154  /// Typical return codes:
155  ///  * OK - successfully created the directory.
156  ///  * ALREADY_EXISTS - directory with name dirname already exists.
157  ///  * PERMISSION_DENIED - dirname is not writable.
158  virtual Status CreateDir(const string& dirname) = 0;
159
160  /// \brief Creates the specified directory and all the necessary
161  /// subdirectories.
162  /// Typical return codes:
163  ///  * OK - successfully created the directory and sub directories, even if
164  ///         they were already created.
165  ///  * PERMISSION_DENIED - dirname or some subdirectory is not writable.
166  virtual Status RecursivelyCreateDir(const string& dirname);
167
168  /// \brief Deletes the specified directory.
169  virtual Status DeleteDir(const string& dirname) = 0;
170
171  /// \brief Deletes the specified directory and all subdirectories and files
172  /// underneath it. undeleted_files and undeleted_dirs stores the number of
173  /// files and directories that weren't deleted (unspecified if the return
174  /// status is not OK).
175  /// REQUIRES: undeleted_files, undeleted_dirs to be not null.
176  /// Typical return codes:
177  ///  * OK - dirname exists and we were able to delete everything underneath.
178  ///  * NOT_FOUND - dirname doesn't exist
179  ///  * PERMISSION_DENIED - dirname or some descendant is not writable
180  ///  * UNIMPLEMENTED - Some underlying functions (like Delete) are not
181  ///                    implemented
182  virtual Status DeleteRecursively(const string& dirname,
183                                   int64* undeleted_files,
184                                   int64* undeleted_dirs);
185
186  /// \brief Stores the size of `fname` in `*file_size`.
187  virtual Status GetFileSize(const string& fname, uint64* file_size) = 0;
188
189  /// \brief Overwrites the target if it exists.
190  virtual Status RenameFile(const string& src, const string& target) = 0;
191
192  /// \brief Translate an URI to a filename for the FileSystem implementation.
193  ///
194  /// The implementation in this class cleans up the path, removing
195  /// duplicate /'s, resolving .. and . (more details in
196  /// tensorflow::lib::io::CleanPath).
197  virtual string TranslateName(const string& name) const;
198
199  /// \brief Returns whether the given path is a directory or not.
200  ///
201  /// Typical return codes (not guaranteed exhaustive):
202  ///  * OK - The path exists and is a directory.
203  ///  * FAILED_PRECONDITION - The path exists and is not a directory.
204  ///  * NOT_FOUND - The path entry does not exist.
205  ///  * PERMISSION_DENIED - Insufficient permissions.
206  ///  * UNIMPLEMENTED - The file factory doesn't support directories.
207  virtual Status IsDirectory(const string& fname);
208
209  /// \brief Flushes any cached filesystem objects from memory.
210  virtual void FlushCaches();
211
212  FileSystem() {}
213
214  virtual ~FileSystem();
215};
216
217/// A file abstraction for randomly reading the contents of a file.
218class RandomAccessFile {
219 public:
220  RandomAccessFile() {}
221  virtual ~RandomAccessFile();
222
223  /// \brief Reads up to `n` bytes from the file starting at `offset`.
224  ///
225  /// `scratch[0..n-1]` may be written by this routine.  Sets `*result`
226  /// to the data that was read (including if fewer than `n` bytes were
227  /// successfully read).  May set `*result` to point at data in
228  /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when
229  /// `*result` is used.
230  ///
231  /// On OK returned status: `n` bytes have been stored in `*result`.
232  /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`.
233  ///
234  /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result`
235  /// because of EOF.
236  ///
237  /// Safe for concurrent use by multiple threads.
238  virtual Status Read(uint64 offset, size_t n, StringPiece* result,
239                      char* scratch) const = 0;
240
241 private:
242  TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile);
243};
244
245/// \brief A file abstraction for sequential writing.
246///
247/// The implementation must provide buffering since callers may append
248/// small fragments at a time to the file.
249class WritableFile {
250 public:
251  WritableFile() {}
252  virtual ~WritableFile();
253
254  /// \brief Append 'data' to the file.
255  virtual Status Append(const StringPiece& data) = 0;
256
257  /// \brief Close the file.
258  ///
259  /// Flush() and de-allocate resources associated with this file
260  ///
261  /// Typical return codes (not guaranteed to be exhaustive):
262  ///  * OK
263  ///  * Other codes, as returned from Flush()
264  virtual Status Close() = 0;
265
266  /// \brief Flushes the file and optionally syncs contents to filesystem.
267  ///
268  /// This should flush any local buffers whose contents have not been
269  /// delivered to the filesystem.
270  ///
271  /// If the process terminates after a successful flush, the contents
272  /// may still be persisted, since the underlying filesystem may
273  /// eventually flush the contents.  If the OS or machine crashes
274  /// after a successful flush, the contents may or may not be
275  /// persisted, depending on the implementation.
276  virtual Status Flush() = 0;
277
278  /// \brief Syncs contents of file to filesystem.
279  ///
280  /// This waits for confirmation from the filesystem that the contents
281  /// of the file have been persisted to the filesystem; if the OS
282  /// or machine crashes after a successful Sync, the contents should
283  /// be properly saved.
284  virtual Status Sync() = 0;
285
286 private:
287  TF_DISALLOW_COPY_AND_ASSIGN(WritableFile);
288};
289
290/// \brief A readonly memmapped file abstraction.
291///
292/// The implementation must guarantee that all memory is accessible when the
293/// object exists, independently from the Env that created it.
294class ReadOnlyMemoryRegion {
295 public:
296  ReadOnlyMemoryRegion() {}
297  virtual ~ReadOnlyMemoryRegion() = default;
298
299  /// \brief Returns a pointer to the memory region.
300  virtual const void* data() = 0;
301
302  /// \brief Returns the length of the memory region in bytes.
303  virtual uint64 length() = 0;
304};
305
306// START_SKIP_DOXYGEN
307
308#ifndef SWIG
309// Degenerate file system that provides no implementations.
310class NullFileSystem : public FileSystem {
311 public:
312  NullFileSystem() {}
313
314  ~NullFileSystem() override = default;
315
316  Status NewRandomAccessFile(
317      const string& fname, std::unique_ptr<RandomAccessFile>* result) override {
318    return errors::Unimplemented("NewRandomAccessFile unimplemented");
319  }
320
321  Status NewWritableFile(const string& fname,
322                         std::unique_ptr<WritableFile>* result) override {
323    return errors::Unimplemented("NewWritableFile unimplemented");
324  }
325
326  Status NewAppendableFile(const string& fname,
327                           std::unique_ptr<WritableFile>* result) override {
328    return errors::Unimplemented("NewAppendableFile unimplemented");
329  }
330
331  Status NewReadOnlyMemoryRegionFromFile(
332      const string& fname,
333      std::unique_ptr<ReadOnlyMemoryRegion>* result) override {
334    return errors::Unimplemented(
335        "NewReadOnlyMemoryRegionFromFile unimplemented");
336  }
337
338  Status FileExists(const string& fname) override {
339    return errors::Unimplemented("FileExists unimplemented");
340  }
341
342  Status GetChildren(const string& dir, std::vector<string>* result) override {
343    return errors::Unimplemented("GetChildren unimplemented");
344  }
345
346  Status DeleteFile(const string& fname) override {
347    return errors::Unimplemented("DeleteFile unimplemented");
348  }
349
350  Status CreateDir(const string& dirname) override {
351    return errors::Unimplemented("CreateDir unimplemented");
352  }
353
354  Status DeleteDir(const string& dirname) override {
355    return errors::Unimplemented("DeleteDir unimplemented");
356  }
357
358  Status GetFileSize(const string& fname, uint64* file_size) override {
359    return errors::Unimplemented("GetFileSize unimplemented");
360  }
361
362  Status RenameFile(const string& src, const string& target) override {
363    return errors::Unimplemented("RenameFile unimplemented");
364  }
365
366  Status Stat(const string& fname, FileStatistics* stat) override {
367    return errors::Unimplemented("Stat unimplemented");
368  }
369};
370#endif
371
372// END_SKIP_DOXYGEN
373
374/// \brief A registry for file system implementations.
375///
376/// Filenames are specified as an URI, which is of the form
377/// [scheme://]<filename>.
378/// File system implementations are registered using the REGISTER_FILE_SYSTEM
379/// macro, providing the 'scheme' as the key.
380class FileSystemRegistry {
381 public:
382  typedef std::function<FileSystem*()> Factory;
383
384  virtual ~FileSystemRegistry();
385  virtual Status Register(const string& scheme, Factory factory) = 0;
386  virtual FileSystem* Lookup(const string& scheme) = 0;
387  virtual Status GetRegisteredFileSystemSchemes(
388      std::vector<string>* schemes) = 0;
389};
390
391}  // namespace tensorflow
392
393#endif  // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
394