file_system.h revision c9ed9bf846c6c8e8566082ce4ac201a529c23355
1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 17#define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 18 19#include <stdint.h> 20#include <functional> 21#include <string> 22#include <unordered_map> 23#include <vector> 24#include "tensorflow/core/lib/core/errors.h" 25#include "tensorflow/core/lib/core/status.h" 26#include "tensorflow/core/lib/core/stringpiece.h" 27#include "tensorflow/core/platform/file_statistics.h" 28#include "tensorflow/core/platform/macros.h" 29#include "tensorflow/core/platform/platform.h" 30#include "tensorflow/core/platform/protobuf.h" 31#include "tensorflow/core/platform/types.h" 32 33#ifdef PLATFORM_WINDOWS 34#undef DeleteFile 35#endif 36 37namespace tensorflow { 38 39class RandomAccessFile; 40class ReadOnlyMemoryRegion; 41class WritableFile; 42 43/// A generic interface for accessing a file system. Implementations 44/// of custom filesystem adapters must implement this interface, 45/// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. 46class FileSystem { 47 public: 48 /// \brief Creates a brand new random access read-only file with the 49 /// specified name. 50 /// 51 /// On success, stores a pointer to the new file in 52 /// *result and returns OK. On failure stores NULL in *result and 53 /// returns non-OK. If the file does not exist, returns a non-OK 54 /// status. 55 /// 56 /// The returned file may be concurrently accessed by multiple threads. 57 /// 58 /// The ownership of the returned RandomAccessFile is passed to the caller 59 /// and the object should be deleted when is not used. 60 virtual Status NewRandomAccessFile( 61 const string& fname, std::unique_ptr<RandomAccessFile>* result) = 0; 62 63 /// \brief Creates an object that writes to a new file with the specified 64 /// name. 65 /// 66 /// Deletes any existing file with the same name and creates a 67 /// new file. On success, stores a pointer to the new file in 68 /// *result and returns OK. On failure stores NULL in *result and 69 /// returns non-OK. 70 /// 71 /// The returned file will only be accessed by one thread at a time. 72 /// 73 /// The ownership of the returned WritableFile is passed to the caller 74 /// and the object should be deleted when is not used. 75 virtual Status NewWritableFile(const string& fname, 76 std::unique_ptr<WritableFile>* result) = 0; 77 78 /// \brief Creates an object that either appends to an existing file, or 79 /// writes to a new file (if the file does not exist to begin with). 80 /// 81 /// On success, stores a pointer to the new file in *result and 82 /// returns OK. On failure stores NULL in *result and returns 83 /// non-OK. 84 /// 85 /// The returned file will only be accessed by one thread at a time. 86 /// 87 /// The ownership of the returned WritableFile is passed to the caller 88 /// and the object should be deleted when is not used. 89 virtual Status NewAppendableFile(const string& fname, 90 std::unique_ptr<WritableFile>* result) = 0; 91 92 /// \brief Creates a readonly region of memory with the file context. 93 /// 94 /// On success, it returns a pointer to read-only memory region 95 /// from the content of file fname. The ownership of the region is passed to 96 /// the caller. On failure stores nullptr in *result and returns non-OK. 97 /// 98 /// The returned memory region can be accessed from many threads in parallel. 99 /// 100 /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller 101 /// and the object should be deleted when is not used. 102 virtual Status NewReadOnlyMemoryRegionFromFile( 103 const string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) = 0; 104 105 /// Returns OK if the named path exists and NOT_FOUND otherwise. 106 virtual Status FileExists(const string& fname) = 0; 107 108 /// Returns true if all the listed files exist, false otherwise. 109 /// if status is not null, populate the vector with a detailed status 110 /// for each file. 111 virtual bool FilesExist(const std::vector<string>& files, 112 std::vector<Status>* status); 113 114 /// \brief Returns the immediate children in the given directory. 115 /// 116 /// The returned paths are relative to 'dir'. 117 virtual Status GetChildren(const string& dir, 118 std::vector<string>* result) = 0; 119 120 /// \brief Given a pattern, stores in *results the set of paths that matches 121 /// that pattern. *results is cleared. 122 /// 123 /// pattern must match all of a name, not just a substring. 124 /// 125 /// pattern: { term } 126 /// term: 127 /// '*': matches any sequence of non-'/' characters 128 /// '?': matches a single non-'/' character 129 /// '[' [ '^' ] { match-list } ']': 130 /// matches any single character (not) on the list 131 /// c: matches character c (c != '*', '?', '\\', '[') 132 /// '\\' c: matches character c 133 /// character-range: 134 /// c: matches character c (c != '\\', '-', ']') 135 /// '\\' c: matches character c 136 /// lo '-' hi: matches character c for lo <= c <= hi 137 /// 138 /// Typical return codes: 139 /// * OK - no errors 140 /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not 141 /// implemented 142 /// The default implementation uses a combination of GetChildren, MatchPath 143 /// and IsDirectory. 144 virtual Status GetMatchingPaths(const string& pattern, 145 std::vector<string>* results); 146 147 /// \brief Obtains statistics for the given path. 148 virtual Status Stat(const string& fname, FileStatistics* stat) = 0; 149 150 /// \brief Deletes the named file. 151 virtual Status DeleteFile(const string& fname) = 0; 152 153 /// \brief Creates the specified directory. 154 /// Typical return codes: 155 /// * OK - successfully created the directory. 156 /// * ALREADY_EXISTS - directory with name dirname already exists. 157 /// * PERMISSION_DENIED - dirname is not writable. 158 virtual Status CreateDir(const string& dirname) = 0; 159 160 /// \brief Creates the specified directory and all the necessary 161 /// subdirectories. 162 /// Typical return codes: 163 /// * OK - successfully created the directory and sub directories, even if 164 /// they were already created. 165 /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. 166 virtual Status RecursivelyCreateDir(const string& dirname); 167 168 /// \brief Deletes the specified directory. 169 virtual Status DeleteDir(const string& dirname) = 0; 170 171 /// \brief Deletes the specified directory and all subdirectories and files 172 /// underneath it. undeleted_files and undeleted_dirs stores the number of 173 /// files and directories that weren't deleted (unspecified if the return 174 /// status is not OK). 175 /// REQUIRES: undeleted_files, undeleted_dirs to be not null. 176 /// Typical return codes: 177 /// * OK - dirname exists and we were able to delete everything underneath. 178 /// * NOT_FOUND - dirname doesn't exist 179 /// * PERMISSION_DENIED - dirname or some descendant is not writable 180 /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not 181 /// implemented 182 virtual Status DeleteRecursively(const string& dirname, 183 int64* undeleted_files, 184 int64* undeleted_dirs); 185 186 /// \brief Stores the size of `fname` in `*file_size`. 187 virtual Status GetFileSize(const string& fname, uint64* file_size) = 0; 188 189 /// \brief Overwrites the target if it exists. 190 virtual Status RenameFile(const string& src, const string& target) = 0; 191 192 /// \brief Translate an URI to a filename for the FileSystem implementation. 193 /// 194 /// The implementation in this class cleans up the path, removing 195 /// duplicate /'s, resolving .. and . (more details in 196 /// tensorflow::lib::io::CleanPath). 197 virtual string TranslateName(const string& name) const; 198 199 /// \brief Returns whether the given path is a directory or not. 200 /// 201 /// Typical return codes (not guaranteed exhaustive): 202 /// * OK - The path exists and is a directory. 203 /// * FAILED_PRECONDITION - The path exists and is not a directory. 204 /// * NOT_FOUND - The path entry does not exist. 205 /// * PERMISSION_DENIED - Insufficient permissions. 206 /// * UNIMPLEMENTED - The file factory doesn't support directories. 207 virtual Status IsDirectory(const string& fname); 208 209 /// \brief Flushes any cached filesystem objects from memory. 210 virtual void FlushCaches(); 211 212 FileSystem() {} 213 214 virtual ~FileSystem(); 215}; 216 217/// A file abstraction for randomly reading the contents of a file. 218class RandomAccessFile { 219 public: 220 RandomAccessFile() {} 221 virtual ~RandomAccessFile(); 222 223 /// \brief Reads up to `n` bytes from the file starting at `offset`. 224 /// 225 /// `scratch[0..n-1]` may be written by this routine. Sets `*result` 226 /// to the data that was read (including if fewer than `n` bytes were 227 /// successfully read). May set `*result` to point at data in 228 /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when 229 /// `*result` is used. 230 /// 231 /// On OK returned status: `n` bytes have been stored in `*result`. 232 /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`. 233 /// 234 /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result` 235 /// because of EOF. 236 /// 237 /// Safe for concurrent use by multiple threads. 238 virtual Status Read(uint64 offset, size_t n, StringPiece* result, 239 char* scratch) const = 0; 240 241 private: 242 TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile); 243}; 244 245/// \brief A file abstraction for sequential writing. 246/// 247/// The implementation must provide buffering since callers may append 248/// small fragments at a time to the file. 249class WritableFile { 250 public: 251 WritableFile() {} 252 virtual ~WritableFile(); 253 254 /// \brief Append 'data' to the file. 255 virtual Status Append(const StringPiece& data) = 0; 256 257 /// \brief Close the file. 258 /// 259 /// Flush() and de-allocate resources associated with this file 260 /// 261 /// Typical return codes (not guaranteed to be exhaustive): 262 /// * OK 263 /// * Other codes, as returned from Flush() 264 virtual Status Close() = 0; 265 266 /// \brief Flushes the file and optionally syncs contents to filesystem. 267 /// 268 /// This should flush any local buffers whose contents have not been 269 /// delivered to the filesystem. 270 /// 271 /// If the process terminates after a successful flush, the contents 272 /// may still be persisted, since the underlying filesystem may 273 /// eventually flush the contents. If the OS or machine crashes 274 /// after a successful flush, the contents may or may not be 275 /// persisted, depending on the implementation. 276 virtual Status Flush() = 0; 277 278 /// \brief Syncs contents of file to filesystem. 279 /// 280 /// This waits for confirmation from the filesystem that the contents 281 /// of the file have been persisted to the filesystem; if the OS 282 /// or machine crashes after a successful Sync, the contents should 283 /// be properly saved. 284 virtual Status Sync() = 0; 285 286 private: 287 TF_DISALLOW_COPY_AND_ASSIGN(WritableFile); 288}; 289 290/// \brief A readonly memmapped file abstraction. 291/// 292/// The implementation must guarantee that all memory is accessible when the 293/// object exists, independently from the Env that created it. 294class ReadOnlyMemoryRegion { 295 public: 296 ReadOnlyMemoryRegion() {} 297 virtual ~ReadOnlyMemoryRegion() = default; 298 299 /// \brief Returns a pointer to the memory region. 300 virtual const void* data() = 0; 301 302 /// \brief Returns the length of the memory region in bytes. 303 virtual uint64 length() = 0; 304}; 305 306// START_SKIP_DOXYGEN 307 308#ifndef SWIG 309// Degenerate file system that provides no implementations. 310class NullFileSystem : public FileSystem { 311 public: 312 NullFileSystem() {} 313 314 ~NullFileSystem() override = default; 315 316 Status NewRandomAccessFile( 317 const string& fname, std::unique_ptr<RandomAccessFile>* result) override { 318 return errors::Unimplemented("NewRandomAccessFile unimplemented"); 319 } 320 321 Status NewWritableFile(const string& fname, 322 std::unique_ptr<WritableFile>* result) override { 323 return errors::Unimplemented("NewWritableFile unimplemented"); 324 } 325 326 Status NewAppendableFile(const string& fname, 327 std::unique_ptr<WritableFile>* result) override { 328 return errors::Unimplemented("NewAppendableFile unimplemented"); 329 } 330 331 Status NewReadOnlyMemoryRegionFromFile( 332 const string& fname, 333 std::unique_ptr<ReadOnlyMemoryRegion>* result) override { 334 return errors::Unimplemented( 335 "NewReadOnlyMemoryRegionFromFile unimplemented"); 336 } 337 338 Status FileExists(const string& fname) override { 339 return errors::Unimplemented("FileExists unimplemented"); 340 } 341 342 Status GetChildren(const string& dir, std::vector<string>* result) override { 343 return errors::Unimplemented("GetChildren unimplemented"); 344 } 345 346 Status DeleteFile(const string& fname) override { 347 return errors::Unimplemented("DeleteFile unimplemented"); 348 } 349 350 Status CreateDir(const string& dirname) override { 351 return errors::Unimplemented("CreateDir unimplemented"); 352 } 353 354 Status DeleteDir(const string& dirname) override { 355 return errors::Unimplemented("DeleteDir unimplemented"); 356 } 357 358 Status GetFileSize(const string& fname, uint64* file_size) override { 359 return errors::Unimplemented("GetFileSize unimplemented"); 360 } 361 362 Status RenameFile(const string& src, const string& target) override { 363 return errors::Unimplemented("RenameFile unimplemented"); 364 } 365 366 Status Stat(const string& fname, FileStatistics* stat) override { 367 return errors::Unimplemented("Stat unimplemented"); 368 } 369}; 370#endif 371 372// END_SKIP_DOXYGEN 373 374/// \brief A registry for file system implementations. 375/// 376/// Filenames are specified as an URI, which is of the form 377/// [scheme://]<filename>. 378/// File system implementations are registered using the REGISTER_FILE_SYSTEM 379/// macro, providing the 'scheme' as the key. 380class FileSystemRegistry { 381 public: 382 typedef std::function<FileSystem*()> Factory; 383 384 virtual ~FileSystemRegistry(); 385 virtual Status Register(const string& scheme, Factory factory) = 0; 386 virtual FileSystem* Lookup(const string& scheme) = 0; 387 virtual Status GetRegisteredFileSystemSchemes( 388 std::vector<string>* schemes) = 0; 389}; 390 391} // namespace tensorflow 392 393#endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 394