FileManager.cpp revision 5ba0559f2f0ddd62de16547af514dbaf93b79585
1//===--- FileManager.cpp - File System Probing and Caching ----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the FileManager interface.
11//
12//===----------------------------------------------------------------------===//
13//
14// TODO: This should index all interesting directories with dirent calls.
15//  getdirentries ?
16//  opendir/readdir_r/closedir ?
17//
18//===----------------------------------------------------------------------===//
19
20#include "clang/Basic/FileManager.h"
21#include "clang/Basic/FileSystemStatCache.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/raw_ostream.h"
26#include "llvm/Support/Path.h"
27#include "llvm/Support/system_error.h"
28#include "llvm/Config/llvm-config.h"
29#include <map>
30#include <set>
31#include <string>
32
33// FIXME: This is terrible, we need this for ::close.
34#if !defined(_MSC_VER) && !defined(__MINGW32__)
35#include <unistd.h>
36#include <sys/uio.h>
37#else
38#include <io.h>
39#endif
40using namespace clang;
41
42// FIXME: Enhance libsystem to support inode and other fields.
43#include <sys/stat.h>
44
/// NON_EXISTENT_DIR - Sentinel stored in the directory cache for a dir name
/// that doesn't exist on the disk.  It is the value -1 converted to a pointer,
/// so it is distinct from null.
#define NON_EXISTENT_DIR \
  reinterpret_cast<DirectoryEntry*>(static_cast<intptr_t>(-1))

/// NON_EXISTENT_FILE - Sentinel stored in the file cache for a filename that
/// doesn't exist on the disk.  It is the value -1 converted to a pointer, so
/// it is distinct from null.
#define NON_EXISTENT_FILE \
  reinterpret_cast<FileEntry*>(static_cast<intptr_t>(-1))
52
53
54FileEntry::~FileEntry() {
55  // If this FileEntry owns an open file descriptor that never got used, close
56  // it.
57  if (FD != -1) ::close(FD);
58}
59
60//===----------------------------------------------------------------------===//
61// Windows.
62//===----------------------------------------------------------------------===//
63
64#ifdef LLVM_ON_WIN32
65
66namespace {
67  static std::string GetFullPath(const char *relPath) {
68    char *absPathStrPtr = _fullpath(NULL, relPath, 0);
69    assert(absPathStrPtr && "_fullpath() returned NULL!");
70
71    std::string absPath(absPathStrPtr);
72
73    free(absPathStrPtr);
74    return absPath;
75  }
76}
77
78class FileManager::UniqueDirContainer {
79  /// UniqueDirs - Cache from full path to existing directories/files.
80  ///
81  llvm::StringMap<DirectoryEntry> UniqueDirs;
82
83public:
84  /// getDirectory - Return an existing DirectoryEntry with the given
85  /// name if there is already one; otherwise create and return a
86  /// default-constructed DirectoryEntry.
87  DirectoryEntry &getDirectory(const char *Name,
88                               const struct stat & /*StatBuf*/) {
89    std::string FullPath(GetFullPath(Name));
90    return UniqueDirs.GetOrCreateValue(FullPath).getValue();
91  }
92
93  size_t size() const { return UniqueDirs.size(); }
94};
95
96class FileManager::UniqueFileContainer {
97  /// UniqueFiles - Cache from full path to existing directories/files.
98  ///
99  llvm::StringMap<FileEntry, llvm::BumpPtrAllocator> UniqueFiles;
100
101public:
102  /// getFile - Return an existing FileEntry with the given name if
103  /// there is already one; otherwise create and return a
104  /// default-constructed FileEntry.
105  FileEntry &getFile(const char *Name, const struct stat & /*StatBuf*/) {
106    std::string FullPath(GetFullPath(Name));
107
108    // Lowercase string because Windows filesystem is case insensitive.
109    FullPath = StringRef(FullPath).lower();
110    return UniqueFiles.GetOrCreateValue(FullPath).getValue();
111  }
112
113  size_t size() const { return UniqueFiles.size(); }
114
115  void erase(const FileEntry *Entry) { UniqueFiles.erase(Entry->getName()); }
116};
117
118//===----------------------------------------------------------------------===//
119// Unix-like Systems.
120//===----------------------------------------------------------------------===//
121
122#else
123
124class FileManager::UniqueDirContainer {
125  /// UniqueDirs - Cache from ID's to existing directories/files.
126  std::map<std::pair<dev_t, ino_t>, DirectoryEntry> UniqueDirs;
127
128public:
129  /// getDirectory - Return an existing DirectoryEntry with the given
130  /// ID's if there is already one; otherwise create and return a
131  /// default-constructed DirectoryEntry.
132  DirectoryEntry &getDirectory(const char * /*Name*/,
133                               const struct stat &StatBuf) {
134    return UniqueDirs[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)];
135  }
136
137  size_t size() const { return UniqueDirs.size(); }
138};
139
140class FileManager::UniqueFileContainer {
141  /// UniqueFiles - Cache from ID's to existing directories/files.
142  std::set<FileEntry> UniqueFiles;
143
144public:
145  /// getFile - Return an existing FileEntry with the given ID's if
146  /// there is already one; otherwise create and return a
147  /// default-constructed FileEntry.
148  FileEntry &getFile(const char * /*Name*/, const struct stat &StatBuf) {
149    return
150      const_cast<FileEntry&>(
151                    *UniqueFiles.insert(FileEntry(StatBuf.st_dev,
152                                                  StatBuf.st_ino,
153                                                  StatBuf.st_mode)).first);
154  }
155
156  size_t size() const { return UniqueFiles.size(); }
157
158  void erase(const FileEntry *Entry) { UniqueFiles.erase(*Entry); }
159};
160
161#endif
162
163//===----------------------------------------------------------------------===//
164// Common logic.
165//===----------------------------------------------------------------------===//
166
167FileManager::FileManager(const FileSystemOptions &FSO)
168  : FileSystemOpts(FSO),
169    UniqueRealDirs(*new UniqueDirContainer()),
170    UniqueRealFiles(*new UniqueFileContainer()),
171    SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) {
172  NumDirLookups = NumFileLookups = 0;
173  NumDirCacheMisses = NumFileCacheMisses = 0;
174}
175
176FileManager::~FileManager() {
177  delete &UniqueRealDirs;
178  delete &UniqueRealFiles;
179  for (unsigned i = 0, e = VirtualFileEntries.size(); i != e; ++i)
180    delete VirtualFileEntries[i];
181  for (unsigned i = 0, e = VirtualDirectoryEntries.size(); i != e; ++i)
182    delete VirtualDirectoryEntries[i];
183}
184
185void FileManager::addStatCache(FileSystemStatCache *statCache,
186                               bool AtBeginning) {
187  assert(statCache && "No stat cache provided?");
188  if (AtBeginning || StatCache.get() == 0) {
189    statCache->setNextStatCache(StatCache.take());
190    StatCache.reset(statCache);
191    return;
192  }
193
194  FileSystemStatCache *LastCache = StatCache.get();
195  while (LastCache->getNextStatCache())
196    LastCache = LastCache->getNextStatCache();
197
198  LastCache->setNextStatCache(statCache);
199}
200
201void FileManager::removeStatCache(FileSystemStatCache *statCache) {
202  if (!statCache)
203    return;
204
205  if (StatCache.get() == statCache) {
206    // This is the first stat cache.
207    StatCache.reset(StatCache->takeNextStatCache());
208    return;
209  }
210
211  // Find the stat cache in the list.
212  FileSystemStatCache *PrevCache = StatCache.get();
213  while (PrevCache && PrevCache->getNextStatCache() != statCache)
214    PrevCache = PrevCache->getNextStatCache();
215
216  assert(PrevCache && "Stat cache not found for removal");
217  PrevCache->setNextStatCache(statCache->getNextStatCache());
218}
219
220/// \brief Retrieve the directory that the given file name resides in.
221/// Filename can point to either a real file or a virtual file.
222static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
223                                                  StringRef Filename,
224                                                  bool CacheFailure) {
225  if (Filename.empty())
226    return NULL;
227
228  if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
229    return NULL;  // If Filename is a directory.
230
231  StringRef DirName = llvm::sys::path::parent_path(Filename);
232  // Use the current directory if file has no path component.
233  if (DirName.empty())
234    DirName = ".";
235
236  return FileMgr.getDirectory(DirName, CacheFailure);
237}
238
239/// Add all ancestors of the given path (pointing to either a file or
240/// a directory) as virtual directories.
241void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
242  StringRef DirName = llvm::sys::path::parent_path(Path);
243  if (DirName.empty())
244    return;
245
246  llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
247    SeenDirEntries.GetOrCreateValue(DirName);
248
249  // When caching a virtual directory, we always cache its ancestors
250  // at the same time.  Therefore, if DirName is already in the cache,
251  // we don't need to recurse as its ancestors must also already be in
252  // the cache.
253  if (NamedDirEnt.getValue())
254    return;
255
256  // Add the virtual directory to the cache.
257  DirectoryEntry *UDE = new DirectoryEntry;
258  UDE->Name = NamedDirEnt.getKeyData();
259  NamedDirEnt.setValue(UDE);
260  VirtualDirectoryEntries.push_back(UDE);
261
262  // Recursively add the other ancestors.
263  addAncestorsAsVirtualDirs(DirName);
264}
265
266const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
267                                                bool CacheFailure) {
268  // stat doesn't like trailing separators except for root directory.
269  // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
270  // (though it can strip '\\')
271  if (DirName.size() > 1 &&
272      DirName != llvm::sys::path::root_path(DirName) &&
273      llvm::sys::path::is_separator(DirName.back()))
274    DirName = DirName.substr(0, DirName.size()-1);
275
276  ++NumDirLookups;
277  llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
278    SeenDirEntries.GetOrCreateValue(DirName);
279
280  // See if there was already an entry in the map.  Note that the map
281  // contains both virtual and real directories.
282  if (NamedDirEnt.getValue())
283    return NamedDirEnt.getValue() == NON_EXISTENT_DIR
284              ? 0 : NamedDirEnt.getValue();
285
286  ++NumDirCacheMisses;
287
288  // By default, initialize it to invalid.
289  NamedDirEnt.setValue(NON_EXISTENT_DIR);
290
291  // Get the null-terminated directory name as stored as the key of the
292  // SeenDirEntries map.
293  const char *InterndDirName = NamedDirEnt.getKeyData();
294
295  // Check to see if the directory exists.
296  struct stat StatBuf;
297  if (getStatValue(InterndDirName, StatBuf, 0/*directory lookup*/)) {
298    // There's no real directory at the given path.
299    if (!CacheFailure)
300      SeenDirEntries.erase(DirName);
301    return 0;
302  }
303
304  // It exists.  See if we have already opened a directory with the
305  // same inode (this occurs on Unix-like systems when one dir is
306  // symlinked to another, for example) or the same path (on
307  // Windows).
308  DirectoryEntry &UDE = UniqueRealDirs.getDirectory(InterndDirName, StatBuf);
309
310  NamedDirEnt.setValue(&UDE);
311  if (!UDE.getName()) {
312    // We don't have this directory yet, add it.  We use the string
313    // key from the SeenDirEntries map as the string.
314    UDE.Name  = InterndDirName;
315  }
316
317  return &UDE;
318}
319
320const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
321                                      bool CacheFailure) {
322  ++NumFileLookups;
323
324  // See if there is already an entry in the map.
325  llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
326    SeenFileEntries.GetOrCreateValue(Filename);
327
328  // See if there is already an entry in the map.
329  if (NamedFileEnt.getValue())
330    return NamedFileEnt.getValue() == NON_EXISTENT_FILE
331                 ? 0 : NamedFileEnt.getValue();
332
333  ++NumFileCacheMisses;
334
335  // By default, initialize it to invalid.
336  NamedFileEnt.setValue(NON_EXISTENT_FILE);
337
338  // Get the null-terminated file name as stored as the key of the
339  // SeenFileEntries map.
340  const char *InterndFileName = NamedFileEnt.getKeyData();
341
342  // Look up the directory for the file.  When looking up something like
343  // sys/foo.h we'll discover all of the search directories that have a 'sys'
344  // subdirectory.  This will let us avoid having to waste time on known-to-fail
345  // searches when we go to find sys/bar.h, because all the search directories
346  // without a 'sys' subdir will get a cached failure result.
347  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
348                                                       CacheFailure);
349  if (DirInfo == 0) {  // Directory doesn't exist, file can't exist.
350    if (!CacheFailure)
351      SeenFileEntries.erase(Filename);
352
353    return 0;
354  }
355
356  // FIXME: Use the directory info to prune this, before doing the stat syscall.
357  // FIXME: This will reduce the # syscalls.
358
359  // Nope, there isn't.  Check to see if the file exists.
360  int FileDescriptor = -1;
361  struct stat StatBuf;
362  if (getStatValue(InterndFileName, StatBuf, &FileDescriptor)) {
363    // There's no real file at the given path.
364    if (!CacheFailure)
365      SeenFileEntries.erase(Filename);
366
367    return 0;
368  }
369
370  if (FileDescriptor != -1 && !openFile) {
371    close(FileDescriptor);
372    FileDescriptor = -1;
373  }
374
375  // It exists.  See if we have already opened a file with the same inode.
376  // This occurs when one dir is symlinked to another, for example.
377  FileEntry &UFE = UniqueRealFiles.getFile(InterndFileName, StatBuf);
378
379  NamedFileEnt.setValue(&UFE);
380  if (UFE.getName()) { // Already have an entry with this inode, return it.
381    // If the stat process opened the file, close it to avoid a FD leak.
382    if (FileDescriptor != -1)
383      close(FileDescriptor);
384
385    return &UFE;
386  }
387
388  // Otherwise, we don't have this directory yet, add it.
389  // FIXME: Change the name to be a char* that points back to the
390  // 'SeenFileEntries' key.
391  UFE.Name    = InterndFileName;
392  UFE.Size    = StatBuf.st_size;
393  UFE.ModTime = StatBuf.st_mtime;
394  UFE.Dir     = DirInfo;
395  UFE.UID     = NextFileUID++;
396  UFE.FD      = FileDescriptor;
397  return &UFE;
398}
399
400const FileEntry *
401FileManager::getVirtualFile(StringRef Filename, off_t Size,
402                            time_t ModificationTime) {
403  ++NumFileLookups;
404
405  // See if there is already an entry in the map.
406  llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
407    SeenFileEntries.GetOrCreateValue(Filename);
408
409  // See if there is already an entry in the map.
410  if (NamedFileEnt.getValue() && NamedFileEnt.getValue() != NON_EXISTENT_FILE)
411    return NamedFileEnt.getValue();
412
413  ++NumFileCacheMisses;
414
415  // By default, initialize it to invalid.
416  NamedFileEnt.setValue(NON_EXISTENT_FILE);
417
418  addAncestorsAsVirtualDirs(Filename);
419  FileEntry *UFE = 0;
420
421  // Now that all ancestors of Filename are in the cache, the
422  // following call is guaranteed to find the DirectoryEntry from the
423  // cache.
424  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
425                                                       /*CacheFailure=*/true);
426  assert(DirInfo &&
427         "The directory of a virtual file should already be in the cache.");
428
429  // Check to see if the file exists. If so, drop the virtual file
430  int FileDescriptor = -1;
431  struct stat StatBuf;
432  const char *InterndFileName = NamedFileEnt.getKeyData();
433  if (getStatValue(InterndFileName, StatBuf, &FileDescriptor) == 0) {
434    // If the stat process opened the file, close it to avoid a FD leak.
435    if (FileDescriptor != -1)
436      close(FileDescriptor);
437
438    StatBuf.st_size = Size;
439    StatBuf.st_mtime = ModificationTime;
440    UFE = &UniqueRealFiles.getFile(InterndFileName, StatBuf);
441
442    NamedFileEnt.setValue(UFE);
443
444    // If we had already opened this file, close it now so we don't
445    // leak the descriptor. We're not going to use the file
446    // descriptor anyway, since this is a virtual file.
447    if (UFE->FD != -1) {
448      close(UFE->FD);
449      UFE->FD = -1;
450    }
451
452    // If we already have an entry with this inode, return it.
453    if (UFE->getName())
454      return UFE;
455  }
456
457  if (!UFE) {
458    UFE = new FileEntry();
459    VirtualFileEntries.push_back(UFE);
460    NamedFileEnt.setValue(UFE);
461  }
462
463  UFE->Name    = InterndFileName;
464  UFE->Size    = Size;
465  UFE->ModTime = ModificationTime;
466  UFE->Dir     = DirInfo;
467  UFE->UID     = NextFileUID++;
468  UFE->FD      = -1;
469  return UFE;
470}
471
472void FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const {
473  StringRef pathRef(path.data(), path.size());
474
475  if (FileSystemOpts.WorkingDir.empty()
476      || llvm::sys::path::is_absolute(pathRef))
477    return;
478
479  SmallString<128> NewPath(FileSystemOpts.WorkingDir);
480  llvm::sys::path::append(NewPath, pathRef);
481  path = NewPath;
482}
483
484llvm::MemoryBuffer *FileManager::
485getBufferForFile(const FileEntry *Entry, std::string *ErrorStr) {
486  OwningPtr<llvm::MemoryBuffer> Result;
487  llvm::error_code ec;
488
489  const char *Filename = Entry->getName();
490  // If the file is already open, use the open file descriptor.
491  if (Entry->FD != -1) {
492    ec = llvm::MemoryBuffer::getOpenFile(Entry->FD, Filename, Result,
493                                         Entry->getSize());
494    if (ErrorStr)
495      *ErrorStr = ec.message();
496
497    close(Entry->FD);
498    Entry->FD = -1;
499    return Result.take();
500  }
501
502  // Otherwise, open the file.
503
504  if (FileSystemOpts.WorkingDir.empty()) {
505    ec = llvm::MemoryBuffer::getFile(Filename, Result, Entry->getSize());
506    if (ec && ErrorStr)
507      *ErrorStr = ec.message();
508    return Result.take();
509  }
510
511  SmallString<128> FilePath(Entry->getName());
512  FixupRelativePath(FilePath);
513  ec = llvm::MemoryBuffer::getFile(FilePath.str(), Result, Entry->getSize());
514  if (ec && ErrorStr)
515    *ErrorStr = ec.message();
516  return Result.take();
517}
518
519llvm::MemoryBuffer *FileManager::
520getBufferForFile(StringRef Filename, std::string *ErrorStr) {
521  OwningPtr<llvm::MemoryBuffer> Result;
522  llvm::error_code ec;
523  if (FileSystemOpts.WorkingDir.empty()) {
524    ec = llvm::MemoryBuffer::getFile(Filename, Result);
525    if (ec && ErrorStr)
526      *ErrorStr = ec.message();
527    return Result.take();
528  }
529
530  SmallString<128> FilePath(Filename);
531  FixupRelativePath(FilePath);
532  ec = llvm::MemoryBuffer::getFile(FilePath.c_str(), Result);
533  if (ec && ErrorStr)
534    *ErrorStr = ec.message();
535  return Result.take();
536}
537
538/// getStatValue - Get the 'stat' information for the specified path,
539/// using the cache to accelerate it if possible.  This returns true
540/// if the path points to a virtual file or does not exist, or returns
541/// false if it's an existent real file.  If FileDescriptor is NULL,
542/// do directory look-up instead of file look-up.
543bool FileManager::getStatValue(const char *Path, struct stat &StatBuf,
544                               int *FileDescriptor) {
545  // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
546  // absolute!
547  if (FileSystemOpts.WorkingDir.empty())
548    return FileSystemStatCache::get(Path, StatBuf, FileDescriptor,
549                                    StatCache.get());
550
551  SmallString<128> FilePath(Path);
552  FixupRelativePath(FilePath);
553
554  return FileSystemStatCache::get(FilePath.c_str(), StatBuf, FileDescriptor,
555                                  StatCache.get());
556}
557
558bool FileManager::getNoncachedStatValue(StringRef Path,
559                                        struct stat &StatBuf) {
560  SmallString<128> FilePath(Path);
561  FixupRelativePath(FilePath);
562
563  return ::stat(FilePath.c_str(), &StatBuf) != 0;
564}
565
566void FileManager::invalidateCache(const FileEntry *Entry) {
567  assert(Entry && "Cannot invalidate a NULL FileEntry");
568
569  SeenFileEntries.erase(Entry->getName());
570
571  // FileEntry invalidation should not block future optimizations in the file
572  // caches. Possible alternatives are cache truncation (invalidate last N) or
573  // invalidation of the whole cache.
574  UniqueRealFiles.erase(Entry);
575}
576
577
578void FileManager::GetUniqueIDMapping(
579                   SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
580  UIDToFiles.clear();
581  UIDToFiles.resize(NextFileUID);
582
583  // Map file entries
584  for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
585         FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
586       FE != FEEnd; ++FE)
587    if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
588      UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
589
590  // Map virtual file entries
591  for (SmallVector<FileEntry*, 4>::const_iterator
592         VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end();
593       VFE != VFEEnd; ++VFE)
594    if (*VFE && *VFE != NON_EXISTENT_FILE)
595      UIDToFiles[(*VFE)->getUID()] = *VFE;
596}
597
598void FileManager::modifyFileEntry(FileEntry *File,
599                                  off_t Size, time_t ModificationTime) {
600  File->Size = Size;
601  File->ModTime = ModificationTime;
602}
603
604
605void FileManager::PrintStats() const {
606  llvm::errs() << "\n*** File Manager Stats:\n";
607  llvm::errs() << UniqueRealFiles.size() << " real files found, "
608               << UniqueRealDirs.size() << " real dirs found.\n";
609  llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
610               << VirtualDirectoryEntries.size() << " virtual dirs found.\n";
611  llvm::errs() << NumDirLookups << " dir lookups, "
612               << NumDirCacheMisses << " dir cache misses.\n";
613  llvm::errs() << NumFileLookups << " file lookups, "
614               << NumFileCacheMisses << " file cache misses.\n";
615
616  //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
617}
618