SourceManager.h revision d57b7ff9bebc4c45f325fc1be6f238cfcd4c3732
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include "llvm/Support/Allocator.h"
19#include "llvm/Support/DataTypes.h"
20#include "llvm/ADT/PointerIntPair.h"
21#include "llvm/ADT/PointerUnion.h"
22#include "llvm/ADT/IntrusiveRefCntPtr.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include <vector>
26#include <cassert>
27
28namespace llvm {
29class StringRef;
30}
31
32namespace clang {
33
34class Diagnostic;
35class SourceManager;
36class FileManager;
37class FileEntry;
38class LineTableInfo;
39
40/// SrcMgr - Public enums and private classes that are part of the
41/// SourceManager implementation.
42///
43namespace SrcMgr {
44  /// CharacteristicKind - This is used to represent whether a file or directory
45  /// holds normal user code, system code, or system code which is implicitly
46  /// 'extern "C"' in C++ mode.  Entire directories can be tagged with this
47  /// (this is maintained by DirectoryLookup and friends) as can specific
48  /// FileIDInfos when a #pragma system_header is seen or various other cases.
49  ///
enum CharacteristicKind {
  C_User,           // Normal user code.
  C_System,         // System code.
  C_ExternCSystem   // System code that is implicitly 'extern "C"' in C++ mode.
};
53
54  /// ContentCache - One instance of this struct is kept for every file
55  /// loaded or used.  This object owns the MemoryBuffer object.
56  class ContentCache {
57    enum CCFlags {
58      /// \brief Whether the buffer is invalid.
59      InvalidFlag = 0x01,
60      /// \brief Whether the buffer should not be freed on destruction.
61      DoNotFreeFlag = 0x02
62    };
63
64    /// Buffer - The actual buffer containing the characters from the input
65    /// file.  This is owned by the ContentCache object.
66    /// The bits indicate indicates whether the buffer is invalid.
67    mutable llvm::PointerIntPair<const llvm::MemoryBuffer *, 2> Buffer;
68
69  public:
70    /// Reference to the file entry representing this ContentCache.
71    /// This reference does not own the FileEntry object.
72    /// It is possible for this to be NULL if
73    /// the ContentCache encapsulates an imaginary text buffer.
74    const FileEntry *OrigEntry;
75
76    /// \brief References the file which the contents were actually loaded from.
77    /// Can be different from 'Entry' if we overridden the contents of one file
78    /// with the contents of another file.
79    const FileEntry *ContentsEntry;
80
81    /// SourceLineCache - A bump pointer allocated array of offsets for each
82    /// source line.  This is lazily computed.  This is owned by the
83    /// SourceManager BumpPointerAllocator object.
84    unsigned *SourceLineCache;
85
86    /// NumLines - The number of lines in this ContentCache.  This is only valid
87    /// if SourceLineCache is non-null.
88    unsigned NumLines;
89
90    /// getBuffer - Returns the memory buffer for the associated content.
91    ///
92    /// \param Diag Object through which diagnostics will be emitted if the
93    /// buffer cannot be retrieved.
94    ///
95    /// \param Loc If specified, is the location that invalid file diagnostics
96    ///     will be emitted at.
97    ///
98    /// \param Invalid If non-NULL, will be set \c true if an error occurred.
99    const llvm::MemoryBuffer *getBuffer(Diagnostic &Diag,
100                                        const SourceManager &SM,
101                                        SourceLocation Loc = SourceLocation(),
102                                        bool *Invalid = 0) const;
103
104    /// getSize - Returns the size of the content encapsulated by this
105    ///  ContentCache. This can be the size of the source file or the size of an
106    ///  arbitrary scratch buffer.  If the ContentCache encapsulates a source
107    ///  file this size is retrieved from the file's FileEntry.
108    unsigned getSize() const;
109
110    /// getSizeBytesMapped - Returns the number of bytes actually mapped for
111    ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
112    ///  instantiated.
113    unsigned getSizeBytesMapped() const;
114
115    /// Returns the kind of memory used to back the memory buffer for
116    /// this content cache.  This is used for performance analysis.
117    llvm::MemoryBuffer::BufferKind getMemoryBufferKind() const;
118
119    void setBuffer(const llvm::MemoryBuffer *B) {
120      assert(!Buffer.getPointer() && "MemoryBuffer already set.");
121      Buffer.setPointer(B);
122      Buffer.setInt(false);
123    }
124
125    /// \brief Get the underlying buffer, returning NULL if the buffer is not
126    /// yet available.
127    const llvm::MemoryBuffer *getRawBuffer() const {
128      return Buffer.getPointer();
129    }
130
131    /// \brief Replace the existing buffer (which will be deleted)
132    /// with the given buffer.
133    void replaceBuffer(const llvm::MemoryBuffer *B, bool DoNotFree = false);
134
135    /// \brief Determine whether the buffer itself is invalid.
136    bool isBufferInvalid() const {
137      return Buffer.getInt() & InvalidFlag;
138    }
139
140    /// \brief Determine whether the buffer should be freed.
141    bool shouldFreeBuffer() const {
142      return (Buffer.getInt() & DoNotFreeFlag) == 0;
143    }
144
145    ContentCache(const FileEntry *Ent = 0)
146      : Buffer(0, false), OrigEntry(Ent), ContentsEntry(Ent),
147        SourceLineCache(0), NumLines(0) {}
148
149    ContentCache(const FileEntry *Ent, const FileEntry *contentEnt)
150      : Buffer(0, false), OrigEntry(Ent), ContentsEntry(contentEnt),
151        SourceLineCache(0), NumLines(0) {}
152
153    ~ContentCache();
154
155    /// The copy ctor does not allow copies where source object has either
156    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
157    ///  is not transferred, so this is a logical error.
158    ContentCache(const ContentCache &RHS)
159      : Buffer(0, false), SourceLineCache(0)
160    {
161      OrigEntry = RHS.OrigEntry;
162      ContentsEntry = RHS.ContentsEntry;
163
164      assert (RHS.Buffer.getPointer() == 0 && RHS.SourceLineCache == 0
165              && "Passed ContentCache object cannot own a buffer.");
166
167      NumLines = RHS.NumLines;
168    }
169
170  private:
171    // Disable assignments.
172    ContentCache &operator=(const ContentCache& RHS);
173  };
174
175  /// FileInfo - Information about a FileID, basically just the logical file
176  /// that it represents and include stack information.
177  ///
178  /// Each FileInfo has include stack information, indicating where it came
179  /// from.  This information encodes the #include chain that a token was
180  /// instantiated from.  The main include file has an invalid IncludeLoc.
181  ///
182  /// FileInfos contain a "ContentCache *", with the contents of the file.
183  ///
184  class FileInfo {
185    /// IncludeLoc - The location of the #include that brought in this file.
186    /// This is an invalid SLOC for the main file (top of the #include chain).
187    unsigned IncludeLoc;  // Really a SourceLocation
188
189    /// Data - This contains the ContentCache* and the bits indicating the
190    /// characteristic of the file and whether it has #line info, all bitmangled
191    /// together.
192    uintptr_t Data;
193  public:
194    /// get - Return a FileInfo object.
195    static FileInfo get(SourceLocation IL, const ContentCache *Con,
196                        CharacteristicKind FileCharacter) {
197      FileInfo X;
198      X.IncludeLoc = IL.getRawEncoding();
199      X.Data = (uintptr_t)Con;
200      assert((X.Data & 7) == 0 &&"ContentCache pointer insufficiently aligned");
201      assert((unsigned)FileCharacter < 4 && "invalid file character");
202      X.Data |= (unsigned)FileCharacter;
203      return X;
204    }
205
206    SourceLocation getIncludeLoc() const {
207      return SourceLocation::getFromRawEncoding(IncludeLoc);
208    }
209    const ContentCache* getContentCache() const {
210      return reinterpret_cast<const ContentCache*>(Data & ~7UL);
211    }
212
213    /// getCharacteristic - Return whether this is a system header or not.
214    CharacteristicKind getFileCharacteristic() const {
215      return (CharacteristicKind)(Data & 3);
216    }
217
218    /// hasLineDirectives - Return true if this FileID has #line directives in
219    /// it.
220    bool hasLineDirectives() const { return (Data & 4) != 0; }
221
222    /// setHasLineDirectives - Set the flag that indicates that this FileID has
223    /// line table entries associated with it.
224    void setHasLineDirectives() {
225      Data |= 4;
226    }
227  };
228
229  /// InstantiationInfo - Each InstantiationInfo encodes the Instantiation
230  /// location - where the token was ultimately instantiated, and the
231  /// SpellingLoc - where the actual character data for the token came from.
232  class InstantiationInfo {
233     // Really these are all SourceLocations.
234
235    /// SpellingLoc - Where the spelling for the token can be found.
236    unsigned SpellingLoc;
237
238    /// InstantiationLocStart/InstantiationLocEnd - In a macro expansion, these
239    /// indicate the start and end of the instantiation.  In object-like macros,
240    /// these will be the same.  In a function-like macro instantiation, the
241    /// start will be the identifier and the end will be the ')'.
242    unsigned InstantiationLocStart, InstantiationLocEnd;
243  public:
244    SourceLocation getSpellingLoc() const {
245      return SourceLocation::getFromRawEncoding(SpellingLoc);
246    }
247    SourceLocation getInstantiationLocStart() const {
248      return SourceLocation::getFromRawEncoding(InstantiationLocStart);
249    }
250    SourceLocation getInstantiationLocEnd() const {
251      return SourceLocation::getFromRawEncoding(InstantiationLocEnd);
252    }
253
254    std::pair<SourceLocation,SourceLocation> getInstantiationLocRange() const {
255      return std::make_pair(getInstantiationLocStart(),
256                            getInstantiationLocEnd());
257    }
258
259    /// get - Return a InstantiationInfo for an expansion.  IL specifies
260    /// the instantiation location (where the macro is expanded), and SL
261    /// specifies the spelling location (where the characters from the token
262    /// come from).  IL and PL can both refer to normal File SLocs or
263    /// instantiation locations.
264    static InstantiationInfo get(SourceLocation ILStart, SourceLocation ILEnd,
265                                 SourceLocation SL) {
266      InstantiationInfo X;
267      X.SpellingLoc = SL.getRawEncoding();
268      X.InstantiationLocStart = ILStart.getRawEncoding();
269      X.InstantiationLocEnd = ILEnd.getRawEncoding();
270      return X;
271    }
272  };
273
274  /// SLocEntry - This is a discriminated union of FileInfo and
275  /// InstantiationInfo.  SourceManager keeps an array of these objects, and
276  /// they are uniquely identified by the FileID datatype.
277  class SLocEntry {
278    unsigned Offset;   // low bit is set for instantiation info.
279    union {
280      FileInfo File;
281      InstantiationInfo Instantiation;
282    };
283  public:
284    unsigned getOffset() const { return Offset >> 1; }
285
286    bool isInstantiation() const { return Offset & 1; }
287    bool isFile() const { return !isInstantiation(); }
288
289    const FileInfo &getFile() const {
290      assert(isFile() && "Not a file SLocEntry!");
291      return File;
292    }
293
294    const InstantiationInfo &getInstantiation() const {
295      assert(isInstantiation() && "Not an instantiation SLocEntry!");
296      return Instantiation;
297    }
298
299    static SLocEntry get(unsigned Offset, const FileInfo &FI) {
300      SLocEntry E;
301      E.Offset = Offset << 1;
302      E.File = FI;
303      return E;
304    }
305
306    static SLocEntry get(unsigned Offset, const InstantiationInfo &II) {
307      SLocEntry E;
308      E.Offset = (Offset << 1) | 1;
309      E.Instantiation = II;
310      return E;
311    }
312  };
313}  // end SrcMgr namespace.
314
315/// \brief External source of source location entries.
class ExternalSLocEntrySource {
public:
  /// Virtual so implementations can be destroyed through this interface.
  virtual ~ExternalSLocEntrySource();

  /// \brief Read the source location entry with index ID.
  ///
  /// \param ID index of the source location entry to read.
  ///
  /// \returns true if an error occurred that prevented the source-location
  /// entry from being loaded.
  virtual bool ReadSLocEntry(unsigned ID) = 0;
};
326
327
328/// IsBeforeInTranslationUnitCache - This class holds the cache used by
329/// isBeforeInTranslationUnit.  The cache structure is complex enough to be
330/// worth breaking out of SourceManager.
331class IsBeforeInTranslationUnitCache {
332  /// L/R QueryFID - These are the FID's of the cached query.  If these match up
333  /// with a subsequent query, the result can be reused.
334  FileID LQueryFID, RQueryFID;
335
336  /// CommonFID - This is the file found in common between the two #include
337  /// traces.  It is the nearest common ancestor of the #include tree.
338  FileID CommonFID;
339
340  /// L/R CommonOffset - This is the offset of the previous query in CommonFID.
341  /// Usually, this represents the location of the #include for QueryFID, but if
342  /// LQueryFID is a parent of RQueryFID (or vise versa) then these can be a
343  /// random token in the parent.
344  unsigned LCommonOffset, RCommonOffset;
345public:
346
347  /// isCacheValid - Return true if the currently cached values match up with
348  /// the specified LHS/RHS query.  If not, we can't use the cache.
349  bool isCacheValid(FileID LHS, FileID RHS) const {
350    return LQueryFID == LHS && RQueryFID == RHS;
351  }
352
353  /// getCachedResult - If the cache is valid, compute the result given the
354  /// specified offsets in the LHS/RHS FID's.
355  bool getCachedResult(unsigned LOffset, unsigned ROffset) const {
356    // If one of the query files is the common file, use the offset.  Otherwise,
357    // use the #include loc in the common file.
358    if (LQueryFID != CommonFID) LOffset = LCommonOffset;
359    if (RQueryFID != CommonFID) ROffset = RCommonOffset;
360    return LOffset < ROffset;
361  }
362
363  // Set up a new query.
364  void setQueryFIDs(FileID LHS, FileID RHS) {
365    LQueryFID = LHS;
366    RQueryFID = RHS;
367  }
368
369  void setCommonLoc(FileID commonFID, unsigned lCommonOffset,
370                    unsigned rCommonOffset) {
371    CommonFID = commonFID;
372    LCommonOffset = lCommonOffset;
373    RCommonOffset = rCommonOffset;
374  }
375
376};
377
378/// SourceManager - This class handles loading and caching of source files into
379/// memory.  This object owns the MemoryBuffer objects for all of the loaded
380/// files and assigns unique FileID's for each unique #include chain.
381///
382/// The SourceManager can be queried for information about SourceLocation
383/// objects, turning them into either spelling or instantiation locations.
384/// Spelling locations represent where the bytes corresponding to a token came
385/// from and instantiation locations represent where the location is in the
386/// user's view.  In the case of a macro expansion, for example, the spelling
387/// location indicates  where the expanded token came from and the instantiation
388/// location specifies where it was expanded.
389class SourceManager : public llvm::RefCountedBase<SourceManager> {
390  /// \brief Diagnostic object.
391  Diagnostic &Diag;
392
393  FileManager &FileMgr;
394
395  mutable llvm::BumpPtrAllocator ContentCacheAlloc;
396
397  /// FileInfos - Memoized information about all of the files tracked by this
398  /// SourceManager.  This set allows us to merge ContentCache entries based
399  /// on their FileEntry*.  All ContentCache objects will thus have unique,
400  /// non-null, FileEntry pointers.
401  llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*> FileInfos;
402
403  /// \brief True if the ContentCache for files that are overridden by other
404  /// files, should report the original file name. Defaults to true.
405  bool OverridenFilesKeepOriginalName;
406
407  /// \brief Files that have been overridden with the contents from another file.
408  llvm::DenseMap<const FileEntry *, const FileEntry *> OverriddenFiles;
409
410  /// MemBufferInfos - Information about various memory buffers that we have
411  /// read in.  All FileEntry* within the stored ContentCache objects are NULL,
412  /// as they do not refer to a file.
413  std::vector<SrcMgr::ContentCache*> MemBufferInfos;
414
415  /// SLocEntryTable - This is an array of SLocEntry's that we have created.
416  /// FileID is an index into this vector.  This array is sorted by the offset.
417  std::vector<SrcMgr::SLocEntry> SLocEntryTable;
418  /// NextOffset - This is the next available offset that a new SLocEntry can
419  /// start at.  It is SLocEntryTable.back().getOffset()+size of back() entry.
420  unsigned NextOffset;
421
422  /// \brief If source location entries are being lazily loaded from
423  /// an external source, this vector indicates whether the Ith source
424  /// location entry has already been loaded from the external storage.
425  std::vector<bool> SLocEntryLoaded;
426
427  /// \brief An external source for source location entries.
428  ExternalSLocEntrySource *ExternalSLocEntries;
429
430  /// LastFileIDLookup - This is a one-entry cache to speed up getFileID.
431  /// LastFileIDLookup records the last FileID looked up or created, because it
432  /// is very common to look up many tokens from the same file.
433  mutable FileID LastFileIDLookup;
434
435  /// LineTable - This holds information for #line directives.  It is referenced
436  /// by indices from SLocEntryTable.
437  LineTableInfo *LineTable;
438
439  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
440  /// method which is used to speedup getLineNumber calls to nearby locations.
441  mutable FileID LastLineNoFileIDQuery;
442  mutable SrcMgr::ContentCache *LastLineNoContentCache;
443  mutable unsigned LastLineNoFilePos;
444  mutable unsigned LastLineNoResult;
445
446  /// MainFileID - The file ID for the main source file of the translation unit.
447  FileID MainFileID;
448
449  // Statistics for -print-stats.
450  mutable unsigned NumLinearScans, NumBinaryProbes;
451
452  // Cache results for the isBeforeInTranslationUnit method.
453  mutable IsBeforeInTranslationUnitCache IsBeforeInTUCache;
454
455  // Cache for the "fake" buffer used for error-recovery purposes.
456  mutable llvm::MemoryBuffer *FakeBufferForRecovery;
457
458  // SourceManager doesn't support copy construction.
459  explicit SourceManager(const SourceManager&);
460  void operator=(const SourceManager&);
461public:
462  SourceManager(Diagnostic &Diag, FileManager &FileMgr);
463  ~SourceManager();
464
465  void clearIDTables();
466
467  Diagnostic &getDiagnostics() const { return Diag; }
468
469  FileManager &getFileManager() const { return FileMgr; }
470
471  /// \brief Set true if the SourceManager should report the original file name
472  /// for contents of files overridden by other files. Defaults to true.
473  void setOverridenFilesKeepOriginalName(bool value) {
474    OverridenFilesKeepOriginalName = value;
475  }
476
477  //===--------------------------------------------------------------------===//
478  // MainFileID creation and querying methods.
479  //===--------------------------------------------------------------------===//
480
481  /// getMainFileID - Returns the FileID of the main source file.
482  FileID getMainFileID() const { return MainFileID; }
483
484  /// createMainFileID - Create the FileID for the main source file.
485  FileID createMainFileID(const FileEntry *SourceFile) {
486    assert(MainFileID.isInvalid() && "MainFileID already set!");
487    MainFileID = createFileID(SourceFile, SourceLocation(), SrcMgr::C_User);
488    return MainFileID;
489  }
490
491  /// \brief Set the file ID for the precompiled preamble, which is also the
492  /// main file.
493  void SetPreambleFileID(FileID Preamble) {
494    assert(MainFileID.isInvalid() && "MainFileID already set!");
495    MainFileID = Preamble;
496  }
497
498  //===--------------------------------------------------------------------===//
499  // Methods to create new FileID's and instantiations.
500  //===--------------------------------------------------------------------===//
501
502  /// createFileID - Create a new FileID that represents the specified file
503  /// being #included from the specified IncludePosition.  This translates NULL
504  /// into standard input.
505  /// PreallocatedID should be non-zero to specify which pre-allocated,
506  /// lazily computed source location is being filled in by this operation.
507  FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
508                      SrcMgr::CharacteristicKind FileCharacter,
509                      unsigned PreallocatedID = 0,
510                      unsigned Offset = 0) {
511    const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile);
512    assert(IR && "getOrCreateContentCache() cannot return NULL");
513    return createFileID(IR, IncludePos, FileCharacter, PreallocatedID, Offset);
514  }
515
516  /// createFileIDForMemBuffer - Create a new FileID that represents the
517  /// specified memory buffer.  This does no caching of the buffer and takes
518  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
519  FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer,
520                                  unsigned PreallocatedID = 0,
521                                  unsigned Offset = 0) {
522    return createFileID(createMemBufferContentCache(Buffer), SourceLocation(),
523                        SrcMgr::C_User, PreallocatedID, Offset);
524  }
525
526  /// createMainFileIDForMembuffer - Create the FileID for a memory buffer
527  ///  that will represent the FileID for the main source.  One example
528  ///  of when this would be used is when the main source is read from STDIN.
529  FileID createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
530    assert(MainFileID.isInvalid() && "MainFileID already set!");
531    MainFileID = createFileIDForMemBuffer(Buffer);
532    return MainFileID;
533  }
534
535  /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
536  /// that a token at Loc should actually be referenced from InstantiationLoc.
537  /// TokLength is the length of the token being instantiated.
538  SourceLocation createInstantiationLoc(SourceLocation Loc,
539                                        SourceLocation InstantiationLocStart,
540                                        SourceLocation InstantiationLocEnd,
541                                        unsigned TokLength,
542                                        unsigned PreallocatedID = 0,
543                                        unsigned Offset = 0);
544
545  /// \brief Retrieve the memory buffer associated with the given file.
546  ///
547  /// \param Invalid If non-NULL, will be set \c true if an error
548  /// occurs while retrieving the memory buffer.
549  const llvm::MemoryBuffer *getMemoryBufferForFile(const FileEntry *File,
550                                                   bool *Invalid = 0);
551
552  /// \brief Override the contents of the given source file by providing an
553  /// already-allocated buffer.
554  ///
555  /// \param SourceFile the source file whose contents will be overridden.
556  ///
557  /// \param Buffer the memory buffer whose contents will be used as the
558  /// data in the given source file.
559  ///
560  /// \param DoNotFree If true, then the buffer will not be freed when the
561  /// source manager is destroyed.
562  void overrideFileContents(const FileEntry *SourceFile,
563                            const llvm::MemoryBuffer *Buffer,
564                            bool DoNotFree = false);
565
566  /// \brief Override the given source file with another one.
567  ///
568  /// \param SourceFile the source file which will be overridden.
569  ///
570  /// \param NewFile the file whose contents will be used as the
571  /// data instead of the contents of the given source file.
572  void overrideFileContents(const FileEntry *SourceFile,
573                            const FileEntry *NewFile);
574
575  //===--------------------------------------------------------------------===//
576  // FileID manipulation methods.
577  //===--------------------------------------------------------------------===//
578
579  /// getBuffer - Return the buffer for the specified FileID. If there is an
580  /// error opening this buffer the first time, this manufactures a temporary
581  /// buffer and sets *Invalid (when non-NULL) to true.
582  const llvm::MemoryBuffer *getBuffer(FileID FID, SourceLocation Loc,
583                                      bool *Invalid = 0) const {
584    bool MyInvalid = false;
585    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
586    if (MyInvalid || !Entry.isFile()) {
587      if (Invalid)
588        *Invalid = true;
589
590      return getFakeBufferForRecovery();
591    }
592
593    return Entry.getFile().getContentCache()->getBuffer(Diag, *this, Loc,
594                                                        Invalid);
595  }
596
597  const llvm::MemoryBuffer *getBuffer(FileID FID, bool *Invalid = 0) const {
598    bool MyInvalid = false;
599    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
600    if (MyInvalid || !Entry.isFile()) {
601      if (Invalid)
602        *Invalid = true;
603
604      return getFakeBufferForRecovery();
605    }
606
607    return Entry.getFile().getContentCache()->getBuffer(Diag, *this,
608                                                        SourceLocation(),
609                                                        Invalid);
610  }
611
612  /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
613  const FileEntry *getFileEntryForID(FileID FID) const {
614    bool MyInvalid = false;
615    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
616    if (MyInvalid || !Entry.isFile())
617      return 0;
618
619    return Entry.getFile().getContentCache()->OrigEntry;
620  }
621
622  /// Returns the FileEntry record for the provided SLocEntry.
623  const FileEntry *getFileEntryForSLocEntry(const SrcMgr::SLocEntry &sloc) const
624  {
625    return sloc.getFile().getContentCache()->OrigEntry;
626  }
627
628  /// getBufferData - Return a StringRef to the source buffer data for the
629  /// specified FileID.
630  ///
631  /// \param FID The file ID whose contents will be returned.
632  /// \param Invalid If non-NULL, will be set true if an error occurred.
633  llvm::StringRef getBufferData(FileID FID, bool *Invalid = 0) const;
634
635
636  //===--------------------------------------------------------------------===//
637  // SourceLocation manipulation methods.
638  //===--------------------------------------------------------------------===//
639
640  /// getFileID - Return the FileID for a SourceLocation.  This is a very
641  /// hot method that is used for all SourceManager queries that start with a
642  /// SourceLocation object.  It is responsible for finding the entry in
643  /// SLocEntryTable which contains the specified location.
644  ///
645  FileID getFileID(SourceLocation SpellingLoc) const {
646    unsigned SLocOffset = SpellingLoc.getOffset();
647
648    // If our one-entry cache covers this offset, just return it.
649    if (isOffsetInFileID(LastFileIDLookup, SLocOffset))
650      return LastFileIDLookup;
651
652    return getFileIDSlow(SLocOffset);
653  }
654
655  /// getLocForStartOfFile - Return the source location corresponding to the
656  /// first byte of the specified file.
657  SourceLocation getLocForStartOfFile(FileID FID) const {
658    assert(FID.ID < SLocEntryTable.size() && "FileID out of range");
659    bool Invalid = false;
660    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid);
661    if (Invalid || !Entry.isFile())
662      return SourceLocation();
663
664    unsigned FileOffset = Entry.getOffset();
665    return SourceLocation::getFileLoc(FileOffset);
666  }
667
668  /// getInstantiationLoc - Given a SourceLocation object, return the
669  /// instantiation location referenced by the ID.
670  SourceLocation getInstantiationLoc(SourceLocation Loc) const {
671    // Handle the non-mapped case inline, defer to out of line code to handle
672    // instantiations.
673    if (Loc.isFileID()) return Loc;
674    return getInstantiationLocSlowCase(Loc);
675  }
676
677  /// getImmediateInstantiationRange - Loc is required to be an instantiation
678  /// location.  Return the start/end of the instantiation information.
679  std::pair<SourceLocation,SourceLocation>
680  getImmediateInstantiationRange(SourceLocation Loc) const;
681
682  /// getInstantiationRange - Given a SourceLocation object, return the
683  /// range of tokens covered by the instantiation in the ultimate file.
684  std::pair<SourceLocation,SourceLocation>
685  getInstantiationRange(SourceLocation Loc) const;
686
687
688  /// getSpellingLoc - Given a SourceLocation object, return the spelling
689  /// location referenced by the ID.  This is the place where the characters
690  /// that make up the lexed token can be found.
691  SourceLocation getSpellingLoc(SourceLocation Loc) const {
692    // Handle the non-mapped case inline, defer to out of line code to handle
693    // instantiations.
694    if (Loc.isFileID()) return Loc;
695    return getSpellingLocSlowCase(Loc);
696  }
697
698  /// getImmediateSpellingLoc - Given a SourceLocation object, return the
699  /// spelling location referenced by the ID.  This is the first level down
700  /// towards the place where the characters that make up the lexed token can be
701  /// found.  This should not generally be used by clients.
702  SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const;
703
704  /// getDecomposedLoc - Decompose the specified location into a raw FileID +
705  /// Offset pair.  The first element is the FileID, the second is the
706  /// offset from the start of the buffer of the location.
707  std::pair<FileID, unsigned> getDecomposedLoc(SourceLocation Loc) const {
708    FileID FID = getFileID(Loc);
709    return std::make_pair(FID, Loc.getOffset()-getSLocEntry(FID).getOffset());
710  }
711
712  /// getDecomposedInstantiationLoc - Decompose the specified location into a
713  /// raw FileID + Offset pair.  If the location is an instantiation record,
714  /// walk through it until we find the final location instantiated.
715  std::pair<FileID, unsigned>
716  getDecomposedInstantiationLoc(SourceLocation Loc) const {
717    FileID FID = getFileID(Loc);
718    const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
719
720    unsigned Offset = Loc.getOffset()-E->getOffset();
721    if (Loc.isFileID())
722      return std::make_pair(FID, Offset);
723
724    return getDecomposedInstantiationLocSlowCase(E, Offset);
725  }
726
727  /// getDecomposedSpellingLoc - Decompose the specified location into a raw
728  /// FileID + Offset pair.  If the location is an instantiation record, walk
729  /// through it until we find its spelling record.
730  std::pair<FileID, unsigned>
731  getDecomposedSpellingLoc(SourceLocation Loc) const {
732    FileID FID = getFileID(Loc);
733    const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
734
735    unsigned Offset = Loc.getOffset()-E->getOffset();
736    if (Loc.isFileID())
737      return std::make_pair(FID, Offset);
738    return getDecomposedSpellingLocSlowCase(E, Offset);
739  }
740
741  /// getFileOffset - This method returns the offset from the start
742  /// of the file that the specified SourceLocation represents. This is not very
743  /// meaningful for a macro ID.
744  unsigned getFileOffset(SourceLocation SpellingLoc) const {
745    return getDecomposedLoc(SpellingLoc).second;
746  }
747
748
749  //===--------------------------------------------------------------------===//
750  // Queries about the code at a SourceLocation.
751  //===--------------------------------------------------------------------===//
752
753  /// getCharacterData - Return a pointer to the character data at the
754  /// specified location, inside the appropriate spelling MemoryBuffer.
755  ///
  /// \param SL The location whose characters are wanted.
756  /// \param Invalid If non-NULL, will be set \c true if an error occurs.
  /// NOTE(review): whether it is also reset to \c false on success is not
  /// visible in this header -- initialise it before the call.
757  const char *getCharacterData(SourceLocation SL, bool *Invalid = 0) const;
758
759  /// getColumnNumber - Return the column # for the specified file position.
760  /// This is significantly cheaper to compute than the line number.  This
761  /// returns zero if the column number isn't known.  This may only be called on
762  /// a file sloc, so you must choose a spelling or instantiation location
763  /// before calling this method.
  ///
  /// \param Invalid Optional out-parameter (defaults to 0).  NOTE(review): it
  /// looks like an error flag as on getCharacterData; the exact set/reset
  /// behaviour lives out of line -- confirm there.
764  unsigned getColumnNumber(FileID FID, unsigned FilePos,
765                           bool *Invalid = 0) const;
  // Variants that take a SourceLocation instead of a FileID + position.
  // NOTE(review): judging by their names they first resolve Loc to its
  // spelling / instantiation / presumed position -- confirm against the
  // out-of-line definitions.
766  unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid = 0) const;
767  unsigned getInstantiationColumnNumber(SourceLocation Loc,
768                                        bool *Invalid = 0) const;
769  unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid = 0) const;
770
771
772  /// getLineNumber - Given a FileID and a position within it, return the line
773  /// number for that position.  This requires building and caching a table of
774  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
775  /// about to emit a diagnostic.
  ///
  /// \param Invalid Optional out-parameter (defaults to 0).  NOTE(review): it
  /// looks like an error flag as on getCharacterData; the exact set/reset
  /// behaviour lives out of line -- confirm there.
776  unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid = 0) const;
  // Variants that take a SourceLocation instead of a FileID + position.
  // NOTE(review): judging by their names they first resolve Loc to its
  // spelling / instantiation / presumed position -- confirm against the
  // out-of-line definitions.
777  unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid = 0) const;
778  unsigned getInstantiationLineNumber(SourceLocation Loc,
779                                      bool *Invalid = 0) const;
780  unsigned getPresumedLineNumber(SourceLocation Loc, bool *Invalid = 0) const;
781
782  /// getBufferName - Return the filename or buffer identifier of the buffer
783  /// the location is in.  Note that this name does not respect #line
784  /// directives; normal clients should use getPresumedLoc instead.
785  const char *getBufferName(SourceLocation Loc, bool *Invalid = 0) const;
786
787  /// getFileCharacteristic - return the file characteristic of the specified
788  /// source location, indicating whether this is a normal file (C_User), a
789  /// system header (C_System), or an "implicit extern C" system header
  /// (C_ExternCSystem).
790  ///
791  /// This state can be modified with flags on GNU linemarker directives like:
792  ///   # 4 "foo.h" 3
793  /// which changes all source locations in the current file after that to be
794  /// considered to be from a system header.
795  SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const;
796
797  /// getPresumedLoc - This method returns the "presumed" location that a
798  /// SourceLocation specifies.  A "presumed location" can be modified by #line
799  /// or GNU line marker directives.  This provides a view on the data that a
800  /// user should see in diagnostics, for example.
801  ///
802  /// Note that a presumed location is always given as the instantiation point
803  /// of an instantiation location, not at the spelling location.
804  ///
805  /// \returns The presumed location of the specified SourceLocation. If the
806  /// presumed location cannot be calculated (e.g., because \p Loc is invalid
807  /// or the file containing \p Loc has changed on disk), returns an invalid
808  /// presumed location.
809  PresumedLoc getPresumedLoc(SourceLocation Loc) const;
810
811  /// isFromSameFile - Returns true if both SourceLocations correspond to
812  ///  the same file.
813  bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const {
814    return getFileID(Loc1) == getFileID(Loc2);
815  }
816
817  /// isFromMainFile - Returns true if the file of provided SourceLocation is
818  ///   the main file.
819  bool isFromMainFile(SourceLocation Loc) const {
820    return getFileID(Loc) == getMainFileID();
821  }
822
823  /// isInSystemHeader - Returns if a SourceLocation is in a system header.
824  bool isInSystemHeader(SourceLocation Loc) const {
825    return getFileCharacteristic(Loc) != SrcMgr::C_User;
826  }
827
828  /// isInExternCSystemHeader - Returns if a SourceLocation is in an "extern C"
829  /// system header.
830  bool isInExternCSystemHeader(SourceLocation Loc) const {
831    return getFileCharacteristic(Loc) == SrcMgr::C_ExternCSystem;
832  }
833
834  /// \brief Returns true if the given MacroID location points at the first
835  /// token of the macro instantiation.
  ///
  /// \param Loc The MacroID location to test.
836  bool isAtStartOfMacroInstantiation(SourceLocation Loc) const;
837
838  /// \brief Returns true if the given MacroID location points at the last
839  /// token of the macro instantiation.
  ///
  /// \param Loc The MacroID location to test.
840  bool isAtEndOfMacroInstantiation(SourceLocation Loc) const;
841
842  //===--------------------------------------------------------------------===//
843  // Line Table Manipulation Routines
844  //===--------------------------------------------------------------------===//
845
846  /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
847  /// \param Str The filename whose ID is wanted.
  /// NOTE(review): this method is non-const, so it presumably creates the ID
  /// (and possibly the line table) on first use -- confirm in the definition.
848  unsigned getLineTableFilenameID(llvm::StringRef Str);
849
850  /// AddLineNote - Add a line note to the line table for the FileID and offset
851  /// specified by Loc.  If FilenameID is -1, it is considered to be
852  /// unspecified.
853  void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID);
  // This overload additionally takes the file entry/exit flags and the
  // system-header / extern "C" header flags for the note.
854  void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID,
855                   bool IsFileEntry, bool IsFileExit,
856                   bool IsSystemHeader, bool IsExternCHeader);
857
858  /// \brief Determine if the source manager has a line table.
859  bool hasLineTable() const { return LineTable != 0; }
860
861  /// \brief Retrieve the stored line table.
  ///
  /// NOTE(review): this returns a reference and is non-const; whether it
  /// creates the table on demand when hasLineTable() is false is not visible
  /// in this header -- confirm in the definition.
862  LineTableInfo &getLineTable();
863
864  //===--------------------------------------------------------------------===//
865  // Queries for performance analysis.
866  //===--------------------------------------------------------------------===//
867
868  /// Return the total amount of physical memory allocated by the
869  /// ContentCache allocator.
870  size_t getContentCacheSize() const {
871    return ContentCacheAlloc.getTotalMemory();
872  }
873
874  struct MemoryBufferSizes {
875    const size_t malloc_bytes;
876    const size_t mmap_bytes;
877
878    MemoryBufferSizes(size_t malloc_bytes, size_t mmap_bytes)
879      : malloc_bytes(malloc_bytes), mmap_bytes(mmap_bytes) {}
880  };
881
882  /// getMemoryBufferSizes - Return the amount of memory used by memory
883  /// buffers, broken down by heap-backed versus mmap'ed memory.
884  MemoryBufferSizes getMemoryBufferSizes() const;
885
886  //===--------------------------------------------------------------------===//
887  // Other miscellaneous methods.
888  //===--------------------------------------------------------------------===//
889
890  /// \brief Get the source location for the given file:line:col triplet.
891  ///
892  /// If the source file is included multiple times, the source location will
893  /// be based upon the first inclusion.
  ///
  /// \param SourceFile The file the location should be in.
  /// \param Line Line within \p SourceFile.
  /// \param Col Column within \p Line.
  /// NOTE(review): presumably both are 1-based, and the result for an
  /// out-of-range pair is not visible in this header -- confirm in the
  /// definition.
894  SourceLocation getLocation(const FileEntry *SourceFile,
895                             unsigned Line, unsigned Col);
896
897  /// \brief Determines the order of two source locations in the translation
  /// unit.
898  ///
899  /// \returns true if LHS source location comes before RHS, false otherwise.
900  bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const;
901
902  // Iterators over FileInfos.
903  typedef llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>
904      ::const_iterator fileinfo_iterator;
905  fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); }
906  fileinfo_iterator fileinfo_end() const { return FileInfos.end(); }
907  bool hasFileInfo(const FileEntry *File) const {
908    return FileInfos.find(File) != FileInfos.end();
909  }
910
911  /// PrintStats - Print statistics to stderr.
912  /// NOTE(review): which statistics are printed is decided out of line.
913  void PrintStats() const;
914
915  unsigned sloc_entry_size() const { return SLocEntryTable.size(); }
916
917  // FIXME: Exposing this is a little gross; what we want is a good way
918  //  to iterate the entries that were not defined in an AST file (or
919  //  any other external source).
920  unsigned sloc_loaded_entry_size() const { return SLocEntryLoaded.size(); }
921
922  const SrcMgr::SLocEntry &getSLocEntry(unsigned ID, bool *Invalid = 0) const {
923    assert(ID < SLocEntryTable.size() && "Invalid id");
924    // If we haven't loaded this source-location entry from the external source
925    // yet, do so now.
926    if (ExternalSLocEntries &&
927        ID < SLocEntryLoaded.size() &&
928        !SLocEntryLoaded[ID] &&
929        ExternalSLocEntries->ReadSLocEntry(ID) &&
930        Invalid)
931      *Invalid = true;
932
933    return SLocEntryTable[ID];
934  }
935
936  const SrcMgr::SLocEntry &getSLocEntry(FileID FID, bool *Invalid = 0) const {
937    return getSLocEntry(FID.ID, Invalid);
938  }
939
940  unsigned getNextOffset() const { return NextOffset; }
941
942  /// \brief Preallocate some number of source location entries, which
943  /// will be loaded as needed from the given external source.
  ///
  /// \param Source The external source the entries will be loaded from.
  /// \param NumSLocEntries How many entries to preallocate.
  /// \param NextOffset NOTE(review): presumably the offset at which locations
  /// created after the preallocated entries begin -- confirm in the definition.
944  void PreallocateSLocEntries(ExternalSLocEntrySource *Source,
945                              unsigned NumSLocEntries,
946                              unsigned NextOffset);
947
948  /// \brief Clear out any preallocated source location entries that
949  /// haven't already been loaded from the external source.
950  void ClearPreallocatedSLocEntries();
951
952private:
  // NOTE(review): undocumented in this header.  Judging by the name, this
  // supplies a stand-in MemoryBuffer for error recovery when real contents
  // are unavailable -- confirm against the out-of-line definition.
953  const llvm::MemoryBuffer *getFakeBufferForRecovery() const;
954
955  /// isOffsetInFileID - Return true if the specified FileID contains the
956  /// specified SourceLocation offset.  This is a very hot method.
957  inline bool isOffsetInFileID(FileID FID, unsigned SLocOffset) const {
958    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID);
959    // If the entry is after the offset, it can't contain it.
960    if (SLocOffset < Entry.getOffset()) return false;
961
962    // If this is the last entry than it does.  Otherwise, the entry after it
963    // has to not include it.
964    if (FID.ID+1 == SLocEntryTable.size()) return true;
965
966    return SLocOffset < getSLocEntry(FileID::get(FID.ID+1)).getOffset();
967  }
968
969  /// createFileID - Create a new fileID for the specified ContentCache and
970  ///  include position.  This works regardless of whether the ContentCache
971  ///  corresponds to a file or some other input source.
  ///
  ///  PreallocatedID and Offset both default to 0.  NOTE(review): presumably
  ///  a zero PreallocatedID means "no preallocated slot"; confirm that and
  ///  the meaning of Offset in the definition.
972  FileID createFileID(const SrcMgr::ContentCache* File,
973                      SourceLocation IncludePos,
974                      SrcMgr::CharacteristicKind DirCharacter,
975                      unsigned PreallocatedID = 0,
976                      unsigned Offset = 0);
977
  /// getOrCreateContentCache - Return the ContentCache for the specified
  ///  file.  NOTE(review): judging by the name, one is created when none
  ///  exists yet -- confirm in the definition.
978  const SrcMgr::ContentCache *
979    getOrCreateContentCache(const FileEntry *SourceFile);
980
981  /// createMemBufferContentCache - Create a new ContentCache for the specified
982  ///  memory buffer.  NOTE(review): whether ownership of Buf transfers to the
  ///  new ContentCache is not visible in this declaration -- confirm.
983  const SrcMgr::ContentCache*
984  createMemBufferContentCache(const llvm::MemoryBuffer *Buf);
985
  // NOTE(review): undocumented in this header.  Judging by the name, this is
  // the out-of-line slow path that maps a source-location offset to the
  // FileID containing it -- confirm against the definition.
986  FileID getFileIDSlow(unsigned SLocOffset) const;
987
  // Out-of-line slow paths.  getSpellingLoc calls getSpellingLocSlowCase for
  // any location that is not a file ID.  NOTE(review):
  // getInstantiationLocSlowCase presumably plays the same role for
  // getInstantiationLoc -- its caller is not in this part of the header.
988  SourceLocation getInstantiationLocSlowCase(SourceLocation Loc) const;
989  SourceLocation getSpellingLocSlowCase(SourceLocation Loc) const;
990
  // Out-of-line slow paths for getDecomposedInstantiationLoc and
  // getDecomposedSpellingLoc, used when the location is not a file ID.  E is
  // the entry of the FileID containing the location and Offset is the
  // location's offset relative to the start of that entry.
991  std::pair<FileID, unsigned>
992  getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
993                                        unsigned Offset) const;
994  std::pair<FileID, unsigned>
995  getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
996                                   unsigned Offset) const;
997};
998
999
1000}  // end namespace clang
1001
1002#endif
1003