SourceManager.h revision 642116259e8df6286063a17361c20e95b5017a0a
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/LLVM.h"
18#include "clang/Basic/SourceLocation.h"
19#include "llvm/Support/Allocator.h"
20#include "llvm/Support/DataTypes.h"
21#include "llvm/ADT/PointerIntPair.h"
22#include "llvm/ADT/PointerUnion.h"
23#include "llvm/ADT/IntrusiveRefCntPtr.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/Support/MemoryBuffer.h"
26#include <vector>
27#include <cassert>
28
29namespace clang {
30
31class Diagnostic;
32class SourceManager;
33class FileManager;
34class FileEntry;
35class LineTableInfo;
36class LangOptions;
37
38/// SrcMgr - Public enums and private classes that are part of the
39/// SourceManager implementation.
40///
41namespace SrcMgr {
42  /// CharacteristicKind - This is used to represent whether a file or directory
43  /// holds normal user code, system code, or system code which is implicitly
44  /// 'extern "C"' in C++ mode.  Entire directories can be tagged with this
45  /// (this is maintained by DirectoryLookup and friends) as can specific
46  /// FileInfos when a #pragma system_header is seen or various other cases.
47  ///
48  enum CharacteristicKind {
49    C_User, C_System, C_ExternCSystem
50  };
51
52  /// ContentCache - One instance of this struct is kept for every file
53  /// loaded or used.  This object owns the MemoryBuffer object.
54  class ContentCache {
55    enum CCFlags {
56      /// \brief Whether the buffer is invalid.
57      InvalidFlag = 0x01,
58      /// \brief Whether the buffer should not be freed on destruction.
59      DoNotFreeFlag = 0x02
60    };
61
62    /// Buffer - The actual buffer containing the characters from the input
63    /// file.  This is owned by the ContentCache object.
64    /// The bits indicate indicates whether the buffer is invalid.
65    mutable llvm::PointerIntPair<const llvm::MemoryBuffer *, 2> Buffer;
66
67  public:
68    /// Reference to the file entry representing this ContentCache.
69    /// This reference does not own the FileEntry object.
70    /// It is possible for this to be NULL if
71    /// the ContentCache encapsulates an imaginary text buffer.
72    const FileEntry *OrigEntry;
73
74    /// \brief References the file which the contents were actually loaded from.
75    /// Can be different from 'Entry' if we overridden the contents of one file
76    /// with the contents of another file.
77    const FileEntry *ContentsEntry;
78
79    /// SourceLineCache - A bump pointer allocated array of offsets for each
80    /// source line.  This is lazily computed.  This is owned by the
81    /// SourceManager BumpPointerAllocator object.
82    unsigned *SourceLineCache;
83
84    /// NumLines - The number of lines in this ContentCache.  This is only valid
85    /// if SourceLineCache is non-null.
86    unsigned NumLines;
87
88    /// getBuffer - Returns the memory buffer for the associated content.
89    ///
90    /// \param Diag Object through which diagnostics will be emitted if the
91    /// buffer cannot be retrieved.
92    ///
93    /// \param Loc If specified, is the location that invalid file diagnostics
94    ///     will be emitted at.
95    ///
96    /// \param Invalid If non-NULL, will be set \c true if an error occurred.
97    const llvm::MemoryBuffer *getBuffer(Diagnostic &Diag,
98                                        const SourceManager &SM,
99                                        SourceLocation Loc = SourceLocation(),
100                                        bool *Invalid = 0) const;
101
102    /// getSize - Returns the size of the content encapsulated by this
103    ///  ContentCache. This can be the size of the source file or the size of an
104    ///  arbitrary scratch buffer.  If the ContentCache encapsulates a source
105    ///  file this size is retrieved from the file's FileEntry.
106    unsigned getSize() const;
107
108    /// getSizeBytesMapped - Returns the number of bytes actually mapped for
109    ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
110    ///  instantiated.
111    unsigned getSizeBytesMapped() const;
112
113    /// Returns the kind of memory used to back the memory buffer for
114    /// this content cache.  This is used for performance analysis.
115    llvm::MemoryBuffer::BufferKind getMemoryBufferKind() const;
116
117    void setBuffer(const llvm::MemoryBuffer *B) {
118      assert(!Buffer.getPointer() && "MemoryBuffer already set.");
119      Buffer.setPointer(B);
120      Buffer.setInt(false);
121    }
122
123    /// \brief Get the underlying buffer, returning NULL if the buffer is not
124    /// yet available.
125    const llvm::MemoryBuffer *getRawBuffer() const {
126      return Buffer.getPointer();
127    }
128
129    /// \brief Replace the existing buffer (which will be deleted)
130    /// with the given buffer.
131    void replaceBuffer(const llvm::MemoryBuffer *B, bool DoNotFree = false);
132
133    /// \brief Determine whether the buffer itself is invalid.
134    bool isBufferInvalid() const {
135      return Buffer.getInt() & InvalidFlag;
136    }
137
138    /// \brief Determine whether the buffer should be freed.
139    bool shouldFreeBuffer() const {
140      return (Buffer.getInt() & DoNotFreeFlag) == 0;
141    }
142
143    ContentCache(const FileEntry *Ent = 0)
144      : Buffer(0, false), OrigEntry(Ent), ContentsEntry(Ent),
145        SourceLineCache(0), NumLines(0) {}
146
147    ContentCache(const FileEntry *Ent, const FileEntry *contentEnt)
148      : Buffer(0, false), OrigEntry(Ent), ContentsEntry(contentEnt),
149        SourceLineCache(0), NumLines(0) {}
150
151    ~ContentCache();
152
153    /// The copy ctor does not allow copies where source object has either
154    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
155    ///  is not transferred, so this is a logical error.
156    ContentCache(const ContentCache &RHS)
157      : Buffer(0, false), SourceLineCache(0)
158    {
159      OrigEntry = RHS.OrigEntry;
160      ContentsEntry = RHS.ContentsEntry;
161
162      assert (RHS.Buffer.getPointer() == 0 && RHS.SourceLineCache == 0
163              && "Passed ContentCache object cannot own a buffer.");
164
165      NumLines = RHS.NumLines;
166    }
167
168  private:
169    // Disable assignments.
170    ContentCache &operator=(const ContentCache& RHS);
171  };
172
173  /// FileInfo - Information about a FileID, basically just the logical file
174  /// that it represents and include stack information.
175  ///
176  /// Each FileInfo has include stack information, indicating where it came
177  /// from.  This information encodes the #include chain that a token was
178  /// instantiated from.  The main include file has an invalid IncludeLoc.
179  ///
180  /// FileInfos contain a "ContentCache *", with the contents of the file.
181  ///
182  class FileInfo {
183    /// IncludeLoc - The location of the #include that brought in this file.
184    /// This is an invalid SLOC for the main file (top of the #include chain).
185    unsigned IncludeLoc;  // Really a SourceLocation
186
187    /// Data - This contains the ContentCache* and the bits indicating the
188    /// characteristic of the file and whether it has #line info, all bitmangled
189    /// together.
190    uintptr_t Data;
191  public:
192    /// get - Return a FileInfo object.
193    static FileInfo get(SourceLocation IL, const ContentCache *Con,
194                        CharacteristicKind FileCharacter) {
195      FileInfo X;
196      X.IncludeLoc = IL.getRawEncoding();
197      X.Data = (uintptr_t)Con;
198      assert((X.Data & 7) == 0 &&"ContentCache pointer insufficiently aligned");
199      assert((unsigned)FileCharacter < 4 && "invalid file character");
200      X.Data |= (unsigned)FileCharacter;
201      return X;
202    }
203
204    SourceLocation getIncludeLoc() const {
205      return SourceLocation::getFromRawEncoding(IncludeLoc);
206    }
207    const ContentCache* getContentCache() const {
208      return reinterpret_cast<const ContentCache*>(Data & ~7UL);
209    }
210
211    /// getCharacteristic - Return whether this is a system header or not.
212    CharacteristicKind getFileCharacteristic() const {
213      return (CharacteristicKind)(Data & 3);
214    }
215
216    /// hasLineDirectives - Return true if this FileID has #line directives in
217    /// it.
218    bool hasLineDirectives() const { return (Data & 4) != 0; }
219
220    /// setHasLineDirectives - Set the flag that indicates that this FileID has
221    /// line table entries associated with it.
222    void setHasLineDirectives() {
223      Data |= 4;
224    }
225  };
226
227  /// InstantiationInfo - Each InstantiationInfo encodes the Instantiation
228  /// location - where the token was ultimately instantiated, and the
229  /// SpellingLoc - where the actual character data for the token came from.
230  class InstantiationInfo {
231     // Really these are all SourceLocations.
232
233    /// SpellingLoc - Where the spelling for the token can be found.
234    unsigned SpellingLoc;
235
236    /// InstantiationLocStart/InstantiationLocEnd - In a macro expansion, these
237    /// indicate the start and end of the instantiation.  In object-like macros,
238    /// these will be the same.  In a function-like macro instantiation, the
239    /// start will be the identifier and the end will be the ')'.  Finally, in
240    /// macro-argument instantitions, the end will be 'SourceLocation()', an
241    /// invalid location.
242    unsigned InstantiationLocStart, InstantiationLocEnd;
243
244  public:
245    SourceLocation getSpellingLoc() const {
246      return SourceLocation::getFromRawEncoding(SpellingLoc);
247    }
248    SourceLocation getInstantiationLocStart() const {
249      return SourceLocation::getFromRawEncoding(InstantiationLocStart);
250    }
251    SourceLocation getInstantiationLocEnd() const {
252      SourceLocation EndLoc =
253        SourceLocation::getFromRawEncoding(InstantiationLocEnd);
254      return EndLoc.isInvalid() ? getInstantiationLocStart() : EndLoc;
255    }
256
257    std::pair<SourceLocation,SourceLocation> getInstantiationLocRange() const {
258      return std::make_pair(getInstantiationLocStart(),
259                            getInstantiationLocEnd());
260    }
261
262    bool isMacroArgInstantiation() const {
263      // Note that this needs to return false for default constructed objects.
264      return getInstantiationLocStart().isValid() &&
265        SourceLocation::getFromRawEncoding(InstantiationLocEnd).isInvalid();
266    }
267
268    /// create - Return a InstantiationInfo for an expansion. ILStart and
269    /// ILEnd specify the instantiation range (where the macro is expanded),
270    /// and SL specifies the spelling location (where the characters from the
271    /// token come from). All three can refer to normal File SLocs or
272    /// instantiation locations.
273    static InstantiationInfo create(SourceLocation SL,
274                                    SourceLocation ILStart,
275                                    SourceLocation ILEnd) {
276      InstantiationInfo X;
277      X.SpellingLoc = SL.getRawEncoding();
278      X.InstantiationLocStart = ILStart.getRawEncoding();
279      X.InstantiationLocEnd = ILEnd.getRawEncoding();
280      return X;
281    }
282
283    /// createForMacroArg - Return a special InstantiationInfo for the
284    /// expansion of a macro argument into a function-like macro's body. IL
285    /// specifies the instantiation location (where the macro is expanded).
286    /// This doesn't need to be a range because a macro is always instantiated
287    /// at a macro parameter reference, and macro parameters are always exactly
288    /// one token. SL specifies the spelling location (where the characters
289    /// from the token come from). IL and SL can both refer to normal File
290    /// SLocs or instantiation locations.
291    ///
292    /// Given the code:
293    /// \code
294    ///   #define F(x) f(x)
295    ///   F(42);
296    /// \endcode
297    ///
298    /// When expanding '\c F(42)', the '\c x' would call this with an SL
299    /// pointing at '\c 42' anad an IL pointing at its location in the
300    /// definition of '\c F'.
301    static InstantiationInfo createForMacroArg(SourceLocation SL,
302                                               SourceLocation IL) {
303      // We store an intentionally invalid source location for the end of the
304      // instantiation range to mark that this is a macro argument instantation
305      // rather than a normal one.
306      return create(SL, IL, SourceLocation());
307    }
308  };
309
310  /// SLocEntry - This is a discriminated union of FileInfo and
311  /// InstantiationInfo.  SourceManager keeps an array of these objects, and
312  /// they are uniquely identified by the FileID datatype.
313  class SLocEntry {
314    unsigned Offset;   // low bit is set for instantiation info.
315    union {
316      FileInfo File;
317      InstantiationInfo Instantiation;
318    };
319  public:
320    unsigned getOffset() const { return Offset >> 1; }
321
322    bool isInstantiation() const { return Offset & 1; }
323    bool isFile() const { return !isInstantiation(); }
324
325    const FileInfo &getFile() const {
326      assert(isFile() && "Not a file SLocEntry!");
327      return File;
328    }
329
330    const InstantiationInfo &getInstantiation() const {
331      assert(isInstantiation() && "Not an instantiation SLocEntry!");
332      return Instantiation;
333    }
334
335    static SLocEntry get(unsigned Offset, const FileInfo &FI) {
336      SLocEntry E;
337      E.Offset = Offset << 1;
338      E.File = FI;
339      return E;
340    }
341
342    static SLocEntry get(unsigned Offset, const InstantiationInfo &II) {
343      SLocEntry E;
344      E.Offset = (Offset << 1) | 1;
345      E.Instantiation = II;
346      return E;
347    }
348  };
349}  // end SrcMgr namespace.
350
351/// \brief Abstract interface for an external source of source location entries.
352class ExternalSLocEntrySource {
353public:
354  virtual ~ExternalSLocEntrySource();
355
356  /// \brief Read the source location entry with index ID, which will always be
357  /// less than -1.
358  ///
359  /// \returns true if an error occurred that prevented the source-location
360  /// entry from being loaded, false otherwise.
361  virtual bool ReadSLocEntry(int ID) = 0;
362};
363
364
365/// IsBeforeInTranslationUnitCache - This class holds the cache used by
366/// isBeforeInTranslationUnit.  The cache structure is complex enough to be
367/// worth breaking out of SourceManager.
368class IsBeforeInTranslationUnitCache {
369  /// L/R QueryFID - These are the FID's of the cached query.  If these match up
370  /// with a subsequent query, the result can be reused.
371  FileID LQueryFID, RQueryFID;
372
373  /// CommonFID - This is the file found in common between the two #include
374  /// traces.  It is the nearest common ancestor of the #include tree.
375  FileID CommonFID;
376
377  /// L/R CommonOffset - This is the offset of the previous query in CommonFID.
378  /// Usually, this represents the location of the #include for QueryFID, but if
379  /// LQueryFID is a parent of RQueryFID (or vise versa) then these can be a
380  /// random token in the parent.
381  unsigned LCommonOffset, RCommonOffset;
382public:
383
384  /// isCacheValid - Return true if the currently cached values match up with
385  /// the specified LHS/RHS query.  If not, we can't use the cache.
386  bool isCacheValid(FileID LHS, FileID RHS) const {
387    return LQueryFID == LHS && RQueryFID == RHS;
388  }
389
390  /// getCachedResult - If the cache is valid, compute the result given the
391  /// specified offsets in the LHS/RHS FID's.
392  bool getCachedResult(unsigned LOffset, unsigned ROffset) const {
393    // If one of the query files is the common file, use the offset.  Otherwise,
394    // use the #include loc in the common file.
395    if (LQueryFID != CommonFID) LOffset = LCommonOffset;
396    if (RQueryFID != CommonFID) ROffset = RCommonOffset;
397    return LOffset < ROffset;
398  }
399
400  // Set up a new query.
401  void setQueryFIDs(FileID LHS, FileID RHS) {
402    LQueryFID = LHS;
403    RQueryFID = RHS;
404  }
405
406  void setCommonLoc(FileID commonFID, unsigned lCommonOffset,
407                    unsigned rCommonOffset) {
408    CommonFID = commonFID;
409    LCommonOffset = lCommonOffset;
410    RCommonOffset = rCommonOffset;
411  }
412
413};
414
415/// \brief This class handles loading and caching of source files into memory.
416///
417/// This object owns the MemoryBuffer objects for all of the loaded
418/// files and assigns unique FileID's for each unique #include chain.
419///
420/// The SourceManager can be queried for information about SourceLocation
421/// objects, turning them into either spelling or instantiation locations.
422/// Spelling locations represent where the bytes corresponding to a token came
423/// from and instantiation locations represent where the location is in the
424/// user's view.  In the case of a macro expansion, for example, the spelling
425/// location indicates  where the expanded token came from and the instantiation
426/// location specifies where it was expanded.
427class SourceManager : public llvm::RefCountedBase<SourceManager> {
428  /// \brief Diagnostic object.
429  Diagnostic &Diag;
430
431  FileManager &FileMgr;
432
433  mutable llvm::BumpPtrAllocator ContentCacheAlloc;
434
435  /// FileInfos - Memoized information about all of the files tracked by this
436  /// SourceManager.  This set allows us to merge ContentCache entries based
437  /// on their FileEntry*.  All ContentCache objects will thus have unique,
438  /// non-null, FileEntry pointers.
439  llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*> FileInfos;
440
441  /// \brief True if the ContentCache for files that are overridden by other
442  /// files should report the original file name. Defaults to true.
443  bool OverridenFilesKeepOriginalName;
444
445  /// \brief Files that have been overridden with the contents of another file.
446  llvm::DenseMap<const FileEntry *, const FileEntry *> OverriddenFiles;
447
448  /// MemBufferInfos - Information about various memory buffers that we have
449  /// read in.  All FileEntry* within the stored ContentCache objects are NULL,
450  /// as they do not refer to a file.
451  std::vector<SrcMgr::ContentCache*> MemBufferInfos;
452
453  /// \brief The table of SLocEntries that are local to this module.
454  ///
455  /// Positive FileIDs are indexes into this table. Entry 0 indicates an invalid
456  /// instantiation.
457  std::vector<SrcMgr::SLocEntry> LocalSLocEntryTable;
458
459  /// \brief The table of SLocEntries that are loaded from other modules.
460  ///
461  /// Negative FileIDs are indexes into this table. To get from ID to an index,
462  /// use (-ID - 2).
463  std::vector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
464
465  /// \brief The starting offset of the next local SLocEntry.
466  ///
467  /// This is LocalSLocEntryTable.back().Offset + the size of that entry.
468  unsigned NextLocalOffset;
469
470  /// \brief The starting offset of the latest batch of loaded SLocEntries.
471  ///
472  /// This is LoadedSLocEntryTable.back().Offset, except that that entry might
473  /// not have been loaded, so that value would be unknown.
474  unsigned CurrentLoadedOffset;
475
476  /// \brief A bitmap that indicates whether the entries of LoadedSLocEntryTable
477  /// have already been loaded from the external source.
478  ///
479  /// Same indexing as LoadedSLocEntryTable.
480  std::vector<bool> SLocEntryLoaded;
481
482  /// \brief An external source for source location entries.
483  ExternalSLocEntrySource *ExternalSLocEntries;
484
485  /// LastFileIDLookup - This is a one-entry cache to speed up getFileID.
486  /// LastFileIDLookup records the last FileID looked up or created, because it
487  /// is very common to look up many tokens from the same file.
488  mutable FileID LastFileIDLookup;
489
490  /// LineTable - This holds information for #line directives.  It is referenced
491  /// by indices from SLocEntryTable.
492  LineTableInfo *LineTable;
493
494  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
495  /// method which is used to speedup getLineNumber calls to nearby locations.
496  mutable FileID LastLineNoFileIDQuery;
497  mutable SrcMgr::ContentCache *LastLineNoContentCache;
498  mutable unsigned LastLineNoFilePos;
499  mutable unsigned LastLineNoResult;
500
501  /// MainFileID - The file ID for the main source file of the translation unit.
502  FileID MainFileID;
503
504  // Statistics for -print-stats.
505  mutable unsigned NumLinearScans, NumBinaryProbes;
506
507  // Cache results for the isBeforeInTranslationUnit method.
508  mutable IsBeforeInTranslationUnitCache IsBeforeInTUCache;
509
510  // Cache for the "fake" buffer used for error-recovery purposes.
511  mutable llvm::MemoryBuffer *FakeBufferForRecovery;
512
513  // SourceManager doesn't support copy construction.
514  explicit SourceManager(const SourceManager&);
515  void operator=(const SourceManager&);
516public:
517  SourceManager(Diagnostic &Diag, FileManager &FileMgr);
518  ~SourceManager();
519
520  void clearIDTables();
521
522  Diagnostic &getDiagnostics() const { return Diag; }
523
524  FileManager &getFileManager() const { return FileMgr; }
525
526  /// \brief Set true if the SourceManager should report the original file name
527  /// for contents of files that were overriden by other files.Defaults to true.
528  void setOverridenFilesKeepOriginalName(bool value) {
529    OverridenFilesKeepOriginalName = value;
530  }
531
532  /// createMainFileIDForMembuffer - Create the FileID for a memory buffer
533  ///  that will represent the FileID for the main source.  One example
534  ///  of when this would be used is when the main source is read from STDIN.
535  FileID createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
536    assert(MainFileID.isInvalid() && "MainFileID already set!");
537    MainFileID = createFileIDForMemBuffer(Buffer);
538    return MainFileID;
539  }
540
541  //===--------------------------------------------------------------------===//
542  // MainFileID creation and querying methods.
543  //===--------------------------------------------------------------------===//
544
545  /// getMainFileID - Returns the FileID of the main source file.
546  FileID getMainFileID() const { return MainFileID; }
547
548  /// createMainFileID - Create the FileID for the main source file.
549  FileID createMainFileID(const FileEntry *SourceFile) {
550    assert(MainFileID.isInvalid() && "MainFileID already set!");
551    MainFileID = createFileID(SourceFile, SourceLocation(), SrcMgr::C_User);
552    return MainFileID;
553  }
554
555  /// \brief Set the file ID for the precompiled preamble, which is also the
556  /// main file.
557  void SetPreambleFileID(FileID Preamble) {
558    assert(MainFileID.isInvalid() && "MainFileID already set!");
559    MainFileID = Preamble;
560  }
561
562  //===--------------------------------------------------------------------===//
563  // Methods to create new FileID's and instantiations.
564  //===--------------------------------------------------------------------===//
565
566  /// createFileID - Create a new FileID that represents the specified file
567  /// being #included from the specified IncludePosition.  This translates NULL
568  /// into standard input.
569  FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
570                      SrcMgr::CharacteristicKind FileCharacter,
571                      int LoadedID = 0, unsigned LoadedOffset = 0) {
572    const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile);
573    assert(IR && "getOrCreateContentCache() cannot return NULL");
574    return createFileID(IR, IncludePos, FileCharacter, LoadedID, LoadedOffset);
575  }
576
577  /// createFileIDForMemBuffer - Create a new FileID that represents the
578  /// specified memory buffer.  This does no caching of the buffer and takes
579  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
580  FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer,
581                                  int LoadedID = 0, unsigned LoadedOffset = 0) {
582    return createFileID(createMemBufferContentCache(Buffer), SourceLocation(),
583                        SrcMgr::C_User, LoadedID, LoadedOffset);
584  }
585
586  /// createMacroArgInstantiationLoc - Return a new SourceLocation that encodes
587  /// the fact that a token from SpellingLoc should actually be referenced from
588  /// InstantiationLoc, and that it represents the instantiation of a macro
589  /// argument into the function-like macro body.
590  SourceLocation createMacroArgInstantiationLoc(SourceLocation Loc,
591                                                SourceLocation InstantiationLoc,
592                                                unsigned TokLength);
593
594  /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
595  /// that a token from SpellingLoc should actually be referenced from
596  /// InstantiationLoc.
597  SourceLocation createInstantiationLoc(SourceLocation Loc,
598                                        SourceLocation InstantiationLocStart,
599                                        SourceLocation InstantiationLocEnd,
600                                        unsigned TokLength,
601                                        int LoadedID = 0,
602                                        unsigned LoadedOffset = 0);
603
604  /// \brief Retrieve the memory buffer associated with the given file.
605  ///
606  /// \param Invalid If non-NULL, will be set \c true if an error
607  /// occurs while retrieving the memory buffer.
608  const llvm::MemoryBuffer *getMemoryBufferForFile(const FileEntry *File,
609                                                   bool *Invalid = 0);
610
611  /// \brief Override the contents of the given source file by providing an
612  /// already-allocated buffer.
613  ///
614  /// \param SourceFile the source file whose contents will be overridden.
615  ///
616  /// \param Buffer the memory buffer whose contents will be used as the
617  /// data in the given source file.
618  ///
619  /// \param DoNotFree If true, then the buffer will not be freed when the
620  /// source manager is destroyed.
621  void overrideFileContents(const FileEntry *SourceFile,
622                            const llvm::MemoryBuffer *Buffer,
623                            bool DoNotFree = false);
624
625  /// \brief Override the given source file with another one.
626  ///
627  /// \param SourceFile the source file which will be overridden.
628  ///
629  /// \param NewFile the file whose contents will be used as the
630  /// data instead of the contents of the given source file.
631  void overrideFileContents(const FileEntry *SourceFile,
632                            const FileEntry *NewFile);
633
634  //===--------------------------------------------------------------------===//
635  // FileID manipulation methods.
636  //===--------------------------------------------------------------------===//
637
638  /// getBuffer - Return the buffer for the specified FileID. If there is an
639  /// error opening this buffer the first time, this manufactures a temporary
640  /// buffer and returns a non-empty error string.
641  const llvm::MemoryBuffer *getBuffer(FileID FID, SourceLocation Loc,
642                                      bool *Invalid = 0) const {
643    bool MyInvalid = false;
644    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
645    if (MyInvalid || !Entry.isFile()) {
646      if (Invalid)
647        *Invalid = true;
648
649      return getFakeBufferForRecovery();
650    }
651
652    return Entry.getFile().getContentCache()->getBuffer(Diag, *this, Loc,
653                                                        Invalid);
654  }
655
656  const llvm::MemoryBuffer *getBuffer(FileID FID, bool *Invalid = 0) const {
657    bool MyInvalid = false;
658    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
659    if (MyInvalid || !Entry.isFile()) {
660      if (Invalid)
661        *Invalid = true;
662
663      return getFakeBufferForRecovery();
664    }
665
666    return Entry.getFile().getContentCache()->getBuffer(Diag, *this,
667                                                        SourceLocation(),
668                                                        Invalid);
669  }
670
671  /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
672  const FileEntry *getFileEntryForID(FileID FID) const {
673    bool MyInvalid = false;
674    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
675    if (MyInvalid || !Entry.isFile())
676      return 0;
677
678    return Entry.getFile().getContentCache()->OrigEntry;
679  }
680
681  /// Returns the FileEntry record for the provided SLocEntry.
682  const FileEntry *getFileEntryForSLocEntry(const SrcMgr::SLocEntry &sloc) const
683  {
684    return sloc.getFile().getContentCache()->OrigEntry;
685  }
686
687  /// getBufferData - Return a StringRef to the source buffer data for the
688  /// specified FileID.
689  ///
690  /// \param FID The file ID whose contents will be returned.
691  /// \param Invalid If non-NULL, will be set true if an error occurred.
692  StringRef getBufferData(FileID FID, bool *Invalid = 0) const;
693
694
695  //===--------------------------------------------------------------------===//
696  // SourceLocation manipulation methods.
697  //===--------------------------------------------------------------------===//
698
699  /// getFileID - Return the FileID for a SourceLocation.  This is a very
700  /// hot method that is used for all SourceManager queries that start with a
701  /// SourceLocation object.  It is responsible for finding the entry in
702  /// SLocEntryTable which contains the specified location.
703  ///
704  FileID getFileID(SourceLocation SpellingLoc) const {
705    unsigned SLocOffset = SpellingLoc.getOffset();
706
707    // If our one-entry cache covers this offset, just return it.
708    if (isOffsetInFileID(LastFileIDLookup, SLocOffset))
709      return LastFileIDLookup;
710
711    return getFileIDSlow(SLocOffset);
712  }
713
714  /// getLocForStartOfFile - Return the source location corresponding to the
715  /// first byte of the specified file.
716  SourceLocation getLocForStartOfFile(FileID FID) const {
717    bool Invalid = false;
718    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid);
719    if (Invalid || !Entry.isFile())
720      return SourceLocation();
721
722    unsigned FileOffset = Entry.getOffset();
723    return SourceLocation::getFileLoc(FileOffset);
724  }
725
726  /// getExpansionLoc - Given a SourceLocation object, return the expansion
727  /// location referenced by the ID.
728  SourceLocation getExpansionLoc(SourceLocation Loc) const {
729    // Handle the non-mapped case inline, defer to out of line code to handle
730    // expansions.
731    if (Loc.isFileID()) return Loc;
732    return getExpansionLocSlowCase(Loc);
733  }
734
  /// getImmediateExpansionRange - Return the start/end of the expansion
  /// information for \p Loc.
  ///
  /// \param Loc Required to be an expansion location.
  /// \returns A (start, end) pair of SourceLocations.
  std::pair<SourceLocation,SourceLocation>
  getImmediateExpansionRange(SourceLocation Loc) const;
739
  /// getExpansionRange - Given a SourceLocation object, return the range of
  /// tokens covered by the expansion in the ultimate file.
  ///
  /// \returns A (start, end) pair of SourceLocations.
  std::pair<SourceLocation,SourceLocation>
  getExpansionRange(SourceLocation Loc) const;
744
745
746  /// getSpellingLoc - Given a SourceLocation object, return the spelling
747  /// location referenced by the ID.  This is the place where the characters
748  /// that make up the lexed token can be found.
749  SourceLocation getSpellingLoc(SourceLocation Loc) const {
750    // Handle the non-mapped case inline, defer to out of line code to handle
751    // instantiations.
752    if (Loc.isFileID()) return Loc;
753    return getSpellingLocSlowCase(Loc);
754  }
755
  /// getImmediateSpellingLoc - Given a SourceLocation object, return the
  /// spelling location referenced by the ID.  This is only the first level
  /// down towards the place where the characters that make up the lexed token
  /// can be found.  This should not generally be used by clients.
  SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const;
761
762  /// getDecomposedLoc - Decompose the specified location into a raw FileID +
763  /// Offset pair.  The first element is the FileID, the second is the
764  /// offset from the start of the buffer of the location.
765  std::pair<FileID, unsigned> getDecomposedLoc(SourceLocation Loc) const {
766    FileID FID = getFileID(Loc);
767    return std::make_pair(FID, Loc.getOffset()-getSLocEntry(FID).getOffset());
768  }
769
770  /// getDecomposedExpansionLoc - Decompose the specified location into a
771  /// raw FileID + Offset pair.  If the location is an instantiation record,
772  /// walk through it until we find the final location instantiated.
773  std::pair<FileID, unsigned>
774  getDecomposedExpansionLoc(SourceLocation Loc) const {
775    FileID FID = getFileID(Loc);
776    const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
777
778    unsigned Offset = Loc.getOffset()-E->getOffset();
779    if (Loc.isFileID())
780      return std::make_pair(FID, Offset);
781
782    return getDecomposedExpansionLocSlowCase(E);
783  }
784
785  /// getDecomposedSpellingLoc - Decompose the specified location into a raw
786  /// FileID + Offset pair.  If the location is an instantiation record, walk
787  /// through it until we find its spelling record.
788  std::pair<FileID, unsigned>
789  getDecomposedSpellingLoc(SourceLocation Loc) const {
790    FileID FID = getFileID(Loc);
791    const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
792
793    unsigned Offset = Loc.getOffset()-E->getOffset();
794    if (Loc.isFileID())
795      return std::make_pair(FID, Offset);
796    return getDecomposedSpellingLocSlowCase(E, Offset);
797  }
798
799  /// getFileOffset - This method returns the offset from the start
800  /// of the file that the specified SourceLocation represents. This is not very
801  /// meaningful for a macro ID.
802  unsigned getFileOffset(SourceLocation SpellingLoc) const {
803    return getDecomposedLoc(SpellingLoc).second;
804  }
805
  /// isMacroArgInstantiation - Test whether the given source location
  /// represents a macro argument's instantiation into the function-like macro
  /// definition.  Such source locations only appear inside of the
  /// instantiation locations representing where a particular function-like
  /// macro was expanded.
  bool isMacroArgInstantiation(SourceLocation Loc) const;
812
813  //===--------------------------------------------------------------------===//
814  // Queries about the code at a SourceLocation.
815  //===--------------------------------------------------------------------===//
816
  /// getCharacterData - Return a pointer to the start of the specified location
  /// in the appropriate spelling MemoryBuffer.
  ///
  /// \param SL The location whose character data is requested.
  /// \param Invalid If non-NULL, will be set \c true if an error occurs.
  const char *getCharacterData(SourceLocation SL, bool *Invalid = 0) const;
822
  /// getColumnNumber - Return the column # for the specified file position.
  /// This is significantly cheaper to compute than the line number.  This
  /// returns zero if the column number isn't known.  This may only be called on
  /// a file sloc, so you must choose a spelling or instantiation location
  /// before calling this method.
  ///
  /// NOTE(review): the Spelling/Expansion/Presumed variants below presumably
  /// make that choice for the caller before computing the column; confirm
  /// against their out-of-line definitions.
  unsigned getColumnNumber(FileID FID, unsigned FilePos,
                           bool *Invalid = 0) const;
  unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid = 0) const;
  unsigned getExpansionColumnNumber(SourceLocation Loc,
                                    bool *Invalid = 0) const;
  unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid = 0) const;
834
835
  /// getLineNumber - Given a FileID and a position within that file, return
  /// the line number for the position indicated.  This requires building and
  /// caching a table of line offsets for the MemoryBuffer, so this is not
  /// cheap: use only when about to emit a diagnostic.
  ///
  /// NOTE(review): the Spelling/Expansion/Presumed variants take a
  /// SourceLocation instead and presumably resolve it to the corresponding
  /// kind of location first; confirm against their out-of-line definitions.
  unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid = 0) const;
  unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid = 0) const;
  unsigned getExpansionLineNumber(SourceLocation Loc, bool *Invalid = 0) const;
  unsigned getPresumedLineNumber(SourceLocation Loc, bool *Invalid = 0) const;
844
  /// getBufferName - Return the filename or buffer identifier of the buffer
  /// the location is in.  Note that this name does not respect #line
  /// directives.  Use getPresumedLoc for normal clients.
  const char *getBufferName(SourceLocation Loc, bool *Invalid = 0) const;
849
  /// getFileCharacteristic - Return the file characteristic of the specified
  /// source location, indicating whether this is a normal file, a system
  /// header, or an "implicit extern C" system header.
  ///
  /// This state can be modified with flags on GNU linemarker directives like:
  ///   # 4 "foo.h" 3
  /// which changes all source locations in the current file after that
  /// directive to be considered to be from a system header.
  SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const;
859
  /// getPresumedLoc - Return the "presumed" location that a SourceLocation
  /// specifies.  A "presumed location" can be modified by #line or GNU line
  /// marker directives.  This provides a view on the data that a user should
  /// see in diagnostics, for example.
  ///
  /// Note that a presumed location is always given as the instantiation point
  /// of an instantiation location, not at the spelling location.
  ///
  /// \returns The presumed location of the specified SourceLocation. If the
  /// presumed location cannot be calculated (e.g., because \p Loc is invalid
  /// or the file containing \p Loc has changed on disk), returns an invalid
  /// presumed location.
  PresumedLoc getPresumedLoc(SourceLocation Loc) const;
873
874  /// isFromSameFile - Returns true if both SourceLocations correspond to
875  ///  the same file.
876  bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const {
877    return getFileID(Loc1) == getFileID(Loc2);
878  }
879
880  /// isFromMainFile - Returns true if the file of provided SourceLocation is
881  ///   the main file.
882  bool isFromMainFile(SourceLocation Loc) const {
883    return getFileID(Loc) == getMainFileID();
884  }
885
886  /// isInSystemHeader - Returns if a SourceLocation is in a system header.
887  bool isInSystemHeader(SourceLocation Loc) const {
888    return getFileCharacteristic(Loc) != SrcMgr::C_User;
889  }
890
891  /// isInExternCSystemHeader - Returns if a SourceLocation is in an "extern C"
892  /// system header.
893  bool isInExternCSystemHeader(SourceLocation Loc) const {
894    return getFileCharacteristic(Loc) == SrcMgr::C_ExternCSystem;
895  }
896
897  /// \brief Given a specific chunk of a FileID (FileID with offset+length),
898  /// returns true if \arg Loc is inside that chunk and sets relative offset
899  /// (offset of \arg Loc from beginning of chunk) to \arg relativeOffset.
900  bool isInFileID(SourceLocation Loc,
901                  FileID FID, unsigned offset, unsigned length,
902                  unsigned *relativeOffset = 0) const {
903    assert(!FID.isInvalid());
904    if (Loc.isInvalid())
905      return false;
906
907    unsigned start = getSLocEntry(FID).getOffset() + offset;
908    unsigned end = start + length;
909
910#ifndef NDEBUG
911    // Make sure offset/length describe a chunk inside the given FileID.
912    unsigned NextOffset;
913    if (FID.ID == -2)
914      NextOffset = 1U << 31U;
915    else if (FID.ID+1 == (int)LocalSLocEntryTable.size())
916      NextOffset = getNextLocalOffset();
917    else
918      NextOffset = getSLocEntryByID(FID.ID+1).getOffset();
919    assert(start < NextOffset);
920    assert(end   < NextOffset);
921#endif
922
923    if (Loc.getOffset() >= start && Loc.getOffset() < end) {
924      if (relativeOffset)
925        *relativeOffset = Loc.getOffset() - start;
926      return true;
927    }
928
929    return false;
930  }
931
932  //===--------------------------------------------------------------------===//
933  // Line Table Manipulation Routines
934  //===--------------------------------------------------------------------===//
935
  /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
  ///
  /// \param Str The filename to look up.
  unsigned getLineTableFilenameID(StringRef Str);
939
  /// AddLineNote - Add a line note to the line table for the FileID and offset
  /// specified by Loc.  If FilenameID is -1, it is considered to be
  /// unspecified.
  ///
  /// The second overload additionally takes flags saying whether the note is
  /// a file entry or a file exit, and whether it marks a system header or an
  /// extern "C" header.
  void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID);
  void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID,
                   bool IsFileEntry, bool IsFileExit,
                   bool IsSystemHeader, bool IsExternCHeader);
947
948  /// \brief Determine if the source manager has a line table.
949  bool hasLineTable() const { return LineTable != 0; }
950
  /// \brief Retrieve the stored line table.
  ///
  /// NOTE(review): this is non-const, unlike hasLineTable(); presumably it
  /// may create the table on demand -- confirm against the definition.
  LineTableInfo &getLineTable();
953
954  //===--------------------------------------------------------------------===//
955  // Queries for performance analysis.
956  //===--------------------------------------------------------------------===//
957
958  /// Return the total amount of physical memory allocated by the
959  /// ContentCache allocator.
960  size_t getContentCacheSize() const {
961    return ContentCacheAlloc.getTotalMemory();
962  }
963
964  struct MemoryBufferSizes {
965    const size_t malloc_bytes;
966    const size_t mmap_bytes;
967
968    MemoryBufferSizes(size_t malloc_bytes, size_t mmap_bytes)
969      : malloc_bytes(malloc_bytes), mmap_bytes(mmap_bytes) {}
970  };
971
  /// Return the amount of memory used by memory buffers, broken down by
  /// heap-backed versus mmap'ed memory.
  MemoryBufferSizes getMemoryBufferSizes() const;
975
976  //===--------------------------------------------------------------------===//
977  // Other miscellaneous methods.
978  //===--------------------------------------------------------------------===//
979
  /// \brief Get the source location for the given file:line:col triplet.
  ///
  /// If the source file is included multiple times, the source location will
  /// be based upon the first inclusion.
  ///
  /// \param SourceFile The file the location is in.
  /// \param Line The line of the triplet.
  /// \param Col The column of the triplet.
  SourceLocation getLocation(const FileEntry *SourceFile,
                             unsigned Line, unsigned Col);
986
  /// \brief Determines the order of two source locations in the translation
  /// unit.
  ///
  /// \returns true if LHS source location comes before RHS, false otherwise.
  bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const;
991
992  /// \brief Determines the order of 2 source locations in the "source location
993  /// address space".
994  bool isBeforeInSourceLocationOffset(SourceLocation LHS,
995                                      SourceLocation RHS) const {
996    return isBeforeInSourceLocationOffset(LHS, RHS.getOffset());
997  }
998
999  /// \brief Determines the order of a source location and a source location
1000  /// offset in the "source location address space".
1001  ///
1002  /// Note that we always consider source locations loaded from
1003  bool isBeforeInSourceLocationOffset(SourceLocation LHS, unsigned RHS) const {
1004    unsigned LHSOffset = LHS.getOffset();
1005    bool LHSLoaded = LHSOffset >= CurrentLoadedOffset;
1006    bool RHSLoaded = RHS >= CurrentLoadedOffset;
1007    if (LHSLoaded == RHSLoaded)
1008      return LHS.getOffset() < RHS;
1009
1010    return LHSLoaded;
1011  }
1012
  // Read-only iteration over FileInfos, the map from FileEntry pointers to
  // ContentCache pointers.  fileinfo_begin()/fileinfo_end() delimit the range.
  typedef llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>
      ::const_iterator fileinfo_iterator;
  fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); }
  fileinfo_iterator fileinfo_end() const { return FileInfos.end(); }
1018  bool hasFileInfo(const FileEntry *File) const {
1019    return FileInfos.find(File) != FileInfos.end();
1020  }
1021
  /// PrintStats - Print statistics about this SourceManager to stderr.
  ///
  void PrintStats() const;
1025
1026  /// \brief Get the number of local SLocEntries we have.
1027  unsigned local_sloc_entry_size() const { return LocalSLocEntryTable.size(); }
1028
1029  /// \brief Get a local SLocEntry. This is exposed for indexing.
1030  const SrcMgr::SLocEntry &getLocalSLocEntry(unsigned Index,
1031                                             bool *Invalid = 0) const {
1032    assert(Index < LocalSLocEntryTable.size() && "Invalid index");
1033    return LocalSLocEntryTable[Index];
1034  }
1035
1036  /// \brief Get the number of loaded SLocEntries we have.
1037  unsigned loaded_sloc_entry_size() const { return LoadedSLocEntryTable.size();}
1038
1039  /// \brief Get a loaded SLocEntry. This is exposed for indexing.
1040  const SrcMgr::SLocEntry &getLoadedSLocEntry(unsigned Index, bool *Invalid=0) const {
1041    assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
1042    if (!SLocEntryLoaded[Index])
1043      ExternalSLocEntries->ReadSLocEntry(-(static_cast<int>(Index) + 2));
1044    return LoadedSLocEntryTable[Index];
1045  }
1046
1047  const SrcMgr::SLocEntry &getSLocEntry(FileID FID, bool *Invalid = 0) const {
1048    return getSLocEntryByID(FID.ID);
1049  }
1050
1051  unsigned getNextLocalOffset() const { return NextLocalOffset; }
1052
1053  void setExternalSLocEntrySource(ExternalSLocEntrySource *Source) {
1054    assert(LoadedSLocEntryTable.empty() &&
1055           "Invalidating existing loaded entries");
1056    ExternalSLocEntries = Source;
1057  }
1058
  /// \brief Allocate a number of loaded SLocEntries, which will be actually
  /// loaded on demand from the external source.
  ///
  /// \param NumSLocEntries The number of entries to allocate.
  /// \param TotalSize The total space the entries occupy in the global
  /// source view.
  ///
  /// \returns A pair of the lowest ID and the base offset of the allocated
  /// entries.
  std::pair<int, unsigned>
  AllocateLoadedSLocEntries(unsigned NumSLocEntries, unsigned TotalSize);
1067
1068private:
  /// NOTE(review): undocumented in this header.  Judging by the name this
  /// presumably returns a placeholder buffer used to recover when a real
  /// buffer is unavailable -- confirm against the definition.
  const llvm::MemoryBuffer *getFakeBufferForRecovery() const;
1070
1071  /// \brief Get the entry with the given unwrapped FileID.
1072  const SrcMgr::SLocEntry &getSLocEntryByID(int ID) const {
1073    assert(ID != -1 && "Using FileID sentinel value");
1074    if (ID < 0)
1075      return getLoadedSLocEntryByID(ID);
1076    return getLocalSLocEntry(static_cast<unsigned>(ID));
1077  }
1078
1079  const SrcMgr::SLocEntry &getLoadedSLocEntryByID(int ID) const {
1080    return getLoadedSLocEntry(static_cast<unsigned>(-ID - 2));
1081  }
1082
  /// createInstantiationLocImpl - Implements the common elements of storing
  /// an instantiation info struct into the SLocEntry table and producing a
  /// source location that refers to it.
  ///
  /// NOTE(review): LoadedID and LoadedOffset default to 0, which presumably
  /// means "create a local entry" -- confirm against the definition.
  SourceLocation createInstantiationLocImpl(const SrcMgr::InstantiationInfo &II,
                                            unsigned TokLength,
                                            int LoadedID = 0,
                                            unsigned LoadedOffset = 0);
1090
1091  /// isOffsetInFileID - Return true if the specified FileID contains the
1092  /// specified SourceLocation offset.  This is a very hot method.
1093  inline bool isOffsetInFileID(FileID FID, unsigned SLocOffset) const {
1094    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID);
1095    // If the entry is after the offset, it can't contain it.
1096    if (SLocOffset < Entry.getOffset()) return false;
1097
1098    // If this is the very last entry then it does.
1099    if (FID.ID == -2)
1100      return true;
1101
1102    // If it is the last local entry, then it does if the location is local.
1103    if (static_cast<unsigned>(FID.ID+1) == LocalSLocEntryTable.size()) {
1104      return SLocOffset < NextLocalOffset;
1105    }
1106
1107    // Otherwise, the entry after it has to not include it. This works for both
1108    // local and loaded entries.
1109    return SLocOffset < getSLocEntry(FileID::get(FID.ID+1)).getOffset();
1110  }
1111
  /// createFileID - Create a new FileID for the specified ContentCache and
  /// include position.  This works regardless of whether the ContentCache
  /// corresponds to a file or some other input source.
  ///
  /// NOTE(review): LoadedID/LoadedOffset presumably place the new entry in
  /// the loaded entry table when it comes from an external source -- confirm
  /// against the definition.
  FileID createFileID(const SrcMgr::ContentCache* File,
                      SourceLocation IncludePos,
                      SrcMgr::CharacteristicKind DirCharacter,
                      int LoadedID, unsigned LoadedOffset);
1119
  /// NOTE(review): undocumented in this header.  Judging by the name this
  /// presumably returns the ContentCache for \p SourceFile, creating it if
  /// none exists yet -- confirm against the definition.
  const SrcMgr::ContentCache *
    getOrCreateContentCache(const FileEntry *SourceFile);
1122
  /// createMemBufferContentCache - Create a new ContentCache for the specified
  /// memory buffer.
  ///
  /// \param Buf The memory buffer the new ContentCache is created for.
  const SrcMgr::ContentCache*
  createMemBufferContentCache(const llvm::MemoryBuffer *Buf);
1127
  /// Out-of-line lookup used by getFileID() when its one-entry cache does not
  /// cover \p SLocOffset.
  FileID getFileIDSlow(unsigned SLocOffset) const;
  /// NOTE(review): presumably the lookups restricted to the local and to the
  /// loaded entry tables respectively -- confirm against the definitions.
  FileID getFileIDLocal(unsigned SLocOffset) const;
  FileID getFileIDLoaded(unsigned SLocOffset) const;
1131
  /// Out-of-line halves of getExpansionLoc() and getSpellingLoc(); those
  /// inline wrappers only call these for locations that are not file
  /// locations.
  SourceLocation getExpansionLocSlowCase(SourceLocation Loc) const;
  SourceLocation getSpellingLocSlowCase(SourceLocation Loc) const;
1134
  /// Out-of-line halves of getDecomposedExpansionLoc() and
  /// getDecomposedSpellingLoc(); those inline wrappers only call these for
  /// locations that are not file locations.  \p E is the SLocEntry that
  /// contains the location and \p Offset is the location's offset from the
  /// start of that entry.
  std::pair<FileID, unsigned>
  getDecomposedExpansionLocSlowCase(const SrcMgr::SLocEntry *E) const;
  std::pair<FileID, unsigned>
  getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
                                   unsigned Offset) const;
1140};
1141
1142
1143}  // end namespace clang
1144
1145#endif
1146