SourceManager.h revision 8429fca639bb4fbb6d589f85a2cc84513db4f748
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include "llvm/Bitcode/SerializationFwd.h"
19#include <vector>
20#include <set>
21#include <list>
22#include <cassert>
23
24namespace llvm {
25class MemoryBuffer;
26}
27
28namespace clang {
29
30class SourceManager;
31class FileManager;
32class FileEntry;
33class IdentifierTokenInfo;
34
35/// SrcMgr - Private classes that are part of the SourceManager implementation.
36///
37namespace SrcMgr {
38  /// ContentCache - Once instance of this struct is kept for every file
39  ///  loaded or used.  This object owns the MemoryBuffer object.
40  struct ContentCache {
41    /// Reference to the file entry.  This reference does not own
42    /// the FileEntry object.  It is possible for this to be NULL if
43    /// the ContentCache encapsulates an imaginary text buffer.
44    const FileEntry* Entry;
45
46    /// Buffer - The actual buffer containing the characters from the input
47    /// file.  This is owned by the ContentCache object.
48    const llvm::MemoryBuffer* Buffer;
49
50    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
51    /// is lazily computed.  This is owned by the ContentCache object.
52    unsigned* SourceLineCache;
53
54    /// NumLines - The number of lines in this ContentCache.  This is only valid
55    /// if SourceLineCache is non-null.
56    unsigned NumLines;
57
58    ContentCache(const FileEntry* e = NULL)
59    : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {}
60
61    ~ContentCache();
62
63    /// The copy ctor does not allow copies where source object has either
64    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
65    ///  is not transfered, so this is a logical error.
66    ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) {
67      Entry = RHS.Entry;
68
69      assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
70              && "Passed ContentCache object cannot own a buffer.");
71
72      NumLines = RHS.NumLines;
73    }
74
75    /// Emit - Emit this ContentCache to Bitcode.
76    void Emit(llvm::Serializer& S) const;
77
78    /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode
79    //   and store it in the specified SourceManager.
80    static void ReadToSourceManager(llvm::Deserializer& D, SourceManager& SMgr,
81                                    FileManager* FMgr, std::vector<char>&  Buf);
82
83  private:
84    // Disable assignments.
85    ContentCache& operator=(const ContentCache& RHS);
86  };
87
88  /// FileIDInfo - Information about a FileID, basically just the logical file
89  /// that it represents and include stack information.  A File SourceLocation
90  /// is a byte offset from the start of this.
91  ///
92  /// FileID's are used to compute the location of a character in memory as well
93  /// as the logical source location, which can be differ from the physical
94  /// location.  It is different when #line's are active or when macros have
95  /// been expanded.
96  ///
97  /// Each FileID has include stack information, indicating where it came from.
98  /// For the primary translation unit, it comes from SourceLocation() aka 0.
99  /// This information encodes the #include chain that a token was instantiated
100  /// from.
101  ///
102  /// FileIDInfos contain a "ContentCache *", describing the source file,
103  /// and a Chunk number, which allows a SourceLocation to index into very
104  /// large files (those which there are not enough FilePosBits to address).
105  ///
106  struct FileIDInfo {
107  private:
108    /// IncludeLoc - The location of the #include that brought in this file.
109    /// This SourceLocation object has an invalid SLOC for the main file.
110    SourceLocation IncludeLoc;
111
112    /// ChunkNo - Really large buffers are broken up into chunks that are
113    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
114    /// chunk number of this FileID.
115    unsigned ChunkNo;
116
117    /// Content - Information about the source buffer itself.
118    const ContentCache* Content;
119
120  public:
121    /// get - Return a FileIDInfo object.
122    static FileIDInfo get(SourceLocation IL, unsigned CN,
123                          const ContentCache *Con) {
124      FileIDInfo X;
125      X.IncludeLoc = IL;
126      X.ChunkNo = CN;
127      X.Content = Con;
128      return X;
129    }
130
131    SourceLocation getIncludeLoc() const { return IncludeLoc; }
132    unsigned getChunkNo() const { return ChunkNo; }
133    const ContentCache* getContentCache() const { return Content; }
134
135    /// Emit - Emit this FileIDInfo to Bitcode.
136    void Emit(llvm::Serializer& S) const;
137
138    /// ReadVal - Reconstitute a FileIDInfo from Bitcode.
139    static FileIDInfo ReadVal(llvm::Deserializer& S);
140  };
141
142  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
143  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
144  /// instantiated, and the PhysicalLoc - where the actual character data for
145  /// the token came from.  An actual macro SourceLocation stores deltas from
146  /// these positions.
147  class MacroIDInfo {
148    SourceLocation VirtualLoc, PhysicalLoc;
149  public:
150    SourceLocation getVirtualLoc() const { return VirtualLoc; }
151    SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
152
153    /// get - Return a MacroID for a macro expansion.  VL specifies
154    /// the instantiation location (where the macro is expanded), and PL
155    /// specifies the physical location (where the characters from the token
156    /// come from).  Both VL and PL refer to normal File SLocs.
157    static MacroIDInfo get(SourceLocation VL, SourceLocation PL) {
158      MacroIDInfo X;
159      X.VirtualLoc = VL;
160      X.PhysicalLoc = PL;
161      return X;
162    }
163
164    /// Emit - Emit this MacroIDInfo to Bitcode.
165    void Emit(llvm::Serializer& S) const;
166
167    /// ReadVal - Reconstitute a MacroIDInfo from Bitcode.
168    static MacroIDInfo ReadVal(llvm::Deserializer& S);
169  };
170}  // end SrcMgr namespace.
171} // end clang namespace
172
173namespace std {
174template <> struct less<clang::SrcMgr::ContentCache> {
175  inline bool operator()(const clang::SrcMgr::ContentCache& L,
176                         const clang::SrcMgr::ContentCache& R) const {
177    return L.Entry < R.Entry;
178  }
179};
180} // end std namespace
181
182namespace clang {
183
184/// SourceManager - This file handles loading and caching of source files into
185/// memory.  This object owns the MemoryBuffer objects for all of the loaded
186/// files and assigns unique FileID's for each unique #include chain.
187///
188/// The SourceManager can be queried for information about SourceLocation
189/// objects, turning them into either physical or logical locations.  Physical
190/// locations represent where the bytes corresponding to a token came from and
191/// logical locations represent where the location is in the user's view.  In
192/// the case of a macro expansion, for example, the physical location indicates
193/// where the expanded token came from and the logical location specifies where
194/// it was expanded.  Logical locations are also influenced by #line directives,
195/// etc.
196class SourceManager {
197  /// FileInfos - Memoized information about all of the files tracked by this
198  /// SourceManager.  This set allows us to merge ContentCache entries based
199  /// on their FileEntry*.  All ContentCache objects will thus have unique,
200  /// non-null, FileEntry pointers.
201  std::set<SrcMgr::ContentCache> FileInfos;
202
203  /// MemBufferInfos - Information about various memory buffers that we have
204  /// read in.  This is a list, instead of a vector, because we need pointers to
205  /// the ContentCache objects to be stable.  All FileEntry* within the
206  /// stored ContentCache objects are NULL, as they do not refer to a file.
207  std::list<SrcMgr::ContentCache> MemBufferInfos;
208
209  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
210  /// entries are off by one.
211  std::vector<SrcMgr::FileIDInfo> FileIDs;
212
213  /// MacroIDs - Information about each MacroID.
214  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
215
216  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
217  /// method which is used to speedup getLineNumber calls to nearby locations.
218  unsigned LastLineNoFileIDQuery;
219  SrcMgr::ContentCache *LastLineNoContentCache;
220  unsigned LastLineNoFilePos;
221  unsigned LastLineNoResult;
222
223  /// MainFileID - The file ID for the main source file of the translation unit.
224  unsigned MainFileID;
225
226  // SourceManager doesn't support copy construction.
227  explicit SourceManager(const SourceManager&);
228  void operator=(const SourceManager&);
229public:
230  SourceManager() : LastLineNoFileIDQuery(~0U), MainFileID(0) {}
231  ~SourceManager() {}
232
233  void clearIDTables() {
234    MainFileID = 0;
235    FileIDs.clear();
236    MacroIDs.clear();
237    LastLineNoFileIDQuery = ~0U;
238    LastLineNoContentCache = 0;
239  }
240
241  /// getMainFileID - Returns the FileID of the main source file.
242  unsigned getMainFileID() const { return MainFileID; }
243
244  /// createFileID - Create a new FileID that represents the specified file
245  /// being #included from the specified IncludePosition.  This returns 0 on
246  /// error and translates NULL into standard input.
247  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
248    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
249    if (IR == 0) return 0;    // Error opening file?
250    return createFileID(IR, IncludePos);
251  }
252
253  /// createMainFileID - Create the FileID for the main source file.
254  unsigned createMainFileID(const FileEntry *SourceFile,
255                            SourceLocation IncludePos) {
256
257    assert (MainFileID == 0 && "MainFileID already set!");
258    MainFileID = createFileID(SourceFile,IncludePos);
259    return MainFileID;
260  }
261
262  /// createFileIDForMemBuffer - Create a new FileID that represents the
263  /// specified memory buffer.  This does no caching of the buffer and takes
264  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
265  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
266    return createFileID(createMemBufferContentCache(Buffer), SourceLocation());
267  }
268
269  /// createMainFileIDForMembuffer - Create the FileID for a memory buffer
270  ///  that will represent the FileID for the main source.  One example
271  ///  of when this would be used is when the main source is read from STDIN.
272  unsigned createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
273    assert (MainFileID == 0 && "MainFileID already set!");
274    MainFileID = createFileIDForMemBuffer(Buffer);
275    return MainFileID;
276  }
277
278  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
279  /// that a token at Loc should actually be referenced from InstantiationLoc.
280  SourceLocation getInstantiationLoc(SourceLocation Loc,
281                                     SourceLocation InstantiationLoc);
282
283  /// getBuffer - Return the buffer for the specified FileID.
284  ///
285  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
286    return getContentCache(FileID)->Buffer;
287  }
288
289  /// getBufferData - Return a pointer to the start and end of the character
290  /// data for the specified FileID.
291  std::pair<const char*, const char*> getBufferData(unsigned FileID) const;
292
293  /// getIncludeLoc - Return the location of the #include for the specified
294  /// SourceLocation.  If this is a macro expansion, this transparently figures
295  /// out which file includes the file being expanded into.
296  SourceLocation getIncludeLoc(SourceLocation ID) const {
297    return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
298  }
299
300  /// getCharacterData - Return a pointer to the start of the specified location
301  /// in the appropriate MemoryBuffer.
302  const char *getCharacterData(SourceLocation SL) const;
303
304  /// getColumnNumber - Return the column # for the specified file position.
305  /// This is significantly cheaper to compute than the line number.  This
306  /// returns zero if the column number isn't known.  This may only be called on
307  /// a file sloc, so you must choose a physical or logical location before
308  /// calling this method.
309  unsigned getColumnNumber(SourceLocation Loc) const;
310
311  unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
312    return getColumnNumber(getPhysicalLoc(Loc));
313  }
314  unsigned getLogicalColumnNumber(SourceLocation Loc) const {
315    return getColumnNumber(getLogicalLoc(Loc));
316  }
317
318
319  /// getLineNumber - Given a SourceLocation, return the physical line number
320  /// for the position indicated.  This requires building and caching a table of
321  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
322  /// about to emit a diagnostic.
323  unsigned getLineNumber(SourceLocation Loc);
324
325  unsigned getLogicalLineNumber(SourceLocation Loc) {
326    return getLineNumber(getLogicalLoc(Loc));
327  }
328  unsigned getPhysicalLineNumber(SourceLocation Loc) {
329    return getLineNumber(getPhysicalLoc(Loc));
330  }
331
332  /// getSourceName - This method returns the name of the file or buffer that
333  /// the SourceLocation specifies.  This can be modified with #line directives,
334  /// etc.
335  const char *getSourceName(SourceLocation Loc) const;
336
337  /// Given a SourceLocation object, return the logical location referenced by
338  /// the ID.  This logical location is subject to #line directives, etc.
339  SourceLocation getLogicalLoc(SourceLocation Loc) const {
340    // File locations are both physical and logical.
341    if (Loc.isFileID()) return Loc;
342
343    return MacroIDs[Loc.getMacroID()].getVirtualLoc();
344  }
345
346  /// getPhysicalLoc - Given a SourceLocation object, return the physical
347  /// location referenced by the ID.
348  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
349    // File locations are both physical and logical.
350    if (Loc.isFileID()) return Loc;
351
352    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
353    return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
354  }
355
356  /// getContentCacheForLoc - Return the ContentCache for the physloc of the
357  /// specified SourceLocation, if one exists.
358  const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const {
359    Loc = getPhysicalLoc(Loc);
360    unsigned FileID = Loc.getFileID();
361    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
362    return FileIDs[FileID-1].getContentCache();
363  }
364
365  /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
366  ///  specified SourceLocation, if one exists.
367  const FileEntry* getFileEntryForLoc(SourceLocation Loc) const {
368    return getContentCacheForLoc(Loc)->Entry;
369  }
370
371  /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
372  const FileEntry* getFileEntryForID(unsigned id) const {
373    return getContentCache(id)->Entry;
374  }
375
376  /// getCanonicalFileID - Return the canonical FileID for a SourceLocation.
377  ///  A file can have multiple FileIDs if it is large enough to be broken
378  ///  into multiple chunks.  This method returns the unique FileID without
379  ///  chunk information for a given SourceLocation.  Use this method when
380  ///  you want to compare FileIDs across SourceLocations.
381  unsigned getCanonicalFileID(SourceLocation PhysLoc) const {
382    return getDecomposedFileLoc(PhysLoc).first;
383  }
384
385  /// getDecomposedFileLoc - Decompose the specified file location into a raw
386  /// FileID + Offset pair.  The first element is the FileID, the second is the
387  /// offset from the start of the buffer of the location.
388  std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
389    assert(Loc.isFileID() && "Isn't a File SourceLocation");
390
391    // TODO: Add a flag "is first chunk" to SLOC.
392    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
393
394    // If this file has been split up into chunks, factor in the chunk number
395    // that the FileID references.
396    unsigned ChunkNo = FIDInfo->getChunkNo();
397    unsigned Offset = Loc.getRawFilePos();
398    Offset += (ChunkNo << SourceLocation::FilePosBits);
399
400    return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset);
401  }
402
403  /// getFullFilePos - This (efficient) method returns the offset from the start
404  /// of the file that the specified physical SourceLocation represents.  This
405  /// returns the location of the physical character data, not the logical file
406  /// position.
407  unsigned getFullFilePos(SourceLocation PhysLoc) const {
408    return getDecomposedFileLoc(PhysLoc).second;
409  }
410
411  /// isFromSameFile - Returns true if both SourceLocations correspond to
412  ///  the same file.
413  bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const {
414    return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2);
415  }
416
417  /// isFromMainFile - Returns true if the file of provided SourceLocation is
418  ///   the main file.
419  bool isFromMainFile(SourceLocation Loc) const {
420    return getCanonicalFileID(Loc) == getMainFileID();
421  }
422
423  /// PrintStats - Print statistics to stderr.
424  ///
425  void PrintStats() const;
426
427  /// Emit - Emit this SourceManager to Bitcode.
428  void Emit(llvm::Serializer& S) const;
429
430  /// Read - Reconstitute a SourceManager from Bitcode.
431  static SourceManager* CreateAndRegister(llvm::Deserializer& S,
432                                          FileManager &FMgr);
433
434private:
435  friend struct SrcMgr::ContentCache; // Used for deserialization.
436
437  /// createFileID - Create a new fileID for the specified ContentCache and
438  ///  include position.  This works regardless of whether the ContentCache
439  ///  corresponds to a file or some other input source.
440  unsigned createFileID(const SrcMgr::ContentCache* File,
441                        SourceLocation IncludePos);
442
443  /// getContentCache - Create or return a cached ContentCache for the specified
444  ///  file.  This returns null on failure.
445  const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile);
446
447  /// createMemBufferContentCache - Create a new ContentCache for the specified
448  ///  memory buffer.
449  const SrcMgr::ContentCache*
450  createMemBufferContentCache(const llvm::MemoryBuffer* Buf);
451
452  const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const {
453    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
454    return &FileIDs[FileID-1];
455  }
456
457  const SrcMgr::ContentCache *getContentCache(unsigned FileID) const {
458    return getContentCache(getFIDInfo(FileID));
459  }
460
461  /// Return the ContentCache structure for the specified FileID.
462  ///  This is always the physical reference for the ID.
463  const SrcMgr::ContentCache*
464  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
465    return FIDInfo->getContentCache();
466  }
467};
468
469
470}  // end namespace clang
471
472#endif
473