SourceManager.h revision e21272fbdbfbf5bf3461d3e9b42279f4d47caa42
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
#include "clang/Basic/SourceLocation.h"
#include "llvm/Bitcode/SerializationFwd.h"
#include <cassert>
#include <functional>
#include <list>
#include <set>
#include <vector>
23
24namespace llvm {
25class MemoryBuffer;
26}
27
28namespace clang {
29
30class SourceManager;
31class FileEntry;
32class IdentifierTokenInfo;
33
34/// SrcMgr - Private classes that are part of the SourceManager implementation.
35///
36namespace SrcMgr {
37  /// ContentCache - Once instance of this struct is kept for every file
38  ///  loaded or used.  This object owns the MemoryBuffer object.
39  struct ContentCache {
40    /// Reference to the file entry.  This reference does not own
41    /// the FileEntry object.  It is possible for this to be NULL if
42    /// the ContentCache encapsulates an imaginary text buffer.
43    const FileEntry* Entry;
44
45    /// Buffer - The actual buffer containing the characters from the input
46    /// file.  This is owned by the ContentCache object.
47    const llvm::MemoryBuffer* Buffer;
48
49    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
50    /// is lazily computed.  This is owned by the ContentCache object.
51    unsigned* SourceLineCache;
52
53    /// NumLines - The number of lines in this ContentCache.  This is only valid
54    /// if SourceLineCache is non-null.
55    unsigned NumLines;
56
57    ContentCache(const FileEntry* e = NULL)
58    : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {}
59
60    ~ContentCache();
61
62    /// The copy ctor does not allow copies where source object has either
63    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
64    ///  is not transfered, so this is a logical error.
65    ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) {
66      Entry = RHS.Entry;
67
68      assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
69              && "Passed ContentCache object cannot own a buffer.");
70
71      NumLines = RHS.NumLines;
72    }
73
74    /// Emit - Emit this ContentCache to Bitcode.
75    void Emit(llvm::Serializer& S, bool StoreBufferName,
76              bool StoreBufferContents) const;
77
78    /// Read - Reconstitute a ContentCache from Bitcode.
79    void Read(llvm::Deserializer& D, std::vector<char>* BufferNameBuf,
80              bool ReadBufferContents);
81
82  private:
83    // Disable assignments.
84    ContentCache& operator=(const ContentCache& RHS);
85  };
86
87  /// FileIDInfo - Information about a FileID, basically just the logical file
88  /// that it represents and include stack information.  A File SourceLocation
89  /// is a byte offset from the start of this.
90  ///
91  /// FileID's are used to compute the location of a character in memory as well
92  /// as the logical source location, which can be differ from the physical
93  /// location.  It is different when #line's are active or when macros have
94  /// been expanded.
95  ///
96  /// Each FileID has include stack information, indicating where it came from.
97  /// For the primary translation unit, it comes from SourceLocation() aka 0.
98  /// This information encodes the #include chain that a token was instantiated
99  /// from.
100  ///
101  /// FileIDInfos contain a "ContentCache *", describing the source file,
102  /// and a Chunk number, which allows a SourceLocation to index into very
103  /// large files (those which there are not enough FilePosBits to address).
104  ///
105  struct FileIDInfo {
106  private:
107    /// IncludeLoc - The location of the #include that brought in this file.
108    /// This SourceLocation object has an invalid SLOC for the main file.
109    SourceLocation IncludeLoc;
110
111    /// ChunkNo - Really large buffers are broken up into chunks that are
112    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
113    /// chunk number of this FileID.
114    unsigned ChunkNo;
115
116    /// Content - Information about the source buffer itself.
117    const ContentCache* Content;
118
119  public:
120    /// get - Return a FileIDInfo object.
121    static FileIDInfo get(SourceLocation IL, unsigned CN,
122                          const ContentCache *Con) {
123      FileIDInfo X;
124      X.IncludeLoc = IL;
125      X.ChunkNo = CN;
126      X.Content = Con;
127      return X;
128    }
129
130    SourceLocation getIncludeLoc() const { return IncludeLoc; }
131    unsigned getChunkNo() const { return ChunkNo; }
132    const ContentCache* getContentCache() const { return Content; }
133  };
134
135  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
136  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
137  /// instantiated, and the PhysicalLoc - where the actual character data for
138  /// the token came from.  An actual macro SourceLocation stores deltas from
139  /// these positions.
140  class MacroIDInfo {
141    SourceLocation VirtualLoc, PhysicalLoc;
142  public:
143    SourceLocation getVirtualLoc() const { return VirtualLoc; }
144    SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
145
146    /// get - Return a MacroID for a macro expansion.  VL specifies
147    /// the instantiation location (where the macro is expanded), and PL
148    /// specifies the physical location (where the characters from the token
149    /// come from).  Both VL and PL refer to normal File SLocs.
150    static MacroIDInfo get(SourceLocation VL, SourceLocation PL) {
151      MacroIDInfo X;
152      X.VirtualLoc = VL;
153      X.PhysicalLoc = PL;
154      return X;
155    }
156  };
157}  // end SrcMgr namespace.
158} // end clang namespace
159
160namespace std {
161template <> struct less<clang::SrcMgr::ContentCache> {
162  inline bool operator()(const clang::SrcMgr::ContentCache& L,
163                         const clang::SrcMgr::ContentCache& R) const {
164    return L.Entry < R.Entry;
165  }
166};
167} // end std namespace
168
169namespace clang {
170
171/// SourceManager - This file handles loading and caching of source files into
172/// memory.  This object owns the MemoryBuffer objects for all of the loaded
173/// files and assigns unique FileID's for each unique #include chain.
174///
175/// The SourceManager can be queried for information about SourceLocation
176/// objects, turning them into either physical or logical locations.  Physical
177/// locations represent where the bytes corresponding to a token came from and
178/// logical locations represent where the location is in the user's view.  In
179/// the case of a macro expansion, for example, the physical location indicates
180/// where the expanded token came from and the logical location specifies where
181/// it was expanded.  Logical locations are also influenced by #line directives,
182/// etc.
183class SourceManager {
184  /// FileInfos - Memoized information about all of the files tracked by this
185  /// SourceManager.  This set allows us to merge ContentCache entries based
186  /// on their FileEntry*.  All ContentCache objects will thus have unique,
187  /// non-null, FileEntry pointers.
188  std::set<SrcMgr::ContentCache> FileInfos;
189
190  /// MemBufferInfos - Information about various memory buffers that we have
191  /// read in.  This is a list, instead of a vector, because we need pointers to
192  /// the ContentCache objects to be stable.  All FileEntry* within the
193  /// stored ContentCache objects are NULL, as they do not refer to a file.
194  std::list<SrcMgr::ContentCache> MemBufferInfos;
195
196  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
197  /// entries are off by one.
198  std::vector<SrcMgr::FileIDInfo> FileIDs;
199
200  /// MacroIDs - Information about each MacroID.
201  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
202
203  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
204  /// method which is used to speedup getLineNumber calls to nearby locations.
205  unsigned LastLineNoFileIDQuery;
206  SrcMgr::ContentCache *LastLineNoContentCache;
207  unsigned LastLineNoFilePos;
208  unsigned LastLineNoResult;
209
210public:
211  SourceManager() : LastLineNoFileIDQuery(~0U) {}
212  ~SourceManager() {}
213
214  void clearIDTables() {
215    FileIDs.clear();
216    MacroIDs.clear();
217    LastLineNoFileIDQuery = ~0U;
218    LastLineNoContentCache = 0;
219  }
220
221  /// createFileID - Create a new FileID that represents the specified file
222  /// being #included from the specified IncludePosition.  This returns 0 on
223  /// error and translates NULL into standard input.
224  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
225    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
226    if (IR == 0) return 0;    // Error opening file?
227    return createFileID(IR, IncludePos);
228  }
229
230  /// createFileIDForMemBuffer - Create a new FileID that represents the
231  /// specified memory buffer.  This does no caching of the buffer and takes
232  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
233  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
234    return createFileID(createMemBufferContentCache(Buffer), SourceLocation());
235  }
236
237  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
238  /// that a token at Loc should actually be referenced from InstantiationLoc.
239  SourceLocation getInstantiationLoc(SourceLocation Loc,
240                                     SourceLocation InstantiationLoc);
241
242  /// getBuffer - Return the buffer for the specified FileID.
243  ///
244  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
245    return getContentCache(FileID)->Buffer;
246  }
247
248  /// getBufferData - Return a pointer to the start and end of the character
249  /// data for the specified FileID.
250  std::pair<const char*, const char*> getBufferData(unsigned FileID) const;
251
252  /// getIncludeLoc - Return the location of the #include for the specified
253  /// SourceLocation.  If this is a macro expansion, this transparently figures
254  /// out which file includes the file being expanded into.
255  SourceLocation getIncludeLoc(SourceLocation ID) const {
256    return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
257  }
258
259  /// getCharacterData - Return a pointer to the start of the specified location
260  /// in the appropriate MemoryBuffer.
261  const char *getCharacterData(SourceLocation SL) const;
262
263  /// getColumnNumber - Return the column # for the specified file position.
264  /// This is significantly cheaper to compute than the line number.  This
265  /// returns zero if the column number isn't known.  This may only be called on
266  /// a file sloc, so you must choose a physical or logical location before
267  /// calling this method.
268  unsigned getColumnNumber(SourceLocation Loc) const;
269
270  unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
271    return getColumnNumber(getPhysicalLoc(Loc));
272  }
273  unsigned getLogicalColumnNumber(SourceLocation Loc) const {
274    return getColumnNumber(getLogicalLoc(Loc));
275  }
276
277
278  /// getLineNumber - Given a SourceLocation, return the physical line number
279  /// for the position indicated.  This requires building and caching a table of
280  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
281  /// about to emit a diagnostic.
282  unsigned getLineNumber(SourceLocation Loc);
283
284  unsigned getLogicalLineNumber(SourceLocation Loc) {
285    return getLineNumber(getLogicalLoc(Loc));
286  }
287  unsigned getPhysicalLineNumber(SourceLocation Loc) {
288    return getLineNumber(getPhysicalLoc(Loc));
289  }
290
291  /// getSourceName - This method returns the name of the file or buffer that
292  /// the SourceLocation specifies.  This can be modified with #line directives,
293  /// etc.
294  const char *getSourceName(SourceLocation Loc) const;
295
296  /// Given a SourceLocation object, return the logical location referenced by
297  /// the ID.  This logical location is subject to #line directives, etc.
298  SourceLocation getLogicalLoc(SourceLocation Loc) const {
299    // File locations are both physical and logical.
300    if (Loc.isFileID()) return Loc;
301
302    return MacroIDs[Loc.getMacroID()].getVirtualLoc();
303  }
304
305  /// getPhysicalLoc - Given a SourceLocation object, return the physical
306  /// location referenced by the ID.
307  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
308    // File locations are both physical and logical.
309    if (Loc.isFileID()) return Loc;
310
311    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
312    return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
313  }
314
315  /// getContentCacheForLoc - Return the ContentCache for the physloc of the
316  /// specified SourceLocation, if one exists.
317  const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const {
318    Loc = getPhysicalLoc(Loc);
319    unsigned FileID = Loc.getFileID();
320    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
321    return FileIDs[FileID-1].getContentCache();
322  }
323
324  /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
325  ///  specified SourceLocation, if one exists.
326  const FileEntry* getFileEntryForLoc(SourceLocation Loc) const {
327    return getContentCacheForLoc(Loc)->Entry;
328  }
329
330  /// getDecomposedFileLoc - Decompose the specified file location into a raw
331  /// FileID + Offset pair.  The first element is the FileID, the second is the
332  /// offset from the start of the buffer of the location.
333  std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
334    assert(Loc.isFileID() && "Isn't a File SourceLocation");
335
336    // TODO: Add a flag "is first chunk" to SLOC.
337    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
338
339    // If this file has been split up into chunks, factor in the chunk number
340    // that the FileID references.
341    unsigned ChunkNo = FIDInfo->getChunkNo();
342    unsigned Offset = Loc.getRawFilePos();
343    Offset += (ChunkNo << SourceLocation::FilePosBits);
344
345    return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset);
346  }
347
348  /// PrintStats - Print statistics to stderr.
349  ///
350  void PrintStats() const;
351
352private:
353  /// createFileID - Create a new fileID for the specified ContentCache and
354  ///  include position.  This works regardless of whether the ContentCache
355  ///  corresponds to a file or some other input source.
356  unsigned createFileID(const SrcMgr::ContentCache* File,
357                        SourceLocation IncludePos);
358
359  /// getContentCache - Create or return a cached ContentCache for the specified
360  ///  file.  This returns null on failure.
361  const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile);
362
363  /// createMemBufferContentCache - Create a new ContentCache for the specified
364  ///  memory buffer.
365  const SrcMgr::ContentCache*
366  createMemBufferContentCache(const llvm::MemoryBuffer* Buf);
367
368  const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const {
369    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
370    return &FileIDs[FileID-1];
371  }
372
373  const SrcMgr::ContentCache *getContentCache(unsigned FileID) const {
374    return getContentCache(getFIDInfo(FileID));
375  }
376
377  /// Return the ContentCache structure for the specified FileID.
378  ///  This is always the physical reference for the ID.
379  const SrcMgr::ContentCache*
380  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
381    return FIDInfo->getContentCache();
382  }
383
384  /// getFullFilePos - This (efficient) method returns the offset from the start
385  /// of the file that the specified physical SourceLocation represents.  This
386  /// returns the location of the physical character data, not the logical file
387  /// position.
388  unsigned getFullFilePos(SourceLocation PhysLoc) const {
389    return getDecomposedFileLoc(PhysLoc).second;
390  }
391};
392
393
394}  // end namespace clang
395
396#endif
397