SourceManager.h revision 27e2eb40b9adc876c05e0260d1100385c9f0e5f5
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include "llvm/Bitcode/SerializationFwd.h"
19#include <vector>
20#include <set>
21#include <list>
22#include <cassert>
23
24namespace llvm {
25class MemoryBuffer;
26}
27
28namespace clang {
29
30class SourceManager;
31class FileManager;
32class FileEntry;
33class IdentifierTokenInfo;
34
35/// SrcMgr - Private classes that are part of the SourceManager implementation.
36///
37namespace SrcMgr {
38  /// ContentCache - Once instance of this struct is kept for every file
39  ///  loaded or used.  This object owns the MemoryBuffer object.
40  struct ContentCache {
41    /// Reference to the file entry.  This reference does not own
42    /// the FileEntry object.  It is possible for this to be NULL if
43    /// the ContentCache encapsulates an imaginary text buffer.
44    const FileEntry* Entry;
45
46    /// Buffer - The actual buffer containing the characters from the input
47    /// file.  This is owned by the ContentCache object.
48    const llvm::MemoryBuffer* Buffer;
49
50    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
51    /// is lazily computed.  This is owned by the ContentCache object.
52    unsigned* SourceLineCache;
53
54    /// NumLines - The number of lines in this ContentCache.  This is only valid
55    /// if SourceLineCache is non-null.
56    unsigned NumLines;
57
58    ContentCache(const FileEntry* e = NULL)
59    : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {}
60
61    ~ContentCache();
62
63    /// The copy ctor does not allow copies where source object has either
64    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
65    ///  is not transfered, so this is a logical error.
66    ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) {
67      Entry = RHS.Entry;
68
69      assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
70              && "Passed ContentCache object cannot own a buffer.");
71
72      NumLines = RHS.NumLines;
73    }
74
75    /// Emit - Emit this ContentCache to Bitcode.
76    void Emit(llvm::Serializer& S) const;
77
78    /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode
79    //   and store it in the specified SourceManager.
80    static void ReadToSourceManager(llvm::Deserializer& D, SourceManager& SMgr,
81                                    FileManager* FMgr, std::vector<char>&  Buf);
82
83  private:
84    // Disable assignments.
85    ContentCache& operator=(const ContentCache& RHS);
86  };
87
88  /// FileIDInfo - Information about a FileID, basically just the logical file
89  /// that it represents and include stack information.  A File SourceLocation
90  /// is a byte offset from the start of this.
91  ///
92  /// FileID's are used to compute the location of a character in memory as well
93  /// as the logical source location, which can be differ from the physical
94  /// location.  It is different when #line's are active or when macros have
95  /// been expanded.
96  ///
97  /// Each FileID has include stack information, indicating where it came from.
98  /// For the primary translation unit, it comes from SourceLocation() aka 0.
99  /// This information encodes the #include chain that a token was instantiated
100  /// from.
101  ///
102  /// FileIDInfos contain a "ContentCache *", describing the source file,
103  /// and a Chunk number, which allows a SourceLocation to index into very
104  /// large files (those which there are not enough FilePosBits to address).
105  ///
106  struct FileIDInfo {
107  private:
108    /// IncludeLoc - The location of the #include that brought in this file.
109    /// This SourceLocation object has an invalid SLOC for the main file.
110    SourceLocation IncludeLoc;
111
112    /// ChunkNo - Really large buffers are broken up into chunks that are
113    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
114    /// chunk number of this FileID.
115    unsigned ChunkNo;
116
117    /// Content - Information about the source buffer itself.
118    const ContentCache* Content;
119
120  public:
121    /// get - Return a FileIDInfo object.
122    static FileIDInfo get(SourceLocation IL, unsigned CN,
123                          const ContentCache *Con) {
124      FileIDInfo X;
125      X.IncludeLoc = IL;
126      X.ChunkNo = CN;
127      X.Content = Con;
128      return X;
129    }
130
131    SourceLocation getIncludeLoc() const { return IncludeLoc; }
132    unsigned getChunkNo() const { return ChunkNo; }
133    const ContentCache* getContentCache() const { return Content; }
134
135    /// Emit - Emit this FileIDInfo to Bitcode.
136    void Emit(llvm::Serializer& S) const;
137
138    /// ReadVal - Reconstitute a FileIDInfo from Bitcode.
139    static FileIDInfo ReadVal(llvm::Deserializer& S);
140  };
141
142  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
143  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
144  /// instantiated, and the PhysicalLoc - where the actual character data for
145  /// the token came from.  An actual macro SourceLocation stores deltas from
146  /// these positions.
147  class MacroIDInfo {
148    SourceLocation VirtualLoc, PhysicalLoc;
149  public:
150    SourceLocation getVirtualLoc() const { return VirtualLoc; }
151    SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
152
153    /// get - Return a MacroID for a macro expansion.  VL specifies
154    /// the instantiation location (where the macro is expanded), and PL
155    /// specifies the physical location (where the characters from the token
156    /// come from).  Both VL and PL refer to normal File SLocs.
157    static MacroIDInfo get(SourceLocation VL, SourceLocation PL) {
158      MacroIDInfo X;
159      X.VirtualLoc = VL;
160      X.PhysicalLoc = PL;
161      return X;
162    }
163
164    /// Emit - Emit this MacroIDInfo to Bitcode.
165    void Emit(llvm::Serializer& S) const;
166
167    /// ReadVal - Reconstitute a MacroIDInfo from Bitcode.
168    static MacroIDInfo ReadVal(llvm::Deserializer& S);
169  };
170}  // end SrcMgr namespace.
171} // end clang namespace
172
173namespace std {
174template <> struct less<clang::SrcMgr::ContentCache> {
175  inline bool operator()(const clang::SrcMgr::ContentCache& L,
176                         const clang::SrcMgr::ContentCache& R) const {
177    return L.Entry < R.Entry;
178  }
179};
180} // end std namespace
181
182namespace clang {
183
184/// SourceManager - This file handles loading and caching of source files into
185/// memory.  This object owns the MemoryBuffer objects for all of the loaded
186/// files and assigns unique FileID's for each unique #include chain.
187///
188/// The SourceManager can be queried for information about SourceLocation
189/// objects, turning them into either physical or logical locations.  Physical
190/// locations represent where the bytes corresponding to a token came from and
191/// logical locations represent where the location is in the user's view.  In
192/// the case of a macro expansion, for example, the physical location indicates
193/// where the expanded token came from and the logical location specifies where
194/// it was expanded.  Logical locations are also influenced by #line directives,
195/// etc.
196class SourceManager {
197  /// FileInfos - Memoized information about all of the files tracked by this
198  /// SourceManager.  This set allows us to merge ContentCache entries based
199  /// on their FileEntry*.  All ContentCache objects will thus have unique,
200  /// non-null, FileEntry pointers.
201  std::set<SrcMgr::ContentCache> FileInfos;
202
203  /// MemBufferInfos - Information about various memory buffers that we have
204  /// read in.  This is a list, instead of a vector, because we need pointers to
205  /// the ContentCache objects to be stable.  All FileEntry* within the
206  /// stored ContentCache objects are NULL, as they do not refer to a file.
207  std::list<SrcMgr::ContentCache> MemBufferInfos;
208
209  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
210  /// entries are off by one.
211  std::vector<SrcMgr::FileIDInfo> FileIDs;
212
213  /// MacroIDs - Information about each MacroID.
214  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
215
216  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
217  /// method which is used to speedup getLineNumber calls to nearby locations.
218  unsigned LastLineNoFileIDQuery;
219  SrcMgr::ContentCache *LastLineNoContentCache;
220  unsigned LastLineNoFilePos;
221  unsigned LastLineNoResult;
222
223  /// MainFileID - The file ID for the main source file of the translation unit.
224  unsigned MainFileID;
225
226public:
227  SourceManager() : LastLineNoFileIDQuery(~0U), MainFileID(0) {}
228  ~SourceManager() {}
229
230  // SourceManager doesn't support copy construction.
231  explicit SourceManager(const SourceManager&);
232
233  void clearIDTables() {
234    FileIDs.clear();
235    MacroIDs.clear();
236    LastLineNoFileIDQuery = ~0U;
237    LastLineNoContentCache = 0;
238  }
239
240  /// getMainFileID - Returns the FileID of the main source file.
241  unsigned getMainFileID() const { return MainFileID; }
242
243  /// createFileID - Create a new FileID that represents the specified file
244  /// being #included from the specified IncludePosition.  This returns 0 on
245  /// error and translates NULL into standard input.
246  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
247    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
248    if (IR == 0) return 0;    // Error opening file?
249    return createFileID(IR, IncludePos);
250  }
251
252  /// createMainFileID - Create the FileID for the main source file.
253  unsigned createMainFileID(const FileEntry *SourceFile,
254                            SourceLocation IncludePos) {
255
256    assert (MainFileID == 0 && "MainFileID already set!");
257    MainFileID = createFileID(SourceFile,IncludePos);
258    return MainFileID;
259  }
260
261  /// createFileIDForMemBuffer - Create a new FileID that represents the
262  /// specified memory buffer.  This does no caching of the buffer and takes
263  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
264  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
265    return createFileID(createMemBufferContentCache(Buffer), SourceLocation());
266  }
267
268  /// createMainFileIDForMembuffer - Create the FileID for a memory buffer
269  ///  that will represent the FileID for the main source.  One example
270  ///  of when this would be used is when the main source is read from STDIN.
271  unsigned createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
272    assert (MainFileID == 0 && "MainFileID already set!");
273    MainFileID = createFileIDForMemBuffer(Buffer);
274    return MainFileID;
275  }
276
277  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
278  /// that a token at Loc should actually be referenced from InstantiationLoc.
279  SourceLocation getInstantiationLoc(SourceLocation Loc,
280                                     SourceLocation InstantiationLoc);
281
282  /// getBuffer - Return the buffer for the specified FileID.
283  ///
284  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
285    return getContentCache(FileID)->Buffer;
286  }
287
288  /// getBufferData - Return a pointer to the start and end of the character
289  /// data for the specified FileID.
290  std::pair<const char*, const char*> getBufferData(unsigned FileID) const;
291
292  /// getIncludeLoc - Return the location of the #include for the specified
293  /// SourceLocation.  If this is a macro expansion, this transparently figures
294  /// out which file includes the file being expanded into.
295  SourceLocation getIncludeLoc(SourceLocation ID) const {
296    return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
297  }
298
299  /// getCharacterData - Return a pointer to the start of the specified location
300  /// in the appropriate MemoryBuffer.
301  const char *getCharacterData(SourceLocation SL) const;
302
303  /// getColumnNumber - Return the column # for the specified file position.
304  /// This is significantly cheaper to compute than the line number.  This
305  /// returns zero if the column number isn't known.  This may only be called on
306  /// a file sloc, so you must choose a physical or logical location before
307  /// calling this method.
308  unsigned getColumnNumber(SourceLocation Loc) const;
309
310  unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
311    return getColumnNumber(getPhysicalLoc(Loc));
312  }
313  unsigned getLogicalColumnNumber(SourceLocation Loc) const {
314    return getColumnNumber(getLogicalLoc(Loc));
315  }
316
317
318  /// getLineNumber - Given a SourceLocation, return the physical line number
319  /// for the position indicated.  This requires building and caching a table of
320  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
321  /// about to emit a diagnostic.
322  unsigned getLineNumber(SourceLocation Loc);
323
324  unsigned getLogicalLineNumber(SourceLocation Loc) {
325    return getLineNumber(getLogicalLoc(Loc));
326  }
327  unsigned getPhysicalLineNumber(SourceLocation Loc) {
328    return getLineNumber(getPhysicalLoc(Loc));
329  }
330
331  /// getSourceName - This method returns the name of the file or buffer that
332  /// the SourceLocation specifies.  This can be modified with #line directives,
333  /// etc.
334  const char *getSourceName(SourceLocation Loc) const;
335
336  /// Given a SourceLocation object, return the logical location referenced by
337  /// the ID.  This logical location is subject to #line directives, etc.
338  SourceLocation getLogicalLoc(SourceLocation Loc) const {
339    // File locations are both physical and logical.
340    if (Loc.isFileID()) return Loc;
341
342    return MacroIDs[Loc.getMacroID()].getVirtualLoc();
343  }
344
345  /// getPhysicalLoc - Given a SourceLocation object, return the physical
346  /// location referenced by the ID.
347  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
348    // File locations are both physical and logical.
349    if (Loc.isFileID()) return Loc;
350
351    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
352    return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
353  }
354
355  /// getContentCacheForLoc - Return the ContentCache for the physloc of the
356  /// specified SourceLocation, if one exists.
357  const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const {
358    Loc = getPhysicalLoc(Loc);
359    unsigned FileID = Loc.getFileID();
360    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
361    return FileIDs[FileID-1].getContentCache();
362  }
363
364  /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
365  ///  specified SourceLocation, if one exists.
366  const FileEntry* getFileEntryForLoc(SourceLocation Loc) const {
367    return getContentCacheForLoc(Loc)->Entry;
368  }
369
370  /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
371  const FileEntry* getFileEntryForID(unsigned id) const {
372    return getContentCache(id)->Entry;
373  }
374
375
376  /// getDecomposedFileLoc - Decompose the specified file location into a raw
377  /// FileID + Offset pair.  The first element is the FileID, the second is the
378  /// offset from the start of the buffer of the location.
379  std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
380    assert(Loc.isFileID() && "Isn't a File SourceLocation");
381
382    // TODO: Add a flag "is first chunk" to SLOC.
383    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
384
385    // If this file has been split up into chunks, factor in the chunk number
386    // that the FileID references.
387    unsigned ChunkNo = FIDInfo->getChunkNo();
388    unsigned Offset = Loc.getRawFilePos();
389    Offset += (ChunkNo << SourceLocation::FilePosBits);
390
391    return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset);
392  }
393
394  /// PrintStats - Print statistics to stderr.
395  ///
396  void PrintStats() const;
397
398  /// Emit - Emit this SourceManager to Bitcode.
399  void Emit(llvm::Serializer& S) const;
400
401  /// Read - Reconstitute a SourceManager from Bitcode.
402  static SourceManager* CreateAndRegister(llvm::Deserializer& S,
403                                          FileManager &FMgr);
404
405private:
406  friend class SrcMgr::ContentCache; // Used for deserialization.
407
408  /// createFileID - Create a new fileID for the specified ContentCache and
409  ///  include position.  This works regardless of whether the ContentCache
410  ///  corresponds to a file or some other input source.
411  unsigned createFileID(const SrcMgr::ContentCache* File,
412                        SourceLocation IncludePos);
413
414  /// getContentCache - Create or return a cached ContentCache for the specified
415  ///  file.  This returns null on failure.
416  const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile);
417
418  /// createMemBufferContentCache - Create a new ContentCache for the specified
419  ///  memory buffer.
420  const SrcMgr::ContentCache*
421  createMemBufferContentCache(const llvm::MemoryBuffer* Buf);
422
423  const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const {
424    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
425    return &FileIDs[FileID-1];
426  }
427
428  const SrcMgr::ContentCache *getContentCache(unsigned FileID) const {
429    return getContentCache(getFIDInfo(FileID));
430  }
431
432  /// Return the ContentCache structure for the specified FileID.
433  ///  This is always the physical reference for the ID.
434  const SrcMgr::ContentCache*
435  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
436    return FIDInfo->getContentCache();
437  }
438
439  /// getFullFilePos - This (efficient) method returns the offset from the start
440  /// of the file that the specified physical SourceLocation represents.  This
441  /// returns the location of the physical character data, not the logical file
442  /// position.
443  unsigned getFullFilePos(SourceLocation PhysLoc) const {
444    return getDecomposedFileLoc(PhysLoc).second;
445  }
446};
447
448
449}  // end namespace clang
450
451#endif
452