// SourceManager.h revision b7489d8129136437953d412e2a6cf0ef87f4a461
//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
#include "clang/Basic/SourceLocation.h"
#include "llvm/Bitcode/SerializationFwd.h"
#include <cassert>
#include <functional>
#include <list>
#include <set>
#include <utility>
#include <vector>
23
namespace llvm {
  // Forward declaration only: this header handles MemoryBuffer strictly
  // through pointers, so the full definition is not needed here.
  class MemoryBuffer;
}  // end llvm namespace
27
28namespace clang {
29
// Forward declarations of types that this header only names, without
// requiring their definitions at this point.
class FileEntry;
class IdentifierTokenInfo;
class SourceManager;
33
34/// SrcMgr - Private classes that are part of the SourceManager implementation.
35///
36namespace SrcMgr {
37  /// ContentCache - Once instance of this struct is kept for every file
38  ///  loaded or used.  This object owns the MemoryBuffer object.
39  struct ContentCache {
40    /// Reference to the file entry.  This reference does not own
41    /// the FileEntry object.  It is possible for this to be NULL if
42    /// the ContentCache encapsulates an imaginary text buffer.
43    const FileEntry* Entry;
44
45    /// Buffer - The actual buffer containing the characters from the input
46    /// file.  This is owned by the FileInfo object.
47    const llvm::MemoryBuffer* Buffer;
48
49    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
50    /// is lazily computed.  This is owned by the FileInfo object.
51    unsigned* SourceLineCache;
52
53    /// NumLines - The number of lines in this FileInfo.  This is only valid if
54    /// SourceLineCache is non-null.
55    unsigned NumLines;
56
57    ContentCache(const FileEntry* e = NULL)
58    : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {}
59
60    ~ContentCache();
61
62    /// The copy ctor does not allow copies where source object has either
63    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
64    ///  is not transfered, so this is a logical error.
65    ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) {
66      Entry = RHS.Entry;
67
68      assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
69              && "Passed ContentCache object cannot own a buffer.");
70
71      NumLines = RHS.NumLines;
72    }
73
74  private:
75    // Disable assignments.
76    ContentCache& operator=(const ContentCache& RHS);
77  };
78
79  /// FileIDInfo - Information about a FileID, basically just the logical file
80  /// that it represents and include stack information.  A File SourceLocation
81  /// is a byte offset from the start of this.
82  ///
83  /// FileID's are used to compute the location of a character in memory as well
84  /// as the logical source location, which can be differ from the physical
85  /// location.  It is different when #line's are active or when macros have
86  /// been expanded.
87  ///
88  /// Each FileID has include stack information, indicating where it came from.
89  /// For the primary translation unit, it comes from SourceLocation() aka 0.
90  /// This information encodes the #include chain that a token was instantiated
91  /// from.
92  ///
93  /// FileIDInfos contain a "ContentCache *", describing the source file,
94  /// and a Chunk number, which allows a SourceLocation to index into very
95  /// large files (those which there are not enough FilePosBits to address).
96  ///
97  struct FileIDInfo {
98  private:
99    /// IncludeLoc - The location of the #include that brought in this file.
100    /// This SourceLocation object has an invalid SLOC for the main file.
101    SourceLocation IncludeLoc;
102
103    /// ChunkNo - Really large buffers are broken up into chunks that are
104    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
105    /// chunk number of this FileID.
106    unsigned ChunkNo;
107
108    /// Content - Information about the source buffer itself.
109    const ContentCache* Content;
110
111  public:
112    /// get - Return a FileIDInfo object.
113    static FileIDInfo get(SourceLocation IL, unsigned CN,
114                          const ContentCache *Con) {
115      FileIDInfo X;
116      X.IncludeLoc = IL;
117      X.ChunkNo = CN;
118      X.Content = Con;
119      return X;
120    }
121
122    SourceLocation getIncludeLoc() const { return IncludeLoc; }
123    unsigned getChunkNo() const { return ChunkNo; }
124    const ContentCache* getContentCache() const { return Content; }
125  };
126
127  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
128  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
129  /// instantiated, and the PhysicalLoc - where the actual character data for
130  /// the token came from.  An actual macro SourceLocation stores deltas from
131  /// these positions.
132  class MacroIDInfo {
133    SourceLocation InstantiationLoc, PhysicalLoc;
134  public:
135    SourceLocation getInstantiationLoc() const { return InstantiationLoc; }
136    SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
137
138    /// get - Return a MacroID for a macro expansion.  IL specifies
139    /// the instantiation location, and PL specifies the physical location
140    /// (where the characters from the token come from).  Both IL and PL refer
141    /// to normal File SLocs.
142    static MacroIDInfo get(SourceLocation IL, SourceLocation PL) {
143      MacroIDInfo X;
144      X.InstantiationLoc = IL;
145      X.PhysicalLoc = PL;
146      return X;
147    }
148  };
149}  // end SrcMgr namespace.
150} // end clang namespace
151
152namespace std {
153template <> struct less<clang::SrcMgr::ContentCache> {
154  inline bool operator()(const clang::SrcMgr::ContentCache& L,
155                         const clang::SrcMgr::ContentCache& R) const {
156    return L.Entry < R.Entry;
157  }
158};
159} // end std namespace
160
161namespace clang {
162
163/// SourceManager - This file handles loading and caching of source files into
164/// memory.  This object owns the MemoryBuffer objects for all of the loaded
165/// files and assigns unique FileID's for each unique #include chain.
166///
167/// The SourceManager can be queried for information about SourceLocation
168/// objects, turning them into either physical or logical locations.  Physical
169/// locations represent where the bytes corresponding to a token came from and
170/// logical locations represent where the location is in the user's view.  In
171/// the case of a macro expansion, for example, the physical location indicates
172/// where the expanded token came from and the logical location specifies where
173/// it was expanded.  Logical locations are also influenced by #line directives,
174/// etc.
175class SourceManager {
176  /// FileInfos - Memoized information about all of the files tracked by this
177  /// SourceManager.  This set allows us to merge ContentCache entries based
178  /// on their FileEntry*.  All ContentCache objects will thus have unique,
179  /// non-null, FileEntry pointers.
180  std::set<SrcMgr::ContentCache> FileInfos;
181
182  /// MemBufferInfos - Information about various memory buffers that we have
183  /// read in.  This is a list, instead of a vector, because we need pointers to
184  /// the FileInfo objects to be stable.  All FileEntry* within the
185  /// stored ContentCache objects are NULL, as they do not refer to a file.
186  std::list<SrcMgr::ContentCache> MemBufferInfos;
187
188  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
189  /// entries are off by one.
190  std::vector<SrcMgr::FileIDInfo> FileIDs;
191
192  /// MacroIDs - Information about each MacroID.
193  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
194
195  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
196  /// method which is used to speedup getLineNumber calls to nearby locations.
197  unsigned LastLineNoFileIDQuery;
198  SrcMgr::ContentCache *LastLineNoContentCache;
199  unsigned LastLineNoFilePos;
200  unsigned LastLineNoResult;
201
202public:
203  SourceManager() : LastLineNoFileIDQuery(~0U) {}
204  ~SourceManager() {}
205
206  void clearIDTables() {
207    FileIDs.clear();
208    MacroIDs.clear();
209    LastLineNoFileIDQuery = ~0U;
210    LastLineNoContentCache = 0;
211  }
212
213  /// createFileID - Create a new FileID that represents the specified file
214  /// being #included from the specified IncludePosition.  This returns 0 on
215  /// error and translates NULL into standard input.
216  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
217    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
218    if (IR == 0) return 0;    // Error opening file?
219    return createFileID(IR, IncludePos);
220  }
221
222  /// createFileIDForMemBuffer - Create a new FileID that represents the
223  /// specified memory buffer.  This does no caching of the buffer and takes
224  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
225  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
226    return createFileID(createMemBufferContentCache(Buffer), SourceLocation());
227  }
228
229  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
230  /// that a token at Loc should actually be referenced from InstantiationLoc.
231  SourceLocation getInstantiationLoc(SourceLocation Loc,
232                                     SourceLocation InstantiationLoc);
233
234  /// getBuffer - Return the buffer for the specified FileID.
235  ///
236  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
237    return getContentCache(FileID)->Buffer;
238  }
239
240  /// getBufferData - Return a pointer to the start and end of the character
241  /// data for the specified FileID.
242  std::pair<const char*, const char*> getBufferData(unsigned FileID) const;
243
244  /// getIncludeLoc - Return the location of the #include for the specified
245  /// SourceLocation.  If this is a macro expansion, this transparently figures
246  /// out which file includes the file being expanded into.
247  SourceLocation getIncludeLoc(SourceLocation ID) const {
248    return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
249  }
250
251  /// getCharacterData - Return a pointer to the start of the specified location
252  /// in the appropriate MemoryBuffer.
253  const char *getCharacterData(SourceLocation SL) const;
254
255  /// getColumnNumber - Return the column # for the specified file position.
256  /// This is significantly cheaper to compute than the line number.  This
257  /// returns zero if the column number isn't known.  This may only be called on
258  /// a file sloc, so you must choose a physical or logical location before
259  /// calling this method.
260  unsigned getColumnNumber(SourceLocation Loc) const;
261
262  unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
263    return getColumnNumber(getPhysicalLoc(Loc));
264  }
265  unsigned getLogicalColumnNumber(SourceLocation Loc) const {
266    return getColumnNumber(getLogicalLoc(Loc));
267  }
268
269
270  /// getLineNumber - Given a SourceLocation, return the physical line number
271  /// for the position indicated.  This requires building and caching a table of
272  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
273  /// about to emit a diagnostic.
274  unsigned getLineNumber(SourceLocation Loc);
275
276  unsigned getLogicalLineNumber(SourceLocation Loc) {
277    return getLineNumber(getLogicalLoc(Loc));
278  }
279  unsigned getPhysicalLineNumber(SourceLocation Loc) {
280    return getLineNumber(getPhysicalLoc(Loc));
281  }
282
283  /// getSourceName - This method returns the name of the file or buffer that
284  /// the SourceLocation specifies.  This can be modified with #line directives,
285  /// etc.
286  const char *getSourceName(SourceLocation Loc) const;
287
288  /// Given a SourceLocation object, return the logical location referenced by
289  /// the ID.  This logical location is subject to #line directives, etc.
290  SourceLocation getLogicalLoc(SourceLocation Loc) const {
291    // File locations are both physical and logical.
292    if (Loc.isFileID()) return Loc;
293
294    return MacroIDs[Loc.getMacroID()].getInstantiationLoc();
295  }
296
297  /// getPhysicalLoc - Given a SourceLocation object, return the physical
298  /// location referenced by the ID.
299  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
300    // File locations are both physical and logical.
301    if (Loc.isFileID()) return Loc;
302
303    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
304    return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
305  }
306
307  /// getContentCacheForLoc - Return the ContentCache for the physloc of the
308  /// specified SourceLocation, if one exists.
309  const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const {
310    Loc = getPhysicalLoc(Loc);
311    unsigned FileID = Loc.getFileID();
312    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
313    return FileIDs[FileID-1].getContentCache();
314  }
315
316  /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
317  ///  specified SourceLocation, if one exists.
318  const FileEntry* getFileEntryForLoc(SourceLocation Loc) const {
319    return getContentCacheForLoc(Loc)->Entry;
320  }
321
322  /// getDecomposedFileLoc - Decompose the specified file location into a raw
323  /// FileID + Offset pair.  The first element is the FileID, the second is the
324  /// offset from the start of the buffer of the location.
325  std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
326    assert(Loc.isFileID() && "Isn't a File SourceLocation");
327
328    // TODO: Add a flag "is first chunk" to SLOC.
329    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
330
331    // If this file has been split up into chunks, factor in the chunk number
332    // that the FileID references.
333    unsigned ChunkNo = FIDInfo->getChunkNo();
334    unsigned Offset = Loc.getRawFilePos();
335    Offset += (ChunkNo << SourceLocation::FilePosBits);
336
337    return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset);
338  }
339
340  /// PrintStats - Print statistics to stderr.
341  ///
342  void PrintStats() const;
343
344private:
345  /// createFileID - Create a new fileID for the specified ContentCache and
346  ///  include position.  This works regardless of whether the ContentCache
347  ///  corresponds to a file or some other input source.
348  unsigned createFileID(const SrcMgr::ContentCache* File,
349                        SourceLocation IncludePos);
350
351  /// getContentCache - Create or return a cached ContentCache for the specified
352  ///  file.  This returns null on failure.
353  const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile);
354
355  /// createMemBufferContentCache - Create a new ContentCache for the specified
356  ///  memory buffer.
357  const SrcMgr::ContentCache*
358  createMemBufferContentCache(const llvm::MemoryBuffer* Buf);
359
360  const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const {
361    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
362    return &FileIDs[FileID-1];
363  }
364
365  const SrcMgr::ContentCache *getContentCache(unsigned FileID) const {
366    return getContentCache(getFIDInfo(FileID));
367  }
368
369  /// Return the ContentCache structure for the specified FileID.
370  ///  This is always the physical reference for the ID.
371  const SrcMgr::ContentCache*
372  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
373    return FIDInfo->getContentCache();
374  }
375
376  /// getFullFilePos - This (efficient) method returns the offset from the start
377  /// of the file that the specified physical SourceLocation represents.  This
378  /// returns the location of the physical character data, not the logical file
379  /// position.
380  unsigned getFullFilePos(SourceLocation PhysLoc) const {
381    return getDecomposedFileLoc(PhysLoc).second;
382  }
383};
384
385
386}  // end namespace clang
387
388#endif
389