SourceManager.h revision 5f016e2cb5d11daeb237544de1c5d59f20fe1a6e
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
#include "clang/Basic/SourceLocation.h"
#include <cassert>
#include <list>
#include <map>
#include <string>
#include <utility>
#include <vector>
21
22namespace llvm {
23class MemoryBuffer;
24}
25
26namespace clang {
27
28class SourceManager;
29class FileEntry;
30class IdentifierTokenInfo;
31
32/// SrcMgr - Private classes that are part of the SourceManager implementation.
33///
34namespace SrcMgr {
35  /// FileInfo - Once instance of this struct is kept for every file loaded or
36  /// used.  This object owns the MemoryBuffer object.
37  struct FileInfo {
38    /// Buffer - The actual buffer containing the characters from the input
39    /// file.
40    const llvm::MemoryBuffer *Buffer;
41
42    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
43    /// is lazily computed.
44    ///
45    unsigned *SourceLineCache;
46
47    /// NumLines - The number of lines in this FileInfo.  This is only valid if
48    /// SourceLineCache is non-null.
49    unsigned NumLines;
50  };
51
52  typedef std::pair<const FileEntry * const, FileInfo> InfoRec;
53
54  /// FileIDInfo - Information about a FileID, basically just the logical file
55  /// that it represents and include stack information.  A SourceLocation is a
56  /// byte offset from the start of this.
57  ///
58  /// FileID's are used to compute the location of a character in memory as well
59  /// as the logical source location, which can be differ from the physical
60  /// location.  It is different when #line's are active or when macros have
61  /// been expanded.
62  ///
63  /// Each FileID has include stack information, indicating where it came from.
64  /// For the primary translation unit, it comes from SourceLocation() aka 0.
65  ///
66  /// There are three types of FileID's:
67  ///   1. Normal MemoryBuffer (file).  These are represented by a "InfoRec *",
68  ///      describing the source file, and a Chunk number, which factors into
69  ///      the SourceLocation's offset from the start of the buffer.
70  ///   2. Macro Expansions.  These indicate that the logical location is
71  ///      totally different than the physical location.  The logical source
72  ///      location is specified by the IncludeLoc.  The physical location is
73  ///      the FilePos of the token's SourceLocation combined with the FileID
74  ///      from MacroTokenFileID.
75  ///
76  struct FileIDInfo {
77    enum FileIDType {
78      NormalBuffer,
79      MacroExpansion
80    };
81
82    /// The type of this FileID.
83    FileIDType IDType;
84
85    /// IncludeLoc - The location of the #include that brought in this file.
86    /// This SourceLocation object has a FileId of 0 for the main file.
87    SourceLocation IncludeLoc;
88
89    /// This union is discriminated by IDType.
90    ///
91    union {
92      struct NormalBufferInfo {
93        /// ChunkNo - Really large buffers are broken up into chunks that are
94        /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
95        /// chunk number of this FileID.
96        unsigned ChunkNo;
97
98        /// FileInfo - Information about the source buffer itself.
99        ///
100        const InfoRec *Info;
101      } NormalBuffer;
102
103      /// MacroTokenFileID - This is the File ID that contains the characters
104      /// that make up the expanded token.
105      unsigned MacroTokenFileID;
106    } u;
107
108    /// getNormalBuffer - Return a FileIDInfo object for a normal buffer
109    /// reference.
110    static FileIDInfo getNormalBuffer(SourceLocation IL, unsigned CN,
111                                      const InfoRec *Inf) {
112      FileIDInfo X;
113      X.IDType = NormalBuffer;
114      X.IncludeLoc = IL;
115      X.u.NormalBuffer.ChunkNo = CN;
116      X.u.NormalBuffer.Info = Inf;
117      return X;
118    }
119
120    /// getMacroExpansion - Return a FileID for a macro expansion.  IL specifies
121    /// the instantiation location, and MacroFID specifies the FileID that the
122    /// token's characters come from.
123    static FileIDInfo getMacroExpansion(SourceLocation IL,
124                                        unsigned MacroFID) {
125      FileIDInfo X;
126      X.IDType = MacroExpansion;
127      X.IncludeLoc = IL;
128      X.u.MacroTokenFileID = MacroFID;
129      return X;
130    }
131
132    unsigned getNormalBufferChunkNo() const {
133      assert(IDType == NormalBuffer && "Not a normal buffer!");
134      return u.NormalBuffer.ChunkNo;
135    }
136
137    const InfoRec *getNormalBufferInfo() const {
138      assert(IDType == NormalBuffer && "Not a normal buffer!");
139      return u.NormalBuffer.Info;
140    }
141  };
142}  // end SrcMgr namespace.
143
144
145/// SourceManager - This file handles loading and caching of source files into
146/// memory.  This object owns the MemoryBuffer objects for all of the loaded
147/// files and assigns unique FileID's for each unique #include chain.
148///
149/// The SourceManager can be queried for information about SourceLocation
150/// objects, turning them into either physical or logical locations.  Physical
151/// locations represent where the bytes corresponding to a token came from and
152/// logical locations represent where the location is in the user's view.  In
153/// the case of a macro expansion, for example, the physical location indicates
154/// where the expanded token came from and the logical location specifies where
155/// it was expanded.  Logical locations are also influenced by #line directives,
156/// etc.
157class SourceManager {
158  /// FileInfos - Memoized information about all of the files tracked by this
159  /// SourceManager.
160  std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos;
161
162  /// MemBufferInfos - Information about various memory buffers that we have
163  /// read in.  This is a list, instead of a vector, because we need pointers to
164  /// the FileInfo objects to be stable.
165  std::list<SrcMgr::InfoRec> MemBufferInfos;
166
167  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
168  /// entries are off by one.
169  std::vector<SrcMgr::FileIDInfo> FileIDs;
170
171  /// LastInstantiationLoc_* - Cache the last instantiation request for fast
172  /// lookup.  Macros often want many tokens instantated at the same location.
173  SourceLocation LastInstantiationLoc_InstantLoc;
174  unsigned       LastInstantiationLoc_MacroFID;
175  unsigned       LastInstantiationLoc_Result;
176public:
177  SourceManager() { LastInstantiationLoc_MacroFID = ~0U; }
178  ~SourceManager();
179
180  /// createFileID - Create a new FileID that represents the specified file
181  /// being #included from the specified IncludePosition.  This returns 0 on
182  /// error and translates NULL into standard input.
183  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
184    const SrcMgr::InfoRec *IR = getInfoRec(SourceFile);
185    if (IR == 0) return 0;    // Error opening file?
186    return createFileID(IR, IncludePos);
187  }
188
189  /// createFileIDForMemBuffer - Create a new FileID that represents the
190  /// specified memory buffer.  This does no caching of the buffer and takes
191  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
192  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
193    return createFileID(createMemBufferInfoRec(Buffer), SourceLocation());
194  }
195
196  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
197  /// that a token from physloc PhysLoc should actually be referenced from
198  /// InstantiationLoc.
199  SourceLocation getInstantiationLoc(SourceLocation PhysLoc,
200                                     SourceLocation InstantiationLoc);
201
202  /// getBuffer - Return the buffer for the specified FileID.
203  ///
204  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
205    return getFileInfo(FileID)->Buffer;
206  }
207
208  /// getIncludeLoc - Return the location of the #include for the specified
209  /// FileID.
210  SourceLocation getIncludeLoc(unsigned FileID) const;
211
212  /// getFilePos - This (efficient) method returns the offset from the start of
213  /// the file that the specified SourceLocation represents.  This returns the
214  /// location of the physical character data, not the logical file position.
215  unsigned getFilePos(SourceLocation Loc) const {
216    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
217
218    // For Macros, the physical loc is specified by the MacroTokenFileID.
219    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
220      FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
221
222    // If this file has been split up into chunks, factor in the chunk number
223    // that the FileID references.
224    unsigned ChunkNo = FIDInfo->getNormalBufferChunkNo();
225    return Loc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits);
226  }
227
228  /// getCharacterData - Return a pointer to the start of the specified location
229  /// in the appropriate MemoryBuffer.
230  const char *getCharacterData(SourceLocation SL) const;
231
232  /// getColumnNumber - Return the column # for the specified include position.
233  /// this is significantly cheaper to compute than the line number.  This
234  /// returns zero if the column number isn't known.
235  unsigned getColumnNumber(SourceLocation Loc) const;
236
237  /// getLineNumber - Given a SourceLocation, return the physical line number
238  /// for the position indicated.  This requires building and caching a table of
239  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
240  /// about to emit a diagnostic.
241  unsigned getLineNumber(SourceLocation Loc);
242
243  /// getSourceFilePos - This method returns the *logical* offset from the start
244  /// of the file that the specified SourceLocation represents.  This returns
245  /// the location of the *logical* character data, not the physical file
246  /// position.  In the case of macros, for example, this returns where the
247  /// macro was instantiated, not where the characters for the macro can be
248  /// found.
249  unsigned getSourceFilePos(SourceLocation Loc) const;
250
251  /// getSourceName - This method returns the name of the file or buffer that
252  /// the SourceLocation specifies.  This can be modified with #line directives,
253  /// etc.
254  std::string getSourceName(SourceLocation Loc);
255
256  /// getFileEntryForFileID - Return the FileEntry record for the specified
257  /// FileID if one exists.
258  const FileEntry *getFileEntryForFileID(unsigned FileID) const {
259    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
260    return FileIDs[FileID-1].getNormalBufferInfo()->first;
261  }
262
263  /// Given a SourceLocation object, return the logical location referenced by
264  /// the ID.  This logical location is subject to #line directives, etc.
265  SourceLocation getLogicalLoc(SourceLocation Loc) const {
266    if (Loc.getFileID() == 0) return Loc;
267
268    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
269    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
270      return FIDInfo->IncludeLoc;
271    return Loc;
272  }
273
274  /// getPhysicalLoc - Given a SourceLocation object, return the physical
275  /// location referenced by the ID.
276  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
277    if (Loc.getFileID() == 0) return Loc;
278
279    // For Macros, the physical loc is specified by the MacroTokenFileID.
280    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
281    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
282      return SourceLocation(FIDInfo->u.MacroTokenFileID,
283                            Loc.getRawFilePos());
284    return Loc;
285  }
286
287  /// PrintStats - Print statistics to stderr.
288  ///
289  void PrintStats() const;
290private:
291  /// createFileID - Create a new fileID for the specified InfoRec and include
292  /// position.  This works regardless of whether the InfoRec corresponds to a
293  /// file or some other input source.
294  unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos);
295
296  /// getInfoRec - Create or return a cached FileInfo for the specified file.
297  /// This returns null on failure.
298  const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile);
299
300  /// createMemBufferInfoRec - Create a new info record for the specified memory
301  /// buffer.  This does no caching.
302  const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf);
303
304  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const {
305    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
306    return &FileIDs[FileID-1];
307  }
308
309  /// Return the InfoRec structure for the specified FileID.  This is always the
310  /// physical reference for the ID.
311  const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const {
312    // For Macros, the physical loc is specified by the MacroTokenFileID.
313    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
314      FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
315    return FIDInfo->getNormalBufferInfo();
316  }
317  const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const {
318    return getInfoRec(getFIDInfo(FileID));
319  }
320
321  SrcMgr::FileInfo *getFileInfo(const SrcMgr::FileIDInfo *FIDInfo) const {
322    if (const SrcMgr::InfoRec *IR = getInfoRec(FIDInfo))
323      return const_cast<SrcMgr::FileInfo *>(&IR->second);
324    return 0;
325  }
326  SrcMgr::FileInfo *getFileInfo(unsigned FileID) const {
327    if (const SrcMgr::InfoRec *IR = getInfoRec(FileID))
328      return const_cast<SrcMgr::FileInfo *>(&IR->second);
329    return 0;
330  }
331  SrcMgr::FileInfo *getFileInfo(const FileEntry *SourceFile) {
332    if (const SrcMgr::InfoRec *IR = getInfoRec(SourceFile))
333      return const_cast<SrcMgr::FileInfo *>(&IR->second);
334    return 0;
335  }
336};
337
338
339}  // end namespace clang
340
341#endif
342