SourceManager.h revision 9dc62f044a6ba21f503bd56607d94b32704e7945
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include <vector>
19#include <map>
20#include <list>
21#include <cassert>
22
23namespace llvm {
24class MemoryBuffer;
25}
26
27namespace clang {
28
29class SourceManager;
30class FileEntry;
31class IdentifierTokenInfo;
32
33/// SrcMgr - Private classes that are part of the SourceManager implementation.
34///
35namespace SrcMgr {
36  /// FileInfo - Once instance of this struct is kept for every file loaded or
37  /// used.  This object owns the MemoryBuffer object.
38  struct FileInfo {
39    /// Buffer - The actual buffer containing the characters from the input
40    /// file.
41    const llvm::MemoryBuffer *Buffer;
42
43    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
44    /// is lazily computed.
45    ///
46    unsigned *SourceLineCache;
47
48    /// NumLines - The number of lines in this FileInfo.  This is only valid if
49    /// SourceLineCache is non-null.
50    unsigned NumLines;
51  };
52
53  typedef std::pair<const FileEntry * const, FileInfo> InfoRec;
54
55  /// FileIDInfo - Information about a FileID, basically just the logical file
56  /// that it represents and include stack information.  A SourceLocation is a
57  /// byte offset from the start of this.
58  ///
59  /// FileID's are used to compute the location of a character in memory as well
60  /// as the logical source location, which can be differ from the physical
61  /// location.  It is different when #line's are active or when macros have
62  /// been expanded.
63  ///
64  /// Each FileID has include stack information, indicating where it came from.
65  /// For the primary translation unit, it comes from SourceLocation() aka 0.
66  ///
67  /// There are three types of FileID's:
68  ///   1. Normal MemoryBuffer (file).  These are represented by a "InfoRec *",
69  ///      describing the source file, and a Chunk number, which factors into
70  ///      the SourceLocation's offset from the start of the buffer.
71  ///   2. Macro Expansions.  These indicate that the logical location is
72  ///      totally different than the physical location.  The logical source
73  ///      location is specified by the IncludeLoc.  The physical location is
74  ///      the FilePos of the token's SourceLocation combined with the FileID
75  ///      from MacroTokenFileID.
76  ///
77  struct FileIDInfo {
78    enum FileIDType {
79      NormalBuffer,
80      MacroExpansion
81    };
82
83    /// The type of this FileID.
84    FileIDType IDType;
85
86    /// IncludeLoc - The location of the #include that brought in this file.
87    /// This SourceLocation object has a FileId of 0 for the main file.
88    SourceLocation IncludeLoc;
89
90    /// This union is discriminated by IDType.
91    ///
92    union {
93      struct NormalBufferInfo {
94        /// ChunkNo - Really large buffers are broken up into chunks that are
95        /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
96        /// chunk number of this FileID.
97        unsigned ChunkNo;
98
99        /// FileInfo - Information about the source buffer itself.
100        ///
101        const InfoRec *Info;
102      } NormalBuffer;
103
104      /// MacroTokenFileID - This is the File ID that contains the characters
105      /// that make up the expanded token.
106      unsigned MacroTokenFileID;
107    } u;
108
109    /// getNormalBuffer - Return a FileIDInfo object for a normal buffer
110    /// reference.
111    static FileIDInfo getNormalBuffer(SourceLocation IL, unsigned CN,
112                                      const InfoRec *Inf) {
113      FileIDInfo X;
114      X.IDType = NormalBuffer;
115      X.IncludeLoc = IL;
116      X.u.NormalBuffer.ChunkNo = CN;
117      X.u.NormalBuffer.Info = Inf;
118      return X;
119    }
120
121    /// getMacroExpansion - Return a FileID for a macro expansion.  IL specifies
122    /// the instantiation location, and MacroFID specifies the FileID that the
123    /// token's characters come from.
124    static FileIDInfo getMacroExpansion(SourceLocation IL,
125                                        unsigned MacroFID) {
126      FileIDInfo X;
127      X.IDType = MacroExpansion;
128      X.IncludeLoc = IL;
129      X.u.MacroTokenFileID = MacroFID;
130      return X;
131    }
132
133    unsigned getNormalBufferChunkNo() const {
134      assert(IDType == NormalBuffer && "Not a normal buffer!");
135      return u.NormalBuffer.ChunkNo;
136    }
137
138    const InfoRec *getNormalBufferInfo() const {
139      assert(IDType == NormalBuffer && "Not a normal buffer!");
140      return u.NormalBuffer.Info;
141    }
142  };
143}  // end SrcMgr namespace.
144
145
146/// SourceManager - This file handles loading and caching of source files into
147/// memory.  This object owns the MemoryBuffer objects for all of the loaded
148/// files and assigns unique FileID's for each unique #include chain.
149///
150/// The SourceManager can be queried for information about SourceLocation
151/// objects, turning them into either physical or logical locations.  Physical
152/// locations represent where the bytes corresponding to a token came from and
153/// logical locations represent where the location is in the user's view.  In
154/// the case of a macro expansion, for example, the physical location indicates
155/// where the expanded token came from and the logical location specifies where
156/// it was expanded.  Logical locations are also influenced by #line directives,
157/// etc.
158class SourceManager {
159  /// FileInfos - Memoized information about all of the files tracked by this
160  /// SourceManager.
161  std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos;
162
163  /// MemBufferInfos - Information about various memory buffers that we have
164  /// read in.  This is a list, instead of a vector, because we need pointers to
165  /// the FileInfo objects to be stable.
166  std::list<SrcMgr::InfoRec> MemBufferInfos;
167
168  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
169  /// entries are off by one.
170  std::vector<SrcMgr::FileIDInfo> FileIDs;
171
172  /// LastInstantiationLoc_* - Cache the last instantiation request for fast
173  /// lookup.  Macros often want many tokens instantated at the same location.
174  SourceLocation LastInstantiationLoc_InstantLoc;
175  unsigned       LastInstantiationLoc_MacroFID;
176  unsigned       LastInstantiationLoc_Result;
177public:
178  SourceManager() { LastInstantiationLoc_MacroFID = ~0U; }
179  ~SourceManager();
180
181  /// createFileID - Create a new FileID that represents the specified file
182  /// being #included from the specified IncludePosition.  This returns 0 on
183  /// error and translates NULL into standard input.
184  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
185    const SrcMgr::InfoRec *IR = getInfoRec(SourceFile);
186    if (IR == 0) return 0;    // Error opening file?
187    return createFileID(IR, IncludePos);
188  }
189
190  /// createFileIDForMemBuffer - Create a new FileID that represents the
191  /// specified memory buffer.  This does no caching of the buffer and takes
192  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
193  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
194    return createFileID(createMemBufferInfoRec(Buffer), SourceLocation());
195  }
196
197  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
198  /// that a token from physloc PhysLoc should actually be referenced from
199  /// InstantiationLoc.
200  SourceLocation getInstantiationLoc(SourceLocation PhysLoc,
201                                     SourceLocation InstantiationLoc);
202
203  /// getBuffer - Return the buffer for the specified FileID.
204  ///
205  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
206    return getFileInfo(FileID)->Buffer;
207  }
208
209  /// getIncludeLoc - Return the location of the #include for the specified
210  /// FileID.
211  SourceLocation getIncludeLoc(unsigned FileID) const;
212
213  /// getFilePos - This (efficient) method returns the offset from the start of
214  /// the file that the specified SourceLocation represents.  This returns the
215  /// location of the physical character data, not the logical file position.
216  unsigned getFilePos(SourceLocation Loc) const {
217    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
218
219    // For Macros, the physical loc is specified by the MacroTokenFileID.
220    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
221      FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
222
223    // If this file has been split up into chunks, factor in the chunk number
224    // that the FileID references.
225    unsigned ChunkNo = FIDInfo->getNormalBufferChunkNo();
226    return Loc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits);
227  }
228
229  /// getCharacterData - Return a pointer to the start of the specified location
230  /// in the appropriate MemoryBuffer.
231  const char *getCharacterData(SourceLocation SL) const;
232
233  /// getColumnNumber - Return the column # for the specified include position.
234  /// this is significantly cheaper to compute than the line number.  This
235  /// returns zero if the column number isn't known.
236  unsigned getColumnNumber(SourceLocation Loc) const;
237
238  /// getLineNumber - Given a SourceLocation, return the physical line number
239  /// for the position indicated.  This requires building and caching a table of
240  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
241  /// about to emit a diagnostic.
242  unsigned getLineNumber(SourceLocation Loc);
243
244  /// getSourceFilePos - This method returns the *logical* offset from the start
245  /// of the file that the specified SourceLocation represents.  This returns
246  /// the location of the *logical* character data, not the physical file
247  /// position.  In the case of macros, for example, this returns where the
248  /// macro was instantiated, not where the characters for the macro can be
249  /// found.
250  unsigned getSourceFilePos(SourceLocation Loc) const;
251
252  /// getSourceName - This method returns the name of the file or buffer that
253  /// the SourceLocation specifies.  This can be modified with #line directives,
254  /// etc.
255  std::string getSourceName(SourceLocation Loc);
256
257  /// getFileEntryForFileID - Return the FileEntry record for the specified
258  /// FileID if one exists.
259  const FileEntry *getFileEntryForFileID(unsigned FileID) const {
260    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
261    return FileIDs[FileID-1].getNormalBufferInfo()->first;
262  }
263
264  /// Given a SourceLocation object, return the logical location referenced by
265  /// the ID.  This logical location is subject to #line directives, etc.
266  SourceLocation getLogicalLoc(SourceLocation Loc) const {
267    if (Loc.getFileID() == 0) return Loc;
268
269    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
270    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
271      return FIDInfo->IncludeLoc;
272    return Loc;
273  }
274
275  /// getPhysicalLoc - Given a SourceLocation object, return the physical
276  /// location referenced by the ID.
277  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
278    if (Loc.getFileID() == 0) return Loc;
279
280    // For Macros, the physical loc is specified by the MacroTokenFileID.
281    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
282    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
283      return SourceLocation(FIDInfo->u.MacroTokenFileID,
284                            Loc.getRawFilePos());
285    return Loc;
286  }
287
288  /// PrintStats - Print statistics to stderr.
289  ///
290  void PrintStats() const;
291private:
292  /// createFileID - Create a new fileID for the specified InfoRec and include
293  /// position.  This works regardless of whether the InfoRec corresponds to a
294  /// file or some other input source.
295  unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos);
296
297  /// getInfoRec - Create or return a cached FileInfo for the specified file.
298  /// This returns null on failure.
299  const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile);
300
301  /// createMemBufferInfoRec - Create a new info record for the specified memory
302  /// buffer.  This does no caching.
303  const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf);
304
305  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const {
306    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
307    return &FileIDs[FileID-1];
308  }
309
310  /// Return the InfoRec structure for the specified FileID.  This is always the
311  /// physical reference for the ID.
312  const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const {
313    // For Macros, the physical loc is specified by the MacroTokenFileID.
314    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
315      FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
316    return FIDInfo->getNormalBufferInfo();
317  }
318  const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const {
319    return getInfoRec(getFIDInfo(FileID));
320  }
321
322  SrcMgr::FileInfo *getFileInfo(const SrcMgr::FileIDInfo *FIDInfo) const {
323    if (const SrcMgr::InfoRec *IR = getInfoRec(FIDInfo))
324      return const_cast<SrcMgr::FileInfo *>(&IR->second);
325    return 0;
326  }
327  SrcMgr::FileInfo *getFileInfo(unsigned FileID) const {
328    if (const SrcMgr::InfoRec *IR = getInfoRec(FileID))
329      return const_cast<SrcMgr::FileInfo *>(&IR->second);
330    return 0;
331  }
332  SrcMgr::FileInfo *getFileInfo(const FileEntry *SourceFile) {
333    if (const SrcMgr::InfoRec *IR = getInfoRec(SourceFile))
334      return const_cast<SrcMgr::FileInfo *>(&IR->second);
335    return 0;
336  }
337};
338
339
340}  // end namespace clang
341
342#endif
343