SourceManager.h revision 3457e8cbaa8a6fec5d69173450655fe0bc38634b
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include <vector>
19#include <map>
20#include <list>
21#include <cassert>
22
23namespace llvm {
24class MemoryBuffer;
25}
26
27namespace clang {
28
29class SourceManager;
30class FileEntry;
31class IdentifierTokenInfo;
32
33/// SrcMgr - Private classes that are part of the SourceManager implementation.
34///
35namespace SrcMgr {
36  /// FileInfo - Once instance of this struct is kept for every file loaded or
37  /// used.  This object owns the MemoryBuffer object.
38  struct FileInfo {
39    /// Buffer - The actual buffer containing the characters from the input
40    /// file.
41    const llvm::MemoryBuffer *Buffer;
42
43    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
44    /// is lazily computed.
45    ///
46    unsigned *SourceLineCache;
47
48    /// NumLines - The number of lines in this FileInfo.  This is only valid if
49    /// SourceLineCache is non-null.
50    unsigned NumLines;
51  };
52
53  typedef std::pair<const FileEntry * const, FileInfo> InfoRec;
54
55  /// FileIDInfo - Information about a FileID, basically just the logical file
56  /// that it represents and include stack information.  A File SourceLocation
57  /// is a byte offset from the start of this.
58  ///
59  /// FileID's are used to compute the location of a character in memory as well
60  /// as the logical source location, which can be differ from the physical
61  /// location.  It is different when #line's are active or when macros have
62  /// been expanded.
63  ///
64  /// Each FileID has include stack information, indicating where it came from.
65  /// For the primary translation unit, it comes from SourceLocation() aka 0.
66  /// This information encodes the #include chain that a token was instantiated
67  /// from.
68  ///
69  /// FileIDInfos contain a "InfoRec *", describing the source file, and a Chunk
70  /// number, which allows a SourceLocation to index into very large files
71  /// (those which there are not enough FilePosBits to address).
72  ///
73  struct FileIDInfo {
74  private:
75    /// IncludeLoc - The location of the #include that brought in this file.
76    /// This SourceLocation object has an invalid SLOC for the main file.
77    SourceLocation IncludeLoc;
78
79    /// ChunkNo - Really large buffers are broken up into chunks that are
80    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
81    /// chunk number of this FileID.
82    unsigned ChunkNo;
83
84    /// FileInfo - Information about the source buffer itself.
85    ///
86    const InfoRec *Info;
87  public:
88
89    /// get - Return a FileIDInfo object.
90    static FileIDInfo get(SourceLocation IL, unsigned CN, const InfoRec *Inf) {
91      FileIDInfo X;
92      X.IncludeLoc = IL;
93      X.ChunkNo = CN;
94      X.Info = Inf;
95      return X;
96    }
97
98    SourceLocation getIncludeLoc() const { return IncludeLoc; }
99    unsigned getChunkNo() const { return ChunkNo; }
100    const InfoRec *getInfo() const { return Info; }
101  };
102
103  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
104  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
105  /// instantiated, and the PhysicalLoc - where the actual character data for
106  /// the token came from.  An actual macro SourceLocation stores deltas from
107  /// these positions.
108  class MacroIDInfo {
109    SourceLocation InstantiationLoc, PhysicalLoc;
110  public:
111    SourceLocation getInstantiationLoc() const { return InstantiationLoc; }
112    SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
113
114    /// get - Return a MacroID for a macro expansion.  IL specifies
115    /// the instantiation location, and PL specifies the physical location
116    /// (where the characters from the token come from).  Both IL and PL refer
117    /// to normal File SLocs.
118    static MacroIDInfo get(SourceLocation IL, SourceLocation PL) {
119      MacroIDInfo X;
120      X.InstantiationLoc = IL;
121      X.PhysicalLoc = PL;
122      return X;
123    }
124  };
125}  // end SrcMgr namespace.
126
127
128/// SourceManager - This file handles loading and caching of source files into
129/// memory.  This object owns the MemoryBuffer objects for all of the loaded
130/// files and assigns unique FileID's for each unique #include chain.
131///
132/// The SourceManager can be queried for information about SourceLocation
133/// objects, turning them into either physical or logical locations.  Physical
134/// locations represent where the bytes corresponding to a token came from and
135/// logical locations represent where the location is in the user's view.  In
136/// the case of a macro expansion, for example, the physical location indicates
137/// where the expanded token came from and the logical location specifies where
138/// it was expanded.  Logical locations are also influenced by #line directives,
139/// etc.
140class SourceManager {
141  /// FileInfos - Memoized information about all of the files tracked by this
142  /// SourceManager.
143  std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos;
144
145  /// MemBufferInfos - Information about various memory buffers that we have
146  /// read in.  This is a list, instead of a vector, because we need pointers to
147  /// the FileInfo objects to be stable.
148  std::list<SrcMgr::InfoRec> MemBufferInfos;
149
150  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
151  /// entries are off by one.
152  std::vector<SrcMgr::FileIDInfo> FileIDs;
153
154  /// MacroIDs - Information about each MacroID.
155  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
156
157  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
158  /// method which is used to speedup getLineNumber calls to nearby locations.
159  unsigned LastLineNoFileIDQuery;
160  SrcMgr::FileInfo *LastLineNoFileInfo;
161  unsigned LastLineNoFilePos;
162  unsigned LastLineNoResult;
163public:
164  SourceManager() : LastLineNoFileIDQuery(~0U) {}
165  ~SourceManager();
166
167  void clearIDTables() {
168    FileIDs.clear();
169    MacroIDs.clear();
170    LastLineNoFileIDQuery = ~0U;
171    LastLineNoFileInfo = 0;
172  }
173
174  /// createFileID - Create a new FileID that represents the specified file
175  /// being #included from the specified IncludePosition.  This returns 0 on
176  /// error and translates NULL into standard input.
177  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
178    const SrcMgr::InfoRec *IR = getInfoRec(SourceFile);
179    if (IR == 0) return 0;    // Error opening file?
180    return createFileID(IR, IncludePos);
181  }
182
183  /// createFileIDForMemBuffer - Create a new FileID that represents the
184  /// specified memory buffer.  This does no caching of the buffer and takes
185  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
186  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
187    return createFileID(createMemBufferInfoRec(Buffer), SourceLocation());
188  }
189
190  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
191  /// that a token at Loc should actually be referenced from InstantiationLoc.
192  SourceLocation getInstantiationLoc(SourceLocation Loc,
193                                     SourceLocation InstantiationLoc);
194
195  /// getBuffer - Return the buffer for the specified FileID.
196  ///
197  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
198    return getFileInfo(FileID)->Buffer;
199  }
200
201  /// getBufferData - Return a pointer to the start and end of the character
202  /// data for the specified FileID.
203  std::pair<const char*, const char*> getBufferData(unsigned FileID) const;
204
205  /// getIncludeLoc - Return the location of the #include for the specified
206  /// SourceLocation.  If this is a macro expansion, this transparently figures
207  /// out which file includes the file being expanded into.
208  SourceLocation getIncludeLoc(SourceLocation ID) const {
209    return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
210  }
211
212  /// getCharacterData - Return a pointer to the start of the specified location
213  /// in the appropriate MemoryBuffer.
214  const char *getCharacterData(SourceLocation SL) const;
215
216  /// getColumnNumber - Return the column # for the specified file position.
217  /// This is significantly cheaper to compute than the line number.  This
218  /// returns zero if the column number isn't known.  This may only be called on
219  /// a file sloc, so you must choose a physical or logical location before
220  /// calling this method.
221  unsigned getColumnNumber(SourceLocation Loc) const;
222
223  unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
224    return getColumnNumber(getPhysicalLoc(Loc));
225  }
226  unsigned getLogicalColumnNumber(SourceLocation Loc) const {
227    return getColumnNumber(getLogicalLoc(Loc));
228  }
229
230
231  /// getLineNumber - Given a SourceLocation, return the physical line number
232  /// for the position indicated.  This requires building and caching a table of
233  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
234  /// about to emit a diagnostic.
235  unsigned getLineNumber(SourceLocation Loc);
236
237  unsigned getLogicalLineNumber(SourceLocation Loc) {
238    return getLineNumber(getLogicalLoc(Loc));
239  }
240  unsigned getPhysicalLineNumber(SourceLocation Loc) {
241    return getLineNumber(getPhysicalLoc(Loc));
242  }
243
244  /// getSourceName - This method returns the name of the file or buffer that
245  /// the SourceLocation specifies.  This can be modified with #line directives,
246  /// etc.
247  const char *getSourceName(SourceLocation Loc) const;
248
249  /// Given a SourceLocation object, return the logical location referenced by
250  /// the ID.  This logical location is subject to #line directives, etc.
251  SourceLocation getLogicalLoc(SourceLocation Loc) const {
252    // File locations are both physical and logical.
253    if (Loc.isFileID()) return Loc;
254
255    SourceLocation ILoc = MacroIDs[Loc.getMacroID()].getInstantiationLoc();
256    return ILoc.getFileLocWithOffset(Loc.getMacroLogOffs());
257  }
258
259  /// getPhysicalLoc - Given a SourceLocation object, return the physical
260  /// location referenced by the ID.
261  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
262    // File locations are both physical and logical.
263    if (Loc.isFileID()) return Loc;
264
265    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
266    return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
267  }
268
269  /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
270  /// specified SourceLocation, if one exists.
271  const FileEntry *getFileEntryForLoc(SourceLocation Loc) const {
272    Loc = getPhysicalLoc(Loc);
273    unsigned FileID = Loc.getFileID();
274    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
275    return FileIDs[FileID-1].getInfo()->first;
276  }
277
278  /// getDecomposedFileLoc - Decompose the specified file location into a raw
279  /// FileID + Offset pair.  The first element is the FileID, the second is the
280  /// offset from the start of the buffer of the location.
281  std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
282    assert(Loc.isFileID() && "Isn't a File SourceLocation");
283
284    // TODO: Add a flag "is first chunk" to SLOC.
285    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
286
287    // If this file has been split up into chunks, factor in the chunk number
288    // that the FileID references.
289    unsigned ChunkNo = FIDInfo->getChunkNo();
290    unsigned Offset = Loc.getRawFilePos();
291    Offset += (ChunkNo << SourceLocation::FilePosBits);
292
293    return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset);
294  }
295
296  /// PrintStats - Print statistics to stderr.
297  ///
298  void PrintStats() const;
299private:
300  /// createFileID - Create a new fileID for the specified InfoRec and include
301  /// position.  This works regardless of whether the InfoRec corresponds to a
302  /// file or some other input source.
303  unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos);
304
305  /// getInfoRec - Create or return a cached FileInfo for the specified file.
306  /// This returns null on failure.
307  const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile);
308
309  /// createMemBufferInfoRec - Create a new info record for the specified memory
310  /// buffer.  This does no caching.
311  const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf);
312
313  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const {
314    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
315    return &FileIDs[FileID-1];
316  }
317
318  const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const {
319    return getInfoRec(getFIDInfo(FileID));
320  }
321
322  SrcMgr::FileInfo *getFileInfo(unsigned FileID) const {
323    if (const SrcMgr::InfoRec *IR = getInfoRec(FileID))
324      return const_cast<SrcMgr::FileInfo *>(&IR->second);
325    return 0;
326  }
327
328  /// Return the InfoRec structure for the specified FileID.  This is always the
329  /// physical reference for the ID.
330  const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const {
331    return FIDInfo->getInfo();
332  }
333
334
335  /// getFullFilePos - This (efficient) method returns the offset from the start
336  /// of the file that the specified physical SourceLocation represents.  This
337  /// returns the location of the physical character data, not the logical file
338  /// position.
339  unsigned getFullFilePos(SourceLocation PhysLoc) const {
340    return getDecomposedFileLoc(PhysLoc).second;
341  }
342};
343
344
345}  // end namespace clang
346
347#endif
348