SourceManager.h revision 3b4d5e955e819dd3a4bed37ea2e47d6e4cb05274
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include "llvm/Bitcode/SerializationFwd.h"
19#include <vector>
20#include <set>
21#include <list>
22#include <cassert>
23
24namespace llvm {
25class MemoryBuffer;
26}
27
28namespace clang {
29
30class SourceManager;
31class FileManager;
32class FileEntry;
33class IdentifierTokenInfo;
34
35/// SrcMgr - Public enums and private classes that are part of the
36/// SourceManager implementation.
37///
38namespace SrcMgr {
39  /// CharacteristicKind - This is used to represent whether a file or directory
40  /// holds normal user code, system code, or system code which is implicitly
41  /// 'extern "C"' in C++ mode.  Entire directories can be tagged with this
42  /// (this is maintained by DirectoryLookup and friends) as can specific
43  /// FileIDInfos when a #pragma system_header is seen or various other cases.
44  ///
45  enum CharacteristicKind {
46    C_User, C_System, C_ExternCSystem
47  };
48
  /// ContentCache - One instance of this class is kept for every file
50  ///  loaded or used.  This object owns the MemoryBuffer object.
51  class ContentCache {
52    /// Buffer - The actual buffer containing the characters from the input
53    /// file.  This is owned by the ContentCache object.
54    mutable const llvm::MemoryBuffer *Buffer;
55
56  public:
57    /// Reference to the file entry.  This reference does not own
58    /// the FileEntry object.  It is possible for this to be NULL if
59    /// the ContentCache encapsulates an imaginary text buffer.
60    const FileEntry *Entry;
61
62    /// SourceLineCache - A new[]'d array of offsets for each source line.  This
63    /// is lazily computed.  This is owned by the ContentCache object.
64    unsigned *SourceLineCache;
65
66    /// NumLines - The number of lines in this ContentCache.  This is only valid
67    /// if SourceLineCache is non-null.
68    unsigned NumLines;
69
70    /// getBuffer - Returns the memory buffer for the associated content.
71    const llvm::MemoryBuffer *getBuffer() const;
72
73    /// getSize - Returns the size of the content encapsulated by this
74    ///  ContentCache. This can be the size of the source file or the size of an
75    ///  arbitrary scratch buffer.  If the ContentCache encapsulates a source
76    ///  file this size is retrieved from the file's FileEntry.
77    unsigned getSize() const;
78
79    /// getSizeBytesMapped - Returns the number of bytes actually mapped for
80    ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
81    ///  instantiated.
82    unsigned getSizeBytesMapped() const;
83
84    void setBuffer(const llvm::MemoryBuffer *B) {
85      assert(!Buffer && "MemoryBuffer already set.");
86      Buffer = B;
87    }
88
89    ContentCache(const FileEntry *e = NULL)
90      : Buffer(NULL), Entry(e), SourceLineCache(NULL), NumLines(0) {}
91
92    ~ContentCache();
93
94    /// The copy ctor does not allow copies where source object has either
95    ///  a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory
96    ///  is not transfered, so this is a logical error.
97    ContentCache(const ContentCache &RHS) : Buffer(NULL),SourceLineCache(NULL) {
98      Entry = RHS.Entry;
99
100      assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
101              && "Passed ContentCache object cannot own a buffer.");
102
103      NumLines = RHS.NumLines;
104    }
105
106    /// Emit - Emit this ContentCache to Bitcode.
107    void Emit(llvm::Serializer &S) const;
108
109    /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode
110    //   and store it in the specified SourceManager.
111    static void ReadToSourceManager(llvm::Deserializer &D, SourceManager &SM,
112                                    FileManager *FMgr, std::vector<char> &Buf);
113
114  private:
115    // Disable assignments.
116    ContentCache &operator=(const ContentCache& RHS);
117  };
118
119  /// FileIDInfo - Information about a FileID, basically just the logical file
120  /// that it represents and include stack information.  A File SourceLocation
121  /// is a byte offset from the start of this.
122  ///
123  /// FileID's are used to compute the location of a character in memory as well
  /// as the instantiation source location, which can differ from the
125  /// spelling location.  It is different when #line's are active or when macros
126  /// have been expanded.
127  ///
128  /// Each FileID has include stack information, indicating where it came from.
129  /// For the primary translation unit, it comes from SourceLocation() aka 0.
130  /// This information encodes the #include chain that a token was instantiated
131  /// from.
132  ///
133  /// FileIDInfos contain a "ContentCache *", describing the source file,
134  /// and a Chunk number, which allows a SourceLocation to index into very
135  /// large files (those which there are not enough FilePosBits to address).
136  ///
137  struct FileIDInfo {
138  private:
139    /// IncludeLoc - The location of the #include that brought in this file.
140    /// This SourceLocation object has an invalid SLOC for the main file.
141    SourceLocation IncludeLoc;
142
143    /// ChunkNo - Really large buffers are broken up into chunks that are
144    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
145    /// chunk number of this FileID.
146    unsigned ChunkNo : 30;
147
148    /// FileCharacteristic - This is an instance of CharacteristicKind,
149    /// indicating whether this is a system header dir or not.
150    unsigned FileCharacteristic : 2;
151
152    /// Content - Information about the source buffer itself.
153    const ContentCache *Content;
154
155  public:
156    /// get - Return a FileIDInfo object.
157    static FileIDInfo get(SourceLocation IL, unsigned CN,
158                          const ContentCache *Con,
159                          CharacteristicKind FileCharacter) {
160      FileIDInfo X;
161      X.IncludeLoc = IL;
162      X.ChunkNo = CN;
163      X.Content = Con;
164      X.FileCharacteristic = FileCharacter;
165      return X;
166    }
167
168    SourceLocation getIncludeLoc() const { return IncludeLoc; }
169    unsigned getChunkNo() const { return ChunkNo; }
170    const ContentCache* getContentCache() const { return Content; }
171
172    /// getCharacteristic - Return whether this is a system header or not.
173    CharacteristicKind getFileCharacteristic() const {
174      return (CharacteristicKind)FileCharacteristic;
175    }
176
177    /// Emit - Emit this FileIDInfo to Bitcode.
178    void Emit(llvm::Serializer& S) const;
179
180    /// ReadVal - Reconstitute a FileIDInfo from Bitcode.
181    static FileIDInfo ReadVal(llvm::Deserializer& S);
182  };
183
184  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
185  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
186  /// instantiated, and the SpellingLoc - where the actual character data for
187  /// the token came from.  An actual macro SourceLocation stores deltas from
188  /// these positions.
189  class MacroIDInfo {
190    SourceLocation InstantiationLoc, SpellingLoc;
191  public:
192    SourceLocation getInstantiationLoc() const { return InstantiationLoc; }
193    SourceLocation getSpellingLoc() const { return SpellingLoc; }
194
195    /// get - Return a MacroID for a macro expansion.  VL specifies
196    /// the instantiation location (where the macro is expanded), and SL
197    /// specifies the spelling location (where the characters from the token
198    /// come from).  Both VL and PL refer to normal File SLocs.
199    static MacroIDInfo get(SourceLocation VL, SourceLocation SL) {
200      MacroIDInfo X;
201      X.InstantiationLoc = VL;
202      X.SpellingLoc = SL;
203      return X;
204    }
205
206    /// Emit - Emit this MacroIDInfo to Bitcode.
207    void Emit(llvm::Serializer& S) const;
208
209    /// ReadVal - Reconstitute a MacroIDInfo from Bitcode.
210    static MacroIDInfo ReadVal(llvm::Deserializer& S);
211  };
212}  // end SrcMgr namespace.
213} // end clang namespace
214
215namespace std {
216template <> struct less<clang::SrcMgr::ContentCache> {
217  inline bool operator()(const clang::SrcMgr::ContentCache& L,
218                         const clang::SrcMgr::ContentCache& R) const {
219    return L.Entry < R.Entry;
220  }
221};
222} // end std namespace
223
224namespace clang {
225
/// SourceManager - This class handles loading and caching of source files into
227/// memory.  This object owns the MemoryBuffer objects for all of the loaded
228/// files and assigns unique FileID's for each unique #include chain.
229///
230/// The SourceManager can be queried for information about SourceLocation
231/// objects, turning them into either spelling or instantiation locations.
232/// Spelling locations represent where the bytes corresponding to a token came
233/// from and instantiation locations represent where the location is in the
234/// user's view.  In the case of a macro expansion, for example, the spelling
235/// location indicates where the expanded token came from and the instantiation
236/// location specifies where it was expanded.
237class SourceManager {
238  /// FileInfos - Memoized information about all of the files tracked by this
239  /// SourceManager.  This set allows us to merge ContentCache entries based
240  /// on their FileEntry*.  All ContentCache objects will thus have unique,
241  /// non-null, FileEntry pointers.
242  std::set<SrcMgr::ContentCache> FileInfos;
243
244  /// MemBufferInfos - Information about various memory buffers that we have
245  /// read in.  This is a list, instead of a vector, because we need pointers to
246  /// the ContentCache objects to be stable.  All FileEntry* within the
247  /// stored ContentCache objects are NULL, as they do not refer to a file.
248  std::list<SrcMgr::ContentCache> MemBufferInfos;
249
250  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
251  /// entries are off by one.
252  std::vector<SrcMgr::FileIDInfo> FileIDs;
253
254  /// MacroIDs - Information about each MacroID.
255  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
256
257  /// LastLineNo - These ivars serve as a cache used in the getLineNumber
258  /// method which is used to speedup getLineNumber calls to nearby locations.
259  mutable FileID LastLineNoFileIDQuery;
260  mutable SrcMgr::ContentCache *LastLineNoContentCache;
261  mutable unsigned LastLineNoFilePos;
262  mutable unsigned LastLineNoResult;
263
264  /// MainFileID - The file ID for the main source file of the translation unit.
265  FileID MainFileID;
266
267  // SourceManager doesn't support copy construction.
268  explicit SourceManager(const SourceManager&);
269  void operator=(const SourceManager&);
270public:
271  SourceManager() {}
272  ~SourceManager() {}
273
274  void clearIDTables() {
275    MainFileID = FileID();
276    FileIDs.clear();
277    MacroIDs.clear();
278    LastLineNoFileIDQuery = FileID();
279    LastLineNoContentCache = 0;
280  }
281
282  /// getMainFileID - Returns the FileID of the main source file.
283  FileID getMainFileID() const { return MainFileID; }
284
285  /// createFileID - Create a new FileID that represents the specified file
286  /// being #included from the specified IncludePosition.  This returns 0 on
287  /// error and translates NULL into standard input.
288  FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
289                        SrcMgr::CharacteristicKind FileCharacter) {
290    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
291    if (IR == 0) return FileID();    // Error opening file?
292    return createFileID(IR, IncludePos, FileCharacter);
293  }
294
295  /// createMainFileID - Create the FileID for the main source file.
296  FileID createMainFileID(const FileEntry *SourceFile,
297                            SourceLocation IncludePos) {
298
299    assert(MainFileID.isInvalid() && "MainFileID already set!");
300    MainFileID = createFileID(SourceFile, IncludePos, SrcMgr::C_User);
301    return MainFileID;
302  }
303
304  /// createFileIDForMemBuffer - Create a new FileID that represents the
305  /// specified memory buffer.  This does no caching of the buffer and takes
306  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
307  FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
308    return createFileID(createMemBufferContentCache(Buffer), SourceLocation(),
309                        SrcMgr::C_User);
310  }
311
312  /// createMainFileIDForMembuffer - Create the FileID for a memory buffer
313  ///  that will represent the FileID for the main source.  One example
314  ///  of when this would be used is when the main source is read from STDIN.
315  FileID createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
316    assert(MainFileID.isInvalid() && "MainFileID already set!");
317    MainFileID = createFileIDForMemBuffer(Buffer);
318    return MainFileID;
319  }
320
321  /// getLocForStartOfFile - Return the source location corresponding to the
322  /// first byte of the specified file.
323  SourceLocation getLocForStartOfFile(FileID FID) const {
324    return SourceLocation::getFileLoc(FID.ID, 0);
325  }
326
327
328  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
329  /// that a token at Loc should actually be referenced from InstantiationLoc.
330  SourceLocation getInstantiationLoc(SourceLocation Loc,
331                                     SourceLocation InstantiationLoc);
332
333  /// getBuffer - Return the buffer for the specified FileID.
334  ///
335  const llvm::MemoryBuffer *getBuffer(FileID FID) const {
336    return getContentCache(FID)->getBuffer();
337  }
338
339  const llvm::MemoryBuffer *getBuffer(SourceLocation Loc) const {
340    return getContentCacheForLoc(Loc)->getBuffer();
341  }
342
343
344  /// getBufferData - Return a pointer to the start and end of the character
345  /// data for the specified FileID.
346  std::pair<const char*, const char*> getBufferData(SourceLocation Loc) const;
347  std::pair<const char*, const char*> getBufferData(FileID FID) const;
348
349  /// getIncludeLoc - Return the location of the #include for the specified
350  /// SourceLocation.  If this is a macro expansion, this transparently figures
351  /// out which file includes the file being expanded into.
352  SourceLocation getIncludeLoc(SourceLocation ID) const {
353    return getFIDInfo(getInstantiationLoc(ID).getChunkID())->getIncludeLoc();
354  }
355
356  /// getCharacterData - Return a pointer to the start of the specified location
357  /// in the appropriate MemoryBuffer.
358  const char *getCharacterData(SourceLocation SL) const;
359
360  /// getColumnNumber - Return the column # for the specified file position.
361  /// This is significantly cheaper to compute than the line number.  This
362  /// returns zero if the column number isn't known.  This may only be called on
363  /// a file sloc, so you must choose a spelling or instantiation location
364  /// before calling this method.
365  unsigned getColumnNumber(SourceLocation Loc) const;
366
367  unsigned getSpellingColumnNumber(SourceLocation Loc) const {
368    return getColumnNumber(getSpellingLoc(Loc));
369  }
370  unsigned getInstantiationColumnNumber(SourceLocation Loc) const {
371    return getColumnNumber(getInstantiationLoc(Loc));
372  }
373
374
375  /// getLineNumber - Given a SourceLocation, return the spelling line number
376  /// for the position indicated.  This requires building and caching a table of
377  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
378  /// about to emit a diagnostic.
379  unsigned getLineNumber(SourceLocation Loc) const;
380
381  unsigned getInstantiationLineNumber(SourceLocation Loc) const {
382    return getLineNumber(getInstantiationLoc(Loc));
383  }
384  unsigned getSpellingLineNumber(SourceLocation Loc) const {
385    return getLineNumber(getSpellingLoc(Loc));
386  }
387
388  /// getSourceName - This method returns the name of the file or buffer that
389  /// the SourceLocation specifies.  This can be modified with #line directives,
390  /// etc.
391  const char *getSourceName(SourceLocation Loc) const;
392
393  /// Given a SourceLocation object, return the instantiation location
394  /// referenced by the ID.
395  SourceLocation getInstantiationLoc(SourceLocation Loc) const {
396    // File locations work.
397    if (Loc.isFileID()) return Loc;
398
399    return MacroIDs[Loc.getMacroID()].getInstantiationLoc();
400  }
401
402  /// getSpellingLoc - Given a SourceLocation object, return the spelling
403  /// location referenced by the ID.  This is the place where the characters
404  /// that make up the lexed token can be found.
405  SourceLocation getSpellingLoc(SourceLocation Loc) const {
406    // File locations work!
407    if (Loc.isFileID()) return Loc;
408
409    // Look up the macro token's spelling location.
410    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc();
411    return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs());
412  }
413
414  /// getContentCacheForLoc - Return the ContentCache for the spelling loc of
415  /// the specified SourceLocation, if one exists.
416  const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const {
417    Loc = getSpellingLoc(Loc);
418    unsigned ChunkID = Loc.getChunkID();
419    assert(ChunkID-1 < FileIDs.size() && "Invalid FileID!");
420    return FileIDs[ChunkID-1].getContentCache();
421  }
422
423  /// getFileEntryForLoc - Return the FileEntry record for the spelling loc of
424  /// the specified SourceLocation, if one exists.
425  const FileEntry* getFileEntryForLoc(SourceLocation Loc) const {
426    return getContentCacheForLoc(Loc)->Entry;
427  }
428
429  /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
430  const FileEntry *getFileEntryForID(FileID FID) const {
431    return getContentCache(FID)->Entry;
432  }
433
434  /// getCanonicalFileID - Return the canonical FileID for a SourceLocation.
435  ///  A file can have multiple FileIDs if it is large enough to be broken
436  ///  into multiple chunks.  This method returns the unique FileID without
437  ///  chunk information for a given SourceLocation.  Use this method when
438  ///  you want to compare FileIDs across SourceLocations.
439  FileID getCanonicalFileID(SourceLocation SpellingLoc) const {
440    return getDecomposedFileLoc(SpellingLoc).first;
441  }
442
443  /// getDecomposedFileLoc - Decompose the specified file location into a raw
444  /// FileID + Offset pair.  The first element is the FileID, the second is the
445  /// offset from the start of the buffer of the location.
446  std::pair<FileID, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
447    assert(Loc.isFileID() && "Isn't a File SourceLocation");
448
449    // TODO: Add a flag "is first chunk" to SLOC.
450    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getChunkID());
451
452    // If this file has been split up into chunks, factor in the chunk number
453    // that the FileID references.
454    unsigned ChunkNo = FIDInfo->getChunkNo();
455    unsigned Offset = Loc.getRawFilePos();
456    Offset += (ChunkNo << SourceLocation::FilePosBits);
457
458    assert(Loc.getChunkID() >= ChunkNo && "Unexpected offset");
459
460    return std::make_pair(FileID::Create(Loc.getChunkID()-ChunkNo), Offset);
461  }
462
463  /// getFullFilePos - This (efficient) method returns the offset from the start
464  /// of the file that the specified spelling SourceLocation represents.  This
465  /// returns the location of the actual character data, not the instantiation
466  /// position.
467  unsigned getFullFilePos(SourceLocation SpellingLoc) const {
468    return getDecomposedFileLoc(SpellingLoc).second;
469  }
470
471  /// isFromSameFile - Returns true if both SourceLocations correspond to
472  ///  the same file.
473  bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const {
474    return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2);
475  }
476
477  /// isFromMainFile - Returns true if the file of provided SourceLocation is
478  ///   the main file.
479  bool isFromMainFile(SourceLocation Loc) const {
480    return getCanonicalFileID(Loc) == getMainFileID();
481  }
482
483  /// isInSystemHeader - Returns if a SourceLocation is in a system header.
484  bool isInSystemHeader(SourceLocation Loc) const {
485    return getFileCharacteristic(Loc) != SrcMgr::C_User;
486  }
487  SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const {
488    return getFIDInfo(getSpellingLoc(Loc).getChunkID())
489                  ->getFileCharacteristic();
490  }
491  SrcMgr::CharacteristicKind getFileCharacteristic(FileID FID) const {
492    return getFIDInfo(FID)->getFileCharacteristic();
493  }
494
495  // Iterators over FileInfos.
496  typedef std::set<SrcMgr::ContentCache>::const_iterator fileinfo_iterator;
497  fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); }
498  fileinfo_iterator fileinfo_end() const { return FileInfos.end(); }
499
500  /// PrintStats - Print statistics to stderr.
501  ///
502  void PrintStats() const;
503
504  /// Emit - Emit this SourceManager to Bitcode.
505  void Emit(llvm::Serializer& S) const;
506
507  /// Read - Reconstitute a SourceManager from Bitcode.
508  static SourceManager* CreateAndRegister(llvm::Deserializer& S,
509                                          FileManager &FMgr);
510
511private:
512  friend struct SrcMgr::ContentCache; // Used for deserialization.
513
514  /// createFileID - Create a new fileID for the specified ContentCache and
515  ///  include position.  This works regardless of whether the ContentCache
516  ///  corresponds to a file or some other input source.
517  FileID createFileID(const SrcMgr::ContentCache* File,
518                      SourceLocation IncludePos,
519                      SrcMgr::CharacteristicKind DirCharacter);
520
521  /// getContentCache - Create or return a cached ContentCache for the specified
522  ///  file.  This returns null on failure.
523  const SrcMgr::ContentCache* getContentCache(const FileEntry *SourceFile);
524
525  /// createMemBufferContentCache - Create a new ContentCache for the specified
526  ///  memory buffer.
527  const SrcMgr::ContentCache*
528  createMemBufferContentCache(const llvm::MemoryBuffer *Buf);
529
530  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FID) const {
531    assert(FID-1 < FileIDs.size() && "Invalid FileID!");
532    return &FileIDs[FID-1];
533  }
534  const SrcMgr::FileIDInfo *getFIDInfo(FileID FID) const {
535    return getFIDInfo(FID.ID);
536  }
537
538  const SrcMgr::ContentCache *getContentCache(FileID FID) const {
539    return getContentCache(getFIDInfo(FID.ID));
540  }
541
542  /// Return the ContentCache structure for the specified FileID.
543  ///  This is always the physical reference for the ID.
544  const SrcMgr::ContentCache*
545  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
546    return FIDInfo->getContentCache();
547  }
548};
549
550
551}  // end namespace clang
552
553#endif
554