// SourceManager.h revision e21272fbdbfbf5bf3461d3e9b42279f4d47caa42
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileEntry; 32class IdentifierTokenInfo; 33 34/// SrcMgr - Private classes that are part of the SourceManager implementation. 35/// 36namespace SrcMgr { 37 /// ContentCache - Once instance of this struct is kept for every file 38 /// loaded or used. This object owns the MemoryBuffer object. 39 struct ContentCache { 40 /// Reference to the file entry. This reference does not own 41 /// the FileEntry object. It is possible for this to be NULL if 42 /// the ContentCache encapsulates an imaginary text buffer. 43 const FileEntry* Entry; 44 45 /// Buffer - The actual buffer containing the characters from the input 46 /// file. This is owned by the ContentCache object. 47 const llvm::MemoryBuffer* Buffer; 48 49 /// SourceLineCache - A new[]'d array of offsets for each source line. This 50 /// is lazily computed. This is owned by the ContentCache object. 51 unsigned* SourceLineCache; 52 53 /// NumLines - The number of lines in this ContentCache. This is only valid 54 /// if SourceLineCache is non-null. 
55 unsigned NumLines; 56 57 ContentCache(const FileEntry* e = NULL) 58 : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {} 59 60 ~ContentCache(); 61 62 /// The copy ctor does not allow copies where source object has either 63 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 64 /// is not transfered, so this is a logical error. 65 ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) { 66 Entry = RHS.Entry; 67 68 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 69 && "Passed ContentCache object cannot own a buffer."); 70 71 NumLines = RHS.NumLines; 72 } 73 74 /// Emit - Emit this ContentCache to Bitcode. 75 void Emit(llvm::Serializer& S, bool StoreBufferName, 76 bool StoreBufferContents) const; 77 78 /// Read - Reconstitute a ContentCache from Bitcode. 79 void Read(llvm::Deserializer& D, std::vector<char>* BufferNameBuf, 80 bool ReadBufferContents); 81 82 private: 83 // Disable assignments. 84 ContentCache& operator=(const ContentCache& RHS); 85 }; 86 87 /// FileIDInfo - Information about a FileID, basically just the logical file 88 /// that it represents and include stack information. A File SourceLocation 89 /// is a byte offset from the start of this. 90 /// 91 /// FileID's are used to compute the location of a character in memory as well 92 /// as the logical source location, which can be differ from the physical 93 /// location. It is different when #line's are active or when macros have 94 /// been expanded. 95 /// 96 /// Each FileID has include stack information, indicating where it came from. 97 /// For the primary translation unit, it comes from SourceLocation() aka 0. 98 /// This information encodes the #include chain that a token was instantiated 99 /// from. 
100 /// 101 /// FileIDInfos contain a "ContentCache *", describing the source file, 102 /// and a Chunk number, which allows a SourceLocation to index into very 103 /// large files (those which there are not enough FilePosBits to address). 104 /// 105 struct FileIDInfo { 106 private: 107 /// IncludeLoc - The location of the #include that brought in this file. 108 /// This SourceLocation object has an invalid SLOC for the main file. 109 SourceLocation IncludeLoc; 110 111 /// ChunkNo - Really large buffers are broken up into chunks that are 112 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 113 /// chunk number of this FileID. 114 unsigned ChunkNo; 115 116 /// Content - Information about the source buffer itself. 117 const ContentCache* Content; 118 119 public: 120 /// get - Return a FileIDInfo object. 121 static FileIDInfo get(SourceLocation IL, unsigned CN, 122 const ContentCache *Con) { 123 FileIDInfo X; 124 X.IncludeLoc = IL; 125 X.ChunkNo = CN; 126 X.Content = Con; 127 return X; 128 } 129 130 SourceLocation getIncludeLoc() const { return IncludeLoc; } 131 unsigned getChunkNo() const { return ChunkNo; } 132 const ContentCache* getContentCache() const { return Content; } 133 }; 134 135 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 136 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 137 /// instantiated, and the PhysicalLoc - where the actual character data for 138 /// the token came from. An actual macro SourceLocation stores deltas from 139 /// these positions. 140 class MacroIDInfo { 141 SourceLocation VirtualLoc, PhysicalLoc; 142 public: 143 SourceLocation getVirtualLoc() const { return VirtualLoc; } 144 SourceLocation getPhysicalLoc() const { return PhysicalLoc; } 145 146 /// get - Return a MacroID for a macro expansion. 
VL specifies 147 /// the instantiation location (where the macro is expanded), and PL 148 /// specifies the physical location (where the characters from the token 149 /// come from). Both VL and PL refer to normal File SLocs. 150 static MacroIDInfo get(SourceLocation VL, SourceLocation PL) { 151 MacroIDInfo X; 152 X.VirtualLoc = VL; 153 X.PhysicalLoc = PL; 154 return X; 155 } 156 }; 157} // end SrcMgr namespace. 158} // end clang namespace 159 160namespace std { 161template <> struct less<clang::SrcMgr::ContentCache> { 162 inline bool operator()(const clang::SrcMgr::ContentCache& L, 163 const clang::SrcMgr::ContentCache& R) const { 164 return L.Entry < R.Entry; 165 } 166}; 167} // end std namespace 168 169namespace clang { 170 171/// SourceManager - This file handles loading and caching of source files into 172/// memory. This object owns the MemoryBuffer objects for all of the loaded 173/// files and assigns unique FileID's for each unique #include chain. 174/// 175/// The SourceManager can be queried for information about SourceLocation 176/// objects, turning them into either physical or logical locations. Physical 177/// locations represent where the bytes corresponding to a token came from and 178/// logical locations represent where the location is in the user's view. In 179/// the case of a macro expansion, for example, the physical location indicates 180/// where the expanded token came from and the logical location specifies where 181/// it was expanded. Logical locations are also influenced by #line directives, 182/// etc. 183class SourceManager { 184 /// FileInfos - Memoized information about all of the files tracked by this 185 /// SourceManager. This set allows us to merge ContentCache entries based 186 /// on their FileEntry*. All ContentCache objects will thus have unique, 187 /// non-null, FileEntry pointers. 
188 std::set<SrcMgr::ContentCache> FileInfos; 189 190 /// MemBufferInfos - Information about various memory buffers that we have 191 /// read in. This is a list, instead of a vector, because we need pointers to 192 /// the ContentCache objects to be stable. All FileEntry* within the 193 /// stored ContentCache objects are NULL, as they do not refer to a file. 194 std::list<SrcMgr::ContentCache> MemBufferInfos; 195 196 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 197 /// entries are off by one. 198 std::vector<SrcMgr::FileIDInfo> FileIDs; 199 200 /// MacroIDs - Information about each MacroID. 201 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 202 203 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 204 /// method which is used to speedup getLineNumber calls to nearby locations. 205 unsigned LastLineNoFileIDQuery; 206 SrcMgr::ContentCache *LastLineNoContentCache; 207 unsigned LastLineNoFilePos; 208 unsigned LastLineNoResult; 209 210public: 211 SourceManager() : LastLineNoFileIDQuery(~0U) {} 212 ~SourceManager() {} 213 214 void clearIDTables() { 215 FileIDs.clear(); 216 MacroIDs.clear(); 217 LastLineNoFileIDQuery = ~0U; 218 LastLineNoContentCache = 0; 219 } 220 221 /// createFileID - Create a new FileID that represents the specified file 222 /// being #included from the specified IncludePosition. This returns 0 on 223 /// error and translates NULL into standard input. 224 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ 225 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 226 if (IR == 0) return 0; // Error opening file? 227 return createFileID(IR, IncludePos); 228 } 229 230 /// createFileIDForMemBuffer - Create a new FileID that represents the 231 /// specified memory buffer. This does no caching of the buffer and takes 232 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 
233 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 234 return createFileID(createMemBufferContentCache(Buffer), SourceLocation()); 235 } 236 237 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 238 /// that a token at Loc should actually be referenced from InstantiationLoc. 239 SourceLocation getInstantiationLoc(SourceLocation Loc, 240 SourceLocation InstantiationLoc); 241 242 /// getBuffer - Return the buffer for the specified FileID. 243 /// 244 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 245 return getContentCache(FileID)->Buffer; 246 } 247 248 /// getBufferData - Return a pointer to the start and end of the character 249 /// data for the specified FileID. 250 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 251 252 /// getIncludeLoc - Return the location of the #include for the specified 253 /// SourceLocation. If this is a macro expansion, this transparently figures 254 /// out which file includes the file being expanded into. 255 SourceLocation getIncludeLoc(SourceLocation ID) const { 256 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); 257 } 258 259 /// getCharacterData - Return a pointer to the start of the specified location 260 /// in the appropriate MemoryBuffer. 261 const char *getCharacterData(SourceLocation SL) const; 262 263 /// getColumnNumber - Return the column # for the specified file position. 264 /// This is significantly cheaper to compute than the line number. This 265 /// returns zero if the column number isn't known. This may only be called on 266 /// a file sloc, so you must choose a physical or logical location before 267 /// calling this method. 
268 unsigned getColumnNumber(SourceLocation Loc) const; 269 270 unsigned getPhysicalColumnNumber(SourceLocation Loc) const { 271 return getColumnNumber(getPhysicalLoc(Loc)); 272 } 273 unsigned getLogicalColumnNumber(SourceLocation Loc) const { 274 return getColumnNumber(getLogicalLoc(Loc)); 275 } 276 277 278 /// getLineNumber - Given a SourceLocation, return the physical line number 279 /// for the position indicated. This requires building and caching a table of 280 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 281 /// about to emit a diagnostic. 282 unsigned getLineNumber(SourceLocation Loc); 283 284 unsigned getLogicalLineNumber(SourceLocation Loc) { 285 return getLineNumber(getLogicalLoc(Loc)); 286 } 287 unsigned getPhysicalLineNumber(SourceLocation Loc) { 288 return getLineNumber(getPhysicalLoc(Loc)); 289 } 290 291 /// getSourceName - This method returns the name of the file or buffer that 292 /// the SourceLocation specifies. This can be modified with #line directives, 293 /// etc. 294 const char *getSourceName(SourceLocation Loc) const; 295 296 /// Given a SourceLocation object, return the logical location referenced by 297 /// the ID. This logical location is subject to #line directives, etc. 298 SourceLocation getLogicalLoc(SourceLocation Loc) const { 299 // File locations are both physical and logical. 300 if (Loc.isFileID()) return Loc; 301 302 return MacroIDs[Loc.getMacroID()].getVirtualLoc(); 303 } 304 305 /// getPhysicalLoc - Given a SourceLocation object, return the physical 306 /// location referenced by the ID. 307 SourceLocation getPhysicalLoc(SourceLocation Loc) const { 308 // File locations are both physical and logical. 
309 if (Loc.isFileID()) return Loc; 310 311 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc(); 312 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs()); 313 } 314 315 /// getContentCacheForLoc - Return the ContentCache for the physloc of the 316 /// specified SourceLocation, if one exists. 317 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 318 Loc = getPhysicalLoc(Loc); 319 unsigned FileID = Loc.getFileID(); 320 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 321 return FileIDs[FileID-1].getContentCache(); 322 } 323 324 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the 325 /// specified SourceLocation, if one exists. 326 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 327 return getContentCacheForLoc(Loc)->Entry; 328 } 329 330 /// getDecomposedFileLoc - Decompose the specified file location into a raw 331 /// FileID + Offset pair. The first element is the FileID, the second is the 332 /// offset from the start of the buffer of the location. 333 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 334 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 335 336 // TODO: Add a flag "is first chunk" to SLOC. 337 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 338 339 // If this file has been split up into chunks, factor in the chunk number 340 // that the FileID references. 341 unsigned ChunkNo = FIDInfo->getChunkNo(); 342 unsigned Offset = Loc.getRawFilePos(); 343 Offset += (ChunkNo << SourceLocation::FilePosBits); 344 345 return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset); 346 } 347 348 /// PrintStats - Print statistics to stderr. 349 /// 350 void PrintStats() const; 351 352private: 353 /// createFileID - Create a new fileID for the specified ContentCache and 354 /// include position. This works regardless of whether the ContentCache 355 /// corresponds to a file or some other input source. 
356 unsigned createFileID(const SrcMgr::ContentCache* File, 357 SourceLocation IncludePos); 358 359 /// getContentCache - Create or return a cached ContentCache for the specified 360 /// file. This returns null on failure. 361 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile); 362 363 /// createMemBufferContentCache - Create a new ContentCache for the specified 364 /// memory buffer. 365 const SrcMgr::ContentCache* 366 createMemBufferContentCache(const llvm::MemoryBuffer* Buf); 367 368 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const { 369 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 370 return &FileIDs[FileID-1]; 371 } 372 373 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const { 374 return getContentCache(getFIDInfo(FileID)); 375 } 376 377 /// Return the ContentCache structure for the specified FileID. 378 /// This is always the physical reference for the ID. 379 const SrcMgr::ContentCache* 380 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 381 return FIDInfo->getContentCache(); 382 } 383 384 /// getFullFilePos - This (efficient) method returns the offset from the start 385 /// of the file that the specified physical SourceLocation represents. This 386 /// returns the location of the physical character data, not the logical file 387 /// position. 388 unsigned getFullFilePos(SourceLocation PhysLoc) const { 389 return getDecomposedFileLoc(PhysLoc).second; 390 } 391}; 392 393 394} // end namespace clang 395 396#endif 397