SourceManager.h revision 8429fca639bb4fbb6d589f85a2cc84513db4f748
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileManager; 32class FileEntry; 33class IdentifierTokenInfo; 34 35/// SrcMgr - Private classes that are part of the SourceManager implementation. 36/// 37namespace SrcMgr { 38 /// ContentCache - Once instance of this struct is kept for every file 39 /// loaded or used. This object owns the MemoryBuffer object. 40 struct ContentCache { 41 /// Reference to the file entry. This reference does not own 42 /// the FileEntry object. It is possible for this to be NULL if 43 /// the ContentCache encapsulates an imaginary text buffer. 44 const FileEntry* Entry; 45 46 /// Buffer - The actual buffer containing the characters from the input 47 /// file. This is owned by the ContentCache object. 48 const llvm::MemoryBuffer* Buffer; 49 50 /// SourceLineCache - A new[]'d array of offsets for each source line. This 51 /// is lazily computed. This is owned by the ContentCache object. 52 unsigned* SourceLineCache; 53 54 /// NumLines - The number of lines in this ContentCache. This is only valid 55 /// if SourceLineCache is non-null. 56 unsigned NumLines; 57 58 ContentCache(const FileEntry* e = NULL) 59 : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {} 60 61 ~ContentCache(); 62 63 /// The copy ctor does not allow copies where source object has either 64 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 65 /// is not transfered, so this is a logical error. 66 ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) { 67 Entry = RHS.Entry; 68 69 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 70 && "Passed ContentCache object cannot own a buffer."); 71 72 NumLines = RHS.NumLines; 73 } 74 75 /// Emit - Emit this ContentCache to Bitcode. 76 void Emit(llvm::Serializer& S) const; 77 78 /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode 79 // and store it in the specified SourceManager. 80 static void ReadToSourceManager(llvm::Deserializer& D, SourceManager& SMgr, 81 FileManager* FMgr, std::vector<char>& Buf); 82 83 private: 84 // Disable assignments. 85 ContentCache& operator=(const ContentCache& RHS); 86 }; 87 88 /// FileIDInfo - Information about a FileID, basically just the logical file 89 /// that it represents and include stack information. A File SourceLocation 90 /// is a byte offset from the start of this. 91 /// 92 /// FileID's are used to compute the location of a character in memory as well 93 /// as the logical source location, which can be differ from the physical 94 /// location. It is different when #line's are active or when macros have 95 /// been expanded. 96 /// 97 /// Each FileID has include stack information, indicating where it came from. 98 /// For the primary translation unit, it comes from SourceLocation() aka 0. 99 /// This information encodes the #include chain that a token was instantiated 100 /// from. 101 /// 102 /// FileIDInfos contain a "ContentCache *", describing the source file, 103 /// and a Chunk number, which allows a SourceLocation to index into very 104 /// large files (those which there are not enough FilePosBits to address). 105 /// 106 struct FileIDInfo { 107 private: 108 /// IncludeLoc - The location of the #include that brought in this file. 109 /// This SourceLocation object has an invalid SLOC for the main file. 110 SourceLocation IncludeLoc; 111 112 /// ChunkNo - Really large buffers are broken up into chunks that are 113 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 114 /// chunk number of this FileID. 115 unsigned ChunkNo; 116 117 /// Content - Information about the source buffer itself. 118 const ContentCache* Content; 119 120 public: 121 /// get - Return a FileIDInfo object. 122 static FileIDInfo get(SourceLocation IL, unsigned CN, 123 const ContentCache *Con) { 124 FileIDInfo X; 125 X.IncludeLoc = IL; 126 X.ChunkNo = CN; 127 X.Content = Con; 128 return X; 129 } 130 131 SourceLocation getIncludeLoc() const { return IncludeLoc; } 132 unsigned getChunkNo() const { return ChunkNo; } 133 const ContentCache* getContentCache() const { return Content; } 134 135 /// Emit - Emit this FileIDInfo to Bitcode. 136 void Emit(llvm::Serializer& S) const; 137 138 /// ReadVal - Reconstitute a FileIDInfo from Bitcode. 139 static FileIDInfo ReadVal(llvm::Deserializer& S); 140 }; 141 142 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 143 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 144 /// instantiated, and the PhysicalLoc - where the actual character data for 145 /// the token came from. An actual macro SourceLocation stores deltas from 146 /// these positions. 147 class MacroIDInfo { 148 SourceLocation VirtualLoc, PhysicalLoc; 149 public: 150 SourceLocation getVirtualLoc() const { return VirtualLoc; } 151 SourceLocation getPhysicalLoc() const { return PhysicalLoc; } 152 153 /// get - Return a MacroID for a macro expansion. VL specifies 154 /// the instantiation location (where the macro is expanded), and PL 155 /// specifies the physical location (where the characters from the token 156 /// come from). Both VL and PL refer to normal File SLocs. 157 static MacroIDInfo get(SourceLocation VL, SourceLocation PL) { 158 MacroIDInfo X; 159 X.VirtualLoc = VL; 160 X.PhysicalLoc = PL; 161 return X; 162 } 163 164 /// Emit - Emit this MacroIDInfo to Bitcode. 165 void Emit(llvm::Serializer& S) const; 166 167 /// ReadVal - Reconstitute a MacroIDInfo from Bitcode. 168 static MacroIDInfo ReadVal(llvm::Deserializer& S); 169 }; 170} // end SrcMgr namespace. 171} // end clang namespace 172 173namespace std { 174template <> struct less<clang::SrcMgr::ContentCache> { 175 inline bool operator()(const clang::SrcMgr::ContentCache& L, 176 const clang::SrcMgr::ContentCache& R) const { 177 return L.Entry < R.Entry; 178 } 179}; 180} // end std namespace 181 182namespace clang { 183 184/// SourceManager - This file handles loading and caching of source files into 185/// memory. This object owns the MemoryBuffer objects for all of the loaded 186/// files and assigns unique FileID's for each unique #include chain. 187/// 188/// The SourceManager can be queried for information about SourceLocation 189/// objects, turning them into either physical or logical locations. Physical 190/// locations represent where the bytes corresponding to a token came from and 191/// logical locations represent where the location is in the user's view. In 192/// the case of a macro expansion, for example, the physical location indicates 193/// where the expanded token came from and the logical location specifies where 194/// it was expanded. Logical locations are also influenced by #line directives, 195/// etc. 196class SourceManager { 197 /// FileInfos - Memoized information about all of the files tracked by this 198 /// SourceManager. This set allows us to merge ContentCache entries based 199 /// on their FileEntry*. All ContentCache objects will thus have unique, 200 /// non-null, FileEntry pointers. 201 std::set<SrcMgr::ContentCache> FileInfos; 202 203 /// MemBufferInfos - Information about various memory buffers that we have 204 /// read in. This is a list, instead of a vector, because we need pointers to 205 /// the ContentCache objects to be stable. All FileEntry* within the 206 /// stored ContentCache objects are NULL, as they do not refer to a file. 207 std::list<SrcMgr::ContentCache> MemBufferInfos; 208 209 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 210 /// entries are off by one. 211 std::vector<SrcMgr::FileIDInfo> FileIDs; 212 213 /// MacroIDs - Information about each MacroID. 214 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 215 216 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 217 /// method which is used to speedup getLineNumber calls to nearby locations. 218 unsigned LastLineNoFileIDQuery; 219 SrcMgr::ContentCache *LastLineNoContentCache; 220 unsigned LastLineNoFilePos; 221 unsigned LastLineNoResult; 222 223 /// MainFileID - The file ID for the main source file of the translation unit. 224 unsigned MainFileID; 225 226 // SourceManager doesn't support copy construction. 227 explicit SourceManager(const SourceManager&); 228 void operator=(const SourceManager&); 229public: 230 SourceManager() : LastLineNoFileIDQuery(~0U), MainFileID(0) {} 231 ~SourceManager() {} 232 233 void clearIDTables() { 234 MainFileID = 0; 235 FileIDs.clear(); 236 MacroIDs.clear(); 237 LastLineNoFileIDQuery = ~0U; 238 LastLineNoContentCache = 0; 239 } 240 241 /// getMainFileID - Returns the FileID of the main source file. 242 unsigned getMainFileID() const { return MainFileID; } 243 244 /// createFileID - Create a new FileID that represents the specified file 245 /// being #included from the specified IncludePosition. This returns 0 on 246 /// error and translates NULL into standard input. 247 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ 248 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 249 if (IR == 0) return 0; // Error opening file? 250 return createFileID(IR, IncludePos); 251 } 252 253 /// createMainFileID - Create the FileID for the main source file. 254 unsigned createMainFileID(const FileEntry *SourceFile, 255 SourceLocation IncludePos) { 256 257 assert (MainFileID == 0 && "MainFileID already set!"); 258 MainFileID = createFileID(SourceFile,IncludePos); 259 return MainFileID; 260 } 261 262 /// createFileIDForMemBuffer - Create a new FileID that represents the 263 /// specified memory buffer. This does no caching of the buffer and takes 264 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 265 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 266 return createFileID(createMemBufferContentCache(Buffer), SourceLocation()); 267 } 268 269 /// createMainFileIDForMembuffer - Create the FileID for a memory buffer 270 /// that will represent the FileID for the main source. One example 271 /// of when this would be used is when the main source is read from STDIN. 272 unsigned createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 273 assert (MainFileID == 0 && "MainFileID already set!"); 274 MainFileID = createFileIDForMemBuffer(Buffer); 275 return MainFileID; 276 } 277 278 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 279 /// that a token at Loc should actually be referenced from InstantiationLoc. 280 SourceLocation getInstantiationLoc(SourceLocation Loc, 281 SourceLocation InstantiationLoc); 282 283 /// getBuffer - Return the buffer for the specified FileID. 284 /// 285 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 286 return getContentCache(FileID)->Buffer; 287 } 288 289 /// getBufferData - Return a pointer to the start and end of the character 290 /// data for the specified FileID. 291 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 292 293 /// getIncludeLoc - Return the location of the #include for the specified 294 /// SourceLocation. If this is a macro expansion, this transparently figures 295 /// out which file includes the file being expanded into. 296 SourceLocation getIncludeLoc(SourceLocation ID) const { 297 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); 298 } 299 300 /// getCharacterData - Return a pointer to the start of the specified location 301 /// in the appropriate MemoryBuffer. 302 const char *getCharacterData(SourceLocation SL) const; 303 304 /// getColumnNumber - Return the column # for the specified file position. 305 /// This is significantly cheaper to compute than the line number. This 306 /// returns zero if the column number isn't known. This may only be called on 307 /// a file sloc, so you must choose a physical or logical location before 308 /// calling this method. 309 unsigned getColumnNumber(SourceLocation Loc) const; 310 311 unsigned getPhysicalColumnNumber(SourceLocation Loc) const { 312 return getColumnNumber(getPhysicalLoc(Loc)); 313 } 314 unsigned getLogicalColumnNumber(SourceLocation Loc) const { 315 return getColumnNumber(getLogicalLoc(Loc)); 316 } 317 318 319 /// getLineNumber - Given a SourceLocation, return the physical line number 320 /// for the position indicated. This requires building and caching a table of 321 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 322 /// about to emit a diagnostic. 323 unsigned getLineNumber(SourceLocation Loc); 324 325 unsigned getLogicalLineNumber(SourceLocation Loc) { 326 return getLineNumber(getLogicalLoc(Loc)); 327 } 328 unsigned getPhysicalLineNumber(SourceLocation Loc) { 329 return getLineNumber(getPhysicalLoc(Loc)); 330 } 331 332 /// getSourceName - This method returns the name of the file or buffer that 333 /// the SourceLocation specifies. This can be modified with #line directives, 334 /// etc. 335 const char *getSourceName(SourceLocation Loc) const; 336 337 /// Given a SourceLocation object, return the logical location referenced by 338 /// the ID. This logical location is subject to #line directives, etc. 339 SourceLocation getLogicalLoc(SourceLocation Loc) const { 340 // File locations are both physical and logical. 341 if (Loc.isFileID()) return Loc; 342 343 return MacroIDs[Loc.getMacroID()].getVirtualLoc(); 344 } 345 346 /// getPhysicalLoc - Given a SourceLocation object, return the physical 347 /// location referenced by the ID. 348 SourceLocation getPhysicalLoc(SourceLocation Loc) const { 349 // File locations are both physical and logical. 350 if (Loc.isFileID()) return Loc; 351 352 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc(); 353 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs()); 354 } 355 356 /// getContentCacheForLoc - Return the ContentCache for the physloc of the 357 /// specified SourceLocation, if one exists. 358 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 359 Loc = getPhysicalLoc(Loc); 360 unsigned FileID = Loc.getFileID(); 361 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 362 return FileIDs[FileID-1].getContentCache(); 363 } 364 365 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the 366 /// specified SourceLocation, if one exists. 367 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 368 return getContentCacheForLoc(Loc)->Entry; 369 } 370 371 /// getFileEntryForID - Returns the FileEntry record for the provided FileID. 372 const FileEntry* getFileEntryForID(unsigned id) const { 373 return getContentCache(id)->Entry; 374 } 375 376 /// getCanonicalFileID - Return the canonical FileID for a SourceLocation. 377 /// A file can have multiple FileIDs if it is large enough to be broken 378 /// into multiple chunks. This method returns the unique FileID without 379 /// chunk information for a given SourceLocation. Use this method when 380 /// you want to compare FileIDs across SourceLocations. 381 unsigned getCanonicalFileID(SourceLocation PhysLoc) const { 382 return getDecomposedFileLoc(PhysLoc).first; 383 } 384 385 /// getDecomposedFileLoc - Decompose the specified file location into a raw 386 /// FileID + Offset pair. The first element is the FileID, the second is the 387 /// offset from the start of the buffer of the location. 388 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 389 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 390 391 // TODO: Add a flag "is first chunk" to SLOC. 392 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 393 394 // If this file has been split up into chunks, factor in the chunk number 395 // that the FileID references. 396 unsigned ChunkNo = FIDInfo->getChunkNo(); 397 unsigned Offset = Loc.getRawFilePos(); 398 Offset += (ChunkNo << SourceLocation::FilePosBits); 399 400 return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset); 401 } 402 403 /// getFullFilePos - This (efficient) method returns the offset from the start 404 /// of the file that the specified physical SourceLocation represents. This 405 /// returns the location of the physical character data, not the logical file 406 /// position. 407 unsigned getFullFilePos(SourceLocation PhysLoc) const { 408 return getDecomposedFileLoc(PhysLoc).second; 409 } 410 411 /// isFromSameFile - Returns true if both SourceLocations correspond to 412 /// the same file. 413 bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const { 414 return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2); 415 } 416 417 /// isFromMainFile - Returns true if the file of provided SourceLocation is 418 /// the main file. 419 bool isFromMainFile(SourceLocation Loc) const { 420 return getCanonicalFileID(Loc) == getMainFileID(); 421 } 422 423 /// PrintStats - Print statistics to stderr. 424 /// 425 void PrintStats() const; 426 427 /// Emit - Emit this SourceManager to Bitcode. 428 void Emit(llvm::Serializer& S) const; 429 430 /// Read - Reconstitute a SourceManager from Bitcode. 431 static SourceManager* CreateAndRegister(llvm::Deserializer& S, 432 FileManager &FMgr); 433 434private: 435 friend struct SrcMgr::ContentCache; // Used for deserialization. 436 437 /// createFileID - Create a new fileID for the specified ContentCache and 438 /// include position. This works regardless of whether the ContentCache 439 /// corresponds to a file or some other input source. 440 unsigned createFileID(const SrcMgr::ContentCache* File, 441 SourceLocation IncludePos); 442 443 /// getContentCache - Create or return a cached ContentCache for the specified 444 /// file. This returns null on failure. 445 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile); 446 447 /// createMemBufferContentCache - Create a new ContentCache for the specified 448 /// memory buffer. 449 const SrcMgr::ContentCache* 450 createMemBufferContentCache(const llvm::MemoryBuffer* Buf); 451 452 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const { 453 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 454 return &FileIDs[FileID-1]; 455 } 456 457 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const { 458 return getContentCache(getFIDInfo(FileID)); 459 } 460 461 /// Return the ContentCache structure for the specified FileID. 462 /// This is always the physical reference for the ID. 463 const SrcMgr::ContentCache* 464 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 465 return FIDInfo->getContentCache(); 466 } 467}; 468 469 470} // end namespace clang 471 472#endif 473