// SourceManager.h revision 3b4d5e955e819dd3a4bed37ea2e47d6e4cb05274
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileManager; 32class FileEntry; 33class IdentifierTokenInfo; 34 35/// SrcMgr - Public enums and private classes that are part of the 36/// SourceManager implementation. 37/// 38namespace SrcMgr { 39 /// CharacteristicKind - This is used to represent whether a file or directory 40 /// holds normal user code, system code, or system code which is implicitly 41 /// 'extern "C"' in C++ mode. Entire directories can be tagged with this 42 /// (this is maintained by DirectoryLookup and friends) as can specific 43 /// FileIDInfos when a #pragma system_header is seen or various other cases. 44 /// 45 enum CharacteristicKind { 46 C_User, C_System, C_ExternCSystem 47 }; 48 49 /// ContentCache - Once instance of this struct is kept for every file 50 /// loaded or used. This object owns the MemoryBuffer object. 51 class ContentCache { 52 /// Buffer - The actual buffer containing the characters from the input 53 /// file. This is owned by the ContentCache object. 54 mutable const llvm::MemoryBuffer *Buffer; 55 56 public: 57 /// Reference to the file entry. This reference does not own 58 /// the FileEntry object. 
It is possible for this to be NULL if 59 /// the ContentCache encapsulates an imaginary text buffer. 60 const FileEntry *Entry; 61 62 /// SourceLineCache - A new[]'d array of offsets for each source line. This 63 /// is lazily computed. This is owned by the ContentCache object. 64 unsigned *SourceLineCache; 65 66 /// NumLines - The number of lines in this ContentCache. This is only valid 67 /// if SourceLineCache is non-null. 68 unsigned NumLines; 69 70 /// getBuffer - Returns the memory buffer for the associated content. 71 const llvm::MemoryBuffer *getBuffer() const; 72 73 /// getSize - Returns the size of the content encapsulated by this 74 /// ContentCache. This can be the size of the source file or the size of an 75 /// arbitrary scratch buffer. If the ContentCache encapsulates a source 76 /// file this size is retrieved from the file's FileEntry. 77 unsigned getSize() const; 78 79 /// getSizeBytesMapped - Returns the number of bytes actually mapped for 80 /// this ContentCache. This can be 0 if the MemBuffer was not actually 81 /// instantiated. 82 unsigned getSizeBytesMapped() const; 83 84 void setBuffer(const llvm::MemoryBuffer *B) { 85 assert(!Buffer && "MemoryBuffer already set."); 86 Buffer = B; 87 } 88 89 ContentCache(const FileEntry *e = NULL) 90 : Buffer(NULL), Entry(e), SourceLineCache(NULL), NumLines(0) {} 91 92 ~ContentCache(); 93 94 /// The copy ctor does not allow copies where source object has either 95 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 96 /// is not transfered, so this is a logical error. 97 ContentCache(const ContentCache &RHS) : Buffer(NULL),SourceLineCache(NULL) { 98 Entry = RHS.Entry; 99 100 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 101 && "Passed ContentCache object cannot own a buffer."); 102 103 NumLines = RHS.NumLines; 104 } 105 106 /// Emit - Emit this ContentCache to Bitcode. 
107 void Emit(llvm::Serializer &S) const; 108 109 /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode 110 // and store it in the specified SourceManager. 111 static void ReadToSourceManager(llvm::Deserializer &D, SourceManager &SM, 112 FileManager *FMgr, std::vector<char> &Buf); 113 114 private: 115 // Disable assignments. 116 ContentCache &operator=(const ContentCache& RHS); 117 }; 118 119 /// FileIDInfo - Information about a FileID, basically just the logical file 120 /// that it represents and include stack information. A File SourceLocation 121 /// is a byte offset from the start of this. 122 /// 123 /// FileID's are used to compute the location of a character in memory as well 124 /// as the instantiation source location, which can be differ from the 125 /// spelling location. It is different when #line's are active or when macros 126 /// have been expanded. 127 /// 128 /// Each FileID has include stack information, indicating where it came from. 129 /// For the primary translation unit, it comes from SourceLocation() aka 0. 130 /// This information encodes the #include chain that a token was instantiated 131 /// from. 132 /// 133 /// FileIDInfos contain a "ContentCache *", describing the source file, 134 /// and a Chunk number, which allows a SourceLocation to index into very 135 /// large files (those which there are not enough FilePosBits to address). 136 /// 137 struct FileIDInfo { 138 private: 139 /// IncludeLoc - The location of the #include that brought in this file. 140 /// This SourceLocation object has an invalid SLOC for the main file. 141 SourceLocation IncludeLoc; 142 143 /// ChunkNo - Really large buffers are broken up into chunks that are 144 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 145 /// chunk number of this FileID. 146 unsigned ChunkNo : 30; 147 148 /// FileCharacteristic - This is an instance of CharacteristicKind, 149 /// indicating whether this is a system header dir or not. 
150 unsigned FileCharacteristic : 2; 151 152 /// Content - Information about the source buffer itself. 153 const ContentCache *Content; 154 155 public: 156 /// get - Return a FileIDInfo object. 157 static FileIDInfo get(SourceLocation IL, unsigned CN, 158 const ContentCache *Con, 159 CharacteristicKind FileCharacter) { 160 FileIDInfo X; 161 X.IncludeLoc = IL; 162 X.ChunkNo = CN; 163 X.Content = Con; 164 X.FileCharacteristic = FileCharacter; 165 return X; 166 } 167 168 SourceLocation getIncludeLoc() const { return IncludeLoc; } 169 unsigned getChunkNo() const { return ChunkNo; } 170 const ContentCache* getContentCache() const { return Content; } 171 172 /// getCharacteristic - Return whether this is a system header or not. 173 CharacteristicKind getFileCharacteristic() const { 174 return (CharacteristicKind)FileCharacteristic; 175 } 176 177 /// Emit - Emit this FileIDInfo to Bitcode. 178 void Emit(llvm::Serializer& S) const; 179 180 /// ReadVal - Reconstitute a FileIDInfo from Bitcode. 181 static FileIDInfo ReadVal(llvm::Deserializer& S); 182 }; 183 184 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 185 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 186 /// instantiated, and the SpellingLoc - where the actual character data for 187 /// the token came from. An actual macro SourceLocation stores deltas from 188 /// these positions. 189 class MacroIDInfo { 190 SourceLocation InstantiationLoc, SpellingLoc; 191 public: 192 SourceLocation getInstantiationLoc() const { return InstantiationLoc; } 193 SourceLocation getSpellingLoc() const { return SpellingLoc; } 194 195 /// get - Return a MacroID for a macro expansion. VL specifies 196 /// the instantiation location (where the macro is expanded), and SL 197 /// specifies the spelling location (where the characters from the token 198 /// come from). Both VL and PL refer to normal File SLocs. 
199 static MacroIDInfo get(SourceLocation VL, SourceLocation SL) { 200 MacroIDInfo X; 201 X.InstantiationLoc = VL; 202 X.SpellingLoc = SL; 203 return X; 204 } 205 206 /// Emit - Emit this MacroIDInfo to Bitcode. 207 void Emit(llvm::Serializer& S) const; 208 209 /// ReadVal - Reconstitute a MacroIDInfo from Bitcode. 210 static MacroIDInfo ReadVal(llvm::Deserializer& S); 211 }; 212} // end SrcMgr namespace. 213} // end clang namespace 214 215namespace std { 216template <> struct less<clang::SrcMgr::ContentCache> { 217 inline bool operator()(const clang::SrcMgr::ContentCache& L, 218 const clang::SrcMgr::ContentCache& R) const { 219 return L.Entry < R.Entry; 220 } 221}; 222} // end std namespace 223 224namespace clang { 225 226/// SourceManager - This file handles loading and caching of source files into 227/// memory. This object owns the MemoryBuffer objects for all of the loaded 228/// files and assigns unique FileID's for each unique #include chain. 229/// 230/// The SourceManager can be queried for information about SourceLocation 231/// objects, turning them into either spelling or instantiation locations. 232/// Spelling locations represent where the bytes corresponding to a token came 233/// from and instantiation locations represent where the location is in the 234/// user's view. In the case of a macro expansion, for example, the spelling 235/// location indicates where the expanded token came from and the instantiation 236/// location specifies where it was expanded. 237class SourceManager { 238 /// FileInfos - Memoized information about all of the files tracked by this 239 /// SourceManager. This set allows us to merge ContentCache entries based 240 /// on their FileEntry*. All ContentCache objects will thus have unique, 241 /// non-null, FileEntry pointers. 242 std::set<SrcMgr::ContentCache> FileInfos; 243 244 /// MemBufferInfos - Information about various memory buffers that we have 245 /// read in. 
This is a list, instead of a vector, because we need pointers to 246 /// the ContentCache objects to be stable. All FileEntry* within the 247 /// stored ContentCache objects are NULL, as they do not refer to a file. 248 std::list<SrcMgr::ContentCache> MemBufferInfos; 249 250 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 251 /// entries are off by one. 252 std::vector<SrcMgr::FileIDInfo> FileIDs; 253 254 /// MacroIDs - Information about each MacroID. 255 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 256 257 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 258 /// method which is used to speedup getLineNumber calls to nearby locations. 259 mutable FileID LastLineNoFileIDQuery; 260 mutable SrcMgr::ContentCache *LastLineNoContentCache; 261 mutable unsigned LastLineNoFilePos; 262 mutable unsigned LastLineNoResult; 263 264 /// MainFileID - The file ID for the main source file of the translation unit. 265 FileID MainFileID; 266 267 // SourceManager doesn't support copy construction. 268 explicit SourceManager(const SourceManager&); 269 void operator=(const SourceManager&); 270public: 271 SourceManager() {} 272 ~SourceManager() {} 273 274 void clearIDTables() { 275 MainFileID = FileID(); 276 FileIDs.clear(); 277 MacroIDs.clear(); 278 LastLineNoFileIDQuery = FileID(); 279 LastLineNoContentCache = 0; 280 } 281 282 /// getMainFileID - Returns the FileID of the main source file. 283 FileID getMainFileID() const { return MainFileID; } 284 285 /// createFileID - Create a new FileID that represents the specified file 286 /// being #included from the specified IncludePosition. This returns 0 on 287 /// error and translates NULL into standard input. 288 FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, 289 SrcMgr::CharacteristicKind FileCharacter) { 290 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 291 if (IR == 0) return FileID(); // Error opening file? 
292 return createFileID(IR, IncludePos, FileCharacter); 293 } 294 295 /// createMainFileID - Create the FileID for the main source file. 296 FileID createMainFileID(const FileEntry *SourceFile, 297 SourceLocation IncludePos) { 298 299 assert(MainFileID.isInvalid() && "MainFileID already set!"); 300 MainFileID = createFileID(SourceFile, IncludePos, SrcMgr::C_User); 301 return MainFileID; 302 } 303 304 /// createFileIDForMemBuffer - Create a new FileID that represents the 305 /// specified memory buffer. This does no caching of the buffer and takes 306 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 307 FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 308 return createFileID(createMemBufferContentCache(Buffer), SourceLocation(), 309 SrcMgr::C_User); 310 } 311 312 /// createMainFileIDForMembuffer - Create the FileID for a memory buffer 313 /// that will represent the FileID for the main source. One example 314 /// of when this would be used is when the main source is read from STDIN. 315 FileID createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 316 assert(MainFileID.isInvalid() && "MainFileID already set!"); 317 MainFileID = createFileIDForMemBuffer(Buffer); 318 return MainFileID; 319 } 320 321 /// getLocForStartOfFile - Return the source location corresponding to the 322 /// first byte of the specified file. 323 SourceLocation getLocForStartOfFile(FileID FID) const { 324 return SourceLocation::getFileLoc(FID.ID, 0); 325 } 326 327 328 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 329 /// that a token at Loc should actually be referenced from InstantiationLoc. 330 SourceLocation getInstantiationLoc(SourceLocation Loc, 331 SourceLocation InstantiationLoc); 332 333 /// getBuffer - Return the buffer for the specified FileID. 
334 /// 335 const llvm::MemoryBuffer *getBuffer(FileID FID) const { 336 return getContentCache(FID)->getBuffer(); 337 } 338 339 const llvm::MemoryBuffer *getBuffer(SourceLocation Loc) const { 340 return getContentCacheForLoc(Loc)->getBuffer(); 341 } 342 343 344 /// getBufferData - Return a pointer to the start and end of the character 345 /// data for the specified FileID. 346 std::pair<const char*, const char*> getBufferData(SourceLocation Loc) const; 347 std::pair<const char*, const char*> getBufferData(FileID FID) const; 348 349 /// getIncludeLoc - Return the location of the #include for the specified 350 /// SourceLocation. If this is a macro expansion, this transparently figures 351 /// out which file includes the file being expanded into. 352 SourceLocation getIncludeLoc(SourceLocation ID) const { 353 return getFIDInfo(getInstantiationLoc(ID).getChunkID())->getIncludeLoc(); 354 } 355 356 /// getCharacterData - Return a pointer to the start of the specified location 357 /// in the appropriate MemoryBuffer. 358 const char *getCharacterData(SourceLocation SL) const; 359 360 /// getColumnNumber - Return the column # for the specified file position. 361 /// This is significantly cheaper to compute than the line number. This 362 /// returns zero if the column number isn't known. This may only be called on 363 /// a file sloc, so you must choose a spelling or instantiation location 364 /// before calling this method. 365 unsigned getColumnNumber(SourceLocation Loc) const; 366 367 unsigned getSpellingColumnNumber(SourceLocation Loc) const { 368 return getColumnNumber(getSpellingLoc(Loc)); 369 } 370 unsigned getInstantiationColumnNumber(SourceLocation Loc) const { 371 return getColumnNumber(getInstantiationLoc(Loc)); 372 } 373 374 375 /// getLineNumber - Given a SourceLocation, return the spelling line number 376 /// for the position indicated. 
This requires building and caching a table of 377 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 378 /// about to emit a diagnostic. 379 unsigned getLineNumber(SourceLocation Loc) const; 380 381 unsigned getInstantiationLineNumber(SourceLocation Loc) const { 382 return getLineNumber(getInstantiationLoc(Loc)); 383 } 384 unsigned getSpellingLineNumber(SourceLocation Loc) const { 385 return getLineNumber(getSpellingLoc(Loc)); 386 } 387 388 /// getSourceName - This method returns the name of the file or buffer that 389 /// the SourceLocation specifies. This can be modified with #line directives, 390 /// etc. 391 const char *getSourceName(SourceLocation Loc) const; 392 393 /// Given a SourceLocation object, return the instantiation location 394 /// referenced by the ID. 395 SourceLocation getInstantiationLoc(SourceLocation Loc) const { 396 // File locations work. 397 if (Loc.isFileID()) return Loc; 398 399 return MacroIDs[Loc.getMacroID()].getInstantiationLoc(); 400 } 401 402 /// getSpellingLoc - Given a SourceLocation object, return the spelling 403 /// location referenced by the ID. This is the place where the characters 404 /// that make up the lexed token can be found. 405 SourceLocation getSpellingLoc(SourceLocation Loc) const { 406 // File locations work! 407 if (Loc.isFileID()) return Loc; 408 409 // Look up the macro token's spelling location. 410 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc(); 411 return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs()); 412 } 413 414 /// getContentCacheForLoc - Return the ContentCache for the spelling loc of 415 /// the specified SourceLocation, if one exists. 
416 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 417 Loc = getSpellingLoc(Loc); 418 unsigned ChunkID = Loc.getChunkID(); 419 assert(ChunkID-1 < FileIDs.size() && "Invalid FileID!"); 420 return FileIDs[ChunkID-1].getContentCache(); 421 } 422 423 /// getFileEntryForLoc - Return the FileEntry record for the spelling loc of 424 /// the specified SourceLocation, if one exists. 425 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 426 return getContentCacheForLoc(Loc)->Entry; 427 } 428 429 /// getFileEntryForID - Returns the FileEntry record for the provided FileID. 430 const FileEntry *getFileEntryForID(FileID FID) const { 431 return getContentCache(FID)->Entry; 432 } 433 434 /// getCanonicalFileID - Return the canonical FileID for a SourceLocation. 435 /// A file can have multiple FileIDs if it is large enough to be broken 436 /// into multiple chunks. This method returns the unique FileID without 437 /// chunk information for a given SourceLocation. Use this method when 438 /// you want to compare FileIDs across SourceLocations. 439 FileID getCanonicalFileID(SourceLocation SpellingLoc) const { 440 return getDecomposedFileLoc(SpellingLoc).first; 441 } 442 443 /// getDecomposedFileLoc - Decompose the specified file location into a raw 444 /// FileID + Offset pair. The first element is the FileID, the second is the 445 /// offset from the start of the buffer of the location. 446 std::pair<FileID, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 447 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 448 449 // TODO: Add a flag "is first chunk" to SLOC. 450 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getChunkID()); 451 452 // If this file has been split up into chunks, factor in the chunk number 453 // that the FileID references. 
454 unsigned ChunkNo = FIDInfo->getChunkNo(); 455 unsigned Offset = Loc.getRawFilePos(); 456 Offset += (ChunkNo << SourceLocation::FilePosBits); 457 458 assert(Loc.getChunkID() >= ChunkNo && "Unexpected offset"); 459 460 return std::make_pair(FileID::Create(Loc.getChunkID()-ChunkNo), Offset); 461 } 462 463 /// getFullFilePos - This (efficient) method returns the offset from the start 464 /// of the file that the specified spelling SourceLocation represents. This 465 /// returns the location of the actual character data, not the instantiation 466 /// position. 467 unsigned getFullFilePos(SourceLocation SpellingLoc) const { 468 return getDecomposedFileLoc(SpellingLoc).second; 469 } 470 471 /// isFromSameFile - Returns true if both SourceLocations correspond to 472 /// the same file. 473 bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const { 474 return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2); 475 } 476 477 /// isFromMainFile - Returns true if the file of provided SourceLocation is 478 /// the main file. 479 bool isFromMainFile(SourceLocation Loc) const { 480 return getCanonicalFileID(Loc) == getMainFileID(); 481 } 482 483 /// isInSystemHeader - Returns if a SourceLocation is in a system header. 484 bool isInSystemHeader(SourceLocation Loc) const { 485 return getFileCharacteristic(Loc) != SrcMgr::C_User; 486 } 487 SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const { 488 return getFIDInfo(getSpellingLoc(Loc).getChunkID()) 489 ->getFileCharacteristic(); 490 } 491 SrcMgr::CharacteristicKind getFileCharacteristic(FileID FID) const { 492 return getFIDInfo(FID)->getFileCharacteristic(); 493 } 494 495 // Iterators over FileInfos. 496 typedef std::set<SrcMgr::ContentCache>::const_iterator fileinfo_iterator; 497 fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); } 498 fileinfo_iterator fileinfo_end() const { return FileInfos.end(); } 499 500 /// PrintStats - Print statistics to stderr. 
501 /// 502 void PrintStats() const; 503 504 /// Emit - Emit this SourceManager to Bitcode. 505 void Emit(llvm::Serializer& S) const; 506 507 /// Read - Reconstitute a SourceManager from Bitcode. 508 static SourceManager* CreateAndRegister(llvm::Deserializer& S, 509 FileManager &FMgr); 510 511private: 512 friend struct SrcMgr::ContentCache; // Used for deserialization. 513 514 /// createFileID - Create a new fileID for the specified ContentCache and 515 /// include position. This works regardless of whether the ContentCache 516 /// corresponds to a file or some other input source. 517 FileID createFileID(const SrcMgr::ContentCache* File, 518 SourceLocation IncludePos, 519 SrcMgr::CharacteristicKind DirCharacter); 520 521 /// getContentCache - Create or return a cached ContentCache for the specified 522 /// file. This returns null on failure. 523 const SrcMgr::ContentCache* getContentCache(const FileEntry *SourceFile); 524 525 /// createMemBufferContentCache - Create a new ContentCache for the specified 526 /// memory buffer. 527 const SrcMgr::ContentCache* 528 createMemBufferContentCache(const llvm::MemoryBuffer *Buf); 529 530 const SrcMgr::FileIDInfo *getFIDInfo(unsigned FID) const { 531 assert(FID-1 < FileIDs.size() && "Invalid FileID!"); 532 return &FileIDs[FID-1]; 533 } 534 const SrcMgr::FileIDInfo *getFIDInfo(FileID FID) const { 535 return getFIDInfo(FID.ID); 536 } 537 538 const SrcMgr::ContentCache *getContentCache(FileID FID) const { 539 return getContentCache(getFIDInfo(FID.ID)); 540 } 541 542 /// Return the ContentCache structure for the specified FileID. 543 /// This is always the physical reference for the ID. 544 const SrcMgr::ContentCache* 545 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 546 return FIDInfo->getContentCache(); 547 } 548}; 549 550 551} // end namespace clang 552 553#endif 554