// SourceManager.h revision 31530bae788dea5c79e72a1f99a56c3f5c6aa36f
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileManager; 32class FileEntry; 33class IdentifierTokenInfo; 34 35/// SrcMgr - Public enums and private classes that are part of the 36/// SourceManager implementation. 37/// 38namespace SrcMgr { 39 /// CharacteristicKind - This is used to represent whether a file or directory 40 /// holds normal user code, system code, or system code which is implicitly 41 /// 'extern "C"' in C++ mode. Entire directories can be tagged with this 42 /// (this is maintained by DirectoryLookup and friends) as can specific 43 /// FileIDInfos when a #pragma system_header is seen or various other cases. 44 /// 45 enum CharacteristicKind { 46 C_User, C_System, C_ExternCSystem 47 }; 48 49 /// ContentCache - Once instance of this struct is kept for every file 50 /// loaded or used. This object owns the MemoryBuffer object. 51 class ContentCache { 52 /// Buffer - The actual buffer containing the characters from the input 53 /// file. This is owned by the ContentCache object. 54 mutable const llvm::MemoryBuffer *Buffer; 55 56 public: 57 /// Reference to the file entry. This reference does not own 58 /// the FileEntry object. 
It is possible for this to be NULL if 59 /// the ContentCache encapsulates an imaginary text buffer. 60 const FileEntry *Entry; 61 62 /// SourceLineCache - A new[]'d array of offsets for each source line. This 63 /// is lazily computed. This is owned by the ContentCache object. 64 unsigned *SourceLineCache; 65 66 /// NumLines - The number of lines in this ContentCache. This is only valid 67 /// if SourceLineCache is non-null. 68 unsigned NumLines; 69 70 /// getBuffer - Returns the memory buffer for the associated content. 71 const llvm::MemoryBuffer *getBuffer() const; 72 73 /// getSize - Returns the size of the content encapsulated by this 74 /// ContentCache. This can be the size of the source file or the size of an 75 /// arbitrary scratch buffer. If the ContentCache encapsulates a source 76 /// file this size is retrieved from the file's FileEntry. 77 unsigned getSize() const; 78 79 /// getSizeBytesMapped - Returns the number of bytes actually mapped for 80 /// this ContentCache. This can be 0 if the MemBuffer was not actually 81 /// instantiated. 82 unsigned getSizeBytesMapped() const; 83 84 void setBuffer(const llvm::MemoryBuffer *B) { 85 assert(!Buffer && "MemoryBuffer already set."); 86 Buffer = B; 87 } 88 89 ContentCache(const FileEntry *e = NULL) 90 : Buffer(NULL), Entry(e), SourceLineCache(NULL), NumLines(0) {} 91 92 ~ContentCache(); 93 94 /// The copy ctor does not allow copies where source object has either 95 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 96 /// is not transfered, so this is a logical error. 97 ContentCache(const ContentCache &RHS) : Buffer(NULL),SourceLineCache(NULL) { 98 Entry = RHS.Entry; 99 100 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 101 && "Passed ContentCache object cannot own a buffer."); 102 103 NumLines = RHS.NumLines; 104 } 105 106 /// Emit - Emit this ContentCache to Bitcode. 
107 void Emit(llvm::Serializer &S) const; 108 109 /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode 110 // and store it in the specified SourceManager. 111 static void ReadToSourceManager(llvm::Deserializer &D, SourceManager &SM, 112 FileManager *FMgr, std::vector<char> &Buf); 113 114 private: 115 // Disable assignments. 116 ContentCache &operator=(const ContentCache& RHS); 117 }; 118 119 /// FileIDInfo - Information about a FileID, basically just the logical file 120 /// that it represents and include stack information. A File SourceLocation 121 /// is a byte offset from the start of this. 122 /// 123 /// FileID's are used to compute the location of a character in memory as well 124 /// as the instantiation source location, which can be differ from the 125 /// spelling location. It is different when #line's are active or when macros 126 /// have been expanded. 127 /// 128 /// Each FileID has include stack information, indicating where it came from. 129 /// For the primary translation unit, it comes from SourceLocation() aka 0. 130 /// This information encodes the #include chain that a token was instantiated 131 /// from. 132 /// 133 /// FileIDInfos contain a "ContentCache *", describing the source file, 134 /// and a Chunk number, which allows a SourceLocation to index into very 135 /// large files (those which there are not enough FilePosBits to address). 136 /// 137 struct FileIDInfo { 138 private: 139 /// IncludeLoc - The location of the #include that brought in this file. 140 /// This SourceLocation object has an invalid SLOC for the main file. 141 SourceLocation IncludeLoc; 142 143 /// ChunkNo - Really large buffers are broken up into chunks that are 144 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 145 /// chunk number of this FileID. 146 unsigned ChunkNo : 30; 147 148 /// FileCharacteristic - This is an instance of CharacteristicKind, 149 /// indicating whether this is a system header dir or not. 
150 unsigned FileCharacteristic : 2; 151 152 /// Content - Information about the source buffer itself. 153 const ContentCache *Content; 154 155 public: 156 /// get - Return a FileIDInfo object. 157 static FileIDInfo get(SourceLocation IL, unsigned CN, 158 const ContentCache *Con, 159 CharacteristicKind FileCharacter) { 160 FileIDInfo X; 161 X.IncludeLoc = IL; 162 X.ChunkNo = CN; 163 X.Content = Con; 164 X.FileCharacteristic = FileCharacter; 165 return X; 166 } 167 168 SourceLocation getIncludeLoc() const { return IncludeLoc; } 169 unsigned getChunkNo() const { return ChunkNo; } 170 const ContentCache* getContentCache() const { return Content; } 171 172 /// getCharacteristic - Return whether this is a system header or not. 173 CharacteristicKind getFileCharacteristic() const { 174 return (CharacteristicKind)FileCharacteristic; 175 } 176 177 /// Emit - Emit this FileIDInfo to Bitcode. 178 void Emit(llvm::Serializer& S) const; 179 180 /// ReadVal - Reconstitute a FileIDInfo from Bitcode. 181 static FileIDInfo ReadVal(llvm::Deserializer& S); 182 }; 183 184 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 185 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 186 /// instantiated, and the SpellingLoc - where the actual character data for 187 /// the token came from. An actual macro SourceLocation stores deltas from 188 /// these positions. 189 class MacroIDInfo { 190 SourceLocation InstantiationLoc, SpellingLoc; 191 public: 192 SourceLocation getInstantiationLoc() const { return InstantiationLoc; } 193 SourceLocation getSpellingLoc() const { return SpellingLoc; } 194 195 /// get - Return a MacroID for a macro expansion. VL specifies 196 /// the instantiation location (where the macro is expanded), and SL 197 /// specifies the spelling location (where the characters from the token 198 /// come from). Both VL and PL refer to normal File SLocs. 
199 static MacroIDInfo get(SourceLocation VL, SourceLocation SL) { 200 MacroIDInfo X; 201 X.InstantiationLoc = VL; 202 X.SpellingLoc = SL; 203 return X; 204 } 205 206 /// Emit - Emit this MacroIDInfo to Bitcode. 207 void Emit(llvm::Serializer& S) const; 208 209 /// ReadVal - Reconstitute a MacroIDInfo from Bitcode. 210 static MacroIDInfo ReadVal(llvm::Deserializer& S); 211 }; 212} // end SrcMgr namespace. 213} // end clang namespace 214 215namespace std { 216template <> struct less<clang::SrcMgr::ContentCache> { 217 inline bool operator()(const clang::SrcMgr::ContentCache& L, 218 const clang::SrcMgr::ContentCache& R) const { 219 return L.Entry < R.Entry; 220 } 221}; 222} // end std namespace 223 224namespace clang { 225 226/// SourceManager - This file handles loading and caching of source files into 227/// memory. This object owns the MemoryBuffer objects for all of the loaded 228/// files and assigns unique FileID's for each unique #include chain. 229/// 230/// The SourceManager can be queried for information about SourceLocation 231/// objects, turning them into either spelling or instantiation locations. 232/// Spelling locations represent where the bytes corresponding to a token came 233/// from and instantiation locations represent where the location is in the 234/// user's view. In the case of a macro expansion, for example, the spelling 235/// location indicates where the expanded token came from and the instantiation 236/// location specifies where it was expanded. 237class SourceManager { 238 /// FileInfos - Memoized information about all of the files tracked by this 239 /// SourceManager. This set allows us to merge ContentCache entries based 240 /// on their FileEntry*. All ContentCache objects will thus have unique, 241 /// non-null, FileEntry pointers. 242 std::set<SrcMgr::ContentCache> FileInfos; 243 244 /// MemBufferInfos - Information about various memory buffers that we have 245 /// read in. 
This is a list, instead of a vector, because we need pointers to 246 /// the ContentCache objects to be stable. All FileEntry* within the 247 /// stored ContentCache objects are NULL, as they do not refer to a file. 248 std::list<SrcMgr::ContentCache> MemBufferInfos; 249 250 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 251 /// entries are off by one. 252 std::vector<SrcMgr::FileIDInfo> FileIDs; 253 254 /// MacroIDs - Information about each MacroID. 255 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 256 257 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 258 /// method which is used to speedup getLineNumber calls to nearby locations. 259 mutable FileID LastLineNoFileIDQuery; 260 mutable SrcMgr::ContentCache *LastLineNoContentCache; 261 mutable unsigned LastLineNoFilePos; 262 mutable unsigned LastLineNoResult; 263 264 /// MainFileID - The file ID for the main source file of the translation unit. 265 FileID MainFileID; 266 267 // SourceManager doesn't support copy construction. 268 explicit SourceManager(const SourceManager&); 269 void operator=(const SourceManager&); 270public: 271 SourceManager() {} 272 ~SourceManager() {} 273 274 void clearIDTables() { 275 MainFileID = FileID(); 276 FileIDs.clear(); 277 MacroIDs.clear(); 278 LastLineNoFileIDQuery = FileID(); 279 LastLineNoContentCache = 0; 280 } 281 282 /// getMainFileID - Returns the FileID of the main source file. 283 FileID getMainFileID() const { return MainFileID; } 284 285 /// createFileID - Create a new FileID that represents the specified file 286 /// being #included from the specified IncludePosition. This returns 0 on 287 /// error and translates NULL into standard input. 288 FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, 289 SrcMgr::CharacteristicKind FileCharacter) { 290 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 291 if (IR == 0) return FileID(); // Error opening file? 
292 return createFileID(IR, IncludePos, FileCharacter); 293 } 294 295 /// createMainFileID - Create the FileID for the main source file. 296 FileID createMainFileID(const FileEntry *SourceFile, 297 SourceLocation IncludePos) { 298 assert(MainFileID.isInvalid() && "MainFileID already set!"); 299 MainFileID = createFileID(SourceFile, IncludePos, SrcMgr::C_User); 300 return MainFileID; 301 } 302 303 /// createFileIDForMemBuffer - Create a new FileID that represents the 304 /// specified memory buffer. This does no caching of the buffer and takes 305 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 306 FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 307 return createFileID(createMemBufferContentCache(Buffer), SourceLocation(), 308 SrcMgr::C_User); 309 } 310 311 /// createMainFileIDForMembuffer - Create the FileID for a memory buffer 312 /// that will represent the FileID for the main source. One example 313 /// of when this would be used is when the main source is read from STDIN. 314 FileID createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 315 assert(MainFileID.isInvalid() && "MainFileID already set!"); 316 MainFileID = createFileIDForMemBuffer(Buffer); 317 return MainFileID; 318 } 319 320 /// getLocForStartOfFile - Return the source location corresponding to the 321 /// first byte of the specified file. 322 SourceLocation getLocForStartOfFile(FileID FID) const { 323 return SourceLocation::getFileLoc(FID.ID, 0); 324 } 325 326 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 327 /// that a token at Loc should actually be referenced from InstantiationLoc. 328 SourceLocation getInstantiationLoc(SourceLocation Loc, 329 SourceLocation InstantiationLoc); 330 331 /// getBuffer - Return the buffer for the specified FileID. 
332 /// 333 const llvm::MemoryBuffer *getBuffer(FileID FID) const { 334 return getContentCache(FID)->getBuffer(); 335 } 336 337 /// getBufferData - Return a pointer to the start and end of the source buffer 338 /// data for the specified FileID. 339 std::pair<const char*, const char*> getBufferData(FileID FID) const; 340 341 /// getIncludeLoc - Return the location of the #include for the specified 342 /// SourceLocation. If this is a macro expansion, this transparently figures 343 /// out which file includes the file being expanded into. 344 SourceLocation getIncludeLoc(SourceLocation ID) const { 345 return getFIDInfo(getInstantiationLoc(ID).getChunkID())->getIncludeLoc(); 346 } 347 348 /// getCharacterData - Return a pointer to the start of the specified location 349 /// in the appropriate MemoryBuffer. 350 const char *getCharacterData(SourceLocation SL) const; 351 352 /// getColumnNumber - Return the column # for the specified file position. 353 /// This is significantly cheaper to compute than the line number. This 354 /// returns zero if the column number isn't known. This may only be called on 355 /// a file sloc, so you must choose a spelling or instantiation location 356 /// before calling this method. 357 unsigned getColumnNumber(SourceLocation Loc) const; 358 359 unsigned getSpellingColumnNumber(SourceLocation Loc) const { 360 return getColumnNumber(getSpellingLoc(Loc)); 361 } 362 unsigned getInstantiationColumnNumber(SourceLocation Loc) const { 363 return getColumnNumber(getInstantiationLoc(Loc)); 364 } 365 366 367 /// getLineNumber - Given a SourceLocation, return the spelling line number 368 /// for the position indicated. This requires building and caching a table of 369 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 370 /// about to emit a diagnostic. 
371 unsigned getLineNumber(SourceLocation Loc) const; 372 373 unsigned getInstantiationLineNumber(SourceLocation Loc) const { 374 return getLineNumber(getInstantiationLoc(Loc)); 375 } 376 unsigned getSpellingLineNumber(SourceLocation Loc) const { 377 return getLineNumber(getSpellingLoc(Loc)); 378 } 379 380 /// getSourceName - This method returns the name of the file or buffer that 381 /// the SourceLocation specifies. This can be modified with #line directives, 382 /// etc. 383 const char *getSourceName(SourceLocation Loc) const; 384 385 /// Given a SourceLocation object, return the instantiation location 386 /// referenced by the ID. 387 SourceLocation getInstantiationLoc(SourceLocation Loc) const { 388 // File locations work. 389 if (Loc.isFileID()) return Loc; 390 391 return MacroIDs[Loc.getMacroID()].getInstantiationLoc(); 392 } 393 394 /// getSpellingLoc - Given a SourceLocation object, return the spelling 395 /// location referenced by the ID. This is the place where the characters 396 /// that make up the lexed token can be found. 397 SourceLocation getSpellingLoc(SourceLocation Loc) const { 398 // File locations work! 399 if (Loc.isFileID()) return Loc; 400 401 // Look up the macro token's spelling location. 402 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc(); 403 return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs()); 404 } 405 406 /// getContentCacheForLoc - Return the ContentCache for the spelling loc of 407 /// the specified SourceLocation, if one exists. 408 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 409 Loc = getSpellingLoc(Loc); 410 unsigned ChunkID = Loc.getChunkID(); 411 assert(ChunkID-1 < FileIDs.size() && "Invalid FileID!"); 412 return FileIDs[ChunkID-1].getContentCache(); 413 } 414 415 /// getFileEntryForLoc - Return the FileEntry record for the spelling loc of 416 /// the specified SourceLocation, if one exists. 
417 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 418 return getContentCacheForLoc(Loc)->Entry; 419 } 420 421 /// getFileEntryForID - Returns the FileEntry record for the provided FileID. 422 const FileEntry *getFileEntryForID(FileID FID) const { 423 return getContentCache(FID)->Entry; 424 } 425 426 /// getCanonicalFileID - Return the canonical FileID for a SourceLocation. 427 /// A file can have multiple FileIDs if it is large enough to be broken 428 /// into multiple chunks. This method returns the unique FileID without 429 /// chunk information for a given SourceLocation. Use this method when 430 /// you want to compare FileIDs across SourceLocations. 431 FileID getCanonicalFileID(SourceLocation SpellingLoc) const { 432 return getDecomposedFileLoc(SpellingLoc).first; 433 } 434 435 /// getDecomposedFileLoc - Decompose the specified file location into a raw 436 /// FileID + Offset pair. The first element is the FileID, the second is the 437 /// offset from the start of the buffer of the location. 438 std::pair<FileID, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 439 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 440 441 // TODO: Add a flag "is first chunk" to SLOC. 442 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getChunkID()); 443 444 // If this file has been split up into chunks, factor in the chunk number 445 // that the FileID references. 446 unsigned ChunkNo = FIDInfo->getChunkNo(); 447 unsigned Offset = Loc.getRawFilePos(); 448 Offset += (ChunkNo << SourceLocation::FilePosBits); 449 450 assert(Loc.getChunkID() >= ChunkNo && "Unexpected offset"); 451 452 return std::make_pair(FileID::Create(Loc.getChunkID()-ChunkNo), Offset); 453 } 454 455 /// getFullFilePos - This (efficient) method returns the offset from the start 456 /// of the file that the specified spelling SourceLocation represents. This 457 /// returns the location of the actual character data, not the instantiation 458 /// position. 
459 unsigned getFullFilePos(SourceLocation SpellingLoc) const { 460 return getDecomposedFileLoc(SpellingLoc).second; 461 } 462 463 /// isFromSameFile - Returns true if both SourceLocations correspond to 464 /// the same file. 465 bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const { 466 return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2); 467 } 468 469 /// isFromMainFile - Returns true if the file of provided SourceLocation is 470 /// the main file. 471 bool isFromMainFile(SourceLocation Loc) const { 472 return getCanonicalFileID(Loc) == getMainFileID(); 473 } 474 475 /// isInSystemHeader - Returns if a SourceLocation is in a system header. 476 bool isInSystemHeader(SourceLocation Loc) const { 477 return getFileCharacteristic(Loc) != SrcMgr::C_User; 478 } 479 SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const { 480 return getFIDInfo(getSpellingLoc(Loc).getChunkID()) 481 ->getFileCharacteristic(); 482 } 483 SrcMgr::CharacteristicKind getFileCharacteristic(FileID FID) const { 484 return getFIDInfo(FID)->getFileCharacteristic(); 485 } 486 487 // Iterators over FileInfos. 488 typedef std::set<SrcMgr::ContentCache>::const_iterator fileinfo_iterator; 489 fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); } 490 fileinfo_iterator fileinfo_end() const { return FileInfos.end(); } 491 492 /// PrintStats - Print statistics to stderr. 493 /// 494 void PrintStats() const; 495 496 /// Emit - Emit this SourceManager to Bitcode. 497 void Emit(llvm::Serializer& S) const; 498 499 /// Read - Reconstitute a SourceManager from Bitcode. 500 static SourceManager* CreateAndRegister(llvm::Deserializer& S, 501 FileManager &FMgr); 502 503private: 504 friend struct SrcMgr::ContentCache; // Used for deserialization. 505 506 /// createFileID - Create a new fileID for the specified ContentCache and 507 /// include position. This works regardless of whether the ContentCache 508 /// corresponds to a file or some other input source. 
509 FileID createFileID(const SrcMgr::ContentCache* File, 510 SourceLocation IncludePos, 511 SrcMgr::CharacteristicKind DirCharacter); 512 513 /// getContentCache - Create or return a cached ContentCache for the specified 514 /// file. This returns null on failure. 515 const SrcMgr::ContentCache* getContentCache(const FileEntry *SourceFile); 516 517 /// createMemBufferContentCache - Create a new ContentCache for the specified 518 /// memory buffer. 519 const SrcMgr::ContentCache* 520 createMemBufferContentCache(const llvm::MemoryBuffer *Buf); 521 522 const SrcMgr::FileIDInfo *getFIDInfo(unsigned FID) const { 523 assert(FID-1 < FileIDs.size() && "Invalid FileID!"); 524 return &FileIDs[FID-1]; 525 } 526 const SrcMgr::FileIDInfo *getFIDInfo(FileID FID) const { 527 return getFIDInfo(FID.ID); 528 } 529 530 const SrcMgr::ContentCache *getContentCache(FileID FID) const { 531 return getContentCache(getFIDInfo(FID.ID)); 532 } 533 534 /// Return the ContentCache structure for the specified FileID. 535 /// This is always the physical reference for the ID. 536 const SrcMgr::ContentCache* 537 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 538 return FIDInfo->getContentCache(); 539 } 540}; 541 542 543} // end namespace clang 544 545#endif 546