SourceManager.h revision 88054dee0402e4d3c1f64e6b697acc47195c0d72
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileManager; 32class FileEntry; 33class IdentifierTokenInfo; 34 35/// SrcMgr - Public enums and private classes that are part of the 36/// SourceManager implementation. 37/// 38namespace SrcMgr { 39 /// CharacteristicKind - This is used to represent whether a file or directory 40 /// holds normal user code, system code, or system code which is implicitly 41 /// 'extern "C"' in C++ mode. Entire directories can be tagged with this 42 /// (this is maintained by DirectoryLookup and friends) as can specific 43 /// FileIDInfos when a #pragma system_header is seen or various other cases. 44 /// 45 enum CharacteristicKind { 46 C_User, C_System, C_ExternCSystem 47 }; 48 49 /// ContentCache - Once instance of this struct is kept for every file 50 /// loaded or used. This object owns the MemoryBuffer object. 51 class ContentCache { 52 /// Buffer - The actual buffer containing the characters from the input 53 /// file. This is owned by the ContentCache object. 54 const llvm::MemoryBuffer* Buffer; 55 56 public: 57 /// Reference to the file entry. This reference does not own 58 /// the FileEntry object. It is possible for this to be NULL if 59 /// the ContentCache encapsulates an imaginary text buffer. 60 const FileEntry* Entry; 61 62 /// SourceLineCache - A new[]'d array of offsets for each source line. This 63 /// is lazily computed. This is owned by the ContentCache object. 64 unsigned* SourceLineCache; 65 66 /// NumLines - The number of lines in this ContentCache. This is only valid 67 /// if SourceLineCache is non-null. 68 unsigned NumLines; 69 70 /// getBuffer - Returns the memory buffer for the associated content. 71 const llvm::MemoryBuffer* getBuffer() const; 72 73 /// getSize - Returns the size of the content encapsulated by this 74 /// ContentCache. This can be the size of the source file or the size of an 75 /// arbitrary scratch buffer. If the ContentCache encapsulates a source 76 /// file this size is retrieved from the file's FileEntry. 77 unsigned getSize() const; 78 79 /// getSizeBytesMapped - Returns the number of bytes actually mapped for 80 /// this ContentCache. This can be 0 if the MemBuffer was not actually 81 /// instantiated. 82 unsigned getSizeBytesMapped() const; 83 84 void setBuffer(const llvm::MemoryBuffer* B) { 85 assert(!Buffer && "MemoryBuffer already set."); 86 Buffer = B; 87 } 88 89 ContentCache(const FileEntry* e = NULL) 90 : Buffer(NULL), Entry(e), SourceLineCache(NULL), NumLines(0) {} 91 92 ~ContentCache(); 93 94 /// The copy ctor does not allow copies where source object has either 95 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 96 /// is not transfered, so this is a logical error. 97 ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) { 98 Entry = RHS.Entry; 99 100 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 101 && "Passed ContentCache object cannot own a buffer."); 102 103 NumLines = RHS.NumLines; 104 } 105 106 /// Emit - Emit this ContentCache to Bitcode. 107 void Emit(llvm::Serializer& S) const; 108 109 /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode 110 // and store it in the specified SourceManager. 111 static void ReadToSourceManager(llvm::Deserializer& D, SourceManager& SMgr, 112 FileManager* FMgr, std::vector<char>& Buf); 113 114 private: 115 // Disable assignments. 116 ContentCache& operator=(const ContentCache& RHS); 117 }; 118 119 /// FileIDInfo - Information about a FileID, basically just the logical file 120 /// that it represents and include stack information. A File SourceLocation 121 /// is a byte offset from the start of this. 122 /// 123 /// FileID's are used to compute the location of a character in memory as well 124 /// as the logical source location, which can be differ from the spelling 125 /// location. It is different when #line's are active or when macros have 126 /// been expanded. 127 /// 128 /// Each FileID has include stack information, indicating where it came from. 129 /// For the primary translation unit, it comes from SourceLocation() aka 0. 130 /// This information encodes the #include chain that a token was instantiated 131 /// from. 132 /// 133 /// FileIDInfos contain a "ContentCache *", describing the source file, 134 /// and a Chunk number, which allows a SourceLocation to index into very 135 /// large files (those which there are not enough FilePosBits to address). 136 /// 137 struct FileIDInfo { 138 private: 139 /// IncludeLoc - The location of the #include that brought in this file. 140 /// This SourceLocation object has an invalid SLOC for the main file. 141 SourceLocation IncludeLoc; 142 143 /// ChunkNo - Really large buffers are broken up into chunks that are 144 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 145 /// chunk number of this FileID. 146 unsigned ChunkNo : 30; 147 148 /// FileCharacteristic - This is an instance of CharacteristicKind, 149 /// indicating whether this is a system header dir or not. 150 unsigned FileCharacteristic : 2; 151 152 /// Content - Information about the source buffer itself. 153 const ContentCache* Content; 154 155 public: 156 /// get - Return a FileIDInfo object. 157 static FileIDInfo get(SourceLocation IL, unsigned CN, 158 const ContentCache *Con, 159 CharacteristicKind FileCharacter) { 160 FileIDInfo X; 161 X.IncludeLoc = IL; 162 X.ChunkNo = CN; 163 X.Content = Con; 164 X.FileCharacteristic = FileCharacter; 165 return X; 166 } 167 168 SourceLocation getIncludeLoc() const { return IncludeLoc; } 169 unsigned getChunkNo() const { return ChunkNo; } 170 const ContentCache* getContentCache() const { return Content; } 171 172 /// getCharacteristic - Return whether this is a system header or not. 173 CharacteristicKind getFileCharacteristic() const { 174 return (CharacteristicKind)FileCharacteristic; 175 } 176 177 /// Emit - Emit this FileIDInfo to Bitcode. 178 void Emit(llvm::Serializer& S) const; 179 180 /// ReadVal - Reconstitute a FileIDInfo from Bitcode. 181 static FileIDInfo ReadVal(llvm::Deserializer& S); 182 }; 183 184 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 185 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 186 /// instantiated, and the SpellingLoc - where the actual character data for 187 /// the token came from. An actual macro SourceLocation stores deltas from 188 /// these positions. 189 class MacroIDInfo { 190 SourceLocation InstantiationLoc, SpellingLoc; 191 public: 192 SourceLocation getInstantiationLoc() const { return InstantiationLoc; } 193 SourceLocation getSpellingLoc() const { return SpellingLoc; } 194 195 /// get - Return a MacroID for a macro expansion. VL specifies 196 /// the instantiation location (where the macro is expanded), and SL 197 /// specifies the spelling location (where the characters from the token 198 /// come from). Both VL and PL refer to normal File SLocs. 199 static MacroIDInfo get(SourceLocation VL, SourceLocation SL) { 200 MacroIDInfo X; 201 X.InstantiationLoc = VL; 202 X.SpellingLoc = SL; 203 return X; 204 } 205 206 /// Emit - Emit this MacroIDInfo to Bitcode. 207 void Emit(llvm::Serializer& S) const; 208 209 /// ReadVal - Reconstitute a MacroIDInfo from Bitcode. 210 static MacroIDInfo ReadVal(llvm::Deserializer& S); 211 }; 212} // end SrcMgr namespace. 213} // end clang namespace 214 215namespace std { 216template <> struct less<clang::SrcMgr::ContentCache> { 217 inline bool operator()(const clang::SrcMgr::ContentCache& L, 218 const clang::SrcMgr::ContentCache& R) const { 219 return L.Entry < R.Entry; 220 } 221}; 222} // end std namespace 223 224namespace clang { 225 226/// SourceManager - This file handles loading and caching of source files into 227/// memory. This object owns the MemoryBuffer objects for all of the loaded 228/// files and assigns unique FileID's for each unique #include chain. 229/// 230/// The SourceManager can be queried for information about SourceLocation 231/// objects, turning them into either spelling or logical locations. Spelling 232/// locations represent where the bytes corresponding to a token came from and 233/// logical locations represent where the location is in the user's view. In 234/// the case of a macro expansion, for example, the spelling location indicates 235/// where the expanded token came from and the logical location specifies where 236/// it was expanded. Logical locations are also influenced by #line directives, 237/// etc. 238class SourceManager { 239 /// FileInfos - Memoized information about all of the files tracked by this 240 /// SourceManager. This set allows us to merge ContentCache entries based 241 /// on their FileEntry*. All ContentCache objects will thus have unique, 242 /// non-null, FileEntry pointers. 243 std::set<SrcMgr::ContentCache> FileInfos; 244 245 /// MemBufferInfos - Information about various memory buffers that we have 246 /// read in. This is a list, instead of a vector, because we need pointers to 247 /// the ContentCache objects to be stable. All FileEntry* within the 248 /// stored ContentCache objects are NULL, as they do not refer to a file. 249 std::list<SrcMgr::ContentCache> MemBufferInfos; 250 251 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 252 /// entries are off by one. 253 std::vector<SrcMgr::FileIDInfo> FileIDs; 254 255 /// MacroIDs - Information about each MacroID. 256 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 257 258 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 259 /// method which is used to speedup getLineNumber calls to nearby locations. 260 mutable unsigned LastLineNoFileIDQuery; 261 mutable SrcMgr::ContentCache *LastLineNoContentCache; 262 mutable unsigned LastLineNoFilePos; 263 mutable unsigned LastLineNoResult; 264 265 /// MainFileID - The file ID for the main source file of the translation unit. 266 unsigned MainFileID; 267 268 // SourceManager doesn't support copy construction. 269 explicit SourceManager(const SourceManager&); 270 void operator=(const SourceManager&); 271public: 272 SourceManager() : LastLineNoFileIDQuery(~0U), MainFileID(0) {} 273 ~SourceManager() {} 274 275 void clearIDTables() { 276 MainFileID = 0; 277 FileIDs.clear(); 278 MacroIDs.clear(); 279 LastLineNoFileIDQuery = ~0U; 280 LastLineNoContentCache = 0; 281 } 282 283 /// getMainFileID - Returns the FileID of the main source file. 284 unsigned getMainFileID() const { return MainFileID; } 285 286 /// createFileID - Create a new FileID that represents the specified file 287 /// being #included from the specified IncludePosition. This returns 0 on 288 /// error and translates NULL into standard input. 289 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, 290 SrcMgr::CharacteristicKind FileCharacter) { 291 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 292 if (IR == 0) return 0; // Error opening file? 293 return createFileID(IR, IncludePos, FileCharacter); 294 } 295 296 /// createMainFileID - Create the FileID for the main source file. 297 unsigned createMainFileID(const FileEntry *SourceFile, 298 SourceLocation IncludePos) { 299 300 assert (MainFileID == 0 && "MainFileID already set!"); 301 MainFileID = createFileID(SourceFile, IncludePos, SrcMgr::C_User); 302 return MainFileID; 303 } 304 305 /// createFileIDForMemBuffer - Create a new FileID that represents the 306 /// specified memory buffer. This does no caching of the buffer and takes 307 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 308 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 309 return createFileID(createMemBufferContentCache(Buffer), SourceLocation(), 310 SrcMgr::C_User); 311 } 312 313 /// createMainFileIDForMembuffer - Create the FileID for a memory buffer 314 /// that will represent the FileID for the main source. One example 315 /// of when this would be used is when the main source is read from STDIN. 316 unsigned createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 317 assert (MainFileID == 0 && "MainFileID already set!"); 318 MainFileID = createFileIDForMemBuffer(Buffer); 319 return MainFileID; 320 } 321 322 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 323 /// that a token at Loc should actually be referenced from InstantiationLoc. 324 SourceLocation getInstantiationLoc(SourceLocation Loc, 325 SourceLocation InstantiationLoc); 326 327 /// getBuffer - Return the buffer for the specified FileID. 328 /// 329 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 330 return getContentCache(FileID)->getBuffer(); 331 } 332 333 /// getBufferData - Return a pointer to the start and end of the character 334 /// data for the specified FileID. 335 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 336 337 /// getIncludeLoc - Return the location of the #include for the specified 338 /// SourceLocation. If this is a macro expansion, this transparently figures 339 /// out which file includes the file being expanded into. 340 SourceLocation getIncludeLoc(SourceLocation ID) const { 341 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); 342 } 343 344 /// getCharacterData - Return a pointer to the start of the specified location 345 /// in the appropriate MemoryBuffer. 346 const char *getCharacterData(SourceLocation SL) const; 347 348 /// getColumnNumber - Return the column # for the specified file position. 349 /// This is significantly cheaper to compute than the line number. This 350 /// returns zero if the column number isn't known. This may only be called on 351 /// a file sloc, so you must choose a spelling or logical location before 352 /// calling this method. 353 unsigned getColumnNumber(SourceLocation Loc) const; 354 355 unsigned getSpellingColumnNumber(SourceLocation Loc) const { 356 return getColumnNumber(getSpellingLoc(Loc)); 357 } 358 unsigned getLogicalColumnNumber(SourceLocation Loc) const { 359 return getColumnNumber(getLogicalLoc(Loc)); 360 } 361 362 363 /// getLineNumber - Given a SourceLocation, return the spelling line number 364 /// for the position indicated. This requires building and caching a table of 365 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 366 /// about to emit a diagnostic. 367 unsigned getLineNumber(SourceLocation Loc) const; 368 369 unsigned getLogicalLineNumber(SourceLocation Loc) const { 370 return getLineNumber(getLogicalLoc(Loc)); 371 } 372 unsigned getSpellingLineNumber(SourceLocation Loc) const { 373 return getLineNumber(getSpellingLoc(Loc)); 374 } 375 376 /// getSourceName - This method returns the name of the file or buffer that 377 /// the SourceLocation specifies. This can be modified with #line directives, 378 /// etc. 379 const char *getSourceName(SourceLocation Loc) const; 380 381 /// Given a SourceLocation object, return the logical location referenced by 382 /// the ID. This logical location is subject to #line directives, etc. 383 SourceLocation getLogicalLoc(SourceLocation Loc) const { 384 // File locations work. 385 if (Loc.isFileID()) return Loc; 386 387 return MacroIDs[Loc.getMacroID()].getInstantiationLoc(); 388 } 389 390 /// getSpellingLoc - Given a SourceLocation object, return the spelling 391 /// location referenced by the ID. This is the place where the characters 392 /// that make up the lexed token can be found. 393 SourceLocation getSpellingLoc(SourceLocation Loc) const { 394 // File locations work! 395 if (Loc.isFileID()) return Loc; 396 397 // Look up the macro token's spelling location. 398 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc(); 399 return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs()); 400 } 401 402 /// getContentCacheForLoc - Return the ContentCache for the spelling loc of 403 /// the specified SourceLocation, if one exists. 404 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 405 Loc = getSpellingLoc(Loc); 406 unsigned FileID = Loc.getFileID(); 407 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 408 return FileIDs[FileID-1].getContentCache(); 409 } 410 411 /// getFileEntryForLoc - Return the FileEntry record for the spelling loc of 412 /// the specified SourceLocation, if one exists. 413 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 414 return getContentCacheForLoc(Loc)->Entry; 415 } 416 417 /// getFileEntryForID - Returns the FileEntry record for the provided FileID. 418 const FileEntry* getFileEntryForID(unsigned id) const { 419 return getContentCache(id)->Entry; 420 } 421 422 /// getCanonicalFileID - Return the canonical FileID for a SourceLocation. 423 /// A file can have multiple FileIDs if it is large enough to be broken 424 /// into multiple chunks. This method returns the unique FileID without 425 /// chunk information for a given SourceLocation. Use this method when 426 /// you want to compare FileIDs across SourceLocations. 427 unsigned getCanonicalFileID(SourceLocation SpellingLoc) const { 428 return getDecomposedFileLoc(SpellingLoc).first; 429 } 430 431 /// getDecomposedFileLoc - Decompose the specified file location into a raw 432 /// FileID + Offset pair. The first element is the FileID, the second is the 433 /// offset from the start of the buffer of the location. 434 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 435 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 436 437 // TODO: Add a flag "is first chunk" to SLOC. 438 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 439 440 // If this file has been split up into chunks, factor in the chunk number 441 // that the FileID references. 442 unsigned ChunkNo = FIDInfo->getChunkNo(); 443 unsigned Offset = Loc.getRawFilePos(); 444 Offset += (ChunkNo << SourceLocation::FilePosBits); 445 446 assert(Loc.getFileID() >= ChunkNo && "Unexpected offset"); 447 448 return std::make_pair(Loc.getFileID()-ChunkNo, Offset); 449 } 450 451 /// getFullFilePos - This (efficient) method returns the offset from the start 452 /// of the file that the specified spelling SourceLocation represents. This 453 /// returns the location of the actual character data, not the logical file 454 /// position. 455 unsigned getFullFilePos(SourceLocation SpellingLoc) const { 456 return getDecomposedFileLoc(SpellingLoc).second; 457 } 458 459 /// isFromSameFile - Returns true if both SourceLocations correspond to 460 /// the same file. 461 bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const { 462 return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2); 463 } 464 465 /// isFromMainFile - Returns true if the file of provided SourceLocation is 466 /// the main file. 467 bool isFromMainFile(SourceLocation Loc) const { 468 return getCanonicalFileID(Loc) == getMainFileID(); 469 } 470 471 /// isInSystemHeader - Returns if a SourceLocation is in a system header. 472 bool isInSystemHeader(SourceLocation Loc) const { 473 return getFileCharacteristic(Loc) != SrcMgr::C_User; 474 } 475 SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const { 476 return getFIDInfo(getSpellingLoc(Loc).getFileID())->getFileCharacteristic(); 477 } 478 SrcMgr::CharacteristicKind getFileCharacteristic(unsigned FileID) const { 479 return getFIDInfo(FileID)->getFileCharacteristic(); 480 } 481 482 // Iterators over FileIDs. 483 484 class fileid_iterator { 485 std::vector<SrcMgr::FileIDInfo>::iterator I; 486 unsigned fid; 487 public: 488 fileid_iterator(std::vector<SrcMgr::FileIDInfo>::iterator i, unsigned f) 489 : I(i), fid(f) {} 490 491 bool operator==(const fileid_iterator& X) const { return X.fid == fid; } 492 bool operator!=(const fileid_iterator& X) const { return X.fid != fid; } 493 fileid_iterator& operator++() { ++fid; ++I; return *this; } 494 495 unsigned getFileID() const { return fid; } 496 SrcMgr::FileIDInfo& getFileIDInfo() { return *I; } 497 }; 498 499 fileid_iterator fileid_begin() { 500 return fileid_iterator(FileIDs.begin(), 1); 501 } 502 503 fileid_iterator fileid_end() { 504 return fileid_iterator(FileIDs.end(), FileIDs.size()+1); 505 } 506 507 /// PrintStats - Print statistics to stderr. 508 /// 509 void PrintStats() const; 510 511 /// Emit - Emit this SourceManager to Bitcode. 512 void Emit(llvm::Serializer& S) const; 513 514 /// Read - Reconstitute a SourceManager from Bitcode. 515 static SourceManager* CreateAndRegister(llvm::Deserializer& S, 516 FileManager &FMgr); 517 518private: 519 friend struct SrcMgr::ContentCache; // Used for deserialization. 520 521 /// createFileID - Create a new fileID for the specified ContentCache and 522 /// include position. This works regardless of whether the ContentCache 523 /// corresponds to a file or some other input source. 524 unsigned createFileID(const SrcMgr::ContentCache* File, 525 SourceLocation IncludePos, 526 SrcMgr::CharacteristicKind DirCharacter); 527 528 /// getContentCache - Create or return a cached ContentCache for the specified 529 /// file. This returns null on failure. 530 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile); 531 532 /// createMemBufferContentCache - Create a new ContentCache for the specified 533 /// memory buffer. 534 const SrcMgr::ContentCache* 535 createMemBufferContentCache(const llvm::MemoryBuffer* Buf); 536 537 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const { 538 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 539 return &FileIDs[FileID-1]; 540 } 541 542 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const { 543 return getContentCache(getFIDInfo(FileID)); 544 } 545 546 /// Return the ContentCache structure for the specified FileID. 547 /// This is always the physical reference for the ID. 548 const SrcMgr::ContentCache* 549 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 550 return FIDInfo->getContentCache(); 551 } 552}; 553 554 555} // end namespace clang 556 557#endif 558