SourceManager.h revision f7cf85b330bedd2877e1371fb0a83e99751ae162
1//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_CLANG_SOURCEMANAGER_H 15#define LLVM_CLANG_SOURCEMANAGER_H 16 17#include "clang/Basic/SourceLocation.h" 18#include "llvm/Bitcode/SerializationFwd.h" 19#include <vector> 20#include <set> 21#include <list> 22#include <cassert> 23 24namespace llvm { 25class MemoryBuffer; 26} 27 28namespace clang { 29 30class SourceManager; 31class FileManager; 32class FileEntry; 33class IdentifierTokenInfo; 34 35/// SrcMgr - Public enums and private classes that are part of the 36/// SourceManager implementation. 37/// 38namespace SrcMgr { 39 /// CharacteristicKind - This is used to represent whether a file or directory 40 /// holds normal user code, system code, or system code which is implicitly 41 /// 'extern "C"' in C++ mode. Entire directories can be tagged with this 42 /// (this is maintained by DirectoryLookup and friends) as can specific 43 /// FileIDInfos when a #pragma system_header is seen or various other cases. 44 /// 45 enum CharacteristicKind { 46 C_User, C_System, C_ExternCSystem 47 }; 48 49 /// ContentCache - Once instance of this struct is kept for every file 50 /// loaded or used. This object owns the MemoryBuffer object. 51 class ContentCache { 52 /// Buffer - The actual buffer containing the characters from the input 53 /// file. This is owned by the ContentCache object. 54 const llvm::MemoryBuffer* Buffer; 55 56 public: 57 /// Reference to the file entry. This reference does not own 58 /// the FileEntry object. It is possible for this to be NULL if 59 /// the ContentCache encapsulates an imaginary text buffer. 60 const FileEntry* Entry; 61 62 /// SourceLineCache - A new[]'d array of offsets for each source line. This 63 /// is lazily computed. This is owned by the ContentCache object. 64 unsigned* SourceLineCache; 65 66 /// NumLines - The number of lines in this ContentCache. This is only valid 67 /// if SourceLineCache is non-null. 68 unsigned NumLines; 69 70 /// getBuffer - Returns the memory buffer for the associated content. 71 const llvm::MemoryBuffer* getBuffer() const; 72 73 /// getSize - Returns the size of the content encapsulated by this 74 /// ContentCache. This can be the size of the source file or the size of an 75 /// arbitrary scratch buffer. If the ContentCache encapsulates a source 76 /// file this size is retrieved from the file's FileEntry. 77 unsigned getSize() const; 78 79 /// getSizeBytesMapped - Returns the number of bytes actually mapped for 80 /// this ContentCache. This can be 0 if the MemBuffer was not actually 81 /// instantiated. 82 unsigned getSizeBytesMapped() const; 83 84 void setBuffer(const llvm::MemoryBuffer* B) { 85 assert(!Buffer && "MemoryBuffer already set."); 86 Buffer = B; 87 } 88 89 ContentCache(const FileEntry* e = NULL) 90 : Buffer(NULL), Entry(e), SourceLineCache(NULL), NumLines(0) {} 91 92 ~ContentCache(); 93 94 /// The copy ctor does not allow copies where source object has either 95 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory 96 /// is not transfered, so this is a logical error. 97 ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) { 98 Entry = RHS.Entry; 99 100 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL 101 && "Passed ContentCache object cannot own a buffer."); 102 103 NumLines = RHS.NumLines; 104 } 105 106 /// Emit - Emit this ContentCache to Bitcode. 107 void Emit(llvm::Serializer& S) const; 108 109 /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode 110 // and store it in the specified SourceManager. 111 static void ReadToSourceManager(llvm::Deserializer& D, SourceManager& SMgr, 112 FileManager* FMgr, std::vector<char>& Buf); 113 114 private: 115 // Disable assignments. 116 ContentCache& operator=(const ContentCache& RHS); 117 }; 118 119 /// FileIDInfo - Information about a FileID, basically just the logical file 120 /// that it represents and include stack information. A File SourceLocation 121 /// is a byte offset from the start of this. 122 /// 123 /// FileID's are used to compute the location of a character in memory as well 124 /// as the instantiation source location, which can be differ from the 125 /// spelling location. It is different when #line's are active or when macros 126 /// have been expanded. 127 /// 128 /// Each FileID has include stack information, indicating where it came from. 129 /// For the primary translation unit, it comes from SourceLocation() aka 0. 130 /// This information encodes the #include chain that a token was instantiated 131 /// from. 132 /// 133 /// FileIDInfos contain a "ContentCache *", describing the source file, 134 /// and a Chunk number, which allows a SourceLocation to index into very 135 /// large files (those which there are not enough FilePosBits to address). 136 /// 137 struct FileIDInfo { 138 private: 139 /// IncludeLoc - The location of the #include that brought in this file. 140 /// This SourceLocation object has an invalid SLOC for the main file. 141 SourceLocation IncludeLoc; 142 143 /// ChunkNo - Really large buffers are broken up into chunks that are 144 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the 145 /// chunk number of this FileID. 146 unsigned ChunkNo : 30; 147 148 /// FileCharacteristic - This is an instance of CharacteristicKind, 149 /// indicating whether this is a system header dir or not. 150 unsigned FileCharacteristic : 2; 151 152 /// Content - Information about the source buffer itself. 153 const ContentCache* Content; 154 155 public: 156 /// get - Return a FileIDInfo object. 157 static FileIDInfo get(SourceLocation IL, unsigned CN, 158 const ContentCache *Con, 159 CharacteristicKind FileCharacter) { 160 FileIDInfo X; 161 X.IncludeLoc = IL; 162 X.ChunkNo = CN; 163 X.Content = Con; 164 X.FileCharacteristic = FileCharacter; 165 return X; 166 } 167 168 SourceLocation getIncludeLoc() const { return IncludeLoc; } 169 unsigned getChunkNo() const { return ChunkNo; } 170 const ContentCache* getContentCache() const { return Content; } 171 172 /// getCharacteristic - Return whether this is a system header or not. 173 CharacteristicKind getFileCharacteristic() const { 174 return (CharacteristicKind)FileCharacteristic; 175 } 176 177 /// Emit - Emit this FileIDInfo to Bitcode. 178 void Emit(llvm::Serializer& S) const; 179 180 /// ReadVal - Reconstitute a FileIDInfo from Bitcode. 181 static FileIDInfo ReadVal(llvm::Deserializer& S); 182 }; 183 184 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. 185 /// Each MacroIDInfo encodes the Instantiation location - where the macro was 186 /// instantiated, and the SpellingLoc - where the actual character data for 187 /// the token came from. An actual macro SourceLocation stores deltas from 188 /// these positions. 189 class MacroIDInfo { 190 SourceLocation InstantiationLoc, SpellingLoc; 191 public: 192 SourceLocation getInstantiationLoc() const { return InstantiationLoc; } 193 SourceLocation getSpellingLoc() const { return SpellingLoc; } 194 195 /// get - Return a MacroID for a macro expansion. VL specifies 196 /// the instantiation location (where the macro is expanded), and SL 197 /// specifies the spelling location (where the characters from the token 198 /// come from). Both VL and PL refer to normal File SLocs. 199 static MacroIDInfo get(SourceLocation VL, SourceLocation SL) { 200 MacroIDInfo X; 201 X.InstantiationLoc = VL; 202 X.SpellingLoc = SL; 203 return X; 204 } 205 206 /// Emit - Emit this MacroIDInfo to Bitcode. 207 void Emit(llvm::Serializer& S) const; 208 209 /// ReadVal - Reconstitute a MacroIDInfo from Bitcode. 210 static MacroIDInfo ReadVal(llvm::Deserializer& S); 211 }; 212} // end SrcMgr namespace. 213} // end clang namespace 214 215namespace std { 216template <> struct less<clang::SrcMgr::ContentCache> { 217 inline bool operator()(const clang::SrcMgr::ContentCache& L, 218 const clang::SrcMgr::ContentCache& R) const { 219 return L.Entry < R.Entry; 220 } 221}; 222} // end std namespace 223 224namespace clang { 225 226/// SourceManager - This file handles loading and caching of source files into 227/// memory. This object owns the MemoryBuffer objects for all of the loaded 228/// files and assigns unique FileID's for each unique #include chain. 229/// 230/// The SourceManager can be queried for information about SourceLocation 231/// objects, turning them into either spelling or instantiation locations. 232/// Spelling locations represent where the bytes corresponding to a token came 233/// from and instantiation locations represent where the location is in the 234/// user's view. In the case of a macro expansion, for example, the spelling 235/// location indicates where the expanded token came from and the instantiation 236/// location specifies where it was expanded. 237class SourceManager { 238 /// FileInfos - Memoized information about all of the files tracked by this 239 /// SourceManager. This set allows us to merge ContentCache entries based 240 /// on their FileEntry*. All ContentCache objects will thus have unique, 241 /// non-null, FileEntry pointers. 242 std::set<SrcMgr::ContentCache> FileInfos; 243 244 /// MemBufferInfos - Information about various memory buffers that we have 245 /// read in. This is a list, instead of a vector, because we need pointers to 246 /// the ContentCache objects to be stable. All FileEntry* within the 247 /// stored ContentCache objects are NULL, as they do not refer to a file. 248 std::list<SrcMgr::ContentCache> MemBufferInfos; 249 250 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all 251 /// entries are off by one. 252 std::vector<SrcMgr::FileIDInfo> FileIDs; 253 254 /// MacroIDs - Information about each MacroID. 255 std::vector<SrcMgr::MacroIDInfo> MacroIDs; 256 257 /// LastLineNo - These ivars serve as a cache used in the getLineNumber 258 /// method which is used to speedup getLineNumber calls to nearby locations. 259 mutable unsigned LastLineNoFileIDQuery; 260 mutable SrcMgr::ContentCache *LastLineNoContentCache; 261 mutable unsigned LastLineNoFilePos; 262 mutable unsigned LastLineNoResult; 263 264 /// MainFileID - The file ID for the main source file of the translation unit. 265 unsigned MainFileID; 266 267 // SourceManager doesn't support copy construction. 268 explicit SourceManager(const SourceManager&); 269 void operator=(const SourceManager&); 270public: 271 SourceManager() : LastLineNoFileIDQuery(~0U), MainFileID(0) {} 272 ~SourceManager() {} 273 274 void clearIDTables() { 275 MainFileID = 0; 276 FileIDs.clear(); 277 MacroIDs.clear(); 278 LastLineNoFileIDQuery = ~0U; 279 LastLineNoContentCache = 0; 280 } 281 282 /// getMainFileID - Returns the FileID of the main source file. 283 unsigned getMainFileID() const { return MainFileID; } 284 285 /// createFileID - Create a new FileID that represents the specified file 286 /// being #included from the specified IncludePosition. This returns 0 on 287 /// error and translates NULL into standard input. 288 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos, 289 SrcMgr::CharacteristicKind FileCharacter) { 290 const SrcMgr::ContentCache *IR = getContentCache(SourceFile); 291 if (IR == 0) return 0; // Error opening file? 292 return createFileID(IR, IncludePos, FileCharacter); 293 } 294 295 /// createMainFileID - Create the FileID for the main source file. 296 unsigned createMainFileID(const FileEntry *SourceFile, 297 SourceLocation IncludePos) { 298 299 assert (MainFileID == 0 && "MainFileID already set!"); 300 MainFileID = createFileID(SourceFile, IncludePos, SrcMgr::C_User); 301 return MainFileID; 302 } 303 304 /// createFileIDForMemBuffer - Create a new FileID that represents the 305 /// specified memory buffer. This does no caching of the buffer and takes 306 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. 307 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 308 return createFileID(createMemBufferContentCache(Buffer), SourceLocation(), 309 SrcMgr::C_User); 310 } 311 312 /// createMainFileIDForMembuffer - Create the FileID for a memory buffer 313 /// that will represent the FileID for the main source. One example 314 /// of when this would be used is when the main source is read from STDIN. 315 unsigned createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { 316 assert (MainFileID == 0 && "MainFileID already set!"); 317 MainFileID = createFileIDForMemBuffer(Buffer); 318 return MainFileID; 319 } 320 321 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 322 /// that a token at Loc should actually be referenced from InstantiationLoc. 323 SourceLocation getInstantiationLoc(SourceLocation Loc, 324 SourceLocation InstantiationLoc); 325 326 /// getBuffer - Return the buffer for the specified FileID. 327 /// 328 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { 329 return getContentCache(FileID)->getBuffer(); 330 } 331 332 /// getBufferData - Return a pointer to the start and end of the character 333 /// data for the specified FileID. 334 std::pair<const char*, const char*> getBufferData(unsigned FileID) const; 335 336 /// getIncludeLoc - Return the location of the #include for the specified 337 /// SourceLocation. If this is a macro expansion, this transparently figures 338 /// out which file includes the file being expanded into. 339 SourceLocation getIncludeLoc(SourceLocation ID) const { 340 return getFIDInfo(getInstantiationLoc(ID).getFileID())->getIncludeLoc(); 341 } 342 343 /// getCharacterData - Return a pointer to the start of the specified location 344 /// in the appropriate MemoryBuffer. 345 const char *getCharacterData(SourceLocation SL) const; 346 347 /// getColumnNumber - Return the column # for the specified file position. 348 /// This is significantly cheaper to compute than the line number. This 349 /// returns zero if the column number isn't known. This may only be called on 350 /// a file sloc, so you must choose a spelling or instantiation location 351 /// before calling this method. 352 unsigned getColumnNumber(SourceLocation Loc) const; 353 354 unsigned getSpellingColumnNumber(SourceLocation Loc) const { 355 return getColumnNumber(getSpellingLoc(Loc)); 356 } 357 unsigned getInstantiationColumnNumber(SourceLocation Loc) const { 358 return getColumnNumber(getInstantiationLoc(Loc)); 359 } 360 361 362 /// getLineNumber - Given a SourceLocation, return the spelling line number 363 /// for the position indicated. This requires building and caching a table of 364 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 365 /// about to emit a diagnostic. 366 unsigned getLineNumber(SourceLocation Loc) const; 367 368 unsigned getInstantiationLineNumber(SourceLocation Loc) const { 369 return getLineNumber(getInstantiationLoc(Loc)); 370 } 371 unsigned getSpellingLineNumber(SourceLocation Loc) const { 372 return getLineNumber(getSpellingLoc(Loc)); 373 } 374 375 /// getSourceName - This method returns the name of the file or buffer that 376 /// the SourceLocation specifies. This can be modified with #line directives, 377 /// etc. 378 const char *getSourceName(SourceLocation Loc) const; 379 380 /// Given a SourceLocation object, return the instantiation location 381 /// referenced by the ID. 382 SourceLocation getInstantiationLoc(SourceLocation Loc) const { 383 // File locations work. 384 if (Loc.isFileID()) return Loc; 385 386 return MacroIDs[Loc.getMacroID()].getInstantiationLoc(); 387 } 388 389 /// getSpellingLoc - Given a SourceLocation object, return the spelling 390 /// location referenced by the ID. This is the place where the characters 391 /// that make up the lexed token can be found. 392 SourceLocation getSpellingLoc(SourceLocation Loc) const { 393 // File locations work! 394 if (Loc.isFileID()) return Loc; 395 396 // Look up the macro token's spelling location. 397 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc(); 398 return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs()); 399 } 400 401 /// getContentCacheForLoc - Return the ContentCache for the spelling loc of 402 /// the specified SourceLocation, if one exists. 403 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const { 404 Loc = getSpellingLoc(Loc); 405 unsigned FileID = Loc.getFileID(); 406 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 407 return FileIDs[FileID-1].getContentCache(); 408 } 409 410 /// getFileEntryForLoc - Return the FileEntry record for the spelling loc of 411 /// the specified SourceLocation, if one exists. 412 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const { 413 return getContentCacheForLoc(Loc)->Entry; 414 } 415 416 /// getFileEntryForID - Returns the FileEntry record for the provided FileID. 417 const FileEntry* getFileEntryForID(unsigned id) const { 418 return getContentCache(id)->Entry; 419 } 420 421 /// getCanonicalFileID - Return the canonical FileID for a SourceLocation. 422 /// A file can have multiple FileIDs if it is large enough to be broken 423 /// into multiple chunks. This method returns the unique FileID without 424 /// chunk information for a given SourceLocation. Use this method when 425 /// you want to compare FileIDs across SourceLocations. 426 unsigned getCanonicalFileID(SourceLocation SpellingLoc) const { 427 return getDecomposedFileLoc(SpellingLoc).first; 428 } 429 430 /// getDecomposedFileLoc - Decompose the specified file location into a raw 431 /// FileID + Offset pair. The first element is the FileID, the second is the 432 /// offset from the start of the buffer of the location. 433 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const { 434 assert(Loc.isFileID() && "Isn't a File SourceLocation"); 435 436 // TODO: Add a flag "is first chunk" to SLOC. 437 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); 438 439 // If this file has been split up into chunks, factor in the chunk number 440 // that the FileID references. 441 unsigned ChunkNo = FIDInfo->getChunkNo(); 442 unsigned Offset = Loc.getRawFilePos(); 443 Offset += (ChunkNo << SourceLocation::FilePosBits); 444 445 assert(Loc.getFileID() >= ChunkNo && "Unexpected offset"); 446 447 return std::make_pair(Loc.getFileID()-ChunkNo, Offset); 448 } 449 450 /// getFullFilePos - This (efficient) method returns the offset from the start 451 /// of the file that the specified spelling SourceLocation represents. This 452 /// returns the location of the actual character data, not the instantiation 453 /// position. 454 unsigned getFullFilePos(SourceLocation SpellingLoc) const { 455 return getDecomposedFileLoc(SpellingLoc).second; 456 } 457 458 /// isFromSameFile - Returns true if both SourceLocations correspond to 459 /// the same file. 460 bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const { 461 return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2); 462 } 463 464 /// isFromMainFile - Returns true if the file of provided SourceLocation is 465 /// the main file. 466 bool isFromMainFile(SourceLocation Loc) const { 467 return getCanonicalFileID(Loc) == getMainFileID(); 468 } 469 470 /// isInSystemHeader - Returns if a SourceLocation is in a system header. 471 bool isInSystemHeader(SourceLocation Loc) const { 472 return getFileCharacteristic(Loc) != SrcMgr::C_User; 473 } 474 SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const { 475 return getFIDInfo(getSpellingLoc(Loc).getFileID())->getFileCharacteristic(); 476 } 477 SrcMgr::CharacteristicKind getFileCharacteristic(unsigned FileID) const { 478 return getFIDInfo(FileID)->getFileCharacteristic(); 479 } 480 481 // Iterators over FileIDs. 482 483 class fileid_iterator { 484 std::vector<SrcMgr::FileIDInfo>::iterator I; 485 unsigned fid; 486 public: 487 fileid_iterator(std::vector<SrcMgr::FileIDInfo>::iterator i, unsigned f) 488 : I(i), fid(f) {} 489 490 bool operator==(const fileid_iterator& X) const { return X.fid == fid; } 491 bool operator!=(const fileid_iterator& X) const { return X.fid != fid; } 492 fileid_iterator& operator++() { ++fid; ++I; return *this; } 493 494 unsigned getFileID() const { return fid; } 495 SrcMgr::FileIDInfo& getFileIDInfo() { return *I; } 496 }; 497 498 fileid_iterator fileid_begin() { 499 return fileid_iterator(FileIDs.begin(), 1); 500 } 501 502 fileid_iterator fileid_end() { 503 return fileid_iterator(FileIDs.end(), FileIDs.size()+1); 504 } 505 506 /// PrintStats - Print statistics to stderr. 507 /// 508 void PrintStats() const; 509 510 /// Emit - Emit this SourceManager to Bitcode. 511 void Emit(llvm::Serializer& S) const; 512 513 /// Read - Reconstitute a SourceManager from Bitcode. 514 static SourceManager* CreateAndRegister(llvm::Deserializer& S, 515 FileManager &FMgr); 516 517private: 518 friend struct SrcMgr::ContentCache; // Used for deserialization. 519 520 /// createFileID - Create a new fileID for the specified ContentCache and 521 /// include position. This works regardless of whether the ContentCache 522 /// corresponds to a file or some other input source. 523 unsigned createFileID(const SrcMgr::ContentCache* File, 524 SourceLocation IncludePos, 525 SrcMgr::CharacteristicKind DirCharacter); 526 527 /// getContentCache - Create or return a cached ContentCache for the specified 528 /// file. This returns null on failure. 529 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile); 530 531 /// createMemBufferContentCache - Create a new ContentCache for the specified 532 /// memory buffer. 533 const SrcMgr::ContentCache* 534 createMemBufferContentCache(const llvm::MemoryBuffer* Buf); 535 536 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const { 537 assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); 538 return &FileIDs[FileID-1]; 539 } 540 541 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const { 542 return getContentCache(getFIDInfo(FileID)); 543 } 544 545 /// Return the ContentCache structure for the specified FileID. 546 /// This is always the physical reference for the ID. 547 const SrcMgr::ContentCache* 548 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const { 549 return FIDInfo->getContentCache(); 550 } 551}; 552 553 554} // end namespace clang 555 556#endif 557