SourceManager.cpp revision 6b3066780bda02e3117d71a18ca2f430ed1454af
1//===--- SourceManager.cpp - Track and cache source files -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/SourceManager.h" 15#include "clang/Basic/FileManager.h" 16#include "llvm/Support/Compiler.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/System/Path.h" 19#include "llvm/Bitcode/Serialize.h" 20#include "llvm/Bitcode/Deserialize.h" 21#include "llvm/Support/Streams.h" 22#include <algorithm> 23using namespace clang; 24using namespace SrcMgr; 25using llvm::MemoryBuffer; 26 27//===----------------------------------------------------------------------===// 28// SourceManager Helper Classes 29//===----------------------------------------------------------------------===// 30 31ContentCache::~ContentCache() { 32 delete Buffer; 33} 34 35/// getSizeBytesMapped - Returns the number of bytes actually mapped for 36/// this ContentCache. This can be 0 if the MemBuffer was not actually 37/// instantiated. 38unsigned ContentCache::getSizeBytesMapped() const { 39 return Buffer ? Buffer->getBufferSize() : 0; 40} 41 42/// getSize - Returns the size of the content encapsulated by this ContentCache. 43/// This can be the size of the source file or the size of an arbitrary 44/// scratch buffer. If the ContentCache encapsulates a source file, that 45/// file is not lazily brought in from disk to satisfy this query. 46unsigned ContentCache::getSize() const { 47 return Entry ? Entry->getSize() : Buffer->getBufferSize(); 48} 49 50const llvm::MemoryBuffer *ContentCache::getBuffer() const { 51 // Lazily create the Buffer for ContentCaches that wrap files. 52 if (!Buffer && Entry) { 53 // FIXME: Should we support a way to not have to do this check over 54 // and over if we cannot open the file? 55 Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize()); 56 } 57 return Buffer; 58} 59 60//===----------------------------------------------------------------------===// 61// Line Table Implementation 62//===----------------------------------------------------------------------===// 63 64namespace clang { 65struct LineEntry { 66 /// FileOffset - The offset in this file that the line entry occurs at. 67 unsigned FileOffset; 68 69 /// LineNo - The presumed line number of this line entry: #line 4. 70 unsigned LineNo; 71 72 /// FilenameID - The ID of the filename identified by this line entry: 73 /// #line 4 "foo.c". This is -1 if not specified. 74 int FilenameID; 75 76 /// Flags - Set the 0 if no flags, 1 if a system header, 77 SrcMgr::CharacteristicKind FileKind; 78 79 static LineEntry get(unsigned Offs, unsigned Line, int Filename, 80 SrcMgr::CharacteristicKind FileKind) { 81 LineEntry E; 82 E.FileOffset = Offs; 83 E.LineNo = Line; 84 E.FilenameID = Filename; 85 E.FileKind = FileKind; 86 return E; 87 } 88}; 89 90inline bool operator<(const LineEntry &E, unsigned Offset) { 91 return E.FileOffset < Offset; 92} 93 94inline bool operator<(unsigned Offset, const LineEntry &E) { 95 return Offset < E.FileOffset; 96} 97 98/// LineTableInfo - This class is used to hold and unique data used to 99/// represent #line information. 100class LineTableInfo { 101 /// FilenameIDs - This map is used to assign unique IDs to filenames in 102 /// #line directives. This allows us to unique the filenames that 103 /// frequently reoccur and reference them with indices. FilenameIDs holds 104 /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID 105 /// to string. 106 llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs; 107 std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID; 108 109 /// LineEntries - This is a map from FileIDs to a list of line entries (sorted 110 /// by the offset they occur in the file. 111 std::map<unsigned, std::vector<LineEntry> > LineEntries; 112public: 113 LineTableInfo() { 114 } 115 116 void clear() { 117 FilenameIDs.clear(); 118 FilenamesByID.clear(); 119 } 120 121 ~LineTableInfo() {} 122 123 unsigned getLineTableFilenameID(const char *Ptr, unsigned Len); 124 const char *getFilename(unsigned ID) const { 125 assert(ID < FilenamesByID.size() && "Invalid FilenameID"); 126 return FilenamesByID[ID]->getKeyData(); 127 } 128 129 void AddLineNote(unsigned FID, unsigned Offset, 130 unsigned LineNo, int FilenameID); 131 void AddLineNote(unsigned FID, unsigned Offset, 132 unsigned LineNo, int FilenameID, 133 unsigned EntryExit, SrcMgr::CharacteristicKind FileKind); 134 135 136 /// FindNearestLineEntry - Find the line entry nearest to FID that is before 137 /// it. If there is no line entry before Offset in FID, return null. 138 const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset); 139}; 140} // namespace clang 141 142unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) { 143 // Look up the filename in the string table, returning the pre-existing value 144 // if it exists. 145 llvm::StringMapEntry<unsigned> &Entry = 146 FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U); 147 if (Entry.getValue() != ~0U) 148 return Entry.getValue(); 149 150 // Otherwise, assign this the next available ID. 151 Entry.setValue(FilenamesByID.size()); 152 FilenamesByID.push_back(&Entry); 153 return FilenamesByID.size()-1; 154} 155 156/// AddLineNote - Add a line note to the line table that indicates that there 157/// is a #line at the specified FID/Offset location which changes the presumed 158/// location to LineNo/FilenameID. 159void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset, 160 unsigned LineNo, int FilenameID) { 161 std::vector<LineEntry> &Entries = LineEntries[FID]; 162 163 assert((Entries.empty() || Entries.back().FileOffset < Offset) && 164 "Adding line entries out of order!"); 165 166 SrcMgr::CharacteristicKind Kind = SrcMgr::C_User; 167 168 if (!Entries.empty()) { 169 // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember 170 // that we are still in "foo.h". 171 if (FilenameID == -1) 172 FilenameID = Entries.back().FilenameID; 173 174 // If we are after a line marker that switched us to system header mode, 175 // preserve it. 176 Kind = Entries.back().FileKind; 177 } 178 179 Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind)); 180} 181 182/// AddLineNote This is the same as the previous version of AddLineNote, but is 183/// used for GNU line markers. If EntryExit is 0, then this doesn't change the 184/// presumed #include stack. If it is 1, this is a file entry, if it is 2 then 185/// this is a file exit. FileKind specifies whether this is a system header or 186/// extern C system header. 187void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset, 188 unsigned LineNo, int FilenameID, 189 unsigned EntryExit, 190 SrcMgr::CharacteristicKind FileKind) { 191 assert(FilenameID != -1 && "Unspecified filename should use other accessor"); 192 193 std::vector<LineEntry> &Entries = LineEntries[FID]; 194 195 assert((Entries.empty() || Entries.back().FileOffset < Offset) && 196 "Adding line entries out of order!"); 197 198 199 // TODO: Handle EntryExit. 200 201 Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind)); 202} 203 204 205/// FindNearestLineEntry - Find the line entry nearest to FID that is before 206/// it. If there is no line entry before Offset in FID, return null. 207const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID, 208 unsigned Offset) { 209 const std::vector<LineEntry> &Entries = LineEntries[FID]; 210 assert(!Entries.empty() && "No #line entries for this FID after all!"); 211 212 // It is very common for the query to be after the last #line, check this 213 // first. 214 if (Entries.back().FileOffset <= Offset) 215 return &Entries.back(); 216 217 // Do a binary search to find the maximal element that is still before Offset. 218 std::vector<LineEntry>::const_iterator I = 219 std::upper_bound(Entries.begin(), Entries.end(), Offset); 220 if (I == Entries.begin()) return 0; 221 return &*--I; 222} 223 224 225/// getLineTableFilenameID - Return the uniqued ID for the specified filename. 226/// 227unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) { 228 if (LineTable == 0) 229 LineTable = new LineTableInfo(); 230 return LineTable->getLineTableFilenameID(Ptr, Len); 231} 232 233 234/// AddLineNote - Add a line note to the line table for the FileID and offset 235/// specified by Loc. If FilenameID is -1, it is considered to be 236/// unspecified. 237void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, 238 int FilenameID) { 239 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 240 241 const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile(); 242 243 // Remember that this file has #line directives now if it doesn't already. 244 const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives(); 245 246 if (LineTable == 0) 247 LineTable = new LineTableInfo(); 248 LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID); 249} 250 251/// AddLineNote - Add a GNU line marker to the line table. 252void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, 253 int FilenameID, bool IsFileEntry, 254 bool IsFileExit, bool IsSystemHeader, 255 bool IsExternCHeader) { 256 // If there is no filename and no flags, this is treated just like a #line, 257 // which does not change the flags of the previous line marker. 258 if (FilenameID == -1) { 259 assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader && 260 "Can't set flags without setting the filename!"); 261 return AddLineNote(Loc, LineNo, FilenameID); 262 } 263 264 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 265 const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile(); 266 267 // Remember that this file has #line directives now if it doesn't already. 268 const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives(); 269 270 if (LineTable == 0) 271 LineTable = new LineTableInfo(); 272 273 SrcMgr::CharacteristicKind FileKind; 274 if (IsExternCHeader) 275 FileKind = SrcMgr::C_ExternCSystem; 276 else if (IsSystemHeader) 277 FileKind = SrcMgr::C_System; 278 else 279 FileKind = SrcMgr::C_User; 280 281 unsigned EntryExit = 0; 282 if (IsFileEntry) 283 EntryExit = 1; 284 else if (IsFileExit) 285 EntryExit = 2; 286 287 LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID, 288 EntryExit, FileKind); 289} 290 291 292//===----------------------------------------------------------------------===// 293// Private 'Create' methods. 294//===----------------------------------------------------------------------===// 295 296SourceManager::~SourceManager() { 297 delete LineTable; 298 299 // Delete FileEntry objects corresponding to content caches. Since the actual 300 // content cache objects are bump pointer allocated, we just have to run the 301 // dtors, but we call the deallocate method for completeness. 302 for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) { 303 MemBufferInfos[i]->~ContentCache(); 304 ContentCacheAlloc.Deallocate(MemBufferInfos[i]); 305 } 306 for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator 307 I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 308 I->second->~ContentCache(); 309 ContentCacheAlloc.Deallocate(I->second); 310 } 311} 312 313void SourceManager::clearIDTables() { 314 MainFileID = FileID(); 315 SLocEntryTable.clear(); 316 LastLineNoFileIDQuery = FileID(); 317 LastLineNoContentCache = 0; 318 LastFileIDLookup = FileID(); 319 320 if (LineTable) 321 LineTable->clear(); 322 323 // Use up FileID #0 as an invalid instantiation. 324 NextOffset = 0; 325 createInstantiationLoc(SourceLocation(), SourceLocation(), 1); 326} 327 328/// getOrCreateContentCache - Create or return a cached ContentCache for the 329/// specified file. 330const ContentCache * 331SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) { 332 assert(FileEnt && "Didn't specify a file entry to use?"); 333 334 // Do we already have information about this file? 335 ContentCache *&Entry = FileInfos[FileEnt]; 336 if (Entry) return Entry; 337 338 // Nope, create a new Cache entry. Make sure it is at least 8-byte aligned 339 // so that FileInfo can use the low 3 bits of the pointer for its own 340 // nefarious purposes. 341 unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment; 342 EntryAlign = std::max(8U, EntryAlign); 343 Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign); 344 new (Entry) ContentCache(FileEnt); 345 return Entry; 346} 347 348 349/// createMemBufferContentCache - Create a new ContentCache for the specified 350/// memory buffer. This does no caching. 351const ContentCache* 352SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 353 // Add a new ContentCache to the MemBufferInfos list and return it. Make sure 354 // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of 355 // the pointer for its own nefarious purposes. 356 unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment; 357 EntryAlign = std::max(8U, EntryAlign); 358 ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign); 359 new (Entry) ContentCache(); 360 MemBufferInfos.push_back(Entry); 361 Entry->setBuffer(Buffer); 362 return Entry; 363} 364 365//===----------------------------------------------------------------------===// 366// Methods to create new FileID's and instantiations. 367//===----------------------------------------------------------------------===// 368 369/// createFileID - Create a new fileID for the specified ContentCache and 370/// include position. This works regardless of whether the ContentCache 371/// corresponds to a file or some other input source. 372FileID SourceManager::createFileID(const ContentCache *File, 373 SourceLocation IncludePos, 374 SrcMgr::CharacteristicKind FileCharacter) { 375 SLocEntryTable.push_back(SLocEntry::get(NextOffset, 376 FileInfo::get(IncludePos, File, 377 FileCharacter))); 378 unsigned FileSize = File->getSize(); 379 assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!"); 380 NextOffset += FileSize+1; 381 382 // Set LastFileIDLookup to the newly created file. The next getFileID call is 383 // almost guaranteed to be from that file. 384 return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1); 385} 386 387/// createInstantiationLoc - Return a new SourceLocation that encodes the fact 388/// that a token from SpellingLoc should actually be referenced from 389/// InstantiationLoc. 390SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc, 391 SourceLocation InstantLoc, 392 unsigned TokLength) { 393 SLocEntryTable.push_back(SLocEntry::get(NextOffset, 394 InstantiationInfo::get(InstantLoc, 395 SpellingLoc))); 396 assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!"); 397 NextOffset += TokLength+1; 398 return SourceLocation::getMacroLoc(NextOffset-(TokLength+1)); 399} 400 401/// getBufferData - Return a pointer to the start and end of the source buffer 402/// data for the specified FileID. 403std::pair<const char*, const char*> 404SourceManager::getBufferData(FileID FID) const { 405 const llvm::MemoryBuffer *Buf = getBuffer(FID); 406 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 407} 408 409 410//===----------------------------------------------------------------------===// 411// SourceLocation manipulation methods. 412//===----------------------------------------------------------------------===// 413 414/// getFileIDSlow - Return the FileID for a SourceLocation. This is a very hot 415/// method that is used for all SourceManager queries that start with a 416/// SourceLocation object. It is responsible for finding the entry in 417/// SLocEntryTable which contains the specified location. 418/// 419FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const { 420 assert(SLocOffset && "Invalid FileID"); 421 422 // After the first and second level caches, I see two common sorts of 423 // behavior: 1) a lot of searched FileID's are "near" the cached file location 424 // or are "near" the cached instantiation location. 2) others are just 425 // completely random and may be a very long way away. 426 // 427 // To handle this, we do a linear search for up to 8 steps to catch #1 quickly 428 // then we fall back to a less cache efficient, but more scalable, binary 429 // search to find the location. 430 431 // See if this is near the file point - worst case we start scanning from the 432 // most newly created FileID. 433 std::vector<SrcMgr::SLocEntry>::const_iterator I; 434 435 if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) { 436 // Neither loc prunes our search. 437 I = SLocEntryTable.end(); 438 } else { 439 // Perhaps it is near the file point. 440 I = SLocEntryTable.begin()+LastFileIDLookup.ID; 441 } 442 443 // Find the FileID that contains this. "I" is an iterator that points to a 444 // FileID whose offset is known to be larger than SLocOffset. 445 unsigned NumProbes = 0; 446 while (1) { 447 --I; 448 if (I->getOffset() <= SLocOffset) { 449#if 0 450 printf("lin %d -> %d [%s] %d %d\n", SLocOffset, 451 I-SLocEntryTable.begin(), 452 I->isInstantiation() ? "inst" : "file", 453 LastFileIDLookup.ID, int(SLocEntryTable.end()-I)); 454#endif 455 FileID Res = FileID::get(I-SLocEntryTable.begin()); 456 457 // If this isn't an instantiation, remember it. We have good locality 458 // across FileID lookups. 459 if (!I->isInstantiation()) 460 LastFileIDLookup = Res; 461 NumLinearScans += NumProbes+1; 462 return Res; 463 } 464 if (++NumProbes == 8) 465 break; 466 } 467 468 // Convert "I" back into an index. We know that it is an entry whose index is 469 // larger than the offset we are looking for. 470 unsigned GreaterIndex = I-SLocEntryTable.begin(); 471 // LessIndex - This is the lower bound of the range that we're searching. 472 // We know that the offset corresponding to the FileID is is less than 473 // SLocOffset. 474 unsigned LessIndex = 0; 475 NumProbes = 0; 476 while (1) { 477 unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex; 478 unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset(); 479 480 ++NumProbes; 481 482 // If the offset of the midpoint is too large, chop the high side of the 483 // range to the midpoint. 484 if (MidOffset > SLocOffset) { 485 GreaterIndex = MiddleIndex; 486 continue; 487 } 488 489 // If the middle index contains the value, succeed and return. 490 if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) { 491#if 0 492 printf("bin %d -> %d [%s] %d %d\n", SLocOffset, 493 I-SLocEntryTable.begin(), 494 I->isInstantiation() ? "inst" : "file", 495 LastFileIDLookup.ID, int(SLocEntryTable.end()-I)); 496#endif 497 FileID Res = FileID::get(MiddleIndex); 498 499 // If this isn't an instantiation, remember it. We have good locality 500 // across FileID lookups. 501 if (!I->isInstantiation()) 502 LastFileIDLookup = Res; 503 NumBinaryProbes += NumProbes; 504 return Res; 505 } 506 507 // Otherwise, move the low-side up to the middle index. 508 LessIndex = MiddleIndex; 509 } 510} 511 512SourceLocation SourceManager:: 513getInstantiationLocSlowCase(SourceLocation Loc) const { 514 do { 515 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); 516 Loc =getSLocEntry(LocInfo.first).getInstantiation().getInstantiationLoc(); 517 Loc = Loc.getFileLocWithOffset(LocInfo.second); 518 } while (!Loc.isFileID()); 519 520 return Loc; 521} 522 523SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const { 524 do { 525 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); 526 Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc(); 527 Loc = Loc.getFileLocWithOffset(LocInfo.second); 528 } while (!Loc.isFileID()); 529 return Loc; 530} 531 532 533std::pair<FileID, unsigned> 534SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E, 535 unsigned Offset) const { 536 // If this is an instantiation record, walk through all the instantiation 537 // points. 538 FileID FID; 539 SourceLocation Loc; 540 do { 541 Loc = E->getInstantiation().getInstantiationLoc(); 542 543 FID = getFileID(Loc); 544 E = &getSLocEntry(FID); 545 Offset += Loc.getOffset()-E->getOffset(); 546 } while (!Loc.isFileID()); 547 548 return std::make_pair(FID, Offset); 549} 550 551std::pair<FileID, unsigned> 552SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E, 553 unsigned Offset) const { 554 // If this is an instantiation record, walk through all the instantiation 555 // points. 556 FileID FID; 557 SourceLocation Loc; 558 do { 559 Loc = E->getInstantiation().getSpellingLoc(); 560 561 FID = getFileID(Loc); 562 E = &getSLocEntry(FID); 563 Offset += Loc.getOffset()-E->getOffset(); 564 } while (!Loc.isFileID()); 565 566 return std::make_pair(FID, Offset); 567} 568 569 570//===----------------------------------------------------------------------===// 571// Queries about the code at a SourceLocation. 572//===----------------------------------------------------------------------===// 573 574/// getCharacterData - Return a pointer to the start of the specified location 575/// in the appropriate MemoryBuffer. 576const char *SourceManager::getCharacterData(SourceLocation SL) const { 577 // Note that this is a hot function in the getSpelling() path, which is 578 // heavily used by -E mode. 579 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL); 580 581 // Note that calling 'getBuffer()' may lazily page in a source file. 582 return getSLocEntry(LocInfo.first).getFile().getContentCache() 583 ->getBuffer()->getBufferStart() + LocInfo.second; 584} 585 586 587/// getColumnNumber - Return the column # for the specified file position. 588/// this is significantly cheaper to compute than the line number. 589unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const { 590 const char *Buf = getBuffer(FID)->getBufferStart(); 591 592 unsigned LineStart = FilePos; 593 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 594 --LineStart; 595 return FilePos-LineStart+1; 596} 597 598unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const { 599 if (Loc.isInvalid()) return 0; 600 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc); 601 return getColumnNumber(LocInfo.first, LocInfo.second); 602} 603 604unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const { 605 if (Loc.isInvalid()) return 0; 606 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 607 return getColumnNumber(LocInfo.first, LocInfo.second); 608} 609 610 611 612static void ComputeLineNumbers(ContentCache* FI, 613 llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE; 614static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){ 615 // Note that calling 'getBuffer()' may lazily page in the file. 616 const MemoryBuffer *Buffer = FI->getBuffer(); 617 618 // Find the file offsets of all of the *physical* source lines. This does 619 // not look at trigraphs, escaped newlines, or anything else tricky. 620 std::vector<unsigned> LineOffsets; 621 622 // Line #1 starts at char 0. 623 LineOffsets.push_back(0); 624 625 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 626 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 627 unsigned Offs = 0; 628 while (1) { 629 // Skip over the contents of the line. 630 // TODO: Vectorize this? This is very performance sensitive for programs 631 // with lots of diagnostics and in -E mode. 632 const unsigned char *NextBuf = (const unsigned char *)Buf; 633 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 634 ++NextBuf; 635 Offs += NextBuf-Buf; 636 Buf = NextBuf; 637 638 if (Buf[0] == '\n' || Buf[0] == '\r') { 639 // If this is \n\r or \r\n, skip both characters. 640 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 641 ++Offs, ++Buf; 642 ++Offs, ++Buf; 643 LineOffsets.push_back(Offs); 644 } else { 645 // Otherwise, this is a null. If end of file, exit. 646 if (Buf == End) break; 647 // Otherwise, skip the null. 648 ++Offs, ++Buf; 649 } 650 } 651 652 // Copy the offsets into the FileInfo structure. 653 FI->NumLines = LineOffsets.size(); 654 FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size()); 655 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 656} 657 658/// getLineNumber - Given a SourceLocation, return the spelling line number 659/// for the position indicated. This requires building and caching a table of 660/// line offsets for the MemoryBuffer, so this is not cheap: use only when 661/// about to emit a diagnostic. 662unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const { 663 ContentCache *Content; 664 if (LastLineNoFileIDQuery == FID) 665 Content = LastLineNoContentCache; 666 else 667 Content = const_cast<ContentCache*>(getSLocEntry(FID) 668 .getFile().getContentCache()); 669 670 // If this is the first use of line information for this buffer, compute the 671 /// SourceLineCache for it on demand. 672 if (Content->SourceLineCache == 0) 673 ComputeLineNumbers(Content, ContentCacheAlloc); 674 675 // Okay, we know we have a line number table. Do a binary search to find the 676 // line number that this character position lands on. 677 unsigned *SourceLineCache = Content->SourceLineCache; 678 unsigned *SourceLineCacheStart = SourceLineCache; 679 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 680 681 unsigned QueriedFilePos = FilePos+1; 682 683 // If the previous query was to the same file, we know both the file pos from 684 // that query and the line number returned. This allows us to narrow the 685 // search space from the entire file to something near the match. 686 if (LastLineNoFileIDQuery == FID) { 687 if (QueriedFilePos >= LastLineNoFilePos) { 688 SourceLineCache = SourceLineCache+LastLineNoResult-1; 689 690 // The query is likely to be nearby the previous one. Here we check to 691 // see if it is within 5, 10 or 20 lines. It can be far away in cases 692 // where big comment blocks and vertical whitespace eat up lines but 693 // contribute no tokens. 694 if (SourceLineCache+5 < SourceLineCacheEnd) { 695 if (SourceLineCache[5] > QueriedFilePos) 696 SourceLineCacheEnd = SourceLineCache+5; 697 else if (SourceLineCache+10 < SourceLineCacheEnd) { 698 if (SourceLineCache[10] > QueriedFilePos) 699 SourceLineCacheEnd = SourceLineCache+10; 700 else if (SourceLineCache+20 < SourceLineCacheEnd) { 701 if (SourceLineCache[20] > QueriedFilePos) 702 SourceLineCacheEnd = SourceLineCache+20; 703 } 704 } 705 } 706 } else { 707 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 708 } 709 } 710 711 // If the spread is large, do a "radix" test as our initial guess, based on 712 // the assumption that lines average to approximately the same length. 713 // NOTE: This is currently disabled, as it does not appear to be profitable in 714 // initial measurements. 715 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 716 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 717 718 // Take a stab at guessing where it is. 719 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 720 721 // Check for -10 and +10 lines. 722 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 723 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 724 725 // If the computed lower bound is less than the query location, move it in. 726 if (SourceLineCache < SourceLineCacheStart+LowerBound && 727 SourceLineCacheStart[LowerBound] < QueriedFilePos) 728 SourceLineCache = SourceLineCacheStart+LowerBound; 729 730 // If the computed upper bound is greater than the query location, move it. 731 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 732 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 733 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 734 } 735 736 unsigned *Pos 737 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 738 unsigned LineNo = Pos-SourceLineCacheStart; 739 740 LastLineNoFileIDQuery = FID; 741 LastLineNoContentCache = Content; 742 LastLineNoFilePos = QueriedFilePos; 743 LastLineNoResult = LineNo; 744 return LineNo; 745} 746 747unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const { 748 if (Loc.isInvalid()) return 0; 749 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 750 return getLineNumber(LocInfo.first, LocInfo.second); 751} 752unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const { 753 if (Loc.isInvalid()) return 0; 754 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc); 755 return getLineNumber(LocInfo.first, LocInfo.second); 756} 757 758/// getFileCharacteristic - return the file characteristic of the specified 759/// source location, indicating whether this is a normal file, a system 760/// header, or an "implicit extern C" system header. 761/// 762/// This state can be modified with flags on GNU linemarker directives like: 763/// # 4 "foo.h" 3 764/// which changes all source locations in the current file after that to be 765/// considered to be from a system header. 766SrcMgr::CharacteristicKind 767SourceManager::getFileCharacteristic(SourceLocation Loc) const { 768 assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!"); 769 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 770 const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile(); 771 772 // If there are no #line directives in this file, just return the whole-file 773 // state. 774 if (!FI.hasLineDirectives()) 775 return FI.getFileCharacteristic(); 776 777 assert(LineTable && "Can't have linetable entries without a LineTable!"); 778 // See if there is a #line directive before the location. 779 const LineEntry *Entry = 780 LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second); 781 782 // If this is before the first line marker, use the file characteristic. 783 if (!Entry) 784 return FI.getFileCharacteristic(); 785 786 return Entry->FileKind; 787} 788 789 790/// getPresumedLoc - This method returns the "presumed" location of a 791/// SourceLocation specifies. A "presumed location" can be modified by #line 792/// or GNU line marker directives. This provides a view on the data that a 793/// user should see in diagnostics, for example. 794/// 795/// Note that a presumed location is always given as the instantiation point 796/// of an instantiation location, not at the spelling location. 797PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const { 798 if (Loc.isInvalid()) return PresumedLoc(); 799 800 // Presumed locations are always for instantiation points. 801 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 802 803 const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile(); 804 const SrcMgr::ContentCache *C = FI.getContentCache(); 805 806 // To get the source name, first consult the FileEntry (if one exists) 807 // before the MemBuffer as this will avoid unnecessarily paging in the 808 // MemBuffer. 809 const char *Filename = 810 C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); 811 unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second); 812 unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second); 813 SourceLocation IncludeLoc = FI.getIncludeLoc(); 814 815 // If we have #line directives in this file, update and overwrite the physical 816 // location info if appropriate. 817 if (FI.hasLineDirectives()) { 818 assert(LineTable && "Can't have linetable entries without a LineTable!"); 819 // See if there is a #line directive before this. If so, get it. 820 if (const LineEntry *Entry = 821 LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) { 822 // If the LineEntry indicates a filename, use it. 823 if (Entry->FilenameID != -1) 824 Filename = LineTable->getFilename(Entry->FilenameID); 825 826 // Use the line number specified by the LineEntry. This line number may 827 // be multiple lines down from the line entry. Add the difference in 828 // physical line numbers from the query point and the line marker to the 829 // total. 830 unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset); 831 LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1); 832 833 // Note that column numbers are not molested by line markers. 834 } 835 } 836 837 return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc); 838} 839 840//===----------------------------------------------------------------------===// 841// Other miscellaneous methods. 842//===----------------------------------------------------------------------===// 843 844 845/// PrintStats - Print statistics to stderr. 846/// 847void SourceManager::PrintStats() const { 848 llvm::cerr << "\n*** Source Manager Stats:\n"; 849 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 850 << " mem buffers mapped.\n"; 851 llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, " 852 << NextOffset << "B of Sloc address space used.\n"; 853 854 unsigned NumLineNumsComputed = 0; 855 unsigned NumFileBytesMapped = 0; 856 for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){ 857 NumLineNumsComputed += I->second->SourceLineCache != 0; 858 NumFileBytesMapped += I->second->getSizeBytesMapped(); 859 } 860 861 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 862 << NumLineNumsComputed << " files with line #'s computed.\n"; 863 llvm::cerr << "FileID scans: " << NumLinearScans << " linear, " 864 << NumBinaryProbes << " binary.\n"; 865} 866 867//===----------------------------------------------------------------------===// 868// Serialization. 869//===----------------------------------------------------------------------===// 870 871void ContentCache::Emit(llvm::Serializer& S) const { 872 S.FlushRecord(); 873 S.EmitPtr(this); 874 875 if (Entry) { 876 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 877 878 if (Fname.isAbsolute()) 879 S.EmitCStr(Fname.c_str()); 880 else { 881 // Create an absolute path. 882 // FIXME: This will potentially contain ".." and "." in the path. 883 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 884 path.appendComponent(Fname.c_str()); 885 S.EmitCStr(path.c_str()); 886 } 887 } 888 else { 889 const char* p = Buffer->getBufferStart(); 890 const char* e = Buffer->getBufferEnd(); 891 892 S.EmitInt(e-p); 893 894 for ( ; p != e; ++p) 895 S.EmitInt(*p); 896 } 897 898 S.FlushRecord(); 899} 900 901void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 902 SourceManager& SMgr, 903 FileManager* FMgr, 904 std::vector<char>& Buf) { 905 if (FMgr) { 906 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 907 D.ReadCStr(Buf,false); 908 909 // Create/fetch the FileEntry. 910 const char* start = &Buf[0]; 911 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 912 913 // FIXME: Ideally we want a lazy materialization of the ContentCache 914 // anyway, because we don't want to read in source files unless this 915 // is absolutely needed. 916 if (!E) 917 D.RegisterPtr(PtrID,NULL); 918 else 919 // Get the ContextCache object and register it with the deserializer. 920 D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E)); 921 return; 922 } 923 924 // Register the ContextCache object with the deserializer. 925 /* FIXME: 926 ContentCache *Entry 927 SMgr.MemBufferInfos.push_back(ContentCache()); 928 = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 929 D.RegisterPtr(&Entry); 930 931 // Create the buffer. 932 unsigned Size = D.ReadInt(); 933 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 934 935 // Read the contents of the buffer. 936 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 937 for (unsigned i = 0; i < Size ; ++i) 938 p[i] = D.ReadInt(); 939 */ 940} 941 942void SourceManager::Emit(llvm::Serializer& S) const { 943 S.EnterBlock(); 944 S.EmitPtr(this); 945 S.EmitInt(MainFileID.getOpaqueValue()); 946 947 // Emit: FileInfos. Just emit the file name. 948 S.EnterBlock(); 949 950 // FIXME: Emit FileInfos. 951 //std::for_each(FileInfos.begin(), FileInfos.end(), 952 // S.MakeEmitter<ContentCache>()); 953 954 S.ExitBlock(); 955 956 // Emit: MemBufferInfos 957 S.EnterBlock(); 958 959 /* FIXME: EMIT. 960 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 961 S.MakeEmitter<ContentCache>()); 962 */ 963 964 S.ExitBlock(); 965 966 // FIXME: Emit SLocEntryTable. 967 968 S.ExitBlock(); 969} 970 971SourceManager* 972SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) { 973 SourceManager *M = new SourceManager(); 974 D.RegisterPtr(M); 975 976 // Read: the FileID of the main source file of the translation unit. 977 M->MainFileID = FileID::get(D.ReadInt()); 978 979 std::vector<char> Buf; 980 981 /*{ // FIXME Read: FileInfos. 982 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 983 while (!D.FinishedBlock(BLoc)) 984 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 985 }*/ 986 987 { // Read: MemBufferInfos. 988 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 989 while (!D.FinishedBlock(BLoc)) 990 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 991 } 992 993 // FIXME: Read SLocEntryTable. 994 995 return M; 996} 997