// SourceManager.cpp revision 88054dee0402e4d3c1f64e6b697acc47195c0d72
1//===--- SourceManager.cpp - Track and cache source files -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/SourceManager.h" 15#include "clang/Basic/FileManager.h" 16#include "llvm/Support/Compiler.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/System/Path.h" 19#include "llvm/Bitcode/Serialize.h" 20#include "llvm/Bitcode/Deserialize.h" 21#include "llvm/Support/Streams.h" 22#include <algorithm> 23using namespace clang; 24using namespace SrcMgr; 25using llvm::MemoryBuffer; 26 27// This (temporary) directive toggles between lazy and eager creation of 28// MemBuffers. This directive is not permanent, and is here to test a few 29// potential optimizations in PTH. Once it is clear whether eager or lazy 30// creation of MemBuffers is better this directive will get removed. 31#define LAZY 32 33ContentCache::~ContentCache() { 34 delete Buffer; 35 delete [] SourceLineCache; 36} 37 38/// getSizeBytesMapped - Returns the number of bytes actually mapped for 39/// this ContentCache. This can be 0 if the MemBuffer was not actually 40/// instantiated. 41unsigned ContentCache::getSizeBytesMapped() const { 42 return Buffer ? Buffer->getBufferSize() : 0; 43} 44 45/// getSize - Returns the size of the content encapsulated by this ContentCache. 46/// This can be the size of the source file or the size of an arbitrary 47/// scratch buffer. If the ContentCache encapsulates a source file, that 48/// file is not lazily brought in from disk to satisfy this query. 49unsigned ContentCache::getSize() const { 50 return Entry ? 
Entry->getSize() : Buffer->getBufferSize(); 51} 52 53const llvm::MemoryBuffer* ContentCache::getBuffer() const { 54#ifdef LAZY 55 // Lazily create the Buffer for ContentCaches that wrap files. 56 if (!Buffer && Entry) { 57 // FIXME: Should we support a way to not have to do this check over 58 // and over if we cannot open the file? 59 // FIXME: This const_cast is ugly. Should we make getBuffer() non-const? 60 const_cast<ContentCache*>(this)->Buffer = 61 MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize()); 62 } 63#endif 64 return Buffer; 65} 66 67 68/// getFileInfo - Create or return a cached FileInfo for the specified file. 69/// 70const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 71 72 assert(FileEnt && "Didn't specify a file entry to use?"); 73 // Do we already have information about this file? 74 std::set<ContentCache>::iterator I = 75 FileInfos.lower_bound(ContentCache(FileEnt)); 76 77 if (I != FileInfos.end() && I->Entry == FileEnt) 78 return &*I; 79 80 // Nope, get information. 81#ifndef LAZY 82 const MemoryBuffer *File = 83 MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize()); 84 if (File == 0) 85 return 0; 86#endif 87 88 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 89#ifndef LAZY 90 Entry.setBuffer(File); 91#endif 92 Entry.SourceLineCache = 0; 93 Entry.NumLines = 0; 94 return &Entry; 95} 96 97 98/// createMemBufferContentCache - Create a new ContentCache for the specified 99/// memory buffer. This does no caching. 100const ContentCache* 101SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 102 // Add a new ContentCache to the MemBufferInfos list and return it. We 103 // must default construct the object first that the instance actually 104 // stored within MemBufferInfos actually owns the Buffer, and not any 105 // temporary we would use in the call to "push_back". 
106 MemBufferInfos.push_back(ContentCache()); 107 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 108 Entry.setBuffer(Buffer); 109 return &Entry; 110} 111 112 113/// createFileID - Create a new fileID for the specified ContentCache and 114/// include position. This works regardless of whether the ContentCache 115/// corresponds to a file or some other input source. 116unsigned SourceManager::createFileID(const ContentCache *File, 117 SourceLocation IncludePos, 118 SrcMgr::CharacteristicKind FileCharacter) { 119 // If FileEnt is really large (e.g. it's a large .i file), we may not be able 120 // to fit an arbitrary position in the file in the FilePos field. To handle 121 // this, we create one FileID for each chunk of the file that fits in a 122 // FilePos field. 123 unsigned FileSize = File->getSize(); 124 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 125 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter)); 126 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 127 "Ran out of file ID's!"); 128 return FileIDs.size(); 129 } 130 131 // Create one FileID for each chunk of the file. 132 unsigned Result = FileIDs.size()+1; 133 134 unsigned ChunkNo = 0; 135 while (1) { 136 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File, 137 FileCharacter)); 138 139 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 140 FileSize -= (1 << SourceLocation::FilePosBits); 141 } 142 143 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 144 "Ran out of file ID's!"); 145 return Result; 146} 147 148/// getInstantiationLoc - Return a new SourceLocation that encodes the fact 149/// that a token from SpellingLoc should actually be referenced from 150/// InstantiationLoc. 151SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc, 152 SourceLocation InstantLoc) { 153 // The specified source location may be a mapped location, due to a macro 154 // instantiation or #line directive. 
Strip off this information to find out 155 // where the characters are actually located. 156 SpellingLoc = getSpellingLoc(SpellingLoc); 157 158 // Resolve InstantLoc down to a real logical location. 159 InstantLoc = getLogicalLoc(InstantLoc); 160 161 162 // If the last macro id is close to the currently requested location, try to 163 // reuse it. This implements a small cache. 164 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 165 MacroIDInfo &LastOne = MacroIDs[i]; 166 167 // The instanitation point and source SpellingLoc have to exactly match to 168 // reuse (for now). We could allow "nearby" instantiations in the future. 169 if (LastOne.getInstantiationLoc() != InstantLoc || 170 LastOne.getSpellingLoc().getFileID() != SpellingLoc.getFileID()) 171 continue; 172 173 // Check to see if the spellloc of the token came from near enough to reuse. 174 int SpellDelta = SpellingLoc.getRawFilePos() - 175 LastOne.getSpellingLoc().getRawFilePos(); 176 if (SourceLocation::isValidMacroSpellingOffs(SpellDelta)) 177 return SourceLocation::getMacroLoc(i, SpellDelta); 178 } 179 180 181 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc)); 182 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 183} 184 185/// getBufferData - Return a pointer to the start and end of the character 186/// data for the specified FileID. 187std::pair<const char*, const char*> 188SourceManager::getBufferData(unsigned FileID) const { 189 const llvm::MemoryBuffer *Buf = getBuffer(FileID); 190 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 191} 192 193 194/// getCharacterData - Return a pointer to the start of the specified location 195/// in the appropriate MemoryBuffer. 196const char *SourceManager::getCharacterData(SourceLocation SL) const { 197 // Note that this is a hot function in the getSpelling() path, which is 198 // heavily used by -E mode. 
199 SL = getSpellingLoc(SL); 200 201 // Note that calling 'getBuffer()' may lazily page in a source file. 202 return getContentCache(SL.getFileID())->getBuffer()->getBufferStart() + 203 getFullFilePos(SL); 204} 205 206 207/// getColumnNumber - Return the column # for the specified file position. 208/// this is significantly cheaper to compute than the line number. This returns 209/// zero if the column number isn't known. 210unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 211 unsigned FileID = Loc.getFileID(); 212 if (FileID == 0) return 0; 213 214 unsigned FilePos = getFullFilePos(Loc); 215 const MemoryBuffer *Buffer = getBuffer(FileID); 216 const char *Buf = Buffer->getBufferStart(); 217 218 unsigned LineStart = FilePos; 219 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 220 --LineStart; 221 return FilePos-LineStart+1; 222} 223 224/// getSourceName - This method returns the name of the file or buffer that 225/// the SourceLocation specifies. This can be modified with #line directives, 226/// etc. 227const char *SourceManager::getSourceName(SourceLocation Loc) const { 228 unsigned FileID = Loc.getFileID(); 229 if (FileID == 0) return ""; 230 231 // To get the source name, first consult the FileEntry (if one exists) before 232 // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer. 233 const SrcMgr::ContentCache* C = getContentCache(FileID); 234 return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); 235} 236 237static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 238static void ComputeLineNumbers(ContentCache* FI) { 239 // Note that calling 'getBuffer()' may lazily page in the file. 240 const MemoryBuffer *Buffer = FI->getBuffer(); 241 242 // Find the file offsets of all of the *physical* source lines. This does 243 // not look at trigraphs, escaped newlines, or anything else tricky. 244 std::vector<unsigned> LineOffsets; 245 246 // Line #1 starts at char 0. 
247 LineOffsets.push_back(0); 248 249 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 250 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 251 unsigned Offs = 0; 252 while (1) { 253 // Skip over the contents of the line. 254 // TODO: Vectorize this? This is very performance sensitive for programs 255 // with lots of diagnostics and in -E mode. 256 const unsigned char *NextBuf = (const unsigned char *)Buf; 257 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 258 ++NextBuf; 259 Offs += NextBuf-Buf; 260 Buf = NextBuf; 261 262 if (Buf[0] == '\n' || Buf[0] == '\r') { 263 // If this is \n\r or \r\n, skip both characters. 264 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 265 ++Offs, ++Buf; 266 ++Offs, ++Buf; 267 LineOffsets.push_back(Offs); 268 } else { 269 // Otherwise, this is a null. If end of file, exit. 270 if (Buf == End) break; 271 // Otherwise, skip the null. 272 ++Offs, ++Buf; 273 } 274 } 275 276 // Copy the offsets into the FileInfo structure. 277 FI->NumLines = LineOffsets.size(); 278 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 279 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 280} 281 282/// getLineNumber - Given a SourceLocation, return the spelling line number 283/// for the position indicated. This requires building and caching a table of 284/// line offsets for the MemoryBuffer, so this is not cheap: use only when 285/// about to emit a diagnostic. 286unsigned SourceManager::getLineNumber(SourceLocation Loc) const { 287 unsigned FileID = Loc.getFileID(); 288 if (FileID == 0) return 0; 289 290 ContentCache* Content; 291 292 if (LastLineNoFileIDQuery == FileID) 293 Content = LastLineNoContentCache; 294 else 295 Content = const_cast<ContentCache*>(getContentCache(FileID)); 296 297 // If this is the first use of line information for this buffer, compute the 298 /// SourceLineCache for it on demand. 
299 if (Content->SourceLineCache == 0) 300 ComputeLineNumbers(Content); 301 302 // Okay, we know we have a line number table. Do a binary search to find the 303 // line number that this character position lands on. 304 unsigned *SourceLineCache = Content->SourceLineCache; 305 unsigned *SourceLineCacheStart = SourceLineCache; 306 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 307 308 unsigned QueriedFilePos = getFullFilePos(Loc)+1; 309 310 // If the previous query was to the same file, we know both the file pos from 311 // that query and the line number returned. This allows us to narrow the 312 // search space from the entire file to something near the match. 313 if (LastLineNoFileIDQuery == FileID) { 314 if (QueriedFilePos >= LastLineNoFilePos) { 315 SourceLineCache = SourceLineCache+LastLineNoResult-1; 316 317 // The query is likely to be nearby the previous one. Here we check to 318 // see if it is within 5, 10 or 20 lines. It can be far away in cases 319 // where big comment blocks and vertical whitespace eat up lines but 320 // contribute no tokens. 321 if (SourceLineCache+5 < SourceLineCacheEnd) { 322 if (SourceLineCache[5] > QueriedFilePos) 323 SourceLineCacheEnd = SourceLineCache+5; 324 else if (SourceLineCache+10 < SourceLineCacheEnd) { 325 if (SourceLineCache[10] > QueriedFilePos) 326 SourceLineCacheEnd = SourceLineCache+10; 327 else if (SourceLineCache+20 < SourceLineCacheEnd) { 328 if (SourceLineCache[20] > QueriedFilePos) 329 SourceLineCacheEnd = SourceLineCache+20; 330 } 331 } 332 } 333 } else { 334 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 335 } 336 } 337 338 // If the spread is large, do a "radix" test as our initial guess, based on 339 // the assumption that lines average to approximately the same length. 340 // NOTE: This is currently disabled, as it does not appear to be profitable in 341 // initial measurements. 
342 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 343 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 344 345 // Take a stab at guessing where it is. 346 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 347 348 // Check for -10 and +10 lines. 349 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 350 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 351 352 // If the computed lower bound is less than the query location, move it in. 353 if (SourceLineCache < SourceLineCacheStart+LowerBound && 354 SourceLineCacheStart[LowerBound] < QueriedFilePos) 355 SourceLineCache = SourceLineCacheStart+LowerBound; 356 357 // If the computed upper bound is greater than the query location, move it. 358 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 359 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 360 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 361 } 362 363 unsigned *Pos 364 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 365 unsigned LineNo = Pos-SourceLineCacheStart; 366 367 LastLineNoFileIDQuery = FileID; 368 LastLineNoContentCache = Content; 369 LastLineNoFilePos = QueriedFilePos; 370 LastLineNoResult = LineNo; 371 return LineNo; 372} 373 374/// PrintStats - Print statistics to stderr. 
375/// 376void SourceManager::PrintStats() const { 377 llvm::cerr << "\n*** Source Manager Stats:\n"; 378 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 379 << " mem buffers mapped, " << FileIDs.size() 380 << " file ID's allocated.\n"; 381 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 382 << MacroIDs.size() << " macro expansion FileID's.\n"; 383 384 unsigned NumLineNumsComputed = 0; 385 unsigned NumFileBytesMapped = 0; 386 for (std::set<ContentCache>::const_iterator I = 387 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 388 NumLineNumsComputed += I->SourceLineCache != 0; 389 NumFileBytesMapped += I->getSizeBytesMapped(); 390 } 391 392 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 393 << NumLineNumsComputed << " files with line #'s computed.\n"; 394} 395 396//===----------------------------------------------------------------------===// 397// Serialization. 398//===----------------------------------------------------------------------===// 399 400void ContentCache::Emit(llvm::Serializer& S) const { 401 S.FlushRecord(); 402 S.EmitPtr(this); 403 404 if (Entry) { 405 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 406 407 if (Fname.isAbsolute()) 408 S.EmitCStr(Fname.c_str()); 409 else { 410 // Create an absolute path. 411 // FIXME: This will potentially contain ".." and "." in the path. 
412 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 413 path.appendComponent(Fname.c_str()); 414 S.EmitCStr(path.c_str()); 415 } 416 } 417 else { 418 const char* p = Buffer->getBufferStart(); 419 const char* e = Buffer->getBufferEnd(); 420 421 S.EmitInt(e-p); 422 423 for ( ; p != e; ++p) 424 S.EmitInt(*p); 425 } 426 427 S.FlushRecord(); 428} 429 430void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 431 SourceManager& SMgr, 432 FileManager* FMgr, 433 std::vector<char>& Buf) { 434 if (FMgr) { 435 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 436 D.ReadCStr(Buf,false); 437 438 // Create/fetch the FileEntry. 439 const char* start = &Buf[0]; 440 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 441 442 // FIXME: Ideally we want a lazy materialization of the ContentCache 443 // anyway, because we don't want to read in source files unless this 444 // is absolutely needed. 445 if (!E) 446 D.RegisterPtr(PtrID,NULL); 447 else 448 // Get the ContextCache object and register it with the deserializer. 449 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 450 } 451 else { 452 // Register the ContextCache object with the deserializer. 453 SMgr.MemBufferInfos.push_back(ContentCache()); 454 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 455 D.RegisterPtr(&Entry); 456 457 // Create the buffer. 458 unsigned Size = D.ReadInt(); 459 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 460 461 // Read the contents of the buffer. 
462 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 463 for (unsigned i = 0; i < Size ; ++i) 464 p[i] = D.ReadInt(); 465 } 466} 467 468void FileIDInfo::Emit(llvm::Serializer& S) const { 469 S.Emit(IncludeLoc); 470 S.EmitInt(ChunkNo); 471 S.EmitPtr(Content); 472} 473 474FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 475 FileIDInfo I; 476 I.IncludeLoc = SourceLocation::ReadVal(D); 477 I.ChunkNo = D.ReadInt(); 478 D.ReadPtr(I.Content,false); 479 return I; 480} 481 482void MacroIDInfo::Emit(llvm::Serializer& S) const { 483 S.Emit(InstantiationLoc); 484 S.Emit(SpellingLoc); 485} 486 487MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 488 MacroIDInfo I; 489 I.InstantiationLoc = SourceLocation::ReadVal(D); 490 I.SpellingLoc = SourceLocation::ReadVal(D); 491 return I; 492} 493 494void SourceManager::Emit(llvm::Serializer& S) const { 495 S.EnterBlock(); 496 S.EmitPtr(this); 497 S.EmitInt(MainFileID); 498 499 // Emit: FileInfos. Just emit the file name. 500 S.EnterBlock(); 501 502 std::for_each(FileInfos.begin(),FileInfos.end(), 503 S.MakeEmitter<ContentCache>()); 504 505 S.ExitBlock(); 506 507 // Emit: MemBufferInfos 508 S.EnterBlock(); 509 510 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 511 S.MakeEmitter<ContentCache>()); 512 513 S.ExitBlock(); 514 515 // Emit: FileIDs 516 S.EmitInt(FileIDs.size()); 517 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 518 519 // Emit: MacroIDs 520 S.EmitInt(MacroIDs.size()); 521 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 522 523 S.ExitBlock(); 524} 525 526SourceManager* 527SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 528 SourceManager *M = new SourceManager(); 529 D.RegisterPtr(M); 530 531 // Read: the FileID of the main source file of the translation unit. 532 M->MainFileID = D.ReadInt(); 533 534 std::vector<char> Buf; 535 536 { // Read: FileInfos. 
537 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 538 while (!D.FinishedBlock(BLoc)) 539 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 540 } 541 542 { // Read: MemBufferInfos. 543 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 544 while (!D.FinishedBlock(BLoc)) 545 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 546 } 547 548 // Read: FileIDs. 549 unsigned Size = D.ReadInt(); 550 M->FileIDs.reserve(Size); 551 for (; Size > 0 ; --Size) 552 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 553 554 // Read: MacroIDs. 555 Size = D.ReadInt(); 556 M->MacroIDs.reserve(Size); 557 for (; Size > 0 ; --Size) 558 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 559 560 return M; 561} 562