SourceManager.cpp revision a90a4d4a0a365f991b92e925436ec63ef4969839
1//===--- SourceManager.cpp - Track and cache source files -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/SourceManager.h" 15#include "clang/Basic/FileManager.h" 16#include "llvm/Support/Compiler.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/System/Path.h" 19#include "llvm/Bitcode/Serialize.h" 20#include "llvm/Bitcode/Deserialize.h" 21#include "llvm/Support/Streams.h" 22#include <algorithm> 23using namespace clang; 24using namespace SrcMgr; 25using llvm::MemoryBuffer; 26 27// This (temporary) directive toggles between lazy and eager creation of 28// MemBuffers. This directive is not permanent, and is here to test a few 29// potential optimizations in PTH. Once it is clear whether eager or lazy 30// creation of MemBuffers is better this directive will get removed. 31#define LAZY 32 33ContentCache::~ContentCache() { 34 delete Buffer; 35 delete [] SourceLineCache; 36} 37 38/// getSizeBytesMapped - Returns the number of bytes actually mapped for 39/// this ContentCache. This can be 0 if the MemBuffer was not actually 40/// instantiated. 41unsigned ContentCache::getSizeBytesMapped() const { 42 return Buffer ? Buffer->getBufferSize() : 0; 43} 44 45/// getSize - Returns the size of the content encapsulated by this ContentCache. 46/// This can be the size of the source file or the size of an arbitrary 47/// scratch buffer. If the ContentCache encapsulates a source file, that 48/// file is not lazily brought in from disk to satisfy this query. 49unsigned ContentCache::getSize() const { 50 return Entry ? Entry->getSize() : Buffer->getBufferSize(); 51} 52 53const llvm::MemoryBuffer* ContentCache::getBuffer() const { 54#ifdef LAZY 55 // Lazily create the Buffer for ContentCaches that wrap files. 56 if (!Buffer && Entry) { 57 // FIXME: Should we support a way to not have to do this check over 58 // and over if we cannot open the file? 59 Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize()); 60 } 61#endif 62 return Buffer; 63} 64 65 66/// getFileInfo - Create or return a cached FileInfo for the specified file. 67/// 68const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 69 70 assert(FileEnt && "Didn't specify a file entry to use?"); 71 // Do we already have information about this file? 72 std::set<ContentCache>::iterator I = 73 FileInfos.lower_bound(ContentCache(FileEnt)); 74 75 if (I != FileInfos.end() && I->Entry == FileEnt) 76 return &*I; 77 78 // Nope, get information. 79#ifndef LAZY 80 const MemoryBuffer *File = 81 MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize()); 82 if (File == 0) 83 return 0; 84#endif 85 86 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 87#ifndef LAZY 88 Entry.setBuffer(File); 89#endif 90 Entry.SourceLineCache = 0; 91 Entry.NumLines = 0; 92 return &Entry; 93} 94 95 96/// createMemBufferContentCache - Create a new ContentCache for the specified 97/// memory buffer. This does no caching. 98const ContentCache* 99SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 100 // Add a new ContentCache to the MemBufferInfos list and return it. We 101 // must default construct the object first that the instance actually 102 // stored within MemBufferInfos actually owns the Buffer, and not any 103 // temporary we would use in the call to "push_back". 104 MemBufferInfos.push_back(ContentCache()); 105 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 106 Entry.setBuffer(Buffer); 107 return &Entry; 108} 109 110 111/// createFileID - Create a new fileID for the specified ContentCache and 112/// include position. This works regardless of whether the ContentCache 113/// corresponds to a file or some other input source. 114FileID SourceManager::createFileID(const ContentCache *File, 115 SourceLocation IncludePos, 116 SrcMgr::CharacteristicKind FileCharacter) { 117 // If FileEnt is really large (e.g. it's a large .i file), we may not be able 118 // to fit an arbitrary position in the file in the FilePos field. To handle 119 // this, we create one FileID for each chunk of the file that fits in a 120 // FilePos field. 121 unsigned FileSize = File->getSize(); 122 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 123 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter)); 124 assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) && 125 "Ran out of file ID's!"); 126 return FileID::Create(FileIDs.size()); 127 } 128 129 // Create one FileID for each chunk of the file. 130 unsigned Result = FileIDs.size()+1; 131 132 unsigned ChunkNo = 0; 133 while (1) { 134 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File, 135 FileCharacter)); 136 137 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 138 FileSize -= (1 << SourceLocation::FilePosBits); 139 } 140 141 assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) && 142 "Ran out of file ID's!"); 143 return FileID::Create(Result); 144} 145 146/// getInstantiationLoc - Return a new SourceLocation that encodes the fact 147/// that a token from SpellingLoc should actually be referenced from 148/// InstantiationLoc. 149SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc, 150 SourceLocation InstantLoc) { 151 // The specified source location may be a mapped location, due to a macro 152 // instantiation or #line directive. Strip off this information to find out 153 // where the characters are actually located. 154 SpellingLoc = getSpellingLoc(SpellingLoc); 155 156 // Resolve InstantLoc down to a real instantiation location. 157 InstantLoc = getInstantiationLoc(InstantLoc); 158 159 160 // If the last macro id is close to the currently requested location, try to 161 // reuse it. This implements a small cache. 162 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 163 MacroIDInfo &LastOne = MacroIDs[i]; 164 165 // The instanitation point and source SpellingLoc have to exactly match to 166 // reuse (for now). We could allow "nearby" instantiations in the future. 167 if (LastOne.getInstantiationLoc() != InstantLoc || 168 LastOne.getSpellingLoc().getChunkID() != SpellingLoc.getChunkID()) 169 continue; 170 171 // Check to see if the spellloc of the token came from near enough to reuse. 172 int SpellDelta = SpellingLoc.getRawFilePos() - 173 LastOne.getSpellingLoc().getRawFilePos(); 174 if (SourceLocation::isValidMacroSpellingOffs(SpellDelta)) 175 return SourceLocation::getMacroLoc(i, SpellDelta); 176 } 177 178 179 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc)); 180 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 181} 182 183/// getBufferData - Return a pointer to the start and end of the character 184/// data for the specified location. 185std::pair<const char*, const char*> 186SourceManager::getBufferData(SourceLocation Loc) const { 187 const llvm::MemoryBuffer *Buf = getBuffer(getCanonicalFileID(Loc)); 188 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 189} 190 191std::pair<const char*, const char*> 192SourceManager::getBufferData(FileID FID) const { 193 const llvm::MemoryBuffer *Buf = getBuffer(FID); 194 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 195} 196 197 198 199/// getCharacterData - Return a pointer to the start of the specified location 200/// in the appropriate MemoryBuffer. 201const char *SourceManager::getCharacterData(SourceLocation SL) const { 202 // Note that this is a hot function in the getSpelling() path, which is 203 // heavily used by -E mode. 204 SL = getSpellingLoc(SL); 205 206 std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(SL); 207 208 // Note that calling 'getBuffer()' may lazily page in a source file. 209 return getContentCache(LocInfo.first)->getBuffer()->getBufferStart() + 210 LocInfo.second; 211} 212 213 214/// getColumnNumber - Return the column # for the specified file position. 215/// this is significantly cheaper to compute than the line number. This returns 216/// zero if the column number isn't known. 217unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 218 if (Loc.getChunkID() == 0) return 0; 219 220 std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc); 221 unsigned FilePos = LocInfo.second; 222 223 const char *Buf = getBuffer(LocInfo.first)->getBufferStart(); 224 225 unsigned LineStart = FilePos; 226 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 227 --LineStart; 228 return FilePos-LineStart+1; 229} 230 231/// getSourceName - This method returns the name of the file or buffer that 232/// the SourceLocation specifies. This can be modified with #line directives, 233/// etc. 234const char *SourceManager::getSourceName(SourceLocation Loc) const { 235 if (Loc.getChunkID() == 0) return ""; 236 237 // To get the source name, first consult the FileEntry (if one exists) before 238 // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer. 239 const SrcMgr::ContentCache *C = getContentCacheForLoc(Loc); 240 return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); 241} 242 243static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 244static void ComputeLineNumbers(ContentCache* FI) { 245 // Note that calling 'getBuffer()' may lazily page in the file. 246 const MemoryBuffer *Buffer = FI->getBuffer(); 247 248 // Find the file offsets of all of the *physical* source lines. This does 249 // not look at trigraphs, escaped newlines, or anything else tricky. 250 std::vector<unsigned> LineOffsets; 251 252 // Line #1 starts at char 0. 253 LineOffsets.push_back(0); 254 255 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 256 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 257 unsigned Offs = 0; 258 while (1) { 259 // Skip over the contents of the line. 260 // TODO: Vectorize this? This is very performance sensitive for programs 261 // with lots of diagnostics and in -E mode. 262 const unsigned char *NextBuf = (const unsigned char *)Buf; 263 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 264 ++NextBuf; 265 Offs += NextBuf-Buf; 266 Buf = NextBuf; 267 268 if (Buf[0] == '\n' || Buf[0] == '\r') { 269 // If this is \n\r or \r\n, skip both characters. 270 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 271 ++Offs, ++Buf; 272 ++Offs, ++Buf; 273 LineOffsets.push_back(Offs); 274 } else { 275 // Otherwise, this is a null. If end of file, exit. 276 if (Buf == End) break; 277 // Otherwise, skip the null. 278 ++Offs, ++Buf; 279 } 280 } 281 282 // Copy the offsets into the FileInfo structure. 283 FI->NumLines = LineOffsets.size(); 284 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 285 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 286} 287 288/// getLineNumber - Given a SourceLocation, return the spelling line number 289/// for the position indicated. This requires building and caching a table of 290/// line offsets for the MemoryBuffer, so this is not cheap: use only when 291/// about to emit a diagnostic. 292unsigned SourceManager::getLineNumber(SourceLocation Loc) const { 293 if (Loc.getChunkID() == 0) return 0; 294 295 ContentCache *Content; 296 297 std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc); 298 299 if (LastLineNoFileIDQuery == LocInfo.first) 300 Content = LastLineNoContentCache; 301 else 302 Content = const_cast<ContentCache*>(getContentCache(LocInfo.first)); 303 304 // If this is the first use of line information for this buffer, compute the 305 /// SourceLineCache for it on demand. 306 if (Content->SourceLineCache == 0) 307 ComputeLineNumbers(Content); 308 309 // Okay, we know we have a line number table. Do a binary search to find the 310 // line number that this character position lands on. 311 unsigned *SourceLineCache = Content->SourceLineCache; 312 unsigned *SourceLineCacheStart = SourceLineCache; 313 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 314 315 unsigned QueriedFilePos = LocInfo.second+1; 316 317 // If the previous query was to the same file, we know both the file pos from 318 // that query and the line number returned. This allows us to narrow the 319 // search space from the entire file to something near the match. 320 if (LastLineNoFileIDQuery == LocInfo.first) { 321 if (QueriedFilePos >= LastLineNoFilePos) { 322 SourceLineCache = SourceLineCache+LastLineNoResult-1; 323 324 // The query is likely to be nearby the previous one. Here we check to 325 // see if it is within 5, 10 or 20 lines. It can be far away in cases 326 // where big comment blocks and vertical whitespace eat up lines but 327 // contribute no tokens. 328 if (SourceLineCache+5 < SourceLineCacheEnd) { 329 if (SourceLineCache[5] > QueriedFilePos) 330 SourceLineCacheEnd = SourceLineCache+5; 331 else if (SourceLineCache+10 < SourceLineCacheEnd) { 332 if (SourceLineCache[10] > QueriedFilePos) 333 SourceLineCacheEnd = SourceLineCache+10; 334 else if (SourceLineCache+20 < SourceLineCacheEnd) { 335 if (SourceLineCache[20] > QueriedFilePos) 336 SourceLineCacheEnd = SourceLineCache+20; 337 } 338 } 339 } 340 } else { 341 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 342 } 343 } 344 345 // If the spread is large, do a "radix" test as our initial guess, based on 346 // the assumption that lines average to approximately the same length. 347 // NOTE: This is currently disabled, as it does not appear to be profitable in 348 // initial measurements. 349 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 350 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 351 352 // Take a stab at guessing where it is. 353 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 354 355 // Check for -10 and +10 lines. 356 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 357 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 358 359 // If the computed lower bound is less than the query location, move it in. 360 if (SourceLineCache < SourceLineCacheStart+LowerBound && 361 SourceLineCacheStart[LowerBound] < QueriedFilePos) 362 SourceLineCache = SourceLineCacheStart+LowerBound; 363 364 // If the computed upper bound is greater than the query location, move it. 365 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 366 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 367 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 368 } 369 370 unsigned *Pos 371 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 372 unsigned LineNo = Pos-SourceLineCacheStart; 373 374 LastLineNoFileIDQuery = LocInfo.first; 375 LastLineNoContentCache = Content; 376 LastLineNoFilePos = QueriedFilePos; 377 LastLineNoResult = LineNo; 378 return LineNo; 379} 380 381/// PrintStats - Print statistics to stderr. 382/// 383void SourceManager::PrintStats() const { 384 llvm::cerr << "\n*** Source Manager Stats:\n"; 385 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 386 << " mem buffers mapped, " << FileIDs.size() 387 << " file ID's allocated.\n"; 388 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 389 << MacroIDs.size() << " macro expansion FileID's.\n"; 390 391 unsigned NumLineNumsComputed = 0; 392 unsigned NumFileBytesMapped = 0; 393 for (std::set<ContentCache>::const_iterator I = 394 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 395 NumLineNumsComputed += I->SourceLineCache != 0; 396 NumFileBytesMapped += I->getSizeBytesMapped(); 397 } 398 399 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 400 << NumLineNumsComputed << " files with line #'s computed.\n"; 401} 402 403//===----------------------------------------------------------------------===// 404// Serialization. 405//===----------------------------------------------------------------------===// 406 407void ContentCache::Emit(llvm::Serializer& S) const { 408 S.FlushRecord(); 409 S.EmitPtr(this); 410 411 if (Entry) { 412 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 413 414 if (Fname.isAbsolute()) 415 S.EmitCStr(Fname.c_str()); 416 else { 417 // Create an absolute path. 418 // FIXME: This will potentially contain ".." and "." in the path. 419 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 420 path.appendComponent(Fname.c_str()); 421 S.EmitCStr(path.c_str()); 422 } 423 } 424 else { 425 const char* p = Buffer->getBufferStart(); 426 const char* e = Buffer->getBufferEnd(); 427 428 S.EmitInt(e-p); 429 430 for ( ; p != e; ++p) 431 S.EmitInt(*p); 432 } 433 434 S.FlushRecord(); 435} 436 437void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 438 SourceManager& SMgr, 439 FileManager* FMgr, 440 std::vector<char>& Buf) { 441 if (FMgr) { 442 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 443 D.ReadCStr(Buf,false); 444 445 // Create/fetch the FileEntry. 446 const char* start = &Buf[0]; 447 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 448 449 // FIXME: Ideally we want a lazy materialization of the ContentCache 450 // anyway, because we don't want to read in source files unless this 451 // is absolutely needed. 452 if (!E) 453 D.RegisterPtr(PtrID,NULL); 454 else 455 // Get the ContextCache object and register it with the deserializer. 456 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 457 } 458 else { 459 // Register the ContextCache object with the deserializer. 460 SMgr.MemBufferInfos.push_back(ContentCache()); 461 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 462 D.RegisterPtr(&Entry); 463 464 // Create the buffer. 465 unsigned Size = D.ReadInt(); 466 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 467 468 // Read the contents of the buffer. 469 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 470 for (unsigned i = 0; i < Size ; ++i) 471 p[i] = D.ReadInt(); 472 } 473} 474 475void FileIDInfo::Emit(llvm::Serializer& S) const { 476 S.Emit(IncludeLoc); 477 S.EmitInt(ChunkNo); 478 S.EmitPtr(Content); 479} 480 481FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 482 FileIDInfo I; 483 I.IncludeLoc = SourceLocation::ReadVal(D); 484 I.ChunkNo = D.ReadInt(); 485 D.ReadPtr(I.Content,false); 486 return I; 487} 488 489void MacroIDInfo::Emit(llvm::Serializer& S) const { 490 S.Emit(InstantiationLoc); 491 S.Emit(SpellingLoc); 492} 493 494MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 495 MacroIDInfo I; 496 I.InstantiationLoc = SourceLocation::ReadVal(D); 497 I.SpellingLoc = SourceLocation::ReadVal(D); 498 return I; 499} 500 501void SourceManager::Emit(llvm::Serializer& S) const { 502 S.EnterBlock(); 503 S.EmitPtr(this); 504 S.EmitInt(MainFileID.getOpaqueValue()); 505 506 // Emit: FileInfos. Just emit the file name. 507 S.EnterBlock(); 508 509 std::for_each(FileInfos.begin(),FileInfos.end(), 510 S.MakeEmitter<ContentCache>()); 511 512 S.ExitBlock(); 513 514 // Emit: MemBufferInfos 515 S.EnterBlock(); 516 517 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 518 S.MakeEmitter<ContentCache>()); 519 520 S.ExitBlock(); 521 522 // Emit: FileIDs 523 S.EmitInt(FileIDs.size()); 524 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 525 526 // Emit: MacroIDs 527 S.EmitInt(MacroIDs.size()); 528 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 529 530 S.ExitBlock(); 531} 532 533SourceManager* 534SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 535 SourceManager *M = new SourceManager(); 536 D.RegisterPtr(M); 537 538 // Read: the FileID of the main source file of the translation unit. 539 M->MainFileID = FileID::Create(D.ReadInt()); 540 541 std::vector<char> Buf; 542 543 { // Read: FileInfos. 544 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 545 while (!D.FinishedBlock(BLoc)) 546 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 547 } 548 549 { // Read: MemBufferInfos. 550 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 551 while (!D.FinishedBlock(BLoc)) 552 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 553 } 554 555 // Read: FileIDs. 556 unsigned Size = D.ReadInt(); 557 M->FileIDs.reserve(Size); 558 for (; Size > 0 ; --Size) 559 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 560 561 // Read: MacroIDs. 562 Size = D.ReadInt(); 563 M->MacroIDs.reserve(Size); 564 for (; Size > 0 ; --Size) 565 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 566 567 return M; 568} 569