// SourceManager.cpp revision c16c208e8519476d838ad11fffc8e0ecea50550d
1//===--- SourceManager.cpp - Track and cache source files -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/SourceManager.h" 15#include "clang/Basic/FileManager.h" 16#include "llvm/Support/Compiler.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/System/Path.h" 19#include "llvm/Bitcode/Serialize.h" 20#include "llvm/Bitcode/Deserialize.h" 21#include "llvm/Support/Streams.h" 22#include <algorithm> 23using namespace clang; 24using namespace SrcMgr; 25using llvm::MemoryBuffer; 26 27ContentCache::~ContentCache() { 28 delete Buffer; 29 delete [] SourceLineCache; 30} 31 32/// getSizeBytesMapped - Returns the number of bytes actually mapped for 33/// this ContentCache. This can be 0 if the MemBuffer was not actually 34/// instantiated. 35unsigned ContentCache::getSizeBytesMapped() const { 36 return Buffer ? Buffer->getBufferSize() : 0; 37} 38 39/// getSize - Returns the size of the content encapsulated by this ContentCache. 40/// This can be the size of the source file or the size of an arbitrary 41/// scratch buffer. If the ContentCache encapsulates a source file, that 42/// file is not lazily brought in from disk to satisfy this query. 43unsigned ContentCache::getSize() const { 44 return Entry ? Entry->getSize() : Buffer->getBufferSize(); 45} 46 47const llvm::MemoryBuffer* ContentCache::getBuffer() const { 48 return Buffer; 49} 50 51 52/// getFileInfo - Create or return a cached FileInfo for the specified file. 
53/// 54const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 55 56 assert(FileEnt && "Didn't specify a file entry to use?"); 57 // Do we already have information about this file? 58 std::set<ContentCache>::iterator I = 59 FileInfos.lower_bound(ContentCache(FileEnt)); 60 61 if (I != FileInfos.end() && I->Entry == FileEnt) 62 return &*I; 63 64 // Nope, get information. 65 const MemoryBuffer *File = 66 MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize()); 67 if (File == 0) 68 return 0; 69 70 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 71 72 // FIXME: Shortly the above logic that creates a MemBuffer will be moved 73 // to ContentCache::getBuffer(). This way it can be done lazily. 74 Entry.setBuffer(File); 75 Entry.SourceLineCache = 0; 76 Entry.NumLines = 0; 77 return &Entry; 78} 79 80 81/// createMemBufferContentCache - Create a new ContentCache for the specified 82/// memory buffer. This does no caching. 83const ContentCache* 84SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 85 // Add a new ContentCache to the MemBufferInfos list and return it. We 86 // must default construct the object first that the instance actually 87 // stored within MemBufferInfos actually owns the Buffer, and not any 88 // temporary we would use in the call to "push_back". 89 MemBufferInfos.push_back(ContentCache()); 90 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 91 Entry.setBuffer(Buffer); 92 return &Entry; 93} 94 95 96/// createFileID - Create a new fileID for the specified ContentCache and 97/// include position. This works regardless of whether the ContentCache 98/// corresponds to a file or some other input source. 99unsigned SourceManager::createFileID(const ContentCache *File, 100 SourceLocation IncludePos, 101 SrcMgr::CharacteristicKind FileCharacter) { 102 // If FileEnt is really large (e.g. 
it's a large .i file), we may not be able 103 // to fit an arbitrary position in the file in the FilePos field. To handle 104 // this, we create one FileID for each chunk of the file that fits in a 105 // FilePos field. 106 unsigned FileSize = File->getSize(); 107 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 108 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter)); 109 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 110 "Ran out of file ID's!"); 111 return FileIDs.size(); 112 } 113 114 // Create one FileID for each chunk of the file. 115 unsigned Result = FileIDs.size()+1; 116 117 unsigned ChunkNo = 0; 118 while (1) { 119 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File, 120 FileCharacter)); 121 122 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 123 FileSize -= (1 << SourceLocation::FilePosBits); 124 } 125 126 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 127 "Ran out of file ID's!"); 128 return Result; 129} 130 131/// getInstantiationLoc - Return a new SourceLocation that encodes the fact 132/// that a token from physloc PhysLoc should actually be referenced from 133/// InstantiationLoc. 134SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, 135 SourceLocation InstantLoc) { 136 // The specified source location may be a mapped location, due to a macro 137 // instantiation or #line directive. Strip off this information to find out 138 // where the characters are actually located. 139 PhysLoc = getPhysicalLoc(PhysLoc); 140 141 // Resolve InstantLoc down to a real logical location. 142 InstantLoc = getLogicalLoc(InstantLoc); 143 144 145 // If the last macro id is close to the currently requested location, try to 146 // reuse it. This implements a small cache. 
147 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 148 MacroIDInfo &LastOne = MacroIDs[i]; 149 150 // The instanitation point and source physloc have to exactly match to reuse 151 // (for now). We could allow "nearby" instantiations in the future. 152 if (LastOne.getVirtualLoc() != InstantLoc || 153 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID()) 154 continue; 155 156 // Check to see if the physloc of the token came from near enough to reuse. 157 int PhysDelta = PhysLoc.getRawFilePos() - 158 LastOne.getPhysicalLoc().getRawFilePos(); 159 if (SourceLocation::isValidMacroPhysOffs(PhysDelta)) 160 return SourceLocation::getMacroLoc(i, PhysDelta); 161 } 162 163 164 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc)); 165 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 166} 167 168/// getBufferData - Return a pointer to the start and end of the character 169/// data for the specified FileID. 170std::pair<const char*, const char*> 171SourceManager::getBufferData(unsigned FileID) const { 172 const llvm::MemoryBuffer *Buf = getBuffer(FileID); 173 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 174} 175 176 177/// getCharacterData - Return a pointer to the start of the specified location 178/// in the appropriate MemoryBuffer. 179const char *SourceManager::getCharacterData(SourceLocation SL) const { 180 // Note that this is a hot function in the getSpelling() path, which is 181 // heavily used by -E mode. 182 SL = getPhysicalLoc(SL); 183 184 // Note that calling 'getBuffer()' may lazily page in a source file. 185 return getContentCache(SL.getFileID())->getBuffer()->getBufferStart() + 186 getFullFilePos(SL); 187} 188 189 190/// getColumnNumber - Return the column # for the specified file position. 191/// this is significantly cheaper to compute than the line number. This returns 192/// zero if the column number isn't known. 
193unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 194 unsigned FileID = Loc.getFileID(); 195 if (FileID == 0) return 0; 196 197 unsigned FilePos = getFullFilePos(Loc); 198 const MemoryBuffer *Buffer = getBuffer(FileID); 199 const char *Buf = Buffer->getBufferStart(); 200 201 unsigned LineStart = FilePos; 202 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 203 --LineStart; 204 return FilePos-LineStart+1; 205} 206 207/// getSourceName - This method returns the name of the file or buffer that 208/// the SourceLocation specifies. This can be modified with #line directives, 209/// etc. 210const char *SourceManager::getSourceName(SourceLocation Loc) const { 211 unsigned FileID = Loc.getFileID(); 212 if (FileID == 0) return ""; 213 214 // To get the source name, first consult the FileEntry (if one exists) before 215 // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer. 216 const SrcMgr::ContentCache* C = getContentCache(FileID); 217 return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); 218} 219 220static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 221static void ComputeLineNumbers(ContentCache* FI) { 222 // Note that calling 'getBuffer()' may lazily page in the file. 223 const MemoryBuffer *Buffer = FI->getBuffer(); 224 225 // Find the file offsets of all of the *physical* source lines. This does 226 // not look at trigraphs, escaped newlines, or anything else tricky. 227 std::vector<unsigned> LineOffsets; 228 229 // Line #1 starts at char 0. 230 LineOffsets.push_back(0); 231 232 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 233 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 234 unsigned Offs = 0; 235 while (1) { 236 // Skip over the contents of the line. 237 // TODO: Vectorize this? This is very performance sensitive for programs 238 // with lots of diagnostics and in -E mode. 
239 const unsigned char *NextBuf = (const unsigned char *)Buf; 240 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 241 ++NextBuf; 242 Offs += NextBuf-Buf; 243 Buf = NextBuf; 244 245 if (Buf[0] == '\n' || Buf[0] == '\r') { 246 // If this is \n\r or \r\n, skip both characters. 247 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 248 ++Offs, ++Buf; 249 ++Offs, ++Buf; 250 LineOffsets.push_back(Offs); 251 } else { 252 // Otherwise, this is a null. If end of file, exit. 253 if (Buf == End) break; 254 // Otherwise, skip the null. 255 ++Offs, ++Buf; 256 } 257 } 258 259 // Copy the offsets into the FileInfo structure. 260 FI->NumLines = LineOffsets.size(); 261 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 262 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 263} 264 265/// getLineNumber - Given a SourceLocation, return the physical line number 266/// for the position indicated. This requires building and caching a table of 267/// line offsets for the MemoryBuffer, so this is not cheap: use only when 268/// about to emit a diagnostic. 269unsigned SourceManager::getLineNumber(SourceLocation Loc) const { 270 unsigned FileID = Loc.getFileID(); 271 if (FileID == 0) return 0; 272 273 ContentCache* Content; 274 275 if (LastLineNoFileIDQuery == FileID) 276 Content = LastLineNoContentCache; 277 else 278 Content = const_cast<ContentCache*>(getContentCache(FileID)); 279 280 // If this is the first use of line information for this buffer, compute the 281 /// SourceLineCache for it on demand. 282 if (Content->SourceLineCache == 0) 283 ComputeLineNumbers(Content); 284 285 // Okay, we know we have a line number table. Do a binary search to find the 286 // line number that this character position lands on. 
287 unsigned *SourceLineCache = Content->SourceLineCache; 288 unsigned *SourceLineCacheStart = SourceLineCache; 289 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 290 291 unsigned QueriedFilePos = getFullFilePos(Loc)+1; 292 293 // If the previous query was to the same file, we know both the file pos from 294 // that query and the line number returned. This allows us to narrow the 295 // search space from the entire file to something near the match. 296 if (LastLineNoFileIDQuery == FileID) { 297 if (QueriedFilePos >= LastLineNoFilePos) { 298 SourceLineCache = SourceLineCache+LastLineNoResult-1; 299 300 // The query is likely to be nearby the previous one. Here we check to 301 // see if it is within 5, 10 or 20 lines. It can be far away in cases 302 // where big comment blocks and vertical whitespace eat up lines but 303 // contribute no tokens. 304 if (SourceLineCache+5 < SourceLineCacheEnd) { 305 if (SourceLineCache[5] > QueriedFilePos) 306 SourceLineCacheEnd = SourceLineCache+5; 307 else if (SourceLineCache+10 < SourceLineCacheEnd) { 308 if (SourceLineCache[10] > QueriedFilePos) 309 SourceLineCacheEnd = SourceLineCache+10; 310 else if (SourceLineCache+20 < SourceLineCacheEnd) { 311 if (SourceLineCache[20] > QueriedFilePos) 312 SourceLineCacheEnd = SourceLineCache+20; 313 } 314 } 315 } 316 } else { 317 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 318 } 319 } 320 321 // If the spread is large, do a "radix" test as our initial guess, based on 322 // the assumption that lines average to approximately the same length. 323 // NOTE: This is currently disabled, as it does not appear to be profitable in 324 // initial measurements. 325 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 326 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 327 328 // Take a stab at guessing where it is. 329 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 330 331 // Check for -10 and +10 lines. 
332 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 333 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 334 335 // If the computed lower bound is less than the query location, move it in. 336 if (SourceLineCache < SourceLineCacheStart+LowerBound && 337 SourceLineCacheStart[LowerBound] < QueriedFilePos) 338 SourceLineCache = SourceLineCacheStart+LowerBound; 339 340 // If the computed upper bound is greater than the query location, move it. 341 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 342 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 343 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 344 } 345 346 unsigned *Pos 347 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 348 unsigned LineNo = Pos-SourceLineCacheStart; 349 350 LastLineNoFileIDQuery = FileID; 351 LastLineNoContentCache = Content; 352 LastLineNoFilePos = QueriedFilePos; 353 LastLineNoResult = LineNo; 354 return LineNo; 355} 356 357/// PrintStats - Print statistics to stderr. 358/// 359void SourceManager::PrintStats() const { 360 llvm::cerr << "\n*** Source Manager Stats:\n"; 361 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 362 << " mem buffers mapped, " << FileIDs.size() 363 << " file ID's allocated.\n"; 364 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 365 << MacroIDs.size() << " macro expansion FileID's.\n"; 366 367 unsigned NumLineNumsComputed = 0; 368 unsigned NumFileBytesMapped = 0; 369 for (std::set<ContentCache>::const_iterator I = 370 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 371 NumLineNumsComputed += I->SourceLineCache != 0; 372 NumFileBytesMapped += I->getSizeBytesMapped(); 373 } 374 375 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 376 << NumLineNumsComputed << " files with line #'s computed.\n"; 377} 378 379//===----------------------------------------------------------------------===// 380// Serialization. 
381//===----------------------------------------------------------------------===// 382 383void ContentCache::Emit(llvm::Serializer& S) const { 384 S.FlushRecord(); 385 S.EmitPtr(this); 386 387 if (Entry) { 388 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 389 390 if (Fname.isAbsolute()) 391 S.EmitCStr(Fname.c_str()); 392 else { 393 // Create an absolute path. 394 // FIXME: This will potentially contain ".." and "." in the path. 395 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 396 path.appendComponent(Fname.c_str()); 397 S.EmitCStr(path.c_str()); 398 } 399 } 400 else { 401 const char* p = Buffer->getBufferStart(); 402 const char* e = Buffer->getBufferEnd(); 403 404 S.EmitInt(e-p); 405 406 for ( ; p != e; ++p) 407 S.EmitInt(*p); 408 } 409 410 S.FlushRecord(); 411} 412 413void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 414 SourceManager& SMgr, 415 FileManager* FMgr, 416 std::vector<char>& Buf) { 417 if (FMgr) { 418 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 419 D.ReadCStr(Buf,false); 420 421 // Create/fetch the FileEntry. 422 const char* start = &Buf[0]; 423 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 424 425 // FIXME: Ideally we want a lazy materialization of the ContentCache 426 // anyway, because we don't want to read in source files unless this 427 // is absolutely needed. 428 if (!E) 429 D.RegisterPtr(PtrID,NULL); 430 else 431 // Get the ContextCache object and register it with the deserializer. 432 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 433 } 434 else { 435 // Register the ContextCache object with the deserializer. 436 SMgr.MemBufferInfos.push_back(ContentCache()); 437 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 438 D.RegisterPtr(&Entry); 439 440 // Create the buffer. 441 unsigned Size = D.ReadInt(); 442 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 443 444 // Read the contents of the buffer. 
445 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 446 for (unsigned i = 0; i < Size ; ++i) 447 p[i] = D.ReadInt(); 448 } 449} 450 451void FileIDInfo::Emit(llvm::Serializer& S) const { 452 S.Emit(IncludeLoc); 453 S.EmitInt(ChunkNo); 454 S.EmitPtr(Content); 455} 456 457FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 458 FileIDInfo I; 459 I.IncludeLoc = SourceLocation::ReadVal(D); 460 I.ChunkNo = D.ReadInt(); 461 D.ReadPtr(I.Content,false); 462 return I; 463} 464 465void MacroIDInfo::Emit(llvm::Serializer& S) const { 466 S.Emit(VirtualLoc); 467 S.Emit(PhysicalLoc); 468} 469 470MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 471 MacroIDInfo I; 472 I.VirtualLoc = SourceLocation::ReadVal(D); 473 I.PhysicalLoc = SourceLocation::ReadVal(D); 474 return I; 475} 476 477void SourceManager::Emit(llvm::Serializer& S) const { 478 S.EnterBlock(); 479 S.EmitPtr(this); 480 S.EmitInt(MainFileID); 481 482 // Emit: FileInfos. Just emit the file name. 483 S.EnterBlock(); 484 485 std::for_each(FileInfos.begin(),FileInfos.end(), 486 S.MakeEmitter<ContentCache>()); 487 488 S.ExitBlock(); 489 490 // Emit: MemBufferInfos 491 S.EnterBlock(); 492 493 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 494 S.MakeEmitter<ContentCache>()); 495 496 S.ExitBlock(); 497 498 // Emit: FileIDs 499 S.EmitInt(FileIDs.size()); 500 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 501 502 // Emit: MacroIDs 503 S.EmitInt(MacroIDs.size()); 504 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 505 506 S.ExitBlock(); 507} 508 509SourceManager* 510SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 511 SourceManager *M = new SourceManager(); 512 D.RegisterPtr(M); 513 514 // Read: the FileID of the main source file of the translation unit. 515 M->MainFileID = D.ReadInt(); 516 517 std::vector<char> Buf; 518 519 { // Read: FileInfos. 
520 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 521 while (!D.FinishedBlock(BLoc)) 522 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 523 } 524 525 { // Read: MemBufferInfos. 526 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 527 while (!D.FinishedBlock(BLoc)) 528 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 529 } 530 531 // Read: FileIDs. 532 unsigned Size = D.ReadInt(); 533 M->FileIDs.reserve(Size); 534 for (; Size > 0 ; --Size) 535 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 536 537 // Read: MacroIDs. 538 Size = D.ReadInt(); 539 M->MacroIDs.reserve(Size); 540 for (; Size > 0 ; --Size) 541 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 542 543 return M; 544} 545