SourceManager.cpp revision 721818304ac462d8c6ce05eecd02884033db78f1
1//===--- SourceManager.cpp - Track and cache source files -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/SourceManager.h" 15#include "clang/Basic/FileManager.h" 16#include "llvm/Support/Compiler.h" 17#include "llvm/Support/MemoryBuffer.h" 18#include "llvm/System/Path.h" 19#include "llvm/Bitcode/Serialize.h" 20#include "llvm/Bitcode/Deserialize.h" 21#include "llvm/Support/Streams.h" 22#include <algorithm> 23using namespace clang; 24using namespace SrcMgr; 25using llvm::MemoryBuffer; 26 27ContentCache::~ContentCache() { 28 delete Buffer; 29 delete [] SourceLineCache; 30} 31 32/// getFileInfo - Create or return a cached FileInfo for the specified file. 33/// 34const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 35 36 assert(FileEnt && "Didn't specify a file entry to use?"); 37 // Do we already have information about this file? 38 std::set<ContentCache>::iterator I = 39 FileInfos.lower_bound(ContentCache(FileEnt)); 40 41 if (I != FileInfos.end() && I->Entry == FileEnt) 42 return &*I; 43 44 // Nope, get information. 45 const MemoryBuffer *File = 46 MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize()); 47 if (File == 0) 48 return 0; 49 50 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 51 52 Entry.Buffer = File; 53 Entry.SourceLineCache = 0; 54 Entry.NumLines = 0; 55 return &Entry; 56} 57 58 59/// createMemBufferContentCache - Create a new ContentCache for the specified 60/// memory buffer. This does no caching. 61const ContentCache* 62SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 63 // Add a new ContentCache to the MemBufferInfos list and return it. We 64 // must default construct the object first that the instance actually 65 // stored within MemBufferInfos actually owns the Buffer, and not any 66 // temporary we would use in the call to "push_back". 67 MemBufferInfos.push_back(ContentCache()); 68 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 69 Entry.Buffer = Buffer; 70 return &Entry; 71} 72 73 74/// createFileID - Create a new fileID for the specified ContentCache and 75/// include position. This works regardless of whether the ContentCache 76/// corresponds to a file or some other input source. 77unsigned SourceManager::createFileID(const ContentCache *File, 78 SourceLocation IncludePos, 79 unsigned DirCharacter) { 80 // If FileEnt is really large (e.g. it's a large .i file), we may not be able 81 // to fit an arbitrary position in the file in the FilePos field. To handle 82 // this, we create one FileID for each chunk of the file that fits in a 83 // FilePos field. 84 unsigned FileSize = File->Buffer->getBufferSize(); 85 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 86 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, DirCharacter)); 87 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 88 "Ran out of file ID's!"); 89 return FileIDs.size(); 90 } 91 92 // Create one FileID for each chunk of the file. 93 unsigned Result = FileIDs.size()+1; 94 95 unsigned ChunkNo = 0; 96 while (1) { 97 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File, 98 DirCharacter)); 99 100 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 101 FileSize -= (1 << SourceLocation::FilePosBits); 102 } 103 104 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 105 "Ran out of file ID's!"); 106 return Result; 107} 108 109/// getInstantiationLoc - Return a new SourceLocation that encodes the fact 110/// that a token from physloc PhysLoc should actually be referenced from 111/// InstantiationLoc. 112SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, 113 SourceLocation InstantLoc) { 114 // The specified source location may be a mapped location, due to a macro 115 // instantiation or #line directive. Strip off this information to find out 116 // where the characters are actually located. 117 PhysLoc = getPhysicalLoc(PhysLoc); 118 119 // Resolve InstantLoc down to a real logical location. 120 InstantLoc = getLogicalLoc(InstantLoc); 121 122 123 // If the last macro id is close to the currently requested location, try to 124 // reuse it. This implements a small cache. 125 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 126 MacroIDInfo &LastOne = MacroIDs[i]; 127 128 // The instanitation point and source physloc have to exactly match to reuse 129 // (for now). We could allow "nearby" instantiations in the future. 130 if (LastOne.getVirtualLoc() != InstantLoc || 131 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID()) 132 continue; 133 134 // Check to see if the physloc of the token came from near enough to reuse. 135 int PhysDelta = PhysLoc.getRawFilePos() - 136 LastOne.getPhysicalLoc().getRawFilePos(); 137 if (SourceLocation::isValidMacroPhysOffs(PhysDelta)) 138 return SourceLocation::getMacroLoc(i, PhysDelta); 139 } 140 141 142 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc)); 143 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 144} 145 146/// getBufferData - Return a pointer to the start and end of the character 147/// data for the specified FileID. 148std::pair<const char*, const char*> 149SourceManager::getBufferData(unsigned FileID) const { 150 const llvm::MemoryBuffer *Buf = getBuffer(FileID); 151 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 152} 153 154 155/// getCharacterData - Return a pointer to the start of the specified location 156/// in the appropriate MemoryBuffer. 157const char *SourceManager::getCharacterData(SourceLocation SL) const { 158 // Note that this is a hot function in the getSpelling() path, which is 159 // heavily used by -E mode. 160 SL = getPhysicalLoc(SL); 161 162 return getContentCache(SL.getFileID())->Buffer->getBufferStart() + 163 getFullFilePos(SL); 164} 165 166 167/// getColumnNumber - Return the column # for the specified file position. 168/// this is significantly cheaper to compute than the line number. This returns 169/// zero if the column number isn't known. 170unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 171 unsigned FileID = Loc.getFileID(); 172 if (FileID == 0) return 0; 173 174 unsigned FilePos = getFullFilePos(Loc); 175 const MemoryBuffer *Buffer = getBuffer(FileID); 176 const char *Buf = Buffer->getBufferStart(); 177 178 unsigned LineStart = FilePos; 179 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 180 --LineStart; 181 return FilePos-LineStart+1; 182} 183 184/// getSourceName - This method returns the name of the file or buffer that 185/// the SourceLocation specifies. This can be modified with #line directives, 186/// etc. 187const char *SourceManager::getSourceName(SourceLocation Loc) const { 188 unsigned FileID = Loc.getFileID(); 189 if (FileID == 0) return ""; 190 return getContentCache(FileID)->Buffer->getBufferIdentifier(); 191} 192 193static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 194static void ComputeLineNumbers(ContentCache* FI) { 195 const MemoryBuffer *Buffer = FI->Buffer; 196 197 // Find the file offsets of all of the *physical* source lines. This does 198 // not look at trigraphs, escaped newlines, or anything else tricky. 199 std::vector<unsigned> LineOffsets; 200 201 // Line #1 starts at char 0. 202 LineOffsets.push_back(0); 203 204 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 205 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 206 unsigned Offs = 0; 207 while (1) { 208 // Skip over the contents of the line. 209 // TODO: Vectorize this? This is very performance sensitive for programs 210 // with lots of diagnostics and in -E mode. 211 const unsigned char *NextBuf = (const unsigned char *)Buf; 212 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 213 ++NextBuf; 214 Offs += NextBuf-Buf; 215 Buf = NextBuf; 216 217 if (Buf[0] == '\n' || Buf[0] == '\r') { 218 // If this is \n\r or \r\n, skip both characters. 219 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 220 ++Offs, ++Buf; 221 ++Offs, ++Buf; 222 LineOffsets.push_back(Offs); 223 } else { 224 // Otherwise, this is a null. If end of file, exit. 225 if (Buf == End) break; 226 // Otherwise, skip the null. 227 ++Offs, ++Buf; 228 } 229 } 230 231 // Copy the offsets into the FileInfo structure. 232 FI->NumLines = LineOffsets.size(); 233 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 234 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 235} 236 237/// getLineNumber - Given a SourceLocation, return the physical line number 238/// for the position indicated. This requires building and caching a table of 239/// line offsets for the MemoryBuffer, so this is not cheap: use only when 240/// about to emit a diagnostic. 241unsigned SourceManager::getLineNumber(SourceLocation Loc) { 242 unsigned FileID = Loc.getFileID(); 243 if (FileID == 0) return 0; 244 245 ContentCache* Content; 246 247 if (LastLineNoFileIDQuery == FileID) 248 Content = LastLineNoContentCache; 249 else 250 Content = const_cast<ContentCache*>(getContentCache(FileID)); 251 252 // If this is the first use of line information for this buffer, compute the 253 /// SourceLineCache for it on demand. 254 if (Content->SourceLineCache == 0) 255 ComputeLineNumbers(Content); 256 257 // Okay, we know we have a line number table. Do a binary search to find the 258 // line number that this character position lands on. 259 unsigned *SourceLineCache = Content->SourceLineCache; 260 unsigned *SourceLineCacheStart = SourceLineCache; 261 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 262 263 unsigned QueriedFilePos = getFullFilePos(Loc)+1; 264 265 // If the previous query was to the same file, we know both the file pos from 266 // that query and the line number returned. This allows us to narrow the 267 // search space from the entire file to something near the match. 268 if (LastLineNoFileIDQuery == FileID) { 269 if (QueriedFilePos >= LastLineNoFilePos) { 270 SourceLineCache = SourceLineCache+LastLineNoResult-1; 271 272 // The query is likely to be nearby the previous one. Here we check to 273 // see if it is within 5, 10 or 20 lines. It can be far away in cases 274 // where big comment blocks and vertical whitespace eat up lines but 275 // contribute no tokens. 276 if (SourceLineCache+5 < SourceLineCacheEnd) { 277 if (SourceLineCache[5] > QueriedFilePos) 278 SourceLineCacheEnd = SourceLineCache+5; 279 else if (SourceLineCache+10 < SourceLineCacheEnd) { 280 if (SourceLineCache[10] > QueriedFilePos) 281 SourceLineCacheEnd = SourceLineCache+10; 282 else if (SourceLineCache+20 < SourceLineCacheEnd) { 283 if (SourceLineCache[20] > QueriedFilePos) 284 SourceLineCacheEnd = SourceLineCache+20; 285 } 286 } 287 } 288 } else { 289 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 290 } 291 } 292 293 // If the spread is large, do a "radix" test as our initial guess, based on 294 // the assumption that lines average to approximately the same length. 295 // NOTE: This is currently disabled, as it does not appear to be profitable in 296 // initial measurements. 297 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 298 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 299 300 // Take a stab at guessing where it is. 301 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 302 303 // Check for -10 and +10 lines. 304 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 305 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 306 307 // If the computed lower bound is less than the query location, move it in. 308 if (SourceLineCache < SourceLineCacheStart+LowerBound && 309 SourceLineCacheStart[LowerBound] < QueriedFilePos) 310 SourceLineCache = SourceLineCacheStart+LowerBound; 311 312 // If the computed upper bound is greater than the query location, move it. 313 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 314 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 315 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 316 } 317 318 unsigned *Pos 319 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 320 unsigned LineNo = Pos-SourceLineCacheStart; 321 322 LastLineNoFileIDQuery = FileID; 323 LastLineNoContentCache = Content; 324 LastLineNoFilePos = QueriedFilePos; 325 LastLineNoResult = LineNo; 326 return LineNo; 327} 328 329/// PrintStats - Print statistics to stderr. 330/// 331void SourceManager::PrintStats() const { 332 llvm::cerr << "\n*** Source Manager Stats:\n"; 333 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 334 << " mem buffers mapped, " << FileIDs.size() 335 << " file ID's allocated.\n"; 336 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 337 << MacroIDs.size() << " macro expansion FileID's.\n"; 338 339 unsigned NumLineNumsComputed = 0; 340 unsigned NumFileBytesMapped = 0; 341 for (std::set<ContentCache>::const_iterator I = 342 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 343 NumLineNumsComputed += I->SourceLineCache != 0; 344 NumFileBytesMapped += I->Buffer->getBufferSize(); 345 } 346 347 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 348 << NumLineNumsComputed << " files with line #'s computed.\n"; 349} 350 351//===----------------------------------------------------------------------===// 352// Serialization. 353//===----------------------------------------------------------------------===// 354 355void ContentCache::Emit(llvm::Serializer& S) const { 356 S.FlushRecord(); 357 S.EmitPtr(this); 358 359 if (Entry) { 360 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 361 362 if (Fname.isAbsolute()) 363 S.EmitCStr(Fname.c_str()); 364 else { 365 // Create an absolute path. 366 // FIXME: This will potentially contain ".." and "." in the path. 367 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 368 path.appendComponent(Fname.c_str()); 369 S.EmitCStr(path.c_str()); 370 } 371 } 372 else { 373 const char* p = Buffer->getBufferStart(); 374 const char* e = Buffer->getBufferEnd(); 375 376 S.EmitInt(e-p); 377 378 for ( ; p != e; ++p) 379 S.EmitInt(*p); 380 } 381 382 S.FlushRecord(); 383} 384 385void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 386 SourceManager& SMgr, 387 FileManager* FMgr, 388 std::vector<char>& Buf) { 389 if (FMgr) { 390 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 391 D.ReadCStr(Buf,false); 392 393 // Create/fetch the FileEntry. 394 const char* start = &Buf[0]; 395 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 396 397 // FIXME: Ideally we want a lazy materialization of the ContentCache 398 // anyway, because we don't want to read in source files unless this 399 // is absolutely needed. 400 if (!E) 401 D.RegisterPtr(PtrID,NULL); 402 else 403 // Get the ContextCache object and register it with the deserializer. 404 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 405 } 406 else { 407 // Register the ContextCache object with the deserializer. 408 SMgr.MemBufferInfos.push_back(ContentCache()); 409 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 410 D.RegisterPtr(&Entry); 411 412 // Create the buffer. 413 unsigned Size = D.ReadInt(); 414 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 415 416 // Read the contents of the buffer. 417 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 418 for (unsigned i = 0; i < Size ; ++i) 419 p[i] = D.ReadInt(); 420 } 421} 422 423void FileIDInfo::Emit(llvm::Serializer& S) const { 424 S.Emit(IncludeLoc); 425 S.EmitInt(ChunkNo); 426 S.EmitPtr(Content); 427} 428 429FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 430 FileIDInfo I; 431 I.IncludeLoc = SourceLocation::ReadVal(D); 432 I.ChunkNo = D.ReadInt(); 433 D.ReadPtr(I.Content,false); 434 return I; 435} 436 437void MacroIDInfo::Emit(llvm::Serializer& S) const { 438 S.Emit(VirtualLoc); 439 S.Emit(PhysicalLoc); 440} 441 442MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 443 MacroIDInfo I; 444 I.VirtualLoc = SourceLocation::ReadVal(D); 445 I.PhysicalLoc = SourceLocation::ReadVal(D); 446 return I; 447} 448 449void SourceManager::Emit(llvm::Serializer& S) const { 450 S.EnterBlock(); 451 S.EmitPtr(this); 452 S.EmitInt(MainFileID); 453 454 // Emit: FileInfos. Just emit the file name. 455 S.EnterBlock(); 456 457 std::for_each(FileInfos.begin(),FileInfos.end(), 458 S.MakeEmitter<ContentCache>()); 459 460 S.ExitBlock(); 461 462 // Emit: MemBufferInfos 463 S.EnterBlock(); 464 465 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 466 S.MakeEmitter<ContentCache>()); 467 468 S.ExitBlock(); 469 470 // Emit: FileIDs 471 S.EmitInt(FileIDs.size()); 472 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 473 474 // Emit: MacroIDs 475 S.EmitInt(MacroIDs.size()); 476 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 477 478 S.ExitBlock(); 479} 480 481SourceManager* 482SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 483 SourceManager *M = new SourceManager(); 484 D.RegisterPtr(M); 485 486 // Read: the FileID of the main source file of the translation unit. 487 M->MainFileID = D.ReadInt(); 488 489 std::vector<char> Buf; 490 491 { // Read: FileInfos. 492 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 493 while (!D.FinishedBlock(BLoc)) 494 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 495 } 496 497 { // Read: MemBufferInfos. 498 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 499 while (!D.FinishedBlock(BLoc)) 500 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 501 } 502 503 // Read: FileIDs. 504 unsigned Size = D.ReadInt(); 505 M->FileIDs.reserve(Size); 506 for (; Size > 0 ; --Size) 507 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 508 509 // Read: MacroIDs. 510 Size = D.ReadInt(); 511 M->MacroIDs.reserve(Size); 512 for (; Size > 0 ; --Size) 513 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 514 515 return M; 516} 517