SourceManager.cpp revision 3c1f7b615c03e55f8aaee14a5793c917c050b373
1//===--- SourceManager.cpp - Track and cache source files -----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SourceManager interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/SourceManager.h" 15#include "clang/Basic/FileManager.h" 16#include "llvm/Config/config.h" 17#include "llvm/Support/Compiler.h" 18#include "llvm/Support/MemoryBuffer.h" 19#include "llvm/System/Path.h" 20#include "llvm/Bitcode/Serialize.h" 21#include "llvm/Bitcode/Deserialize.h" 22#include "llvm/Support/Streams.h" 23#include <algorithm> 24#include <fcntl.h> 25using namespace clang; 26using namespace SrcMgr; 27using llvm::MemoryBuffer; 28 29ContentCache::~ContentCache() { 30 delete Buffer; 31 delete [] SourceLineCache; 32} 33 34/// getFileInfo - Create or return a cached FileInfo for the specified file. 35/// 36const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 37 38 assert(FileEnt && "Didn't specify a file entry to use?"); 39 // Do we already have information about this file? 40 std::set<ContentCache>::iterator I = 41 FileInfos.lower_bound(ContentCache(FileEnt)); 42 43 if (I != FileInfos.end() && I->Entry == FileEnt) 44 return &*I; 45 46 // Nope, get information. 47 const MemoryBuffer *File = 48 MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), 0, 49 FileEnt->getSize()); 50 if (File == 0) 51 return 0; 52 53 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 54 55 Entry.Buffer = File; 56 Entry.SourceLineCache = 0; 57 Entry.NumLines = 0; 58 return &Entry; 59} 60 61 62/// createMemBufferContentCache - Create a new ContentCache for the specified 63/// memory buffer. This does no caching. 64const ContentCache* 65SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 66 // Add a new ContentCache to the MemBufferInfos list and return it. We 67 // must default construct the object first that the instance actually 68 // stored within MemBufferInfos actually owns the Buffer, and not any 69 // temporary we would use in the call to "push_back". 70 MemBufferInfos.push_back(ContentCache()); 71 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 72 Entry.Buffer = Buffer; 73 return &Entry; 74} 75 76 77/// createFileID - Create a new fileID for the specified ContentCache and 78/// include position. This works regardless of whether the ContentCache 79/// corresponds to a file or some other input source. 80unsigned SourceManager::createFileID(const ContentCache *File, 81 SourceLocation IncludePos) { 82 // If FileEnt is really large (e.g. it's a large .i file), we may not be able 83 // to fit an arbitrary position in the file in the FilePos field. To handle 84 // this, we create one FileID for each chunk of the file that fits in a 85 // FilePos field. 86 unsigned FileSize = File->Buffer->getBufferSize(); 87 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 88 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File)); 89 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 90 "Ran out of file ID's!"); 91 return FileIDs.size(); 92 } 93 94 // Create one FileID for each chunk of the file. 95 unsigned Result = FileIDs.size()+1; 96 97 unsigned ChunkNo = 0; 98 while (1) { 99 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File)); 100 101 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 102 FileSize -= (1 << SourceLocation::FilePosBits); 103 } 104 105 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 106 "Ran out of file ID's!"); 107 return Result; 108} 109 110/// getInstantiationLoc - Return a new SourceLocation that encodes the fact 111/// that a token from physloc PhysLoc should actually be referenced from 112/// InstantiationLoc. 113SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, 114 SourceLocation InstantLoc) { 115 // The specified source location may be a mapped location, due to a macro 116 // instantiation or #line directive. Strip off this information to find out 117 // where the characters are actually located. 118 PhysLoc = getPhysicalLoc(PhysLoc); 119 120 // Resolve InstantLoc down to a real logical location. 121 InstantLoc = getLogicalLoc(InstantLoc); 122 123 124 // If the last macro id is close to the currently requested location, try to 125 // reuse it. This implements a small cache. 126 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 127 MacroIDInfo &LastOne = MacroIDs[i]; 128 129 // The instanitation point and source physloc have to exactly match to reuse 130 // (for now). We could allow "nearby" instantiations in the future. 131 if (LastOne.getVirtualLoc() != InstantLoc || 132 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID()) 133 continue; 134 135 // Check to see if the physloc of the token came from near enough to reuse. 136 int PhysDelta = PhysLoc.getRawFilePos() - 137 LastOne.getPhysicalLoc().getRawFilePos(); 138 if (SourceLocation::isValidMacroPhysOffs(PhysDelta)) 139 return SourceLocation::getMacroLoc(i, PhysDelta); 140 } 141 142 143 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc)); 144 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 145} 146 147/// getBufferData - Return a pointer to the start and end of the character 148/// data for the specified FileID. 149std::pair<const char*, const char*> 150SourceManager::getBufferData(unsigned FileID) const { 151 const llvm::MemoryBuffer *Buf = getBuffer(FileID); 152 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 153} 154 155 156/// getCharacterData - Return a pointer to the start of the specified location 157/// in the appropriate MemoryBuffer. 158const char *SourceManager::getCharacterData(SourceLocation SL) const { 159 // Note that this is a hot function in the getSpelling() path, which is 160 // heavily used by -E mode. 161 SL = getPhysicalLoc(SL); 162 163 return getContentCache(SL.getFileID())->Buffer->getBufferStart() + 164 getFullFilePos(SL); 165} 166 167 168/// getColumnNumber - Return the column # for the specified file position. 169/// this is significantly cheaper to compute than the line number. This returns 170/// zero if the column number isn't known. 171unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 172 unsigned FileID = Loc.getFileID(); 173 if (FileID == 0) return 0; 174 175 unsigned FilePos = getFullFilePos(Loc); 176 const MemoryBuffer *Buffer = getBuffer(FileID); 177 const char *Buf = Buffer->getBufferStart(); 178 179 unsigned LineStart = FilePos; 180 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 181 --LineStart; 182 return FilePos-LineStart+1; 183} 184 185/// getSourceName - This method returns the name of the file or buffer that 186/// the SourceLocation specifies. This can be modified with #line directives, 187/// etc. 188const char *SourceManager::getSourceName(SourceLocation Loc) const { 189 unsigned FileID = Loc.getFileID(); 190 if (FileID == 0) return ""; 191 return getContentCache(FileID)->Buffer->getBufferIdentifier(); 192} 193 194static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 195static void ComputeLineNumbers(ContentCache* FI) { 196 const MemoryBuffer *Buffer = FI->Buffer; 197 198 // Find the file offsets of all of the *physical* source lines. This does 199 // not look at trigraphs, escaped newlines, or anything else tricky. 200 std::vector<unsigned> LineOffsets; 201 202 // Line #1 starts at char 0. 203 LineOffsets.push_back(0); 204 205 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 206 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 207 unsigned Offs = 0; 208 while (1) { 209 // Skip over the contents of the line. 210 // TODO: Vectorize this? This is very performance sensitive for programs 211 // with lots of diagnostics and in -E mode. 212 const unsigned char *NextBuf = (const unsigned char *)Buf; 213 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 214 ++NextBuf; 215 Offs += NextBuf-Buf; 216 Buf = NextBuf; 217 218 if (Buf[0] == '\n' || Buf[0] == '\r') { 219 // If this is \n\r or \r\n, skip both characters. 220 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 221 ++Offs, ++Buf; 222 ++Offs, ++Buf; 223 LineOffsets.push_back(Offs); 224 } else { 225 // Otherwise, this is a null. If end of file, exit. 226 if (Buf == End) break; 227 // Otherwise, skip the null. 228 ++Offs, ++Buf; 229 } 230 } 231 232 // Copy the offsets into the FileInfo structure. 233 FI->NumLines = LineOffsets.size(); 234 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 235 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 236} 237 238/// getLineNumber - Given a SourceLocation, return the physical line number 239/// for the position indicated. This requires building and caching a table of 240/// line offsets for the MemoryBuffer, so this is not cheap: use only when 241/// about to emit a diagnostic. 242unsigned SourceManager::getLineNumber(SourceLocation Loc) { 243 unsigned FileID = Loc.getFileID(); 244 if (FileID == 0) return 0; 245 246 ContentCache* Content; 247 248 if (LastLineNoFileIDQuery == FileID) 249 Content = LastLineNoContentCache; 250 else 251 Content = const_cast<ContentCache*>(getContentCache(FileID)); 252 253 // If this is the first use of line information for this buffer, compute the 254 /// SourceLineCache for it on demand. 255 if (Content->SourceLineCache == 0) 256 ComputeLineNumbers(Content); 257 258 // Okay, we know we have a line number table. Do a binary search to find the 259 // line number that this character position lands on. 260 unsigned *SourceLineCache = Content->SourceLineCache; 261 unsigned *SourceLineCacheStart = SourceLineCache; 262 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 263 264 unsigned QueriedFilePos = getFullFilePos(Loc)+1; 265 266 // If the previous query was to the same file, we know both the file pos from 267 // that query and the line number returned. This allows us to narrow the 268 // search space from the entire file to something near the match. 269 if (LastLineNoFileIDQuery == FileID) { 270 if (QueriedFilePos >= LastLineNoFilePos) { 271 SourceLineCache = SourceLineCache+LastLineNoResult-1; 272 273 // The query is likely to be nearby the previous one. Here we check to 274 // see if it is within 5, 10 or 20 lines. It can be far away in cases 275 // where big comment blocks and vertical whitespace eat up lines but 276 // contribute no tokens. 277 if (SourceLineCache+5 < SourceLineCacheEnd) { 278 if (SourceLineCache[5] > QueriedFilePos) 279 SourceLineCacheEnd = SourceLineCache+5; 280 else if (SourceLineCache+10 < SourceLineCacheEnd) { 281 if (SourceLineCache[10] > QueriedFilePos) 282 SourceLineCacheEnd = SourceLineCache+10; 283 else if (SourceLineCache+20 < SourceLineCacheEnd) { 284 if (SourceLineCache[20] > QueriedFilePos) 285 SourceLineCacheEnd = SourceLineCache+20; 286 } 287 } 288 } 289 } else { 290 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 291 } 292 } 293 294 // If the spread is large, do a "radix" test as our initial guess, based on 295 // the assumption that lines average to approximately the same length. 296 // NOTE: This is currently disabled, as it does not appear to be profitable in 297 // initial measurements. 298 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 299 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 300 301 // Take a stab at guessing where it is. 302 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 303 304 // Check for -10 and +10 lines. 305 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 306 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 307 308 // If the computed lower bound is less than the query location, move it in. 309 if (SourceLineCache < SourceLineCacheStart+LowerBound && 310 SourceLineCacheStart[LowerBound] < QueriedFilePos) 311 SourceLineCache = SourceLineCacheStart+LowerBound; 312 313 // If the computed upper bound is greater than the query location, move it. 314 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 315 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 316 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 317 } 318 319 unsigned *Pos 320 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 321 unsigned LineNo = Pos-SourceLineCacheStart; 322 323 LastLineNoFileIDQuery = FileID; 324 LastLineNoContentCache = Content; 325 LastLineNoFilePos = QueriedFilePos; 326 LastLineNoResult = LineNo; 327 return LineNo; 328} 329 330/// PrintStats - Print statistics to stderr. 331/// 332void SourceManager::PrintStats() const { 333 llvm::cerr << "\n*** Source Manager Stats:\n"; 334 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 335 << " mem buffers mapped, " << FileIDs.size() 336 << " file ID's allocated.\n"; 337 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 338 << MacroIDs.size() << " macro expansion FileID's.\n"; 339 340 unsigned NumLineNumsComputed = 0; 341 unsigned NumFileBytesMapped = 0; 342 for (std::set<ContentCache>::const_iterator I = 343 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 344 NumLineNumsComputed += I->SourceLineCache != 0; 345 NumFileBytesMapped += I->Buffer->getBufferSize(); 346 } 347 348 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 349 << NumLineNumsComputed << " files with line #'s computed.\n"; 350} 351 352//===----------------------------------------------------------------------===// 353// Serialization. 354//===----------------------------------------------------------------------===// 355 356void ContentCache::Emit(llvm::Serializer& S) const { 357 S.FlushRecord(); 358 S.EmitPtr(this); 359 360 if (Entry) { 361 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 362 363 if (Fname.isAbsolute()) 364 S.EmitCStr(Fname.c_str()); 365 else { 366 // Create an absolute path. 367 // FIXME: This will potentially contain ".." and "." in the path. 368 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 369 path.appendComponent(Fname.c_str()); 370 S.EmitCStr(path.c_str()); 371 } 372 } 373 else { 374 const char* p = Buffer->getBufferStart(); 375 const char* e = Buffer->getBufferEnd(); 376 377 S.EmitInt(e-p); 378 379 for ( ; p != e; ++p) 380 S.EmitInt(*p); 381 } 382 383 S.FlushRecord(); 384} 385 386void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 387 SourceManager& SMgr, 388 FileManager* FMgr, 389 std::vector<char>& Buf) { 390 if (FMgr) { 391 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 392 D.ReadCStr(Buf,false); 393 394 // Create/fetch the FileEntry. 395 const char* start = &Buf[0]; 396 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 397 398 // FIXME: Ideally we want a lazy materialization of the ContentCache 399 // anyway, because we don't want to read in source files unless this 400 // is absolutely needed. 401 if (!E) 402 D.RegisterPtr(PtrID,NULL); 403 else 404 // Get the ContextCache object and register it with the deserializer. 405 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 406 } 407 else { 408 // Register the ContextCache object with the deserializer. 409 SMgr.MemBufferInfos.push_back(ContentCache()); 410 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 411 D.RegisterPtr(&Entry); 412 413 // Create the buffer. 414 unsigned Size = D.ReadInt(); 415 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 416 417 // Read the contents of the buffer. 418 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 419 for (unsigned i = 0; i < Size ; ++i) 420 p[i] = D.ReadInt(); 421 } 422} 423 424void FileIDInfo::Emit(llvm::Serializer& S) const { 425 S.Emit(IncludeLoc); 426 S.EmitInt(ChunkNo); 427 S.EmitPtr(Content); 428} 429 430FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 431 FileIDInfo I; 432 I.IncludeLoc = SourceLocation::ReadVal(D); 433 I.ChunkNo = D.ReadInt(); 434 D.ReadPtr(I.Content,false); 435 return I; 436} 437 438void MacroIDInfo::Emit(llvm::Serializer& S) const { 439 S.Emit(VirtualLoc); 440 S.Emit(PhysicalLoc); 441} 442 443MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 444 MacroIDInfo I; 445 I.VirtualLoc = SourceLocation::ReadVal(D); 446 I.PhysicalLoc = SourceLocation::ReadVal(D); 447 return I; 448} 449 450void SourceManager::Emit(llvm::Serializer& S) const { 451 S.EnterBlock(); 452 S.EmitPtr(this); 453 S.EmitInt(MainFileID); 454 455 // Emit: FileInfos. Just emit the file name. 456 S.EnterBlock(); 457 458 std::for_each(FileInfos.begin(),FileInfos.end(), 459 S.MakeEmitter<ContentCache>()); 460 461 S.ExitBlock(); 462 463 // Emit: MemBufferInfos 464 S.EnterBlock(); 465 466 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 467 S.MakeEmitter<ContentCache>()); 468 469 S.ExitBlock(); 470 471 // Emit: FileIDs 472 S.EmitInt(FileIDs.size()); 473 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 474 475 // Emit: MacroIDs 476 S.EmitInt(MacroIDs.size()); 477 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 478 479 S.ExitBlock(); 480} 481 482SourceManager* 483SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 484 SourceManager *M = new SourceManager(); 485 D.RegisterPtr(M); 486 487 // Read: the FileID of the main source file of the translation unit. 488 M->MainFileID = D.ReadInt(); 489 490 std::vector<char> Buf; 491 492 { // Read: FileInfos. 493 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 494 while (!D.FinishedBlock(BLoc)) 495 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 496 } 497 498 { // Read: MemBufferInfos. 499 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 500 while (!D.FinishedBlock(BLoc)) 501 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 502 } 503 504 // Read: FileIDs. 505 unsigned Size = D.ReadInt(); 506 M->FileIDs.reserve(Size); 507 for (; Size > 0 ; --Size) 508 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 509 510 // Read: MacroIDs. 511 Size = D.ReadInt(); 512 M->MacroIDs.reserve(Size); 513 for (; Size > 0 ; --Size) 514 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 515 516 return M; 517} 518