SourceManager.cpp revision a90a4d4a0a365f991b92e925436ec63ef4969839
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27// This (temporary) directive toggles between lazy and eager creation of
28// MemBuffers.  This directive is not permanent, and is here to test a few
29// potential optimizations in PTH.  Once it is clear whether eager or lazy
30// creation of MemBuffers is better this directive will get removed.
31#define LAZY
32
33ContentCache::~ContentCache() {
34  delete Buffer;
35  delete [] SourceLineCache;
36}
37
38/// getSizeBytesMapped - Returns the number of bytes actually mapped for
39///  this ContentCache.  This can be 0 if the MemBuffer was not actually
40///  instantiated.
41unsigned ContentCache::getSizeBytesMapped() const {
42  return Buffer ? Buffer->getBufferSize() : 0;
43}
44
45/// getSize - Returns the size of the content encapsulated by this ContentCache.
46///  This can be the size of the source file or the size of an arbitrary
47///  scratch buffer.  If the ContentCache encapsulates a source file, that
48///  file is not lazily brought in from disk to satisfy this query.
49unsigned ContentCache::getSize() const {
50  return Entry ? Entry->getSize() : Buffer->getBufferSize();
51}
52
53const llvm::MemoryBuffer* ContentCache::getBuffer() const {
54#ifdef LAZY
55  // Lazily create the Buffer for ContentCaches that wrap files.
56  if (!Buffer && Entry) {
57    // FIXME: Should we support a way to not have to do this check over
58    //   and over if we cannot open the file?
59    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
60  }
61#endif
62  return Buffer;
63}
64
65
66/// getFileInfo - Create or return a cached FileInfo for the specified file.
67///
68const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
69
70  assert(FileEnt && "Didn't specify a file entry to use?");
71  // Do we already have information about this file?
72  std::set<ContentCache>::iterator I =
73    FileInfos.lower_bound(ContentCache(FileEnt));
74
75  if (I != FileInfos.end() && I->Entry == FileEnt)
76    return &*I;
77
78  // Nope, get information.
79#ifndef LAZY
80  const MemoryBuffer *File =
81    MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
82  if (File == 0)
83    return 0;
84#endif
85
86  ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
87#ifndef LAZY
88  Entry.setBuffer(File);
89#endif
90  Entry.SourceLineCache = 0;
91  Entry.NumLines = 0;
92  return &Entry;
93}
94
95
96/// createMemBufferContentCache - Create a new ContentCache for the specified
97///  memory buffer.  This does no caching.
98const ContentCache*
99SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
100  // Add a new ContentCache to the MemBufferInfos list and return it.  We
101  // must default construct the object first that the instance actually
102  // stored within MemBufferInfos actually owns the Buffer, and not any
103  // temporary we would use in the call to "push_back".
104  MemBufferInfos.push_back(ContentCache());
105  ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
106  Entry.setBuffer(Buffer);
107  return &Entry;
108}
109
110
111/// createFileID - Create a new fileID for the specified ContentCache and
112/// include position.  This works regardless of whether the ContentCache
113/// corresponds to a file or some other input source.
114FileID SourceManager::createFileID(const ContentCache *File,
115                                     SourceLocation IncludePos,
116                                     SrcMgr::CharacteristicKind FileCharacter) {
117  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
118  // to fit an arbitrary position in the file in the FilePos field.  To handle
119  // this, we create one FileID for each chunk of the file that fits in a
120  // FilePos field.
121  unsigned FileSize = File->getSize();
122  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
123    FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter));
124    assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) &&
125           "Ran out of file ID's!");
126    return FileID::Create(FileIDs.size());
127  }
128
129  // Create one FileID for each chunk of the file.
130  unsigned Result = FileIDs.size()+1;
131
132  unsigned ChunkNo = 0;
133  while (1) {
134    FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
135                                      FileCharacter));
136
137    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
138    FileSize -= (1 << SourceLocation::FilePosBits);
139  }
140
141  assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) &&
142         "Ran out of file ID's!");
143  return FileID::Create(Result);
144}
145
146/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
147/// that a token from SpellingLoc should actually be referenced from
148/// InstantiationLoc.
149SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc,
150                                                  SourceLocation InstantLoc) {
151  // The specified source location may be a mapped location, due to a macro
152  // instantiation or #line directive.  Strip off this information to find out
153  // where the characters are actually located.
154  SpellingLoc = getSpellingLoc(SpellingLoc);
155
156  // Resolve InstantLoc down to a real instantiation location.
157  InstantLoc = getInstantiationLoc(InstantLoc);
158
159
160  // If the last macro id is close to the currently requested location, try to
161  // reuse it.  This implements a small cache.
162  for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
163    MacroIDInfo &LastOne = MacroIDs[i];
164
165    // The instanitation point and source SpellingLoc have to exactly match to
166    // reuse (for now).  We could allow "nearby" instantiations in the future.
167    if (LastOne.getInstantiationLoc() != InstantLoc ||
168        LastOne.getSpellingLoc().getChunkID() != SpellingLoc.getChunkID())
169      continue;
170
171    // Check to see if the spellloc of the token came from near enough to reuse.
172    int SpellDelta = SpellingLoc.getRawFilePos() -
173                     LastOne.getSpellingLoc().getRawFilePos();
174    if (SourceLocation::isValidMacroSpellingOffs(SpellDelta))
175      return SourceLocation::getMacroLoc(i, SpellDelta);
176  }
177
178
179  MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc));
180  return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
181}
182
183/// getBufferData - Return a pointer to the start and end of the character
184/// data for the specified location.
185std::pair<const char*, const char*>
186SourceManager::getBufferData(SourceLocation Loc) const {
187  const llvm::MemoryBuffer *Buf = getBuffer(getCanonicalFileID(Loc));
188  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
189}
190
191std::pair<const char*, const char*>
192SourceManager::getBufferData(FileID FID) const {
193  const llvm::MemoryBuffer *Buf = getBuffer(FID);
194  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
195}
196
197
198
199/// getCharacterData - Return a pointer to the start of the specified location
200/// in the appropriate MemoryBuffer.
201const char *SourceManager::getCharacterData(SourceLocation SL) const {
202  // Note that this is a hot function in the getSpelling() path, which is
203  // heavily used by -E mode.
204  SL = getSpellingLoc(SL);
205
206  std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(SL);
207
208  // Note that calling 'getBuffer()' may lazily page in a source file.
209  return getContentCache(LocInfo.first)->getBuffer()->getBufferStart() +
210         LocInfo.second;
211}
212
213
214/// getColumnNumber - Return the column # for the specified file position.
215/// this is significantly cheaper to compute than the line number.  This returns
216/// zero if the column number isn't known.
217unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
218  if (Loc.getChunkID() == 0) return 0;
219
220  std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc);
221  unsigned FilePos = LocInfo.second;
222
223  const char *Buf = getBuffer(LocInfo.first)->getBufferStart();
224
225  unsigned LineStart = FilePos;
226  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
227    --LineStart;
228  return FilePos-LineStart+1;
229}
230
231/// getSourceName - This method returns the name of the file or buffer that
232/// the SourceLocation specifies.  This can be modified with #line directives,
233/// etc.
234const char *SourceManager::getSourceName(SourceLocation Loc) const {
235  if (Loc.getChunkID() == 0) return "";
236
237  // To get the source name, first consult the FileEntry (if one exists) before
238  // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer.
239  const SrcMgr::ContentCache *C = getContentCacheForLoc(Loc);
240  return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
241}
242
243static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
244static void ComputeLineNumbers(ContentCache* FI) {
245  // Note that calling 'getBuffer()' may lazily page in the file.
246  const MemoryBuffer *Buffer = FI->getBuffer();
247
248  // Find the file offsets of all of the *physical* source lines.  This does
249  // not look at trigraphs, escaped newlines, or anything else tricky.
250  std::vector<unsigned> LineOffsets;
251
252  // Line #1 starts at char 0.
253  LineOffsets.push_back(0);
254
255  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
256  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
257  unsigned Offs = 0;
258  while (1) {
259    // Skip over the contents of the line.
260    // TODO: Vectorize this?  This is very performance sensitive for programs
261    // with lots of diagnostics and in -E mode.
262    const unsigned char *NextBuf = (const unsigned char *)Buf;
263    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
264      ++NextBuf;
265    Offs += NextBuf-Buf;
266    Buf = NextBuf;
267
268    if (Buf[0] == '\n' || Buf[0] == '\r') {
269      // If this is \n\r or \r\n, skip both characters.
270      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
271        ++Offs, ++Buf;
272      ++Offs, ++Buf;
273      LineOffsets.push_back(Offs);
274    } else {
275      // Otherwise, this is a null.  If end of file, exit.
276      if (Buf == End) break;
277      // Otherwise, skip the null.
278      ++Offs, ++Buf;
279    }
280  }
281
282  // Copy the offsets into the FileInfo structure.
283  FI->NumLines = LineOffsets.size();
284  FI->SourceLineCache = new unsigned[LineOffsets.size()];
285  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
286}
287
288/// getLineNumber - Given a SourceLocation, return the spelling line number
289/// for the position indicated.  This requires building and caching a table of
290/// line offsets for the MemoryBuffer, so this is not cheap: use only when
291/// about to emit a diagnostic.
292unsigned SourceManager::getLineNumber(SourceLocation Loc) const {
293  if (Loc.getChunkID() == 0) return 0;
294
295  ContentCache *Content;
296
297  std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc);
298
299  if (LastLineNoFileIDQuery == LocInfo.first)
300    Content = LastLineNoContentCache;
301  else
302    Content = const_cast<ContentCache*>(getContentCache(LocInfo.first));
303
304  // If this is the first use of line information for this buffer, compute the
305  /// SourceLineCache for it on demand.
306  if (Content->SourceLineCache == 0)
307    ComputeLineNumbers(Content);
308
309  // Okay, we know we have a line number table.  Do a binary search to find the
310  // line number that this character position lands on.
311  unsigned *SourceLineCache = Content->SourceLineCache;
312  unsigned *SourceLineCacheStart = SourceLineCache;
313  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
314
315  unsigned QueriedFilePos = LocInfo.second+1;
316
317  // If the previous query was to the same file, we know both the file pos from
318  // that query and the line number returned.  This allows us to narrow the
319  // search space from the entire file to something near the match.
320  if (LastLineNoFileIDQuery == LocInfo.first) {
321    if (QueriedFilePos >= LastLineNoFilePos) {
322      SourceLineCache = SourceLineCache+LastLineNoResult-1;
323
324      // The query is likely to be nearby the previous one.  Here we check to
325      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
326      // where big comment blocks and vertical whitespace eat up lines but
327      // contribute no tokens.
328      if (SourceLineCache+5 < SourceLineCacheEnd) {
329        if (SourceLineCache[5] > QueriedFilePos)
330          SourceLineCacheEnd = SourceLineCache+5;
331        else if (SourceLineCache+10 < SourceLineCacheEnd) {
332          if (SourceLineCache[10] > QueriedFilePos)
333            SourceLineCacheEnd = SourceLineCache+10;
334          else if (SourceLineCache+20 < SourceLineCacheEnd) {
335            if (SourceLineCache[20] > QueriedFilePos)
336              SourceLineCacheEnd = SourceLineCache+20;
337          }
338        }
339      }
340    } else {
341      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
342    }
343  }
344
345  // If the spread is large, do a "radix" test as our initial guess, based on
346  // the assumption that lines average to approximately the same length.
347  // NOTE: This is currently disabled, as it does not appear to be profitable in
348  // initial measurements.
349  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
350    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
351
352    // Take a stab at guessing where it is.
353    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
354
355    // Check for -10 and +10 lines.
356    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
357    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
358
359    // If the computed lower bound is less than the query location, move it in.
360    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
361        SourceLineCacheStart[LowerBound] < QueriedFilePos)
362      SourceLineCache = SourceLineCacheStart+LowerBound;
363
364    // If the computed upper bound is greater than the query location, move it.
365    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
366        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
367      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
368  }
369
370  unsigned *Pos
371    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
372  unsigned LineNo = Pos-SourceLineCacheStart;
373
374  LastLineNoFileIDQuery = LocInfo.first;
375  LastLineNoContentCache = Content;
376  LastLineNoFilePos = QueriedFilePos;
377  LastLineNoResult = LineNo;
378  return LineNo;
379}
380
381/// PrintStats - Print statistics to stderr.
382///
383void SourceManager::PrintStats() const {
384  llvm::cerr << "\n*** Source Manager Stats:\n";
385  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
386             << " mem buffers mapped, " << FileIDs.size()
387             << " file ID's allocated.\n";
388  llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
389             << MacroIDs.size() << " macro expansion FileID's.\n";
390
391  unsigned NumLineNumsComputed = 0;
392  unsigned NumFileBytesMapped = 0;
393  for (std::set<ContentCache>::const_iterator I =
394       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
395    NumLineNumsComputed += I->SourceLineCache != 0;
396    NumFileBytesMapped  += I->getSizeBytesMapped();
397  }
398
399  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
400             << NumLineNumsComputed << " files with line #'s computed.\n";
401}
402
403//===----------------------------------------------------------------------===//
404// Serialization.
405//===----------------------------------------------------------------------===//
406
407void ContentCache::Emit(llvm::Serializer& S) const {
408  S.FlushRecord();
409  S.EmitPtr(this);
410
411  if (Entry) {
412    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
413
414    if (Fname.isAbsolute())
415      S.EmitCStr(Fname.c_str());
416    else {
417      // Create an absolute path.
418      // FIXME: This will potentially contain ".." and "." in the path.
419      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
420      path.appendComponent(Fname.c_str());
421      S.EmitCStr(path.c_str());
422    }
423  }
424  else {
425    const char* p = Buffer->getBufferStart();
426    const char* e = Buffer->getBufferEnd();
427
428    S.EmitInt(e-p);
429
430    for ( ; p != e; ++p)
431      S.EmitInt(*p);
432  }
433
434  S.FlushRecord();
435}
436
437void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
438                                       SourceManager& SMgr,
439                                       FileManager* FMgr,
440                                       std::vector<char>& Buf) {
441  if (FMgr) {
442    llvm::SerializedPtrID PtrID = D.ReadPtrID();
443    D.ReadCStr(Buf,false);
444
445    // Create/fetch the FileEntry.
446    const char* start = &Buf[0];
447    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
448
449    // FIXME: Ideally we want a lazy materialization of the ContentCache
450    //  anyway, because we don't want to read in source files unless this
451    //  is absolutely needed.
452    if (!E)
453      D.RegisterPtr(PtrID,NULL);
454    else
455      // Get the ContextCache object and register it with the deserializer.
456      D.RegisterPtr(PtrID,SMgr.getContentCache(E));
457  }
458  else {
459    // Register the ContextCache object with the deserializer.
460    SMgr.MemBufferInfos.push_back(ContentCache());
461    ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
462    D.RegisterPtr(&Entry);
463
464    // Create the buffer.
465    unsigned Size = D.ReadInt();
466    Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
467
468    // Read the contents of the buffer.
469    char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
470    for (unsigned i = 0; i < Size ; ++i)
471      p[i] = D.ReadInt();
472  }
473}
474
475void FileIDInfo::Emit(llvm::Serializer& S) const {
476  S.Emit(IncludeLoc);
477  S.EmitInt(ChunkNo);
478  S.EmitPtr(Content);
479}
480
481FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
482  FileIDInfo I;
483  I.IncludeLoc = SourceLocation::ReadVal(D);
484  I.ChunkNo = D.ReadInt();
485  D.ReadPtr(I.Content,false);
486  return I;
487}
488
489void MacroIDInfo::Emit(llvm::Serializer& S) const {
490  S.Emit(InstantiationLoc);
491  S.Emit(SpellingLoc);
492}
493
494MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
495  MacroIDInfo I;
496  I.InstantiationLoc = SourceLocation::ReadVal(D);
497  I.SpellingLoc = SourceLocation::ReadVal(D);
498  return I;
499}
500
501void SourceManager::Emit(llvm::Serializer& S) const {
502  S.EnterBlock();
503  S.EmitPtr(this);
504  S.EmitInt(MainFileID.getOpaqueValue());
505
506  // Emit: FileInfos.  Just emit the file name.
507  S.EnterBlock();
508
509  std::for_each(FileInfos.begin(),FileInfos.end(),
510                S.MakeEmitter<ContentCache>());
511
512  S.ExitBlock();
513
514  // Emit: MemBufferInfos
515  S.EnterBlock();
516
517  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
518                S.MakeEmitter<ContentCache>());
519
520  S.ExitBlock();
521
522  // Emit: FileIDs
523  S.EmitInt(FileIDs.size());
524  std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
525
526  // Emit: MacroIDs
527  S.EmitInt(MacroIDs.size());
528  std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
529
530  S.ExitBlock();
531}
532
533SourceManager*
534SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
535  SourceManager *M = new SourceManager();
536  D.RegisterPtr(M);
537
538  // Read: the FileID of the main source file of the translation unit.
539  M->MainFileID = FileID::Create(D.ReadInt());
540
541  std::vector<char> Buf;
542
543  { // Read: FileInfos.
544    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
545    while (!D.FinishedBlock(BLoc))
546    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
547  }
548
549  { // Read: MemBufferInfos.
550    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
551    while (!D.FinishedBlock(BLoc))
552    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
553  }
554
555  // Read: FileIDs.
556  unsigned Size = D.ReadInt();
557  M->FileIDs.reserve(Size);
558  for (; Size > 0 ; --Size)
559    M->FileIDs.push_back(FileIDInfo::ReadVal(D));
560
561  // Read: MacroIDs.
562  Size = D.ReadInt();
563  M->MacroIDs.reserve(Size);
564  for (; Size > 0 ; --Size)
565    M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
566
567  return M;
568}
569