SourceManager.cpp revision 88054dee0402e4d3c1f64e6b697acc47195c0d72
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27// This (temporary) directive toggles between lazy and eager creation of
28// MemBuffers.  This directive is not permanent, and is here to test a few
29// potential optimizations in PTH.  Once it is clear whether eager or lazy
30// creation of MemBuffers is better this directive will get removed.
31#define LAZY
32
33ContentCache::~ContentCache() {
34  delete Buffer;
35  delete [] SourceLineCache;
36}
37
38/// getSizeBytesMapped - Returns the number of bytes actually mapped for
39///  this ContentCache.  This can be 0 if the MemBuffer was not actually
40///  instantiated.
41unsigned ContentCache::getSizeBytesMapped() const {
42  return Buffer ? Buffer->getBufferSize() : 0;
43}
44
45/// getSize - Returns the size of the content encapsulated by this ContentCache.
46///  This can be the size of the source file or the size of an arbitrary
47///  scratch buffer.  If the ContentCache encapsulates a source file, that
48///  file is not lazily brought in from disk to satisfy this query.
49unsigned ContentCache::getSize() const {
50  return Entry ? Entry->getSize() : Buffer->getBufferSize();
51}
52
53const llvm::MemoryBuffer* ContentCache::getBuffer() const {
54#ifdef LAZY
55  // Lazily create the Buffer for ContentCaches that wrap files.
56  if (!Buffer && Entry) {
57    // FIXME: Should we support a way to not have to do this check over
58    //   and over if we cannot open the file?
59    // FIXME: This const_cast is ugly.  Should we make getBuffer() non-const?
60    const_cast<ContentCache*>(this)->Buffer =
61      MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
62  }
63#endif
64  return Buffer;
65}
66
67
68/// getFileInfo - Create or return a cached FileInfo for the specified file.
69///
70const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
71
72  assert(FileEnt && "Didn't specify a file entry to use?");
73  // Do we already have information about this file?
74  std::set<ContentCache>::iterator I =
75    FileInfos.lower_bound(ContentCache(FileEnt));
76
77  if (I != FileInfos.end() && I->Entry == FileEnt)
78    return &*I;
79
80  // Nope, get information.
81#ifndef LAZY
82  const MemoryBuffer *File =
83    MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
84  if (File == 0)
85    return 0;
86#endif
87
88  ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
89#ifndef LAZY
90  Entry.setBuffer(File);
91#endif
92  Entry.SourceLineCache = 0;
93  Entry.NumLines = 0;
94  return &Entry;
95}
96
97
98/// createMemBufferContentCache - Create a new ContentCache for the specified
99///  memory buffer.  This does no caching.
100const ContentCache*
101SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
102  // Add a new ContentCache to the MemBufferInfos list and return it.  We
103  // must default construct the object first that the instance actually
104  // stored within MemBufferInfos actually owns the Buffer, and not any
105  // temporary we would use in the call to "push_back".
106  MemBufferInfos.push_back(ContentCache());
107  ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
108  Entry.setBuffer(Buffer);
109  return &Entry;
110}
111
112
113/// createFileID - Create a new fileID for the specified ContentCache and
114/// include position.  This works regardless of whether the ContentCache
115/// corresponds to a file or some other input source.
116unsigned SourceManager::createFileID(const ContentCache *File,
117                                     SourceLocation IncludePos,
118                                     SrcMgr::CharacteristicKind FileCharacter) {
119  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
120  // to fit an arbitrary position in the file in the FilePos field.  To handle
121  // this, we create one FileID for each chunk of the file that fits in a
122  // FilePos field.
123  unsigned FileSize = File->getSize();
124  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
125    FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter));
126    assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
127           "Ran out of file ID's!");
128    return FileIDs.size();
129  }
130
131  // Create one FileID for each chunk of the file.
132  unsigned Result = FileIDs.size()+1;
133
134  unsigned ChunkNo = 0;
135  while (1) {
136    FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
137                                      FileCharacter));
138
139    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
140    FileSize -= (1 << SourceLocation::FilePosBits);
141  }
142
143  assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
144         "Ran out of file ID's!");
145  return Result;
146}
147
148/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
149/// that a token from SpellingLoc should actually be referenced from
150/// InstantiationLoc.
151SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc,
152                                                  SourceLocation InstantLoc) {
153  // The specified source location may be a mapped location, due to a macro
154  // instantiation or #line directive.  Strip off this information to find out
155  // where the characters are actually located.
156  SpellingLoc = getSpellingLoc(SpellingLoc);
157
158  // Resolve InstantLoc down to a real logical location.
159  InstantLoc = getLogicalLoc(InstantLoc);
160
161
162  // If the last macro id is close to the currently requested location, try to
163  // reuse it.  This implements a small cache.
164  for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
165    MacroIDInfo &LastOne = MacroIDs[i];
166
167    // The instanitation point and source SpellingLoc have to exactly match to
168    // reuse (for now).  We could allow "nearby" instantiations in the future.
169    if (LastOne.getInstantiationLoc() != InstantLoc ||
170        LastOne.getSpellingLoc().getFileID() != SpellingLoc.getFileID())
171      continue;
172
173    // Check to see if the spellloc of the token came from near enough to reuse.
174    int SpellDelta = SpellingLoc.getRawFilePos() -
175                     LastOne.getSpellingLoc().getRawFilePos();
176    if (SourceLocation::isValidMacroSpellingOffs(SpellDelta))
177      return SourceLocation::getMacroLoc(i, SpellDelta);
178  }
179
180
181  MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc));
182  return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
183}
184
185/// getBufferData - Return a pointer to the start and end of the character
186/// data for the specified FileID.
187std::pair<const char*, const char*>
188SourceManager::getBufferData(unsigned FileID) const {
189  const llvm::MemoryBuffer *Buf = getBuffer(FileID);
190  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
191}
192
193
194/// getCharacterData - Return a pointer to the start of the specified location
195/// in the appropriate MemoryBuffer.
196const char *SourceManager::getCharacterData(SourceLocation SL) const {
197  // Note that this is a hot function in the getSpelling() path, which is
198  // heavily used by -E mode.
199  SL = getSpellingLoc(SL);
200
201  // Note that calling 'getBuffer()' may lazily page in a source file.
202  return getContentCache(SL.getFileID())->getBuffer()->getBufferStart() +
203         getFullFilePos(SL);
204}
205
206
207/// getColumnNumber - Return the column # for the specified file position.
208/// this is significantly cheaper to compute than the line number.  This returns
209/// zero if the column number isn't known.
210unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
211  unsigned FileID = Loc.getFileID();
212  if (FileID == 0) return 0;
213
214  unsigned FilePos = getFullFilePos(Loc);
215  const MemoryBuffer *Buffer = getBuffer(FileID);
216  const char *Buf = Buffer->getBufferStart();
217
218  unsigned LineStart = FilePos;
219  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
220    --LineStart;
221  return FilePos-LineStart+1;
222}
223
224/// getSourceName - This method returns the name of the file or buffer that
225/// the SourceLocation specifies.  This can be modified with #line directives,
226/// etc.
227const char *SourceManager::getSourceName(SourceLocation Loc) const {
228  unsigned FileID = Loc.getFileID();
229  if (FileID == 0) return "";
230
231  // To get the source name, first consult the FileEntry (if one exists) before
232  // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer.
233  const SrcMgr::ContentCache* C = getContentCache(FileID);
234  return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
235}
236
237static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
238static void ComputeLineNumbers(ContentCache* FI) {
239  // Note that calling 'getBuffer()' may lazily page in the file.
240  const MemoryBuffer *Buffer = FI->getBuffer();
241
242  // Find the file offsets of all of the *physical* source lines.  This does
243  // not look at trigraphs, escaped newlines, or anything else tricky.
244  std::vector<unsigned> LineOffsets;
245
246  // Line #1 starts at char 0.
247  LineOffsets.push_back(0);
248
249  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
250  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
251  unsigned Offs = 0;
252  while (1) {
253    // Skip over the contents of the line.
254    // TODO: Vectorize this?  This is very performance sensitive for programs
255    // with lots of diagnostics and in -E mode.
256    const unsigned char *NextBuf = (const unsigned char *)Buf;
257    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
258      ++NextBuf;
259    Offs += NextBuf-Buf;
260    Buf = NextBuf;
261
262    if (Buf[0] == '\n' || Buf[0] == '\r') {
263      // If this is \n\r or \r\n, skip both characters.
264      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
265        ++Offs, ++Buf;
266      ++Offs, ++Buf;
267      LineOffsets.push_back(Offs);
268    } else {
269      // Otherwise, this is a null.  If end of file, exit.
270      if (Buf == End) break;
271      // Otherwise, skip the null.
272      ++Offs, ++Buf;
273    }
274  }
275
276  // Copy the offsets into the FileInfo structure.
277  FI->NumLines = LineOffsets.size();
278  FI->SourceLineCache = new unsigned[LineOffsets.size()];
279  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
280}
281
282/// getLineNumber - Given a SourceLocation, return the spelling line number
283/// for the position indicated.  This requires building and caching a table of
284/// line offsets for the MemoryBuffer, so this is not cheap: use only when
285/// about to emit a diagnostic.
286unsigned SourceManager::getLineNumber(SourceLocation Loc) const {
287  unsigned FileID = Loc.getFileID();
288  if (FileID == 0) return 0;
289
290  ContentCache* Content;
291
292  if (LastLineNoFileIDQuery == FileID)
293    Content = LastLineNoContentCache;
294  else
295    Content = const_cast<ContentCache*>(getContentCache(FileID));
296
297  // If this is the first use of line information for this buffer, compute the
298  /// SourceLineCache for it on demand.
299  if (Content->SourceLineCache == 0)
300    ComputeLineNumbers(Content);
301
302  // Okay, we know we have a line number table.  Do a binary search to find the
303  // line number that this character position lands on.
304  unsigned *SourceLineCache = Content->SourceLineCache;
305  unsigned *SourceLineCacheStart = SourceLineCache;
306  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
307
308  unsigned QueriedFilePos = getFullFilePos(Loc)+1;
309
310  // If the previous query was to the same file, we know both the file pos from
311  // that query and the line number returned.  This allows us to narrow the
312  // search space from the entire file to something near the match.
313  if (LastLineNoFileIDQuery == FileID) {
314    if (QueriedFilePos >= LastLineNoFilePos) {
315      SourceLineCache = SourceLineCache+LastLineNoResult-1;
316
317      // The query is likely to be nearby the previous one.  Here we check to
318      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
319      // where big comment blocks and vertical whitespace eat up lines but
320      // contribute no tokens.
321      if (SourceLineCache+5 < SourceLineCacheEnd) {
322        if (SourceLineCache[5] > QueriedFilePos)
323          SourceLineCacheEnd = SourceLineCache+5;
324        else if (SourceLineCache+10 < SourceLineCacheEnd) {
325          if (SourceLineCache[10] > QueriedFilePos)
326            SourceLineCacheEnd = SourceLineCache+10;
327          else if (SourceLineCache+20 < SourceLineCacheEnd) {
328            if (SourceLineCache[20] > QueriedFilePos)
329              SourceLineCacheEnd = SourceLineCache+20;
330          }
331        }
332      }
333    } else {
334      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
335    }
336  }
337
338  // If the spread is large, do a "radix" test as our initial guess, based on
339  // the assumption that lines average to approximately the same length.
340  // NOTE: This is currently disabled, as it does not appear to be profitable in
341  // initial measurements.
342  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
343    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
344
345    // Take a stab at guessing where it is.
346    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
347
348    // Check for -10 and +10 lines.
349    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
350    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
351
352    // If the computed lower bound is less than the query location, move it in.
353    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
354        SourceLineCacheStart[LowerBound] < QueriedFilePos)
355      SourceLineCache = SourceLineCacheStart+LowerBound;
356
357    // If the computed upper bound is greater than the query location, move it.
358    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
359        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
360      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
361  }
362
363  unsigned *Pos
364    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
365  unsigned LineNo = Pos-SourceLineCacheStart;
366
367  LastLineNoFileIDQuery = FileID;
368  LastLineNoContentCache = Content;
369  LastLineNoFilePos = QueriedFilePos;
370  LastLineNoResult = LineNo;
371  return LineNo;
372}
373
374/// PrintStats - Print statistics to stderr.
375///
376void SourceManager::PrintStats() const {
377  llvm::cerr << "\n*** Source Manager Stats:\n";
378  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
379             << " mem buffers mapped, " << FileIDs.size()
380             << " file ID's allocated.\n";
381  llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
382             << MacroIDs.size() << " macro expansion FileID's.\n";
383
384  unsigned NumLineNumsComputed = 0;
385  unsigned NumFileBytesMapped = 0;
386  for (std::set<ContentCache>::const_iterator I =
387       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
388    NumLineNumsComputed += I->SourceLineCache != 0;
389    NumFileBytesMapped  += I->getSizeBytesMapped();
390  }
391
392  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
393             << NumLineNumsComputed << " files with line #'s computed.\n";
394}
395
396//===----------------------------------------------------------------------===//
397// Serialization.
398//===----------------------------------------------------------------------===//
399
400void ContentCache::Emit(llvm::Serializer& S) const {
401  S.FlushRecord();
402  S.EmitPtr(this);
403
404  if (Entry) {
405    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
406
407    if (Fname.isAbsolute())
408      S.EmitCStr(Fname.c_str());
409    else {
410      // Create an absolute path.
411      // FIXME: This will potentially contain ".." and "." in the path.
412      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
413      path.appendComponent(Fname.c_str());
414      S.EmitCStr(path.c_str());
415    }
416  }
417  else {
418    const char* p = Buffer->getBufferStart();
419    const char* e = Buffer->getBufferEnd();
420
421    S.EmitInt(e-p);
422
423    for ( ; p != e; ++p)
424      S.EmitInt(*p);
425  }
426
427  S.FlushRecord();
428}
429
430void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
431                                       SourceManager& SMgr,
432                                       FileManager* FMgr,
433                                       std::vector<char>& Buf) {
434  if (FMgr) {
435    llvm::SerializedPtrID PtrID = D.ReadPtrID();
436    D.ReadCStr(Buf,false);
437
438    // Create/fetch the FileEntry.
439    const char* start = &Buf[0];
440    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
441
442    // FIXME: Ideally we want a lazy materialization of the ContentCache
443    //  anyway, because we don't want to read in source files unless this
444    //  is absolutely needed.
445    if (!E)
446      D.RegisterPtr(PtrID,NULL);
447    else
448      // Get the ContextCache object and register it with the deserializer.
449      D.RegisterPtr(PtrID,SMgr.getContentCache(E));
450  }
451  else {
452    // Register the ContextCache object with the deserializer.
453    SMgr.MemBufferInfos.push_back(ContentCache());
454    ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
455    D.RegisterPtr(&Entry);
456
457    // Create the buffer.
458    unsigned Size = D.ReadInt();
459    Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
460
461    // Read the contents of the buffer.
462    char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
463    for (unsigned i = 0; i < Size ; ++i)
464      p[i] = D.ReadInt();
465  }
466}
467
468void FileIDInfo::Emit(llvm::Serializer& S) const {
469  S.Emit(IncludeLoc);
470  S.EmitInt(ChunkNo);
471  S.EmitPtr(Content);
472}
473
474FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
475  FileIDInfo I;
476  I.IncludeLoc = SourceLocation::ReadVal(D);
477  I.ChunkNo = D.ReadInt();
478  D.ReadPtr(I.Content,false);
479  return I;
480}
481
482void MacroIDInfo::Emit(llvm::Serializer& S) const {
483  S.Emit(InstantiationLoc);
484  S.Emit(SpellingLoc);
485}
486
487MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
488  MacroIDInfo I;
489  I.InstantiationLoc = SourceLocation::ReadVal(D);
490  I.SpellingLoc = SourceLocation::ReadVal(D);
491  return I;
492}
493
494void SourceManager::Emit(llvm::Serializer& S) const {
495  S.EnterBlock();
496  S.EmitPtr(this);
497  S.EmitInt(MainFileID);
498
499  // Emit: FileInfos.  Just emit the file name.
500  S.EnterBlock();
501
502  std::for_each(FileInfos.begin(),FileInfos.end(),
503                S.MakeEmitter<ContentCache>());
504
505  S.ExitBlock();
506
507  // Emit: MemBufferInfos
508  S.EnterBlock();
509
510  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
511                S.MakeEmitter<ContentCache>());
512
513  S.ExitBlock();
514
515  // Emit: FileIDs
516  S.EmitInt(FileIDs.size());
517  std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
518
519  // Emit: MacroIDs
520  S.EmitInt(MacroIDs.size());
521  std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
522
523  S.ExitBlock();
524}
525
526SourceManager*
527SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
528  SourceManager *M = new SourceManager();
529  D.RegisterPtr(M);
530
531  // Read: the FileID of the main source file of the translation unit.
532  M->MainFileID = D.ReadInt();
533
534  std::vector<char> Buf;
535
536  { // Read: FileInfos.
537    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
538    while (!D.FinishedBlock(BLoc))
539    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
540  }
541
542  { // Read: MemBufferInfos.
543    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
544    while (!D.FinishedBlock(BLoc))
545    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
546  }
547
548  // Read: FileIDs.
549  unsigned Size = D.ReadInt();
550  M->FileIDs.reserve(Size);
551  for (; Size > 0 ; --Size)
552    M->FileIDs.push_back(FileIDInfo::ReadVal(D));
553
554  // Read: MacroIDs.
555  Size = D.ReadInt();
556  M->MacroIDs.reserve(Size);
557  for (; Size > 0 ; --Size)
558    M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
559
560  return M;
561}
562