SourceManager.cpp revision c16c208e8519476d838ad11fffc8e0ecea50550d
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include "llvm/Bitcode/Serialize.h"
20#include "llvm/Bitcode/Deserialize.h"
21#include "llvm/Support/Streams.h"
22#include <algorithm>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27ContentCache::~ContentCache() {
28  delete Buffer;
29  delete [] SourceLineCache;
30}
31
32/// getSizeBytesMapped - Returns the number of bytes actually mapped for
33///  this ContentCache.  This can be 0 if the MemBuffer was not actually
34///  instantiated.
35unsigned ContentCache::getSizeBytesMapped() const {
36  return Buffer ? Buffer->getBufferSize() : 0;
37}
38
39/// getSize - Returns the size of the content encapsulated by this ContentCache.
40///  This can be the size of the source file or the size of an arbitrary
41///  scratch buffer.  If the ContentCache encapsulates a source file, that
42///  file is not lazily brought in from disk to satisfy this query.
43unsigned ContentCache::getSize() const {
44  return Entry ? Entry->getSize() : Buffer->getBufferSize();
45}
46
47const llvm::MemoryBuffer* ContentCache::getBuffer() const {
48  return Buffer;
49}
50
51
52/// getFileInfo - Create or return a cached FileInfo for the specified file.
53///
54const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
55
56  assert(FileEnt && "Didn't specify a file entry to use?");
57  // Do we already have information about this file?
58  std::set<ContentCache>::iterator I =
59    FileInfos.lower_bound(ContentCache(FileEnt));
60
61  if (I != FileInfos.end() && I->Entry == FileEnt)
62    return &*I;
63
64  // Nope, get information.
65  const MemoryBuffer *File =
66    MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
67  if (File == 0)
68    return 0;
69
70  ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
71
72  // FIXME: Shortly the above logic that creates a MemBuffer will be moved
73  // to ContentCache::getBuffer().  This way it can be done lazily.
74  Entry.setBuffer(File);
75  Entry.SourceLineCache = 0;
76  Entry.NumLines = 0;
77  return &Entry;
78}
79
80
81/// createMemBufferContentCache - Create a new ContentCache for the specified
82///  memory buffer.  This does no caching.
83const ContentCache*
84SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
85  // Add a new ContentCache to the MemBufferInfos list and return it.  We
86  // must default construct the object first that the instance actually
87  // stored within MemBufferInfos actually owns the Buffer, and not any
88  // temporary we would use in the call to "push_back".
89  MemBufferInfos.push_back(ContentCache());
90  ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
91  Entry.setBuffer(Buffer);
92  return &Entry;
93}
94
95
96/// createFileID - Create a new fileID for the specified ContentCache and
97/// include position.  This works regardless of whether the ContentCache
98/// corresponds to a file or some other input source.
99unsigned SourceManager::createFileID(const ContentCache *File,
100                                     SourceLocation IncludePos,
101                                     SrcMgr::CharacteristicKind FileCharacter) {
102  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
103  // to fit an arbitrary position in the file in the FilePos field.  To handle
104  // this, we create one FileID for each chunk of the file that fits in a
105  // FilePos field.
106  unsigned FileSize = File->getSize();
107  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
108    FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter));
109    assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
110           "Ran out of file ID's!");
111    return FileIDs.size();
112  }
113
114  // Create one FileID for each chunk of the file.
115  unsigned Result = FileIDs.size()+1;
116
117  unsigned ChunkNo = 0;
118  while (1) {
119    FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
120                                      FileCharacter));
121
122    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
123    FileSize -= (1 << SourceLocation::FilePosBits);
124  }
125
126  assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
127         "Ran out of file ID's!");
128  return Result;
129}
130
131/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
132/// that a token from physloc PhysLoc should actually be referenced from
133/// InstantiationLoc.
134SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
135                                                  SourceLocation InstantLoc) {
136  // The specified source location may be a mapped location, due to a macro
137  // instantiation or #line directive.  Strip off this information to find out
138  // where the characters are actually located.
139  PhysLoc = getPhysicalLoc(PhysLoc);
140
141  // Resolve InstantLoc down to a real logical location.
142  InstantLoc = getLogicalLoc(InstantLoc);
143
144
145  // If the last macro id is close to the currently requested location, try to
146  // reuse it.  This implements a small cache.
147  for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
148    MacroIDInfo &LastOne = MacroIDs[i];
149
150    // The instanitation point and source physloc have to exactly match to reuse
151    // (for now).  We could allow "nearby" instantiations in the future.
152    if (LastOne.getVirtualLoc() != InstantLoc ||
153        LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
154      continue;
155
156    // Check to see if the physloc of the token came from near enough to reuse.
157    int PhysDelta = PhysLoc.getRawFilePos() -
158                    LastOne.getPhysicalLoc().getRawFilePos();
159    if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
160      return SourceLocation::getMacroLoc(i, PhysDelta);
161  }
162
163
164  MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
165  return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
166}
167
168/// getBufferData - Return a pointer to the start and end of the character
169/// data for the specified FileID.
170std::pair<const char*, const char*>
171SourceManager::getBufferData(unsigned FileID) const {
172  const llvm::MemoryBuffer *Buf = getBuffer(FileID);
173  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
174}
175
176
177/// getCharacterData - Return a pointer to the start of the specified location
178/// in the appropriate MemoryBuffer.
179const char *SourceManager::getCharacterData(SourceLocation SL) const {
180  // Note that this is a hot function in the getSpelling() path, which is
181  // heavily used by -E mode.
182  SL = getPhysicalLoc(SL);
183
184  // Note that calling 'getBuffer()' may lazily page in a source file.
185  return getContentCache(SL.getFileID())->getBuffer()->getBufferStart() +
186         getFullFilePos(SL);
187}
188
189
190/// getColumnNumber - Return the column # for the specified file position.
191/// this is significantly cheaper to compute than the line number.  This returns
192/// zero if the column number isn't known.
193unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
194  unsigned FileID = Loc.getFileID();
195  if (FileID == 0) return 0;
196
197  unsigned FilePos = getFullFilePos(Loc);
198  const MemoryBuffer *Buffer = getBuffer(FileID);
199  const char *Buf = Buffer->getBufferStart();
200
201  unsigned LineStart = FilePos;
202  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
203    --LineStart;
204  return FilePos-LineStart+1;
205}
206
207/// getSourceName - This method returns the name of the file or buffer that
208/// the SourceLocation specifies.  This can be modified with #line directives,
209/// etc.
210const char *SourceManager::getSourceName(SourceLocation Loc) const {
211  unsigned FileID = Loc.getFileID();
212  if (FileID == 0) return "";
213
214  // To get the source name, first consult the FileEntry (if one exists) before
215  // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer.
216  const SrcMgr::ContentCache* C = getContentCache(FileID);
217  return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
218}
219
220static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
221static void ComputeLineNumbers(ContentCache* FI) {
222  // Note that calling 'getBuffer()' may lazily page in the file.
223  const MemoryBuffer *Buffer = FI->getBuffer();
224
225  // Find the file offsets of all of the *physical* source lines.  This does
226  // not look at trigraphs, escaped newlines, or anything else tricky.
227  std::vector<unsigned> LineOffsets;
228
229  // Line #1 starts at char 0.
230  LineOffsets.push_back(0);
231
232  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
233  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
234  unsigned Offs = 0;
235  while (1) {
236    // Skip over the contents of the line.
237    // TODO: Vectorize this?  This is very performance sensitive for programs
238    // with lots of diagnostics and in -E mode.
239    const unsigned char *NextBuf = (const unsigned char *)Buf;
240    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
241      ++NextBuf;
242    Offs += NextBuf-Buf;
243    Buf = NextBuf;
244
245    if (Buf[0] == '\n' || Buf[0] == '\r') {
246      // If this is \n\r or \r\n, skip both characters.
247      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
248        ++Offs, ++Buf;
249      ++Offs, ++Buf;
250      LineOffsets.push_back(Offs);
251    } else {
252      // Otherwise, this is a null.  If end of file, exit.
253      if (Buf == End) break;
254      // Otherwise, skip the null.
255      ++Offs, ++Buf;
256    }
257  }
258
259  // Copy the offsets into the FileInfo structure.
260  FI->NumLines = LineOffsets.size();
261  FI->SourceLineCache = new unsigned[LineOffsets.size()];
262  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
263}
264
265/// getLineNumber - Given a SourceLocation, return the physical line number
266/// for the position indicated.  This requires building and caching a table of
267/// line offsets for the MemoryBuffer, so this is not cheap: use only when
268/// about to emit a diagnostic.
269unsigned SourceManager::getLineNumber(SourceLocation Loc) const {
270  unsigned FileID = Loc.getFileID();
271  if (FileID == 0) return 0;
272
273  ContentCache* Content;
274
275  if (LastLineNoFileIDQuery == FileID)
276    Content = LastLineNoContentCache;
277  else
278    Content = const_cast<ContentCache*>(getContentCache(FileID));
279
280  // If this is the first use of line information for this buffer, compute the
281  /// SourceLineCache for it on demand.
282  if (Content->SourceLineCache == 0)
283    ComputeLineNumbers(Content);
284
285  // Okay, we know we have a line number table.  Do a binary search to find the
286  // line number that this character position lands on.
287  unsigned *SourceLineCache = Content->SourceLineCache;
288  unsigned *SourceLineCacheStart = SourceLineCache;
289  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
290
291  unsigned QueriedFilePos = getFullFilePos(Loc)+1;
292
293  // If the previous query was to the same file, we know both the file pos from
294  // that query and the line number returned.  This allows us to narrow the
295  // search space from the entire file to something near the match.
296  if (LastLineNoFileIDQuery == FileID) {
297    if (QueriedFilePos >= LastLineNoFilePos) {
298      SourceLineCache = SourceLineCache+LastLineNoResult-1;
299
300      // The query is likely to be nearby the previous one.  Here we check to
301      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
302      // where big comment blocks and vertical whitespace eat up lines but
303      // contribute no tokens.
304      if (SourceLineCache+5 < SourceLineCacheEnd) {
305        if (SourceLineCache[5] > QueriedFilePos)
306          SourceLineCacheEnd = SourceLineCache+5;
307        else if (SourceLineCache+10 < SourceLineCacheEnd) {
308          if (SourceLineCache[10] > QueriedFilePos)
309            SourceLineCacheEnd = SourceLineCache+10;
310          else if (SourceLineCache+20 < SourceLineCacheEnd) {
311            if (SourceLineCache[20] > QueriedFilePos)
312              SourceLineCacheEnd = SourceLineCache+20;
313          }
314        }
315      }
316    } else {
317      SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
318    }
319  }
320
321  // If the spread is large, do a "radix" test as our initial guess, based on
322  // the assumption that lines average to approximately the same length.
323  // NOTE: This is currently disabled, as it does not appear to be profitable in
324  // initial measurements.
325  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
326    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
327
328    // Take a stab at guessing where it is.
329    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
330
331    // Check for -10 and +10 lines.
332    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
333    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
334
335    // If the computed lower bound is less than the query location, move it in.
336    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
337        SourceLineCacheStart[LowerBound] < QueriedFilePos)
338      SourceLineCache = SourceLineCacheStart+LowerBound;
339
340    // If the computed upper bound is greater than the query location, move it.
341    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
342        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
343      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
344  }
345
346  unsigned *Pos
347    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
348  unsigned LineNo = Pos-SourceLineCacheStart;
349
350  LastLineNoFileIDQuery = FileID;
351  LastLineNoContentCache = Content;
352  LastLineNoFilePos = QueriedFilePos;
353  LastLineNoResult = LineNo;
354  return LineNo;
355}
356
357/// PrintStats - Print statistics to stderr.
358///
359void SourceManager::PrintStats() const {
360  llvm::cerr << "\n*** Source Manager Stats:\n";
361  llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
362             << " mem buffers mapped, " << FileIDs.size()
363             << " file ID's allocated.\n";
364  llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
365             << MacroIDs.size() << " macro expansion FileID's.\n";
366
367  unsigned NumLineNumsComputed = 0;
368  unsigned NumFileBytesMapped = 0;
369  for (std::set<ContentCache>::const_iterator I =
370       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
371    NumLineNumsComputed += I->SourceLineCache != 0;
372    NumFileBytesMapped  += I->getSizeBytesMapped();
373  }
374
375  llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
376             << NumLineNumsComputed << " files with line #'s computed.\n";
377}
378
379//===----------------------------------------------------------------------===//
380// Serialization.
381//===----------------------------------------------------------------------===//
382
383void ContentCache::Emit(llvm::Serializer& S) const {
384  S.FlushRecord();
385  S.EmitPtr(this);
386
387  if (Entry) {
388    llvm::sys::Path Fname(Buffer->getBufferIdentifier());
389
390    if (Fname.isAbsolute())
391      S.EmitCStr(Fname.c_str());
392    else {
393      // Create an absolute path.
394      // FIXME: This will potentially contain ".." and "." in the path.
395      llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
396      path.appendComponent(Fname.c_str());
397      S.EmitCStr(path.c_str());
398    }
399  }
400  else {
401    const char* p = Buffer->getBufferStart();
402    const char* e = Buffer->getBufferEnd();
403
404    S.EmitInt(e-p);
405
406    for ( ; p != e; ++p)
407      S.EmitInt(*p);
408  }
409
410  S.FlushRecord();
411}
412
413void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
414                                       SourceManager& SMgr,
415                                       FileManager* FMgr,
416                                       std::vector<char>& Buf) {
417  if (FMgr) {
418    llvm::SerializedPtrID PtrID = D.ReadPtrID();
419    D.ReadCStr(Buf,false);
420
421    // Create/fetch the FileEntry.
422    const char* start = &Buf[0];
423    const FileEntry* E = FMgr->getFile(start,start+Buf.size());
424
425    // FIXME: Ideally we want a lazy materialization of the ContentCache
426    //  anyway, because we don't want to read in source files unless this
427    //  is absolutely needed.
428    if (!E)
429      D.RegisterPtr(PtrID,NULL);
430    else
431      // Get the ContextCache object and register it with the deserializer.
432      D.RegisterPtr(PtrID,SMgr.getContentCache(E));
433  }
434  else {
435    // Register the ContextCache object with the deserializer.
436    SMgr.MemBufferInfos.push_back(ContentCache());
437    ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
438    D.RegisterPtr(&Entry);
439
440    // Create the buffer.
441    unsigned Size = D.ReadInt();
442    Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
443
444    // Read the contents of the buffer.
445    char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
446    for (unsigned i = 0; i < Size ; ++i)
447      p[i] = D.ReadInt();
448  }
449}
450
451void FileIDInfo::Emit(llvm::Serializer& S) const {
452  S.Emit(IncludeLoc);
453  S.EmitInt(ChunkNo);
454  S.EmitPtr(Content);
455}
456
457FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
458  FileIDInfo I;
459  I.IncludeLoc = SourceLocation::ReadVal(D);
460  I.ChunkNo = D.ReadInt();
461  D.ReadPtr(I.Content,false);
462  return I;
463}
464
465void MacroIDInfo::Emit(llvm::Serializer& S) const {
466  S.Emit(VirtualLoc);
467  S.Emit(PhysicalLoc);
468}
469
470MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
471  MacroIDInfo I;
472  I.VirtualLoc = SourceLocation::ReadVal(D);
473  I.PhysicalLoc = SourceLocation::ReadVal(D);
474  return I;
475}
476
477void SourceManager::Emit(llvm::Serializer& S) const {
478  S.EnterBlock();
479  S.EmitPtr(this);
480  S.EmitInt(MainFileID);
481
482  // Emit: FileInfos.  Just emit the file name.
483  S.EnterBlock();
484
485  std::for_each(FileInfos.begin(),FileInfos.end(),
486                S.MakeEmitter<ContentCache>());
487
488  S.ExitBlock();
489
490  // Emit: MemBufferInfos
491  S.EnterBlock();
492
493  std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
494                S.MakeEmitter<ContentCache>());
495
496  S.ExitBlock();
497
498  // Emit: FileIDs
499  S.EmitInt(FileIDs.size());
500  std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
501
502  // Emit: MacroIDs
503  S.EmitInt(MacroIDs.size());
504  std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
505
506  S.ExitBlock();
507}
508
509SourceManager*
510SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
511  SourceManager *M = new SourceManager();
512  D.RegisterPtr(M);
513
514  // Read: the FileID of the main source file of the translation unit.
515  M->MainFileID = D.ReadInt();
516
517  std::vector<char> Buf;
518
519  { // Read: FileInfos.
520    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
521    while (!D.FinishedBlock(BLoc))
522    ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
523  }
524
525  { // Read: MemBufferInfos.
526    llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
527    while (!D.FinishedBlock(BLoc))
528    ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
529  }
530
531  // Read: FileIDs.
532  unsigned Size = D.ReadInt();
533  M->FileIDs.reserve(Size);
534  for (; Size > 0 ; --Size)
535    M->FileIDs.push_back(FileIDInfo::ReadVal(D));
536
537  // Read: MacroIDs.
538  Size = D.ReadInt();
539  M->MacroIDs.reserve(Size);
540  for (; Size > 0 ; --Size)
541    M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
542
543  return M;
544}
545