SourceManager.cpp revision 3963e756e35d61e4a0c973f301a068c9bfd2f346
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/SourceManagerInternals.h"
16#include "clang/Basic/FileManager.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/Support/raw_ostream.h"
20#include "llvm/System/Path.h"
21#include <algorithm>
22using namespace clang;
23using namespace SrcMgr;
24using llvm::MemoryBuffer;
25
26//===----------------------------------------------------------------------===//
27// SourceManager Helper Classes
28//===----------------------------------------------------------------------===//
29
30ContentCache::~ContentCache() {
31  delete Buffer;
32}
33
34/// getSizeBytesMapped - Returns the number of bytes actually mapped for
35///  this ContentCache.  This can be 0 if the MemBuffer was not actually
36///  instantiated.
37unsigned ContentCache::getSizeBytesMapped() const {
38  return Buffer ? Buffer->getBufferSize() : 0;
39}
40
41/// getSize - Returns the size of the content encapsulated by this ContentCache.
42///  This can be the size of the source file or the size of an arbitrary
43///  scratch buffer.  If the ContentCache encapsulates a source file, that
44///  file is not lazily brought in from disk to satisfy this query unless it
45///  needs to be truncated due to a truncateAt() call.
46unsigned ContentCache::getSize() const {
47  return Buffer ? Buffer->getBufferSize() : Entry->getSize();
48}
49
50const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51  // Lazily create the Buffer for ContentCaches that wrap files.
52  if (!Buffer && Entry) {
53    // FIXME: Should we support a way to not have to do this check over
54    //   and over if we cannot open the file?
55    //   Yes, PR5371.
56    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
57    if (isTruncated())
58      const_cast<ContentCache *>(this)->truncateAt(TruncateAtLine,
59                                                   TruncateAtColumn);
60  }
61  return Buffer;
62}
63
64void ContentCache::truncateAt(unsigned Line, unsigned Column) {
65  TruncateAtLine = Line;
66  TruncateAtColumn = Column;
67
68  if (!isTruncated() || !Buffer)
69    return;
70
71  // Find the byte position of the truncation point.
72  const char *Position = Buffer->getBufferStart();
73  for (unsigned Line = 1; Line < TruncateAtLine; ++Line) {
74    for (; *Position; ++Position) {
75      if (*Position != '\r' && *Position != '\n')
76        continue;
77
78      // Eat \r\n or \n\r as a single line.
79      if ((Position[1] == '\r' || Position[1] == '\n') &&
80          Position[0] != Position[1])
81        ++Position;
82      ++Position;
83      break;
84    }
85  }
86
87  for (unsigned Column = 1; Column < TruncateAtColumn; ++Column, ++Position) {
88    if (!*Position)
89      break;
90
91    if (*Position == '\t')
92      Column += 7;
93  }
94
95  // Truncate the buffer.
96  if (Position != Buffer->getBufferEnd()) {
97    MemoryBuffer *TruncatedBuffer
98      = MemoryBuffer::getMemBufferCopy(Buffer->getBufferStart(), Position,
99                                       Buffer->getBufferIdentifier());
100    delete Buffer;
101    Buffer = TruncatedBuffer;
102  }
103}
104
105unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
106  // Look up the filename in the string table, returning the pre-existing value
107  // if it exists.
108  llvm::StringMapEntry<unsigned> &Entry =
109    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
110  if (Entry.getValue() != ~0U)
111    return Entry.getValue();
112
113  // Otherwise, assign this the next available ID.
114  Entry.setValue(FilenamesByID.size());
115  FilenamesByID.push_back(&Entry);
116  return FilenamesByID.size()-1;
117}
118
119/// AddLineNote - Add a line note to the line table that indicates that there
120/// is a #line at the specified FID/Offset location which changes the presumed
121/// location to LineNo/FilenameID.
122void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
123                                unsigned LineNo, int FilenameID) {
124  std::vector<LineEntry> &Entries = LineEntries[FID];
125
126  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
127         "Adding line entries out of order!");
128
129  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
130  unsigned IncludeOffset = 0;
131
132  if (!Entries.empty()) {
133    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
134    // that we are still in "foo.h".
135    if (FilenameID == -1)
136      FilenameID = Entries.back().FilenameID;
137
138    // If we are after a line marker that switched us to system header mode, or
139    // that set #include information, preserve it.
140    Kind = Entries.back().FileKind;
141    IncludeOffset = Entries.back().IncludeOffset;
142  }
143
144  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
145                                   IncludeOffset));
146}
147
148/// AddLineNote This is the same as the previous version of AddLineNote, but is
149/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
150/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
151/// this is a file exit.  FileKind specifies whether this is a system header or
152/// extern C system header.
153void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
154                                unsigned LineNo, int FilenameID,
155                                unsigned EntryExit,
156                                SrcMgr::CharacteristicKind FileKind) {
157  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
158
159  std::vector<LineEntry> &Entries = LineEntries[FID];
160
161  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
162         "Adding line entries out of order!");
163
164  unsigned IncludeOffset = 0;
165  if (EntryExit == 0) {  // No #include stack change.
166    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
167  } else if (EntryExit == 1) {
168    IncludeOffset = Offset-1;
169  } else if (EntryExit == 2) {
170    assert(!Entries.empty() && Entries.back().IncludeOffset &&
171       "PPDirectives should have caught case when popping empty include stack");
172
173    // Get the include loc of the last entries' include loc as our include loc.
174    IncludeOffset = 0;
175    if (const LineEntry *PrevEntry =
176          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
177      IncludeOffset = PrevEntry->IncludeOffset;
178  }
179
180  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
181                                   IncludeOffset));
182}
183
184
185/// FindNearestLineEntry - Find the line entry nearest to FID that is before
186/// it.  If there is no line entry before Offset in FID, return null.
187const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
188                                                     unsigned Offset) {
189  const std::vector<LineEntry> &Entries = LineEntries[FID];
190  assert(!Entries.empty() && "No #line entries for this FID after all!");
191
192  // It is very common for the query to be after the last #line, check this
193  // first.
194  if (Entries.back().FileOffset <= Offset)
195    return &Entries.back();
196
197  // Do a binary search to find the maximal element that is still before Offset.
198  std::vector<LineEntry>::const_iterator I =
199    std::upper_bound(Entries.begin(), Entries.end(), Offset);
200  if (I == Entries.begin()) return 0;
201  return &*--I;
202}
203
204/// \brief Add a new line entry that has already been encoded into
205/// the internal representation of the line table.
206void LineTableInfo::AddEntry(unsigned FID,
207                             const std::vector<LineEntry> &Entries) {
208  LineEntries[FID] = Entries;
209}
210
211/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
212///
213unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
214  if (LineTable == 0)
215    LineTable = new LineTableInfo();
216  return LineTable->getLineTableFilenameID(Ptr, Len);
217}
218
219
220/// AddLineNote - Add a line note to the line table for the FileID and offset
221/// specified by Loc.  If FilenameID is -1, it is considered to be
222/// unspecified.
223void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
224                                int FilenameID) {
225  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
226
227  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
228
229  // Remember that this file has #line directives now if it doesn't already.
230  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
231
232  if (LineTable == 0)
233    LineTable = new LineTableInfo();
234  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
235}
236
237/// AddLineNote - Add a GNU line marker to the line table.
238void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
239                                int FilenameID, bool IsFileEntry,
240                                bool IsFileExit, bool IsSystemHeader,
241                                bool IsExternCHeader) {
242  // If there is no filename and no flags, this is treated just like a #line,
243  // which does not change the flags of the previous line marker.
244  if (FilenameID == -1) {
245    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
246           "Can't set flags without setting the filename!");
247    return AddLineNote(Loc, LineNo, FilenameID);
248  }
249
250  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
251  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
252
253  // Remember that this file has #line directives now if it doesn't already.
254  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
255
256  if (LineTable == 0)
257    LineTable = new LineTableInfo();
258
259  SrcMgr::CharacteristicKind FileKind;
260  if (IsExternCHeader)
261    FileKind = SrcMgr::C_ExternCSystem;
262  else if (IsSystemHeader)
263    FileKind = SrcMgr::C_System;
264  else
265    FileKind = SrcMgr::C_User;
266
267  unsigned EntryExit = 0;
268  if (IsFileEntry)
269    EntryExit = 1;
270  else if (IsFileExit)
271    EntryExit = 2;
272
273  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
274                         EntryExit, FileKind);
275}
276
277LineTableInfo &SourceManager::getLineTable() {
278  if (LineTable == 0)
279    LineTable = new LineTableInfo();
280  return *LineTable;
281}
282
283//===----------------------------------------------------------------------===//
284// Private 'Create' methods.
285//===----------------------------------------------------------------------===//
286
287SourceManager::~SourceManager() {
288  delete LineTable;
289
290  // Delete FileEntry objects corresponding to content caches.  Since the actual
291  // content cache objects are bump pointer allocated, we just have to run the
292  // dtors, but we call the deallocate method for completeness.
293  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
294    MemBufferInfos[i]->~ContentCache();
295    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
296  }
297  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
298       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
299    I->second->~ContentCache();
300    ContentCacheAlloc.Deallocate(I->second);
301  }
302}
303
304void SourceManager::clearIDTables() {
305  MainFileID = FileID();
306  SLocEntryTable.clear();
307  LastLineNoFileIDQuery = FileID();
308  LastLineNoContentCache = 0;
309  LastFileIDLookup = FileID();
310
311  if (LineTable)
312    LineTable->clear();
313
314  // Use up FileID #0 as an invalid instantiation.
315  NextOffset = 0;
316  createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
317}
318
319/// getOrCreateContentCache - Create or return a cached ContentCache for the
320/// specified file.
321const ContentCache *
322SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
323  assert(FileEnt && "Didn't specify a file entry to use?");
324
325  // Do we already have information about this file?
326  ContentCache *&Entry = FileInfos[FileEnt];
327  if (Entry) return Entry;
328
329  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
330  // so that FileInfo can use the low 3 bits of the pointer for its own
331  // nefarious purposes.
332  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
333  EntryAlign = std::max(8U, EntryAlign);
334  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
335  new (Entry) ContentCache(FileEnt);
336
337  if (FileEnt == TruncateFile) {
338    // If we had queued up a file truncation request, perform the truncation
339    // now.
340    Entry->truncateAt(TruncateAtLine, TruncateAtColumn);
341    TruncateFile = 0;
342    TruncateAtLine = 0;
343    TruncateAtColumn = 0;
344  }
345
346  return Entry;
347}
348
349
350/// createMemBufferContentCache - Create a new ContentCache for the specified
351///  memory buffer.  This does no caching.
352const ContentCache*
353SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
354  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
355  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
356  // the pointer for its own nefarious purposes.
357  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
358  EntryAlign = std::max(8U, EntryAlign);
359  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
360  new (Entry) ContentCache();
361  MemBufferInfos.push_back(Entry);
362  Entry->setBuffer(Buffer);
363  return Entry;
364}
365
366void SourceManager::PreallocateSLocEntries(ExternalSLocEntrySource *Source,
367                                           unsigned NumSLocEntries,
368                                           unsigned NextOffset) {
369  ExternalSLocEntries = Source;
370  this->NextOffset = NextOffset;
371  SLocEntryLoaded.resize(NumSLocEntries + 1);
372  SLocEntryLoaded[0] = true;
373  SLocEntryTable.resize(SLocEntryTable.size() + NumSLocEntries);
374}
375
376void SourceManager::ClearPreallocatedSLocEntries() {
377  unsigned I = 0;
378  for (unsigned N = SLocEntryLoaded.size(); I != N; ++I)
379    if (!SLocEntryLoaded[I])
380      break;
381
382  // We've already loaded all preallocated source location entries.
383  if (I == SLocEntryLoaded.size())
384    return;
385
386  // Remove everything from location I onward.
387  SLocEntryTable.resize(I);
388  SLocEntryLoaded.clear();
389  ExternalSLocEntries = 0;
390}
391
392
393//===----------------------------------------------------------------------===//
394// Methods to create new FileID's and instantiations.
395//===----------------------------------------------------------------------===//
396
397/// createFileID - Create a new fileID for the specified ContentCache and
398/// include position.  This works regardless of whether the ContentCache
399/// corresponds to a file or some other input source.
400FileID SourceManager::createFileID(const ContentCache *File,
401                                   SourceLocation IncludePos,
402                                   SrcMgr::CharacteristicKind FileCharacter,
403                                   unsigned PreallocatedID,
404                                   unsigned Offset) {
405  if (PreallocatedID) {
406    // If we're filling in a preallocated ID, just load in the file
407    // entry and return.
408    assert(PreallocatedID < SLocEntryLoaded.size() &&
409           "Preallocate ID out-of-range");
410    assert(!SLocEntryLoaded[PreallocatedID] &&
411           "Source location entry already loaded");
412    assert(Offset && "Preallocate source location cannot have zero offset");
413    SLocEntryTable[PreallocatedID]
414      = SLocEntry::get(Offset, FileInfo::get(IncludePos, File, FileCharacter));
415    SLocEntryLoaded[PreallocatedID] = true;
416    FileID FID = FileID::get(PreallocatedID);
417    if (File->FirstFID.isInvalid())
418      File->FirstFID = FID;
419    return LastFileIDLookup = FID;
420  }
421
422  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
423                                          FileInfo::get(IncludePos, File,
424                                                        FileCharacter)));
425  unsigned FileSize = File->getSize();
426  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
427  NextOffset += FileSize+1;
428
429  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
430  // almost guaranteed to be from that file.
431  FileID FID = FileID::get(SLocEntryTable.size()-1);
432  if (File->FirstFID.isInvalid())
433    File->FirstFID = FID;
434  return LastFileIDLookup = FID;
435}
436
437/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
438/// that a token from SpellingLoc should actually be referenced from
439/// InstantiationLoc.
440SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
441                                                     SourceLocation ILocStart,
442                                                     SourceLocation ILocEnd,
443                                                     unsigned TokLength,
444                                                     unsigned PreallocatedID,
445                                                     unsigned Offset) {
446  InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
447  if (PreallocatedID) {
448    // If we're filling in a preallocated ID, just load in the
449    // instantiation entry and return.
450    assert(PreallocatedID < SLocEntryLoaded.size() &&
451           "Preallocate ID out-of-range");
452    assert(!SLocEntryLoaded[PreallocatedID] &&
453           "Source location entry already loaded");
454    assert(Offset && "Preallocate source location cannot have zero offset");
455    SLocEntryTable[PreallocatedID] = SLocEntry::get(Offset, II);
456    SLocEntryLoaded[PreallocatedID] = true;
457    return SourceLocation::getMacroLoc(Offset);
458  }
459  SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
460  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
461  NextOffset += TokLength+1;
462  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
463}
464
465/// getBufferData - Return a pointer to the start and end of the source buffer
466/// data for the specified FileID.
467std::pair<const char*, const char*>
468SourceManager::getBufferData(FileID FID) const {
469  const llvm::MemoryBuffer *Buf = getBuffer(FID);
470  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
471}
472
473
474//===----------------------------------------------------------------------===//
475// SourceLocation manipulation methods.
476//===----------------------------------------------------------------------===//
477
478/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
479/// method that is used for all SourceManager queries that start with a
480/// SourceLocation object.  It is responsible for finding the entry in
481/// SLocEntryTable which contains the specified location.
482///
483FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
484  assert(SLocOffset && "Invalid FileID");
485
486  // After the first and second level caches, I see two common sorts of
487  // behavior: 1) a lot of searched FileID's are "near" the cached file location
488  // or are "near" the cached instantiation location.  2) others are just
489  // completely random and may be a very long way away.
490  //
491  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
492  // then we fall back to a less cache efficient, but more scalable, binary
493  // search to find the location.
494
495  // See if this is near the file point - worst case we start scanning from the
496  // most newly created FileID.
497  std::vector<SrcMgr::SLocEntry>::const_iterator I;
498
499  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
500    // Neither loc prunes our search.
501    I = SLocEntryTable.end();
502  } else {
503    // Perhaps it is near the file point.
504    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
505  }
506
507  // Find the FileID that contains this.  "I" is an iterator that points to a
508  // FileID whose offset is known to be larger than SLocOffset.
509  unsigned NumProbes = 0;
510  while (1) {
511    --I;
512    if (ExternalSLocEntries)
513      getSLocEntry(FileID::get(I - SLocEntryTable.begin()));
514    if (I->getOffset() <= SLocOffset) {
515#if 0
516      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
517             I-SLocEntryTable.begin(),
518             I->isInstantiation() ? "inst" : "file",
519             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
520#endif
521      FileID Res = FileID::get(I-SLocEntryTable.begin());
522
523      // If this isn't an instantiation, remember it.  We have good locality
524      // across FileID lookups.
525      if (!I->isInstantiation())
526        LastFileIDLookup = Res;
527      NumLinearScans += NumProbes+1;
528      return Res;
529    }
530    if (++NumProbes == 8)
531      break;
532  }
533
534  // Convert "I" back into an index.  We know that it is an entry whose index is
535  // larger than the offset we are looking for.
536  unsigned GreaterIndex = I-SLocEntryTable.begin();
537  // LessIndex - This is the lower bound of the range that we're searching.
538  // We know that the offset corresponding to the FileID is is less than
539  // SLocOffset.
540  unsigned LessIndex = 0;
541  NumProbes = 0;
542  while (1) {
543    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
544    unsigned MidOffset = getSLocEntry(FileID::get(MiddleIndex)).getOffset();
545
546    ++NumProbes;
547
548    // If the offset of the midpoint is too large, chop the high side of the
549    // range to the midpoint.
550    if (MidOffset > SLocOffset) {
551      GreaterIndex = MiddleIndex;
552      continue;
553    }
554
555    // If the middle index contains the value, succeed and return.
556    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
557#if 0
558      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
559             I-SLocEntryTable.begin(),
560             I->isInstantiation() ? "inst" : "file",
561             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
562#endif
563      FileID Res = FileID::get(MiddleIndex);
564
565      // If this isn't an instantiation, remember it.  We have good locality
566      // across FileID lookups.
567      if (!I->isInstantiation())
568        LastFileIDLookup = Res;
569      NumBinaryProbes += NumProbes;
570      return Res;
571    }
572
573    // Otherwise, move the low-side up to the middle index.
574    LessIndex = MiddleIndex;
575  }
576}
577
578SourceLocation SourceManager::
579getInstantiationLocSlowCase(SourceLocation Loc) const {
580  do {
581    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
582    Loc = getSLocEntry(LocInfo.first).getInstantiation()
583                   .getInstantiationLocStart();
584    Loc = Loc.getFileLocWithOffset(LocInfo.second);
585  } while (!Loc.isFileID());
586
587  return Loc;
588}
589
590SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
591  do {
592    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
593    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
594    Loc = Loc.getFileLocWithOffset(LocInfo.second);
595  } while (!Loc.isFileID());
596  return Loc;
597}
598
599
600std::pair<FileID, unsigned>
601SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
602                                                     unsigned Offset) const {
603  // If this is an instantiation record, walk through all the instantiation
604  // points.
605  FileID FID;
606  SourceLocation Loc;
607  do {
608    Loc = E->getInstantiation().getInstantiationLocStart();
609
610    FID = getFileID(Loc);
611    E = &getSLocEntry(FID);
612    Offset += Loc.getOffset()-E->getOffset();
613  } while (!Loc.isFileID());
614
615  return std::make_pair(FID, Offset);
616}
617
618std::pair<FileID, unsigned>
619SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
620                                                unsigned Offset) const {
621  // If this is an instantiation record, walk through all the instantiation
622  // points.
623  FileID FID;
624  SourceLocation Loc;
625  do {
626    Loc = E->getInstantiation().getSpellingLoc();
627
628    FID = getFileID(Loc);
629    E = &getSLocEntry(FID);
630    Offset += Loc.getOffset()-E->getOffset();
631  } while (!Loc.isFileID());
632
633  return std::make_pair(FID, Offset);
634}
635
636/// getImmediateSpellingLoc - Given a SourceLocation object, return the
637/// spelling location referenced by the ID.  This is the first level down
638/// towards the place where the characters that make up the lexed token can be
639/// found.  This should not generally be used by clients.
640SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
641  if (Loc.isFileID()) return Loc;
642  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
643  Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
644  return Loc.getFileLocWithOffset(LocInfo.second);
645}
646
647
648/// getImmediateInstantiationRange - Loc is required to be an instantiation
649/// location.  Return the start/end of the instantiation information.
650std::pair<SourceLocation,SourceLocation>
651SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
652  assert(Loc.isMacroID() && "Not an instantiation loc!");
653  const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
654  return II.getInstantiationLocRange();
655}
656
657/// getInstantiationRange - Given a SourceLocation object, return the
658/// range of tokens covered by the instantiation in the ultimate file.
659std::pair<SourceLocation,SourceLocation>
660SourceManager::getInstantiationRange(SourceLocation Loc) const {
661  if (Loc.isFileID()) return std::make_pair(Loc, Loc);
662
663  std::pair<SourceLocation,SourceLocation> Res =
664    getImmediateInstantiationRange(Loc);
665
666  // Fully resolve the start and end locations to their ultimate instantiation
667  // points.
668  while (!Res.first.isFileID())
669    Res.first = getImmediateInstantiationRange(Res.first).first;
670  while (!Res.second.isFileID())
671    Res.second = getImmediateInstantiationRange(Res.second).second;
672  return Res;
673}
674
675
676
677//===----------------------------------------------------------------------===//
678// Queries about the code at a SourceLocation.
679//===----------------------------------------------------------------------===//
680
681/// getCharacterData - Return a pointer to the start of the specified location
682/// in the appropriate MemoryBuffer.
683const char *SourceManager::getCharacterData(SourceLocation SL) const {
684  // Note that this is a hot function in the getSpelling() path, which is
685  // heavily used by -E mode.
686  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
687
688  // Note that calling 'getBuffer()' may lazily page in a source file.
689  return getSLocEntry(LocInfo.first).getFile().getContentCache()
690              ->getBuffer()->getBufferStart() + LocInfo.second;
691}
692
693
694/// getColumnNumber - Return the column # for the specified file position.
695/// this is significantly cheaper to compute than the line number.
696unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
697  const char *Buf = getBuffer(FID)->getBufferStart();
698
699  unsigned LineStart = FilePos;
700  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
701    --LineStart;
702  return FilePos-LineStart+1;
703}
704
705unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
706  if (Loc.isInvalid()) return 0;
707  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
708  return getColumnNumber(LocInfo.first, LocInfo.second);
709}
710
711unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
712  if (Loc.isInvalid()) return 0;
713  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
714  return getColumnNumber(LocInfo.first, LocInfo.second);
715}
716
717
718
719static void ComputeLineNumbers(ContentCache* FI,
720                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
721static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
722  // Note that calling 'getBuffer()' may lazily page in the file.
723  const MemoryBuffer *Buffer = FI->getBuffer();
724
725  // Find the file offsets of all of the *physical* source lines.  This does
726  // not look at trigraphs, escaped newlines, or anything else tricky.
727  std::vector<unsigned> LineOffsets;
728
729  // Line #1 starts at char 0.
730  LineOffsets.push_back(0);
731
732  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
733  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
734  unsigned Offs = 0;
735  while (1) {
736    // Skip over the contents of the line.
737    // TODO: Vectorize this?  This is very performance sensitive for programs
738    // with lots of diagnostics and in -E mode.
739    const unsigned char *NextBuf = (const unsigned char *)Buf;
740    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
741      ++NextBuf;
742    Offs += NextBuf-Buf;
743    Buf = NextBuf;
744
745    if (Buf[0] == '\n' || Buf[0] == '\r') {
746      // If this is \n\r or \r\n, skip both characters.
747      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
748        ++Offs, ++Buf;
749      ++Offs, ++Buf;
750      LineOffsets.push_back(Offs);
751    } else {
752      // Otherwise, this is a null.  If end of file, exit.
753      if (Buf == End) break;
754      // Otherwise, skip the null.
755      ++Offs, ++Buf;
756    }
757  }
758
759  // Copy the offsets into the FileInfo structure.
760  FI->NumLines = LineOffsets.size();
761  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
762  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
763}
764
765/// getLineNumber - Given a SourceLocation, return the spelling line number
766/// for the position indicated.  This requires building and caching a table of
767/// line offsets for the MemoryBuffer, so this is not cheap: use only when
768/// about to emit a diagnostic.
769unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
770  ContentCache *Content;
771  if (LastLineNoFileIDQuery == FID)
772    Content = LastLineNoContentCache;
773  else
774    Content = const_cast<ContentCache*>(getSLocEntry(FID)
775                                        .getFile().getContentCache());
776
777  // If this is the first use of line information for this buffer, compute the
778  /// SourceLineCache for it on demand.
779  if (Content->SourceLineCache == 0)
780    ComputeLineNumbers(Content, ContentCacheAlloc);
781
782  // Okay, we know we have a line number table.  Do a binary search to find the
783  // line number that this character position lands on.
784  unsigned *SourceLineCache = Content->SourceLineCache;
785  unsigned *SourceLineCacheStart = SourceLineCache;
786  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
787
788  unsigned QueriedFilePos = FilePos+1;
789
790  // FIXME: I would like to be convinced that this code is worth being as
791  // complicated as it is, binary search isn't that slow.
792  //
793  // If it is worth being optimized, then in my opinion it could be more
794  // performant, simpler, and more obviously correct by just "galloping" outward
795  // from the queried file position. In fact, this could be incorporated into a
796  // generic algorithm such as lower_bound_with_hint.
797  //
798  // If someone gives me a test case where this matters, and I will do it! - DWD
799
800  // If the previous query was to the same file, we know both the file pos from
801  // that query and the line number returned.  This allows us to narrow the
802  // search space from the entire file to something near the match.
803  if (LastLineNoFileIDQuery == FID) {
804    if (QueriedFilePos >= LastLineNoFilePos) {
805      // FIXME: Potential overflow?
806      SourceLineCache = SourceLineCache+LastLineNoResult-1;
807
808      // The query is likely to be nearby the previous one.  Here we check to
809      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
810      // where big comment blocks and vertical whitespace eat up lines but
811      // contribute no tokens.
812      if (SourceLineCache+5 < SourceLineCacheEnd) {
813        if (SourceLineCache[5] > QueriedFilePos)
814          SourceLineCacheEnd = SourceLineCache+5;
815        else if (SourceLineCache+10 < SourceLineCacheEnd) {
816          if (SourceLineCache[10] > QueriedFilePos)
817            SourceLineCacheEnd = SourceLineCache+10;
818          else if (SourceLineCache+20 < SourceLineCacheEnd) {
819            if (SourceLineCache[20] > QueriedFilePos)
820              SourceLineCacheEnd = SourceLineCache+20;
821          }
822        }
823      }
824    } else {
825      if (LastLineNoResult < Content->NumLines)
826        SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
827    }
828  }
829
830  // If the spread is large, do a "radix" test as our initial guess, based on
831  // the assumption that lines average to approximately the same length.
832  // NOTE: This is currently disabled, as it does not appear to be profitable in
833  // initial measurements.
834  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
835    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
836
837    // Take a stab at guessing where it is.
838    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
839
840    // Check for -10 and +10 lines.
841    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
842    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
843
844    // If the computed lower bound is less than the query location, move it in.
845    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
846        SourceLineCacheStart[LowerBound] < QueriedFilePos)
847      SourceLineCache = SourceLineCacheStart+LowerBound;
848
849    // If the computed upper bound is greater than the query location, move it.
850    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
851        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
852      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
853  }
854
855  unsigned *Pos
856    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
857  unsigned LineNo = Pos-SourceLineCacheStart;
858
859  LastLineNoFileIDQuery = FID;
860  LastLineNoContentCache = Content;
861  LastLineNoFilePos = QueriedFilePos;
862  LastLineNoResult = LineNo;
863  return LineNo;
864}
865
866unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
867  if (Loc.isInvalid()) return 0;
868  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
869  return getLineNumber(LocInfo.first, LocInfo.second);
870}
871unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
872  if (Loc.isInvalid()) return 0;
873  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
874  return getLineNumber(LocInfo.first, LocInfo.second);
875}
876
877/// getFileCharacteristic - return the file characteristic of the specified
878/// source location, indicating whether this is a normal file, a system
879/// header, or an "implicit extern C" system header.
880///
881/// This state can be modified with flags on GNU linemarker directives like:
882///   # 4 "foo.h" 3
883/// which changes all source locations in the current file after that to be
884/// considered to be from a system header.
885SrcMgr::CharacteristicKind
886SourceManager::getFileCharacteristic(SourceLocation Loc) const {
887  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
888  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
889  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
890
891  // If there are no #line directives in this file, just return the whole-file
892  // state.
893  if (!FI.hasLineDirectives())
894    return FI.getFileCharacteristic();
895
896  assert(LineTable && "Can't have linetable entries without a LineTable!");
897  // See if there is a #line directive before the location.
898  const LineEntry *Entry =
899    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
900
901  // If this is before the first line marker, use the file characteristic.
902  if (!Entry)
903    return FI.getFileCharacteristic();
904
905  return Entry->FileKind;
906}
907
908/// Return the filename or buffer identifier of the buffer the location is in.
909/// Note that this name does not respect #line directives.  Use getPresumedLoc
910/// for normal clients.
911const char *SourceManager::getBufferName(SourceLocation Loc) const {
912  if (Loc.isInvalid()) return "<invalid loc>";
913
914  return getBuffer(getFileID(Loc))->getBufferIdentifier();
915}
916
917
918/// getPresumedLoc - This method returns the "presumed" location of a
919/// SourceLocation specifies.  A "presumed location" can be modified by #line
920/// or GNU line marker directives.  This provides a view on the data that a
921/// user should see in diagnostics, for example.
922///
923/// Note that a presumed location is always given as the instantiation point
924/// of an instantiation location, not at the spelling location.
925PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
926  if (Loc.isInvalid()) return PresumedLoc();
927
928  // Presumed locations are always for instantiation points.
929  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
930
931  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
932  const SrcMgr::ContentCache *C = FI.getContentCache();
933
934  // To get the source name, first consult the FileEntry (if one exists)
935  // before the MemBuffer as this will avoid unnecessarily paging in the
936  // MemBuffer.
937  const char *Filename =
938    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
939  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
940  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
941  SourceLocation IncludeLoc = FI.getIncludeLoc();
942
943  // If we have #line directives in this file, update and overwrite the physical
944  // location info if appropriate.
945  if (FI.hasLineDirectives()) {
946    assert(LineTable && "Can't have linetable entries without a LineTable!");
947    // See if there is a #line directive before this.  If so, get it.
948    if (const LineEntry *Entry =
949          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
950      // If the LineEntry indicates a filename, use it.
951      if (Entry->FilenameID != -1)
952        Filename = LineTable->getFilename(Entry->FilenameID);
953
954      // Use the line number specified by the LineEntry.  This line number may
955      // be multiple lines down from the line entry.  Add the difference in
956      // physical line numbers from the query point and the line marker to the
957      // total.
958      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
959      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
960
961      // Note that column numbers are not molested by line markers.
962
963      // Handle virtual #include manipulation.
964      if (Entry->IncludeOffset) {
965        IncludeLoc = getLocForStartOfFile(LocInfo.first);
966        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
967      }
968    }
969  }
970
971  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
972}
973
974//===----------------------------------------------------------------------===//
975// Other miscellaneous methods.
976//===----------------------------------------------------------------------===//
977
978/// \brief Get the source location for the given file:line:col triplet.
979///
980/// If the source file is included multiple times, the source location will
981/// be based upon the first inclusion.
982SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
983                                          unsigned Line, unsigned Col) const {
984  assert(SourceFile && "Null source file!");
985  assert(Line && Col && "Line and column should start from 1!");
986
987  fileinfo_iterator FI = FileInfos.find(SourceFile);
988  if (FI == FileInfos.end())
989    return SourceLocation();
990  ContentCache *Content = FI->second;
991
992  // If this is the first use of line information for this buffer, compute the
993  /// SourceLineCache for it on demand.
994  if (Content->SourceLineCache == 0)
995    ComputeLineNumbers(Content, ContentCacheAlloc);
996
997  if (Line > Content->NumLines)
998    return SourceLocation();
999
1000  unsigned FilePos = Content->SourceLineCache[Line - 1];
1001  const char *Buf = Content->getBuffer()->getBufferStart() + FilePos;
1002  unsigned BufLength = Content->getBuffer()->getBufferEnd() - Buf;
1003  unsigned i = 0;
1004
1005  // Check that the given column is valid.
1006  while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
1007    ++i;
1008  if (i < Col-1)
1009    return SourceLocation();
1010
1011  return getLocForStartOfFile(Content->FirstFID).
1012            getFileLocWithOffset(FilePos + Col - 1);
1013}
1014
1015/// \brief Determines the order of 2 source locations in the translation unit.
1016///
1017/// \returns true if LHS source location comes before RHS, false otherwise.
1018bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
1019                                              SourceLocation RHS) const {
1020  assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
1021  if (LHS == RHS)
1022    return false;
1023
1024  std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
1025  std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
1026
1027  // If the source locations are in the same file, just compare offsets.
1028  if (LOffs.first == ROffs.first)
1029    return LOffs.second < ROffs.second;
1030
1031  // If we are comparing a source location with multiple locations in the same
1032  // file, we get a big win by caching the result.
1033
1034  if (LastLFIDForBeforeTUCheck == LOffs.first &&
1035      LastRFIDForBeforeTUCheck == ROffs.first)
1036    return LastResForBeforeTUCheck;
1037
1038  LastLFIDForBeforeTUCheck = LOffs.first;
1039  LastRFIDForBeforeTUCheck = ROffs.first;
1040
1041  // "Traverse" the include/instantiation stacks of both locations and try to
1042  // find a common "ancestor".
1043  //
1044  // First we traverse the stack of the right location and check each level
1045  // against the level of the left location, while collecting all levels in a
1046  // "stack map".
1047
1048  std::map<FileID, unsigned> ROffsMap;
1049  ROffsMap[ROffs.first] = ROffs.second;
1050
1051  while (1) {
1052    SourceLocation UpperLoc;
1053    const SrcMgr::SLocEntry &Entry = getSLocEntry(ROffs.first);
1054    if (Entry.isInstantiation())
1055      UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1056    else
1057      UpperLoc = Entry.getFile().getIncludeLoc();
1058
1059    if (UpperLoc.isInvalid())
1060      break; // We reached the top.
1061
1062    ROffs = getDecomposedLoc(UpperLoc);
1063
1064    if (LOffs.first == ROffs.first)
1065      return LastResForBeforeTUCheck = LOffs.second < ROffs.second;
1066
1067    ROffsMap[ROffs.first] = ROffs.second;
1068  }
1069
1070  // We didn't find a common ancestor. Now traverse the stack of the left
1071  // location, checking against the stack map of the right location.
1072
1073  while (1) {
1074    SourceLocation UpperLoc;
1075    const SrcMgr::SLocEntry &Entry = getSLocEntry(LOffs.first);
1076    if (Entry.isInstantiation())
1077      UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1078    else
1079      UpperLoc = Entry.getFile().getIncludeLoc();
1080
1081    if (UpperLoc.isInvalid())
1082      break; // We reached the top.
1083
1084    LOffs = getDecomposedLoc(UpperLoc);
1085
1086    std::map<FileID, unsigned>::iterator I = ROffsMap.find(LOffs.first);
1087    if (I != ROffsMap.end())
1088      return LastResForBeforeTUCheck = LOffs.second < I->second;
1089  }
1090
1091  // No common ancestor.
1092  // Now we are getting into murky waters. Most probably this is because one
1093  // location is in the predefines buffer.
1094
1095  const FileEntry *LEntry =
1096    getSLocEntry(LOffs.first).getFile().getContentCache()->Entry;
1097  const FileEntry *REntry =
1098    getSLocEntry(ROffs.first).getFile().getContentCache()->Entry;
1099
1100  // If the locations are in two memory buffers we give up, we can't answer
1101  // which one should be considered first.
1102  // FIXME: Should there be a way to "include" memory buffers in the translation
1103  // unit ?
1104  assert((LEntry != 0 || REntry != 0) && "Locations in memory buffers.");
1105  (void) REntry;
1106
1107  // Consider the memory buffer as coming before the file in the translation
1108  // unit.
1109  if (LEntry == 0)
1110    return LastResForBeforeTUCheck = true;
1111  else {
1112    assert(REntry == 0 && "Locations in not #included files ?");
1113    return LastResForBeforeTUCheck = false;
1114  }
1115}
1116
1117void SourceManager::truncateFileAt(const FileEntry *Entry, unsigned Line,
1118                                   unsigned Column) {
1119  llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator FI
1120     = FileInfos.find(Entry);
1121  if (FI != FileInfos.end()) {
1122    FI->second->truncateAt(Line, Column);
1123    return;
1124  }
1125
1126  // We cannot perform the truncation until we actually see the file, so
1127  // save the truncation information.
1128  assert(TruncateFile == 0 && "Can't queue up multiple file truncations!");
1129  TruncateFile = Entry;
1130  TruncateAtLine = Line;
1131  TruncateAtColumn = Column;
1132}
1133
1134/// \brief Determine whether this file was truncated.
1135bool SourceManager::isTruncatedFile(FileID FID) const {
1136  return getSLocEntry(FID).getFile().getContentCache()->isTruncated();
1137}
1138
1139/// PrintStats - Print statistics to stderr.
1140///
1141void SourceManager::PrintStats() const {
1142  llvm::errs() << "\n*** Source Manager Stats:\n";
1143  llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
1144               << " mem buffers mapped.\n";
1145  llvm::errs() << SLocEntryTable.size() << " SLocEntry's allocated, "
1146               << NextOffset << "B of Sloc address space used.\n";
1147
1148  unsigned NumLineNumsComputed = 0;
1149  unsigned NumFileBytesMapped = 0;
1150  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
1151    NumLineNumsComputed += I->second->SourceLineCache != 0;
1152    NumFileBytesMapped  += I->second->getSizeBytesMapped();
1153  }
1154
1155  llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
1156               << NumLineNumsComputed << " files with line #'s computed.\n";
1157  llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
1158               << NumBinaryProbes << " binary.\n";
1159}
1160
1161ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
1162