SourceManager.cpp revision 39d9841ed4c0568d4b44dfbc12ac04491f60a374
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/SourceManagerInternals.h"
16#include "clang/Basic/FileManager.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/Support/raw_ostream.h"
20#include "llvm/System/Path.h"
21#include <algorithm>
22using namespace clang;
23using namespace SrcMgr;
24using llvm::MemoryBuffer;
25
26//===----------------------------------------------------------------------===//
27// SourceManager Helper Classes
28//===----------------------------------------------------------------------===//
29
30ContentCache::~ContentCache() {
31  delete Buffer;
32}
33
34/// getSizeBytesMapped - Returns the number of bytes actually mapped for
35///  this ContentCache.  This can be 0 if the MemBuffer was not actually
36///  instantiated.
37unsigned ContentCache::getSizeBytesMapped() const {
38  return Buffer ? Buffer->getBufferSize() : 0;
39}
40
41/// getSize - Returns the size of the content encapsulated by this ContentCache.
42///  This can be the size of the source file or the size of an arbitrary
43///  scratch buffer.  If the ContentCache encapsulates a source file, that
44///  file is not lazily brought in from disk to satisfy this query unless it
45///  needs to be truncated due to a truncateAt() call.
46unsigned ContentCache::getSize() const {
47  return Buffer ? Buffer->getBufferSize() : Entry->getSize();
48}
49
50const llvm::MemoryBuffer *ContentCache::getBuffer(std::string *ErrorStr) const {
51  // Lazily create the Buffer for ContentCaches that wrap files.
52  if (!Buffer && Entry) {
53    // FIXME: Should we support a way to not have to do this check over
54    //   and over if we cannot open the file?
55    Buffer = MemoryBuffer::getFile(Entry->getName(), ErrorStr,Entry->getSize());
56    if (isTruncated())
57      const_cast<ContentCache *>(this)->truncateAt(TruncateAtLine,
58                                                   TruncateAtColumn);
59  }
60  return Buffer;
61}
62
63void ContentCache::truncateAt(unsigned Line, unsigned Column) {
64  TruncateAtLine = Line;
65  TruncateAtColumn = Column;
66
67  if (!isTruncated() || !Buffer)
68    return;
69
70  // Find the byte position of the truncation point.
71  const char *Position = Buffer->getBufferStart();
72  for (unsigned Line = 1; Line < TruncateAtLine; ++Line) {
73    for (; *Position; ++Position) {
74      if (*Position != '\r' && *Position != '\n')
75        continue;
76
77      // Eat \r\n or \n\r as a single line.
78      if ((Position[1] == '\r' || Position[1] == '\n') &&
79          Position[0] != Position[1])
80        ++Position;
81      ++Position;
82      break;
83    }
84  }
85
86  for (unsigned Column = 1; Column < TruncateAtColumn; ++Column, ++Position) {
87    if (!*Position)
88      break;
89
90    if (*Position == '\t')
91      Column += 7;
92  }
93
94  // Truncate the buffer.
95  if (Position != Buffer->getBufferEnd()) {
96    MemoryBuffer *TruncatedBuffer
97      = MemoryBuffer::getMemBufferCopy(Buffer->getBufferStart(), Position,
98                                       Buffer->getBufferIdentifier());
99    delete Buffer;
100    Buffer = TruncatedBuffer;
101  }
102}
103
104unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
105  // Look up the filename in the string table, returning the pre-existing value
106  // if it exists.
107  llvm::StringMapEntry<unsigned> &Entry =
108    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
109  if (Entry.getValue() != ~0U)
110    return Entry.getValue();
111
112  // Otherwise, assign this the next available ID.
113  Entry.setValue(FilenamesByID.size());
114  FilenamesByID.push_back(&Entry);
115  return FilenamesByID.size()-1;
116}
117
118/// AddLineNote - Add a line note to the line table that indicates that there
119/// is a #line at the specified FID/Offset location which changes the presumed
120/// location to LineNo/FilenameID.
121void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
122                                unsigned LineNo, int FilenameID) {
123  std::vector<LineEntry> &Entries = LineEntries[FID];
124
125  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
126         "Adding line entries out of order!");
127
128  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
129  unsigned IncludeOffset = 0;
130
131  if (!Entries.empty()) {
132    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
133    // that we are still in "foo.h".
134    if (FilenameID == -1)
135      FilenameID = Entries.back().FilenameID;
136
137    // If we are after a line marker that switched us to system header mode, or
138    // that set #include information, preserve it.
139    Kind = Entries.back().FileKind;
140    IncludeOffset = Entries.back().IncludeOffset;
141  }
142
143  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
144                                   IncludeOffset));
145}
146
147/// AddLineNote This is the same as the previous version of AddLineNote, but is
148/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
149/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
150/// this is a file exit.  FileKind specifies whether this is a system header or
151/// extern C system header.
152void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
153                                unsigned LineNo, int FilenameID,
154                                unsigned EntryExit,
155                                SrcMgr::CharacteristicKind FileKind) {
156  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
157
158  std::vector<LineEntry> &Entries = LineEntries[FID];
159
160  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
161         "Adding line entries out of order!");
162
163  unsigned IncludeOffset = 0;
164  if (EntryExit == 0) {  // No #include stack change.
165    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
166  } else if (EntryExit == 1) {
167    IncludeOffset = Offset-1;
168  } else if (EntryExit == 2) {
169    assert(!Entries.empty() && Entries.back().IncludeOffset &&
170       "PPDirectives should have caught case when popping empty include stack");
171
172    // Get the include loc of the last entries' include loc as our include loc.
173    IncludeOffset = 0;
174    if (const LineEntry *PrevEntry =
175          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
176      IncludeOffset = PrevEntry->IncludeOffset;
177  }
178
179  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
180                                   IncludeOffset));
181}
182
183
184/// FindNearestLineEntry - Find the line entry nearest to FID that is before
185/// it.  If there is no line entry before Offset in FID, return null.
186const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
187                                                     unsigned Offset) {
188  const std::vector<LineEntry> &Entries = LineEntries[FID];
189  assert(!Entries.empty() && "No #line entries for this FID after all!");
190
191  // It is very common for the query to be after the last #line, check this
192  // first.
193  if (Entries.back().FileOffset <= Offset)
194    return &Entries.back();
195
196  // Do a binary search to find the maximal element that is still before Offset.
197  std::vector<LineEntry>::const_iterator I =
198    std::upper_bound(Entries.begin(), Entries.end(), Offset);
199  if (I == Entries.begin()) return 0;
200  return &*--I;
201}
202
203/// \brief Add a new line entry that has already been encoded into
204/// the internal representation of the line table.
205void LineTableInfo::AddEntry(unsigned FID,
206                             const std::vector<LineEntry> &Entries) {
207  LineEntries[FID] = Entries;
208}
209
210/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
211///
212unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
213  if (LineTable == 0)
214    LineTable = new LineTableInfo();
215  return LineTable->getLineTableFilenameID(Ptr, Len);
216}
217
218
219/// AddLineNote - Add a line note to the line table for the FileID and offset
220/// specified by Loc.  If FilenameID is -1, it is considered to be
221/// unspecified.
222void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
223                                int FilenameID) {
224  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
225
226  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
227
228  // Remember that this file has #line directives now if it doesn't already.
229  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
230
231  if (LineTable == 0)
232    LineTable = new LineTableInfo();
233  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
234}
235
236/// AddLineNote - Add a GNU line marker to the line table.
237void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
238                                int FilenameID, bool IsFileEntry,
239                                bool IsFileExit, bool IsSystemHeader,
240                                bool IsExternCHeader) {
241  // If there is no filename and no flags, this is treated just like a #line,
242  // which does not change the flags of the previous line marker.
243  if (FilenameID == -1) {
244    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
245           "Can't set flags without setting the filename!");
246    return AddLineNote(Loc, LineNo, FilenameID);
247  }
248
249  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
250  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
251
252  // Remember that this file has #line directives now if it doesn't already.
253  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
254
255  if (LineTable == 0)
256    LineTable = new LineTableInfo();
257
258  SrcMgr::CharacteristicKind FileKind;
259  if (IsExternCHeader)
260    FileKind = SrcMgr::C_ExternCSystem;
261  else if (IsSystemHeader)
262    FileKind = SrcMgr::C_System;
263  else
264    FileKind = SrcMgr::C_User;
265
266  unsigned EntryExit = 0;
267  if (IsFileEntry)
268    EntryExit = 1;
269  else if (IsFileExit)
270    EntryExit = 2;
271
272  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
273                         EntryExit, FileKind);
274}
275
276LineTableInfo &SourceManager::getLineTable() {
277  if (LineTable == 0)
278    LineTable = new LineTableInfo();
279  return *LineTable;
280}
281
282//===----------------------------------------------------------------------===//
283// Private 'Create' methods.
284//===----------------------------------------------------------------------===//
285
286SourceManager::~SourceManager() {
287  delete LineTable;
288
289  // Delete FileEntry objects corresponding to content caches.  Since the actual
290  // content cache objects are bump pointer allocated, we just have to run the
291  // dtors, but we call the deallocate method for completeness.
292  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
293    MemBufferInfos[i]->~ContentCache();
294    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
295  }
296  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
297       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
298    I->second->~ContentCache();
299    ContentCacheAlloc.Deallocate(I->second);
300  }
301}
302
303void SourceManager::clearIDTables() {
304  MainFileID = FileID();
305  SLocEntryTable.clear();
306  LastLineNoFileIDQuery = FileID();
307  LastLineNoContentCache = 0;
308  LastFileIDLookup = FileID();
309
310  if (LineTable)
311    LineTable->clear();
312
313  // Use up FileID #0 as an invalid instantiation.
314  NextOffset = 0;
315  createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
316}
317
318/// getOrCreateContentCache - Create or return a cached ContentCache for the
319/// specified file.
320const ContentCache *
321SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
322  assert(FileEnt && "Didn't specify a file entry to use?");
323
324  // Do we already have information about this file?
325  ContentCache *&Entry = FileInfos[FileEnt];
326  if (Entry) return Entry;
327
328  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
329  // so that FileInfo can use the low 3 bits of the pointer for its own
330  // nefarious purposes.
331  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
332  EntryAlign = std::max(8U, EntryAlign);
333  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
334  new (Entry) ContentCache(FileEnt);
335
336  if (FileEnt == TruncateFile) {
337    // If we had queued up a file truncation request, perform the truncation
338    // now.
339    Entry->truncateAt(TruncateAtLine, TruncateAtColumn);
340    TruncateFile = 0;
341    TruncateAtLine = 0;
342    TruncateAtColumn = 0;
343  }
344
345  return Entry;
346}
347
348
349/// createMemBufferContentCache - Create a new ContentCache for the specified
350///  memory buffer.  This does no caching.
351const ContentCache*
352SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
353  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
354  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
355  // the pointer for its own nefarious purposes.
356  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
357  EntryAlign = std::max(8U, EntryAlign);
358  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
359  new (Entry) ContentCache();
360  MemBufferInfos.push_back(Entry);
361  Entry->setBuffer(Buffer);
362  return Entry;
363}
364
365void SourceManager::PreallocateSLocEntries(ExternalSLocEntrySource *Source,
366                                           unsigned NumSLocEntries,
367                                           unsigned NextOffset) {
368  ExternalSLocEntries = Source;
369  this->NextOffset = NextOffset;
370  SLocEntryLoaded.resize(NumSLocEntries + 1);
371  SLocEntryLoaded[0] = true;
372  SLocEntryTable.resize(SLocEntryTable.size() + NumSLocEntries);
373}
374
375void SourceManager::ClearPreallocatedSLocEntries() {
376  unsigned I = 0;
377  for (unsigned N = SLocEntryLoaded.size(); I != N; ++I)
378    if (!SLocEntryLoaded[I])
379      break;
380
381  // We've already loaded all preallocated source location entries.
382  if (I == SLocEntryLoaded.size())
383    return;
384
385  // Remove everything from location I onward.
386  SLocEntryTable.resize(I);
387  SLocEntryLoaded.clear();
388  ExternalSLocEntries = 0;
389}
390
391
392//===----------------------------------------------------------------------===//
393// Methods to create new FileID's and instantiations.
394//===----------------------------------------------------------------------===//
395
396/// createFileID - Create a new fileID for the specified ContentCache and
397/// include position.  This works regardless of whether the ContentCache
398/// corresponds to a file or some other input source.
399FileID SourceManager::createFileID(const ContentCache *File,
400                                   SourceLocation IncludePos,
401                                   SrcMgr::CharacteristicKind FileCharacter,
402                                   unsigned PreallocatedID,
403                                   unsigned Offset) {
404  if (PreallocatedID) {
405    // If we're filling in a preallocated ID, just load in the file
406    // entry and return.
407    assert(PreallocatedID < SLocEntryLoaded.size() &&
408           "Preallocate ID out-of-range");
409    assert(!SLocEntryLoaded[PreallocatedID] &&
410           "Source location entry already loaded");
411    assert(Offset && "Preallocate source location cannot have zero offset");
412    SLocEntryTable[PreallocatedID]
413      = SLocEntry::get(Offset, FileInfo::get(IncludePos, File, FileCharacter));
414    SLocEntryLoaded[PreallocatedID] = true;
415    FileID FID = FileID::get(PreallocatedID);
416    if (File->FirstFID.isInvalid())
417      File->FirstFID = FID;
418    return LastFileIDLookup = FID;
419  }
420
421  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
422                                          FileInfo::get(IncludePos, File,
423                                                        FileCharacter)));
424  unsigned FileSize = File->getSize();
425  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
426  NextOffset += FileSize+1;
427
428  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
429  // almost guaranteed to be from that file.
430  FileID FID = FileID::get(SLocEntryTable.size()-1);
431  if (File->FirstFID.isInvalid())
432    File->FirstFID = FID;
433  return LastFileIDLookup = FID;
434}
435
436/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
437/// that a token from SpellingLoc should actually be referenced from
438/// InstantiationLoc.
439SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
440                                                     SourceLocation ILocStart,
441                                                     SourceLocation ILocEnd,
442                                                     unsigned TokLength,
443                                                     unsigned PreallocatedID,
444                                                     unsigned Offset) {
445  InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
446  if (PreallocatedID) {
447    // If we're filling in a preallocated ID, just load in the
448    // instantiation entry and return.
449    assert(PreallocatedID < SLocEntryLoaded.size() &&
450           "Preallocate ID out-of-range");
451    assert(!SLocEntryLoaded[PreallocatedID] &&
452           "Source location entry already loaded");
453    assert(Offset && "Preallocate source location cannot have zero offset");
454    SLocEntryTable[PreallocatedID] = SLocEntry::get(Offset, II);
455    SLocEntryLoaded[PreallocatedID] = true;
456    return SourceLocation::getMacroLoc(Offset);
457  }
458  SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
459  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
460  NextOffset += TokLength+1;
461  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
462}
463
464/// getBufferData - Return a pointer to the start and end of the source buffer
465/// data for the specified FileID.
466std::pair<const char*, const char*>
467SourceManager::getBufferData(FileID FID) const {
468  const llvm::MemoryBuffer *Buf = getBuffer(FID);
469  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
470}
471
472
473//===----------------------------------------------------------------------===//
474// SourceLocation manipulation methods.
475//===----------------------------------------------------------------------===//
476
477/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
478/// method that is used for all SourceManager queries that start with a
479/// SourceLocation object.  It is responsible for finding the entry in
480/// SLocEntryTable which contains the specified location.
481///
482FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
483  assert(SLocOffset && "Invalid FileID");
484
485  // After the first and second level caches, I see two common sorts of
486  // behavior: 1) a lot of searched FileID's are "near" the cached file location
487  // or are "near" the cached instantiation location.  2) others are just
488  // completely random and may be a very long way away.
489  //
490  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
491  // then we fall back to a less cache efficient, but more scalable, binary
492  // search to find the location.
493
494  // See if this is near the file point - worst case we start scanning from the
495  // most newly created FileID.
496  std::vector<SrcMgr::SLocEntry>::const_iterator I;
497
498  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
499    // Neither loc prunes our search.
500    I = SLocEntryTable.end();
501  } else {
502    // Perhaps it is near the file point.
503    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
504  }
505
506  // Find the FileID that contains this.  "I" is an iterator that points to a
507  // FileID whose offset is known to be larger than SLocOffset.
508  unsigned NumProbes = 0;
509  while (1) {
510    --I;
511    if (ExternalSLocEntries)
512      getSLocEntry(FileID::get(I - SLocEntryTable.begin()));
513    if (I->getOffset() <= SLocOffset) {
514#if 0
515      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
516             I-SLocEntryTable.begin(),
517             I->isInstantiation() ? "inst" : "file",
518             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
519#endif
520      FileID Res = FileID::get(I-SLocEntryTable.begin());
521
522      // If this isn't an instantiation, remember it.  We have good locality
523      // across FileID lookups.
524      if (!I->isInstantiation())
525        LastFileIDLookup = Res;
526      NumLinearScans += NumProbes+1;
527      return Res;
528    }
529    if (++NumProbes == 8)
530      break;
531  }
532
533  // Convert "I" back into an index.  We know that it is an entry whose index is
534  // larger than the offset we are looking for.
535  unsigned GreaterIndex = I-SLocEntryTable.begin();
536  // LessIndex - This is the lower bound of the range that we're searching.
537  // We know that the offset corresponding to the FileID is is less than
538  // SLocOffset.
539  unsigned LessIndex = 0;
540  NumProbes = 0;
541  while (1) {
542    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
543    unsigned MidOffset = getSLocEntry(FileID::get(MiddleIndex)).getOffset();
544
545    ++NumProbes;
546
547    // If the offset of the midpoint is too large, chop the high side of the
548    // range to the midpoint.
549    if (MidOffset > SLocOffset) {
550      GreaterIndex = MiddleIndex;
551      continue;
552    }
553
554    // If the middle index contains the value, succeed and return.
555    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
556#if 0
557      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
558             I-SLocEntryTable.begin(),
559             I->isInstantiation() ? "inst" : "file",
560             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
561#endif
562      FileID Res = FileID::get(MiddleIndex);
563
564      // If this isn't an instantiation, remember it.  We have good locality
565      // across FileID lookups.
566      if (!I->isInstantiation())
567        LastFileIDLookup = Res;
568      NumBinaryProbes += NumProbes;
569      return Res;
570    }
571
572    // Otherwise, move the low-side up to the middle index.
573    LessIndex = MiddleIndex;
574  }
575}
576
577SourceLocation SourceManager::
578getInstantiationLocSlowCase(SourceLocation Loc) const {
579  do {
580    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
581    Loc = getSLocEntry(LocInfo.first).getInstantiation()
582                   .getInstantiationLocStart();
583    Loc = Loc.getFileLocWithOffset(LocInfo.second);
584  } while (!Loc.isFileID());
585
586  return Loc;
587}
588
589SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
590  do {
591    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
592    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
593    Loc = Loc.getFileLocWithOffset(LocInfo.second);
594  } while (!Loc.isFileID());
595  return Loc;
596}
597
598
599std::pair<FileID, unsigned>
600SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
601                                                     unsigned Offset) const {
602  // If this is an instantiation record, walk through all the instantiation
603  // points.
604  FileID FID;
605  SourceLocation Loc;
606  do {
607    Loc = E->getInstantiation().getInstantiationLocStart();
608
609    FID = getFileID(Loc);
610    E = &getSLocEntry(FID);
611    Offset += Loc.getOffset()-E->getOffset();
612  } while (!Loc.isFileID());
613
614  return std::make_pair(FID, Offset);
615}
616
617std::pair<FileID, unsigned>
618SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
619                                                unsigned Offset) const {
620  // If this is an instantiation record, walk through all the instantiation
621  // points.
622  FileID FID;
623  SourceLocation Loc;
624  do {
625    Loc = E->getInstantiation().getSpellingLoc();
626
627    FID = getFileID(Loc);
628    E = &getSLocEntry(FID);
629    Offset += Loc.getOffset()-E->getOffset();
630  } while (!Loc.isFileID());
631
632  return std::make_pair(FID, Offset);
633}
634
635/// getImmediateSpellingLoc - Given a SourceLocation object, return the
636/// spelling location referenced by the ID.  This is the first level down
637/// towards the place where the characters that make up the lexed token can be
638/// found.  This should not generally be used by clients.
639SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
640  if (Loc.isFileID()) return Loc;
641  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
642  Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
643  return Loc.getFileLocWithOffset(LocInfo.second);
644}
645
646
647/// getImmediateInstantiationRange - Loc is required to be an instantiation
648/// location.  Return the start/end of the instantiation information.
649std::pair<SourceLocation,SourceLocation>
650SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
651  assert(Loc.isMacroID() && "Not an instantiation loc!");
652  const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
653  return II.getInstantiationLocRange();
654}
655
656/// getInstantiationRange - Given a SourceLocation object, return the
657/// range of tokens covered by the instantiation in the ultimate file.
658std::pair<SourceLocation,SourceLocation>
659SourceManager::getInstantiationRange(SourceLocation Loc) const {
660  if (Loc.isFileID()) return std::make_pair(Loc, Loc);
661
662  std::pair<SourceLocation,SourceLocation> Res =
663    getImmediateInstantiationRange(Loc);
664
665  // Fully resolve the start and end locations to their ultimate instantiation
666  // points.
667  while (!Res.first.isFileID())
668    Res.first = getImmediateInstantiationRange(Res.first).first;
669  while (!Res.second.isFileID())
670    Res.second = getImmediateInstantiationRange(Res.second).second;
671  return Res;
672}
673
674
675
676//===----------------------------------------------------------------------===//
677// Queries about the code at a SourceLocation.
678//===----------------------------------------------------------------------===//
679
680/// getCharacterData - Return a pointer to the start of the specified location
681/// in the appropriate MemoryBuffer.
682const char *SourceManager::getCharacterData(SourceLocation SL) const {
683  // Note that this is a hot function in the getSpelling() path, which is
684  // heavily used by -E mode.
685  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
686
687  // Note that calling 'getBuffer()' may lazily page in a source file.
688  return getSLocEntry(LocInfo.first).getFile().getContentCache()
689              ->getBuffer()->getBufferStart() + LocInfo.second;
690}
691
692
693/// getColumnNumber - Return the column # for the specified file position.
694/// this is significantly cheaper to compute than the line number.
695unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
696  const char *Buf = getBuffer(FID)->getBufferStart();
697
698  unsigned LineStart = FilePos;
699  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
700    --LineStart;
701  return FilePos-LineStart+1;
702}
703
704unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
705  if (Loc.isInvalid()) return 0;
706  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
707  return getColumnNumber(LocInfo.first, LocInfo.second);
708}
709
710unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
711  if (Loc.isInvalid()) return 0;
712  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
713  return getColumnNumber(LocInfo.first, LocInfo.second);
714}
715
716
717
718static DISABLE_INLINE void ComputeLineNumbers(ContentCache* FI,
719                                              llvm::BumpPtrAllocator &Alloc);
720static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
721  // Note that calling 'getBuffer()' may lazily page in the file.
722  const MemoryBuffer *Buffer = FI->getBuffer();
723
724  // Find the file offsets of all of the *physical* source lines.  This does
725  // not look at trigraphs, escaped newlines, or anything else tricky.
726  std::vector<unsigned> LineOffsets;
727
728  // Line #1 starts at char 0.
729  LineOffsets.push_back(0);
730
731  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
732  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
733  unsigned Offs = 0;
734  while (1) {
735    // Skip over the contents of the line.
736    // TODO: Vectorize this?  This is very performance sensitive for programs
737    // with lots of diagnostics and in -E mode.
738    const unsigned char *NextBuf = (const unsigned char *)Buf;
739    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
740      ++NextBuf;
741    Offs += NextBuf-Buf;
742    Buf = NextBuf;
743
744    if (Buf[0] == '\n' || Buf[0] == '\r') {
745      // If this is \n\r or \r\n, skip both characters.
746      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
747        ++Offs, ++Buf;
748      ++Offs, ++Buf;
749      LineOffsets.push_back(Offs);
750    } else {
751      // Otherwise, this is a null.  If end of file, exit.
752      if (Buf == End) break;
753      // Otherwise, skip the null.
754      ++Offs, ++Buf;
755    }
756  }
757
758  // Copy the offsets into the FileInfo structure.
759  FI->NumLines = LineOffsets.size();
760  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
761  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
762}
763
764/// getLineNumber - Given a SourceLocation, return the spelling line number
765/// for the position indicated.  This requires building and caching a table of
766/// line offsets for the MemoryBuffer, so this is not cheap: use only when
767/// about to emit a diagnostic.
768unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
769  ContentCache *Content;
770  if (LastLineNoFileIDQuery == FID)
771    Content = LastLineNoContentCache;
772  else
773    Content = const_cast<ContentCache*>(getSLocEntry(FID)
774                                        .getFile().getContentCache());
775
776  // If this is the first use of line information for this buffer, compute the
777  /// SourceLineCache for it on demand.
778  if (Content->SourceLineCache == 0)
779    ComputeLineNumbers(Content, ContentCacheAlloc);
780
781  // Okay, we know we have a line number table.  Do a binary search to find the
782  // line number that this character position lands on.
783  unsigned *SourceLineCache = Content->SourceLineCache;
784  unsigned *SourceLineCacheStart = SourceLineCache;
785  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
786
787  unsigned QueriedFilePos = FilePos+1;
788
789  // FIXME: I would like to be convinced that this code is worth being as
790  // complicated as it is, binary search isn't that slow.
791  //
792  // If it is worth being optimized, then in my opinion it could be more
793  // performant, simpler, and more obviously correct by just "galloping" outward
794  // from the queried file position. In fact, this could be incorporated into a
795  // generic algorithm such as lower_bound_with_hint.
796  //
797  // If someone gives me a test case where this matters, and I will do it! - DWD
798
799  // If the previous query was to the same file, we know both the file pos from
800  // that query and the line number returned.  This allows us to narrow the
801  // search space from the entire file to something near the match.
802  if (LastLineNoFileIDQuery == FID) {
803    if (QueriedFilePos >= LastLineNoFilePos) {
804      // FIXME: Potential overflow?
805      SourceLineCache = SourceLineCache+LastLineNoResult-1;
806
807      // The query is likely to be nearby the previous one.  Here we check to
808      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
809      // where big comment blocks and vertical whitespace eat up lines but
810      // contribute no tokens.
811      if (SourceLineCache+5 < SourceLineCacheEnd) {
812        if (SourceLineCache[5] > QueriedFilePos)
813          SourceLineCacheEnd = SourceLineCache+5;
814        else if (SourceLineCache+10 < SourceLineCacheEnd) {
815          if (SourceLineCache[10] > QueriedFilePos)
816            SourceLineCacheEnd = SourceLineCache+10;
817          else if (SourceLineCache+20 < SourceLineCacheEnd) {
818            if (SourceLineCache[20] > QueriedFilePos)
819              SourceLineCacheEnd = SourceLineCache+20;
820          }
821        }
822      }
823    } else {
824      if (LastLineNoResult < Content->NumLines)
825        SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
826    }
827  }
828
829  // If the spread is large, do a "radix" test as our initial guess, based on
830  // the assumption that lines average to approximately the same length.
831  // NOTE: This is currently disabled, as it does not appear to be profitable in
832  // initial measurements.
833  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
834    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
835
836    // Take a stab at guessing where it is.
837    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
838
839    // Check for -10 and +10 lines.
840    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
841    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
842
843    // If the computed lower bound is less than the query location, move it in.
844    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
845        SourceLineCacheStart[LowerBound] < QueriedFilePos)
846      SourceLineCache = SourceLineCacheStart+LowerBound;
847
848    // If the computed upper bound is greater than the query location, move it.
849    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
850        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
851      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
852  }
853
854  unsigned *Pos
855    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
856  unsigned LineNo = Pos-SourceLineCacheStart;
857
858  LastLineNoFileIDQuery = FID;
859  LastLineNoContentCache = Content;
860  LastLineNoFilePos = QueriedFilePos;
861  LastLineNoResult = LineNo;
862  return LineNo;
863}
864
865unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
866  if (Loc.isInvalid()) return 0;
867  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
868  return getLineNumber(LocInfo.first, LocInfo.second);
869}
870unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
871  if (Loc.isInvalid()) return 0;
872  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
873  return getLineNumber(LocInfo.first, LocInfo.second);
874}
875
876/// getFileCharacteristic - return the file characteristic of the specified
877/// source location, indicating whether this is a normal file, a system
878/// header, or an "implicit extern C" system header.
879///
880/// This state can be modified with flags on GNU linemarker directives like:
881///   # 4 "foo.h" 3
882/// which changes all source locations in the current file after that to be
883/// considered to be from a system header.
884SrcMgr::CharacteristicKind
885SourceManager::getFileCharacteristic(SourceLocation Loc) const {
886  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
887  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
888  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
889
890  // If there are no #line directives in this file, just return the whole-file
891  // state.
892  if (!FI.hasLineDirectives())
893    return FI.getFileCharacteristic();
894
895  assert(LineTable && "Can't have linetable entries without a LineTable!");
896  // See if there is a #line directive before the location.
897  const LineEntry *Entry =
898    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
899
900  // If this is before the first line marker, use the file characteristic.
901  if (!Entry)
902    return FI.getFileCharacteristic();
903
904  return Entry->FileKind;
905}
906
907/// Return the filename or buffer identifier of the buffer the location is in.
908/// Note that this name does not respect #line directives.  Use getPresumedLoc
909/// for normal clients.
910const char *SourceManager::getBufferName(SourceLocation Loc) const {
911  if (Loc.isInvalid()) return "<invalid loc>";
912
913  return getBuffer(getFileID(Loc))->getBufferIdentifier();
914}
915
916
917/// getPresumedLoc - This method returns the "presumed" location of a
918/// SourceLocation specifies.  A "presumed location" can be modified by #line
919/// or GNU line marker directives.  This provides a view on the data that a
920/// user should see in diagnostics, for example.
921///
922/// Note that a presumed location is always given as the instantiation point
923/// of an instantiation location, not at the spelling location.
924PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
925  if (Loc.isInvalid()) return PresumedLoc();
926
927  // Presumed locations are always for instantiation points.
928  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
929
930  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
931  const SrcMgr::ContentCache *C = FI.getContentCache();
932
933  // To get the source name, first consult the FileEntry (if one exists)
934  // before the MemBuffer as this will avoid unnecessarily paging in the
935  // MemBuffer.
936  const char *Filename =
937    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
938  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
939  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
940  SourceLocation IncludeLoc = FI.getIncludeLoc();
941
942  // If we have #line directives in this file, update and overwrite the physical
943  // location info if appropriate.
944  if (FI.hasLineDirectives()) {
945    assert(LineTable && "Can't have linetable entries without a LineTable!");
946    // See if there is a #line directive before this.  If so, get it.
947    if (const LineEntry *Entry =
948          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
949      // If the LineEntry indicates a filename, use it.
950      if (Entry->FilenameID != -1)
951        Filename = LineTable->getFilename(Entry->FilenameID);
952
953      // Use the line number specified by the LineEntry.  This line number may
954      // be multiple lines down from the line entry.  Add the difference in
955      // physical line numbers from the query point and the line marker to the
956      // total.
957      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
958      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
959
960      // Note that column numbers are not molested by line markers.
961
962      // Handle virtual #include manipulation.
963      if (Entry->IncludeOffset) {
964        IncludeLoc = getLocForStartOfFile(LocInfo.first);
965        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
966      }
967    }
968  }
969
970  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
971}
972
973//===----------------------------------------------------------------------===//
974// Other miscellaneous methods.
975//===----------------------------------------------------------------------===//
976
977/// \brief Get the source location for the given file:line:col triplet.
978///
979/// If the source file is included multiple times, the source location will
980/// be based upon the first inclusion.
981SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
982                                          unsigned Line, unsigned Col) const {
983  assert(SourceFile && "Null source file!");
984  assert(Line && Col && "Line and column should start from 1!");
985
986  fileinfo_iterator FI = FileInfos.find(SourceFile);
987  if (FI == FileInfos.end())
988    return SourceLocation();
989  ContentCache *Content = FI->second;
990
991  // If this is the first use of line information for this buffer, compute the
992  /// SourceLineCache for it on demand.
993  if (Content->SourceLineCache == 0)
994    ComputeLineNumbers(Content, ContentCacheAlloc);
995
996  if (Line > Content->NumLines)
997    return SourceLocation();
998
999  unsigned FilePos = Content->SourceLineCache[Line - 1];
1000  const char *Buf = Content->getBuffer()->getBufferStart() + FilePos;
1001  unsigned BufLength = Content->getBuffer()->getBufferEnd() - Buf;
1002  unsigned i = 0;
1003
1004  // Check that the given column is valid.
1005  while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
1006    ++i;
1007  if (i < Col-1)
1008    return SourceLocation();
1009
1010  return getLocForStartOfFile(Content->FirstFID).
1011            getFileLocWithOffset(FilePos + Col - 1);
1012}
1013
1014/// \brief Determines the order of 2 source locations in the translation unit.
1015///
1016/// \returns true if LHS source location comes before RHS, false otherwise.
1017bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
1018                                              SourceLocation RHS) const {
1019  assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
1020  if (LHS == RHS)
1021    return false;
1022
1023  std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
1024  std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
1025
1026  // If the source locations are in the same file, just compare offsets.
1027  if (LOffs.first == ROffs.first)
1028    return LOffs.second < ROffs.second;
1029
1030  // If we are comparing a source location with multiple locations in the same
1031  // file, we get a big win by caching the result.
1032
1033  if (LastLFIDForBeforeTUCheck == LOffs.first &&
1034      LastRFIDForBeforeTUCheck == ROffs.first)
1035    return LastResForBeforeTUCheck;
1036
1037  LastLFIDForBeforeTUCheck = LOffs.first;
1038  LastRFIDForBeforeTUCheck = ROffs.first;
1039
1040  // "Traverse" the include/instantiation stacks of both locations and try to
1041  // find a common "ancestor".
1042  //
1043  // First we traverse the stack of the right location and check each level
1044  // against the level of the left location, while collecting all levels in a
1045  // "stack map".
1046
1047  std::map<FileID, unsigned> ROffsMap;
1048  ROffsMap[ROffs.first] = ROffs.second;
1049
1050  while (1) {
1051    SourceLocation UpperLoc;
1052    const SrcMgr::SLocEntry &Entry = getSLocEntry(ROffs.first);
1053    if (Entry.isInstantiation())
1054      UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1055    else
1056      UpperLoc = Entry.getFile().getIncludeLoc();
1057
1058    if (UpperLoc.isInvalid())
1059      break; // We reached the top.
1060
1061    ROffs = getDecomposedLoc(UpperLoc);
1062
1063    if (LOffs.first == ROffs.first)
1064      return LastResForBeforeTUCheck = LOffs.second < ROffs.second;
1065
1066    ROffsMap[ROffs.first] = ROffs.second;
1067  }
1068
1069  // We didn't find a common ancestor. Now traverse the stack of the left
1070  // location, checking against the stack map of the right location.
1071
1072  while (1) {
1073    SourceLocation UpperLoc;
1074    const SrcMgr::SLocEntry &Entry = getSLocEntry(LOffs.first);
1075    if (Entry.isInstantiation())
1076      UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1077    else
1078      UpperLoc = Entry.getFile().getIncludeLoc();
1079
1080    if (UpperLoc.isInvalid())
1081      break; // We reached the top.
1082
1083    LOffs = getDecomposedLoc(UpperLoc);
1084
1085    std::map<FileID, unsigned>::iterator I = ROffsMap.find(LOffs.first);
1086    if (I != ROffsMap.end())
1087      return LastResForBeforeTUCheck = LOffs.second < I->second;
1088  }
1089
1090  // No common ancestor.
1091  // Now we are getting into murky waters. Most probably this is because one
1092  // location is in the predefines buffer.
1093
1094  const FileEntry *LEntry =
1095    getSLocEntry(LOffs.first).getFile().getContentCache()->Entry;
1096  const FileEntry *REntry =
1097    getSLocEntry(ROffs.first).getFile().getContentCache()->Entry;
1098
1099  // If the locations are in two memory buffers we give up, we can't answer
1100  // which one should be considered first.
1101  // FIXME: Should there be a way to "include" memory buffers in the translation
1102  // unit ?
1103  assert((LEntry != 0 || REntry != 0) && "Locations in memory buffers.");
1104  (void) REntry;
1105
1106  // Consider the memory buffer as coming before the file in the translation
1107  // unit.
1108  if (LEntry == 0)
1109    return LastResForBeforeTUCheck = true;
1110  else {
1111    assert(REntry == 0 && "Locations in not #included files ?");
1112    return LastResForBeforeTUCheck = false;
1113  }
1114}
1115
1116void SourceManager::truncateFileAt(const FileEntry *Entry, unsigned Line,
1117                                   unsigned Column) {
1118  llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator FI
1119     = FileInfos.find(Entry);
1120  if (FI != FileInfos.end()) {
1121    FI->second->truncateAt(Line, Column);
1122    return;
1123  }
1124
1125  // We cannot perform the truncation until we actually see the file, so
1126  // save the truncation information.
1127  assert(TruncateFile == 0 && "Can't queue up multiple file truncations!");
1128  TruncateFile = Entry;
1129  TruncateAtLine = Line;
1130  TruncateAtColumn = Column;
1131}
1132
1133/// \brief Determine whether this file was truncated.
1134bool SourceManager::isTruncatedFile(FileID FID) const {
1135  return getSLocEntry(FID).getFile().getContentCache()->isTruncated();
1136}
1137
1138/// PrintStats - Print statistics to stderr.
1139///
1140void SourceManager::PrintStats() const {
1141  llvm::errs() << "\n*** Source Manager Stats:\n";
1142  llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
1143               << " mem buffers mapped.\n";
1144  llvm::errs() << SLocEntryTable.size() << " SLocEntry's allocated, "
1145               << NextOffset << "B of Sloc address space used.\n";
1146
1147  unsigned NumLineNumsComputed = 0;
1148  unsigned NumFileBytesMapped = 0;
1149  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
1150    NumLineNumsComputed += I->second->SourceLineCache != 0;
1151    NumFileBytesMapped  += I->second->getSizeBytesMapped();
1152  }
1153
1154  llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
1155               << NumLineNumsComputed << " files with line #'s computed.\n";
1156  llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
1157               << NumBinaryProbes << " binary.\n";
1158}
1159
1160ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
1161