SourceManager.cpp revision 6cb7c1a43b0c8f739d1f54b7fdae5ede86033496
1//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/SourceManagerInternals.h"
16#include "clang/Basic/FileManager.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
20#include <algorithm>
21using namespace clang;
22using namespace SrcMgr;
23using llvm::MemoryBuffer;
24
25//===----------------------------------------------------------------------===//
26// SourceManager Helper Classes
27//===----------------------------------------------------------------------===//
28
29ContentCache::~ContentCache() {
30  delete Buffer;
31}
32
33/// getSizeBytesMapped - Returns the number of bytes actually mapped for
34///  this ContentCache.  This can be 0 if the MemBuffer was not actually
35///  instantiated.
36unsigned ContentCache::getSizeBytesMapped() const {
37  return Buffer ? Buffer->getBufferSize() : 0;
38}
39
40/// getSize - Returns the size of the content encapsulated by this ContentCache.
41///  This can be the size of the source file or the size of an arbitrary
42///  scratch buffer.  If the ContentCache encapsulates a source file, that
43///  file is not lazily brought in from disk to satisfy this query.
44unsigned ContentCache::getSize() const {
45  return Entry ? Entry->getSize() : Buffer->getBufferSize();
46}
47
48const llvm::MemoryBuffer *ContentCache::getBuffer() const {
49  // Lazily create the Buffer for ContentCaches that wrap files.
50  if (!Buffer && Entry) {
51    // FIXME: Should we support a way to not have to do this check over
52    //   and over if we cannot open the file?
53    Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
54  }
55  return Buffer;
56}
57
58unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
59  // Look up the filename in the string table, returning the pre-existing value
60  // if it exists.
61  llvm::StringMapEntry<unsigned> &Entry =
62    FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
63  if (Entry.getValue() != ~0U)
64    return Entry.getValue();
65
66  // Otherwise, assign this the next available ID.
67  Entry.setValue(FilenamesByID.size());
68  FilenamesByID.push_back(&Entry);
69  return FilenamesByID.size()-1;
70}
71
72/// AddLineNote - Add a line note to the line table that indicates that there
73/// is a #line at the specified FID/Offset location which changes the presumed
74/// location to LineNo/FilenameID.
75void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
76                                unsigned LineNo, int FilenameID) {
77  std::vector<LineEntry> &Entries = LineEntries[FID];
78
79  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
80         "Adding line entries out of order!");
81
82  SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
83  unsigned IncludeOffset = 0;
84
85  if (!Entries.empty()) {
86    // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
87    // that we are still in "foo.h".
88    if (FilenameID == -1)
89      FilenameID = Entries.back().FilenameID;
90
91    // If we are after a line marker that switched us to system header mode, or
92    // that set #include information, preserve it.
93    Kind = Entries.back().FileKind;
94    IncludeOffset = Entries.back().IncludeOffset;
95  }
96
97  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
98                                   IncludeOffset));
99}
100
101/// AddLineNote This is the same as the previous version of AddLineNote, but is
102/// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
103/// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
104/// this is a file exit.  FileKind specifies whether this is a system header or
105/// extern C system header.
106void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
107                                unsigned LineNo, int FilenameID,
108                                unsigned EntryExit,
109                                SrcMgr::CharacteristicKind FileKind) {
110  assert(FilenameID != -1 && "Unspecified filename should use other accessor");
111
112  std::vector<LineEntry> &Entries = LineEntries[FID];
113
114  assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
115         "Adding line entries out of order!");
116
117  unsigned IncludeOffset = 0;
118  if (EntryExit == 0) {  // No #include stack change.
119    IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
120  } else if (EntryExit == 1) {
121    IncludeOffset = Offset-1;
122  } else if (EntryExit == 2) {
123    assert(!Entries.empty() && Entries.back().IncludeOffset &&
124       "PPDirectives should have caught case when popping empty include stack");
125
126    // Get the include loc of the last entries' include loc as our include loc.
127    IncludeOffset = 0;
128    if (const LineEntry *PrevEntry =
129          FindNearestLineEntry(FID, Entries.back().IncludeOffset))
130      IncludeOffset = PrevEntry->IncludeOffset;
131  }
132
133  Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
134                                   IncludeOffset));
135}
136
137
138/// FindNearestLineEntry - Find the line entry nearest to FID that is before
139/// it.  If there is no line entry before Offset in FID, return null.
140const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
141                                                     unsigned Offset) {
142  const std::vector<LineEntry> &Entries = LineEntries[FID];
143  assert(!Entries.empty() && "No #line entries for this FID after all!");
144
145  // It is very common for the query to be after the last #line, check this
146  // first.
147  if (Entries.back().FileOffset <= Offset)
148    return &Entries.back();
149
150  // Do a binary search to find the maximal element that is still before Offset.
151  std::vector<LineEntry>::const_iterator I =
152    std::upper_bound(Entries.begin(), Entries.end(), Offset);
153  if (I == Entries.begin()) return 0;
154  return &*--I;
155}
156
157/// \brief Add a new line entry that has already been encoded into
158/// the internal representation of the line table.
159void LineTableInfo::AddEntry(unsigned FID,
160                             const std::vector<LineEntry> &Entries) {
161  LineEntries[FID] = Entries;
162}
163
164/// getLineTableFilenameID - Return the uniqued ID for the specified filename.
165///
166unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
167  if (LineTable == 0)
168    LineTable = new LineTableInfo();
169  return LineTable->getLineTableFilenameID(Ptr, Len);
170}
171
172
173/// AddLineNote - Add a line note to the line table for the FileID and offset
174/// specified by Loc.  If FilenameID is -1, it is considered to be
175/// unspecified.
176void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
177                                int FilenameID) {
178  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
179
180  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
181
182  // Remember that this file has #line directives now if it doesn't already.
183  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
184
185  if (LineTable == 0)
186    LineTable = new LineTableInfo();
187  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
188}
189
190/// AddLineNote - Add a GNU line marker to the line table.
191void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
192                                int FilenameID, bool IsFileEntry,
193                                bool IsFileExit, bool IsSystemHeader,
194                                bool IsExternCHeader) {
195  // If there is no filename and no flags, this is treated just like a #line,
196  // which does not change the flags of the previous line marker.
197  if (FilenameID == -1) {
198    assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
199           "Can't set flags without setting the filename!");
200    return AddLineNote(Loc, LineNo, FilenameID);
201  }
202
203  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
204  const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
205
206  // Remember that this file has #line directives now if it doesn't already.
207  const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
208
209  if (LineTable == 0)
210    LineTable = new LineTableInfo();
211
212  SrcMgr::CharacteristicKind FileKind;
213  if (IsExternCHeader)
214    FileKind = SrcMgr::C_ExternCSystem;
215  else if (IsSystemHeader)
216    FileKind = SrcMgr::C_System;
217  else
218    FileKind = SrcMgr::C_User;
219
220  unsigned EntryExit = 0;
221  if (IsFileEntry)
222    EntryExit = 1;
223  else if (IsFileExit)
224    EntryExit = 2;
225
226  LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
227                         EntryExit, FileKind);
228}
229
230LineTableInfo &SourceManager::getLineTable() {
231  if (LineTable == 0)
232    LineTable = new LineTableInfo();
233  return *LineTable;
234}
235
236//===----------------------------------------------------------------------===//
237// Private 'Create' methods.
238//===----------------------------------------------------------------------===//
239
240SourceManager::~SourceManager() {
241  delete LineTable;
242
243  // Delete FileEntry objects corresponding to content caches.  Since the actual
244  // content cache objects are bump pointer allocated, we just have to run the
245  // dtors, but we call the deallocate method for completeness.
246  for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
247    MemBufferInfos[i]->~ContentCache();
248    ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
249  }
250  for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
251       I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
252    I->second->~ContentCache();
253    ContentCacheAlloc.Deallocate(I->second);
254  }
255}
256
257void SourceManager::clearIDTables() {
258  MainFileID = FileID();
259  SLocEntryTable.clear();
260  LastLineNoFileIDQuery = FileID();
261  LastLineNoContentCache = 0;
262  LastFileIDLookup = FileID();
263
264  if (LineTable)
265    LineTable->clear();
266
267  // Use up FileID #0 as an invalid instantiation.
268  NextOffset = 0;
269  createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
270}
271
272/// getOrCreateContentCache - Create or return a cached ContentCache for the
273/// specified file.
274const ContentCache *
275SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
276  assert(FileEnt && "Didn't specify a file entry to use?");
277
278  // Do we already have information about this file?
279  ContentCache *&Entry = FileInfos[FileEnt];
280  if (Entry) return Entry;
281
282  // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
283  // so that FileInfo can use the low 3 bits of the pointer for its own
284  // nefarious purposes.
285  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
286  EntryAlign = std::max(8U, EntryAlign);
287  Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
288  new (Entry) ContentCache(FileEnt);
289  return Entry;
290}
291
292
293/// createMemBufferContentCache - Create a new ContentCache for the specified
294///  memory buffer.  This does no caching.
295const ContentCache*
296SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
297  // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
298  // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
299  // the pointer for its own nefarious purposes.
300  unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
301  EntryAlign = std::max(8U, EntryAlign);
302  ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
303  new (Entry) ContentCache();
304  MemBufferInfos.push_back(Entry);
305  Entry->setBuffer(Buffer);
306  return Entry;
307}
308
309void SourceManager::PreallocateSLocEntries(ExternalSLocEntrySource *Source,
310                                           unsigned NumSLocEntries,
311                                           unsigned NextOffset) {
312  ExternalSLocEntries = Source;
313  this->NextOffset = NextOffset;
314  SLocEntryLoaded.resize(NumSLocEntries + 1);
315  SLocEntryLoaded[0] = true;
316  SLocEntryTable.resize(SLocEntryTable.size() + NumSLocEntries);
317}
318
319void SourceManager::ClearPreallocatedSLocEntries() {
320  unsigned I = 0;
321  for (unsigned N = SLocEntryLoaded.size(); I != N; ++I)
322    if (!SLocEntryLoaded[I])
323      break;
324
325  // We've already loaded all preallocated source location entries.
326  if (I == SLocEntryLoaded.size())
327    return;
328
329  // Remove everything from location I onward.
330  SLocEntryTable.resize(I);
331  SLocEntryLoaded.clear();
332  ExternalSLocEntries = 0;
333}
334
335
336//===----------------------------------------------------------------------===//
337// Methods to create new FileID's and instantiations.
338//===----------------------------------------------------------------------===//
339
340/// createFileID - Create a new fileID for the specified ContentCache and
341/// include position.  This works regardless of whether the ContentCache
342/// corresponds to a file or some other input source.
343FileID SourceManager::createFileID(const ContentCache *File,
344                                   SourceLocation IncludePos,
345                                   SrcMgr::CharacteristicKind FileCharacter,
346                                   unsigned PreallocatedID,
347                                   unsigned Offset) {
348  if (PreallocatedID) {
349    // If we're filling in a preallocated ID, just load in the file
350    // entry and return.
351    assert(PreallocatedID < SLocEntryLoaded.size() &&
352           "Preallocate ID out-of-range");
353    assert(!SLocEntryLoaded[PreallocatedID] &&
354           "Source location entry already loaded");
355    assert(Offset && "Preallocate source location cannot have zero offset");
356    SLocEntryTable[PreallocatedID]
357      = SLocEntry::get(Offset, FileInfo::get(IncludePos, File, FileCharacter));
358    SLocEntryLoaded[PreallocatedID] = true;
359    FileID FID = FileID::get(PreallocatedID);
360    if (File->FirstFID.isInvalid())
361      File->FirstFID = FID;
362    return LastFileIDLookup = FID;
363  }
364
365  SLocEntryTable.push_back(SLocEntry::get(NextOffset,
366                                          FileInfo::get(IncludePos, File,
367                                                        FileCharacter)));
368  unsigned FileSize = File->getSize();
369  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
370  NextOffset += FileSize+1;
371
372  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
373  // almost guaranteed to be from that file.
374  FileID FID = FileID::get(SLocEntryTable.size()-1);
375  if (File->FirstFID.isInvalid())
376    File->FirstFID = FID;
377  return LastFileIDLookup = FID;
378}
379
380/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
381/// that a token from SpellingLoc should actually be referenced from
382/// InstantiationLoc.
383SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
384                                                     SourceLocation ILocStart,
385                                                     SourceLocation ILocEnd,
386                                                     unsigned TokLength,
387                                                     unsigned PreallocatedID,
388                                                     unsigned Offset) {
389  InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
390  if (PreallocatedID) {
391    // If we're filling in a preallocated ID, just load in the
392    // instantiation entry and return.
393    assert(PreallocatedID < SLocEntryLoaded.size() &&
394           "Preallocate ID out-of-range");
395    assert(!SLocEntryLoaded[PreallocatedID] &&
396           "Source location entry already loaded");
397    assert(Offset && "Preallocate source location cannot have zero offset");
398    SLocEntryTable[PreallocatedID] = SLocEntry::get(Offset, II);
399    SLocEntryLoaded[PreallocatedID] = true;
400    return SourceLocation::getMacroLoc(Offset);
401  }
402  SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
403  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
404  NextOffset += TokLength+1;
405  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
406}
407
408/// getBufferData - Return a pointer to the start and end of the source buffer
409/// data for the specified FileID.
410std::pair<const char*, const char*>
411SourceManager::getBufferData(FileID FID) const {
412  const llvm::MemoryBuffer *Buf = getBuffer(FID);
413  return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
414}
415
416
417//===----------------------------------------------------------------------===//
418// SourceLocation manipulation methods.
419//===----------------------------------------------------------------------===//
420
421/// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
422/// method that is used for all SourceManager queries that start with a
423/// SourceLocation object.  It is responsible for finding the entry in
424/// SLocEntryTable which contains the specified location.
425///
426FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
427  assert(SLocOffset && "Invalid FileID");
428
429  // After the first and second level caches, I see two common sorts of
430  // behavior: 1) a lot of searched FileID's are "near" the cached file location
431  // or are "near" the cached instantiation location.  2) others are just
432  // completely random and may be a very long way away.
433  //
434  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
435  // then we fall back to a less cache efficient, but more scalable, binary
436  // search to find the location.
437
438  // See if this is near the file point - worst case we start scanning from the
439  // most newly created FileID.
440  std::vector<SrcMgr::SLocEntry>::const_iterator I;
441
442  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
443    // Neither loc prunes our search.
444    I = SLocEntryTable.end();
445  } else {
446    // Perhaps it is near the file point.
447    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
448  }
449
450  // Find the FileID that contains this.  "I" is an iterator that points to a
451  // FileID whose offset is known to be larger than SLocOffset.
452  unsigned NumProbes = 0;
453  while (1) {
454    --I;
455    if (ExternalSLocEntries)
456      getSLocEntry(FileID::get(I - SLocEntryTable.begin()));
457    if (I->getOffset() <= SLocOffset) {
458#if 0
459      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
460             I-SLocEntryTable.begin(),
461             I->isInstantiation() ? "inst" : "file",
462             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
463#endif
464      FileID Res = FileID::get(I-SLocEntryTable.begin());
465
466      // If this isn't an instantiation, remember it.  We have good locality
467      // across FileID lookups.
468      if (!I->isInstantiation())
469        LastFileIDLookup = Res;
470      NumLinearScans += NumProbes+1;
471      return Res;
472    }
473    if (++NumProbes == 8)
474      break;
475  }
476
477  // Convert "I" back into an index.  We know that it is an entry whose index is
478  // larger than the offset we are looking for.
479  unsigned GreaterIndex = I-SLocEntryTable.begin();
480  // LessIndex - This is the lower bound of the range that we're searching.
481  // We know that the offset corresponding to the FileID is is less than
482  // SLocOffset.
483  unsigned LessIndex = 0;
484  NumProbes = 0;
485  while (1) {
486    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
487    unsigned MidOffset = getSLocEntry(FileID::get(MiddleIndex)).getOffset();
488
489    ++NumProbes;
490
491    // If the offset of the midpoint is too large, chop the high side of the
492    // range to the midpoint.
493    if (MidOffset > SLocOffset) {
494      GreaterIndex = MiddleIndex;
495      continue;
496    }
497
498    // If the middle index contains the value, succeed and return.
499    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
500#if 0
501      printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
502             I-SLocEntryTable.begin(),
503             I->isInstantiation() ? "inst" : "file",
504             LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
505#endif
506      FileID Res = FileID::get(MiddleIndex);
507
508      // If this isn't an instantiation, remember it.  We have good locality
509      // across FileID lookups.
510      if (!I->isInstantiation())
511        LastFileIDLookup = Res;
512      NumBinaryProbes += NumProbes;
513      return Res;
514    }
515
516    // Otherwise, move the low-side up to the middle index.
517    LessIndex = MiddleIndex;
518  }
519}
520
521SourceLocation SourceManager::
522getInstantiationLocSlowCase(SourceLocation Loc) const {
523  do {
524    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
525    Loc = getSLocEntry(LocInfo.first).getInstantiation()
526                   .getInstantiationLocStart();
527    Loc = Loc.getFileLocWithOffset(LocInfo.second);
528  } while (!Loc.isFileID());
529
530  return Loc;
531}
532
533SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
534  do {
535    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
536    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
537    Loc = Loc.getFileLocWithOffset(LocInfo.second);
538  } while (!Loc.isFileID());
539  return Loc;
540}
541
542
543std::pair<FileID, unsigned>
544SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
545                                                     unsigned Offset) const {
546  // If this is an instantiation record, walk through all the instantiation
547  // points.
548  FileID FID;
549  SourceLocation Loc;
550  do {
551    Loc = E->getInstantiation().getInstantiationLocStart();
552
553    FID = getFileID(Loc);
554    E = &getSLocEntry(FID);
555    Offset += Loc.getOffset()-E->getOffset();
556  } while (!Loc.isFileID());
557
558  return std::make_pair(FID, Offset);
559}
560
561std::pair<FileID, unsigned>
562SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
563                                                unsigned Offset) const {
564  // If this is an instantiation record, walk through all the instantiation
565  // points.
566  FileID FID;
567  SourceLocation Loc;
568  do {
569    Loc = E->getInstantiation().getSpellingLoc();
570
571    FID = getFileID(Loc);
572    E = &getSLocEntry(FID);
573    Offset += Loc.getOffset()-E->getOffset();
574  } while (!Loc.isFileID());
575
576  return std::make_pair(FID, Offset);
577}
578
579/// getImmediateSpellingLoc - Given a SourceLocation object, return the
580/// spelling location referenced by the ID.  This is the first level down
581/// towards the place where the characters that make up the lexed token can be
582/// found.  This should not generally be used by clients.
583SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
584  if (Loc.isFileID()) return Loc;
585  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
586  Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
587  return Loc.getFileLocWithOffset(LocInfo.second);
588}
589
590
591/// getImmediateInstantiationRange - Loc is required to be an instantiation
592/// location.  Return the start/end of the instantiation information.
593std::pair<SourceLocation,SourceLocation>
594SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
595  assert(Loc.isMacroID() && "Not an instantiation loc!");
596  const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
597  return II.getInstantiationLocRange();
598}
599
600/// getInstantiationRange - Given a SourceLocation object, return the
601/// range of tokens covered by the instantiation in the ultimate file.
602std::pair<SourceLocation,SourceLocation>
603SourceManager::getInstantiationRange(SourceLocation Loc) const {
604  if (Loc.isFileID()) return std::make_pair(Loc, Loc);
605
606  std::pair<SourceLocation,SourceLocation> Res =
607    getImmediateInstantiationRange(Loc);
608
609  // Fully resolve the start and end locations to their ultimate instantiation
610  // points.
611  while (!Res.first.isFileID())
612    Res.first = getImmediateInstantiationRange(Res.first).first;
613  while (!Res.second.isFileID())
614    Res.second = getImmediateInstantiationRange(Res.second).second;
615  return Res;
616}
617
618
619
620//===----------------------------------------------------------------------===//
621// Queries about the code at a SourceLocation.
622//===----------------------------------------------------------------------===//
623
624/// getCharacterData - Return a pointer to the start of the specified location
625/// in the appropriate MemoryBuffer.
626const char *SourceManager::getCharacterData(SourceLocation SL) const {
627  // Note that this is a hot function in the getSpelling() path, which is
628  // heavily used by -E mode.
629  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
630
631  // Note that calling 'getBuffer()' may lazily page in a source file.
632  return getSLocEntry(LocInfo.first).getFile().getContentCache()
633              ->getBuffer()->getBufferStart() + LocInfo.second;
634}
635
636
637/// getColumnNumber - Return the column # for the specified file position.
638/// this is significantly cheaper to compute than the line number.
639unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
640  const char *Buf = getBuffer(FID)->getBufferStart();
641
642  unsigned LineStart = FilePos;
643  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
644    --LineStart;
645  return FilePos-LineStart+1;
646}
647
648unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
649  if (Loc.isInvalid()) return 0;
650  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
651  return getColumnNumber(LocInfo.first, LocInfo.second);
652}
653
654unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
655  if (Loc.isInvalid()) return 0;
656  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
657  return getColumnNumber(LocInfo.first, LocInfo.second);
658}
659
660
661
662static void ComputeLineNumbers(ContentCache* FI,
663                               llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
664static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
665  // Note that calling 'getBuffer()' may lazily page in the file.
666  const MemoryBuffer *Buffer = FI->getBuffer();
667
668  // Find the file offsets of all of the *physical* source lines.  This does
669  // not look at trigraphs, escaped newlines, or anything else tricky.
670  std::vector<unsigned> LineOffsets;
671
672  // Line #1 starts at char 0.
673  LineOffsets.push_back(0);
674
675  const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
676  const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
677  unsigned Offs = 0;
678  while (1) {
679    // Skip over the contents of the line.
680    // TODO: Vectorize this?  This is very performance sensitive for programs
681    // with lots of diagnostics and in -E mode.
682    const unsigned char *NextBuf = (const unsigned char *)Buf;
683    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
684      ++NextBuf;
685    Offs += NextBuf-Buf;
686    Buf = NextBuf;
687
688    if (Buf[0] == '\n' || Buf[0] == '\r') {
689      // If this is \n\r or \r\n, skip both characters.
690      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
691        ++Offs, ++Buf;
692      ++Offs, ++Buf;
693      LineOffsets.push_back(Offs);
694    } else {
695      // Otherwise, this is a null.  If end of file, exit.
696      if (Buf == End) break;
697      // Otherwise, skip the null.
698      ++Offs, ++Buf;
699    }
700  }
701
702  // Copy the offsets into the FileInfo structure.
703  FI->NumLines = LineOffsets.size();
704  FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
705  std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
706}
707
708/// getLineNumber - Given a SourceLocation, return the spelling line number
709/// for the position indicated.  This requires building and caching a table of
710/// line offsets for the MemoryBuffer, so this is not cheap: use only when
711/// about to emit a diagnostic.
712unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
713  ContentCache *Content;
714  if (LastLineNoFileIDQuery == FID)
715    Content = LastLineNoContentCache;
716  else
717    Content = const_cast<ContentCache*>(getSLocEntry(FID)
718                                        .getFile().getContentCache());
719
720  // If this is the first use of line information for this buffer, compute the
721  /// SourceLineCache for it on demand.
722  if (Content->SourceLineCache == 0)
723    ComputeLineNumbers(Content, ContentCacheAlloc);
724
725  // Okay, we know we have a line number table.  Do a binary search to find the
726  // line number that this character position lands on.
727  unsigned *SourceLineCache = Content->SourceLineCache;
728  unsigned *SourceLineCacheStart = SourceLineCache;
729  unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
730
731  unsigned QueriedFilePos = FilePos+1;
732
733  // FIXME: I would like to be convinced that this code is worth being as
734  // complicated as it is, binary search isn't that slow.
735  //
736  // If it is worth being optimized, then in my opinion it could be more
737  // performant, simpler, and more obviously correct by just "galloping" outward
738  // from the queried file position. In fact, this could be incorporated into a
739  // generic algorithm such as lower_bound_with_hint.
740  //
741  // If someone gives me a test case where this matters, and I will do it! - DWD
742
743  // If the previous query was to the same file, we know both the file pos from
744  // that query and the line number returned.  This allows us to narrow the
745  // search space from the entire file to something near the match.
746  if (LastLineNoFileIDQuery == FID) {
747    if (QueriedFilePos >= LastLineNoFilePos) {
748      // FIXME: Potential overflow?
749      SourceLineCache = SourceLineCache+LastLineNoResult-1;
750
751      // The query is likely to be nearby the previous one.  Here we check to
752      // see if it is within 5, 10 or 20 lines.  It can be far away in cases
753      // where big comment blocks and vertical whitespace eat up lines but
754      // contribute no tokens.
755      if (SourceLineCache+5 < SourceLineCacheEnd) {
756        if (SourceLineCache[5] > QueriedFilePos)
757          SourceLineCacheEnd = SourceLineCache+5;
758        else if (SourceLineCache+10 < SourceLineCacheEnd) {
759          if (SourceLineCache[10] > QueriedFilePos)
760            SourceLineCacheEnd = SourceLineCache+10;
761          else if (SourceLineCache+20 < SourceLineCacheEnd) {
762            if (SourceLineCache[20] > QueriedFilePos)
763              SourceLineCacheEnd = SourceLineCache+20;
764          }
765        }
766      }
767    } else {
768      if (LastLineNoResult < Content->NumLines)
769        SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
770    }
771  }
772
773  // If the spread is large, do a "radix" test as our initial guess, based on
774  // the assumption that lines average to approximately the same length.
775  // NOTE: This is currently disabled, as it does not appear to be profitable in
776  // initial measurements.
777  if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
778    unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
779
780    // Take a stab at guessing where it is.
781    unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
782
783    // Check for -10 and +10 lines.
784    unsigned LowerBound = std::max(int(ApproxPos-10), 0);
785    unsigned UpperBound = std::min(ApproxPos+10, FileLen);
786
787    // If the computed lower bound is less than the query location, move it in.
788    if (SourceLineCache < SourceLineCacheStart+LowerBound &&
789        SourceLineCacheStart[LowerBound] < QueriedFilePos)
790      SourceLineCache = SourceLineCacheStart+LowerBound;
791
792    // If the computed upper bound is greater than the query location, move it.
793    if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
794        SourceLineCacheStart[UpperBound] >= QueriedFilePos)
795      SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
796  }
797
798  unsigned *Pos
799    = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
800  unsigned LineNo = Pos-SourceLineCacheStart;
801
802  LastLineNoFileIDQuery = FID;
803  LastLineNoContentCache = Content;
804  LastLineNoFilePos = QueriedFilePos;
805  LastLineNoResult = LineNo;
806  return LineNo;
807}
808
809unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
810  if (Loc.isInvalid()) return 0;
811  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
812  return getLineNumber(LocInfo.first, LocInfo.second);
813}
814unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
815  if (Loc.isInvalid()) return 0;
816  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
817  return getLineNumber(LocInfo.first, LocInfo.second);
818}
819
820/// getFileCharacteristic - return the file characteristic of the specified
821/// source location, indicating whether this is a normal file, a system
822/// header, or an "implicit extern C" system header.
823///
824/// This state can be modified with flags on GNU linemarker directives like:
825///   # 4 "foo.h" 3
826/// which changes all source locations in the current file after that to be
827/// considered to be from a system header.
828SrcMgr::CharacteristicKind
829SourceManager::getFileCharacteristic(SourceLocation Loc) const {
830  assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
831  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
832  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
833
834  // If there are no #line directives in this file, just return the whole-file
835  // state.
836  if (!FI.hasLineDirectives())
837    return FI.getFileCharacteristic();
838
839  assert(LineTable && "Can't have linetable entries without a LineTable!");
840  // See if there is a #line directive before the location.
841  const LineEntry *Entry =
842    LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
843
844  // If this is before the first line marker, use the file characteristic.
845  if (!Entry)
846    return FI.getFileCharacteristic();
847
848  return Entry->FileKind;
849}
850
851/// Return the filename or buffer identifier of the buffer the location is in.
852/// Note that this name does not respect #line directives.  Use getPresumedLoc
853/// for normal clients.
854const char *SourceManager::getBufferName(SourceLocation Loc) const {
855  if (Loc.isInvalid()) return "<invalid loc>";
856
857  return getBuffer(getFileID(Loc))->getBufferIdentifier();
858}
859
860
861/// getPresumedLoc - This method returns the "presumed" location of a
862/// SourceLocation specifies.  A "presumed location" can be modified by #line
863/// or GNU line marker directives.  This provides a view on the data that a
864/// user should see in diagnostics, for example.
865///
866/// Note that a presumed location is always given as the instantiation point
867/// of an instantiation location, not at the spelling location.
868PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
869  if (Loc.isInvalid()) return PresumedLoc();
870
871  // Presumed locations are always for instantiation points.
872  std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
873
874  const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
875  const SrcMgr::ContentCache *C = FI.getContentCache();
876
877  // To get the source name, first consult the FileEntry (if one exists)
878  // before the MemBuffer as this will avoid unnecessarily paging in the
879  // MemBuffer.
880  const char *Filename =
881    C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
882  unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
883  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
884  SourceLocation IncludeLoc = FI.getIncludeLoc();
885
886  // If we have #line directives in this file, update and overwrite the physical
887  // location info if appropriate.
888  if (FI.hasLineDirectives()) {
889    assert(LineTable && "Can't have linetable entries without a LineTable!");
890    // See if there is a #line directive before this.  If so, get it.
891    if (const LineEntry *Entry =
892          LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
893      // If the LineEntry indicates a filename, use it.
894      if (Entry->FilenameID != -1)
895        Filename = LineTable->getFilename(Entry->FilenameID);
896
897      // Use the line number specified by the LineEntry.  This line number may
898      // be multiple lines down from the line entry.  Add the difference in
899      // physical line numbers from the query point and the line marker to the
900      // total.
901      unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
902      LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
903
904      // Note that column numbers are not molested by line markers.
905
906      // Handle virtual #include manipulation.
907      if (Entry->IncludeOffset) {
908        IncludeLoc = getLocForStartOfFile(LocInfo.first);
909        IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
910      }
911    }
912  }
913
914  return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
915}
916
917//===----------------------------------------------------------------------===//
918// Other miscellaneous methods.
919//===----------------------------------------------------------------------===//
920
921/// \brief Get the source location for the given file:line:col triplet.
922///
923/// If the source file is included multiple times, the source location will
924/// be based upon the first inclusion.
925SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
926                                          unsigned Line, unsigned Col) const {
927  assert(SourceFile && "Null source file!");
928  assert(Line && Col && "Line and column should start from 1!");
929
930  fileinfo_iterator FI = FileInfos.find(SourceFile);
931  if (FI == FileInfos.end())
932    return SourceLocation();
933  ContentCache *Content = FI->second;
934
935  // If this is the first use of line information for this buffer, compute the
936  /// SourceLineCache for it on demand.
937  if (Content->SourceLineCache == 0)
938    ComputeLineNumbers(Content, ContentCacheAlloc);
939
940  if (Line > Content->NumLines)
941    return SourceLocation();
942
943  unsigned FilePos = Content->SourceLineCache[Line - 1];
944  const char *Buf = Content->getBuffer()->getBufferStart() + FilePos;
945  unsigned BufLength = Content->getBuffer()->getBufferEnd() - Buf;
946  unsigned i = 0;
947
948  // Check that the given column is valid.
949  while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
950    ++i;
951  if (i < Col-1)
952    return SourceLocation();
953
954  return getLocForStartOfFile(Content->FirstFID).
955            getFileLocWithOffset(FilePos + Col - 1);
956}
957
958/// \brief Determines the order of 2 source locations in the translation unit.
959///
960/// \returns true if LHS source location comes before RHS, false otherwise.
961bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
962                                              SourceLocation RHS) const {
963  assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
964  if (LHS == RHS)
965    return false;
966
967  std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
968  std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
969
970  // If the source locations are in the same file, just compare offsets.
971  if (LOffs.first == ROffs.first)
972    return LOffs.second < ROffs.second;
973
974  // If we are comparing a source location with multiple locations in the same
975  // file, we get a big win by caching the result.
976
977  if (LastLFIDForBeforeTUCheck == LOffs.first &&
978      LastRFIDForBeforeTUCheck == ROffs.first)
979    return LastResForBeforeTUCheck;
980
981  LastLFIDForBeforeTUCheck = LOffs.first;
982  LastRFIDForBeforeTUCheck = ROffs.first;
983
984  // "Traverse" the include/instantiation stacks of both locations and try to
985  // find a common "ancestor".
986  //
987  // First we traverse the stack of the right location and check each level
988  // against the level of the left location, while collecting all levels in a
989  // "stack map".
990
991  std::map<FileID, unsigned> ROffsMap;
992  ROffsMap[ROffs.first] = ROffs.second;
993
994  while (1) {
995    SourceLocation UpperLoc;
996    const SrcMgr::SLocEntry &Entry = getSLocEntry(ROffs.first);
997    if (Entry.isInstantiation())
998      UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
999    else
1000      UpperLoc = Entry.getFile().getIncludeLoc();
1001
1002    if (UpperLoc.isInvalid())
1003      break; // We reached the top.
1004
1005    ROffs = getDecomposedLoc(UpperLoc);
1006
1007    if (LOffs.first == ROffs.first)
1008      return LastResForBeforeTUCheck = LOffs.second < ROffs.second;
1009
1010    ROffsMap[ROffs.first] = ROffs.second;
1011  }
1012
1013  // We didn't find a common ancestor. Now traverse the stack of the left
1014  // location, checking against the stack map of the right location.
1015
1016  while (1) {
1017    SourceLocation UpperLoc;
1018    const SrcMgr::SLocEntry &Entry = getSLocEntry(LOffs.first);
1019    if (Entry.isInstantiation())
1020      UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1021    else
1022      UpperLoc = Entry.getFile().getIncludeLoc();
1023
1024    if (UpperLoc.isInvalid())
1025      break; // We reached the top.
1026
1027    LOffs = getDecomposedLoc(UpperLoc);
1028
1029    std::map<FileID, unsigned>::iterator I = ROffsMap.find(LOffs.first);
1030    if (I != ROffsMap.end())
1031      return LastResForBeforeTUCheck = LOffs.second < I->second;
1032  }
1033
1034  // No common ancestor.
1035  // Now we are getting into murky waters. Most probably this is because one
1036  // location is in the predefines buffer.
1037
1038  const FileEntry *LEntry =
1039    getSLocEntry(LOffs.first).getFile().getContentCache()->Entry;
1040  const FileEntry *REntry =
1041    getSLocEntry(ROffs.first).getFile().getContentCache()->Entry;
1042
1043  // If the locations are in two memory buffers we give up, we can't answer
1044  // which one should be considered first.
1045  // FIXME: Should there be a way to "include" memory buffers in the translation
1046  // unit ?
1047  assert((LEntry != 0 || REntry != 0) && "Locations in memory buffers.");
1048  (void) REntry;
1049
1050  // Consider the memory buffer as coming before the file in the translation
1051  // unit.
1052  if (LEntry == 0)
1053    return LastResForBeforeTUCheck = true;
1054  else {
1055    assert(REntry == 0 && "Locations in not #included files ?");
1056    return LastResForBeforeTUCheck = false;
1057  }
1058}
1059
1060/// PrintStats - Print statistics to stderr.
1061///
1062void SourceManager::PrintStats() const {
1063  llvm::errs() << "\n*** Source Manager Stats:\n";
1064  llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
1065               << " mem buffers mapped.\n";
1066  llvm::errs() << SLocEntryTable.size() << " SLocEntry's allocated, "
1067               << NextOffset << "B of Sloc address space used.\n";
1068
1069  unsigned NumLineNumsComputed = 0;
1070  unsigned NumFileBytesMapped = 0;
1071  for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
1072    NumLineNumsComputed += I->second->SourceLineCache != 0;
1073    NumFileBytesMapped  += I->second->getSizeBytesMapped();
1074  }
1075
1076  llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
1077               << NumLineNumsComputed << " files with line #'s computed.\n";
1078  llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
1079               << NumBinaryProbes << " binary.\n";
1080}
1081
1082ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
1083